sh2 drc: optimizations for MIPS code emitting

This commit is contained in:
kub 2019-11-09 10:30:57 +01:00
parent 9bd6706dca
commit aaea8e3ecd
6 changed files with 698 additions and 268 deletions

View file

@ -671,6 +671,8 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
literal_insn[pool_index] += move_offs; literal_insn[pool_index] += move_offs;
} }
#define EMITH_HINT_COND(cond) /**/
#define JMP_POS(ptr) { \ #define JMP_POS(ptr) { \
ptr = tcache_ptr; \ ptr = tcache_ptr; \
EMIT(0,M1(PC),0); \ EMIT(0,M1(PC),0); \
@ -721,9 +723,11 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
#define emith_add_r_r_r_lsl_ptr(d, s1, s2, lslimm) \ #define emith_add_r_r_r_lsl_ptr(d, s1, s2, lslimm) \
emith_add_r_r_r_lsl(d, s1, s2, lslimm) emith_add_r_r_r_lsl(d, s1, s2, lslimm)
#define emith_adc_r_r_r_lsl(d, s1, s2, lslimm) \
EOP_ADC_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
#define emith_addf_r_r_r_lsl(d, s1, s2, lslimm) \ #define emith_addf_r_r_r_lsl(d, s1, s2, lslimm) \
EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm) EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm)
#define emith_addf_r_r_r_lsr(d, s1, s2, lslimm) \ #define emith_addf_r_r_r_lsr(d, s1, s2, lslimm) \
EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSR,lslimm) EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSR,lslimm)
@ -733,6 +737,9 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
#define emith_sub_r_r_r_lsl(d, s1, s2, lslimm) \ #define emith_sub_r_r_r_lsl(d, s1, s2, lslimm) \
EOP_SUB_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) EOP_SUB_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
#define emith_sbc_r_r_r_lsl(d, s1, s2, lslimm) \
EOP_SBC_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
#define emith_subf_r_r_r_lsl(d, s1, s2, lslimm) \ #define emith_subf_r_r_r_lsl(d, s1, s2, lslimm) \
EOP_SUB_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm) EOP_SUB_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm)
@ -741,10 +748,11 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
#define emith_or_r_r_r_lsl(d, s1, s2, lslimm) \ #define emith_or_r_r_r_lsl(d, s1, s2, lslimm) \
EOP_ORR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) EOP_ORR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
#define emith_or_r_r_r_lsr(d, s1, s2, lsrimm) \
EOP_ORR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSR,lsrimm)
#define emith_eor_r_r_r_lsl(d, s1, s2, lslimm) \ #define emith_eor_r_r_r_lsl(d, s1, s2, lslimm) \
EOP_EOR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) EOP_EOR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
#define emith_eor_r_r_r_lsr(d, s1, s2, lsrimm) \ #define emith_eor_r_r_r_lsr(d, s1, s2, lsrimm) \
EOP_EOR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSR,lsrimm) EOP_EOR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSR,lsrimm)
@ -753,13 +761,20 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
#define emith_or_r_r_lsl(d, s, lslimm) \ #define emith_or_r_r_lsl(d, s, lslimm) \
emith_or_r_r_r_lsl(d, d, s, lslimm) emith_or_r_r_r_lsl(d, d, s, lslimm)
#define emith_or_r_r_lsr(d, s, lsrimm) \
emith_or_r_r_r_lsr(d, d, s, lsrimm)
#define emith_eor_r_r_lsl(d, s, lslimm) \
emith_eor_r_r_r_lsl(d, d, s, lslimm)
#define emith_eor_r_r_lsr(d, s, lsrimm) \ #define emith_eor_r_r_lsr(d, s, lsrimm) \
emith_eor_r_r_r_lsr(d, d, s, lsrimm) emith_eor_r_r_r_lsr(d, d, s, lsrimm)
#define emith_add_r_r_r(d, s1, s2) \ #define emith_add_r_r_r(d, s1, s2) \
emith_add_r_r_r_lsl(d, s1, s2, 0) emith_add_r_r_r_lsl(d, s1, s2, 0)
#define emith_adc_r_r_r(d, s1, s2) \
emith_adc_r_r_r_lsl(d, s1, s2, 0)
#define emith_addf_r_r_r(d, s1, s2) \ #define emith_addf_r_r_r(d, s1, s2) \
emith_addf_r_r_r_lsl(d, s1, s2, 0) emith_addf_r_r_r_lsl(d, s1, s2, 0)
@ -769,6 +784,9 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
#define emith_sub_r_r_r(d, s1, s2) \ #define emith_sub_r_r_r(d, s1, s2) \
emith_sub_r_r_r_lsl(d, s1, s2, 0) emith_sub_r_r_r_lsl(d, s1, s2, 0)
#define emith_sbc_r_r_r(d, s1, s2) \
emith_sbc_r_r_r_lsl(d, s1, s2, 0)
#define emith_subf_r_r_r(d, s1, s2) \ #define emith_subf_r_r_r(d, s1, s2) \
emith_subf_r_r_r_lsl(d, s1, s2, 0) emith_subf_r_r_r_lsl(d, s1, s2, 0)
@ -790,11 +808,17 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
#define emith_add_r_r_ptr(d, s) \ #define emith_add_r_r_ptr(d, s) \
emith_add_r_r_r(d, d, s) emith_add_r_r_r(d, d, s)
#define emith_adc_r_r(d, s) \
emith_adc_r_r_r(d, d, s)
#define emith_sub_r_r(d, s) \ #define emith_sub_r_r(d, s) \
emith_sub_r_r_r(d, d, s) emith_sub_r_r_r(d, d, s)
#define emith_adc_r_r(d, s) \ #define emith_sbc_r_r(d, s) \
EOP_ADC_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0) emith_sbc_r_r_r(d, d, s)
#define emith_negc_r_r(d, s) \
EOP_C_DOP_IMM(A_COND_AL,A_OP_RSC,0,s,d,0,0)
#define emith_and_r_r_c(cond, d, s) \ #define emith_and_r_r_c(cond, d, s) \
EOP_AND_REG(cond,0,d,d,s,A_AM1_LSL,0) EOP_AND_REG(cond,0,d,d,s,A_AM1_LSL,0)
@ -987,9 +1011,13 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
#define emith_rolcf(d) \ #define emith_rolcf(d) \
emith_adcf_r_r(d, d) emith_adcf_r_r(d, d)
#define emith_rolc(d) \
emith_adc_r_r(d, d)
#define emith_rorcf(d) \ #define emith_rorcf(d) \
EOP_MOV_REG(A_COND_AL,1,d,d,A_AM1_ROR,0) /* ROR #0 -> RRX */ EOP_MOV_REG(A_COND_AL,1,d,d,A_AM1_ROR,0) /* ROR #0 -> RRX */
#define emith_rorc(d) \
EOP_MOV_REG(A_COND_AL,0,d,d,A_AM1_ROR,0) /* ROR #0 -> RRX */
#define emith_negcf_r_r(d, s) \ #define emith_negcf_r_r(d, s) \
EOP_C_DOP_IMM(A_COND_AL,A_OP_RSC,1,s,d,0,0) EOP_C_DOP_IMM(A_COND_AL,A_OP_RSC,1,s,d,0,0)
@ -1329,6 +1357,18 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
} \ } \
} while (0) } while (0)
// Transfer the T bit (bit 0 of srr) into the host carry flag; srr itself is
// left unchanged in both paths.
// NOTE(review): for is_sub the bit is inverted before being rotated into C --
// presumably because ARM subtraction uses an inverted carry (borrow); confirm.
#define emith_t_to_carry(srr, is_sub) do { \
if (is_sub) { \
int t_ = rcache_get_tmp(); \
emith_eor_r_r_imm(t_, srr, 1); \
emith_rorf(t_, t_, 1); /* bit 0 of t_ -> C */ \
rcache_free_tmp(t_); \
} else { \
emith_rorf(srr, srr, 1); /* bit 0 of srr -> C */ \
emith_rol(srr, srr, 1); /* undo the rotate, restoring srr */ \
} \
} while (0)
#define emith_tpop_carry(sr, is_sub) do { \ #define emith_tpop_carry(sr, is_sub) do { \
if (is_sub) \ if (is_sub) \
emith_eor_r_imm(sr, 1); \ emith_eor_r_imm(sr, 1); \

View file

@ -370,6 +370,8 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE };
JMP_EMIT_NC(else_ptr); \ JMP_EMIT_NC(else_ptr); \
} }
#define EMITH_HINT_COND(cond) /**/
// "simple" jump (no more then a few insns) // "simple" jump (no more then a few insns)
// ARM32 will use conditional instructions here // ARM32 will use conditional instructions here
#define EMITH_SJMP_START EMITH_JMP_START #define EMITH_SJMP_START EMITH_JMP_START
@ -414,6 +416,24 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE };
#define emith_addf_r_r_r_lsr(d, s1, s2, simm) \ #define emith_addf_r_r_r_lsr(d, s1, s2, simm) \
EMIT(A64_ADDS_REG(d, s1, s2, ST_LSR, simm)) EMIT(A64_ADDS_REG(d, s1, s2, ST_LSR, simm))
// d = s1 + (s2 << simm) + C. A64 ADC has no shifted-register form, so a
// nonzero shift goes through a temporary register first; simm == 0 avoids
// the extra insns.
// fix: the expansion ended in '} while (0)' but the opening 'do {' was
// missing, leaving the statement braces unbalanced
#define emith_adc_r_r_r_lsl(d, s1, s2, simm) do { \
if (simm) { int _t = rcache_get_tmp(); \
emith_lsl(_t, s2, simm); \
emith_adc_r_r_r(d, s1, _t); \
rcache_free_tmp(_t); \
} else \
emith_adc_r_r_r(d, s1, s2); \
} while (0)
// d = s1 - (s2 << simm) - !C. A64 SBC has no shifted-register form, so a
// nonzero shift goes through a temporary register first; simm == 0 avoids
// the extra insns.
// fix: the expansion ended in '} while (0)' but the opening 'do {' was
// missing, leaving the statement braces unbalanced
#define emith_sbc_r_r_r_lsl(d, s1, s2, simm) do { \
if (simm) { int _t = rcache_get_tmp(); \
emith_lsl(_t, s2, simm); \
emith_sbc_r_r_r(d, s1, _t); \
rcache_free_tmp(_t); \
} else \
emith_sbc_r_r_r(d, s1, s2); \
} while (0)
#define emith_sub_r_r_r_lsl(d, s1, s2, simm) \ #define emith_sub_r_r_r_lsl(d, s1, s2, simm) \
EMIT(A64_SUB_REG(d, s1, s2, ST_LSL, simm)) EMIT(A64_SUB_REG(d, s1, s2, ST_LSL, simm))
@ -422,10 +442,11 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE };
#define emith_or_r_r_r_lsl(d, s1, s2, simm) \ #define emith_or_r_r_r_lsl(d, s1, s2, simm) \
EMIT(A64_OR_REG(d, s1, s2, ST_LSL, simm)) EMIT(A64_OR_REG(d, s1, s2, ST_LSL, simm))
#define emith_or_r_r_r_lsr(d, s1, s2, simm) \
EMIT(A64_OR_REG(d, s1, s2, ST_LSR, simm))
#define emith_eor_r_r_r_lsl(d, s1, s2, simm) \ #define emith_eor_r_r_r_lsl(d, s1, s2, simm) \
EMIT(A64_EOR_REG(d, s1, s2, ST_LSL, simm)) EMIT(A64_EOR_REG(d, s1, s2, ST_LSL, simm))
#define emith_eor_r_r_r_lsr(d, s1, s2, simm) \ #define emith_eor_r_r_r_lsr(d, s1, s2, simm) \
EMIT(A64_EOR_REG(d, s1, s2, ST_LSR, simm)) EMIT(A64_EOR_REG(d, s1, s2, ST_LSR, simm))
@ -434,7 +455,11 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE };
#define emith_or_r_r_lsl(d, s, lslimm) \ #define emith_or_r_r_lsl(d, s, lslimm) \
emith_or_r_r_r_lsl(d, d, s, lslimm) emith_or_r_r_r_lsl(d, d, s, lslimm)
#define emith_or_r_r_lsr(d, s, lsrimm) \
emith_or_r_r_r_lsr(d, d, s, lsrimm)
#define emith_eor_r_r_lsl(d, s, lslimm) \
emith_eor_r_r_r_lsl(d, d, s, lslimm)
#define emith_eor_r_r_lsr(d, s, lsrimm) \ #define emith_eor_r_r_lsr(d, s, lsrimm) \
emith_eor_r_r_r_lsr(d, d, s, lsrimm) emith_eor_r_r_r_lsr(d, d, s, lsrimm)
@ -472,6 +497,9 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE };
#define emith_neg_r_r(d, s) \ #define emith_neg_r_r(d, s) \
EMIT(A64_NEG_REG(d, s, ST_LSL, 0)) EMIT(A64_NEG_REG(d, s, ST_LSL, 0))
#define emith_negc_r_r(d, s) \
EMIT(A64_NEGC_REG(d, s))
#define emith_adc_r_r_r(d, s1, s2) \ #define emith_adc_r_r_r(d, s1, s2) \
EMIT(A64_ADC_REG(d, s1, s2)) EMIT(A64_ADC_REG(d, s1, s2))
@ -481,6 +509,9 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE };
#define emith_adcf_r_r_r(d, s1, s2) \ #define emith_adcf_r_r_r(d, s1, s2) \
EMIT(A64_ADCS_REG(d, s1, s2)) EMIT(A64_ADCS_REG(d, s1, s2))
#define emith_sbc_r_r_r(d, s1, s2) \
EMIT(A64_SBC_REG(d, s1, s2))
#define emith_sbcf_r_r_r(d, s1, s2) \ #define emith_sbcf_r_r_r(d, s1, s2) \
EMIT(A64_SBCS_REG(d, s1, s2)) EMIT(A64_SBCS_REG(d, s1, s2))
@ -806,12 +837,19 @@ static void emith_log_imm(int op, int wx, int rd, int rn, u32 imm)
#define emith_rolcf(d) \ #define emith_rolcf(d) \
emith_adcf_r_r(d, d) emith_adcf_r_r(d, d)
#define emith_rolc(d) \
emith_adc_r_r(d, d)
#define emith_rorcf(d) do { \ #define emith_rorcf(d) do { \
EMIT(A64_RBIT_REG(d, d)); \ EMIT(A64_RBIT_REG(d, d)); \
emith_adcf_r_r(d, d); \ emith_adcf_r_r(d, d); \
EMIT(A64_RBIT_REG(d, d)); \ EMIT(A64_RBIT_REG(d, d)); \
} while (0) } while (0)
#define emith_rorc(d) do { \
EMIT(A64_RBIT_REG(d, d)); \
emith_adc_r_r(d, d); \
EMIT(A64_RBIT_REG(d, d)); \
} while (0)
// signed/unsigned extend // signed/unsigned extend
#define emith_clear_msb(d, s, count) /* bits to clear */ \ #define emith_clear_msb(d, s, count) /* bits to clear */ \
@ -1286,6 +1324,18 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
emith_eor_r_imm(sr, 1); \ emith_eor_r_imm(sr, 1); \
} while (0) } while (0)
// Transfer the T bit (bit 0 of srr) into the host carry flag; srr itself is
// left unchanged in both paths.
// NOTE(review): for is_sub the bit is inverted before being rotated into C --
// presumably because the subtract carry is an inverted borrow here; confirm.
#define emith_t_to_carry(srr, is_sub) do { \
if (is_sub) { \
int t_ = rcache_get_tmp(); \
emith_eor_r_r_imm(t_, srr, 1); \
emith_rorf(t_, t_, 1); /* bit 0 of t_ -> C */ \
rcache_free_tmp(t_); \
} else { \
emith_rorf(srr, srr, 1); /* bit 0 of srr -> C */ \
emith_rol(srr, srr, 1); /* undo the rotate, restoring srr */ \
} \
} while (0)
#define emith_tpop_carry(sr, is_sub) do { \ #define emith_tpop_carry(sr, is_sub) do { \
if (is_sub) \ if (is_sub) \
emith_eor_r_imm(sr, 1); \ emith_eor_r_imm(sr, 1); \

View file

@ -173,15 +173,17 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 };
MIPS_OP_REG(FN_JALR,rd,rs,_) MIPS_OP_REG(FN_JALR,rd,rs,_)
// conditional branches; no condition code, these compare rs against rt or Z0 // conditional branches; no condition code, these compare rs against rt or Z0
#define MIPS_BEQ (OP_BEQ << 5) #define MIPS_BEQ (OP_BEQ << 5) // rs == rt (rt in lower 5 bits)
#define MIPS_BNE (OP_BNE << 5) #define MIPS_BNE (OP_BNE << 5) // rs != rt (ditto)
#define MIPS_BLE (OP_BLEZ << 5) #define MIPS_BLE (OP_BLEZ << 5) // rs <= 0
#define MIPS_BGT (OP_BGTZ << 5) #define MIPS_BGT (OP_BGTZ << 5) // rs > 0
#define MIPS_BLT ((OP__RT << 5)|RT_BLTZ) #define MIPS_BLT ((OP__RT << 5)|RT_BLTZ) // rs < 0
#define MIPS_BGE ((OP__RT << 5)|RT_BGEZ) #define MIPS_BGE ((OP__RT << 5)|RT_BGEZ) // rs >= 0
#define MIPS_BGTL ((OP__RT << 5)|RT_BLTZAL) #define MIPS_BGTL ((OP__RT << 5)|RT_BLTZAL) // rs > 0, link $ra if jumping
#define MIPS_BGEL ((OP__RT << 5)|RT_BGEZAL) #define MIPS_BGEL ((OP__RT << 5)|RT_BGEZAL) // rs >= 0, link $ra if jumping
#define MIPS_BCOND(cond, rs, rt, offs16) \
MIPS_OP_IMM((cond >> 5), rt, rs, (offs16) >> 2)
#define MIPS_BCONDZ(cond, rs, offs16) \ #define MIPS_BCONDZ(cond, rs, offs16) \
MIPS_OP_IMM((cond >> 5), (cond & 0x1f), rs, (offs16) >> 2) MIPS_OP_IMM((cond >> 5), (cond & 0x1f), rs, (offs16) >> 2)
#define MIPS_B(offs16) \ #define MIPS_B(offs16) \
@ -216,25 +218,26 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 };
ptr = (void *)((u8 *)(ptr) + sizeof(u32)); \ ptr = (void *)((u8 *)(ptr) + sizeof(u32)); \
} while (0) } while (0)
// FIFO for 2 instructions, for delay slot handling // FIFO for some instructions, for delay slot handling
static u32 emith_last_insns[2] = { -1,-1 }; #define FSZ 4
static int emith_last_idx, emith_last_cnt; static u32 emith_last_insns[FSZ];
static unsigned emith_last_idx, emith_last_cnt;
#define EMIT_PUSHOP() \ #define EMIT_PUSHOP() \
do { \ do { \
emith_last_idx ^= 1; \ if (emith_last_cnt > 0) { \
if (emith_last_insns[emith_last_idx] != -1) { \
u32 *p = (u32 *)tcache_ptr - emith_last_cnt; \ u32 *p = (u32 *)tcache_ptr - emith_last_cnt; \
EMIT_PTR(p, emith_last_insns[emith_last_idx]);\ int idx = (emith_last_idx - emith_last_cnt+1) %FSZ; \
EMIT_PTR(p, emith_last_insns[idx]);\
emith_last_cnt --; \ emith_last_cnt --; \
} \ } \
emith_last_insns[emith_last_idx] = -1; \
} while (0) } while (0)
#define EMIT(op) \ #define EMIT(op) \
do { \ do { \
EMIT_PUSHOP(); \ if (emith_last_cnt >= FSZ) EMIT_PUSHOP(); \
tcache_ptr = (void *)((u32 *)tcache_ptr + 1); \ tcache_ptr = (void *)((u32 *)tcache_ptr + 1); \
emith_last_idx = (emith_last_idx+1) %FSZ; \
emith_last_insns[emith_last_idx] = op; \ emith_last_insns[emith_last_idx] = op; \
emith_last_cnt ++; \ emith_last_cnt ++; \
COUNT_OP; \ COUNT_OP; \
@ -242,7 +245,8 @@ static int emith_last_idx, emith_last_cnt;
#define emith_flush() \ #define emith_flush() \
do { \ do { \
int i; for (i = 0; i < 2; i++) EMIT_PUSHOP(); \ while (emith_last_cnt) EMIT_PUSHOP(); \
emith_flg_hint = _FHV|_FHC; \
} while (0) } while (0)
#define emith_insn_ptr() (u8 *)((u32 *)tcache_ptr - emith_last_cnt) #define emith_insn_ptr() (u8 *)((u32 *)tcache_ptr - emith_last_cnt)
@ -279,11 +283,12 @@ static int emith_rt(u32 op)
return emith_has_(rt,2,op,26,0x3f) ? (op>>16)&0x1f : 0; return emith_has_(rt,2,op,26,0x3f) ? (op>>16)&0x1f : 0;
} }
static int emith_rd(u32 op) static int emith_rd(u32 op)
{ if ((op>>26) == OP__FN) { int ret = emith_has_(rd,2,op,26,0x3f) ? (op>>16)&0x1f :-1;
return emith_has_(rd,0,op, 0,0x3f) ? (op>>11)&0x1f :-1; if ((op>>26) == OP__FN)
ret = emith_has_(rd,0,op, 0,0x3f) ? (op>>11)&0x1f :-1;
if ((op>>26) == OP__RT) if ((op>>26) == OP__RT)
return -1; ret = -1;
return emith_has_(rd,2,op,26,0x3f) ? (op>>16)&0x1f :-1; return (ret ?: -1); // Z0 doesn't have dependencies
} }
static int emith_b_isswap(u32 bop, u32 lop) static int emith_b_isswap(u32 bop, u32 lop)
@ -292,48 +297,56 @@ static int emith_b_isswap(u32 bop, u32 lop)
return bop; return bop;
else if (emith_is_jr(bop) && emith_rd(lop) != emith_rs(bop)) else if (emith_is_jr(bop) && emith_rd(lop) != emith_rs(bop))
return bop; return bop;
else if (emith_is_b(bop) && emith_rd(lop) != emith_rs(bop)) else if (emith_is_b(bop) && emith_rd(lop) != emith_rs(bop) &&
emith_rd(lop) != emith_rt(bop))
if ((bop & 0xffff) != 0x7fff) // displacement overflow? if ((bop & 0xffff) != 0x7fff) // displacement overflow?
return (bop & 0xffff0000) | ((bop+1) & 0x0000ffff); return (bop & 0xffff0000) | ((bop+1) & 0x0000ffff);
return 0; return 0;
} }
// check whether two insns may be exchanged without changing semantics:
// allowed iff neither insn writes a register the other one reads or writes
static int emith_insn_swappable(u32 op1, u32 op2)
{
	int wr1 = emith_rd(op1), wr2 = emith_rd(op2);

	if (wr1 == wr2)
		return 0;	// both write the same register
	if (emith_rs(op1) == wr2 || emith_rt(op1) == wr2)
		return 0;	// op1 reads what op2 writes
	if (emith_rs(op2) == wr1 || emith_rt(op2) == wr1)
		return 0;	// op2 reads what op1 writes
	return 1;
}
// emit branch, trying to fill the delay slot with one of the last insns // emit branch, trying to fill the delay slot with one of the last insns
static void *emith_branch(u32 op) static void *emith_branch(u32 op)
{ {
int idx = emith_last_idx; unsigned idx = emith_last_idx, ds = idx;
u32 op1 = emith_last_insns[idx], op2 = emith_last_insns[idx^1]; u32 bop = 0, sop;
u32 bop = 0;
void *bp; void *bp;
int i, j, s;
// check last insn (op1) // check for ds insn; older mustn't interact with newer ones to overtake
if (op1 != -1 && op1) for (i = 0; i < emith_last_cnt && !bop; i++) {
bop = emith_b_isswap(op, op1); ds = (idx-i)%FSZ;
// if not, check older insn (op2); mustn't interact with op1 to overtake sop = emith_last_insns[ds];
if (!bop && op2 != -1 && op2 && emith_rd(op1) != emith_rd(op2) && for (j = i, s = 1; j > 0 && s; j--)
emith_rs(op1) != emith_rd(op2) && emith_rt(op1) != emith_rd(op2) && s = emith_insn_swappable(emith_last_insns[(ds+j)%FSZ], sop);
emith_rs(op2) != emith_rd(op1) && emith_rt(op2) != emith_rd(op1)) { if (s)
idx ^= 1; bop = emith_b_isswap(op, sop);
bop = emith_b_isswap(op, op2);
} }
// flush FIFO and branch // flush FIFO, but omit delay slot insn
tcache_ptr = (void *)((u32 *)tcache_ptr - emith_last_cnt); tcache_ptr = (void *)((u32 *)tcache_ptr - emith_last_cnt);
if (emith_last_insns[idx^1] != -1) idx = (idx-emith_last_cnt+1)%FSZ;
EMIT_PTR(tcache_ptr, emith_last_insns[idx^1]); for (i = emith_last_cnt; i > 0; i--, idx = (idx+1)%FSZ)
if (!bop || idx != ds)
EMIT_PTR(tcache_ptr, emith_last_insns[idx]);
emith_last_cnt = 0;
// emit branch and delay slot
bp = tcache_ptr;
if (bop) { // can swap if (bop) { // can swap
bp = tcache_ptr;
EMIT_PTR(tcache_ptr, bop); COUNT_OP; EMIT_PTR(tcache_ptr, bop); COUNT_OP;
EMIT_PTR(tcache_ptr, emith_last_insns[idx]); EMIT_PTR(tcache_ptr, emith_last_insns[ds]);
} else { // can't swap } else { // can't swap
if (emith_last_insns[idx] != -1)
EMIT_PTR(tcache_ptr, emith_last_insns[idx]);
bp = tcache_ptr;
EMIT_PTR(tcache_ptr, op); COUNT_OP; EMIT_PTR(tcache_ptr, op); COUNT_OP;
EMIT_PTR(tcache_ptr, MIPS_NOP); COUNT_OP; EMIT_PTR(tcache_ptr, MIPS_NOP); COUNT_OP;
} }
emith_last_insns[0] = emith_last_insns[1] = -1;
emith_last_cnt = 0;
return bp; return bp;
} }
@ -403,34 +416,56 @@ static void *emith_branch(u32 op)
// flag emulation creates 2 (ie cmp #0/beq) up to 9 (ie adcf/ble) extra insns. // flag emulation creates 2 (ie cmp #0/beq) up to 9 (ie adcf/ble) extra insns.
// flag handling shortcuts may reduce this by 1-4 insns, see emith_cond_check() // flag handling shortcuts may reduce this by 1-4 insns, see emith_cond_check()
static int emith_flg_rs, emith_flg_rt; // registers used in FNZ=rs-rt (cmp_r_r) static int emith_cmp_rs, emith_cmp_rt; // registers used in cmp_r_r/cmp_r_imm
static s32 emith_cmp_imm; // immediate value used in cmp_r_imm
enum { _FHC=1, _FHV=2 } emith_flg_hint; // C/V flag usage hinted by compiler
static int emith_flg_noV; // V flag known not to be set static int emith_flg_noV; // V flag known not to be set
// record which emulated flags (C and/or V) the upcoming conditional needs in
// emith_flg_hint, so emith_set_arith_flags() can skip computing unused ones
#define EMITH_HINT_COND(cond) do { \
/* only need to check cond>>1 since the lowest bit inverts the cond */ \
unsigned _mv = BITMASK3(DCOND_VS>>1,DCOND_GE>>1,DCOND_GT>>1); \
unsigned _mc = _mv | BITMASK2(DCOND_HS>>1,DCOND_HI>>1); \
emith_flg_hint = (_mv & BITMASK1(cond >> 1) ? _FHV : 0); \
emith_flg_hint |= (_mc & BITMASK1(cond >> 1) ? _FHC : 0); \
} while (0)
// store minimal cc information: rd, rt^rs, carry // store minimal cc information: rd, rt^rs, carry
// NB: the result *must* first go to FNZ, in case rd == rs or rd == rt. // NB: the result *must* first go to FNZ, in case rd == rs or rd == rt.
// NB: for adcf and sbcf, carry-in must be dealt with separately (see there) // NB: for adcf and sbcf, carry-in must be dealt with separately (see there)
static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub) static void emith_set_arith_flags(int rd, int rs, int rt, s32 imm, int sub)
{ {
if (sub && rd == FNZ && rt > AT && rs > AT) // is this cmp_r_r? if (emith_flg_hint & _FHC) {
emith_flg_rs = rs, emith_flg_rt = rt;
else emith_flg_rs = emith_flg_rt = 0;
if (sub) // C = sub:rt<rd, add:rd<rt if (sub) // C = sub:rt<rd, add:rd<rt
EMIT(MIPS_SLTU_REG(FC, rt, FNZ)); EMIT(MIPS_SLTU_REG(FC, rs, FNZ));
else EMIT(MIPS_SLTU_REG(FC, FNZ, rt));// C in FC, bit 0 else EMIT(MIPS_SLTU_REG(FC, FNZ, rs));// C in FC, bit 0
}
if (emith_flg_hint & _FHV) {
emith_flg_noV = 0; emith_flg_noV = 0;
if (rs > 0) // Nt^Ns if (rt >= 0) // Nt^Ns in FV, bit 31
EMIT(MIPS_XOR_REG(FV, rt, rs)); EMIT(MIPS_XOR_REG(FV, rs, rt));
else if (imm < 0) else if (imm == 0)
EMIT(MIPS_NOR_REG(FV, rt, Z0)); emith_flg_noV = 1; // imm #0 can't overflow
else if (imm > 0) else if ((imm < 0) == !sub)
EMIT(MIPS_OR_REG(FV, rt, Z0)); // Nt^Ns in FV, bit 31 EMIT(MIPS_NOR_REG(FV, rs, Z0));
else emith_flg_noV = 1; // imm #0, never overflows else if ((imm > 0) == !sub)
EMIT(MIPS_OR_REG(FV, rs, Z0));
}
// full V = Nd^Nt^Ns^C calculation is deferred until really needed // full V = Nd^Nt^Ns^C calculation is deferred until really needed
if (rd != FNZ) if (rd && rd != FNZ)
EMIT(MIPS_MOVE_REG(rd, FNZ)); // N,Z via result value in FNZ EMIT(MIPS_MOVE_REG(rd, FNZ)); // N,Z via result value in FNZ
emith_cmp_rs = emith_cmp_rt = -1;
}
// MIPS has no flags register, but it does have less-than and compare-branch
// insns; so a cmp is not evaluated here at all -- the operands are merely
// recorded, and emith_cond_check() later emits the best suited MIPS insn for
// the requested condition. Covers all conditions except VC/VS, which are
// fortunately never used with cmp.
static void emith_set_compare_flags(int rs, int rt, s32 imm)
{
	emith_cmp_imm = imm;	// only meaningful when rt < 0 (cmp_r_imm)
	emith_cmp_rt = rt;
	emith_cmp_rs = rs;
}
// data processing, register // data processing, register
@ -510,6 +545,13 @@ static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub)
} else EMIT(MIPS_OR_REG(d, s1, s2)); \ } else EMIT(MIPS_OR_REG(d, s1, s2)); \
} while (0) } while (0)
// d = s1 | (s2 >> simm); MIPS has no shifted operands, so a nonzero shift
// goes through the AT scratch register
#define emith_or_r_r_r_lsr(d, s1, s2, simm) do { \
if (simm) { \
EMIT(MIPS_LSR_IMM(AT, s2, simm)); \
EMIT(MIPS_OR_REG(d, s1, AT)); \
} else EMIT(MIPS_OR_REG(d, s1, s2)); \
} while (0)
#define emith_eor_r_r_r_lsl(d, s1, s2, simm) do { \ #define emith_eor_r_r_r_lsl(d, s1, s2, simm) do { \
if (simm) { \ if (simm) { \
EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ EMIT(MIPS_LSL_IMM(AT, s2, simm)); \
@ -533,7 +575,11 @@ static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub)
#define emith_or_r_r_lsl(d, s, lslimm) \ #define emith_or_r_r_lsl(d, s, lslimm) \
emith_or_r_r_r_lsl(d, d, s, lslimm) emith_or_r_r_r_lsl(d, d, s, lslimm)
#define emith_or_r_r_lsr(d, s, lsrimm) \
emith_or_r_r_r_lsr(d, d, s, lsrimm)
#define emith_eor_r_r_lsl(d, s, lslimm) \
emith_eor_r_r_r_lsl(d, d, s, lslimm)
#define emith_eor_r_r_lsr(d, s, lsrimm) \ #define emith_eor_r_r_lsr(d, s, lsrimm) \
emith_eor_r_r_r_lsr(d, d, s, lsrimm) emith_eor_r_r_r_lsr(d, d, s, lsrimm)
@ -570,13 +616,21 @@ static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub)
EMIT(MIPS_NEG_REG(d, s)) EMIT(MIPS_NEG_REG(d, s))
#define emith_adc_r_r_r(d, s1, s2) do { \ #define emith_adc_r_r_r(d, s1, s2) do { \
emith_add_r_r_r(AT, s1, FC); \ emith_add_r_r_r(AT, s2, FC); \
emith_add_r_r_r(d, AT, s2); \ emith_add_r_r_r(d, s1, AT); \
} while (0)
#define emith_sbc_r_r_r(d, s1, s2) do { \
emith_add_r_r_r(AT, s2, FC); \
emith_sub_r_r_r(d, s1, AT); \
} while (0) } while (0)
#define emith_adc_r_r(d, s) \ #define emith_adc_r_r(d, s) \
emith_adc_r_r_r(d, d, s) emith_adc_r_r_r(d, d, s)
// negate with carry: d = 0 - s - FC (Z0 is the hardwired zero register)
#define emith_negc_r_r(d, s) \
emith_sbc_r_r_r(d, Z0, s)
// NB: the incoming carry Cin can cause Cout if s2+Cin=0 (or s1+Cin=0 FWIW) // NB: the incoming carry Cin can cause Cout if s2+Cin=0 (or s1+Cin=0 FWIW)
// moreover, if s2+Cin=0 caused Cout, s1+s2+Cin=s1+0 can't cause another Cout // moreover, if s2+Cin=0 caused Cout, s1+s2+Cin=s1+0 can't cause another Cout
#define emith_adcf_r_r_r(d, s1, s2) do { \ #define emith_adcf_r_r_r(d, s1, s2) do { \
@ -606,16 +660,23 @@ static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub)
#define emith_eor_r_r(d, s) \ #define emith_eor_r_r(d, s) \
emith_eor_r_r_r(d, d, s) emith_eor_r_r_r(d, d, s)
#define emith_tst_r_r_ptr(d, s) \ #define emith_tst_r_r_ptr(d, s) do { \
emith_and_r_r_r(FNZ, d, s) if (d != s) { \
emith_and_r_r_r(FNZ, d, s); \
emith_cmp_rs = emith_cmp_rt = -1; \
} else emith_cmp_rs = s, emith_cmp_rt = Z0; \
} while (0)
#define emith_tst_r_r(d, s) \ #define emith_tst_r_r(d, s) \
emith_tst_r_r_ptr(d, s) emith_tst_r_r_ptr(d, s)
#define emith_teq_r_r(d, s) \ #define emith_teq_r_r(d, s) do { \
emith_eor_r_r_r(FNZ, d, s) emith_eor_r_r_r(FNZ, d, s); \
emith_cmp_rs = emith_cmp_rt = -1; \
} while (0)
#define emith_cmp_r_r(d, s) \ #define emith_cmp_r_r(d, s) \
emith_subf_r_r_r(FNZ, d, s) emith_set_compare_flags(d, s, 0)
// emith_subf_r_r_r(FNZ, d, s)
#define emith_addf_r_r(d, s) \ #define emith_addf_r_r(d, s) \
emith_addf_r_r_r(d, d, s) emith_addf_r_r_r(d, d, s)
@ -705,8 +766,8 @@ static void emith_arith_imm(int op, int rd, int rs, u32 imm)
emith_adcf_r_r_imm(r, r, imm) emith_adcf_r_r_imm(r, r, imm)
#define emith_cmp_r_imm(r, imm) \ #define emith_cmp_r_imm(r, imm) \
emith_subf_r_r_imm(FNZ, r, (s16)imm) emith_set_compare_flags(r, -1, imm)
// emith_subf_r_r_imm(FNZ, r, (s16)imm)
#define emith_add_r_r_ptr_imm(d, s, imm) \ #define emith_add_r_r_ptr_imm(d, s, imm) \
emith_arith_imm(OP_ADDIU, d, s, imm) emith_arith_imm(OP_ADDIU, d, s, imm)
@ -716,7 +777,7 @@ static void emith_arith_imm(int op, int rd, int rs, u32 imm)
#define emith_addf_r_r_imm(d, s, imm) do { \ #define emith_addf_r_r_imm(d, s, imm) do { \
emith_add_r_r_imm(FNZ, s, imm); \ emith_add_r_r_imm(FNZ, s, imm); \
emith_set_arith_flags(d, s, 0, imm, 0); \ emith_set_arith_flags(d, s, -1, imm, 0); \
} while (0) } while (0)
#define emith_adc_r_r_imm(d, s, imm) do { \ #define emith_adc_r_r_imm(d, s, imm) do { \
@ -725,11 +786,16 @@ static void emith_arith_imm(int op, int rd, int rs, u32 imm)
} while (0) } while (0)
#define emith_adcf_r_r_imm(d, s, imm) do { \ #define emith_adcf_r_r_imm(d, s, imm) do { \
if (imm == 0) { \
emith_add_r_r_r(FNZ, s, FC); \
emith_set_arith_flags(d, s, -1, 1, 0); \
} else { \
emith_add_r_r_r(FNZ, s, FC); \ emith_add_r_r_r(FNZ, s, FC); \
EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \ EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \
emith_add_r_r_imm(FNZ, FNZ, imm); \ emith_add_r_r_imm(FNZ, FNZ, imm); \
emith_set_arith_flags(d, s, 0, imm, 0); \ emith_set_arith_flags(d, s, -1, imm, 0); \
emith_or_r_r(FC, AT); \ emith_or_r_r(FC, AT); \
} \
} while (0) } while (0)
// NB: no SUBI in MIPS II, since ADDI takes a signed imm // NB: no SUBI in MIPS II, since ADDI takes a signed imm
@ -740,7 +806,7 @@ static void emith_arith_imm(int op, int rd, int rs, u32 imm)
#define emith_subf_r_r_imm(d, s, imm) do { \ #define emith_subf_r_r_imm(d, s, imm) do { \
emith_sub_r_r_imm(FNZ, s, imm); \ emith_sub_r_r_imm(FNZ, s, imm); \
emith_set_arith_flags(d, s, 0, imm, 1); \ emith_set_arith_flags(d, s, -1, imm, 1); \
} while (0) } while (0)
// logical, immediate // logical, immediate
@ -777,8 +843,10 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm)
#define emith_bic_r_imm_c(cond, r, imm) \ #define emith_bic_r_imm_c(cond, r, imm) \
emith_bic_r_imm(r, imm) emith_bic_r_imm(r, imm)
#define emith_tst_r_imm(r, imm) \ #define emith_tst_r_imm(r, imm) do { \
emith_log_imm(OP_ANDI, FNZ, r, imm) emith_log_imm(OP_ANDI, FNZ, r, imm); \
emith_cmp_rs = emith_cmp_rt = -1; \
} while (0)
#define emith_tst_r_imm_c(cond, r, imm) \ #define emith_tst_r_imm_c(cond, r, imm) \
emith_tst_r_imm(r, imm) emith_tst_r_imm(r, imm)
@ -816,6 +884,17 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm)
EMIT(MIPS_OR_REG(d, d, AT)); \ EMIT(MIPS_OR_REG(d, d, AT)); \
} while (0) } while (0)
// rotate right through carry without setting flags: d = (d >> 1) | (C << 31).
// NOTE(review): FC is not updated with the bit shifted out -- the flag
// setting variant emith_rorcf does that; confirm callers expect this.
// clobbers AT.
#define emith_rorc(d) do { \
emith_lsr(d, d, 1); \
emith_lsl(AT, FC, 31); \
emith_or_r_r(d, AT); \
} while (0)
// rotate left through carry without setting flags: d = (d << 1) | C.
// NOTE(review): FC is not updated with the bit shifted out -- the flag
// setting variant emith_rolcf does that; confirm callers expect this.
#define emith_rolc(d) do { \
emith_lsl(d, d, 1); \
emith_or_r_r(d, FC); \
} while (0)
// NB: all flag setting shifts make V undefined // NB: all flag setting shifts make V undefined
// NB: mips32r2 has EXT (useful for extracting C) // NB: mips32r2 has EXT (useful for extracting C)
#define emith_lslf(d, s, cnt) do { \ #define emith_lslf(d, s, cnt) do { \
@ -829,6 +908,7 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm)
emith_lsl(d, _s, 1); \ emith_lsl(d, _s, 1); \
} \ } \
emith_move_r_r(FNZ, d); \ emith_move_r_r(FNZ, d); \
emith_cmp_rs = emith_cmp_rt = -1; \
} while (0) } while (0)
#define emith_lsrf(d, s, cnt) do { \ #define emith_lsrf(d, s, cnt) do { \
@ -842,6 +922,7 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm)
emith_lsr(d, _s, 1); \ emith_lsr(d, _s, 1); \
} \ } \
emith_move_r_r(FNZ, d); \ emith_move_r_r(FNZ, d); \
emith_cmp_rs = emith_cmp_rt = -1; \
} while (0) } while (0)
#define emith_asrf(d, s, cnt) do { \ #define emith_asrf(d, s, cnt) do { \
@ -855,18 +936,21 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm)
emith_asr(d, _s, 1); \ emith_asr(d, _s, 1); \
} \ } \
emith_move_r_r(FNZ, d); \ emith_move_r_r(FNZ, d); \
emith_cmp_rs = emith_cmp_rt = -1; \
} while (0) } while (0)
#define emith_rolf(d, s, cnt) do { \ #define emith_rolf(d, s, cnt) do { \
emith_rol(d, s, cnt); \ emith_rol(d, s, cnt); \
emith_and_r_r_imm(FC, d, 1); \ emith_and_r_r_imm(FC, d, 1); \
emith_move_r_r(FNZ, d); \ emith_move_r_r(FNZ, d); \
emith_cmp_rs = emith_cmp_rt = -1; \
} while (0) } while (0)
#define emith_rorf(d, s, cnt) do { \ #define emith_rorf(d, s, cnt) do { \
emith_ror(d, s, cnt); \ emith_ror(d, s, cnt); \
emith_lsr(FC, d, 31); \ emith_lsr(FC, d, 31); \
emith_move_r_r(FNZ, d); \ emith_move_r_r(FNZ, d); \
emith_cmp_rs = emith_cmp_rt = -1; \
} while (0) } while (0)
#define emith_rolcf(d) do { \ #define emith_rolcf(d) do { \
@ -875,6 +959,7 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm)
emith_or_r_r(d, FC); \ emith_or_r_r(d, FC); \
emith_move_r_r(FC, AT); \ emith_move_r_r(FC, AT); \
emith_move_r_r(FNZ, d); \ emith_move_r_r(FNZ, d); \
emith_cmp_rs = emith_cmp_rt = -1; \
} while (0) } while (0)
#define emith_rorcf(d) do { \ #define emith_rorcf(d) do { \
@ -884,6 +969,7 @@ static void emith_log_imm(int op, int rd, int rs, u32 imm)
emith_or_r_r(d, FC); \ emith_or_r_r(d, FC); \
emith_move_r_r(FC, AT); \ emith_move_r_r(FC, AT); \
emith_move_r_r(FNZ, d); \ emith_move_r_r(FNZ, d); \
emith_cmp_rs = emith_cmp_rt = -1; \
} while (0) } while (0)
// signed/unsigned extend // signed/unsigned extend
@ -1108,24 +1194,82 @@ static void emith_lohi_nops(void)
(((cond) >> 5) == OP__RT ? (cond) ^ 0x01 : (cond) ^ 0x20) (((cond) >> 5) == OP__RT ? (cond) ^ 0x01 : (cond) ^ 0x20)
// evaluate the emulated condition, returns a register/branch type pair // evaluate the emulated condition, returns a register/branch type pair
// evaluate condition cond for a deferred register-register compare (rs ? rt):
// emits a SLT/SLTU into AT where needed, sets *r to the register the branch
// must test, and returns the MIPS branch type to use (for BEQ/BNE the 2nd
// register is encoded in the lower 5 bits). Returns 0 for conditions not
// handled here (VC/VS -- never used with cmp, see emith_set_compare_flags).
// compare-to-zero conditions map directly onto MIPS compare-branch insns.
static int emith_cmpr_check(int rs, int rt, int cond, int *r)
{
int b = 0;
// condition check for comparing 2 registers
switch (cond) {
case DCOND_EQ: *r = rs; b = MIPS_BEQ|rt; break;
case DCOND_NE: *r = rs; b = MIPS_BNE|rt; break;
case DCOND_LO: EMIT(MIPS_SLTU_REG(AT, rs, rt));
*r = AT, b = MIPS_BNE; break; // s < t unsigned
case DCOND_HS: EMIT(MIPS_SLTU_REG(AT, rs, rt));
*r = AT, b = MIPS_BEQ; break; // s >= t unsigned
case DCOND_LS: EMIT(MIPS_SLTU_REG(AT, rt, rs));
*r = AT, b = MIPS_BEQ; break; // s <= t unsigned
case DCOND_HI: EMIT(MIPS_SLTU_REG(AT, rt, rs));
*r = AT, b = MIPS_BNE; break; // s > t unsigned
case DCOND_LT: if (rt == 0) { *r = rs, b = MIPS_BLT; break; } // s < 0
EMIT(MIPS_SLT_REG(AT, rs, rt));
*r = AT, b = MIPS_BNE; break; // s < t
case DCOND_GE: if (rt == 0) { *r = rs, b = MIPS_BGE; break; } // s >= 0
EMIT(MIPS_SLT_REG(AT, rs, rt));
*r = AT, b = MIPS_BEQ; break; // s >= t
case DCOND_LE: if (rt == 0) { *r = rs, b = MIPS_BLE; break; } // s <= 0
EMIT(MIPS_SLT_REG(AT, rt, rs));
*r = AT, b = MIPS_BEQ; break; // s <= t
case DCOND_GT: if (rt == 0) { *r = rs, b = MIPS_BGT; break; } // s > 0
EMIT(MIPS_SLT_REG(AT, rt, rs));
*r = AT, b = MIPS_BNE; break; // s > t
}
return b;
}
// evaluate condition cond for a deferred register-immediate compare
// (rs ? imm): like emith_cmpr_check(), but loads/folds the immediate as
// needed (SLT/SLTU have immediate forms; the other cases materialize imm
// into AT first). imm == 0 falls back to the register version with Z0.
// clobbers AT; sets *r and returns the branch type, or 0 if unhandled.
static int emith_cmpi_check(int rs, s32 imm, int cond, int *r)
{
int b = 0;
// condition check for comparing register with immediate
if (imm == 0) return emith_cmpr_check(rs, Z0, cond, r);
switch (cond) {
case DCOND_EQ: emith_move_r_imm(AT, imm);
*r = rs; b = MIPS_BEQ|AT; break;
case DCOND_NE: emith_move_r_imm(AT, imm);
*r = rs; b = MIPS_BNE|AT; break;
case DCOND_LO: EMIT(MIPS_SLTU_IMM(AT, rs, imm));
*r = AT, b = MIPS_BNE; break; // s < imm unsigned
case DCOND_HS: EMIT(MIPS_SLTU_IMM(AT, rs, imm));
*r = AT, b = MIPS_BEQ; break; // s >= imm unsigned
case DCOND_LS: emith_move_r_imm(AT, imm);
EMIT(MIPS_SLTU_REG(AT, AT, rs));
*r = AT, b = MIPS_BEQ; break; // s <= imm unsigned
case DCOND_HI: emith_move_r_imm(AT, imm);
EMIT(MIPS_SLTU_REG(AT, AT, rs));
*r = AT, b = MIPS_BNE; break; // s > imm unsigned
case DCOND_LT: EMIT(MIPS_SLT_IMM(AT, rs, imm));
*r = AT, b = MIPS_BNE; break; // s < imm
case DCOND_GE: EMIT(MIPS_SLT_IMM(AT, rs, imm));
*r = AT, b = MIPS_BEQ; break; // s >= imm
case DCOND_LE: emith_move_r_imm(AT, imm);
EMIT(MIPS_SLT_REG(AT, AT, rs));
*r = AT, b = MIPS_BEQ; break; // s <= imm
case DCOND_GT: emith_move_r_imm(AT, imm);
EMIT(MIPS_SLT_REG(AT, AT, rs));
*r = AT, b = MIPS_BNE; break; // s > imm
}
return b;
}
static int emith_cond_check(int cond, int *r) static int emith_cond_check(int cond, int *r)
{ {
int b = 0; int b = 0;
// shortcut for comparing 2 registers if (emith_cmp_rs >= 0) {
if (emith_flg_rs || emith_flg_rt) switch (cond) { if (emith_cmp_rt != -1)
case DCOND_LS: EMIT(MIPS_SLTU_REG(AT, emith_flg_rs, emith_flg_rt)); b = emith_cmpr_check(emith_cmp_rs,emith_cmp_rt, cond,r);
*r = AT, b = MIPS_BEQ; break; // s <= t unsigned else b = emith_cmpi_check(emith_cmp_rs,emith_cmp_imm,cond,r);
case DCOND_HI: EMIT(MIPS_SLTU_REG(AT, emith_flg_rs, emith_flg_rt));
*r = AT, b = MIPS_BNE; break; // s > t unsigned
case DCOND_LT: EMIT(MIPS_SLT_REG(AT, emith_flg_rt, emith_flg_rs));
*r = AT, b = MIPS_BNE; break; // s < t
case DCOND_GE: EMIT(MIPS_SLT_REG(AT, emith_flg_rt, emith_flg_rs));
*r = AT, b = MIPS_BEQ; break; // s >= t
case DCOND_LE: EMIT(MIPS_SLT_REG(AT, emith_flg_rs, emith_flg_rt));
*r = AT, b = MIPS_BEQ; break; // s <= t
case DCOND_GT: EMIT(MIPS_SLT_REG(AT, emith_flg_rs, emith_flg_rt));
*r = AT, b = MIPS_BNE; break; // s > t
} }
// shortcut for V known to be 0 // shortcut for V known to be 0
@ -1373,8 +1517,10 @@ static int emith_cond_check(int cond, int *r)
#define emith_sh2_div1_step(rn, rm, sr) do { \ #define emith_sh2_div1_step(rn, rm, sr) do { \
emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \
EMITH_JMP3_START(DCOND_EQ); \ EMITH_JMP3_START(DCOND_EQ); \
EMITH_HINT_COND(DCOND_CS); \
emith_addf_r_r(rn, rm); \ emith_addf_r_r(rn, rm); \
EMITH_JMP3_MID(DCOND_EQ); \ EMITH_JMP3_MID(DCOND_EQ); \
EMITH_HINT_COND(DCOND_CS); \
emith_subf_r_r(rn, rm); \ emith_subf_r_r(rn, rm); \
EMITH_JMP3_END(); \ EMITH_JMP3_END(); \
emith_eor_r_r(sr, FC); \ emith_eor_r_r(sr, FC); \
@ -1433,23 +1579,27 @@ static int emith_cond_check(int cond, int *r)
} while (0) } while (0)
#define emith_write_sr(sr, srcr) do { \ #define emith_write_sr(sr, srcr) do { \
emith_lsr(sr, sr, 10); \ emith_lsr(sr, sr , 10); emith_lsl(sr, sr, 10); \
emith_or_r_r_r_lsl(sr, sr, srcr, 22); \ emith_lsl(AT, srcr, 22); emith_lsr(AT, AT, 22); \
emith_ror(sr, sr, 22); \ emith_or_r_r(sr, AT); \
} while (0) } while (0)
#define emith_carry_to_t(srr, is_sub) do { \ #define emith_carry_to_t(sr, is_sub) do { \
emith_lsr(sr, sr, 1); \ emith_and_r_imm(sr, 0xfffffffe); \
emith_adc_r_r(sr, sr); \ emith_or_r_r(sr, FC); \
} while (0)
#define emith_t_to_carry(sr, is_sub) do { \
emith_and_r_r_imm(FC, sr, 1); \
} while (0) } while (0)
#define emith_tpop_carry(sr, is_sub) do { \ #define emith_tpop_carry(sr, is_sub) do { \
emith_and_r_r_imm(FC, sr, 1); \ emith_and_r_r_imm(FC, sr, 1); \
emith_lsr(sr, sr, 1); \ emith_eor_r_r(sr, FC); \
} while (0) } while (0)
#define emith_tpush_carry(sr, is_sub) \ #define emith_tpush_carry(sr, is_sub) \
emith_adc_r_r(sr, sr) emith_or_r_r(sr, FC)
#ifdef T #ifdef T
// T bit handling // T bit handling
@ -1463,9 +1613,61 @@ static void emith_clr_t_cond(int sr)
static void emith_set_t_cond(int sr, int cond) static void emith_set_t_cond(int sr, int cond)
{ {
EMITH_SJMP_START(emith_invert_cond(cond)); int b, r;
emith_or_r_imm_c(cond, sr, T); u8 *ptr;
EMITH_SJMP_END(emith_invert_cond(cond)); u32 val = 0, inv = 0;
// try to avoid jumping around if possible
if (emith_cmp_rs >= 0) {
if (emith_cmp_rt >= 0)
b = emith_cmpr_check(emith_cmp_rs, emith_cmp_rt, cond, &r);
else
b = emith_cmpi_check(emith_cmp_rs, emith_cmp_imm, cond, &r);
// XXX this relies on the inner workings of cmp_check...
if (r == AT)
// result of slt check which returns either 0 or 1 in AT
val++, inv = (b == MIPS_BEQ);
} else {
b = emith_cond_check(cond, &r);
if (r == Z0) {
if (b == MIPS_BEQ || b == MIPS_BLE || b == MIPS_BGE)
emith_or_r_imm(sr, T);
return;
} else if (r == FC)
val++, inv = (b == MIPS_BEQ);
}
if (!val) switch (b) { // cases: b..z r, aka cmp r,Z0 or cmp r,#0
case MIPS_BEQ: EMIT(MIPS_SLTU_IMM(AT, r, 1)); r=AT; val++; break;
case MIPS_BNE: EMIT(MIPS_SLTU_REG(AT,Z0, r)); r=AT; val++; break;
case MIPS_BLT: EMIT(MIPS_SLT_REG(AT, r, Z0)); r=AT; val++; break;
case MIPS_BGE: EMIT(MIPS_SLT_REG(AT, r, Z0)); r=AT; val++; inv++; break;
case MIPS_BLE: EMIT(MIPS_SLT_REG(AT, Z0, r)); r=AT; val++; inv++; break;
case MIPS_BGT: EMIT(MIPS_SLT_REG(AT, Z0, r)); r=AT; val++; break;
default: // cases: beq/bne r,s, aka cmp r,s
if ((b>>5) == OP_BEQ) {
EMIT(MIPS_XOR_REG(AT, r, b&0x1f));
EMIT(MIPS_SLTU_IMM(AT,AT, 1)); r=AT; val++; break;
} else if ((b>>5) == OP_BNE) {
EMIT(MIPS_XOR_REG(AT, r, b&0x1f));
EMIT(MIPS_SLTU_IMM(AT,Z0,AT)); r=AT; val++; break;
}
}
if (val) {
emith_or_r_r(sr, r);
if (inv)
emith_eor_r_imm(sr, T);
return;
}
// can't obtain result directly, use presumably slower jump !cond + or sr,T
b = emith_invert_branch(b);
ptr = emith_branch(MIPS_BCONDZ(b, r, 0));
emith_or_r_imm(sr, T);
emith_flush(); // prohibit delay slot switching across jump targets
val = (u8 *)tcache_ptr - (u8 *)(ptr) - 4;
EMIT_PTR(ptr, MIPS_BCONDZ(b, r, val & 0x0003ffff));
} }
#define emith_get_t_cond() -1 #define emith_get_t_cond() -1

View file

@ -340,11 +340,29 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
rcache_free_tmp(tmp_); \ rcache_free_tmp(tmp_); \
} else emith_or_r_r_r(d, s1, s2); \ } else emith_or_r_r_r(d, s1, s2); \
} while (0) } while (0)
#define emith_or_r_r_r_lsr(d, s1, s2, lsrimm) do { \
if (lsrimm) { \
int tmp_ = rcache_get_tmp(); \
emith_lsr(tmp_, s2, lsrimm); \
emith_or_r_r_r(d, s1, tmp_); \
rcache_free_tmp(tmp_); \
} else emith_or_r_r_r(d, s1, s2); \
} while (0)
// _r_r_shift // _r_r_shift
#define emith_or_r_r_lsl(d, s, lslimm) \ #define emith_or_r_r_lsl(d, s, lslimm) \
emith_or_r_r_r_lsl(d, d, s, lslimm) emith_or_r_r_r_lsl(d, d, s, lslimm)
#define emith_or_r_r_lsr(d, s, lsrimm) \
emith_or_r_r_r_lsr(d, d, s, lsrimm)
#define emith_eor_r_r_lsl(d, s, lslimm) do { \
if (lslimm) { \
int tmp_ = rcache_get_tmp(); \
emith_lsl(tmp_, s, lslimm); \
emith_eor_r_r(d, tmp_); \
rcache_free_tmp(tmp_); \
} else emith_eor_r_r(d, s); \
} while (0)
#define emith_eor_r_r_lsr(d, s, lsrimm) do { \ #define emith_eor_r_r_lsr(d, s, lsrimm) do { \
if (lsrimm) { \ if (lsrimm) { \
int tmp_ = rcache_get_tmp(); \ int tmp_ = rcache_get_tmp(); \
@ -972,6 +990,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
#define EMITH_SJMP2_END(cond) \ #define EMITH_SJMP2_END(cond) \
EMITH_SJMP3_END() EMITH_SJMP3_END()
#define EMITH_HINT_COND(cond) /**/
#define emith_pass_arg_r(arg, reg) do { \ #define emith_pass_arg_r(arg, reg) do { \
int rd = 7; \ int rd = 7; \
host_arg2reg(rd, arg); \ host_arg2reg(rd, arg); \
@ -1255,6 +1275,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
emith_rol(sr, sr, 1); \ emith_rol(sr, sr, 1); \
} while (0) } while (0)
#define emith_t_to_carry(sr, is_sub) do { \
emith_ror(sr, sr, 1); \
emith_rol(sr, sr, 1); \
} while (0)
#define emith_tpop_carry(sr, is_sub) \ #define emith_tpop_carry(sr, is_sub) \
emith_lsr(sr, sr, 1) emith_lsr(sr, sr, 1)

View file

@ -45,6 +45,7 @@
#define REMAP_REGISTER 1 #define REMAP_REGISTER 1
#define LOOP_DETECTION 1 #define LOOP_DETECTION 1
#define LOOP_OPTIMIZER 1 #define LOOP_OPTIMIZER 1
#define T_OPTIMIZER 1
// limits (per block) // limits (per block)
#define MAX_BLOCK_SIZE (BLOCK_INSN_LIMIT * 6 * 6) #define MAX_BLOCK_SIZE (BLOCK_INSN_LIMIT * 6 * 6)
@ -108,7 +109,7 @@ static int insns_compiled, hash_collisions, host_insn_count;
#define GET_Rn() \ #define GET_Rn() \
((op >> 8) & 0x0f) ((op >> 8) & 0x0f)
#define SHR_T SHR_SR // might make them separate someday #define SHR_T 30 // separate T for not-used detection
#define SHR_MEM 31 #define SHR_MEM 31
#define SHR_TMP -1 #define SHR_TMP -1
@ -122,6 +123,7 @@ static int insns_compiled, hash_collisions, host_insn_count;
#define I_SHIFT 4 #define I_SHIFT 4
#define Q_SHIFT 8 #define Q_SHIFT 8
#define M_SHIFT 9 #define M_SHIFT 9
#define T_SHIFT 11
static struct op_data { static struct op_data {
u8 op; u8 op;
@ -263,7 +265,6 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr)
return block; return block;
} }
#endif #endif
// } debug
#define TCACHE_BUFFERS 3 #define TCACHE_BUFFERS 3
@ -1527,7 +1528,7 @@ static void rcache_unmap_vreg(int x)
FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, i, FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, i,
if (guest_regs[i].flags & GRF_DIRTY) { if (guest_regs[i].flags & GRF_DIRTY) {
// if a dirty reg is unmapped save its value to context // if a dirty reg is unmapped save its value to context
if (~rcache_regs_discard & (1 << i)) if ((~rcache_regs_discard | rcache_regs_now) & (1 << i))
emith_ctx_write(cache_regs[x].hreg, i * 4); emith_ctx_write(cache_regs[x].hreg, i * 4);
guest_regs[i].flags &= ~GRF_DIRTY; guest_regs[i].flags &= ~GRF_DIRTY;
} }
@ -1565,7 +1566,7 @@ static void rcache_clean_vreg(int x)
if (guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) { if (guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) {
if (guest_regs[r].vreg != guest_regs[r].sreg && if (guest_regs[r].vreg != guest_regs[r].sreg &&
!cache_regs[guest_regs[r].sreg].locked && !cache_regs[guest_regs[r].sreg].locked &&
(~rcache_regs_discard & (1 << r)) && ((~rcache_regs_discard | rcache_regs_now) & (1 << r)) &&
!(rns & cache_regs[guest_regs[r].sreg].gregs)) { !(rns & cache_regs[guest_regs[r].sreg].gregs)) {
// statically mapped reg not in its sreg. move back to sreg // statically mapped reg not in its sreg. move back to sreg
rcache_evict_vreg(guest_regs[r].sreg); rcache_evict_vreg(guest_regs[r].sreg);
@ -1578,7 +1579,7 @@ static void rcache_clean_vreg(int x)
// cannot remap. keep dirty for writeback in unmap // cannot remap. keep dirty for writeback in unmap
cache_regs[x].flags |= HRF_DIRTY; cache_regs[x].flags |= HRF_DIRTY;
} else { } else {
if (~rcache_regs_discard & (1 << r)) if ((~rcache_regs_discard | rcache_regs_now) & (1 << r))
emith_ctx_write(cache_regs[x].hreg, r * 4); emith_ctx_write(cache_regs[x].hreg, r * 4);
guest_regs[r].flags &= ~GRF_DIRTY; guest_regs[r].flags &= ~GRF_DIRTY;
} }
@ -1875,9 +1876,22 @@ static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr
if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) && if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) &&
guest_regs[r].sreg == dst && !tr->locked) { guest_regs[r].sreg == dst && !tr->locked) {
// split aliases if r is STATIC in sreg and dst isn't already locked // split aliases if r is STATIC in sreg and dst isn't already locked
int t;
FOR_ALL_BITS_SET_DO(ali, t,
if ((guest_regs[t].flags & (GRF_STATIC|GRF_PINNED)) &&
!(ali & ~(1 << t)) &&
!cache_regs[guest_regs[t].sreg].locked &&
!(rsp_d & cache_regs[guest_regs[t].sreg].gregs)) {
// alias is a single STATIC and its sreg is available
x = guest_regs[t].sreg;
rcache_evict_vreg(x);
} else {
rcache_lock_vreg(dst); // lock to avoid evicting dst rcache_lock_vreg(dst); // lock to avoid evicting dst
x = rcache_allocate_vreg(rsp_d & ali); x = rcache_allocate_vreg(rsp_d & ali);
rcache_unlock_vreg(dst); rcache_unlock_vreg(dst);
}
break;
)
if (x >= 0) { if (x >= 0) {
src = x; src = x;
rcache_move_vreg(src, dst); rcache_move_vreg(src, dst);
@ -2855,11 +2869,11 @@ static void emit_do_static_regs(int is_write, int tmpr)
} }
#define DELAY_SAVE_T(sr) { \ #define DELAY_SAVE_T(sr) { \
int t_ = rcache_get_tmp(); \
emith_bic_r_imm(sr, T_save); \ emith_bic_r_imm(sr, T_save); \
emith_tst_r_imm(sr, T); \ emith_and_r_r_imm(t_, sr, 1); \
EMITH_SJMP_START(DCOND_EQ); \ emith_or_r_r_lsl(sr, t_, T_SHIFT); \
emith_or_r_imm_c(DCOND_NE, sr, T_save); \ rcache_free_tmp(t_); \
EMITH_SJMP_END(DCOND_EQ); \
} }
#define FLUSH_CYCLES(sr) \ #define FLUSH_CYCLES(sr) \
@ -2961,6 +2975,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
ADD_TO_ARRAY(branch_target_pc, branch_target_count, pc, ); ADD_TO_ARRAY(branch_target_pc, branch_target_count, pc, );
if (ops[i].op == OP_LDC && (ops[i].dest & BITMASK1(SHR_SR)) && pc+2 < end_pc) if (ops[i].op == OP_LDC && (ops[i].dest & BITMASK1(SHR_SR)) && pc+2 < end_pc)
op_flags[i+1] |= OF_BTARGET; // RTE entrypoint in case of SR.IMASK change op_flags[i+1] |= OF_BTARGET; // RTE entrypoint in case of SR.IMASK change
// unify T and SR since rcache doesn't know about "virtual" guest regs
if (ops[i].source & BITMASK1(SHR_T)) ops[i].source |= BITMASK1(SHR_SR);
if (ops[i].dest & BITMASK1(SHR_T)) ops[i].dest |= BITMASK1(SHR_SR);
#if LOOP_DETECTION #if LOOP_DETECTION
// loop types detected: // loop types detected:
// 1. target: ... BRA target -> idle loop // 1. target: ... BRA target -> idle loop
@ -3014,15 +3031,15 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
drcf.pending_branch_indirect = 1; // conditions g,h - cond.branch drcf.pending_branch_indirect = 1; // conditions g,h - cond.branch
// poll/idle loops terminate with their backwards branch to the loop start // poll/idle loops terminate with their backwards branch to the loop start
if (drcf.pending_branch_direct && !(op_flags[i+1] & OF_DELAY_OP)) { if (drcf.pending_branch_direct && !(op_flags[i+1] & OF_DELAY_OP)) {
m2 &= ~(m1 | BITMASK2(SHR_PC, SHR_SR)); // conditions d,e + g,h m2 &= ~(m1 | BITMASK3(SHR_PC, SHR_SR, SHR_T)); // conditions d,e + g,h
if (m2 || ((op == OF_IDLE_LOOP) == (drcf.pending_branch_indirect))) if (m2 || ((op == OF_IDLE_LOOP) == (drcf.pending_branch_indirect)))
op = 0; // conditions not met op = 0; // conditions not met
op_flags[v] = (op_flags[v] & ~OF_LOOP) | op; // set loop type op_flags[v] = (op_flags[v] & ~OF_LOOP) | op; // set loop type
drcf.loop_type = 0; drcf.loop_type = 0;
#if LOOP_OPTIMIZER #if LOOP_OPTIMIZER
if (op_flags[v] & OF_BASIC_LOOP) { if (op_flags[v] & OF_BASIC_LOOP) {
m3 &= ~rcache_regs_static & ~BITMASK4(SHR_PC, SHR_PR, SHR_SR, SHR_MEM); m3 &= ~rcache_regs_static & ~BITMASK5(SHR_PC, SHR_PR, SHR_SR, SHR_T, SHR_MEM);
if (m3 && count_bits(m3) < count_bits(rcache_hregs_reg) && if (m3 && count_bits(m3) < count_bits(rcache_vregs_reg) &&
pinned_loop_count < ARRAY_SIZE(pinned_loop_pc)-1) { pinned_loop_count < ARRAY_SIZE(pinned_loop_pc)-1) {
pinned_loop_mask[pinned_loop_count] = m3; pinned_loop_mask[pinned_loop_count] = m3;
pinned_loop_pc[pinned_loop_count++] = base_pc + 2*v; pinned_loop_pc[pinned_loop_count++] = base_pc + 2*v;
@ -3154,48 +3171,63 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
rcache_free_tmp(tmp3); rcache_free_tmp(tmp3);
#endif #endif
// check cycles
sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
#if LOOP_OPTIMIZER #if LOOP_OPTIMIZER
if (op_flags[i] & OF_BASIC_LOOP) { if (op_flags[i] & OF_BASIC_LOOP) {
if (pinned_loop_pc[pinned_loop_count] == pc) { if (pinned_loop_pc[pinned_loop_count] == pc) {
// pin needed regs on loop entry // pin needed regs on loop entry
FOR_ALL_BITS_SET_DO(pinned_loop_mask[pinned_loop_count], v, rcache_pin_reg(v)); FOR_ALL_BITS_SET_DO(pinned_loop_mask[pinned_loop_count], v, rcache_pin_reg(v));
emith_flush(); emith_flush();
// store current PC as loop target
pinned_loop_ptr[pinned_loop_count] = tcache_ptr; pinned_loop_ptr[pinned_loop_count] = tcache_ptr;
} else } else
op_flags[i] &= ~OF_BASIC_LOOP; op_flags[i] &= ~OF_BASIC_LOOP;
} }
#endif
// check cycles
sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
emith_cmp_r_imm(sr, 0);
#if LOOP_OPTIMIZER
void *jp = NULL;
if (op_flags[i] & OF_BASIC_LOOP) { if (op_flags[i] & OF_BASIC_LOOP) {
// if exiting a pinned loop pinned regs must be written back to ctx // if exiting a pinned loop pinned regs must be written back to ctx
// since they are reloaded in the loop entry code // since they are reloaded in the loop entry code
jp = tcache_ptr; emith_cmp_r_imm(sr, 0);
emith_jump_cond_patchable(DCOND_GT, jp); // XXX need API for JMP_POS EMITH_JMP_START(DCOND_GT);
rcache_save_pinned(); rcache_save_pinned();
if (blx_target_count < ARRAY_SIZE(blx_target_pc)) {
// exit via stub in blx table (saves some 1-3 insns in the main flow)
blx_target_ptr[blx_target_count] = tcache_ptr;
blx_target_pc[blx_target_count] = pc|1;
blx_target_bl[blx_target_count++] = NULL;
emith_jump_patchable(tcache_ptr);
} else {
// blx table full, must inline exit code
tmp = rcache_get_tmp_arg(0);
emith_move_r_imm(tmp, pc);
emith_jump(sh2_drc_exit);
rcache_free_tmp(tmp);
} }
EMITH_JMP_END(DCOND_GT);
} else
#endif #endif
{
if (blx_target_count < ARRAY_SIZE(blx_target_pc)) { if (blx_target_count < ARRAY_SIZE(blx_target_pc)) {
// exit via stub in blx table (saves some 1-3 insns in the main flow) // exit via stub in blx table (saves some 1-3 insns in the main flow)
blx_target_pc[blx_target_count] = pc|1; blx_target_pc[blx_target_count] = pc|1;
blx_target_bl[blx_target_count] = NULL; blx_target_bl[blx_target_count] = NULL;
emith_cmp_r_imm(sr, 0);
blx_target_ptr[blx_target_count++] = tcache_ptr; blx_target_ptr[blx_target_count++] = tcache_ptr;
emith_jump_cond_patchable(DCOND_LE, tcache_ptr);
} else { } else {
// blx table full, must inline exit code // blx table full, must inline exit code
tmp = rcache_get_tmp_arg(0); tmp = rcache_get_tmp_arg(0);
emith_cmp_r_imm(sr, 0);
EMITH_SJMP_START(DCOND_GT);
emith_move_r_imm_c(DCOND_LE, tmp, pc); emith_move_r_imm_c(DCOND_LE, tmp, pc);
emith_jump_cond(DCOND_LE, sh2_drc_exit);
EMITH_SJMP_END(DCOND_GT);
rcache_free_tmp(tmp); rcache_free_tmp(tmp);
} }
emith_jump_cond_patchable(DCOND_LE, tcache_ptr); }
#if LOOP_OPTIMIZER
if (op_flags[i] & OF_BASIC_LOOP)
emith_jump_patch(jp, tcache_ptr, NULL);
#endif
#if (DRC_DEBUG & 32) #if (DRC_DEBUG & 32)
// block hit counter // block hit counter
@ -3328,7 +3360,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
rcache_set_usage_now(opd[0].source); // current insn rcache_set_usage_now(opd[0].source); // current insn
rcache_set_usage_soon(soon); // insns 1-4 rcache_set_usage_soon(soon); // insns 1-4
rcache_set_usage_late(late & ~soon); // insns 5-9 rcache_set_usage_late(late & ~soon); // insns 5-9
rcache_set_usage_discard(write & ~(late|soon|opd[0].source)); rcache_set_usage_discard(write & ~(late|soon));
if (v <= 9) if (v <= 9)
// upcoming rcache_flush, start writing back unused dirty stuff // upcoming rcache_flush, start writing back unused dirty stuff
rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest)); rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest));
@ -3512,10 +3544,16 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
{ {
case 0: // CLRT 0000000000001000 case 0: // CLRT 0000000000001000
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
#if T_OPTIMIZER
if (~rcache_regs_discard & BITMASK1(SHR_T))
#endif
emith_set_t(sr, 0); emith_set_t(sr, 0);
break; break;
case 1: // SETT 0000000000011000 case 1: // SETT 0000000000011000
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
#if T_OPTIMIZER
if (~rcache_regs_discard & BITMASK1(SHR_T))
#endif
emith_set_t(sr, 1); emith_set_t(sr, 1);
break; break;
case 2: // CLRMAC 0000000000101000 case 2: // CLRMAC 0000000000101000
@ -3602,20 +3640,16 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
tmp = rcache_get_tmp();
emith_invalidate_t(); emith_invalidate_t();
emith_bic_r_imm(sr, M|Q|T); emith_bic_r_imm(sr, M|Q|T);
emith_tst_r_imm(tmp2, (1<<31)); emith_lsr(tmp, tmp2, 31); // Q = Nn
EMITH_SJMP_START(DCOND_EQ); emith_or_r_r_lsl(sr, tmp, Q_SHIFT);
emith_or_r_imm_c(DCOND_NE, sr, Q); emith_lsr(tmp, tmp3, 31); // M = Nm
EMITH_SJMP_END(DCOND_EQ); emith_or_r_r_lsl(sr, tmp, M_SHIFT);
emith_tst_r_imm(tmp3, (1<<31)); emith_eor_r_r_lsr(tmp, tmp2, 31);
EMITH_SJMP_START(DCOND_EQ); emith_or_r_r(sr, tmp); // T = Q^M
emith_or_r_imm_c(DCOND_NE, sr, M); rcache_free(tmp);
EMITH_SJMP_END(DCOND_EQ);
emith_teq_r_r(tmp2, tmp3);
EMITH_SJMP_START(DCOND_PL);
emith_or_r_imm_c(DCOND_MI, sr, T);
EMITH_SJMP_END(DCOND_PL);
goto end_op; goto end_op;
case 0x08: // TST Rm,Rn 0010nnnnmmmm1000 case 0x08: // TST Rm,Rn 0010nnnnmmmm1000
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
@ -3708,26 +3742,27 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
emith_clr_t_cond(sr);
emith_cmp_r_r(tmp2, tmp3);
switch (op & 0x07) switch (op & 0x07)
{ {
case 0x00: // CMP/EQ case 0x00: // CMP/EQ
emith_set_t_cond(sr, DCOND_EQ); tmp = DCOND_EQ;
break; break;
case 0x02: // CMP/HS case 0x02: // CMP/HS
emith_set_t_cond(sr, DCOND_HS); tmp = DCOND_HS;
break; break;
case 0x03: // CMP/GE case 0x03: // CMP/GE
emith_set_t_cond(sr, DCOND_GE); tmp = DCOND_GE;
break; break;
case 0x06: // CMP/HI case 0x06: // CMP/HI
emith_set_t_cond(sr, DCOND_HI); tmp = DCOND_HI;
break; break;
case 0x07: // CMP/GT case 0x07: // CMP/GT
emith_set_t_cond(sr, DCOND_GT); tmp = DCOND_GT;
break; break;
} }
emith_clr_t_cond(sr);
emith_cmp_r_r(tmp2, tmp3);
emith_set_t_cond(sr, tmp);
goto end_op; goto end_op;
case 0x04: // DIV1 Rm,Rn 0011nnnnmmmm0100 case 0x04: // DIV1 Rm,Rn 0011nnnnmmmm0100
// Q1 = carry(Rn = (Rn << 1) | T) // Q1 = carry(Rn = (Rn << 1) | T)
@ -3738,29 +3773,27 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
// Q = M ^ Q1 ^ Q2 // Q = M ^ Q1 ^ Q2
// T = (Q == M) = !(Q ^ M) = !(Q1 ^ Q2) // T = (Q == M) = !(Q ^ M) = !(Q1 ^ Q2)
tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp); tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp4);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
emith_sync_t(sr); emith_sync_t(sr);
EMITH_HINT_COND(DCOND_CS);
emith_tpop_carry(sr, 0); emith_tpop_carry(sr, 0);
emith_adcf_r_r_r(tmp2, tmp, tmp); emith_adcf_r_r_r(tmp2, tmp4, tmp4);
emith_tpush_carry(sr, 0); // keep Q1 in T for now emith_tpush_carry(sr, 0); // keep Q1 in T for now
rcache_free(tmp); rcache_free(tmp4);
tmp4 = rcache_get_tmp(); tmp = rcache_get_tmp();
emith_and_r_r_imm(tmp4, sr, M); emith_and_r_r_imm(tmp, sr, M);
emith_eor_r_r_lsr(sr, tmp4, M_SHIFT - Q_SHIFT); // Q ^= M emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); // Q ^= M
rcache_free_tmp(tmp4); rcache_free_tmp(tmp);
// add or sub, invert T if carry to get Q1 ^ Q2 // add or sub, invert T if carry to get Q1 ^ Q2
// in: (Q ^ M) passed in Q, Q1 in T // in: (Q ^ M) passed in Q, Q1 in T
emith_sh2_div1_step(tmp2, tmp3, sr); emith_sh2_div1_step(tmp2, tmp3, sr);
emith_bic_r_imm(sr, Q); tmp = rcache_get_tmp();
emith_tst_r_imm(sr, M); emith_bic_r_imm(sr, Q); // Q = M
EMITH_SJMP_START(DCOND_EQ); emith_and_r_r_imm(tmp, sr, M);
emith_or_r_imm_c(DCOND_NE, sr, Q); // Q = M emith_or_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT);
EMITH_SJMP_END(DCOND_EQ); emith_and_r_r_imm(tmp, sr, T); // Q = M ^ Q1 ^ Q2
emith_tst_r_imm(sr, T); emith_eor_r_r_lsl(sr, tmp, Q_SHIFT);
EMITH_SJMP_START(DCOND_EQ);
emith_eor_r_imm_c(DCOND_NE, sr, Q); // Q = M ^ Q1 ^ Q2
EMITH_SJMP_END(DCOND_EQ);
emith_eor_r_imm(sr, T); // T = !(Q1 ^ Q2) emith_eor_r_imm(sr, T); // T = !(Q1 ^ Q2)
goto end_op; goto end_op;
case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101 case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101
@ -3791,6 +3824,19 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
emith_sync_t(sr); emith_sync_t(sr);
#if T_OPTIMIZER
if (rcache_regs_discard & BITMASK1(SHR_T)) {
if (op & 4) {
emith_t_to_carry(sr, 0);
emith_adc_r_r_r(tmp, tmp3, tmp2);
} else {
emith_t_to_carry(sr, 1);
emith_sbc_r_r_r(tmp, tmp3, tmp2);
}
} else
#endif
{
EMITH_HINT_COND(DCOND_CS);
if (op & 4) { // adc if (op & 4) { // adc
emith_tpop_carry(sr, 0); emith_tpop_carry(sr, 0);
emith_adcf_r_r_r(tmp, tmp3, tmp2); emith_adcf_r_r_r(tmp, tmp3, tmp2);
@ -3800,18 +3846,30 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
emith_sbcf_r_r_r(tmp, tmp3, tmp2); emith_sbcf_r_r_r(tmp, tmp3, tmp2);
emith_tpush_carry(sr, 1); emith_tpush_carry(sr, 1);
} }
}
goto end_op; goto end_op;
case 0x0b: // SUBV Rm,Rn 0011nnnnmmmm1011 case 0x0b: // SUBV Rm,Rn 0011nnnnmmmm1011
case 0x0f: // ADDV Rm,Rn 0011nnnnmmmm1111 case 0x0f: // ADDV Rm,Rn 0011nnnnmmmm1111
tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
emith_clr_t_cond(sr); #if T_OPTIMIZER
if (op & 4) { if (rcache_regs_discard & BITMASK1(SHR_T)) {
emith_addf_r_r_r(tmp, tmp3, tmp2); if (op & 4)
emith_add_r_r_r(tmp,tmp3,tmp2);
else
emith_sub_r_r_r(tmp,tmp3,tmp2);
} else } else
#endif
{
emith_clr_t_cond(sr);
EMITH_HINT_COND(DCOND_VS);
if (op & 4)
emith_addf_r_r_r(tmp, tmp3, tmp2);
else
emith_subf_r_r_r(tmp, tmp3, tmp2); emith_subf_r_r_r(tmp, tmp3, tmp2);
emith_set_t_cond(sr, DCOND_VS); emith_set_t_cond(sr, DCOND_VS);
}
goto end_op; goto end_op;
case 0x0d: // DMULS.L Rm,Rn 0011nnnnmmmm1101 case 0x0d: // DMULS.L Rm,Rn 0011nnnnmmmm1101
tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
@ -3834,9 +3892,16 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
case 2: // SHAL Rn 0100nnnn00100000 case 2: // SHAL Rn 0100nnnn00100000
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
#if T_OPTIMIZER
if (rcache_regs_discard & BITMASK1(SHR_T))
emith_lsl(tmp, tmp2, 1);
else
#endif
{
emith_invalidate_t(); emith_invalidate_t();
emith_lslf(tmp, tmp2, 1); emith_lslf(tmp, tmp2, 1);
emith_carry_to_t(sr, 0); emith_carry_to_t(sr, 0);
}
goto end_op; goto end_op;
case 1: // DT Rn 0100nnnn00010000 case 1: // DT Rn 0100nnnn00010000
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
@ -3850,6 +3915,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
#endif #endif
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
emith_clr_t_cond(sr); emith_clr_t_cond(sr);
EMITH_HINT_COND(DCOND_EQ);
emith_subf_r_r_imm(tmp, tmp2, 1); emith_subf_r_r_imm(tmp, tmp2, 1);
emith_set_t_cond(sr, DCOND_EQ); emith_set_t_cond(sr, DCOND_EQ);
goto end_op; goto end_op;
@ -3862,12 +3928,22 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
case 2: // SHAR Rn 0100nnnn00100001 case 2: // SHAR Rn 0100nnnn00100001
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
#if T_OPTIMIZER
if (rcache_regs_discard & BITMASK1(SHR_T)) {
if (op & 0x20)
emith_asr(tmp,tmp2,1);
else
emith_lsr(tmp,tmp2,1);
} else
#endif
{
emith_invalidate_t(); emith_invalidate_t();
if (op & 0x20) { if (op & 0x20) {
emith_asrf(tmp, tmp2, 1); emith_asrf(tmp, tmp2, 1);
} else } else
emith_lsrf(tmp, tmp2, 1); emith_lsrf(tmp, tmp2, 1);
emith_carry_to_t(sr, 0); emith_carry_to_t(sr, 0);
}
goto end_op; goto end_op;
case 1: // CMP/PZ Rn 0100nnnn00010001 case 1: // CMP/PZ Rn 0100nnnn00010001
tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
@ -3919,24 +3995,45 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
case 0x05: // ROTR Rn 0100nnnn00000101 case 0x05: // ROTR Rn 0100nnnn00000101
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
emith_invalidate_t(); #if T_OPTIMIZER
if (op & 1) { if (rcache_regs_discard & BITMASK1(SHR_T)) {
emith_rorf(tmp, tmp2, 1); if (op & 1)
emith_ror(tmp, tmp2, 1);
else
emith_rol(tmp, tmp2, 1);
} else } else
#endif
{
emith_invalidate_t();
if (op & 1)
emith_rorf(tmp, tmp2, 1);
else
emith_rolf(tmp, tmp2, 1); emith_rolf(tmp, tmp2, 1);
emith_carry_to_t(sr, 0); emith_carry_to_t(sr, 0);
}
goto end_op; goto end_op;
case 0x24: // ROTCL Rn 0100nnnn00100100 case 0x24: // ROTCL Rn 0100nnnn00100100
case 0x25: // ROTCR Rn 0100nnnn00100101 case 0x25: // ROTCR Rn 0100nnnn00100101
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, NULL); tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, NULL);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
emith_sync_t(sr); emith_sync_t(sr);
emith_tpop_carry(sr, 0); #if T_OPTIMIZER
if (op & 1) { if (rcache_regs_discard & BITMASK1(SHR_T)) {
emith_rorcf(tmp); emith_t_to_carry(sr, 0);
if (op & 1)
emith_rorc(tmp);
else
emith_rolc(tmp);
} else } else
#endif
{
emith_tpop_carry(sr, 0);
if (op & 1)
emith_rorcf(tmp);
else
emith_rolcf(tmp); emith_rolcf(tmp);
emith_tpush_carry(sr, 0); emith_tpush_carry(sr, 0);
}
goto end_op; goto end_op;
case 0x15: // CMP/PL Rn 0100nnnn00010101 case 0x15: // CMP/PL Rn 0100nnnn00010101
tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
@ -4131,9 +4228,18 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
case 0x0a: // NEGC Rm,Rn 0110nnnnmmmm1010 case 0x0a: // NEGC Rm,Rn 0110nnnnmmmm1010
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
emith_sync_t(sr); emith_sync_t(sr);
#if T_OPTIMIZER
if (rcache_regs_discard & BITMASK1(SHR_T)) {
emith_t_to_carry(sr, 1);
emith_negc_r_r(tmp2, tmp);
} else
#endif
{
EMITH_HINT_COND(DCOND_CS);
emith_tpop_carry(sr, 1); emith_tpop_carry(sr, 1);
emith_negcf_r_r(tmp2, tmp); emith_negcf_r_r(tmp2, tmp);
emith_tpush_carry(sr, 1); emith_tpush_carry(sr, 1);
}
break; break;
case 0x0b: // NEG Rm,Rn 0110nnnnmmmm1011 case 0x0b: // NEG Rm,Rn 0110nnnnmmmm1011
emith_neg_r_r(tmp2, tmp); emith_neg_r_r(tmp2, tmp);
@ -4639,9 +4745,6 @@ static void sh2_generate_utils(void)
host_arg2reg(arg2, 2); host_arg2reg(arg2, 2);
host_arg2reg(arg3, 3); host_arg2reg(arg3, 3);
emith_move_r_r(arg0, arg0); // nop emith_move_r_r(arg0, arg0); // nop
emith_move_r_r(arg1, arg1); // nop
emith_move_r_r(arg2, arg2); // nop
emith_move_r_r(arg3, arg3); // nop
emith_flush(); emith_flush();
// sh2_drc_write8(u32 a, u32 d) // sh2_drc_write8(u32 a, u32 d)
@ -4665,6 +4768,7 @@ static void sh2_generate_utils(void)
// d = sh2_drc_read8(u32 a) // d = sh2_drc_read8(u32 a)
sh2_drc_read8 = (void *)tcache_ptr; sh2_drc_read8 = (void *)tcache_ptr;
emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map)); emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map));
EMITH_HINT_COND(DCOND_CS);
emith_sh2_rcall(arg0, arg1, arg2, arg3); emith_sh2_rcall(arg0, arg1, arg2, arg3);
EMITH_SJMP_START(DCOND_CS); EMITH_SJMP_START(DCOND_CS);
emith_and_r_r_c(DCOND_CC, arg0, arg3); emith_and_r_r_c(DCOND_CC, arg0, arg3);
@ -4679,6 +4783,7 @@ static void sh2_generate_utils(void)
// d = sh2_drc_read16(u32 a) // d = sh2_drc_read16(u32 a)
sh2_drc_read16 = (void *)tcache_ptr; sh2_drc_read16 = (void *)tcache_ptr;
emith_ctx_read_ptr(arg1, offsetof(SH2, read16_map)); emith_ctx_read_ptr(arg1, offsetof(SH2, read16_map));
EMITH_HINT_COND(DCOND_CS);
emith_sh2_rcall(arg0, arg1, arg2, arg3); emith_sh2_rcall(arg0, arg1, arg2, arg3);
EMITH_SJMP_START(DCOND_CS); EMITH_SJMP_START(DCOND_CS);
emith_and_r_r_c(DCOND_CC, arg0, arg3); emith_and_r_r_c(DCOND_CC, arg0, arg3);
@ -4692,6 +4797,7 @@ static void sh2_generate_utils(void)
// d = sh2_drc_read32(u32 a) // d = sh2_drc_read32(u32 a)
sh2_drc_read32 = (void *)tcache_ptr; sh2_drc_read32 = (void *)tcache_ptr;
emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map)); emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map));
EMITH_HINT_COND(DCOND_CS);
emith_sh2_rcall(arg0, arg1, arg2, arg3); emith_sh2_rcall(arg0, arg1, arg2, arg3);
EMITH_SJMP_START(DCOND_CS); EMITH_SJMP_START(DCOND_CS);
emith_and_r_r_c(DCOND_CC, arg0, arg3); emith_and_r_r_c(DCOND_CC, arg0, arg3);
@ -4706,6 +4812,7 @@ static void sh2_generate_utils(void)
// d = sh2_drc_read8_poll(u32 a) // d = sh2_drc_read8_poll(u32 a)
sh2_drc_read8_poll = (void *)tcache_ptr; sh2_drc_read8_poll = (void *)tcache_ptr;
emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map)); emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map));
EMITH_HINT_COND(DCOND_CS);
emith_sh2_rcall(arg0, arg1, arg2, arg3); emith_sh2_rcall(arg0, arg1, arg2, arg3);
EMITH_SJMP_START(DCOND_CC); EMITH_SJMP_START(DCOND_CC);
emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG); emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG);
@ -4723,6 +4830,7 @@ static void sh2_generate_utils(void)
// d = sh2_drc_read16_poll(u32 a) // d = sh2_drc_read16_poll(u32 a)
sh2_drc_read16_poll = (void *)tcache_ptr; sh2_drc_read16_poll = (void *)tcache_ptr;
emith_ctx_read_ptr(arg1, offsetof(SH2, read16_map)); emith_ctx_read_ptr(arg1, offsetof(SH2, read16_map));
EMITH_HINT_COND(DCOND_CS);
emith_sh2_rcall(arg0, arg1, arg2, arg3); emith_sh2_rcall(arg0, arg1, arg2, arg3);
EMITH_SJMP_START(DCOND_CC); EMITH_SJMP_START(DCOND_CC);
emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG); emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG);
@ -4739,6 +4847,7 @@ static void sh2_generate_utils(void)
// d = sh2_drc_read32_poll(u32 a) // d = sh2_drc_read32_poll(u32 a)
sh2_drc_read32_poll = (void *)tcache_ptr; sh2_drc_read32_poll = (void *)tcache_ptr;
emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map)); emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map));
EMITH_HINT_COND(DCOND_CS);
emith_sh2_rcall(arg0, arg1, arg2, arg3); emith_sh2_rcall(arg0, arg1, arg2, arg3);
EMITH_SJMP_START(DCOND_CC); EMITH_SJMP_START(DCOND_CC);
emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG); emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG);
@ -4834,16 +4943,19 @@ static void sh2_generate_utils(void)
emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx));
emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg2, 0); emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg2, 0);
emith_read_r_r_offs(arg3, arg1, offsetof(SH2, rts_cache)); emith_read_r_r_offs(arg3, arg1, offsetof(SH2, rts_cache));
emith_cmp_r_r(arg0, arg3);
#if (DRC_DEBUG & 128) #if (DRC_DEBUG & 128)
emith_cmp_r_r(arg0, arg3);
EMITH_SJMP_START(DCOND_EQ); EMITH_SJMP_START(DCOND_EQ);
emith_move_r_ptr_imm(arg3, (uptr)&rcmiss); emith_move_r_ptr_imm(arg3, (uptr)&rcmiss);
emith_read_r_r_offs_c(DCOND_NE, arg1, arg3, 0); emith_read_r_r_offs_c(DCOND_NE, arg1, arg3, 0);
emith_add_r_imm_c(DCOND_NE, arg1, 1); emith_add_r_imm_c(DCOND_NE, arg1, 1);
emith_write_r_r_offs_c(DCOND_NE, arg1, arg3, 0); emith_write_r_r_offs_c(DCOND_NE, arg1, arg3, 0);
EMITH_SJMP_END(DCOND_EQ);
#endif
emith_jump_cond(DCOND_NE, sh2_drc_dispatcher); emith_jump_cond(DCOND_NE, sh2_drc_dispatcher);
EMITH_SJMP_END(DCOND_EQ);
#else
emith_cmp_r_r(arg0, arg3);
emith_jump_cond(DCOND_NE, sh2_drc_dispatcher);
#endif
emith_read_r_r_offs_ptr(arg0, arg1, offsetof(SH2, rts_cache) + sizeof(void *)); emith_read_r_r_offs_ptr(arg0, arg1, offsetof(SH2, rts_cache) + sizeof(void *));
emith_sub_r_imm(arg2, 2*sizeof(void *)); emith_sub_r_imm(arg2, 2*sizeof(void *));
emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *));
@ -4874,7 +4986,7 @@ static void sh2_generate_utils(void)
emith_sub_r_imm(tmp, 4*2); emith_sub_r_imm(tmp, 4*2);
rcache_clean(); rcache_clean();
// push SR // push SR
tmp = rcache_get_reg_arg(0, SHR_SP,&tmp2); tmp = rcache_get_reg_arg(0, SHR_SP, &tmp2);
emith_add_r_r_imm(tmp, tmp2, 4); emith_add_r_r_imm(tmp, tmp2, 4);
tmp = rcache_get_reg_arg(1, SHR_SR, NULL); tmp = rcache_get_reg_arg(1, SHR_SR, NULL);
emith_clear_msb(tmp, tmp, 22); emith_clear_msb(tmp, tmp, 22);
@ -5478,6 +5590,8 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out,
else if ((lowest_mova && lowest_mova <= pc) || else if ((lowest_mova && lowest_mova <= pc) ||
(lowest_literal && lowest_literal <= pc)) (lowest_literal && lowest_literal <= pc))
break; // text area collides with data area break; // text area collides with data area
else if ((op_flags[i] & OF_BTARGET) && dr_get_entry(pc, is_slave, &i_end))
break; // branch target already compiled
op = FETCH_OP(pc); op = FETCH_OP(pc);
switch ((op & 0xf000) >> 12) switch ((op & 0xf000) >> 12)
@ -5490,19 +5604,19 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out,
switch (GET_Fx()) switch (GET_Fx())
{ {
case 0: // STC SR,Rn 0000nnnn00000010 case 0: // STC SR,Rn 0000nnnn00000010
tmp = SHR_SR; tmp = BITMASK2(SHR_SR, SHR_T);
break; break;
case 1: // STC GBR,Rn 0000nnnn00010010 case 1: // STC GBR,Rn 0000nnnn00010010
tmp = SHR_GBR; tmp = BITMASK1(SHR_GBR);
break; break;
case 2: // STC VBR,Rn 0000nnnn00100010 case 2: // STC VBR,Rn 0000nnnn00100010
tmp = SHR_VBR; tmp = BITMASK1(SHR_VBR);
break; break;
default: default:
goto undefined; goto undefined;
} }
opd->op = OP_MOVE; opd->op = OP_MOVE;
opd->source = BITMASK1(tmp); opd->source = tmp;
opd->dest = BITMASK1(GET_Rn()); opd->dest = BITMASK1(GET_Rn());
break; break;
case 0x03: case 0x03:
@ -5549,7 +5663,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out,
opd->imm = 1; opd->imm = 1;
break; break;
case 2: // CLRMAC 0000000000101000 case 2: // CLRMAC 0000000000101000
opd->dest = BITMASK3(SHR_T, SHR_MACL, SHR_MACH); opd->dest = BITMASK2(SHR_MACL, SHR_MACH);
break; break;
default: default:
goto undefined; goto undefined;
@ -5612,7 +5726,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out,
case 2: // RTE 0000000000101011 case 2: // RTE 0000000000101011
opd->op = OP_RTE; opd->op = OP_RTE;
opd->source = BITMASK1(SHR_SP); opd->source = BITMASK1(SHR_SP);
opd->dest = BITMASK3(SHR_SP, SHR_SR, SHR_PC); opd->dest = BITMASK4(SHR_SP, SHR_SR, SHR_T, SHR_PC);
opd->cycles = 4; opd->cycles = 4;
next_is_delay = 1; next_is_delay = 1;
end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET); end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET);
@ -5664,7 +5778,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out,
break; break;
case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111 case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111
opd->source = BITMASK2(GET_Rm(), GET_Rn()); opd->source = BITMASK2(GET_Rm(), GET_Rn());
opd->dest = BITMASK1(SHR_SR); opd->dest = BITMASK2(SHR_SR, SHR_T);
break; break;
case 0x08: // TST Rm,Rn 0010nnnnmmmm1000 case 0x08: // TST Rm,Rn 0010nnnnmmmm1000
opd->source = BITMASK2(GET_Rm(), GET_Rn()); opd->source = BITMASK2(GET_Rm(), GET_Rn());
@ -5707,8 +5821,8 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out,
opd->dest = BITMASK1(SHR_T); opd->dest = BITMASK1(SHR_T);
break; break;
case 0x04: // DIV1 Rm,Rn 0011nnnnmmmm0100 case 0x04: // DIV1 Rm,Rn 0011nnnnmmmm0100
opd->source = BITMASK3(GET_Rm(), GET_Rn(), SHR_SR); opd->source = BITMASK4(GET_Rm(), GET_Rn(), SHR_SR, SHR_T);
opd->dest = BITMASK2(GET_Rn(), SHR_SR); opd->dest = BITMASK3(GET_Rn(), SHR_SR, SHR_T);
break; break;
case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101 case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101
case 0x0d: // DMULS.L Rm,Rn 0011nnnnmmmm1101 case 0x0d: // DMULS.L Rm,Rn 0011nnnnmmmm1101
@ -5778,30 +5892,30 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out,
switch (op & 0x3f) switch (op & 0x3f)
{ {
case 0x02: // STS.L MACH,@-Rn 0100nnnn00000010 case 0x02: // STS.L MACH,@-Rn 0100nnnn00000010
tmp = SHR_MACH; tmp = BITMASK1(SHR_MACH);
break; break;
case 0x12: // STS.L MACL,@-Rn 0100nnnn00010010 case 0x12: // STS.L MACL,@-Rn 0100nnnn00010010
tmp = SHR_MACL; tmp = BITMASK1(SHR_MACL);
break; break;
case 0x22: // STS.L PR,@-Rn 0100nnnn00100010 case 0x22: // STS.L PR,@-Rn 0100nnnn00100010
tmp = SHR_PR; tmp = BITMASK1(SHR_PR);
break; break;
case 0x03: // STC.L SR,@-Rn 0100nnnn00000011 case 0x03: // STC.L SR,@-Rn 0100nnnn00000011
tmp = SHR_SR; tmp = BITMASK2(SHR_SR, SHR_T);
opd->cycles = 2; opd->cycles = 2;
break; break;
case 0x13: // STC.L GBR,@-Rn 0100nnnn00010011 case 0x13: // STC.L GBR,@-Rn 0100nnnn00010011
tmp = SHR_GBR; tmp = BITMASK1(SHR_GBR);
opd->cycles = 2; opd->cycles = 2;
break; break;
case 0x23: // STC.L VBR,@-Rn 0100nnnn00100011 case 0x23: // STC.L VBR,@-Rn 0100nnnn00100011
tmp = SHR_VBR; tmp = BITMASK1(SHR_VBR);
opd->cycles = 2; opd->cycles = 2;
break; break;
default: default:
goto undefined; goto undefined;
} }
opd->source = BITMASK2(GET_Rn(), tmp); opd->source = BITMASK1(GET_Rn()) | tmp;
opd->dest = BITMASK2(GET_Rn(), SHR_MEM); opd->dest = BITMASK2(GET_Rn(), SHR_MEM);
break; break;
case 0x04: case 0x04:
@ -5831,26 +5945,26 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out,
switch (op & 0x3f) switch (op & 0x3f)
{ {
case 0x06: // LDS.L @Rm+,MACH 0100mmmm00000110 case 0x06: // LDS.L @Rm+,MACH 0100mmmm00000110
tmp = SHR_MACH; tmp = BITMASK1(SHR_MACH);
break; break;
case 0x16: // LDS.L @Rm+,MACL 0100mmmm00010110 case 0x16: // LDS.L @Rm+,MACL 0100mmmm00010110
tmp = SHR_MACL; tmp = BITMASK1(SHR_MACL);
break; break;
case 0x26: // LDS.L @Rm+,PR 0100mmmm00100110 case 0x26: // LDS.L @Rm+,PR 0100mmmm00100110
tmp = SHR_PR; tmp = BITMASK1(SHR_PR);
break; break;
case 0x07: // LDC.L @Rm+,SR 0100mmmm00000111 case 0x07: // LDC.L @Rm+,SR 0100mmmm00000111
tmp = SHR_SR; tmp = BITMASK2(SHR_SR, SHR_T);
opd->op = OP_LDC; opd->op = OP_LDC;
opd->cycles = 3; opd->cycles = 3;
break; break;
case 0x17: // LDC.L @Rm+,GBR 0100mmmm00010111 case 0x17: // LDC.L @Rm+,GBR 0100mmmm00010111
tmp = SHR_GBR; tmp = BITMASK1(SHR_GBR);
opd->op = OP_LDC; opd->op = OP_LDC;
opd->cycles = 3; opd->cycles = 3;
break; break;
case 0x27: // LDC.L @Rm+,VBR 0100mmmm00100111 case 0x27: // LDC.L @Rm+,VBR 0100mmmm00100111
tmp = SHR_VBR; tmp = BITMASK1(SHR_VBR);
opd->op = OP_LDC; opd->op = OP_LDC;
opd->cycles = 3; opd->cycles = 3;
break; break;
@ -5858,7 +5972,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out,
goto undefined; goto undefined;
} }
opd->source = BITMASK2(GET_Rn(), SHR_MEM); opd->source = BITMASK2(GET_Rn(), SHR_MEM);
opd->dest = BITMASK2(GET_Rn(), tmp); opd->dest = BITMASK1(GET_Rn()) | tmp;
break; break;
case 0x08: case 0x08:
case 0x09: case 0x09:
@ -5931,20 +6045,20 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out,
switch (GET_Fx()) switch (GET_Fx())
{ {
case 0: // LDC Rm,SR 0100mmmm00001110 case 0: // LDC Rm,SR 0100mmmm00001110
tmp = SHR_SR; tmp = BITMASK2(SHR_SR, SHR_T);
break; break;
case 1: // LDC Rm,GBR 0100mmmm00011110 case 1: // LDC Rm,GBR 0100mmmm00011110
tmp = SHR_GBR; tmp = BITMASK1(SHR_GBR);
break; break;
case 2: // LDC Rm,VBR 0100mmmm00101110 case 2: // LDC Rm,VBR 0100mmmm00101110
tmp = SHR_VBR; tmp = BITMASK1(SHR_VBR);
break; break;
default: default:
goto undefined; goto undefined;
} }
opd->op = OP_LDC; opd->op = OP_LDC;
opd->source = BITMASK1(GET_Rn()); opd->source = BITMASK1(GET_Rn());
opd->dest = BITMASK1(tmp); opd->dest = tmp;
break; break;
case 0x0f: case 0x0f:
// MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111 // MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111
@ -6130,7 +6244,7 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out,
break; break;
case 0x0300: // TRAPA #imm 11000011iiiiiiii case 0x0300: // TRAPA #imm 11000011iiiiiiii
opd->op = OP_TRAPA; opd->op = OP_TRAPA;
opd->source = BITMASK3(SHR_SP, SHR_PC, SHR_SR); opd->source = BITMASK4(SHR_SP, SHR_PC, SHR_SR, SHR_T);
opd->dest = BITMASK2(SHR_SP, SHR_PC); opd->dest = BITMASK2(SHR_SP, SHR_PC);
opd->imm = (op & 0xff); opd->imm = (op & 0xff);
opd->cycles = 8; opd->cycles = 8;
@ -6256,9 +6370,6 @@ end:
last_btarget = 0; last_btarget = 0;
op = 0; // delay/poll insns counter op = 0; // delay/poll insns counter
for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) { for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) {
int null;
if ((op_flags[i] & OF_BTARGET) && dr_get_entry(pc, is_slave, &null))
break; // branch target already compiled
opd = &ops[i]; opd = &ops[i];
crc += FETCH_OP(pc); crc += FETCH_OP(pc);

View file

@ -38,17 +38,19 @@ void p32x_update_irls(SH2 *active_sh2, unsigned int m68k_cycles)
if (active_sh2 != NULL) if (active_sh2 != NULL)
m68k_cycles = sh2_cycles_done_m68k(active_sh2); m68k_cycles = sh2_cycles_done_m68k(active_sh2);
// find top bit = highest irq number (0 <= irl <= 14/2) by binary search
// msh2 // msh2
irqs = Pico32x.sh2irqs | Pico32x.sh2irqi[0]; irqs = Pico32x.sh2irqs | Pico32x.sh2irqi[0];
while ((irqs >>= 1)) if (irqs >= 0x10) mlvl += 8, irqs >>= 4;
mlvl++; if (irqs >= 0x04) mlvl += 4, irqs >>= 2;
mlvl *= 2; if (irqs >= 0x02) mlvl += 2, irqs >>= 1;
// ssh2 // ssh2
irqs = Pico32x.sh2irqs | Pico32x.sh2irqi[1]; irqs = Pico32x.sh2irqs | Pico32x.sh2irqi[1];
while ((irqs >>= 1)) if (irqs >= 0x10) slvl += 8, irqs >>= 4;
slvl++; if (irqs >= 0x04) slvl += 4, irqs >>= 2;
slvl *= 2; if (irqs >= 0x02) slvl += 2, irqs >>= 1;
mrun = sh2_irl_irq(&msh2, mlvl, msh2.state & SH2_STATE_RUN); mrun = sh2_irl_irq(&msh2, mlvl, msh2.state & SH2_STATE_RUN);
if (mrun) { if (mrun) {