SH2 drc: bug fixes and small speed improvements

kub 2019-06-25 20:23:45 +02:00
parent 1891e649e5
commit 748b8187db
8 changed files with 254 additions and 203 deletions

View file

@ -4,9 +4,10 @@ CC = arm-linux-gnueabi-gcc
CXX = arm-linux-gnueabi-g++ CXX = arm-linux-gnueabi-g++
AS = arm-linux-gnueabi-as AS = arm-linux-gnueabi-as
STRIP = arm-linux-gnueabi-strip STRIP = arm-linux-gnueabi-strip
CFLAGS += -mabi=apcs-gnu -mno-thumb-interwork -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -mtune=arm920t -Wno-unused-result -fno-stack-protector -D__GP2X__ CFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -mtune=arm920t
CFLAGS += -Wno-unused-result -D__GP2X__ -mno-thumb-interwork -fno-stack-protector -fno-common
CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include
CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-sra -fno-ipa-pure-const
ASFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t ASFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t
LDFLAGS += -mabi=apcs-gnu -mfpu=fpa -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/src/gp2x/armroot/lib -static LDFLAGS += -mabi=apcs-gnu -mfpu=fpa -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/src/gp2x/armroot/lib -static
LDLIBS += -lpng -lm -ldl LDLIBS += -lpng -lm -ldl

View file

@ -6,3 +6,39 @@ extern u8 *tcache;
void drc_cmn_init(void); void drc_cmn_init(void);
void drc_cmn_cleanup(void); void drc_cmn_cleanup(void);
#define BITMASK1(v0) (1 << (v0))
#define BITMASK2(v0,v1) ((1 << (v0)) | (1 << (v1)))
#define BITMASK3(v0,v1,v2) (BITMASK2(v0,v1) | (1 << (v2)))
#define BITMASK4(v0,v1,v2,v3) (BITMASK3(v0,v1,v2) | (1 << (v3)))
#define BITMASK5(v0,v1,v2,v3,v4) (BITMASK4(v0,v1,v2,v3) | (1 << (v4)))
#define BITMASK6(v0,v1,v2,v3,v4,v5) (BITMASK5(v0,v1,v2,v3,v4) | (1 << (v5)))
#define BITRANGE(v0,v1) (BITMASK1(v1+1)-BITMASK1(v0)) // set with v0..v1
// binary search approach, since we don't have CLZ on ARM920T
#define FOR_ALL_BITS_SET_DO(mask, bit, code) { \
u32 __mask = mask; \
for (bit = 31; bit >= 0 && mask; bit--, __mask <<= 1) { \
if (!(__mask & (0xffff << 16))) \
bit -= 16, __mask <<= 16; \
if (!(__mask & (0xff << 24))) \
bit -= 8, __mask <<= 8; \
if (!(__mask & (0xf << 28))) \
bit -= 4, __mask <<= 4; \
if (!(__mask & (0x3 << 30))) \
bit -= 2, __mask <<= 2; \
if (!(__mask & (0x1 << 31))) \
bit -= 1, __mask <<= 1; \
if (__mask & (0x1 << 31)) { \
code; \
} \
} \
}
// inspired by https://graphics.stanford.edu/~seander/bithacks.html
static inline int count_bits(unsigned val)
{
val = val - ((val >> 1) & 0x55555555);
val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
return (((val + (val >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
}
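
For reference, a standalone demo (illustrative only, compiled outside the DRC) of how the two helpers added here are meant to be used: FOR_ALL_BITS_SET_DO visits each set bit from high to low by testing half-word/byte/nibble groups instead of relying on a CLZ instruction, and count_bits is the classic parallel popcount.

#include <stdio.h>
typedef unsigned int u32;

#define FOR_ALL_BITS_SET_DO(mask, bit, code) { \
  u32 __mask = mask; \
  for (bit = 31; bit >= 0 && mask; bit--, __mask <<= 1) { \
    if (!(__mask & (0xffff << 16))) \
      bit -= 16, __mask <<= 16; \
    if (!(__mask & (0xff << 24))) \
      bit -= 8, __mask <<= 8; \
    if (!(__mask & (0xf << 28))) \
      bit -= 4, __mask <<= 4; \
    if (!(__mask & (0x3 << 30))) \
      bit -= 2, __mask <<= 2; \
    if (!(__mask & (0x1 << 31))) \
      bit -= 1, __mask <<= 1; \
    if (__mask & (0x1 << 31)) { \
      code; \
    } \
  } \
}

static inline int count_bits(unsigned val)
{
  val = val - ((val >> 1) & 0x55555555);
  val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
  return (((val + (val >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
}

int main(void)
{
  u32 live_regs = (1 << 0) | (1 << 4) | (1 << 12); // hypothetical register mask
  int bit;
  FOR_ALL_BITS_SET_DO(live_regs, bit,
    printf("reg %d is live\n", bit));              // prints 12, 4, 0
  printf("%d regs live\n", count_bits(live_regs)); // prints 3
  return 0;
}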

View file

@ -382,13 +382,6 @@ static void emith_flush(void)
#define EOP_MOVT(rd,imm) \ #define EOP_MOVT(rd,imm) \
EMIT(0xe3400000 | ((rd)<<12) | (((imm)>>16)&0xfff) | (((imm)>>12)&0xf0000), M1(rd), NO) EMIT(0xe3400000 | ((rd)<<12) | (((imm)>>16)&0xfff) | (((imm)>>12)&0xf0000), M1(rd), NO)
static inline int count_bits(unsigned val)
{
val = val - ((val >> 1) & 0x55555555);
val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
return (((val + (val >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
}
// host literal pool; must be significantly smaller than 1024 (max LDR offset = 4096) // host literal pool; must be significantly smaller than 1024 (max LDR offset = 4096)
#define MAX_HOST_LITERALS 128 #define MAX_HOST_LITERALS 128
static u32 literal_pool[MAX_HOST_LITERALS]; static u32 literal_pool[MAX_HOST_LITERALS];
@ -429,18 +422,26 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int
// count insns needed for mov/orr #imm // count insns needed for mov/orr #imm
for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++) for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++)
v = (v << 2) | (v >> 30); v = (v << 2) | (v >> 30);
#ifdef HAVE_ARMV7
for (i = 2; i > 0; i--, v >>= 8) for (i = 2; i > 0; i--, v >>= 8)
while (v > 0xff && !(v & 3)) while (v > 0xff && !(v & 3))
v >>= 2; v >>= 2;
if (v) { // 3+ insns needed... if (v) { // 3+ insns needed...
if (op == A_OP_MVN) if (op == A_OP_MVN)
imm = ~imm; imm = ~imm;
#ifdef HAVE_ARMV7
// ...prefer movw/movt // ...prefer movw/movt
EOP_MOVW(rd, imm); EOP_MOVW(rd, imm);
if (imm & 0xffff0000) if (imm & 0xffff0000)
EOP_MOVT(rd, imm); EOP_MOVT(rd, imm);
return;
}
#else #else
for (i = 3; i > 0; i--, v >>= 8)
while (v > 0xff && !(v & 3))
v >>= 2;
if (v) { // 4 insns needed...
if (op == A_OP_MVN)
imm = ~imm;
// ...emit literal load // ...emit literal load
int idx, o; int idx, o;
if (literal_iindex >= MAX_HOST_LITERALS) { if (literal_iindex >= MAX_HOST_LITERALS) {
@ -455,9 +456,9 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int
EOP_C_DOP_IMM(cond, A_OP_ADD, 0, rd, rd, 0, o); EOP_C_DOP_IMM(cond, A_OP_ADD, 0, rd, rd, 0, o);
else if (o < 0) else if (o < 0)
EOP_C_DOP_IMM(cond, A_OP_SUB, 0, rd, rd, 0, -o); EOP_C_DOP_IMM(cond, A_OP_SUB, 0, rd, rd, 0, -o);
#endif
return; return;
} }
#endif
break; break;
case A_OP_AND: case A_OP_AND:
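
The reworked logic above first counts how many mov/orr instructions a constant would need; ARMv7 hosts then switch to a movw/movt pair as soon as 3+ instructions would be required, while pre-v7 hosts fall back to a literal-pool load only in the 4-instruction case. A minimal sketch (function and variable names are mine) of the underlying test, i.e. whether a constant fits ARM's "8-bit value rotated right by an even amount" immediate form:

#include <stdio.h>
typedef unsigned int u32;

// rotate the value left by 2 until it fits in 8 bits, mirroring the
// ror2 loop in emith_op_imm2; if it never fits, more than one insn
// (or movw/movt / a literal load) is needed
static int arm_imm_encodable(u32 imm, int *ror2, u32 *imm8)
{
  u32 v = imm;
  int r;
  for (r = 0; v > 0xff && r < 32/2; r++)
    v = (v << 2) | (v >> 30);
  if (v > 0xff)
    return 0;
  *ror2 = r;     // encoded rotate field; actual rotation is 2*r bits
  *imm8 = v;
  return 1;
}

int main(void)
{
  u32 tests[] = { 0x000000ff, 0x0003fc00, 0x12345678 };
  int i, r; u32 v;
  for (i = 0; i < 3; i++) {
    if (arm_imm_encodable(tests[i], &r, &v))
      printf("%08x = 0x%02x ror %d -> single data-processing insn\n",
             tests[i], v, 2*r);
    else
      printf("%08x -> needs movw/movt or a literal pool entry\n", tests[i]);
  }
  return 0;
}
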
@ -544,7 +545,7 @@ static int emith_xbranch(int cond, void *target, int is_call)
EMIT((u32)target,M1(PC),0); EMIT((u32)target,M1(PC),0);
#else #else
// should never happen // should never happen
elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, "indirect jmp %08x->%08x", target, tcache_ptr); elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, "indirect jmp %8p->%8p", target, tcache_ptr);
exit(1); exit(1);
#endif #endif
} }
@ -633,9 +634,6 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
#define EMITH_NOTHING1(cond) \ #define EMITH_NOTHING1(cond) \
(void)(cond) (void)(cond)
#define EMITH_SJMP_DECL_()
#define EMITH_SJMP_START_(cond) EMITH_NOTHING1(cond)
#define EMITH_SJMP_END_(cond) EMITH_NOTHING1(cond)
#define EMITH_SJMP_START(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP_START(cond) EMITH_NOTHING1(cond)
#define EMITH_SJMP_END(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP_END(cond) EMITH_NOTHING1(cond)
#define EMITH_SJMP2_START(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP2_START(cond) EMITH_NOTHING1(cond)
@ -806,6 +804,9 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
#define emith_eor_r_imm(r, imm) \ #define emith_eor_r_imm(r, imm) \
emith_op_imm(A_COND_AL, 0, A_OP_EOR, r, imm) emith_op_imm(A_COND_AL, 0, A_OP_EOR, r, imm)
#define emith_eor_r_imm_ptr(r, imm) \
emith_eor_r_imm(r, imm)
// note: only use 8bit imm for these // note: only use 8bit imm for these
#define emith_tst_r_imm(r, imm) \ #define emith_tst_r_imm(r, imm) \
emith_top_imm(A_COND_AL, A_OP_TST, r, imm) emith_top_imm(A_COND_AL, A_OP_TST, r, imm)
@ -837,6 +838,9 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
#define emith_eor_r_imm_c(cond, r, imm) \ #define emith_eor_r_imm_c(cond, r, imm) \
emith_op_imm(cond, 0, A_OP_EOR, r, imm) emith_op_imm(cond, 0, A_OP_EOR, r, imm)
#define emith_eor_r_imm_ptr_c(cond, r, imm) \
emith_eor_r_imm_c(cond, r, imm)
#define emith_bic_r_imm_c(cond, r, imm) \ #define emith_bic_r_imm_c(cond, r, imm) \
emith_op_imm(cond, 0, A_OP_BIC, r, imm) emith_op_imm(cond, 0, A_OP_BIC, r, imm)
@ -1139,6 +1143,8 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
emith_jump(target); \ emith_jump(target); \
} while (0) } while (0)
#define emith_call_cleanup() /**/
#define emith_ret_c(cond) \ #define emith_ret_c(cond) \
emith_jump_reg_c(cond, LR) emith_jump_reg_c(cond, LR)
@ -1228,10 +1234,10 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
/* if (reg <= turns) turns = reg-1 */ \ /* if (reg <= turns) turns = reg-1 */ \
t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \
emith_cmp_r_r(t3, t2); \ emith_cmp_r_r(t3, t2); \
emith_sub_r_r_imm_c(DCOND_LE, t2, t3, 1); \ emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \
/* if (reg <= 1) turns = 0 */ \ /* if (reg <= 1) turns = 0 */ \
emith_cmp_r_imm(t3, 1); \ emith_cmp_r_imm(t3, 1); \
emith_move_r_imm_c(DCOND_LE, t2, 0); \ emith_move_r_imm_c(DCOND_LS, t2, 0); \
/* reg -= turns */ \ /* reg -= turns */ \
emith_sub_r_r(t3, t2); \ emith_sub_r_r(t3, t2); \
} \ } \
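
The DCOND_LE to DCOND_LS switch here (and in the x86 variant further down) matters because the compared values are unsigned: with the signed less-or-equal condition, a register value with the top bit set would be treated as negative and the clamp would fire when it should not. A tiny plain-C illustration of the difference:

#include <stdio.h>

int main(void)
{
  unsigned int reg = 0x80000001, turns = 5;
  // signed compare (what LE tests): 0x80000001 reads as negative, so it looks <= 5
  printf("signed:   %d\n", (int)reg <= (int)turns);  // 1 -> clamp would fire wrongly
  // unsigned compare (what LS tests): 0x80000001 is huge, clearly not <= 5
  printf("unsigned: %d\n", reg <= turns);            // 0 -> correct
  return 0;
}
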
@ -1361,7 +1367,7 @@ static int tcond = -1;
#define emith_set_t(sr, val) \ #define emith_set_t(sr, val) \
tcond = ((val) ? A_COND_AL: A_COND_NV) tcond = ((val) ? A_COND_AL: A_COND_NV)
static void emith_sync_t(sr) static void emith_sync_t(int sr)
{ {
if (tcond == A_COND_AL) if (tcond == A_COND_AL)
emith_or_r_imm(sr, T); emith_or_r_imm(sr, T);

View file

@ -396,6 +396,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
#define emith_cmp_r_imm(r, imm) \ #define emith_cmp_r_imm(r, imm) \
emith_arith_r_imm(7, r, imm) emith_arith_r_imm(7, r, imm)
#define emith_eor_r_imm_ptr(r, imm) do { \
EMIT_REX_IF(1, 0, r); \
EMIT_OP_MODRM64(0x81, 3, 6, r); \
EMIT(imm, u32); \
} while (0)
#define emith_tst_r_imm(r, imm) do { \ #define emith_tst_r_imm(r, imm) do { \
EMIT_REX_IF(0, 0, r); \ EMIT_REX_IF(0, 0, r); \
EMIT_OP_MODRM64(0xf7, 3, 0, r); \ EMIT_OP_MODRM64(0xf7, 3, 0, r); \
@ -417,6 +423,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
emith_or_r_imm(r, imm) emith_or_r_imm(r, imm)
#define emith_eor_r_imm_c(cond, r, imm) \ #define emith_eor_r_imm_c(cond, r, imm) \
emith_eor_r_imm(r, imm) emith_eor_r_imm(r, imm)
#define emith_eor_r_imm_ptr_c(cond, r, imm) \
emith_eor_r_imm_ptr(r, imm)
#define emith_bic_r_imm_c(cond, r, imm) \ #define emith_bic_r_imm_c(cond, r, imm) \
emith_bic_r_imm(r, imm) emith_bic_r_imm(r, imm)
#define emith_tst_r_imm_c(cond, r, imm) \ #define emith_tst_r_imm_c(cond, r, imm) \
@ -589,9 +597,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
// XXX: stupid mess // XXX: stupid mess
#define emith_mul_(op, dlo, dhi, s1, s2) do { \ #define emith_mul_(op, dlo, dhi, s1, s2) do { \
int rmr; \ int rmr; \
if (dlo != xAX && dhi != xAX) \ if (dlo != xAX && dhi != xAX && rcache_is_hreg_used(xAX)) \
emith_push(xAX); \ emith_push(xAX); \
if (dlo != xDX && dhi != xDX) \ if (dlo != xDX && dhi != xDX && rcache_is_hreg_used(xDX)) \
emith_push(xDX); \ emith_push(xDX); \
if ((s1) == xAX) \ if ((s1) == xAX) \
rmr = s2; \ rmr = s2; \
@ -609,9 +617,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
} \ } \
if (dhi != xDX && dhi != -1 && !(dhi == xAX && dlo == xDX)) \ if (dhi != xDX && dhi != -1 && !(dhi == xAX && dlo == xDX)) \
emith_move_r_r(dhi, (dlo == xDX ? xAX : xDX)); \ emith_move_r_r(dhi, (dlo == xDX ? xAX : xDX)); \
if (dlo != xDX && dhi != xDX) \ if (dlo != xDX && dhi != xDX && rcache_is_hreg_used(xDX)) \
emith_pop(xDX); \ emith_pop(xDX); \
if (dlo != xAX && dhi != xAX) \ if (dlo != xAX && dhi != xAX && rcache_is_hreg_used(xAX)) \
emith_pop(xAX); \ emith_pop(xAX); \
} while (0) } while (0)
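
On x86 a widening multiply always writes the EDX:EAX pair, so the old code unconditionally pushed and popped xAX/xDX around it; the new rcache_is_hreg_used() check (added further down in this commit) lets the emitter skip the save when the register cache holds nothing live in those host registers. A toy model of that decision, not the real rcache API:

#include <stdio.h>

enum { xAX = 0, xCX, xDX, xBX };

static int hreg_live[4];               // 1 = reg cache holds a live value here

// save a host reg around the mul only if it is clobbered (not a result reg)
// and actually holds something worth preserving
static int need_save(int hr, int dlo, int dhi)
{
  return dlo != hr && dhi != hr && hreg_live[hr];
}

int main(void)
{
  hreg_live[xAX] = 1;                  // only EAX is in use
  printf("save EAX: %d  save EDX: %d\n",
         need_save(xAX, xCX, xBX),     // 1 -> push/pop still needed
         need_save(xDX, xCX, xBX));    // 0 -> two instructions saved
  return 0;
}
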
@ -898,6 +906,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
emith_jump(target); \ emith_jump(target); \
} while (0) } while (0)
#define emith_call_cleanup() \
emith_add_r_r_ptr_imm(xSP, xSP, sizeof(void *)); // remove return addr
#define emith_ret() \ #define emith_ret() \
EMIT_OP(0xc3) EMIT_OP(0xc3)
@ -912,10 +923,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
#define emith_push_ret(r) do { \ #define emith_push_ret(r) do { \
int r_ = (r >= 0 ? r : xSI); \ int r_ = (r >= 0 ? r : xSI); \
emith_push(r_); /* always push to align */ \ emith_push(r_); /* always push to align */ \
emith_add_r_r_ptr_imm(xSP, xSP, -8*4); /* args shadow space */ \
} while (0) } while (0)
#define emith_pop_and_ret(r) do { \ #define emith_pop_and_ret(r) do { \
int r_ = (r >= 0 ? r : xSI); \ int r_ = (r >= 0 ? r : xSI); \
emith_add_r_r_ptr_imm(xSP, xSP, 8*4); /* args shadow space */ \
emith_pop(r_); \ emith_pop(r_); \
emith_ret(); \ emith_ret(); \
} while (0) } while (0)
@ -942,15 +955,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
// "simple" jump (no more then a few insns) // "simple" jump (no more then a few insns)
// ARM will use conditional instructions here // ARM will use conditional instructions here
#define EMITH_SJMP_DECL_() \
u8 *cond_ptr
#define EMITH_SJMP_START_(cond) \
JMP8_POS(cond_ptr)
#define EMITH_SJMP_END_(cond) \
JMP8_EMIT(cond, cond_ptr)
#define EMITH_SJMP_START EMITH_JMP_START #define EMITH_SJMP_START EMITH_JMP_START
#define EMITH_SJMP_END EMITH_JMP_END #define EMITH_SJMP_END EMITH_JMP_END
@ -1046,7 +1050,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
emith_push(xR15); \ emith_push(xR15); \
emith_push(xSI); \ emith_push(xSI); \
emith_push(xDI); \ emith_push(xDI); \
emith_add_r_r_ptr_imm(xSP, xSP, -8*5); /* align + ABI param area */ \ emith_add_r_r_ptr_imm(xSP, xSP, -8*5); /* align + args shadow space */ \
} while (0) } while (0)
#define emith_sh2_drc_exit() do { \ #define emith_sh2_drc_exit() do { \
@ -1106,19 +1110,17 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
#endif #endif
#define emith_save_caller_regs(mask) do { \ #define emith_save_caller_regs(mask) do { \
if ((mask) & (1 << xAX)) emith_push(xAX); \ int _c; u32 _m = mask & 0xfc7; /* AX, CX, DX, SI, DI, 8, 9, 10, 11 */ \
if ((mask) & (1 << xCX)) emith_push(xCX); \ if (__builtin_parity(_m) == 1) _m |= 0x8; /* BX for ABI align */ \
if ((mask) & (1 << xDX)) emith_push(xDX); \ for (_c = HOST_REGS; _m && _c >= 0; _m &= ~(1 << _c), _c--) \
if ((mask) & (1 << xSI)) emith_push(xSI); \ if (_m & (1 << _c)) emith_push(_c); \
if ((mask) & (1 << xDI)) emith_push(xDI); \
} while (0) } while (0)
#define emith_restore_caller_regs(mask) do { \ #define emith_restore_caller_regs(mask) do { \
if ((mask) & (1 << xDI)) emith_pop(xDI); \ int _c; u32 _m = mask & 0xfc7; \
if ((mask) & (1 << xSI)) emith_pop(xSI); \ if (__builtin_parity(_m) == 1) _m |= 0x8; /* BX for ABI align */ \
if ((mask) & (1 << xDX)) emith_pop(xDX); \ for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \
if ((mask) & (1 << xCX)) emith_pop(xCX); \ if (_m & (1 << _c)) emith_pop(_c); \
if ((mask) & (1 << xAX)) emith_pop(xAX); \
} while (0) } while (0)
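
The save/restore macros now loop over a mask of caller-saved host registers instead of a fixed push list, and pad the mask with BX when the popcount is odd so the stack stays 16-byte aligned across the call. A standalone sketch of the same selection logic (HOST_REGS and the example masks are assumptions for the demo):

#include <stdio.h>
typedef unsigned int u32;

#define HOST_REGS 16   // assumed x86-64 host with 16 GPRs

static void show_push_order(u32 mask)
{
  u32 m = mask & 0xfc7;                 // AX, CX, DX, SI, DI, R8-R11
  int c;
  if (__builtin_parity(m) == 1)
    m |= 0x8;                           // odd count: add BX to keep ABI alignment
  printf("push order:");
  for (c = HOST_REGS; m && c >= 0; m &= ~(1u << c), c--)
    if (m & (1u << c))
      printf(" r%d", c);
  printf("\n");
}

int main(void)
{
  show_push_order(0x007);  // AX,CX,DX -> odd, BX added: r3 r2 r1 r0
  show_push_order(0x0c0);  // SI,DI    -> even, no padding: r7 r6
  return 0;
}
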
#define emith_sh2_rcall(a, tab, func, mask) do { \ #define emith_sh2_rcall(a, tab, func, mask) do { \
@ -1192,14 +1194,14 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
/* if (reg <= turns) turns = reg-1 */ \ /* if (reg <= turns) turns = reg-1 */ \
t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \
emith_cmp_r_r(t3, t2); \ emith_cmp_r_r(t3, t2); \
EMITH_SJMP_START(DCOND_GT); \ EMITH_SJMP_START(DCOND_HI); \
emith_sub_r_r_imm_c(DCOND_LE, t2, t3, 1); \ emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \
EMITH_SJMP_END(DCOND_GT); \ EMITH_SJMP_END(DCOND_HI); \
/* if (reg <= 1) turns = 0 */ \ /* if (reg <= 1) turns = 0 */ \
emith_cmp_r_imm(t3, 1); \ emith_cmp_r_imm(t3, 1); \
EMITH_SJMP_START(DCOND_GT); \ EMITH_SJMP_START(DCOND_HI); \
emith_move_r_imm_c(DCOND_LE, t2, 0); \ emith_move_r_imm_c(DCOND_LS, t2, 0); \
EMITH_SJMP_END(DCOND_GT); \ EMITH_SJMP_END(DCOND_HI); \
/* reg -= turns */ \ /* reg -= turns */ \
emith_sub_r_r(t3, t2); \ emith_sub_r_r(t3, t2); \
} \ } \

View file

@ -106,14 +106,6 @@ static int insns_compiled, hash_collisions, host_insn_count;
#define GET_Rn() \ #define GET_Rn() \
((op >> 8) & 0x0f) ((op >> 8) & 0x0f)
#define BITMASK1(v0) (1 << (v0))
#define BITMASK2(v0,v1) ((1 << (v0)) | (1 << (v1)))
#define BITMASK3(v0,v1,v2) (BITMASK2(v0,v1) | (1 << (v2)))
#define BITMASK4(v0,v1,v2,v3) (BITMASK3(v0,v1,v2) | (1 << (v3)))
#define BITMASK5(v0,v1,v2,v3,v4) (BITMASK4(v0,v1,v2,v3) | (1 << (v4)))
#define BITMASK6(v0,v1,v2,v3,v4,v5) (BITMASK5(v0,v1,v2,v3,v4) | (1 << (v5)))
#define BITRANGE(v0,v1) (BITMASK1(v1+1)-BITMASK1(v0)) // set with v0..v1
#define SHR_T SHR_SR // might make them separate someday #define SHR_T SHR_SR // might make them separate someday
#define SHR_MEM 31 #define SHR_MEM 31
#define SHR_TMP -1 #define SHR_TMP -1
@ -174,6 +166,7 @@ enum op_types {
static u8 *tcache_dsm_ptrs[3]; static u8 *tcache_dsm_ptrs[3];
static char sh2dasm_buff[64]; static char sh2dasm_buff[64];
#define do_host_disasm(tcid) \ #define do_host_disasm(tcid) \
emith_flush(); \
host_dasm(tcache_dsm_ptrs[tcid], emith_insn_ptr() - tcache_dsm_ptrs[tcid]); \ host_dasm(tcache_dsm_ptrs[tcid], emith_insn_ptr() - tcache_dsm_ptrs[tcid]); \
tcache_dsm_ptrs[tcid] = emith_insn_ptr() tcache_dsm_ptrs[tcid] = emith_insn_ptr()
#else #else
@ -212,7 +205,6 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr)
static FILE *trace[2]; static FILE *trace[2];
int idx = sh2->is_slave; int idx = sh2->is_slave;
if (!trace[0]) { if (!trace[0]) {
truncate("pico.trace", 0);
trace[0] = fopen("pico.trace0", "wb"); trace[0] = fopen("pico.trace0", "wb");
trace[1] = fopen("pico.trace1", "wb"); trace[1] = fopen("pico.trace1", "wb");
} }
@ -286,7 +278,7 @@ static u8 *tcache_limit[TCACHE_BUFFERS];
// ptr for code emiters // ptr for code emiters
static u8 *tcache_ptr; static u8 *tcache_ptr;
#define MAX_BLOCK_ENTRIES (BLOCK_INSN_LIMIT / 8) #define MAX_BLOCK_ENTRIES (BLOCK_INSN_LIMIT / 6)
struct block_link { struct block_link {
u32 target_pc; u32 target_pc;
@ -330,32 +322,20 @@ struct block_desc {
struct block_entry entryp[MAX_BLOCK_ENTRIES]; struct block_entry entryp[MAX_BLOCK_ENTRIES];
}; };
static const int block_max_counts[TCACHE_BUFFERS] = { #define BLOCK_MAX_COUNT(tcid) ((tcid) ? 256 : 16*256)
4*1024,
256,
256,
};
static struct block_desc *block_tables[TCACHE_BUFFERS]; static struct block_desc *block_tables[TCACHE_BUFFERS];
static int block_counts[TCACHE_BUFFERS]; static int block_counts[TCACHE_BUFFERS];
static int block_limit[TCACHE_BUFFERS]; static int block_limit[TCACHE_BUFFERS];
// we have block_link_pool to avoid using mallocs // we have block_link_pool to avoid using mallocs
static const int block_link_pool_max_counts[TCACHE_BUFFERS] = { #define BLOCK_LINK_MAX_COUNT(tcid) ((tcid) ? 1024 : 16*1024)
16*1024,
4*256,
4*256,
};
static struct block_link *block_link_pool[TCACHE_BUFFERS]; static struct block_link *block_link_pool[TCACHE_BUFFERS];
static int block_link_pool_counts[TCACHE_BUFFERS]; static int block_link_pool_counts[TCACHE_BUFFERS];
static struct block_link **unresolved_links[TCACHE_BUFFERS]; static struct block_link **unresolved_links[TCACHE_BUFFERS];
static struct block_link *blink_free[TCACHE_BUFFERS]; static struct block_link *blink_free[TCACHE_BUFFERS];
// used for invalidation // used for invalidation
static const int ram_sizes[TCACHE_BUFFERS] = { #define RAM_SIZE(tcid) ((tcid) ? 0x1000 : 0x40000)
0x40000,
0x1000,
0x1000,
};
#define INVAL_PAGE_SIZE 0x100 #define INVAL_PAGE_SIZE 0x100
struct block_list { struct block_list {
@ -373,15 +353,11 @@ static struct block_list *inactive_blocks[TCACHE_BUFFERS];
// each array has len: sizeof(mem) / INVAL_PAGE_SIZE // each array has len: sizeof(mem) / INVAL_PAGE_SIZE
static struct block_list **inval_lookup[TCACHE_BUFFERS]; static struct block_list **inval_lookup[TCACHE_BUFFERS];
static const int hash_table_sizes[TCACHE_BUFFERS] = { #define HASH_TABLE_SIZE(tcid) ((tcid) ? 256 : 64*256)
0x4000,
0x100,
0x100,
};
static struct block_entry **hash_tables[TCACHE_BUFFERS]; static struct block_entry **hash_tables[TCACHE_BUFFERS];
#define HASH_FUNC(hash_tab, addr, mask) \ #define HASH_FUNC(hash_tab, addr, mask) \
(hash_tab)[(((addr) >> 20) ^ ((addr) >> 2)) & (mask)] (hash_tab)[((addr) >> 1) & (mask)]
#if (DRC_DEBUG & 128) #if (DRC_DEBUG & 128)
#if BRANCH_CACHE #if BRANCH_CACHE
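
The simplified hash just drops the low bit of the PC (SH2 opcodes are 2-byte aligned) and masks with the table size, so consecutive blocks map to distinct buckets instead of being folded through the old shift/xor mix. A quick comparison (the addresses are made-up SDRAM PCs):

#include <stdio.h>
typedef unsigned int u32;

#define HASH_TABLE_SIZE(tcid) ((tcid) ? 256 : 64*256)
#define HASH_OLD(addr, mask)  ((((addr) >> 20) ^ ((addr) >> 2)) & (mask))
#define HASH_NEW(addr, mask)  (((addr) >> 1) & (mask))

int main(void)
{
  u32 mask = HASH_TABLE_SIZE(0) - 1;   // main (SDRAM) block cache
  u32 pc;
  for (pc = 0x06000100; pc < 0x06000108; pc += 2)
    printf("pc %08x  old bucket %04x  new bucket %04x\n",
           pc, HASH_OLD(pc, mask), HASH_NEW(pc, mask));
  return 0;
}
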
@ -431,6 +407,10 @@ typedef struct {
} guest_reg_t; } guest_reg_t;
// possibly needed in code emitter
static int rcache_get_tmp(void);
static void rcache_free_tmp(int hr);
// Note: cache_regs[] must have at least the amount of REG and TEMP registers // Note: cache_regs[] must have at least the amount of REG and TEMP registers
// used by handlers in worst case (currently 4). // used by handlers in worst case (currently 4).
// Register assignment goes by ABI convention. Caller save registers are TEMP, // Register assignment goes by ABI convention. Caller save registers are TEMP,
@ -583,13 +563,12 @@ static int dr_ctx_get_mem_ptr(SH2 *sh2, u32 a, u32 *mask)
// check if region is mapped memory // check if region is mapped memory
memptr = p32x_sh2_get_mem_ptr(a, mask, sh2); memptr = p32x_sh2_get_mem_ptr(a, mask, sh2);
if (memptr == NULL /*|| (a & ((1 << SH2_READ_SHIFT)-1) & ~*mask) != 0*/) if (memptr == NULL)
return poffs; return poffs;
if (memptr == sh2->p_bios) // BIOS if (memptr == sh2->p_bios) // BIOS
poffs = offsetof(SH2, p_bios); poffs = offsetof(SH2, p_bios);
else if (memptr == sh2->p_da) // data array else if (memptr == sh2->p_da) // data array
// FIXME: access sh2->data_array instead
poffs = offsetof(SH2, p_da); poffs = offsetof(SH2, p_da);
else if (memptr == sh2->p_sdram) // SDRAM else if (memptr == sh2->p_sdram) // SDRAM
poffs = offsetof(SH2, p_sdram); poffs = offsetof(SH2, p_sdram);
@ -602,16 +581,16 @@ static int dr_ctx_get_mem_ptr(SH2 *sh2, u32 a, u32 *mask)
static struct block_entry *dr_get_entry(u32 pc, int is_slave, int *tcache_id) static struct block_entry *dr_get_entry(u32 pc, int is_slave, int *tcache_id)
{ {
struct block_entry *be; struct block_entry *be;
u32 tcid = 0, mask; u32 tcid = 0;
// data arrays have their own caches if ((pc & 0xe0000000) == 0xc0000000)
if ((pc & 0xe0000000) == 0xc0000000 || (pc & ~0xfff) == 0) tcid = 1 + is_slave; // data array
tcid = 1 + is_slave; if ((pc & ~0xfff) == 0)
tcid = 1 + is_slave; // BIOS
*tcache_id = tcid; *tcache_id = tcid;
mask = hash_table_sizes[tcid] - 1; be = HASH_FUNC(hash_tables[tcid], pc, HASH_TABLE_SIZE(tcid) - 1);
be = HASH_FUNC(hash_tables[tcid], pc, mask); if (be != NULL) // don't ask... gcc code generation hint
for (; be != NULL; be = be->next) for (; be != NULL; be = be->next)
if (be->pc == pc) if (be->pc == pc)
return be; return be;
@ -688,17 +667,17 @@ static void REGPARM(1) flush_tcache(int tcid)
int tc_used, bl_used; int tc_used, bl_used;
tc_used = tcache_sizes[tcid] - (tcache_limit[tcid] - tcache_ptrs[tcid]); tc_used = tcache_sizes[tcid] - (tcache_limit[tcid] - tcache_ptrs[tcid]);
bl_used = block_max_counts[tcid] - (block_limit[tcid] - block_counts[tcid]); bl_used = BLOCK_MAX_COUNT(tcid) - (block_limit[tcid] - block_counts[tcid]);
elprintf(EL_STATUS, "tcache #%d flush! (%d/%d, bds %d/%d)", tcid, tc_used, elprintf(EL_STATUS, "tcache #%d flush! (%d/%d, bds %d/%d)", tcid, tc_used,
tcache_sizes[tcid], bl_used, block_max_counts[tcid]); tcache_sizes[tcid], bl_used, BLOCK_MAX_COUNT(tcid));
#endif #endif
block_counts[tcid] = 0; block_counts[tcid] = 0;
block_limit[tcid] = block_max_counts[tcid] - 1; block_limit[tcid] = BLOCK_MAX_COUNT(tcid) - 1;
block_link_pool_counts[tcid] = 0; block_link_pool_counts[tcid] = 0;
blink_free[tcid] = NULL; blink_free[tcid] = NULL;
memset(unresolved_links[tcid], 0, sizeof(*unresolved_links[0]) * hash_table_sizes[tcid]); memset(unresolved_links[tcid], 0, sizeof(*unresolved_links[0]) * HASH_TABLE_SIZE(tcid));
memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * hash_table_sizes[tcid]); memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * HASH_TABLE_SIZE(tcid));
tcache_ptrs[tcid] = tcache_bases[tcid]; tcache_ptrs[tcid] = tcache_bases[tcid];
tcache_limit[tcid] = tcache_bases[tcid] + tcache_sizes[tcid]; tcache_limit[tcid] = tcache_bases[tcid] + tcache_sizes[tcid];
if (Pico32xMem->sdram != NULL) { if (Pico32xMem->sdram != NULL) {
@ -724,14 +703,14 @@ static void REGPARM(1) flush_tcache(int tcid)
tcache_dsm_ptrs[tcid] = tcache_bases[tcid]; tcache_dsm_ptrs[tcid] = tcache_bases[tcid];
#endif #endif
for (i = 0; i < ram_sizes[tcid] / INVAL_PAGE_SIZE; i++) for (i = 0; i < RAM_SIZE(tcid) / INVAL_PAGE_SIZE; i++)
rm_block_list(&inval_lookup[tcid][i]); rm_block_list(&inval_lookup[tcid][i]);
rm_block_list(&inactive_blocks[tcid]); rm_block_list(&inactive_blocks[tcid]);
} }
static void add_to_hashlist(struct block_entry *be, int tcache_id) static void add_to_hashlist(struct block_entry *be, int tcache_id)
{ {
u32 tcmask = hash_table_sizes[tcache_id] - 1; u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1;
struct block_entry **head = &HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask); struct block_entry **head = &HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask);
be->prev = NULL; be->prev = NULL;
@ -751,7 +730,7 @@ static void add_to_hashlist(struct block_entry *be, int tcache_id)
static void rm_from_hashlist(struct block_entry *be, int tcache_id) static void rm_from_hashlist(struct block_entry *be, int tcache_id)
{ {
u32 tcmask = hash_table_sizes[tcache_id] - 1; u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1;
struct block_entry **head = &HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask); struct block_entry **head = &HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask);
#if DRC_DEBUG & 1 #if DRC_DEBUG & 1
@ -773,7 +752,7 @@ static void rm_from_hashlist(struct block_entry *be, int tcache_id)
static void add_to_hashlist_unresolved(struct block_link *bl, int tcache_id) static void add_to_hashlist_unresolved(struct block_link *bl, int tcache_id)
{ {
u32 tcmask = hash_table_sizes[tcache_id] - 1; u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1;
struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask); struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask);
#if DRC_DEBUG & 1 #if DRC_DEBUG & 1
@ -794,7 +773,7 @@ static void add_to_hashlist_unresolved(struct block_link *bl, int tcache_id)
static void rm_from_hashlist_unresolved(struct block_link *bl, int tcache_id) static void rm_from_hashlist_unresolved(struct block_link *bl, int tcache_id)
{ {
u32 tcmask = hash_table_sizes[tcache_id] - 1; u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1;
struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask); struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask);
#if DRC_DEBUG & 1 #if DRC_DEBUG & 1
@ -818,7 +797,7 @@ static void dr_free_oldest_block(int tcache_id)
{ {
struct block_desc *bd; struct block_desc *bd;
if (block_limit[tcache_id] >= block_max_counts[tcache_id]) { if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) {
// block desc wrap around // block desc wrap around
block_limit[tcache_id] = 0; block_limit[tcache_id] = 0;
} }
@ -833,7 +812,7 @@ static void dr_free_oldest_block(int tcache_id)
sh2_smc_rm_block_entry(bd, tcache_id, 0, 1); sh2_smc_rm_block_entry(bd, tcache_id, 0, 1);
block_limit[tcache_id]++; block_limit[tcache_id]++;
if (block_limit[tcache_id] >= block_max_counts[tcache_id]) if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id))
block_limit[tcache_id] = 0; block_limit[tcache_id] = 0;
bd = &block_tables[tcache_id][block_limit[tcache_id]]; bd = &block_tables[tcache_id][block_limit[tcache_id]];
if (bd->tcache_ptr >= tcache_ptrs[tcache_id]) if (bd->tcache_ptr >= tcache_ptrs[tcache_id])
@ -898,7 +877,7 @@ static void dr_mark_memory(int mark, struct block_desc *block, int tcache_id, u3
lit_ram_blk = Pico32xMem->drclit_ram; lit_ram_blk = Pico32xMem->drclit_ram;
shift = SH2_DRCBLK_RAM_SHIFT; shift = SH2_DRCBLK_RAM_SHIFT;
} }
mask = ram_sizes[tcache_id] - 1; mask = RAM_SIZE(tcache_id) - 1;
// mark recompiled insns // mark recompiled insns
addr = block->addr & ~((1 << shift) - 1); addr = block->addr & ~((1 << shift) - 1);
@ -957,7 +936,7 @@ static u32 dr_check_nolit(u32 start, u32 end, int tcache_id)
lit_ram_blk = Pico32xMem->drclit_ram; lit_ram_blk = Pico32xMem->drclit_ram;
shift = SH2_DRCBLK_RAM_SHIFT; shift = SH2_DRCBLK_RAM_SHIFT;
} }
mask = ram_sizes[tcache_id] - 1; mask = RAM_SIZE(tcache_id) - 1;
addr = start & ~((1 << shift) - 1); addr = start & ~((1 << shift) - 1);
for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift))
@ -1028,18 +1007,18 @@ static struct block_desc *dr_add_block(u32 addr, int size,
*blk_id = *bcount; *blk_id = *bcount;
(*bcount)++; (*bcount)++;
if (*bcount >= block_max_counts[tcache_id]) if (*bcount >= BLOCK_MAX_COUNT(tcache_id))
*bcount = 0; *bcount = 0;
return bd; return bd;
} }
static void REGPARM(3) *dr_lookup_block(u32 pc, int is_slave, int *tcache_id) static void REGPARM(3) *dr_lookup_block(u32 pc, SH2 *sh2, int *tcache_id)
{ {
struct block_entry *be = NULL; struct block_entry *be = NULL;
void *block = NULL; void *block = NULL;
be = dr_get_entry(pc, is_slave, tcache_id); be = dr_get_entry(pc, sh2->is_slave, tcache_id);
if (be != NULL) if (be != NULL)
block = be->tcache_ptr; block = be->tcache_ptr;
@ -1114,7 +1093,7 @@ static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_sla
if (blink_free[tcache_id] != NULL) { if (blink_free[tcache_id] != NULL) {
bl = blink_free[tcache_id]; bl = blink_free[tcache_id];
blink_free[tcache_id] = bl->next; blink_free[tcache_id] = bl->next;
} else if (cnt >= block_link_pool_max_counts[tcache_id]) { } else if (cnt >= BLOCK_LINK_MAX_COUNT(tcache_id)) {
dbg(1, "bl overflow for tcache %d", tcache_id); dbg(1, "bl overflow for tcache %d", tcache_id);
return sh2_drc_dispatcher; return sh2_drc_dispatcher;
} else { } else {
@ -1145,7 +1124,7 @@ static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_sla
static void dr_link_blocks(struct block_entry *be, int tcache_id) static void dr_link_blocks(struct block_entry *be, int tcache_id)
{ {
#if LINK_BRANCHES #if LINK_BRANCHES
u32 tcmask = hash_table_sizes[tcache_id] - 1; u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1;
u32 pc = be->pc; u32 pc = be->pc;
struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], pc, tcmask); struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], pc, tcmask);
struct block_link *bl = *head, *next; struct block_link *bl = *head, *next;
@ -1188,7 +1167,7 @@ static void dr_link_outgoing(struct block_entry *be, int tcache_id, int is_slave
array[count++] = item; \ array[count++] = item; \
} }
static int find_in_array(u32 *array, size_t size, u32 what) static inline int find_in_array(u32 *array, size_t size, u32 what)
{ {
size_t i; size_t i;
for (i = 0; i < size; i++) for (i = 0; i < size; i++)
@ -1198,6 +1177,23 @@ static int find_in_array(u32 *array, size_t size, u32 what)
return -1; return -1;
} }
static int find_in_sorted_array(u32 *array, size_t size, u32 what)
{
// binary search in sorted array
int left = 0, right = size-1;
while (left <= right)
{
int middle = (left + right) / 2;
if (array[middle] == what)
return middle;
else if (array[middle] < what)
left = middle + 1;
else
right = middle - 1;
}
return -1;
}
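
find_in_sorted_array() replaces the linear find_in_array() at the branch-target lookup sites later in this diff; it presumes branch_target_pc is kept in ascending order, which the switch to a binary search implies. A minimal usage sketch (PC values invented for the demo):

#include <stdio.h>
typedef unsigned int u32;

static int find_in_sorted_array(u32 *array, size_t size, u32 what)
{
  int left = 0, right = size - 1;
  while (left <= right) {
    int middle = (left + right) / 2;
    if (array[middle] == what)
      return middle;
    else if (array[middle] < what)
      left = middle + 1;
    else
      right = middle - 1;
  }
  return -1;
}

int main(void)
{
  u32 branch_target_pc[] = { 0x0600010a, 0x06000120, 0x06000134 };
  printf("%d\n", find_in_sorted_array(branch_target_pc, 3, 0x06000120)); // 1
  printf("%d\n", find_in_sorted_array(branch_target_pc, 3, 0x06000122)); // -1 (not a target)
  return 0;
}
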
// --------------------------------------------------------------- // ---------------------------------------------------------------
// NB rcache allocation dependencies: // NB rcache allocation dependencies:
@ -1242,26 +1238,6 @@ static void rcache_remove_vreg_alias(int x, sh2_reg_e r);
} \ } \
} }
// binary search approach, since we don't have CLZ on ARM920T
#define FOR_ALL_BITS_SET_DO(mask, bit, code) { \
u32 __mask = mask; \
for (bit = 31; bit >= 0 && mask; bit--, __mask <<= 1) { \
if (!(__mask & (0xffff << 16))) \
bit -= 16, __mask <<= 16; \
if (!(__mask & (0xff << 24))) \
bit -= 8, __mask <<= 8; \
if (!(__mask & (0xf << 28))) \
bit -= 4, __mask <<= 4; \
if (!(__mask & (0x3 << 30))) \
bit -= 2, __mask <<= 2; \
if (!(__mask & (0x1 << 31))) \
bit -= 1, __mask <<= 1; \
if (__mask & (0x1 << 31)) { \
code; \
} \
} \
}
#if PROPAGATE_CONSTANTS #if PROPAGATE_CONSTANTS
static inline int gconst_alloc(sh2_reg_e r) static inline int gconst_alloc(sh2_reg_e r)
{ {
@ -1319,6 +1295,7 @@ static int gconst_get(sh2_reg_e r, u32 *val)
*val = gconsts[guest_regs[r].cnst].val; *val = gconsts[guest_regs[r].cnst].val;
return 1; return 1;
} }
*val = 0;
return 0; return 0;
} }
@ -2043,13 +2020,22 @@ static inline int rcache_is_cached(sh2_reg_e r)
return (guest_regs[r].vreg >= 0); return (guest_regs[r].vreg >= 0);
} }
static inline int rcache_is_hreg_used(int hr)
{
int x = reg_map_host[hr];
// is hr in use?
return cache_regs[x].type != HR_FREE &&
(cache_regs[x].type != HR_TEMP || (cache_regs[x].flags & HRF_LOCKED));
}
static inline u32 rcache_used_hreg_mask(void) static inline u32 rcache_used_hreg_mask(void)
{ {
u32 mask = 0; u32 mask = 0;
int i; int i;
for (i = 0; i < ARRAY_SIZE(cache_regs); i++) for (i = 0; i < ARRAY_SIZE(cache_regs); i++)
if (cache_regs[i].type != HR_FREE) if ((cache_regs[i].flags & HRF_TEMP) && cache_regs[i].type != HR_FREE &&
(cache_regs[i].type != HR_TEMP || (cache_regs[i].flags & HRF_LOCKED)))
mask |= 1 << cache_regs[i].hreg; mask |= 1 << cache_regs[i].hreg;
return mask & ~rcache_static; return mask & ~rcache_static;
@ -2137,6 +2123,8 @@ static void rcache_invalidate(void)
{ {
int i; int i;
gconst_invalidate();
for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { for (i = 0; i < ARRAY_SIZE(cache_regs); i++) {
cache_regs[i].flags &= (HRF_TEMP|HRF_REG); cache_regs[i].flags &= (HRF_TEMP|HRF_REG);
if (cache_regs[i].type != HR_STATIC) if (cache_regs[i].type != HR_STATIC)
@ -2161,8 +2149,6 @@ static void rcache_invalidate(void)
rcache_counter = 0; rcache_counter = 0;
rcache_hint_soon = rcache_hint_late = rcache_hint_write = rcache_hint_clean = 0; rcache_hint_soon = rcache_hint_late = rcache_hint_write = rcache_hint_clean = 0;
gconst_invalidate();
} }
static void rcache_flush(void) static void rcache_flush(void)
@ -2221,14 +2207,20 @@ static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmod, u32 *offs)
if (poffs == -1) if (poffs == -1)
return -1; return -1;
if (mask < 0x1000) { if (mask < 0x20000) {
// data array or BIOS, can't safely access directly since translated code // data array, BIOS, DRAM, can't safely access directly since host addr may
// may run on both SH2s // change (BIOS,da code may run on either core, DRAM may be switched)
hr = rcache_get_tmp(); hr = rcache_get_tmp();
emith_ctx_read_ptr(hr, poffs); a = (a + *offs) & mask;
a += *offs; if (poffs == offsetof(SH2, p_da)) {
if (a & mask & ~omask) // access sh2->data_array directly
emith_add_r_r_ptr_imm(hr, hr, a & mask & ~omask); a += offsetof(SH2, data_array);
emith_add_r_r_ptr_imm(hr, CONTEXT_REG, a & ~omask);
} else {
emith_ctx_read_ptr(hr, poffs);
if (a & ~omask)
emith_add_r_r_ptr_imm(hr, hr, a & ~omask);
}
*offs = a & omask; *offs = a & omask;
return hr; return hr;
} }
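
The rewritten emit_get_rbase_and_offs above no longer dereferences sh2->p_da for data-array accesses; since the data array is embedded in the SH2 context itself, the emitter can add a constant offset to the context register and get a pointer that is always correct for the executing core. A simplified model of the two addressing styles (struct layout trimmed to the relevant fields, not the real SH2 struct):

#include <stdio.h>
#include <stddef.h>

typedef struct {
  void *p_da;                        // old path: load this pointer, then add the offset
  unsigned char data_array[0x1000];  // new path: one add relative to the context reg
} SH2_model;

int main(void)
{
  SH2_model sh2;
  sh2.p_da = sh2.data_array;
  unsigned a = 0x234;                // offset within the data array

  unsigned char *old_ptr = (unsigned char *)sh2.p_da + a;
  unsigned char *new_ptr = (unsigned char *)&sh2 + offsetof(SH2_model, data_array) + a;

  printf("same address: %d\n", old_ptr == new_ptr);  // 1
  return 0;
}
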
@ -2269,7 +2261,7 @@ static int emit_get_rom_data(SH2 *sh2, sh2_reg_e r, u32 offs, int size, u32 *val
if (gconst_get(r, &a)) { if (gconst_get(r, &a)) {
a += offs; a += offs;
// check if rom is memory mapped (not bank switched), and address is in rom // check if rom is memory mapped (not bank switched), and address is in rom
if (dr_is_rom(a) && p32x_sh2_get_mem_ptr(a, &mask, sh2) != (void *)-1) { if (dr_is_rom(a) && p32x_sh2_get_mem_ptr(a, &mask, sh2) == sh2->p_rom) {
switch (size & MF_SIZEMASK) { switch (size & MF_SIZEMASK) {
case 0: *val = (s8)p32x_sh2_read8(a, sh2s); break; // 8 case 0: *val = (s8)p32x_sh2_read8(a, sh2s); break; // 8
case 1: *val = (s16)p32x_sh2_read16(a, sh2s); break; // 16 case 1: *val = (s16)p32x_sh2_read16(a, sh2s); break; // 16
@ -2507,9 +2499,10 @@ static int emit_indirect_indexed_read(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rx, sh2_
#if PROPAGATE_CONSTANTS #if PROPAGATE_CONSTANTS
u32 offs; u32 offs;
if (gconst_get(ry, &offs)) // if offs is larger than 0x01000000, it's most probably the base address part
if (gconst_get(ry, &offs) && offs < 0x01000000)
return emit_memhandler_read_rr(sh2, rd, rx, offs, size); return emit_memhandler_read_rr(sh2, rd, rx, offs, size);
if (gconst_get(rx, &offs)) if (gconst_get(rx, &offs) && offs < 0x01000000)
return emit_memhandler_read_rr(sh2, rd, ry, offs, size); return emit_memhandler_read_rr(sh2, rd, ry, offs, size);
#endif #endif
hr = rcache_get_reg_arg(0, rx, &tx); hr = rcache_get_reg_arg(0, rx, &tx);
@ -2541,9 +2534,10 @@ static void emit_indirect_indexed_write(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rx, sh
#if PROPAGATE_CONSTANTS #if PROPAGATE_CONSTANTS
u32 offs; u32 offs;
if (gconst_get(ry, &offs)) // if offs is larger than 0x01000000, it's most probably the base address part
if (gconst_get(ry, &offs) && offs < 0x01000000)
return emit_memhandler_write_rr(sh2, rd, rx, offs, size); return emit_memhandler_write_rr(sh2, rd, rx, offs, size);
if (gconst_get(rx, &offs)) if (gconst_get(rx, &offs) && offs < 0x01000000)
return emit_memhandler_write_rr(sh2, rd, ry, offs, size); return emit_memhandler_write_rr(sh2, rd, ry, offs, size);
#endif #endif
if (rd != SHR_TMP) if (rd != SHR_TMP)
@ -2601,15 +2595,6 @@ static void emit_do_static_regs(int is_write, int tmpr)
} }
} }
/* just after lookup function, jump to address returned */
static void emit_block_entry(void)
{
emith_tst_r_r_ptr(RET_REG, RET_REG);
EMITH_SJMP_START(DCOND_EQ);
emith_jump_reg_c(DCOND_NE, RET_REG);
EMITH_SJMP_END(DCOND_EQ);
}
#define DELAY_SAVE_T(sr) { \ #define DELAY_SAVE_T(sr) { \
emith_bic_r_imm(sr, T_save); \ emith_bic_r_imm(sr, T_save); \
emith_tst_r_imm(sr, T); \ emith_tst_r_imm(sr, T); \
@ -2861,7 +2846,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
if (!tcache_id) // can safely link from cpu-local to global memory if (!tcache_id) // can safely link from cpu-local to global memory
dr_link_blocks(entry, sh2->is_slave?2:1); dr_link_blocks(entry, sh2->is_slave?2:1);
v = find_in_array(branch_target_pc, branch_target_count, pc); v = find_in_sorted_array(branch_target_pc, branch_target_count, pc);
if (v >= 0) if (v >= 0)
branch_target_ptr[v] = tcache_ptr; branch_target_ptr[v] = tcache_ptr;
#if LOOP_DETECTION #if LOOP_DETECTION
@ -2870,14 +2855,15 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
drcf.polling = (drcf.loop_type == OF_POLL_LOOP ? MF_POLLING : 0); drcf.polling = (drcf.loop_type == OF_POLL_LOOP ? MF_POLLING : 0);
#endif #endif
#if DRC_DEBUG #if (DRC_DEBUG & ~7)
// must update PC // must update PC
emit_move_r_imm32(SHR_PC, pc); emit_move_r_imm32(SHR_PC, pc);
#endif #endif
rcache_clean(); rcache_clean();
#if (DRC_DEBUG & 0x10) #if (DRC_DEBUG & 0x10)
rcache_get_reg_arg(0, SHR_PC, NULL); tmp = rcache_get_tmp_arg(0);
emith_move_r_imm(tmp, pc);
tmp = emit_memhandler_read(1); tmp = emit_memhandler_read(1);
tmp2 = rcache_get_tmp(); tmp2 = rcache_get_tmp();
tmp3 = rcache_get_tmp(); tmp3 = rcache_get_tmp();
@ -2896,7 +2882,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
tmp = rcache_get_tmp_arg(0); tmp = rcache_get_tmp_arg(0);
sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
emith_cmp_r_imm(sr, 0); emith_cmp_r_imm(sr, 0);
emith_move_r_imm(tmp, pc); emith_move_r_imm_c(DCOND_LE, tmp, pc);
emith_jump_cond(DCOND_LE, sh2_drc_exit); emith_jump_cond(DCOND_LE, sh2_drc_exit);
rcache_free_tmp(tmp); rcache_free_tmp(tmp);
@ -3104,7 +3090,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
emith_clear_msb(tmp, tmp2, 22); emith_clear_msb(tmp, tmp2, 22);
emit_memhandler_write_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_PREDECR); emit_memhandler_write_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_PREDECR);
// push PC // push PC
if (op == OP_TRAPA) { if (opd->op == OP_TRAPA) {
tmp = rcache_get_tmp_arg(1); tmp = rcache_get_tmp_arg(1);
emith_move_r_imm(tmp, pc); emith_move_r_imm(tmp, pc);
} else if (drcf.pending_branch_indirect) { } else if (drcf.pending_branch_indirect) {
@ -3113,7 +3099,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
tmp = rcache_get_tmp_arg(1); tmp = rcache_get_tmp_arg(1);
emith_move_r_imm(tmp, pc - 2); emith_move_r_imm(tmp, pc - 2);
} }
emith_move_r_imm(tmp, pc);
emit_memhandler_write_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_PREDECR); emit_memhandler_write_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_PREDECR);
// obtain new PC // obtain new PC
emit_memhandler_read_rr(sh2, SHR_PC, SHR_VBR, opd->imm * 4, 2); emit_memhandler_read_rr(sh2, SHR_PC, SHR_VBR, opd->imm * 4, 2);
@ -3613,12 +3598,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
default: default:
goto default_; goto default_;
} }
tmp3 = rcache_get_reg_arg(1, tmp, &tmp4);
if (tmp == SHR_SR) { if (tmp == SHR_SR) {
tmp3 = rcache_get_reg_arg(1, tmp, &tmp4);
emith_sync_t(tmp4); emith_sync_t(tmp4);
emith_clear_msb(tmp3, tmp4, 22); // reserved bits defined by ISA as 0 emith_clear_msb(tmp3, tmp4, 22); // reserved bits defined by ISA as 0
} else if (tmp3 != tmp4) } else
emith_move_r_r(tmp3, tmp4); tmp3 = rcache_get_reg_arg(1, tmp, NULL);
emit_memhandler_write_rr(sh2, SHR_TMP, GET_Rn(), 0, 2 | MF_PREDECR); emit_memhandler_write_rr(sh2, SHR_TMP, GET_Rn(), 0, 2 | MF_PREDECR);
goto end_op; goto end_op;
case 0x04: case 0x04:
@ -4050,7 +4035,7 @@ end_op:
// no modification of host status/flags between here and branching! // no modification of host status/flags between here and branching!
#if LINK_BRANCHES #if LINK_BRANCHES
v = find_in_array(branch_target_pc, branch_target_count, target_pc); v = find_in_sorted_array(branch_target_pc, branch_target_count, target_pc);
if (v >= 0) if (v >= 0)
{ {
// local branch // local branch
@ -4151,7 +4136,7 @@ end_op:
{ {
void *target; void *target;
s32 tmp = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); tmp = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
FLUSH_CYCLES(tmp); FLUSH_CYCLES(tmp);
emith_sync_t(tmp); emith_sync_t(tmp);
@ -4172,7 +4157,7 @@ end_op:
for (i = 0; i < branch_patch_count; i++) { for (i = 0; i < branch_patch_count; i++) {
void *target; void *target;
int t; int t;
t = find_in_array(branch_target_pc, branch_target_count, branch_patch_pc[i]); t = find_in_sorted_array(branch_target_pc, branch_target_count, branch_patch_pc[i]);
target = branch_target_ptr[t]; target = branch_target_ptr[t];
if (target == NULL) { if (target == NULL) {
// flush pc and go back to dispatcher (this should no longer happen) // flush pc and go back to dispatcher (this should no longer happen)
@ -4256,8 +4241,8 @@ static void sh2_generate_utils(void)
emith_sh2_rcall(arg0, arg1, arg2, arg3); emith_sh2_rcall(arg0, arg1, arg2, arg3);
EMITH_SJMP_START(DCOND_CS); EMITH_SJMP_START(DCOND_CS);
emith_and_r_r_c(DCOND_CC, arg0, arg3); emith_and_r_r_c(DCOND_CC, arg0, arg3);
emith_eor_r_imm_c(DCOND_CC, arg0, 1); emith_eor_r_imm_ptr_c(DCOND_CC, arg0, 1);
emith_read8s_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); emith_read8s_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0);
emith_ret_c(DCOND_CC); emith_ret_c(DCOND_CC);
EMITH_SJMP_END(DCOND_CS); EMITH_SJMP_END(DCOND_CS);
emith_move_r_r_ptr(arg1, CONTEXT_REG); emith_move_r_r_ptr(arg1, CONTEXT_REG);
@ -4270,7 +4255,7 @@ static void sh2_generate_utils(void)
emith_sh2_rcall(arg0, arg1, arg2, arg3); emith_sh2_rcall(arg0, arg1, arg2, arg3);
EMITH_SJMP_START(DCOND_CS); EMITH_SJMP_START(DCOND_CS);
emith_and_r_r_c(DCOND_CC, arg0, arg3); emith_and_r_r_c(DCOND_CC, arg0, arg3);
emith_read16s_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); emith_read16s_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0);
emith_ret_c(DCOND_CC); emith_ret_c(DCOND_CC);
EMITH_SJMP_END(DCOND_CS); EMITH_SJMP_END(DCOND_CS);
emith_move_r_r_ptr(arg1, CONTEXT_REG); emith_move_r_r_ptr(arg1, CONTEXT_REG);
@ -4283,7 +4268,7 @@ static void sh2_generate_utils(void)
emith_sh2_rcall(arg0, arg1, arg2, arg3); emith_sh2_rcall(arg0, arg1, arg2, arg3);
EMITH_SJMP_START(DCOND_CS); EMITH_SJMP_START(DCOND_CS);
emith_and_r_r_c(DCOND_CC, arg0, arg3); emith_and_r_r_c(DCOND_CC, arg0, arg3);
emith_read_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); emith_read_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0);
emith_ror_c(DCOND_CC, RET_REG, RET_REG, 16); emith_ror_c(DCOND_CC, RET_REG, RET_REG, 16);
emith_ret_c(DCOND_CC); emith_ret_c(DCOND_CC);
EMITH_SJMP_END(DCOND_CS); EMITH_SJMP_END(DCOND_CS);
@ -4300,8 +4285,8 @@ static void sh2_generate_utils(void)
emith_jump_reg_c(DCOND_CS, arg2); emith_jump_reg_c(DCOND_CS, arg2);
EMITH_SJMP_END(DCOND_CC); EMITH_SJMP_END(DCOND_CC);
emith_and_r_r_r(arg1, arg0, arg3); emith_and_r_r_r(arg1, arg0, arg3);
emith_eor_r_imm(arg1, 1); emith_eor_r_imm_ptr(arg1, 1);
emith_read8s_r_r_r(arg1, arg1, arg2); emith_read8s_r_r_r(arg1, arg2, arg1);
emith_push_ret(arg1); emith_push_ret(arg1);
emith_move_r_r_ptr(arg2, CONTEXT_REG); emith_move_r_r_ptr(arg2, CONTEXT_REG);
emith_call(p32x_sh2_poll_memory8); emith_call(p32x_sh2_poll_memory8);
@ -4317,7 +4302,7 @@ static void sh2_generate_utils(void)
emith_jump_reg_c(DCOND_CS, arg2); emith_jump_reg_c(DCOND_CS, arg2);
EMITH_SJMP_END(DCOND_CC); EMITH_SJMP_END(DCOND_CC);
emith_and_r_r_r(arg1, arg0, arg3); emith_and_r_r_r(arg1, arg0, arg3);
emith_read16s_r_r_r(arg1, arg1, arg2); emith_read16s_r_r_r(arg1, arg2, arg1);
emith_push_ret(arg1); emith_push_ret(arg1);
emith_move_r_r_ptr(arg2, CONTEXT_REG); emith_move_r_r_ptr(arg2, CONTEXT_REG);
emith_call(p32x_sh2_poll_memory16); emith_call(p32x_sh2_poll_memory16);
@ -4333,7 +4318,7 @@ static void sh2_generate_utils(void)
emith_jump_reg_c(DCOND_CS, arg2); emith_jump_reg_c(DCOND_CS, arg2);
EMITH_SJMP_END(DCOND_CC); EMITH_SJMP_END(DCOND_CC);
emith_and_r_r_r(arg1, arg0, arg3); emith_and_r_r_r(arg1, arg0, arg3);
emith_read_r_r_r(arg1, arg1, arg2); emith_read_r_r_r(arg1, arg2, arg1);
emith_ror(arg1, arg1, 16); emith_ror(arg1, arg1, 16);
emith_push_ret(arg1); emith_push_ret(arg1);
emith_move_r_r_ptr(arg2, CONTEXT_REG); emith_move_r_r_ptr(arg2, CONTEXT_REG);
@ -4382,13 +4367,13 @@ static void sh2_generate_utils(void)
emith_jump_reg_c(DCOND_EQ, RET_REG); emith_jump_reg_c(DCOND_EQ, RET_REG);
EMITH_SJMP_END(DCOND_NE); EMITH_SJMP_END(DCOND_NE);
#endif #endif
emith_ctx_read(arg1, offsetof(SH2, is_slave)); emith_move_r_r_ptr(arg1, CONTEXT_REG);
emith_add_r_r_ptr_imm(arg2, CONTEXT_REG, offsetof(SH2, drc_tmp)); emith_add_r_r_ptr_imm(arg2, CONTEXT_REG, offsetof(SH2, drc_tmp));
emith_call(dr_lookup_block); emith_call(dr_lookup_block);
#if BRANCH_CACHE
// store PC and block entry ptr (in arg0) in branch target cache // store PC and block entry ptr (in arg0) in branch target cache
emith_tst_r_r_ptr(RET_REG, RET_REG); emith_tst_r_r_ptr(RET_REG, RET_REG);
EMITH_SJMP_START(DCOND_EQ); EMITH_SJMP_START(DCOND_EQ);
#if BRANCH_CACHE
#if (DRC_DEBUG & 128) #if (DRC_DEBUG & 128)
emith_move_r_ptr_imm(arg2, (uptr)&bcmiss); emith_move_r_ptr_imm(arg2, (uptr)&bcmiss);
emith_read_r_r_offs_c(DCOND_NE, arg3, arg2, 0); emith_read_r_r_offs_c(DCOND_NE, arg3, arg2, 0);
@ -4400,14 +4385,18 @@ static void sh2_generate_utils(void)
emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 1 : 0); emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 1 : 0);
emith_write_r_r_offs_c(DCOND_NE, arg2, arg1, offsetof(SH2, branch_cache)); emith_write_r_r_offs_c(DCOND_NE, arg2, arg1, offsetof(SH2, branch_cache));
emith_write_r_r_offs_ptr_c(DCOND_NE, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *)); emith_write_r_r_offs_ptr_c(DCOND_NE, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *));
EMITH_SJMP_END(DCOND_EQ);
#endif #endif
emit_block_entry(); emith_jump_reg_c(DCOND_NE, RET_REG);
EMITH_SJMP_END(DCOND_EQ);
// lookup failed, call sh2_translate() // lookup failed, call sh2_translate()
emith_move_r_r_ptr(arg0, CONTEXT_REG); emith_move_r_r_ptr(arg0, CONTEXT_REG);
emith_ctx_read(arg1, offsetof(SH2, drc_tmp)); // tcache_id emith_ctx_read(arg1, offsetof(SH2, drc_tmp)); // tcache_id
emith_call(sh2_translate); emith_call(sh2_translate);
emit_block_entry(); /* just after lookup function, jump to address returned */
emith_tst_r_r_ptr(RET_REG, RET_REG);
EMITH_SJMP_START(DCOND_EQ);
emith_jump_reg_c(DCOND_NE, RET_REG);
EMITH_SJMP_END(DCOND_EQ);
// XXX: can't translate, fail // XXX: can't translate, fail
emith_call(dr_failure); emith_call(dr_failure);
emith_flush(); emith_flush();
@ -4486,9 +4475,7 @@ static void sh2_generate_utils(void)
emith_call(sh2_drc_read32); emith_call(sh2_drc_read32);
if (arg0 != RET_REG) if (arg0 != RET_REG)
emith_move_r_r(arg0, RET_REG); emith_move_r_r(arg0, RET_REG);
#if defined(__i386__) || defined(__x86_64__) emith_call_cleanup();
emith_add_r_r_ptr_imm(xSP, xSP, sizeof(void *)); // fix stack
#endif
emith_jump(sh2_drc_dispatcher); emith_jump(sh2_drc_dispatcher);
rcache_invalidate(); rcache_invalidate();
emith_flush(); emith_flush();
@ -4581,6 +4568,7 @@ static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nol
return; return;
} }
#if LINK_BRANCHES
// remove from hash table, make incoming links unresolved // remove from hash table, make incoming links unresolved
if (bd->active) { if (bd->active) {
for (i = 0; i < bd->entry_count; i++) { for (i = 0; i < bd->entry_count; i++) {
@ -4596,8 +4584,10 @@ static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nol
add_to_block_list(&inactive_blocks[tcache_id], bd); add_to_block_list(&inactive_blocks[tcache_id], bd);
} }
bd->active = 0; bd->active = 0;
#endif
if (free) { if (free) {
#if LINK_BRANCHES
// revoke outgoing links // revoke outgoing links
for (bl = bd->entryp[0].o_links; bl != NULL; bl = bl->o_next) { for (bl = bd->entryp[0].o_links; bl != NULL; bl = bl->o_next) {
if (bl->target) if (bl->target)
@ -4609,6 +4599,7 @@ static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nol
blink_free[bl->tcache_id] = bl; blink_free[bl->tcache_id] = bl;
} }
bd->entryp[0].o_links = NULL; bd->entryp[0].o_links = NULL;
#endif
// invalidate block // invalidate block
rm_from_block_lists(bd); rm_from_block_lists(bd);
bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0; bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0;
@ -4619,7 +4610,7 @@ static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nol
static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift)
{ {
struct block_list **blist, *entry, *next; struct block_list **blist, *entry, *next;
u32 mask = ram_sizes[tcache_id] - 1; u32 mask = RAM_SIZE(tcache_id) - 1;
u32 wtmask = ~0x20000000; // writethrough area mask u32 wtmask = ~0x20000000; // writethrough area mask
u32 start_addr, end_addr; u32 start_addr, end_addr;
u32 start_lit, end_lit; u32 start_lit, end_lit;
@ -4722,7 +4713,7 @@ static void block_stats(void)
for (i = 0; i < block_counts[b]; i++) for (i = 0; i < block_counts[b]; i++)
if (block_tables[b][i].addr != 0) if (block_tables[b][i].addr != 0)
total += block_tables[b][i].refcount; total += block_tables[b][i].refcount;
for (i = block_limit[b]; i < block_max_counts[b]; i++) for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++)
if (block_tables[b][i].addr != 0) if (block_tables[b][i].addr != 0)
total += block_tables[b][i].refcount; total += block_tables[b][i].refcount;
} }
@ -4739,7 +4730,7 @@ static void block_stats(void)
maxb = blk; maxb = blk;
} }
} }
for (i = block_limit[b]; i < block_max_counts[b]; i++) { for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++) {
blk = &block_tables[b][i]; blk = &block_tables[b][i];
if (blk->addr != 0 && blk->refcount > max) { if (blk->addr != 0 && blk->refcount > max) {
max = blk->refcount; max = blk->refcount;
@ -4757,7 +4748,7 @@ static void block_stats(void)
for (b = 0; b < ARRAY_SIZE(block_tables); b++) { for (b = 0; b < ARRAY_SIZE(block_tables); b++) {
for (i = 0; i < block_counts[b]; i++) for (i = 0; i < block_counts[b]; i++)
block_tables[b][i].refcount = 0; block_tables[b][i].refcount = 0;
for (i = block_limit[b]; i < block_max_counts[b]; i++) for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++)
block_tables[b][i].refcount = 0; block_tables[b][i].refcount = 0;
} }
#endif #endif
@ -4774,7 +4765,7 @@ void entry_stats(void)
for (i = 0; i < block_counts[b]; i++) for (i = 0; i < block_counts[b]; i++)
for (j = 0; j < block_tables[b][i].entry_count; j++) for (j = 0; j < block_tables[b][i].entry_count; j++)
total += block_tables[b][i].entryp[j].entry_count; total += block_tables[b][i].entryp[j].entry_count;
for (i = block_limit[b]; i < block_max_counts[b]; i++) for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++)
for (j = 0; j < block_tables[b][i].entry_count; j++) for (j = 0; j < block_tables[b][i].entry_count; j++)
total += block_tables[b][i].entryp[j].entry_count; total += block_tables[b][i].entryp[j].entry_count;
} }
@ -4793,7 +4784,7 @@ void entry_stats(void)
maxb = &blk->entryp[j]; maxb = &blk->entryp[j];
} }
} }
for (i = block_limit[b]; i < block_max_counts[b]; i++) { for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++) {
blk = &block_tables[b][i]; blk = &block_tables[b][i];
for (j = 0; j < blk->entry_count; j++) for (j = 0; j < blk->entry_count; j++)
if (blk->entryp[j].entry_count > max) { if (blk->entryp[j].entry_count > max) {
@ -4813,7 +4804,7 @@ void entry_stats(void)
for (i = 0; i < block_counts[b]; i++) for (i = 0; i < block_counts[b]; i++)
for (j = 0; j < block_tables[b][i].entry_count; j++) for (j = 0; j < block_tables[b][i].entry_count; j++)
block_tables[b][i].entryp[j].entry_count = 0; block_tables[b][i].entryp[j].entry_count = 0;
for (i = block_limit[b]; i < block_max_counts[b]; i++) for (i = block_limit[b]; i < BLOCK_MAX_COUNT(b); i++)
for (j = 0; j < block_tables[b][i].entry_count; j++) for (j = 0; j < block_tables[b][i].entry_count; j++)
block_tables[b][i].entryp[j].entry_count = 0; block_tables[b][i].entryp[j].entry_count = 0;
} }
@ -4871,7 +4862,15 @@ static void bcache_stats(void)
for (i = 1; i < ARRAY_SIZE(sh2s->rts_cache); i++) for (i = 1; i < ARRAY_SIZE(sh2s->rts_cache); i++)
if (sh2s[0].rts_cache[i].pc == -1 && sh2s[1].rts_cache[i].pc == -1) break; if (sh2s[0].rts_cache[i].pc == -1 && sh2s[1].rts_cache[i].pc == -1) break;
printf("return cache hits:%d misses:%d depth: %d\n", rchit, rcmiss, i); printf("return cache hits:%d misses:%d depth: %d index: %d/%d\n", rchit, rcmiss, i,sh2s[0].rts_cache_idx,sh2s[1].rts_cache_idx);
for (i = 0; i < ARRAY_SIZE(sh2s[0].rts_cache); i++) {
printf("%08x ",sh2s[0].rts_cache[i].pc);
if ((i+1) % 8 == 0) printf("\n");
}
for (i = 0; i < ARRAY_SIZE(sh2s[1].rts_cache); i++) {
printf("%08x ",sh2s[1].rts_cache[i].pc);
if ((i+1) % 8 == 0) printf("\n");
}
#endif #endif
#if BRANCH_CACHE #if BRANCH_CACHE
printf("branch cache hits:%d misses:%d\n", bchit, bcmiss); printf("branch cache hits:%d misses:%d\n", bchit, bcmiss);
@ -4920,31 +4919,31 @@ int sh2_drc_init(SH2 *sh2)
if (block_tables[0] == NULL) if (block_tables[0] == NULL)
{ {
for (i = 0; i < TCACHE_BUFFERS; i++) { for (i = 0; i < TCACHE_BUFFERS; i++) {
block_tables[i] = calloc(block_max_counts[i], sizeof(*block_tables[0])); block_tables[i] = calloc(BLOCK_MAX_COUNT(i), sizeof(*block_tables[0]));
if (block_tables[i] == NULL) if (block_tables[i] == NULL)
goto fail; goto fail;
// max 2 block links (exits) per block // max 2 block links (exits) per block
block_link_pool[i] = calloc(block_link_pool_max_counts[i], block_link_pool[i] = calloc(BLOCK_LINK_MAX_COUNT(i),
sizeof(*block_link_pool[0])); sizeof(*block_link_pool[0]));
if (block_link_pool[i] == NULL) if (block_link_pool[i] == NULL)
goto fail; goto fail;
inval_lookup[i] = calloc(ram_sizes[i] / INVAL_PAGE_SIZE, inval_lookup[i] = calloc(RAM_SIZE(i) / INVAL_PAGE_SIZE,
sizeof(inval_lookup[0])); sizeof(inval_lookup[0]));
if (inval_lookup[i] == NULL) if (inval_lookup[i] == NULL)
goto fail; goto fail;
hash_tables[i] = calloc(hash_table_sizes[i], sizeof(*hash_tables[0])); hash_tables[i] = calloc(HASH_TABLE_SIZE(i), sizeof(*hash_tables[0]));
if (hash_tables[i] == NULL) if (hash_tables[i] == NULL)
goto fail; goto fail;
unresolved_links[i] = calloc(hash_table_sizes[i], sizeof(*unresolved_links[0])); unresolved_links[i] = calloc(HASH_TABLE_SIZE(i), sizeof(*unresolved_links[0]));
if (unresolved_links[i] == NULL) if (unresolved_links[i] == NULL)
goto fail; goto fail;
} }
memset(block_counts, 0, sizeof(block_counts)); memset(block_counts, 0, sizeof(block_counts));
for (i = 0; i < ARRAY_SIZE(block_counts); i++) { for (i = 0; i < ARRAY_SIZE(block_counts); i++) {
block_limit[i] = block_max_counts[i] - 1; block_limit[i] = BLOCK_MAX_COUNT(i) - 1;
} }
memset(block_link_pool_counts, 0, sizeof(block_link_pool_counts)); memset(block_link_pool_counts, 0, sizeof(block_link_pool_counts));
for (i = 0; i < ARRAY_SIZE(blink_free); i++) { for (i = 0; i < ARRAY_SIZE(blink_free); i++) {
@ -5044,12 +5043,12 @@ void sh2_drc_finish(SH2 *sh2)
static void *dr_get_pc_base(u32 pc, SH2 *sh2) static void *dr_get_pc_base(u32 pc, SH2 *sh2)
{ {
void *ret = NULL; void *ret;
u32 mask = 0; u32 mask = 0;
ret = p32x_sh2_get_mem_ptr(pc, &mask, sh2); ret = p32x_sh2_get_mem_ptr(pc, &mask, sh2);
if (ret == NULL) if (ret == (void *)-1)
return (void *)-1; // NULL is valid value return ret;
return (char *)ret - (pc & ~mask); return (char *)ret - (pc & ~mask);
} }
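
dr_get_pc_base() now treats (void *)-1 rather than NULL as the failure marker, because after the "- (pc & ~mask)" adjustment a perfectly valid base pointer can happen to equal NULL. The pattern in isolation (illustrative only):

#include <stdio.h>

// when NULL is a legal return value, use an impossible pointer such as
// (void *)-1 as the error sentinel instead
static void *get_base(int found)
{
  static char mem[16];
  return found ? (void *)mem : (void *)-1;
}

int main(void)
{
  void *p = get_base(0);
  if (p == (void *)-1)
    printf("no direct pointer, fall back to memory handlers\n");
  return 0;
}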

View file

@ -43,6 +43,7 @@ unsigned short scan_block(unsigned int base_pc, int is_slave,
#else #else
#warning "direct DRC register access not available for this host" #warning "direct DRC register access not available for this host"
#endif #endif
#endif
#ifdef DRC_SR_REG #ifdef DRC_SR_REG
#define __DRC_DECLARE_SR(SR) register int sh2_sr asm(#SR) #define __DRC_DECLARE_SR(SR) register int sh2_sr asm(#SR)
@ -59,4 +60,3 @@ unsigned short scan_block(unsigned int base_pc, int is_slave,
#define DRC_SAVE_SR(sh2) #define DRC_SAVE_SR(sh2)
#define DRC_RESTORE_SR(sh2) #define DRC_RESTORE_SR(sh2)
#endif #endif
#endif

View file

@ -171,7 +171,7 @@ DASM = $(R)platform/libpicofe/linux/host_dasm.c
DASMLIBS = -lbfd -lopcodes -liberty DASMLIBS = -lbfd -lopcodes -liberty
ifeq "$(ARCH)" "arm" ifeq "$(ARCH)" "arm"
ifeq ($(filter_out $(shell $(CC) --print-file-name=libbfd.so),"/"),) ifeq ($(filter_out $(shell $(CC) --print-file-name=libbfd.so),"/"),)
DASM = $(R)platform/common/host_dasm_arm.c DASM = $(R)platform/common/host_dasm.c
DASMLIBS = DASMLIBS =
endif endif
endif endif

View file

@ -1,9 +1,15 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <stdint.h>
#include <string.h> #include <string.h>
#ifdef __mips__
#include "dismips.c"
#define disasm dismips
#else
#include "disarm.c" #include "disarm.c"
#define disasm disarm
#endif
/* symbols */ /* symbols */
typedef struct { const char *name; void *value; } asymbol; typedef struct { const char *name; void *value; } asymbol;
@ -40,7 +46,8 @@ void host_dasm(void *addr, int len)
insn = *(long *)addr; insn = *(long *)addr;
printf(" %08lx %08lx ", (long)addr, insn); printf(" %08lx %08lx ", (long)addr, insn);
if(disarm((unsigned)addr, insn, buf, sizeof(buf))) { if(disasm((unsigned)addr, insn, buf, sizeof(buf)))
{
symaddr = 0; symaddr = 0;
if ((insn & 0xe000000) == 0xa000000) { if ((insn & 0xe000000) == 0xa000000) {
symaddr = (long)addr + 8 + ((long)(insn << 8) >> 6); symaddr = (long)addr + 8 + ((long)(insn << 8) >> 6);
@ -53,7 +60,7 @@ void host_dasm(void *addr, int len)
else else
printf("%s\n", buf); printf("%s\n", buf);
} else } else
printf("unknown\n"); printf("unknown (0x%08lx)\n", insn);
addr = (char *)addr + sizeof(long); addr = (char *)addr + sizeof(long);
} }
} }