sh2 drc, fix mul/add saturation

This commit is contained in:
kub 2024-06-02 07:48:15 +00:00
parent 68e50296e6
commit 38fd3bd866
6 changed files with 78 additions and 78 deletions

View file

@ -1333,7 +1333,7 @@ static inline void emith_pool_adjust(int tcache_offs, int move_offs)
EMITH_JMP_START(DCOND_LE); \
/* turns = sr.cycles / cycles */ \
emith_asr(t2, sr, 12); \
emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \
emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \
emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \
rcache_free_tmp(t3); \
if (reg >= 0) { \
@ -1427,16 +1427,16 @@ static inline void emith_pool_adjust(int tcache_offs, int move_offs)
emith_sext(mh, mh, 16); \
emith_mula_s64(ml, mh, rn, rm); \
/* overflow if top 17 bits of MACH aren't all 1 or 0 */ \
/* to check: add MACH[15] to MACH[31:16]. this is 0 if no overflow */ \
emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \
emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \
emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \
EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \
emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \
emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \
EMITH_SJMP_END(DCOND_LE); \
/* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \
emith_asr(rn, mh, 15); \
emith_addf_r_r_r_lsr(rn, rn, mh, 31); \
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \
emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \
EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \
emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \
emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \
EMITH_SJMP_END(DCOND_MI); \
EMITH_SJMP_END(DCOND_EQ); \
EMITH_SJMP2_END(DCOND_NE); \
} while (0)
@ -1456,10 +1456,10 @@ static inline void emith_pool_adjust(int tcache_offs, int move_offs)
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \
/* XXX: LSB signalling only in SH1, or in SH2 too? */ \
emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \
EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \
emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \
EMITH_SJMP_END(DCOND_LE); \
emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \
EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \
emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \
EMITH_SJMP_END(DCOND_MI); \
EMITH_SJMP_END(DCOND_EQ); \
EMITH_SJMP2_END(DCOND_NE); \
} while (0)

View file

@ -1242,7 +1242,7 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
EMITH_JMP_START(DCOND_LE); \
/* turns = sr.cycles / cycles */ \
emith_asr(t2, sr, 12); \
emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \
emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \
emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \
rcache_free_tmp(t3); \
if (reg >= 0) { \
@ -1309,13 +1309,13 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
/* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \
emith_asr(rn, mh, 15); \
emith_addf_r_r_r_lsr(rn, rn, mh, 31); \
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \
emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \
EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \
emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \
emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \
EMITH_SJMP_END(DCOND_LE); \
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \
emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \
EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \
emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \
emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \
EMITH_SJMP_END(DCOND_MI); \
EMITH_SJMP_END(DCOND_EQ); \
EMITH_SJMP_END(DCOND_EQ); \
} while (0)
@ -1336,10 +1336,10 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \
/* XXX: LSB signalling only in SH1, or in SH2 too? */ \
emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \
EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \
emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \
EMITH_SJMP_END(DCOND_LE); \
emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \
EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \
emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \
EMITH_SJMP_END(DCOND_MI); \
EMITH_SJMP_END(DCOND_EQ); \
EMITH_SJMP_END(DCOND_EQ); \
} while (0)

View file

@ -1736,7 +1736,7 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force)
EMITH_JMP_START(DCOND_LE); \
/* turns = sr.cycles / cycles */ \
emith_asr(t2, sr, 12); \
emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \
emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \
emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \
rcache_free_tmp(t3); \
if (reg >= 0) { \
@ -1806,13 +1806,13 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force)
emith_asr(rn, mh, 15); \
emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \
emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \
emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \
EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> +ovl */ \
emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0xffffffff */ \
emith_sub_r_imm_c(DCOND_MI, mh, 1); /* 0x00007fff */ \
EMITH_SJMP_END(DCOND_PL); \
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \
emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \
EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \
emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \
emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \
EMITH_SJMP_END(DCOND_MI); \
EMITH_SJMP_END(DCOND_EQ); \
EMITH_SJMP_END(DCOND_EQ); \
} while (0)
@ -1835,10 +1835,10 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force)
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \
/* XXX: LSB signalling only in SH1, or in SH2 too? */ \
emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \
EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> positive ovrfl */ \
emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0x7fffffff */ \
EMITH_SJMP_END(DCOND_PL); \
emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \
EMITH_SJMP_START(DCOND_MI); /* sum < 0 -> +ovrfl */ \
emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \
EMITH_SJMP_END(DCOND_MI); \
EMITH_SJMP_END(DCOND_EQ); \
EMITH_SJMP_END(DCOND_EQ); \
} while (0)

View file

@ -1670,7 +1670,7 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force)
EMITH_JMP_START(DCOND_LE); \
/* turns = sr.cycles / cycles */ \
emith_asr(t2, sr, 12); \
emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \
emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \
emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \
rcache_free_tmp(t3); \
if (reg >= 0) { \
@ -1742,13 +1742,13 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force)
emith_asr(rn, mh, 15); \
emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \
emith_tst_r_r(rn, rn); /* (need only N and Z flags) */ \
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \
emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \
EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> +ovl */ \
emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0xffffffff */ \
emith_sub_r_imm_c(DCOND_MI, mh, 1); /* 0x00007fff */ \
EMITH_SJMP_END(DCOND_PL); \
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \
emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \
EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \
emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \
emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \
EMITH_SJMP_END(DCOND_MI); \
EMITH_SJMP_END(DCOND_EQ); \
EMITH_SJMP_END(DCOND_EQ); \
} while (0)
@ -1771,10 +1771,10 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force)
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \
/* XXX: LSB signalling only in SH1, or in SH2 too? */ \
emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \
EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> positive ovrfl */ \
emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0x7fffffff */ \
EMITH_SJMP_END(DCOND_PL); \
emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \
EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \
emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \
EMITH_SJMP_END(DCOND_MI); \
EMITH_SJMP_END(DCOND_EQ); \
EMITH_SJMP_END(DCOND_EQ); \
} while (0)

View file

@ -1494,7 +1494,7 @@ static int emith_cond_check(int cond, int *r, int *s)
EMITH_JMP_START(DCOND_LE); \
/* turns = sr.cycles / cycles */ \
emith_asr(t2, sr, 12); \
emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \
emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \
emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \
rcache_free_tmp(t3); \
if (reg >= 0) { \
@ -1564,13 +1564,13 @@ static int emith_cond_check(int cond, int *r, int *s)
emith_asr(rn, mh, 15); \
emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \
emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \
emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \
EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> +ovl */ \
emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0xffffffff */ \
emith_sub_r_imm_c(DCOND_MI, mh, 1); /* 0x00007fff */ \
EMITH_SJMP_END(DCOND_PL); \
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \
emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \
EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \
emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \
emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \
EMITH_SJMP_END(DCOND_MI); \
EMITH_SJMP_END(DCOND_EQ); \
EMITH_SJMP_END(DCOND_EQ); \
} while (0)
@ -1593,10 +1593,10 @@ static int emith_cond_check(int cond, int *r, int *s)
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \
/* XXX: LSB signalling only in SH1, or in SH2 too? */ \
emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \
EMITH_SJMP_START(DCOND_PL); /* sum > 0 -> positive ovrfl */ \
emith_sub_r_imm_c(DCOND_MI, ml, 1); /* 0x7fffffff */ \
EMITH_SJMP_END(DCOND_PL); \
emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \
EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \
emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \
EMITH_SJMP_END(DCOND_MI); \
EMITH_SJMP_END(DCOND_EQ); \
EMITH_SJMP_END(DCOND_EQ); \
} while (0)

View file

@ -1273,7 +1273,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
EMITH_JMP_START(DCOND_LE); \
/* turns = sr.cycles / cycles */ \
emith_asr(t2, sr, 12); \
emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \
emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \
emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \
rcache_free_tmp(t3); \
if (reg >= 0) { \
@ -1363,16 +1363,16 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
emith_tst_r_imm(sr, S); \
EMITH_SJMP_START(DCOND_EQ); \
/* overflow if top 17 bits of MACH aren't all 1 or 0 */ \
/* to check: add MACH[15] to MACH[31:16]. this is 0 if no overflow */ \
emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \
emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \
emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \
EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \
emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \
emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \
EMITH_SJMP_END(DCOND_LE); \
/* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \
emith_asr(rn, mh, 15); \
emith_addf_r_r_r_lsr(rn, rn, mh, 31); \
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \
emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \
EMITH_SJMP_START(DCOND_MI); /* sum < 0 -> -ovl */ \
emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \
emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \
EMITH_SJMP_END(DCOND_MI); \
EMITH_SJMP_END(DCOND_EQ); \
EMITH_SJMP_END(DCOND_EQ); \
} while (0)
@ -1394,10 +1394,10 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \
/* XXX: LSB signalling only in SH1, or in SH2 too? */ \
emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \
EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \
emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \
EMITH_SJMP_END(DCOND_LE); \
emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -overflow */ \
EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +overflow */ \
emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \
EMITH_SJMP_END(DCOND_MI); \
EMITH_SJMP_END(DCOND_EQ); \
EMITH_SJMP_END(DCOND_EQ); \
} while (0)