sh2 drc, several bug fixes

This commit is contained in:
kub 2024-06-09 22:30:51 +00:00
parent a43c77c0e5
commit 31efd4546e
11 changed files with 51 additions and 40 deletions

View file

@ -1,7 +1,7 @@
/* /*
* Basic macros to emit ARM instructions and some utils * Basic macros to emit ARM instructions and some utils
* Copyright (C) 2008,2009,2010 notaz * Copyright (C) 2008,2009,2010 notaz
* Copyright (C) 2019 kub * Copyright (C) 2019-2024 kub
* *
* This work is licensed under the terms of MAME license. * This work is licensed under the terms of MAME license.
* See COPYING file in the top-level directory. * See COPYING file in the top-level directory.
@ -1196,7 +1196,7 @@ static inline void emith_pool_adjust(int tcache_offs, int move_offs)
#define emith_jump_at(ptr, target) do { \ #define emith_jump_at(ptr, target) do { \
u32 *ptr_ = (u32 *)ptr; \ u32 *ptr_ = (u32 *)ptr; \
u32 val_ = (u32 *)(target) - (u32 *)(ptr) - 2; \ u32 val_ = (u32 *)(target) - ptr_ - 2; \
EOP_C_B_PTR(ptr_, A_COND_AL, 0, val_ & 0xffffff); \ EOP_C_B_PTR(ptr_, A_COND_AL, 0, val_ & 0xffffff); \
} while (0) } while (0)
#define emith_jump_at_size() 4 #define emith_jump_at_size() 4

View file

@ -1,6 +1,6 @@
/* /*
* Basic macros to emit ARM A64 instructions and some utils * Basic macros to emit ARM A64 instructions and some utils
* Copyright (C) 2019 kub * Copyright (C) 2019-2024 kub
* *
* This work is licensed under the terms of MAME license. * This work is licensed under the terms of MAME license.
* See COPYING file in the top-level directory. * See COPYING file in the top-level directory.

View file

@ -1,6 +1,6 @@
/* /*
* Basic macros to emit MIPS32/MIPS64 Release 1 or 2 instructions and some utils * Basic macros to emit MIPS32/MIPS64 Release 1 or 2 instructions and some utils
* Copyright (C) 2019 kub * Copyright (C) 2019-2024 kub
* *
* This work is licensed under the terms of MAME license. * This work is licensed under the terms of MAME license.
* See COPYING file in the top-level directory. * See COPYING file in the top-level directory.
@ -1671,12 +1671,20 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force)
asm volatile( asm volatile(
" rdhwr %2, $1;" " rdhwr %2, $1;"
" bal 0f;" // needed to allow for jr.hb: " bal 0f;" // needed to allow for jr.hb:
#if _MIPS_SZPTR == 64
"0: daddiu $ra, $ra, 3f-0b;" // set ra to insn after jr.hb
#else
"0: addiu $ra, $ra, 3f-0b;" // set ra to insn after jr.hb "0: addiu $ra, $ra, 3f-0b;" // set ra to insn after jr.hb
#endif
" beqz %2, 3f;" " beqz %2, 3f;"
"1: synci 0(%0);" "1: synci 0(%0);"
" sltu %3, %0, %1;" " sltu %3, %0, %1;"
#if _MIPS_SZPTR == 64
" daddu %0, %0, %2;"
#else
" addu %0, %0, %2;" " addu %0, %0, %2;"
#endif
" bnez %3, 1b;" " bnez %3, 1b;"
" sync;" " sync;"

View file

@ -1,6 +1,6 @@
/* /*
* Basic macros to emit PowerISA 2.03 64 bit instructions and some utils * Basic macros to emit PowerISA 2.03 64 bit instructions and some utils
* Copyright (C) 2020 kub * Copyright (C) 2020-2024 kub
* *
* This work is licensed under the terms of MAME license. * This work is licensed under the terms of MAME license.
* See COPYING file in the top-level directory. * See COPYING file in the top-level directory.

View file

@ -1,6 +1,6 @@
/* /*
* Basic macros to emit RISC-V RV64IM instructions and some utils * Basic macros to emit RISC-V RV64IM instructions and some utils
* Copyright (C) 2019 kub * Copyright (C) 2019-2024 kub
* *
* This work is licensed under the terms of MAME license. * This work is licensed under the terms of MAME license.
* See COPYING file in the top-level directory. * See COPYING file in the top-level directory.
@ -710,9 +710,9 @@ static void emith_move_imm(int r, uintptr_t imm)
if (lui >> 12) { if (lui >> 12) {
EMIT(R5_MOVT_IMM(r, lui)); EMIT(R5_MOVT_IMM(r, lui));
if (imm & 0xfff) if (imm & 0xfff)
EMIT(R5_ADD_IMM(r, r, imm)); EMIT(R5_ADDW_IMM(r, r, imm));
} else } else
EMIT(R5_ADD_IMM(r, Z0, imm)); EMIT(R5_ADDW_IMM(r, Z0, imm));
} }
static void emith_move_ptr_imm(int r, uintptr_t imm) static void emith_move_ptr_imm(int r, uintptr_t imm)

View file

@ -1,7 +1,7 @@
/* /*
* Basic macros to emit x86 instructions and some utils * Basic macros to emit x86 instructions and some utils
* Copyright (C) 2008,2009,2010 notaz * Copyright (C) 2008,2009,2010 notaz
* Copyright (C) 2019 kub * Copyright (C) 2019-2024 kub
* *
* This work is licensed under the terms of MAME license. * This work is licensed under the terms of MAME license.
* See COPYING file in the top-level directory. * See COPYING file in the top-level directory.
@ -1365,7 +1365,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
/* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \
/* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \ /* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \
emith_asr(rn, mh, 15); \ emith_asr(rn, mh, 15); \
emith_addf_r_r_r_lsr(rn, rn, mh, 31); \ emith_lsr(rm, mh, 31); \
emith_addf_r_r(rn, rm); \
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \ EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \ emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \
emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \ emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \

View file

@ -1,7 +1,7 @@
/* /*
* SH2 recompiler * SH2 recompiler
* (C) notaz, 2009,2010,2013 * (C) notaz, 2009,2010,2013
* (C) kub, 2018,2019,2020 * (C) kub, 2018-2024
* *
* This work is licensed under the terms of MAME license. * This work is licensed under the terms of MAME license.
* See COPYING file in the top-level directory. * See COPYING file in the top-level directory.
@ -2610,7 +2610,8 @@ static uptr split_address(uptr la, uptr mask, s32 *offs)
#ifdef __arm__ #ifdef __arm__
// arm32 offset has an add/sub flag and an unsigned 8 bit value, which only // arm32 offset has an add/sub flag and an unsigned 8 bit value, which only
// allows values of [-255...255]. the value -256 thus can't be used. // allows values of [-255...255]. the value -256 thus can't be used.
if (*offs + sign == 0) { if (*offs < 0) { // TODO not working at all with negative offsets on ARM?
//if (*offs == -sign) {
la -= sign; la -= sign;
*offs += sign; *offs += sign;
} }
@ -2631,7 +2632,7 @@ static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmode, s32 *offs)
// is r constant and points to a memory region? // is r constant and points to a memory region?
if (! gconst_get(r, &a)) if (! gconst_get(r, &a))
return -1; return -1;
poffs = dr_ctx_get_mem_ptr(sh2, a, &mask); poffs = dr_ctx_get_mem_ptr(sh2, a + *offs, &mask);
if (poffs == -1) if (poffs == -1)
return -1; return -1;
@ -3244,10 +3245,11 @@ static void emit_branch_linkage_code(SH2 *sh2, struct block_desc *block, int tca
} }
#define FLUSH_CYCLES(sr) \ #define FLUSH_CYCLES(sr) \
if (cycles > 0) { \ if (cycles > 0) \
emith_sub_r_imm(sr, cycles << 12); \ emith_sub_r_imm(sr, cycles << 12); \
cycles = 0; \ else if (cycles < 0) /* may happen after a branch not taken */ \
} emith_add_r_imm(sr, -cycles << 12); \
cycles = 0; \
static void *dr_get_pc_base(u32 pc, SH2 *sh2); static void *dr_get_pc_base(u32 pc, SH2 *sh2);
static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, int free); static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, int free);
@ -3960,10 +3962,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
#if DIV_OPTIMIZER #if DIV_OPTIMIZER
if (div(opd).div1 == 16 && div(opd).ro == div(opd).rn) { if (div(opd).div1 == 16 && div(opd).ro == div(opd).rn) {
// divide 32/16 // divide 32/16
tmp = rcache_get_tmp_arg(1);
emith_add_r_r_ptr_imm(tmp, CONTEXT_REG, offsetof(SH2, drc_tmp));
rcache_get_reg_arg(0, div(opd).rn, NULL); rcache_get_reg_arg(0, div(opd).rn, NULL);
rcache_get_reg_arg(2, div(opd).rm, NULL); rcache_get_reg_arg(2, div(opd).rm, NULL);
tmp = rcache_get_tmp_arg(1);
emith_add_r_r_ptr_imm(tmp, CONTEXT_REG, offsetof(SH2, drc_tmp));
rcache_invalidate_tmp(); rcache_invalidate_tmp();
emith_abicall(sh2_drc_divu32); emith_abicall(sh2_drc_divu32);
tmp = rcache_get_tmp_ret(); tmp = rcache_get_tmp_ret();
@ -3979,16 +3981,17 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
emith_or_r_r_r(sr, sr, tmp3); // T emith_or_r_r_r(sr, sr, tmp3); // T
rcache_free_tmp(tmp3); rcache_free_tmp(tmp3);
skip_op = div(opd).div1 + div(opd).rotcl; skip_op = div(opd).div1 + div(opd).rotcl;
cycles += skip_op;
} }
else if (div(opd).div1 == 32 && div(opd).ro != div(opd).rn) { else if (div(opd).div1 == 32 && div(opd).ro != div(opd).rn) {
// divide 64/32 // divide 64/32
tmp4 = rcache_get_reg(div(opd).ro, RC_GR_READ, NULL); tmp4 = rcache_get_reg(div(opd).ro, RC_GR_READ, NULL);
emith_ctx_write(tmp4, offsetof(SH2, drc_tmp)); emith_ctx_write(tmp4, offsetof(SH2, drc_tmp));
rcache_free(tmp4); rcache_free(tmp4);
tmp = rcache_get_tmp_arg(1);
emith_add_r_r_ptr_imm(tmp, CONTEXT_REG, offsetof(SH2, drc_tmp));
rcache_get_reg_arg(0, div(opd).rn, NULL); rcache_get_reg_arg(0, div(opd).rn, NULL);
rcache_get_reg_arg(2, div(opd).rm, NULL); rcache_get_reg_arg(2, div(opd).rm, NULL);
tmp = rcache_get_tmp_arg(1);
emith_add_r_r_ptr_imm(tmp, CONTEXT_REG, offsetof(SH2, drc_tmp));
rcache_invalidate_tmp(); rcache_invalidate_tmp();
emith_abicall(sh2_drc_divu64); emith_abicall(sh2_drc_divu64);
tmp = rcache_get_tmp_ret(); tmp = rcache_get_tmp_ret();
@ -4004,6 +4007,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
emith_or_r_r_lsl(sr, tmp3, Q_SHIFT); emith_or_r_r_lsl(sr, tmp3, Q_SHIFT);
rcache_free_tmp(tmp3); rcache_free_tmp(tmp3);
skip_op = div(opd).div1 + div(opd).rotcl; skip_op = div(opd).div1 + div(opd).rotcl;
cycles += skip_op;
} }
#endif #endif
break; break;
@ -4085,13 +4089,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
#if DIV_OPTIMIZER #if DIV_OPTIMIZER
if (div(opd).div1 == 16 && div(opd).ro == div(opd).rn) { if (div(opd).div1 == 16 && div(opd).ro == div(opd).rn) {
// divide 32/16 // divide 32/16
tmp = rcache_get_tmp_arg(1); tmp = rcache_get_reg_arg(0, div(opd).rn, NULL);
emith_add_r_r_ptr_imm(tmp, CONTEXT_REG, offsetof(SH2, drc_tmp));
rcache_get_reg_arg(0, div(opd).rn, NULL);
tmp2 = rcache_get_reg_arg(2, div(opd).rm, NULL); tmp2 = rcache_get_reg_arg(2, div(opd).rm, NULL);
tmp3 = rcache_get_tmp(); tmp3 = rcache_get_tmp_arg(1);
emith_lsr(tmp3, tmp2, 31); emith_lsr(tmp3, tmp2, 31);
emith_or_r_r_lsl(sr, tmp3, M_SHIFT); // M = Rm[31] emith_or_r_r_lsl(sr, tmp3, M_SHIFT); // M = Rm[31]
emith_add_r_r_ptr_imm(tmp3, CONTEXT_REG, offsetof(SH2, drc_tmp));
rcache_invalidate_tmp(); rcache_invalidate_tmp();
emith_abicall(sh2_drc_divs32); emith_abicall(sh2_drc_divs32);
tmp = rcache_get_tmp_ret(); tmp = rcache_get_tmp_ret();
@ -4108,6 +4111,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
emith_or_r_r_r(sr, sr, tmp3); // T emith_or_r_r_r(sr, sr, tmp3); // T
rcache_free_tmp(tmp3); rcache_free_tmp(tmp3);
skip_op = div(opd).div1 + div(opd).rotcl; skip_op = div(opd).div1 + div(opd).rotcl;
cycles += skip_op;
} }
else if (div(opd).div1 == 32 && div(opd).ro != div(opd).rn) { else if (div(opd).div1 == 32 && div(opd).ro != div(opd).rn) {
// divide 64/32 // divide 64/32
@ -4138,6 +4142,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
emith_or_r_r_lsl(sr, tmp3, Q_SHIFT); // Q = !Ro[0]^M emith_or_r_r_lsl(sr, tmp3, Q_SHIFT); // Q = !Ro[0]^M
rcache_free_tmp(tmp3); rcache_free_tmp(tmp3);
skip_op = div(opd).div1 + div(opd).rotcl; skip_op = div(opd).div1 + div(opd).rotcl;
cycles += skip_op;
} else } else
#endif #endif
{ {
@ -5113,7 +5118,7 @@ end_op:
emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret); emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret);
#endif #endif
// branch not taken, correct cycle count // branch not taken, correct cycle count (now, cycles < 0)
if (ctaken) if (ctaken)
cycles -= ctaken; cycles -= ctaken;
// set T bit to reflect branch not taken for OP_BRANCH_CT/CF // set T bit to reflect branch not taken for OP_BRANCH_CT/CF
@ -5243,10 +5248,6 @@ end_op:
printf("~~~\n"); printf("~~~\n");
*/ */
#if (DRC_DEBUG)
fflush(stdout);
#endif
return block_entry_ptr; return block_entry_ptr;
} }
@ -5675,8 +5676,9 @@ static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, int free)
a += rest, len -= rest; a += rest, len -= rest;
} while (len > 0); } while (len > 0);
if (!removed && len <= 4) { if (!removed) {
dbg(2, "rm_blocks called @%08x, no work?", _a); if (len <= 4)
dbg(2, "rm_blocks called @%08x, no work?", _a);
return; return;
} }
@ -5984,7 +5986,6 @@ int sh2_drc_init(SH2 *sh2)
// disasm the utils // disasm the utils
tcache_dsm_ptrs[0] = tcache; tcache_dsm_ptrs[0] = tcache;
do_host_disasm(0); do_host_disasm(0);
fflush(stdout);
#endif #endif
#if (DRC_DEBUG & 1) #if (DRC_DEBUG & 1)
hash_collisions = 0; hash_collisions = 0;

View file

@ -1,7 +1,7 @@
#include "../sh2.h" #include "../sh2.h"
#ifdef DRC_CMP #ifdef DRC_CMP
#include "../compiler.c" #include "../compiler.h"
#define BUSY_LOOP_HACKS 0 #define BUSY_LOOP_HACKS 0
#else #else
#define BUSY_LOOP_HACKS 1 #define BUSY_LOOP_HACKS 1

View file

@ -435,24 +435,25 @@ void REGPARM(3) sh2_peripheral_write32(u32 a, u32 d, SH2 *sh2)
old = r[a / 4]; old = r[a / 4];
r[a / 4] = d; r[a / 4] = d;
// TODO: DRC doesn't correctly extend 'd' parameter register to 64bit :-/
switch (a) { switch (a) {
// division unit (TODO: verify): // division unit (TODO: verify):
case 0x104: // DVDNT: dividend L, starts divide case 0x104: // DVDNT: dividend L, starts divide
elprintf_sh2(sh2, EL_32XP, "divide %08x / %08x", elprintf_sh2(sh2, EL_32XP, "divide %08x / %08x",
d, r[0x100 / 4]); r[0x104 / 4], r[0x100 / 4]);
if (r[0x100 / 4]) { if (r[0x100 / 4]) {
signed int divisor = r[0x100 / 4]; signed int divisor = r[0x100 / 4];
r[0x118 / 4] = r[0x110 / 4] = (signed int)d % divisor; r[0x118 / 4] = r[0x110 / 4] = (signed int)r[0x104 / 4] % divisor;
r[0x104 / 4] = r[0x11c / 4] = r[0x114 / 4] = (signed int)d / divisor; r[0x104 / 4] = r[0x11c / 4] = r[0x114 / 4] = (signed int)r[0x104 / 4] / divisor;
} }
else else
r[0x110 / 4] = r[0x114 / 4] = r[0x118 / 4] = r[0x11c / 4] = 0; // ? r[0x110 / 4] = r[0x114 / 4] = r[0x118 / 4] = r[0x11c / 4] = 0; // ?
break; break;
case 0x114: case 0x114:
elprintf_sh2(sh2, EL_32XP, "divide %08x%08x / %08x @%08x", elprintf_sh2(sh2, EL_32XP, "divide %08x%08x / %08x @%08x",
r[0x110 / 4], d, r[0x100 / 4], sh2_pc(sh2)); r[0x110 / 4], r[0x114 / 4], r[0x100 / 4], sh2_pc(sh2));
if (r[0x100 / 4]) { if (r[0x100 / 4]) {
signed long long divident = (signed long long)r[0x110 / 4] << 32 | d; signed long long divident = (signed long long)r[0x110 / 4] << 32 | r[0x114 / 4];
signed int divisor = r[0x100 / 4]; signed int divisor = r[0x100 / 4];
// XXX: undocumented mirroring to 0x118,0x11c? // XXX: undocumented mirroring to 0x118,0x11c?
r[0x118 / 4] = r[0x110 / 4] = divident % divisor; r[0x118 / 4] = r[0x110 / 4] = divident % divisor;

View file

@ -2542,7 +2542,7 @@ void retro_init(void)
| POPT_EN_MCD_PCM|POPT_EN_MCD_CDDA|POPT_EN_MCD_GFX | POPT_EN_MCD_PCM|POPT_EN_MCD_CDDA|POPT_EN_MCD_GFX
| POPT_EN_32X|POPT_EN_PWM | POPT_EN_32X|POPT_EN_PWM
| POPT_ACC_SPRITES|POPT_DIS_32C_BORDER; | POPT_ACC_SPRITES|POPT_DIS_32C_BORDER;
#ifdef __arm__ #ifdef DRC_SH2
#ifdef _3DS #ifdef _3DS
if (ctr_svchack_successful) if (ctr_svchack_successful)
#endif #endif

View file

@ -36,7 +36,7 @@ void pemu_prep_defconfig(void)
void pemu_validate_config(void) void pemu_validate_config(void)
{ {
#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__riscv__) && !defined(__riscv) && !defined(__powerpc__) && !defined(__ppc__) && !defined(__PPC__) && !defined(__i386__) && !defined(__x86_64__) #if !defined(DRC_SH2)
PicoIn.opt &= ~POPT_EN_DRC; PicoIn.opt &= ~POPT_EN_DRC;
#endif #endif
} }