svp compiler: jump fixup

git-svn-id: file:///home/notaz/opt/svn/PicoDrive@385 be3aeb3a-fb24-0410-a615-afba39da0efa
This commit is contained in:
notaz 2008-03-15 15:01:42 +00:00
parent fad248933b
commit 4588391846
5 changed files with 265 additions and 147 deletions

View file

@ -22,8 +22,9 @@ extern ssp1601_t *ssp;
#ifndef ARM #ifndef ARM
#define DUMP_BLOCK 0x0c9a #define DUMP_BLOCK 0x0c9a
unsigned int tcache[512*1024]; unsigned int tcache[512*1024];
void regfile_load(void){} void ssp_drc_next(void){}
void regfile_store(void){} void ssp_drc_next_patch(void){}
void ssp_drc_end(void){}
#endif #endif
#include "gen_arm.c" #include "gen_arm.c"
@ -343,7 +344,7 @@ static void tr_mov16_cond(int cond, int r, int val)
hostreg_r[r] = -1; hostreg_r[r] = -1;
} }
/* trashes r0 */ /* trashes r1 */
static void tr_flush_dirty_pmcrs(void) static void tr_flush_dirty_pmcrs(void)
{ {
u32 i, val = (u32)-1; u32 i, val = (u32)-1;
@ -756,7 +757,7 @@ static void tr_PMX_to_r0(int reg)
tr_flush_dirty_ST(); tr_flush_dirty_ST();
//tr_flush_dirty_pmcrs(); //tr_flush_dirty_pmcrs();
tr_mov16(0, reg); tr_mov16(0, reg);
emit_call(ssp_pm_read); emit_call(A_COND_AL, ssp_pm_read);
hostreg_clear(); hostreg_clear();
} }
@ -906,9 +907,12 @@ static void tr_r0_to_STACK(int const_val)
static void tr_r0_to_PC(int const_val) static void tr_r0_to_PC(int const_val)
{ {
/*
* do nothing - dispatcher will take care of this
EOP_MOV_REG_LSL(1, 0, 16); // mov r1, r0, lsl #16 EOP_MOV_REG_LSL(1, 0, 16); // mov r1, r0, lsl #16
EOP_STR_IMM(1,7,0x400+6*4); // str r1, [r7, #(0x400+6*8)] EOP_STR_IMM(1,7,0x400+6*4); // str r1, [r7, #(0x400+6*8)]
hostreg_r[1] = -1; hostreg_r[1] = -1;
*/
} }
static void tr_r0_to_AL(int const_val) static void tr_r0_to_AL(int const_val)
@ -990,7 +994,7 @@ static void tr_r0_to_PMX(int reg)
tr_flush_dirty_ST(); tr_flush_dirty_ST();
//tr_flush_dirty_pmcrs(); //tr_flush_dirty_pmcrs();
tr_mov16(1, reg); tr_mov16(1, reg);
emit_call(ssp_pm_write); emit_call(A_COND_AL, ssp_pm_write);
hostreg_clear(); hostreg_clear();
} }
@ -1166,7 +1170,7 @@ static int tr_detect_rotate(unsigned int op, int *pc, int imm)
// ----------------------------------------------------- // -----------------------------------------------------
static int translate_op(unsigned int op, int *pc, int imm) static int translate_op(unsigned int op, int *pc, int imm, int *end_cond, int *jump_pc)
{ {
u32 tmpv, tmpv2, tmpv3; u32 tmpv, tmpv2, tmpv3;
int ret = 0; int ret = 0;
@ -1188,7 +1192,10 @@ static int translate_op(unsigned int op, int *pc, int imm)
} }
tr_read_funcs[tmpv](op); tr_read_funcs[tmpv](op);
tr_write_funcs[tmpv2]((known_regb & (1 << tmpv)) ? known_regs.gr[tmpv].h : -1); tr_write_funcs[tmpv2]((known_regb & (1 << tmpv)) ? known_regs.gr[tmpv].h : -1);
if (tmpv2 == SSP_PC) ret |= 0x10000; if (tmpv2 == SSP_PC) {
ret |= 0x10000;
*end_cond = -A_COND_AL;
}
ret++; break; ret++; break;
// ld d, (ri) // ld d, (ri)
@ -1202,7 +1209,10 @@ static int translate_op(unsigned int op, int *pc, int imm)
tr_rX_read(r, mod); tr_rX_read(r, mod);
else tr_ptrr_mod(r, mod, 1, 1); else tr_ptrr_mod(r, mod, 1, 1);
tr_write_funcs[tmpv](-1); tr_write_funcs[tmpv](-1);
if (tmpv == SSP_PC) ret |= 0x10000; if (tmpv == SSP_PC) {
ret |= 0x10000;
*end_cond = -A_COND_AL;
}
ret++; break; ret++; break;
} }
@ -1228,7 +1238,10 @@ static int translate_op(unsigned int op, int *pc, int imm)
if (ret > 0) break; if (ret > 0) break;
tr_mov16(0, imm); tr_mov16(0, imm);
tr_write_funcs[tmpv](imm); tr_write_funcs[tmpv](imm);
if (tmpv == SSP_PC) ret |= 0x10000; if (tmpv == SSP_PC) {
ret |= 0x10000;
*jump_pc = imm;
}
ret += 2; break; ret += 2; break;
// ld d, ((ri)) // ld d, ((ri))
@ -1236,7 +1249,10 @@ static int translate_op(unsigned int op, int *pc, int imm)
tmpv2 = (op >> 4) & 0xf; // dst tmpv2 = (op >> 4) & 0xf; // dst
tr_rX_read2(op); tr_rX_read2(op);
tr_write_funcs[tmpv2](-1); tr_write_funcs[tmpv2](-1);
if (tmpv2 == SSP_PC) ret |= 0x10000; if (tmpv2 == SSP_PC) {
ret |= 0x10000;
*end_cond = -A_COND_AL;
}
ret += 3; break; ret += 3; break;
// ldi (ri), imm // ldi (ri), imm
@ -1321,11 +1337,12 @@ static int translate_op(unsigned int op, int *pc, int imm)
tcache_ptr = real_ptr; tcache_ptr = real_ptr;
} }
tr_mov16_cond(tmpv, 0, imm); tr_mov16_cond(tmpv, 0, imm);
if (tmpv != A_COND_AL) { if (tmpv != A_COND_AL)
tr_mov16_cond(tr_neg_cond(tmpv), 0, *pc); tr_mov16_cond(tr_neg_cond(tmpv), 0, *pc);
}
tr_r0_to_PC(tmpv == A_COND_AL ? imm : -1); tr_r0_to_PC(tmpv == A_COND_AL ? imm : -1);
ret |= 0x10000; ret |= 0x10000;
*end_cond = tmpv;
*jump_pc = imm;
ret += 2; break; ret += 2; break;
} }
@ -1338,18 +1355,22 @@ static int translate_op(unsigned int op, int *pc, int imm)
EOP_LDRH_SIMPLE(0,0); // ldrh r0, [r0] EOP_LDRH_SIMPLE(0,0); // ldrh r0, [r0]
hostreg_r[0] = hostreg_r[1] = -1; hostreg_r[0] = hostreg_r[1] = -1;
tr_write_funcs[tmpv2](-1); tr_write_funcs[tmpv2](-1);
if (tmpv2 == SSP_PC) ret |= 0x10000; if (tmpv2 == SSP_PC) {
ret |= 0x10000;
*end_cond = -A_COND_AL;
}
ret += 3; break; ret += 3; break;
// bra cond, addr // bra cond, addr
case 0x26: case 0x26:
tmpv = tr_cond_check(op); tmpv = tr_cond_check(op);
tr_mov16_cond(tmpv, 0, imm); tr_mov16_cond(tmpv, 0, imm);
if (tmpv != A_COND_AL) { if (tmpv != A_COND_AL)
tr_mov16_cond(tr_neg_cond(tmpv), 0, *pc); tr_mov16_cond(tr_neg_cond(tmpv), 0, *pc);
}
tr_r0_to_PC(tmpv == A_COND_AL ? imm : -1); tr_r0_to_PC(tmpv == A_COND_AL ? imm : -1);
ret |= 0x10000; ret |= 0x10000;
*end_cond = tmpv;
*jump_pc = imm;
ret += 2; break; ret += 2; break;
// mod cond, op // mod cond, op
@ -1551,11 +1572,54 @@ static int translate_op(unsigned int op, int *pc, int imm)
return ret; return ret;
} }
/*
 * Emit the entry check placed at the start of a translated block:
 * compare the cycle counter in r11 against zero and, when it is <= 0
 * (ARM condition LE), call ssp_drc_end instead of running the block.
 */
static void emit_block_prologue(void)
{
// check if there are enough cycles..
// note: r0 must contain PC of current block
// (ssp_drc_end takes the PC it writes back to the SSP context from r0)
EOP_CMP_IMM(11,0,0); // cmp r11, #0
emit_call(A_COND_LE, ssp_drc_end);
}
/*
 * Emit the end of a translated block: subtract the block's cycles from r11
 * and transfer control to whatever runs next.
 *
 * cycles: cycle count to subtract (clamped to 0xff, with a warning printed)
 * cond:   >0: ARM condition of a direct (un)conditional jump
 *         <0: indirect jump
 * pc:     target of the direct jump
 * end_pc: address used for the other leg of a conditional jump
 *         (the caller passes the PC reached at the end of the block)
 */
static void emit_block_epilogue(int cycles, int cond, int pc, int end_pc)
{
	u32 *taken, *not_taken;
	int inv_cond;

	if (cycles > 0xff) {
		printf("large cycle count: %i\n", cycles);
		cycles = 0xff;
	}
	EOP_SUB_IMM(11,11,0,cycles);		// sub r11, r11, #cycles

	// indirect jump, or rom -> iram jump, must use dispatcher
	if (cond < 0 || (end_pc >= 0x400 && pc < 0x400)) {
		emit_jump(A_COND_AL, ssp_drc_next);
		return;
	}

	// already translated block for the jump target, if there is one
	if (pc < 0x400)
		taken = block_table_iram[ssp->drc.iram_context][pc];
	else
		taken = block_table[pc];

	if (cond == A_COND_AL) {
		if (taken != NULL) {
			emit_jump(A_COND_AL, taken);
			return;
		}
		emit_jump(A_COND_AL, ssp_drc_next);
		// cause the next block to be emitted over the jump instruction
		tcache_ptr--;
		return;
	}

	// conditional jump: one instruction per leg, unknown legs go
	// through ssp_drc_next_patch
	if (end_pc < 0x400)
		not_taken = block_table_iram[ssp->drc.iram_context][end_pc];
	else
		not_taken = block_table[end_pc];

	if (taken != NULL)
		emit_jump(cond, taken);
	else
		emit_call(cond, ssp_drc_next_patch);

	// neg_cond, to be able to swap jumps if needed
	inv_cond = tr_neg_cond(cond);
	if (not_taken != NULL)
		emit_jump(inv_cond, not_taken);
	else
		emit_call(inv_cond, ssp_drc_next_patch);
}
void *ssp_translate_block(int pc) void *ssp_translate_block(int pc)
{ {
unsigned int op, op1, imm, ccount = 0; unsigned int op, op1, imm, ccount = 0;
unsigned int *block_start; unsigned int *block_start;
int ret, ret_prev = -1, tpc; int ret, end_cond = A_COND_AL, jump_pc = -1;
printf("translate %04x -> %04x\n", pc<<1, (tcache_ptr-tcache)<<2); printf("translate %04x -> %04x\n", pc<<1, (tcache_ptr-tcache)<<2);
block_start = tcache_ptr; block_start = tcache_ptr;
@ -1574,31 +1638,28 @@ void *ssp_translate_block(int pc)
if ((op1 & 0xf) == 4 || (op1 & 0xf) == 6) if ((op1 & 0xf) == 4 || (op1 & 0xf) == 6)
imm = PROGRAM(pc++); // immediate imm = PROGRAM(pc++); // immediate
tpc = pc;
ret = translate_op(op, &pc, imm); ret = translate_op(op, &pc, imm, &end_cond, &jump_pc);
if (ret <= 0) if (ret <= 0)
{ {
printf("NULL func! op=%08x (%02x)\n", op, op1); printf("NULL func! op=%08x (%02x)\n", op, op1);
exit(1); exit(1);
} }
else
{
ccount += ret & 0xffff;
if (ret & 0x10000) break;
}
ret_prev = ret; ccount += ret & 0xffff;
if (ret & 0x10000) break;
} }
if (ccount >= 100) if (ccount >= 100) {
emit_pc_dump(pc); end_cond = A_COND_AL;
jump_pc = pc;
emit_mov_const(A_COND_AL, 0, pc);
}
tr_flush_dirty_prs(); tr_flush_dirty_prs();
tr_flush_dirty_ST(); tr_flush_dirty_ST();
tr_flush_dirty_pmcrs(); tr_flush_dirty_pmcrs();
emit_block_epilogue(ccount + 1); emit_block_epilogue(ccount, end_cond, jump_pc, pc);
*tcache_ptr++ = 0xffffffff; // end of block
if (tcache_ptr - tcache > TCACHE_SIZE/4) { if (tcache_ptr - tcache > TCACHE_SIZE/4) {
printf("tcache overflow!\n"); printf("tcache overflow!\n");
@ -1641,7 +1702,6 @@ int ssp1601_dyn_startup(void)
memset(block_table, 0, sizeof(block_table)); memset(block_table, 0, sizeof(block_table));
memset(block_table_iram, 0, sizeof(block_table_iram)); memset(block_table_iram, 0, sizeof(block_table_iram));
tcache_ptr = tcache; tcache_ptr = tcache;
*tcache_ptr++ = 0xffffffff;
PicoLoadStateHook = ssp1601_state_load; PicoLoadStateHook = ssp1601_state_load;
@ -1657,6 +1717,22 @@ int ssp1601_dyn_startup(void)
void ssp1601_dyn_reset(ssp1601_t *ssp) void ssp1601_dyn_reset(ssp1601_t *ssp)
{ {
// debug: dump the translation cache to tcache.bin and list every
// translated block as "<byte offset in tcache> -> <context>:<SSP address>"
{
	int i, u;
	FILE *f = fopen("tcache.bin", "wb");

	// the dump is best-effort: do not touch the stream if it could not be opened
	if (f != NULL) {
		fwrite(tcache, 1, (tcache_ptr - tcache)*4, f);
		fclose(f);
	}
	else
		printf("failed to open tcache.bin for writing\n");

	// pointer differences are cast so the argument type matches %x
	for (i = 0; i < 0x5090/2; i++)
		if (block_table[i])
			printf("%06x -> __:%04x\n", (unsigned int)((block_table[i] - tcache)*4), i<<1);
	for (u = 1; u < 15; u++)
		for (i = 0; i < 0x800/2; i++)
			if (block_table_iram[u][i])
				printf("%06x -> %02i:%04x\n", (unsigned int)((block_table_iram[u][i] - tcache)*4), u, i<<1);
}
ssp1601_reset(ssp); ssp1601_reset(ssp);
ssp->drc.iram_dirty = 1; ssp->drc.iram_dirty = 1;
ssp->drc.iram_context = 0; ssp->drc.iram_context = 0;
@ -1666,6 +1742,9 @@ void ssp1601_dyn_reset(ssp1601_t *ssp)
ssp->drc.ptr_dram = (u32) svp->dram; ssp->drc.ptr_dram = (u32) svp->dram;
ssp->drc.ptr_btable = (u32) block_table; ssp->drc.ptr_btable = (u32) block_table;
ssp->drc.ptr_btable_iram = (u32) block_table_iram; ssp->drc.ptr_btable_iram = (u32) block_table_iram;
// prevent new versions of IRAM from appearing
memset(svp->iram_rom, 0, 0x800);
} }
void ssp1601_dyn_run(int cycles) void ssp1601_dyn_run(int cycles)

View file

@ -2,10 +2,10 @@
extern unsigned int tcache[]; extern unsigned int tcache[];
void ssp_regfile_load(void);
void ssp_regfile_store(void);
int ssp_drc_entry(int cycles); int ssp_drc_entry(int cycles);
void ssp_drc_next(void); void ssp_drc_next(void);
void ssp_drc_next_patch(void);
void ssp_drc_end(void);
void ssp_hle_800(void); void ssp_hle_800(void);

View file

@ -15,6 +15,7 @@
#define A_COND_NE 0x1 #define A_COND_NE 0x1
#define A_COND_MI 0x4 #define A_COND_MI 0x4
#define A_COND_PL 0x5 #define A_COND_PL 0x5
#define A_COND_LE 0xd
/* addressing mode 1 */ /* addressing mode 1 */
#define A_AM1_LSL 0 #define A_AM1_LSL 0
@ -52,6 +53,7 @@
#define EOP_AND_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_AND,0,rn,rd,ror2,imm8) #define EOP_AND_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_AND,0,rn,rd,ror2,imm8)
#define EOP_SUB_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_SUB,0,rn,rd,ror2,imm8) #define EOP_SUB_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_SUB,0,rn,rd,ror2,imm8)
#define EOP_TST_IMM( rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_TST,1,rn, 0,ror2,imm8) #define EOP_TST_IMM( rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_TST,1,rn, 0,ror2,imm8)
#define EOP_CMP_IMM( rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_CMP,1,rn, 0,ror2,imm8)
#define EOP_RSB_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_RSB,0,rn,rd,ror2,imm8) #define EOP_RSB_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_RSB,0,rn,rd,ror2,imm8)
#define EOP_MOV_REG(s, rd,shift_imm,shift_op,rm) EOP_C_DOP_REG_XIMM(A_COND_AL,A_OP_MOV,s, 0,rd,shift_imm,shift_op,rm) #define EOP_MOV_REG(s, rd,shift_imm,shift_op,rm) EOP_C_DOP_REG_XIMM(A_COND_AL,A_OP_MOV,s, 0,rd,shift_imm,shift_op,rm)
@ -171,15 +173,6 @@ static void emit_mov_const(int cond, int d, unsigned int val)
EOP_C_DOP_IMM(cond, need_or ? A_OP_ORR : A_OP_MOV, 0, need_or ? d : 0, d, 0, val&0xff); EOP_C_DOP_IMM(cond, need_or ? A_OP_ORR : A_OP_MOV, 0, need_or ? d : 0, d, 0, val&0xff);
} }
/*
static void check_offset_12(unsigned int val)
{
if (!(val & ~0xfff)) return;
printf("offset_12 overflow %04x\n", val);
exit(1);
}
*/
static void check_offset_24(int val) static void check_offset_24(int val)
{ {
if (val >= (int)0xff000000 && val <= 0x00ffffff) return; if (val >= (int)0xff000000 && val <= 0x00ffffff) return;
@ -187,35 +180,23 @@ static void check_offset_24(int val)
exit(1); exit(1);
} }
static void emit_call(void *target) static void emit_call(int cond, void *target)
{ {
int val = (unsigned int *)target - tcache_ptr - 2; int val = (unsigned int *)target - tcache_ptr - 2;
check_offset_24(val); check_offset_24(val);
EOP_BL(val & 0xffffff); // bl target EOP_C_B(cond,1,val & 0xffffff); // bl target
} }
static void emit_block_prologue(void) static void emit_jump(int cond, void *target)
{ {
// nothing int val = (unsigned int *)target - tcache_ptr - 2;
check_offset_24(val);
EOP_C_B(cond,0,val & 0xffffff); // b target
} }
static void emit_block_epilogue(int cycles) static void handle_caches(void)
{
if (cycles > 0xff) { printf("large cycle count: %i\n", cycles); cycles = 0xff; }
EOP_SUB_IMM(11,11,0,cycles); // sub r11, r11, #cycles
#ifdef ARM
emit_call(ssp_drc_next);
#endif
}
static void emit_pc_dump(int pc)
{
emit_mov_const(A_COND_AL, 3, pc<<16);
EOP_STR_IMM(3,7,0x400+6*4); // str r3, [r7, #(0x400+6*8)]
}
static void handle_caches()
{ {
#ifdef ARM #ifdef ARM
extern void flush_inval_caches(const void *start_addr, const void *end_addr); extern void flush_inval_caches(const void *start_addr, const void *end_addr);

View file

@ -61,6 +61,8 @@ typedef struct
unsigned int ptr_btable; // 49c unsigned int ptr_btable; // 49c
unsigned int ptr_btable_iram; // 4a0 unsigned int ptr_btable_iram; // 4a0
unsigned int tmp0; // 4a4 unsigned int tmp0; // 4a4
unsigned int tmp1; // 4a8
unsigned int tmp2; // 4ac
} drc; } drc;
} ssp1601_t; } ssp1601_t;

View file

@ -7,10 +7,10 @@
.global tcache .global tcache
.global flush_inval_caches .global flush_inval_caches
.global ssp_regfile_load
.global ssp_regfile_store
.global ssp_drc_entry .global ssp_drc_entry
.global ssp_drc_next .global ssp_drc_next
.global ssp_drc_next_patch
.global ssp_drc_end
.global ssp_hle_800 .global ssp_hle_800
@ translation cache buffer @ translation cache buffer
@ -46,8 +46,92 @@ flush_inval_caches:
@ r10: P @ r10: P
@ r11: cycles @ r11: cycles
@ trashes r2,r3
#define SSP_OFFS_GR 0x400
#define SSP_PC 6
#define SSP_P 7
#define SSP_PM0 8
#define SSP_OFFS_EMUSTAT 0x484 // emu_status
#define SSP_OFFS_IRAM_DIRTY 0x494
#define SSP_OFFS_IRAM_CTX 0x498 // iram_context
#define SSP_OFFS_BLTAB 0x49c // block_table
#define SSP_OFFS_BLTAB_IRAM 0x4a0
#define SSP_OFFS_TMP0 0x4a4 // for entry PC
#define SSP_OFFS_TMP1 0x4a8
#define SSP_OFFS_TMP2 0x4ac
#define SSP_WAIT_PM0 0x2000
@ ssp_drc_do_next: dispatcher body, find (or translate) the block for the
@ SSP PC given in r0 and continue execution there.
@   patch_jump=0: jump straight to the block (bx r2).
@   patch_jump=1: entered by a call from translated code; lr is saved and
@                 control goes to ssp_drc_do_patch with the block address in r2.
@ PCs below 0x400 use the IRAM block table, all others use the main block table.
.macro ssp_drc_do_next patch_jump=0
.if \patch_jump
str lr, [r7, #SSP_OFFS_TMP2] @ jump instr. (actually call) address + 4
.endif
@ keep only the low 16 bits of the PC
mov r0, r0, lsl #16
mov r0, r0, lsr #16
@ remember the entry PC, r0 is reloaded from here after the calls below
str r0, [r7, #SSP_OFFS_TMP0]
cmp r0, #0x400
blt 0f @ ssp_de_iram
@ look up the main block table entry for this PC
ldr r2, [r7, #SSP_OFFS_BLTAB]
ldr r2, [r2, r0, lsl #2]
tst r2, r2
@ non-zero entry: block already translated
.if \patch_jump
bne ssp_drc_do_patch
.else
bxne r2
.endif
@ not translated yet: translate now (PC passed in r0, result returned in r0)
bl ssp_translate_block
mov r2, r0
ldr r0, [r7, #SSP_OFFS_TMP0] @ entry PC
ldr r1, [r7, #SSP_OFFS_BLTAB]
@ record the new block in the table
str r2, [r1, r0, lsl #2]
.if \patch_jump
b ssp_drc_do_patch
.else
bx r2
.endif
0: @ ssp_de_iram:
@ IRAM: when the dirty flag is set the context is re-evaluated first
ldr r1, [r7, #SSP_OFFS_IRAM_DIRTY]
tst r1, r1
ldreq r1, [r7, #SSP_OFFS_IRAM_CTX]
beq 1f @ ssp_de_iram_ctx
bl ssp_get_iram_context
@ clear the dirty flag and store the context returned in r0
mov r1, #0
str r1, [r7, #SSP_OFFS_IRAM_DIRTY]
mov r1, r0
str r1, [r7, #SSP_OFFS_IRAM_CTX]
ldr r0, [r7, #SSP_OFFS_TMP0] @ entry PC
1: @ ssp_de_iram_ctx:
@ here r1 = iram context, r0 = entry PC
ldr r2, [r7, #SSP_OFFS_BLTAB_IRAM]
add r2, r2, r1, lsl #12 @ block_tab_iram + iram_context * 0x800/2*4
@ r1 = address of the table slot for this PC
add r1, r2, r0, lsl #2
ldr r2, [r1]
tst r2, r2
.if \patch_jump
bne ssp_drc_do_patch
.else
bxne r2
.endif
@ not translated yet: keep the slot address across the call
str r1, [r7, #SSP_OFFS_TMP1]
bl ssp_translate_block
mov r2, r0
ldr r0, [r7, #SSP_OFFS_TMP0] @ entry PC
ldr r1, [r7, #SSP_OFFS_TMP1] @ &block_table_iram[iram_context][rPC]
str r2, [r1]
.if \patch_jump
b ssp_drc_do_patch
.else
bx r2
.endif
.endm @ ssp_drc_do_next
ssp_drc_entry:
stmfd sp!, {r4-r11, lr}
mov r11, r0
ssp_regfile_load: ssp_regfile_load:
ldr r7, =ssp ldr r7, =ssp
ldr r7, [r7] ldr r7, [r7]
@ -69,12 +153,60 @@ ssp_regfile_load:
ldr r8, [r7, #0x440] @ r0-r2 ldr r8, [r7, #0x440] @ r0-r2
ldr r9, [r7, #0x444] @ r4-r6 ldr r9, [r7, #0x444] @ r4-r6
ldr r10,[r7, #(0x400+7*4)] @ P ldr r10,[r7, #(0x400+SSP_P*4)] @ P
bx lr
ldr r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
mov r0, r0, lsr #16
ssp_drc_next:
ssp_drc_do_next 0
ssp_drc_next_patch:
ssp_drc_do_next 1
@ ssp_drc_do_patch: rewrite the call instruction that entered
@ ssp_drc_next_patch so that it reaches the target block directly next time.
@ in: r2 = address of the target block,
@     [r7, #SSP_OFFS_TMP2] = lr saved on entry (call instruction address + 4),
@     [r7, #SSP_OFFS_TMP0] = entry PC (reloaded into r0 before jumping).
@ The call instruction itself is at [r1, #-4].
ssp_drc_do_patch:
ldr r1, [r7, #SSP_OFFS_TMP2] @ jump instr. (actually call) address + 4
@ r12 = distance from the word after the call to the target block
subs r12,r2, r1
@ case 1: the target starts right after the call, so replace the call with a nop
moveq r3, #0xe1000000
orreq r3, r3, #0x00a00000 @ nop
streq r3, [r1, #-4]
beq ssp_drc_dp_end
@ case 2: the target starts one word further, after the instruction at [r1]
cmp r12,#4
ldreq r3, [r1]
@ NOTE(review): the +1 presumably adjusts the branch offset field for the
@ instruction moving one word up, confirm against the emitter
addeq r3, r3, #1
streq r3, [r1, #-4] @ move the other cond up
moveq r3, #0xe1000000
orreq r3, r3, #0x00a00000
streq r3, [r1] @ fill its place with nop
beq ssp_drc_dp_end
@ general case: keep the top byte of the call (with the L bit cleared) and
@ rebuild the low 24 bits from the distance to the target
ldr r3, [r1, #-4]
sub r12,r12,#4
mov r3, r3, lsr #24
bic r3, r3, #1 @ L bit
orr r3, r3, r12,lsl #6
mov r3, r3, ror #8 @ patched branch instruction
str r3, [r1, #-4]
ssp_drc_dp_end:
@ keep the target across the call, then flush the two words that may have
@ been rewritten (call address up to call address + 8)
str r2, [r7, #SSP_OFFS_TMP1]
sub r0, r1, #4
add r1, r1, #4
bl flush_inval_caches
ldr r2, [r7, #SSP_OFFS_TMP1]
ldr r0, [r7, #SSP_OFFS_TMP0]
@ continue in the target block with r0 = entry PC
bx r2
ssp_drc_end:
mov r0, r0, lsl #16
str r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
ssp_regfile_store: ssp_regfile_store:
str r10,[r7, #(0x400+7*4)] @ P str r10,[r7, #(0x400+SSP_P*4)] @ P
str r8, [r7, #0x440] @ r0-r2 str r8, [r7, #0x440] @ r0-r2
str r9, [r7, #0x444] @ r4-r6 str r9, [r7, #0x444] @ r4-r6
@ -93,74 +225,7 @@ ssp_regfile_store:
add r8, r7, #0x400 add r8, r7, #0x400
add r8, r8, #4 add r8, r8, #4
stmia r8, {r2,r3,r5,r6,r9} stmia r8, {r2,r3,r5,r6,r9}
bx lr
#define SSP_OFFS_GR 0x400
#define SSP_PM0 8
#define SSP_PC 6
#define SSP_OFFS_EMUSTAT 0x484 // emu_status
#define SSP_OFFS_IRAM_DIRTY 0x494
#define SSP_OFFS_IRAM_CTX 0x498 // iram_context
#define SSP_OFFS_BLTAB 0x49c // block_table
#define SSP_OFFS_BLTAB_IRAM 0x4a0
#define SSP_OFFS_TMP0 0x4a4
#define SSP_WAIT_PM0 0x2000
ssp_drc_entry:
stmfd sp!, {r4-r11, lr}
mov r11, r0
bl ssp_regfile_load
ssp_drc_next:
cmp r11, #0
bmi ssp_drc_end
ldr r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
mov r0, r0, lsr #16
str r0, [r7, #SSP_OFFS_TMP0]
cmp r0, #0x400
blt ssp_de_iram
ldr r1, [r7, #SSP_OFFS_BLTAB]
ldr r1, [r1, r0, lsl #2]
tst r1, r1
bxne r1
bl ssp_translate_block
ldr r2, [r7, #SSP_OFFS_TMP0] @ entry PC
ldr r1, [r7, #SSP_OFFS_BLTAB]
str r0, [r1, r2, lsl #2]
bx r0
ssp_de_iram:
ldr r1, [r7, #SSP_OFFS_IRAM_DIRTY]
tst r1, r1
ldreq r1, [r7, #SSP_OFFS_IRAM_CTX]
beq ssp_de_iram_ctx
bl ssp_get_iram_context
mov r1, #0
str r1, [r7, #SSP_OFFS_IRAM_DIRTY]
mov r1, r0
str r1, [r7, #SSP_OFFS_IRAM_CTX]
ldr r0, [r7, #SSP_OFFS_TMP0] @ entry PC
ssp_de_iram_ctx:
ldr r2, [r7, #SSP_OFFS_BLTAB_IRAM]
add r2, r2, r1, lsl #12 @ block_tab_iram + iram_context * 0x800/2*4
add r2, r2, r0, lsl #2
ldr r1, [r2]
tst r1, r1
bxne r1
str r2, [r7, #SSP_OFFS_TMP0]
bl ssp_translate_block
ldr r2, [r7, #SSP_OFFS_TMP0] @ &block_table_iram[iram_context][rPC]
str r0, [r2]
bx r0
ssp_drc_end:
bl ssp_regfile_store
mov r0, r11 mov r0, r11
ldmfd sp!, {r4-r11, lr} ldmfd sp!, {r4-r11, lr}
bx lr bx lr
@ -171,25 +236,16 @@ ssp_drc_end:
@ andi 2 @ andi 2
@ bra z=1, gloc_0800 @ bra z=1, gloc_0800
ssp_hle_800: ssp_hle_800:
@ block prologue
@ stmfd sp!, {r4-r11, lr}
@ bl regfile_load
@ mov r11, #0
ldr r0, [r7, #(SSP_OFFS_GR+SSP_PM0*4)] ldr r0, [r7, #(SSP_OFFS_GR+SSP_PM0*4)]
ldr r1, [r7, #SSP_OFFS_EMUSTAT] ldr r1, [r7, #SSP_OFFS_EMUSTAT]
tst r0, #0x20000 tst r0, #0x20000
orreq r1, r1, #SSP_WAIT_PM0 orreq r1, r1, #SSP_WAIT_PM0
addeq r11,r11, #1024 subeq r11,r11, #1024
streq r1, [r7, #SSP_OFFS_EMUSTAT] streq r1, [r7, #SSP_OFFS_EMUSTAT]
movne r0, #0x04000000 mov r0, #0x400
orrne r0, r0, #0x00040000 beq ssp_drc_end
strne r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)] orrne r0, r0, #0x004
bl ssp_drc_next b ssp_drc_next
@ bl regfile_store
@ add r0, r11, #3
@ ldmfd sp!, {r4-r11, lr}
@ bx lr