mirror of
https://github.com/RaySollium99/picodrive.git
synced 2025-09-05 15:27:46 -04:00
sh2 drc: optimize T bit handling for A64
This commit is contained in:
parent
a5e51c16e6
commit
0e12269073
4 changed files with 58 additions and 38 deletions
5
Makefile
5
Makefile
|
@ -36,10 +36,11 @@ endif
|
||||||
|
|
||||||
ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","gp2x" "opendingux" "rpi1"))
|
ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","gp2x" "opendingux" "rpi1"))
|
||||||
# very small caches, avoid optimization options making the binary much bigger
|
# very small caches, avoid optimization options making the binary much bigger
|
||||||
CFLAGS += -finline-limit=42 -fno-unroll-loops -fno-ipa-cp
|
CFLAGS += -finline-limit=43 -fno-unroll-loops -fno-ipa-cp -ffast-math
|
||||||
# this gets you about 20% better execution speed on 32bit arm/mips
|
# this gets you about 20% better execution speed on 32bit arm/mips
|
||||||
CFLAGS += -fno-common -fno-stack-protector -fno-guess-branch-probability -fno-caller-saves -fno-tree-loop-if-convert -ffast-math
|
CFLAGS += -fno-common -fno-stack-protector -fno-guess-branch-probability -fno-caller-saves -fno-tree-loop-if-convert -fno-regmove
|
||||||
endif
|
endif
|
||||||
|
#OBJS += align.o
|
||||||
|
|
||||||
# default settings
|
# default settings
|
||||||
ifeq "$(ARCH)" "arm"
|
ifeq "$(ARCH)" "arm"
|
||||||
|
|
|
@ -44,10 +44,11 @@
|
||||||
#define A64_COND_LE 0xd
|
#define A64_COND_LE 0xd
|
||||||
#define A64_COND_CS A64_COND_HS
|
#define A64_COND_CS A64_COND_HS
|
||||||
#define A64_COND_CC A64_COND_LO
|
#define A64_COND_CC A64_COND_LO
|
||||||
|
// "fake" conditions for T bit handling
|
||||||
#define A64_COND_AL 0xe
|
#define A64_COND_AL 0xe
|
||||||
#define A64_COND_NV 0xf
|
#define A64_COND_NV 0xf
|
||||||
|
|
||||||
/* unified conditions */
|
// DRC conditions
|
||||||
#define DCOND_EQ A64_COND_EQ
|
#define DCOND_EQ A64_COND_EQ
|
||||||
#define DCOND_NE A64_COND_NE
|
#define DCOND_NE A64_COND_NE
|
||||||
#define DCOND_MI A64_COND_MI
|
#define DCOND_MI A64_COND_MI
|
||||||
|
@ -261,6 +262,13 @@ enum { XT_UXTW=0x4, XT_UXTX=0x6, XT_LSL=0x7, XT_SXTW=0xc, XT_SXTX=0xe };
|
||||||
#define A64_BCOND(cond, offs19) \
|
#define A64_BCOND(cond, offs19) \
|
||||||
A64_INSN(0xa,0x2,_,_,_,_,_,(offs19) >> 2,(cond))
|
A64_INSN(0xa,0x2,_,_,_,_,_,(offs19) >> 2,(cond))
|
||||||
|
|
||||||
|
// conditional select
|
||||||
|
|
||||||
|
#define A64_CINC(cond, rn, rm) \
|
||||||
|
A64_INSN(0xd,0x0,0x2,0,rm,(cond)^1,0x1,rm,rn) /* CSINC */
|
||||||
|
#define A64_CSET(cond, rn) \
|
||||||
|
A64_CINC(cond, rn, Z0)
|
||||||
|
|
||||||
// load pc-relative
|
// load pc-relative
|
||||||
|
|
||||||
#define A64_LDRLIT_IMM(rd, offs19) \
|
#define A64_LDRLIT_IMM(rd, offs19) \
|
||||||
|
@ -1356,38 +1364,52 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode)
|
||||||
|
|
||||||
#ifdef T
|
#ifdef T
|
||||||
// T bit handling
|
// T bit handling
|
||||||
|
static int tcond = -1;
|
||||||
|
|
||||||
#define emith_invert_cond(cond) \
|
#define emith_invert_cond(cond) \
|
||||||
((cond) ^ 1)
|
((cond) ^ 1)
|
||||||
|
|
||||||
static void emith_clr_t_cond(int sr)
|
#define emith_clr_t_cond(sr) \
|
||||||
|
(void)sr
|
||||||
|
|
||||||
|
#define emith_set_t_cond(sr, cond) \
|
||||||
|
tcond = cond
|
||||||
|
|
||||||
|
#define emith_get_t_cond() \
|
||||||
|
tcond
|
||||||
|
|
||||||
|
#define emith_invalidate_t() \
|
||||||
|
tcond = -1
|
||||||
|
|
||||||
|
#define emith_set_t(sr, val) \
|
||||||
|
tcond = ((val) ? A64_COND_AL: A64_COND_NV)
|
||||||
|
|
||||||
|
static void emith_sync_t(int sr)
|
||||||
{
|
{
|
||||||
emith_bic_r_imm(sr, T);
|
if (tcond == A64_COND_AL)
|
||||||
}
|
emith_or_r_imm(sr, T);
|
||||||
|
else if (tcond == A64_COND_NV)
|
||||||
static void emith_set_t_cond(int sr, int cond)
|
emith_bic_r_imm(sr, T);
|
||||||
{
|
else if (tcond >= 0) {
|
||||||
EMITH_SJMP_START(emith_invert_cond(cond));
|
int tmp = rcache_get_tmp();
|
||||||
emith_or_r_imm_c(cond, sr, T);
|
EMIT(A64_CSET(tcond, tmp));
|
||||||
EMITH_SJMP_END(emith_invert_cond(cond));
|
EMIT(A64_BFI_IMM(sr, tmp, 0, 1)); // assumes SR.T = bit 0
|
||||||
}
|
rcache_free_tmp(tmp);
|
||||||
|
}
|
||||||
#define emith_get_t_cond() -1
|
tcond = -1;
|
||||||
|
|
||||||
#define emith_sync_t(sr) ((void)sr)
|
|
||||||
|
|
||||||
#define emith_invalidate_t()
|
|
||||||
|
|
||||||
static void emith_set_t(int sr, int val)
|
|
||||||
{
|
|
||||||
if (val)
|
|
||||||
emith_or_r_imm(sr, T);
|
|
||||||
else
|
|
||||||
emith_bic_r_imm(sr, T);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int emith_tst_t(int sr, int tf)
|
static int emith_tst_t(int sr, int tf)
|
||||||
{
|
{
|
||||||
emith_tst_r_imm(sr, T);
|
if (tcond < 0) {
|
||||||
return tf ? DCOND_NE: DCOND_EQ;
|
emith_tst_r_imm(sr, T);
|
||||||
|
return tf ? DCOND_NE: DCOND_EQ;
|
||||||
|
} else if (tcond >= A64_COND_AL) {
|
||||||
|
// MUST sync because A64_COND_AL/NV isn't a real condition
|
||||||
|
emith_sync_t(sr);
|
||||||
|
emith_tst_r_imm(sr, T);
|
||||||
|
return tf ? DCOND_NE: DCOND_EQ;
|
||||||
|
} else
|
||||||
|
return tf ? tcond : emith_invert_cond(tcond);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -87,8 +87,6 @@ enum { F1_B, F1_H, F1_W, F1_D, F1_BU, F1_HU, F1_WU }; // LD/ST
|
||||||
// func7
|
// func7
|
||||||
enum { F2_ALT=0x20, F2_MULDIV=0x01 };
|
enum { F2_ALT=0x20, F2_MULDIV=0x01 };
|
||||||
|
|
||||||
#define __(n) o##n // enum marker for "undefined"
|
|
||||||
|
|
||||||
#define R5_NOP R5_I_INSN(OP_IMM, F1_ADD, Z0, Z0, 0) // nop: ADDI r0, r0, #0
|
#define R5_NOP R5_I_INSN(OP_IMM, F1_ADD, Z0, Z0, 0) // nop: ADDI r0, r0, #0
|
||||||
|
|
||||||
// arithmetic/logical
|
// arithmetic/logical
|
||||||
|
@ -687,9 +685,8 @@ static void emith_pool_check(void)
|
||||||
|
|
||||||
static void emith_move_imm(int r, uintptr_t imm)
|
static void emith_move_imm(int r, uintptr_t imm)
|
||||||
{
|
{
|
||||||
u32 lui = imm + _CB(imm,1,11,12);
|
u32 lui = imm + _CB(imm,1,11,12); // compensate for ADDI sign extension
|
||||||
if (lui >> 12) {
|
if (lui >> 12) {
|
||||||
// take out the effect of the sign extension of ADDI
|
|
||||||
EMIT(R5_MOVT_IMM(r, lui));
|
EMIT(R5_MOVT_IMM(r, lui));
|
||||||
if (imm & 0xfff)
|
if (imm & 0xfff)
|
||||||
EMIT(R5_ADD_IMM(r, r, imm));
|
EMIT(R5_ADD_IMM(r, r, imm));
|
||||||
|
|
|
@ -446,7 +446,6 @@ static void rcache_free_tmp(int hr);
|
||||||
// there must be at least 3 PARAM, and PARAM+TEMPORARY must be at least 4.
|
// there must be at least 3 PARAM, and PARAM+TEMPORARY must be at least 4.
|
||||||
// SR must and R0 should by all means be statically mapped.
|
// SR must and R0 should by all means be statically mapped.
|
||||||
// XXX the static definition of SR MUST match that in compiler.h
|
// XXX the static definition of SR MUST match that in compiler.h
|
||||||
// PC and PR must not be statically mapped (accessed in context by utils).
|
|
||||||
|
|
||||||
#ifdef __arm__
|
#ifdef __arm__
|
||||||
#include "../drc/emit_arm.c"
|
#include "../drc/emit_arm.c"
|
||||||
|
@ -3365,7 +3364,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
|
||||||
rcache_get_reg_arg(2, SHR_SR, NULL);
|
rcache_get_reg_arg(2, SHR_SR, NULL);
|
||||||
tmp2 = rcache_get_tmp_arg(0);
|
tmp2 = rcache_get_tmp_arg(0);
|
||||||
tmp3 = rcache_get_tmp_arg(1);
|
tmp3 = rcache_get_tmp_arg(1);
|
||||||
tmp4 = rcache_get_tmp_arg(3);
|
tmp4 = rcache_get_tmp();
|
||||||
emith_move_r_ptr_imm(tmp2, tcache_ptr);
|
emith_move_r_ptr_imm(tmp2, tcache_ptr);
|
||||||
emith_move_r_r_ptr(tmp3, CONTEXT_REG);
|
emith_move_r_r_ptr(tmp3, CONTEXT_REG);
|
||||||
emith_move_r_imm(tmp4, pc);
|
emith_move_r_imm(tmp4, pc);
|
||||||
|
@ -5049,11 +5048,12 @@ static void sh2_generate_utils(void)
|
||||||
emith_add_r_imm(arg2, (u32)(2*sizeof(void *)));
|
emith_add_r_imm(arg2, (u32)(2*sizeof(void *)));
|
||||||
emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *));
|
emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *));
|
||||||
emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx));
|
emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx));
|
||||||
emith_add_r_r_r_lsl_ptr(arg2, CONTEXT_REG, arg2, 0);
|
emith_add_r_r_r_lsl_ptr(arg3, CONTEXT_REG, arg2, 0);
|
||||||
emith_ctx_read(arg3, SHR_PR * 4);
|
rcache_get_reg_arg(2, SHR_PR, NULL);
|
||||||
emith_add_r_ret(arg1);
|
emith_add_r_ret(arg1);
|
||||||
emith_write_r_r_offs_ptr(arg1, arg2, offsetof(SH2, rts_cache)+sizeof(void *));
|
emith_write_r_r_offs_ptr(arg1, arg3, offsetof(SH2, rts_cache)+sizeof(void *));
|
||||||
emith_write_r_r_offs(arg3, arg2, offsetof(SH2, rts_cache));
|
emith_write_r_r_offs(arg2, arg3, offsetof(SH2, rts_cache));
|
||||||
|
rcache_flush();
|
||||||
emith_ret();
|
emith_ret();
|
||||||
emith_flush();
|
emith_flush();
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue