various small improvements and fixes

This commit is contained in:
kub 2019-04-16 20:37:52 +02:00
parent f133766faa
commit d40a5af495
32 changed files with 372 additions and 241 deletions

View file

@ -195,10 +195,10 @@ LDFLAGS += -Wl,-Map=$(TARGET).map
endif endif
target_: pico/pico_int_o32.h $(TARGET) target_: pico/pico_int_offs.h $(TARGET)
clean: clean:
$(RM) $(TARGET) $(OBJS) pico/pico_int_o32.h $(RM) $(TARGET) $(OBJS) pico/pico_int_offs.h
$(RM) -r .opk_data $(RM) -r .opk_data
$(TARGET): $(OBJS) $(TARGET): $(OBJS)
@ -211,7 +211,7 @@ endif
pprof: platform/linux/pprof.c pprof: platform/linux/pprof.c
$(CC) $(CFLAGS) -O2 -ggdb -DPPROF -DPPROF_TOOL -I../../ -I. $^ -o $@ $(LDFLAGS) $(LDLIBS) $(CC) $(CFLAGS) -O2 -ggdb -DPPROF -DPPROF_TOOL -I../../ -I. $^ -o $@ $(LDFLAGS) $(LDLIBS)
pico/pico_int_o32.h:: tools/mkoffsets.sh pico/pico_int_offs.h:: tools/mkoffsets.sh
make -C tools/ XCC="$(CC)" XCFLAGS="$(CFLAGS)" make -C tools/ XCC="$(CC)" XCFLAGS="$(CFLAGS)"
.s.o: .s.o:

View file

@ -4,11 +4,11 @@ CC = arm-gph-linux-gnueabi-gcc
CXX = arm-gph-linux-gnueabi-g++ CXX = arm-gph-linux-gnueabi-g++
AS = arm-gph-linux-gnueabi-as AS = arm-gph-linux-gnueabi-as
STRIP = arm-gph-linux-gnueabi-strip STRIP = arm-gph-linux-gnueabi-strip
CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -fno-stack-protector -D__GP2X__ -DGPERF CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -fno-stack-protector -D__GP2X__
CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers
CFLAGS += -I/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I/home/build/src/gp2x/armroot-eabi/include CFLAGS += -I${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I${HOME}/src/gp2x/armroot-eabi/include
ASFLAGS += -mfloat-abi=soft -mcpu=arm920t ASFLAGS += -mfloat-abi=soft -mcpu=arm920t
LDFLAGS += -B/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L/home/build/src/gp2x/armroot-eabi/lib -static LDFLAGS += -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/src/gp2x/armroot-eabi/lib -static
LDLIBS += -lpng -lm -ldl LDLIBS += -lpng -lm -ldl
ARCH = arm ARCH = arm

View file

@ -6,9 +6,9 @@ AS = arm-linux-gnueabi-as
STRIP = arm-linux-gnueabi-strip STRIP = arm-linux-gnueabi-strip
CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -Wno-unused-result -fno-stack-protector -D__GP2X__ CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -Wno-unused-result -fno-stack-protector -D__GP2X__
CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers
CFLAGS += -I/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I/home/build/src/gp2x/armroot-eabi/include CFLAGS += -I${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I${HOME}/src/gp2x/armroot-eabi/include
ASFLAGS += -mfloat-abi=soft -mcpu=arm920t ASFLAGS += -mfloat-abi=soft -mcpu=arm920t
LDFLAGS += -B/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L/home/build/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -static LDFLAGS += -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -static
LDLIBS += -lpng -lm -ldl LDLIBS += -lpng -lm -ldl
ARCH = arm ARCH = arm

View file

@ -4,12 +4,12 @@ CC = mipsel-linux-gcc
CXX = mipsel-linux-g++ CXX = mipsel-linux-g++
AS = mipsel-linux-as AS = mipsel-linux-as
STRIP = mipsel-linux-strip STRIP = mipsel-linux-strip
CFLAGS += -I/home/build/opt/opendingux-toolchain/usr/include/ CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/
CFLAGS += -I/home/build/opt/opendingux-toolchain/usr/include/SDL CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/SDL
CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector
ASFLAGS += ASFLAGS +=
LDFLAGS += LDFLAGS +=
LDLIBS += -B/home/build/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=/home/build/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=/home/build/opt/opendingux-toolchain/lib -lSDL -lasound -lpng -lm -lstdc++ -ldl LDLIBS += -B${HOME}/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/lib -lSDL -lasound -lpng -lm -lstdc++ -ldl
ARCH = mipsel ARCH = mipsel
PLATFORM = opendingux PLATFORM = opendingux

View file

@ -4,12 +4,12 @@ CC = mipsel-linux-gnu-gcc
CXX = mipsel-linux-gnu-g++ CXX = mipsel-linux-gnu-g++
AS = mipsel-linux-gnu-as AS = mipsel-linux-gnu-as
STRIP = mipsel-linux-gnu-strip STRIP = mipsel-linux-gnu-strip
CFLAGS += -I/home/build/opt/opendingux-toolchain/usr/include/ CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/
CFLAGS += -I/home/build/opt/opendingux-toolchain/usr/include/SDL CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/SDL
CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector
ASFLAGS += ASFLAGS +=
LDFLAGS += LDFLAGS +=
LDLIBS += -B/home/build/opt/opendingux-toolchain/usr/lib -B/home/build/opt/opendingux-toolchain/lib -Wl,-rpath-link=/home/build/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=/home/build/opt/opendingux-toolchain/lib -lSDL -lasound -lpng -lz -lm -lstdc++ -ldl LDLIBS += -B${HOME}/opt/opendingux-toolchain/usr/lib -B${HOME}/opt/opendingux-toolchain/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/lib -lSDL -lasound -lpng -lz -lm -lstdc++ -ldl
ARCH = mipsel ARCH = mipsel
PLATFORM = opendingux PLATFORM = opendingux

View file

@ -5,10 +5,10 @@ CXX = arm-open2x-linux-g++
AS = arm-open2x-linux-as AS = arm-open2x-linux-as
STRIP = arm-open2x-linux-strip STRIP = arm-open2x-linux-strip
CFLAGS += -msoft-float -mcpu=arm920t -mtune=arm920t -D__GP2X__ CFLAGS += -msoft-float -mcpu=arm920t -mtune=arm920t -D__GP2X__
CFLAGS += -I/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I/home/build/src/gp2x/armroot/include CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include
CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers
ASFLAGS += -mcpu=arm920t -mfloat-abi=soft ASFLAGS += -mcpu=arm920t -mfloat-abi=soft
LDFLAGS += --sysroot /home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L/home/build/src/gp2x/armroot/lib -static LDFLAGS += --sysroot ${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/src/gp2x/armroot/lib -static
LDLIBS += -lpng -lm -ldl LDLIBS += -lpng -lm -ldl
ARCH = arm ARCH = arm

View file

@ -5,10 +5,10 @@ CXX = arm-linux-gnueabi-g++
AS = arm-linux-gnueabi-as AS = arm-linux-gnueabi-as
STRIP = arm-linux-gnueabi-strip STRIP = arm-linux-gnueabi-strip
CFLAGS += -mabi=apcs-gnu -mno-thumb-interwork -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -mtune=arm920t -Wno-unused-result -fno-stack-protector -D__GP2X__ CFLAGS += -mabi=apcs-gnu -mno-thumb-interwork -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -mtune=arm920t -Wno-unused-result -fno-stack-protector -D__GP2X__
CFLAGS += -I/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I/home/build/src/gp2x/armroot/include CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include
CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers CFLAGS += -fno-gcse -funswitch-loops -fweb -ftree-loop-im #-fpredictive-commoning -ftree-loop-distribution -frename-registers
ASFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t ASFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t
LDFLAGS += -mabi=apcs-gnu -mfpu=fpa -B/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L/home/build/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L/home/build/src/gp2x/armroot/lib -static LDFLAGS += -mabi=apcs-gnu -mfpu=fpa -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/src/gp2x/armroot/lib -static
LDLIBS += -lpng -lm -ldl LDLIBS += -lpng -lm -ldl
ARCH = arm ARCH = arm

14
config.i386 Normal file
View file

@ -0,0 +1,14 @@
# Automatically generated by configure
# Configured with: './configure' '--platform=generic'
# Build settings for ARCH = i386 using the unprefixed gcc/g++/as/strip tools.
# All flag and library variables are appended with +=, so earlier values are kept.
CC = gcc
CXX = g++
AS = as
STRIP = strip
# -m32 is given both for compiling (CFLAGS) and for linking (LDFLAGS);
# the trailing "-pg" on those two lines is commented out and not in effect.
CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -m32 # -pg
ASFLAGS +=
LDFLAGS += -m32 #-pg
# Library search paths: /usr/lib/i386-linux-gnu plus a per-user ${HOME}/opt/lib32.
LDLIBS += -L/usr/lib/i386-linux-gnu -L${HOME}/opt/lib32 -lSDL-1.2 -lasound -lpng -lz -lm -ldl
# Target description; presumably consumed by the including Makefile -- TODO confirm.
ARCH = i386
PLATFORM = generic
SOUND_DRIVERS = oss alsa sdl

View file

@ -4,11 +4,11 @@ CC = gcc
CXX = g++ CXX = g++
AS = as AS = as
STRIP = strip STRIP = strip
CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -m32 # -pg CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result # -pg
ASFLAGS += ASFLAGS +=
LDFLAGS += -m32 #-pg LDFLAGS += #-pg
LDLIBS += -L/usr/lib/i386-linux-gnu/debug -L/home/build/opt/lib32 -lSDL-1.2 -lasound -lpng -lz -lm -ldl LDLIBS += -L/usr/lib/x86_64-linux-gnu -lSDL-1.2 -lasound -lpng -lz -lm -ldl
ARCH = x86 ARCH = x86_64
PLATFORM = generic PLATFORM = generic
SOUND_DRIVERS = oss alsa sdl SOUND_DRIVERS = oss alsa sdl

View file

@ -14,6 +14,7 @@
#include "cz80.h" #include "cz80.h"
#if PICODRIVE_HACKS #if PICODRIVE_HACKS
#include <pico/pico_int.h>
#include <pico/memory.h> #include <pico/memory.h>
#endif #endif

View file

@ -1,9 +1,3 @@
typedef unsigned char u8;
typedef signed char s8;
typedef unsigned short u16;
typedef signed short s16;
typedef unsigned int u32;
typedef signed int s32;
#define DRC_TCACHE_SIZE (4*1024*1024) #define DRC_TCACHE_SIZE (4*1024*1024)

View file

@ -177,26 +177,25 @@
#define EOP_C_AM3_REG(cond,u,l,rn,rd,s,h,rm) EOP_C_AM3(cond,u,0,l,rn,rd,s,h,rm) #define EOP_C_AM3_REG(cond,u,l,rn,rd,s,h,rm) EOP_C_AM3(cond,u,0,l,rn,rd,s,h,rm)
/* ldr and str */ /* ldr and str */
#define EOP_LDR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,1,0,1,rn,rd,offset_12) #define EOP_LDR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,1,rn,rd,abs(offset_12))
#define EOP_LDRB_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,1,1,1,rn,rd,offset_12) #define EOP_LDRB_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,1,1,rn,rd,abs(offset_12))
#define EOP_STR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,0,rn,rd,abs(offset_12)) #define EOP_STR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,0,rn,rd,abs(offset_12))
#define EOP_LDR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,1,0,1,rn,rd,offset_12) #define EOP_LDR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,(offset_12) >= 0,0,1,rn,rd,abs(offset_12))
#define EOP_LDR_NEGIMM(rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,0,0,1,rn,rd,offset_12)
#define EOP_LDR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,1,rn,rd,0) #define EOP_LDR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,1,rn,rd,0)
#define EOP_STR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,1,0,0,rn,rd,offset_12) #define EOP_STR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,(offset_12) >= 0,0,0,rn,rd,abs(offset_12))
#define EOP_STR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,0,rn,rd,0) #define EOP_STR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,0,rn,rd,0)
#define EOP_LDR_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,1,rn,rd,shift_imm,A_AM1_LSL,rm) #define EOP_LDR_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,1,rn,rd,shift_imm,A_AM1_LSL,rm)
#define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm); #define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm);
#define EOP_LDRH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,1,1,rn,rd,0,1,offset_8) #define EOP_LDRH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,0,1,abs(offset_8))
#define EOP_LDRH_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,0,1,rm) #define EOP_LDRH_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,0,1,rm)
#define EOP_LDRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,1,1,rn,rd,0,1,offset_8) #define EOP_LDRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,(offset_8) >= 0,1,rn,rd,0,1,abs(offset_8))
#define EOP_LDRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,1,rn,rd,0,1,0) #define EOP_LDRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,1,rn,rd,0,1,0)
#define EOP_LDRH_REG( rd,rn,rm) EOP_C_AM3_REG(A_COND_AL,1,1,rn,rd,0,1,rm) #define EOP_LDRH_REG( rd,rn,rm) EOP_C_AM3_REG(A_COND_AL,1,1,rn,rd,0,1,rm)
#define EOP_STRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,1,0,rn,rd,0,1,offset_8) #define EOP_STRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,(offset_8) >= 0,0,rn,rd,0,1,abs(offset_8))
#define EOP_STRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,0,rn,rd,0,1,0) #define EOP_STRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,0,rn,rd,0,1,0)
#define EOP_STRH_REG( rd,rn,rm) EOP_C_AM3_REG(A_COND_AL,1,0,rn,rd,0,1,rm) #define EOP_STRH_REG( rd,rn,rm) EOP_C_AM3_REG(A_COND_AL,1,0,rn,rd,0,1,rm)
@ -285,11 +284,29 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int
imm = ~imm; imm = ~imm;
op = A_OP_MVN; op = A_OP_MVN;
} }
#ifdef HAVE_ARMV7
for (v = imm, ror2 = 0; v && !(v & 3); v >>= 2)
ror2--;
if (v >> 8) {
/* 2+ insns needed - prefer movw/movt */
if (op == A_OP_MVN)
imm = ~imm;
EOP_MOVW(rd, imm);
if (imm & 0xffff0000)
EOP_MOVT(rd, imm);
return;
}
#endif
break; break;
case A_OP_EOR:
case A_OP_SUB: case A_OP_SUB:
case A_OP_ADD: case A_OP_ADD:
// count bits in imm and swap ADD and SUB if more bits 1 than 0
if (s == 0 && count_bits(imm) > 16) {
imm = -imm;
op ^= (A_OP_ADD^A_OP_SUB);
}
case A_OP_EOR:
case A_OP_ORR: case A_OP_ORR:
case A_OP_BIC: case A_OP_BIC:
if (s == 0 && imm == 0 && rd == rn) if (s == 0 && imm == 0 && rd == rn)
@ -412,6 +429,8 @@ static int emith_xbranch(int cond, void *target, int is_call)
#define emith_add_r_r_r_lsl(d, s1, s2, lslimm) \ #define emith_add_r_r_r_lsl(d, s1, s2, lslimm) \
EOP_ADD_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) EOP_ADD_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
#define emith_add_r_r_r_lsl_ptr(d, s1, s2, lslimm) \
emith_add_r_r_r_lsl(d, s1, s2, lslimm)
#define emith_addf_r_r_r_lsl(d, s1, s2, lslimm) \ #define emith_addf_r_r_r_lsl(d, s1, s2, lslimm) \
EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm) EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm)
@ -483,7 +502,7 @@ static int emith_xbranch(int cond, void *target, int is_call)
emith_add_r_r_r(d, d, s) emith_add_r_r_r(d, d, s)
#define emith_sub_r_r(d, s) \ #define emith_sub_r_r(d, s) \
EOP_SUB_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0) emith_sub_r_r_r(d, d, s)
#define emith_adc_r_r(d, s) \ #define emith_adc_r_r(d, s) \
EOP_ADC_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0) EOP_ADC_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0)
@ -529,6 +548,9 @@ static int emith_xbranch(int cond, void *target, int is_call)
#define emith_move_r_imm(r, imm) \ #define emith_move_r_imm(r, imm) \
emith_op_imm(A_COND_AL, 0, A_OP_MOV, r, imm) emith_op_imm(A_COND_AL, 0, A_OP_MOV, r, imm)
/* load a pointer-valued constant: forwards to emith_move_r_imm with imm cast to u32 */
/* NOTE(review): the u32 cast presumably relies on pointers being 32 bits wide here -- confirm */
#define emith_move_r_ptr_imm(r, imm) \
emith_move_r_imm(r, (u32)(imm))
#define emith_add_r_imm(r, imm) \ #define emith_add_r_imm(r, imm) \
emith_op_imm(A_COND_AL, 0, A_OP_ADD, r, imm) emith_op_imm(A_COND_AL, 0, A_OP_ADD, r, imm)
@ -536,7 +558,7 @@ static int emith_xbranch(int cond, void *target, int is_call)
emith_op_imm(A_COND_AL, 0, A_OP_ADC, r, imm) emith_op_imm(A_COND_AL, 0, A_OP_ADC, r, imm)
#define emith_adcf_r_imm(r, imm) \ #define emith_adcf_r_imm(r, imm) \
emith_op_imm(A_COND_AL, 1, A_OP_ADC, r, (imm)) emith_op_imm(A_COND_AL, 1, A_OP_ADC, r, imm)
#define emith_sub_r_imm(r, imm) \ #define emith_sub_r_imm(r, imm) \
emith_op_imm(A_COND_AL, 0, A_OP_SUB, r, imm) emith_op_imm(A_COND_AL, 0, A_OP_SUB, r, imm)
@ -610,13 +632,13 @@ static int emith_xbranch(int cond, void *target, int is_call)
emith_op_imm2(A_COND_AL, 0, A_OP_SUB, d, s, imm) emith_op_imm2(A_COND_AL, 0, A_OP_SUB, d, s, imm)
#define emith_subf_r_r_imm(d, s, imm) \ #define emith_subf_r_r_imm(d, s, imm) \
emith_op_imm2(A_COND_AL, 1, A_OP_SUB, d, s, (imm)) emith_op_imm2(A_COND_AL, 1, A_OP_SUB, d, s, imm)
#define emith_or_r_r_imm(d, s, imm) \ #define emith_or_r_r_imm(d, s, imm) \
emith_op_imm2(A_COND_AL, 0, A_OP_ORR, d, s, (imm)) emith_op_imm2(A_COND_AL, 0, A_OP_ORR, d, s, imm)
#define emith_eor_r_r_imm(d, s, imm) \ #define emith_eor_r_r_imm(d, s, imm) \
emith_op_imm2(A_COND_AL, 0, A_OP_EOR, d, s, (imm)) emith_op_imm2(A_COND_AL, 0, A_OP_EOR, d, s, imm)
#define emith_neg_r_r(d, s) \ #define emith_neg_r_r(d, s) \
EOP_RSB_IMM(d, s, 0, 0) EOP_RSB_IMM(d, s, 0, 0)
@ -758,7 +780,7 @@ static int emith_xbranch(int cond, void *target, int is_call)
#define emith_clear_msb_c(cond, d, s, count) { \ #define emith_clear_msb_c(cond, d, s, count) { \
u32 t; \ u32 t; \
if ((count) <= 8) { \ if ((count) <= 8) { \
t = (count) - 8; \ t = 8 - (count); \
t = (0xff << t) & 0xff; \ t = (0xff << t) & 0xff; \
EOP_C_DOP_IMM(cond,A_OP_BIC,0,s,d,8/2,t); \ EOP_C_DOP_IMM(cond,A_OP_BIC,0,s,d,8/2,t); \
} else if ((count) >= 24) { \ } else if ((count) >= 24) { \
@ -880,7 +902,9 @@ static int emith_xbranch(int cond, void *target, int is_call)
#define emith_sh2_rcall(a, tab, func, mask) { \ #define emith_sh2_rcall(a, tab, func, mask) { \
emith_lsr(mask, a, SH2_READ_SHIFT); \ emith_lsr(mask, a, SH2_READ_SHIFT); \
EOP_ADD_REG_LSL(tab, tab, mask, 3); \ EOP_ADD_REG_LSL(tab, tab, mask, 3); \
EOP_LDMIA(tab, (1<<func)|(1<<mask)); \ if (func < mask) EOP_LDMIA(tab, (1<<func)|(1<<mask)); /* ldm if possible */ \
else { emith_read_r_r_offs(func, tab, 0); \
emith_read_r_r_offs(mask, tab, 4); } \
emith_addf_r_r_r(func,func,func); \ emith_addf_r_r_r(func,func,func); \
} }

View file

@ -194,6 +194,17 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
} \ } \
} while (0) } while (0)
/*
 * d = s1 + s2, built from the two-operand "_ptr" helpers.
 * If d is the same register as one source, the other source is added in place;
 * otherwise s1 is copied into d first and s2 is added afterwards.
 * NOTE(review): the arguments are expanded unparenthesized and more than once,
 * so callers should pass plain register numbers without side effects.
 */
#define emith_add_r_r_r_ptr(d, s1, s2) do { \
if (d == s1) { \
emith_add_r_r_ptr(d, s2); \
} else if (d == s2) { \
emith_add_r_r_ptr(d, s1); \
} else { \
emith_move_r_r_ptr(d, s1); \
emith_add_r_r_ptr(d, s2); \
} \
} while (0)
#define emith_sub_r_r_r(d, s1, s2) do { \ #define emith_sub_r_r_r(d, s1, s2) do { \
if (d == s1) { \ if (d == s1) { \
emith_sub_r_r(d, s2); \ emith_sub_r_r(d, s2); \
@ -268,9 +279,16 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
rcache_free_tmp(tmp_); \ rcache_free_tmp(tmp_); \
} while (0) } while (0)
#define emith_add_r_r_r_lsr(d, s1, s2, lslimm) do { \ #define emith_add_r_r_r_lsl_ptr(d, s1, s2, lslimm) do { \
int tmp_ = rcache_get_tmp(); \ int tmp_ = rcache_get_tmp(); \
emith_lsr(tmp_, s2, lslimm); \ emith_lsl(tmp_, s2, lslimm); \
emith_add_r_r_r_ptr(d, s1, tmp_); \
rcache_free_tmp(tmp_); \
} while (0)
#define emith_add_r_r_r_lsr(d, s1, s2, lsrimm) do { \
int tmp_ = rcache_get_tmp(); \
emith_lsr(tmp_, s2, lsrimm); \
emith_add_r_r_r(d, s1, tmp_); \ emith_add_r_r_r(d, s1, tmp_); \
rcache_free_tmp(tmp_); \ rcache_free_tmp(tmp_); \
} while (0) } while (0)
@ -297,6 +315,16 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
EMIT(imm, u32); \ EMIT(imm, u32); \
} while (0) } while (0)
/*
 * Load the pointer-sized constant imm into register r.
 * A value that fits in 32 bits (<= UINT32_MAX) is handed to emith_move_r_imm();
 * anything larger is emitted as EMIT_REX_IF(1, 0, r), opcode 0xb8 + r and the
 * full 64-bit immediate.
 * NOTE(review): 0xb8 + (r) presumably assumes r is in 0..7 -- confirm against
 * the register numbers callers pass.
 */
#define emith_move_r_ptr_imm(r, imm) do { \
if ((uint64_t)(imm) <= UINT32_MAX) \
emith_move_r_imm(r, (uintptr_t)(imm)); \
else { \
EMIT_REX_IF(1, 0, r); \
EMIT_OP(0xb8 + (r)); \
EMIT((uint64_t)(imm), uint64_t); \
} \
} while (0)
#define emith_move_r_imm_s8(r, imm) \ #define emith_move_r_imm_s8(r, imm) \
emith_move_r_imm(r, (u32)(signed int)(signed char)(imm)) emith_move_r_imm(r, (u32)(signed int)(signed char)(imm))
@ -421,27 +449,28 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
#define emith_sub_r_r_imm(d, s, imm) do { \ #define emith_sub_r_r_imm(d, s, imm) do { \
if (d != s) \ if (d != s) \
emith_move_r_r(d, s); \ emith_move_r_r(d, s); \
if (imm) \ if ((s32)(imm) != 0) \
emith_sub_r_imm(d, imm); \ emith_sub_r_imm(d, imm); \
} while (0) } while (0)
#define emith_and_r_r_imm(d, s, imm) do { \ #define emith_and_r_r_imm(d, s, imm) do { \
if (d != s) \ if (d != s) \
emith_move_r_r(d, s); \ emith_move_r_r(d, s); \
emith_and_r_imm(d, imm); \ if ((s32)(imm) != -1) \
emith_and_r_imm(d, imm); \
} while (0) } while (0)
#define emith_or_r_r_imm(d, s, imm) do { \ #define emith_or_r_r_imm(d, s, imm) do { \
if (d != s) \ if (d != s) \
emith_move_r_r(d, s); \ emith_move_r_r(d, s); \
if ((s32)imm != 0) \ if ((s32)(imm) != 0) \
emith_or_r_imm(d, imm); \ emith_or_r_imm(d, imm); \
} while (0) } while (0)
#define emith_eor_r_r_imm(d, s, imm) do { \ #define emith_eor_r_r_imm(d, s, imm) do { \
if (d != s) \ if (d != s) \
emith_move_r_r(d, s); \ emith_move_r_r(d, s); \
if ((s32)imm != 0) \ if ((s32)(imm) != 0) \
emith_eor_r_imm(d, imm); \ emith_eor_r_imm(d, imm); \
} while (0) } while (0)
@ -612,31 +641,17 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
EMIT_REX_IF(1, r, rs); \ EMIT_REX_IF(1, r, rs); \
emith_deref_op(0x89, r, rs, offs) emith_deref_op(0x89, r, rs, offs)
// note: don't use prefixes on this
#define emith_read8_r_r_offs(r, rs, offs) do { \ #define emith_read8_r_r_offs(r, rs, offs) do { \
int r_ = r; \
if (!is_abcdx(r)) \
r_ = rcache_get_tmp(); \
EMIT(0x0f, u8); \ EMIT(0x0f, u8); \
emith_deref_op(0xb6, r_, rs, offs); \ emith_deref_op(0xb6, r, rs, offs); \
if ((r) != r_) { \
emith_move_r_r(r, r_); \
rcache_free_tmp(r_); \
} \
} while (0) } while (0)
#define emith_read8s_r_r_offs(r, rs, offs) do { \ #define emith_read8s_r_r_offs(r, rs, offs) do { \
int r_ = r; \
if (!is_abcdx(r)) \
r_ = rcache_get_tmp(); \
EMIT(0x0f, u8); \ EMIT(0x0f, u8); \
emith_deref_op(0xbe, r_, rs, offs); \ emith_deref_op(0xbe, r, rs, offs); \
if ((r) != r_) { \
emith_move_r_r(r, r_); \
rcache_free_tmp(r_); \
} \
} while (0) } while (0)
// note: don't use prefixes on this
#define emith_write8_r_r_offs(r, rs, offs) do {\ #define emith_write8_r_r_offs(r, rs, offs) do {\
int r_ = r; \ int r_ = r; \
if (!is_abcdx(r)) { \ if (!is_abcdx(r)) { \
@ -664,16 +679,9 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
} while (0) } while (0)
#define emith_read8_r_r_r(r, rs, rm) do { \ #define emith_read8_r_r_r(r, rs, rm) do { \
int r_ = r; \
if (!is_abcdx(r)) \
r_ = rcache_get_tmp(); \
EMIT(0x0f, u8); \ EMIT(0x0f, u8); \
EMIT_OP_MODRM(0xb6, 0, r, 4); \ EMIT_OP_MODRM(0xb6, 0, r, 4); \
EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \
if ((r) != r_) { \
emith_move_r_r(r, r_); \
rcache_free_tmp(r_); \
} \
} while (0) } while (0)
#define emith_read16_r_r_r(r, rs, rm) do { \ #define emith_read16_r_r_r(r, rs, rm) do { \

View file

@ -56,9 +56,10 @@
// 04 - asm // 04 - asm
// 08 - runtime block entry log // 08 - runtime block entry log
// 10 - smc self-check // 10 - smc self-check
// 20 - runtime block entry counter
// 100 - write trace // 100 - write trace
// 200 - compare trace // 200 - compare trace
// 400 - block entry backtraceA on exit // 400 - block entry backtrace on exit
// 800 - state dump on exit // 800 - state dump on exit
// { // {
#ifndef DRC_DEBUG #ifndef DRC_DEBUG
@ -178,7 +179,7 @@ static char sh2dasm_buff[64];
} }
#if (DRC_DEBUG & (8|256|512|1024)) || defined(PDB) #if (DRC_DEBUG & (8|256|512|1024)) || defined(PDB)
static SH2 csh2[2][4]; static SH2 csh2[2][8];
static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr)
{ {
if (block != NULL) { if (block != NULL) {
@ -190,7 +191,6 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr)
{ {
static FILE *trace[2]; static FILE *trace[2];
int idx = sh2->is_slave; int idx = sh2->is_slave;
if (sh2 != &sh2s[0] && sh2 != &sh2s[1]) printf("sh2 %p?\n",sh2);
if (!trace[0]) { if (!trace[0]) {
truncate("pico.trace", 0); truncate("pico.trace", 0);
trace[0] = fopen("pico.trace0", "wb"); trace[0] = fopen("pico.trace0", "wb");
@ -199,7 +199,8 @@ if (sh2 != &sh2s[0] && sh2 != &sh2s[1]) printf("sh2 %p?\n",sh2);
if (csh2[idx][0].pc != sh2->pc) { if (csh2[idx][0].pc != sh2->pc) {
fwrite(sh2, offsetof(SH2, read8_map), 1, trace[idx]); fwrite(sh2, offsetof(SH2, read8_map), 1, trace[idx]);
fwrite(&sh2->pdb_io_csum, sizeof(sh2->pdb_io_csum), 1, trace[idx]); fwrite(&sh2->pdb_io_csum, sizeof(sh2->pdb_io_csum), 1, trace[idx]);
memcpy(&csh2[idx][0], sh2, offsetof(SH2, icount)); memcpy(&csh2[idx][0], sh2, offsetof(SH2, poll_cnt)+4);
csh2[idx][0].is_slave = idx;
} }
} }
#elif (DRC_DEBUG & 512) #elif (DRC_DEBUG & 512)
@ -234,9 +235,10 @@ if (sh2 != &sh2s[0] && sh2 != &sh2s[1]) printf("sh2 %p?\n",sh2);
#elif (DRC_DEBUG & 1024) #elif (DRC_DEBUG & 1024)
{ {
int x = sh2->is_slave, i; int x = sh2->is_slave, i;
for (i = 0; i < ARRAY_SIZE(csh2[x]); i++) for (i = 0; i < ARRAY_SIZE(csh2[x])-1; i++)
memcpy(&csh2[x][i], &csh2[x][i+1], offsetof(SH2, icount)); memcpy(&csh2[x][i], &csh2[x][i+1], offsetof(SH2, poll_cnt)+4);
memcpy(&csh2[x][3], sh2, offsetof(SH2, icount)); memcpy(&csh2[x][ARRAY_SIZE(csh2[x])-1], sh2, offsetof(SH2, poll_cnt)+4);
csh2[x][0].is_slave = x;
} }
#endif #endif
} }
@ -252,9 +254,9 @@ if (sh2 != &sh2s[0] && sh2 != &sh2s[1]) printf("sh2 %p?\n",sh2);
// and can be discarded early // and can be discarded early
// XXX: need to tune sizes // XXX: need to tune sizes
static const int tcache_sizes[TCACHE_BUFFERS] = { static const int tcache_sizes[TCACHE_BUFFERS] = {
DRC_TCACHE_SIZE * 6 / 8, // ROM (rarely used), DRAM DRC_TCACHE_SIZE * 14 / 16, // ROM (rarely used), DRAM
DRC_TCACHE_SIZE / 8, // BIOS, data array in master sh2 DRC_TCACHE_SIZE / 16, // BIOS, data array in master sh2
DRC_TCACHE_SIZE / 8, // ... slave DRC_TCACHE_SIZE / 16, // ... slave
}; };
static u8 *tcache_bases[TCACHE_BUFFERS]; static u8 *tcache_bases[TCACHE_BUFFERS];
@ -287,6 +289,9 @@ struct block_entry {
#if (DRC_DEBUG & 2) #if (DRC_DEBUG & 2)
struct block_desc *block; struct block_desc *block;
#endif #endif
#if (DRC_DEBUG & 32)
int entry_count;
#endif
}; };
struct block_desc { struct block_desc {
@ -698,6 +703,14 @@ static void add_to_hashlist(struct block_entry *be, int tcache_id)
(*head)->prev = be; (*head)->prev = be;
be->next = *head; be->next = *head;
*head = be; *head = be;
#if (DRC_DEBUG & 2)
if (be->next != NULL) {
printf(" %08x: entry hash collision with %08x\n",
be->pc, be->next->pc);
hash_collisions++;
}
#endif
} }
static void rm_from_hashlist(struct block_entry *be, int tcache_id) static void rm_from_hashlist(struct block_entry *be, int tcache_id)
@ -727,6 +740,14 @@ static void add_to_hashlist_unresolved(struct block_link *bl, int tcache_id)
u32 tcmask = hash_table_sizes[tcache_id] - 1; u32 tcmask = hash_table_sizes[tcache_id] - 1;
struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask); struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask);
#if DRC_DEBUG & 1
struct block_link *current = *head;
while (current != NULL && current != bl)
current = current->next;
if (current == bl)
dbg(1, "add_to_hashlist_unresolved @%p: bl %p %p %08x already in?", head, bl, bl->target, bl->target_pc);
#endif
bl->target = NULL; // marker for not resolved bl->target = NULL; // marker for not resolved
bl->prev = NULL; bl->prev = NULL;
if (*head) if (*head)
@ -745,7 +766,7 @@ static void rm_from_hashlist_unresolved(struct block_link *bl, int tcache_id)
while (current->prev != NULL) while (current->prev != NULL)
current = current->prev; current = current->prev;
if (current != *head) if (current != *head)
dbg(1, "rm_from_hashlist unresolved @%p: bl %p %p %08x missing?", head, bl, bl->target, bl->target_pc); dbg(1, "rm_from_hashlist_unresolved @%p: bl %p %p %08x missing?", head, bl, bl->target, bl->target_pc);
#endif #endif
if (bl->prev != NULL) if (bl->prev != NULL)
@ -980,10 +1001,12 @@ static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_sla
struct block_entry *be = NULL; struct block_entry *be = NULL;
int target_tcache_id; int target_tcache_id;
// get the target block entry
be = dr_get_entry(pc, is_slave, &target_tcache_id); be = dr_get_entry(pc, is_slave, &target_tcache_id);
if (target_tcache_id && target_tcache_id != tcache_id) if (target_tcache_id && target_tcache_id != tcache_id)
return sh2_drc_dispatcher; return sh2_drc_dispatcher;
// get a block link
if (blink_free[tcache_id] != NULL) { if (blink_free[tcache_id] != NULL) {
bl = blink_free[tcache_id]; bl = blink_free[tcache_id];
blink_free[tcache_id] = bl->next; blink_free[tcache_id] = bl->next;
@ -995,6 +1018,7 @@ static void *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_sla
block_link_pool_counts[tcache_id] = cnt+1; block_link_pool_counts[tcache_id] = cnt+1;
} }
// prepare link and add to outgoing list of owner
bl->tcache_id = tcache_id; bl->tcache_id = tcache_id;
bl->target_pc = pc; bl->target_pc = pc;
bl->jump = tcache_ptr; bl->jump = tcache_ptr;
@ -1940,6 +1964,7 @@ static void rcache_invalidate(void)
cache_regs[i].type = HR_FREE; cache_regs[i].type = HR_FREE;
cache_regs[i].gregs = 0; cache_regs[i].gregs = 0;
} }
for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { for (i = 0; i < ARRAY_SIZE(guest_regs); i++) {
guest_regs[i].flags &= GRF_STATIC; guest_regs[i].flags &= GRF_STATIC;
if (!(guest_regs[i].flags & GRF_STATIC)) if (!(guest_regs[i].flags & GRF_STATIC))
@ -1953,7 +1978,8 @@ static void rcache_invalidate(void)
cache_regs[guest_regs[i].sreg].gregs = 1 << i; cache_regs[guest_regs[i].sreg].gregs = 1 << i;
guest_regs[i].vreg = guest_regs[i].sreg; guest_regs[i].vreg = guest_regs[i].sreg;
} }
}; }
rcache_counter = 0; rcache_counter = 0;
rcache_hint_soon = rcache_hint_late = 0; rcache_hint_soon = rcache_hint_late = 0;
@ -2005,6 +2031,7 @@ static int emit_get_rbase_and_offs(SH2 *sh2, u32 a, u32 *offs)
u32 mask = 0; u32 mask = 0;
int poffs; int poffs;
int hr; int hr;
unsigned long la;
poffs = dr_ctx_get_mem_ptr(a, &mask); poffs = dr_ctx_get_mem_ptr(a, &mask);
if (poffs == -1) if (poffs == -1)
@ -2014,15 +2041,16 @@ static int emit_get_rbase_and_offs(SH2 *sh2, u32 a, u32 *offs)
if (mask < 0x1000) { if (mask < 0x1000) {
// can't access data array or BIOS directly from ROM or SDRAM, // can't access data array or BIOS directly from ROM or SDRAM,
// since code may run on both SH2s (tcache_id of translation block needed)) // since code may run on both SH2s (tcache_id of translation block needed))
emith_ctx_read(hr, poffs); emith_ctx_read_ptr(hr, poffs);
if (a & mask & ~omask) if (a & mask & ~omask)
emith_add_r_imm(hr, a & mask & ~omask); emith_add_r_r_ptr_imm(hr, hr, a & mask & ~omask);
*offs = a & omask;
} else { } else {
// known fixed host address // known fixed host address
a = (a & mask) + *(u32 *)((char *)sh2 + poffs); la = (unsigned long)*(void **)((char *)sh2 + poffs) + (a & mask);
emith_move_r_imm(hr, (a & ~omask)); *offs = la & omask;
emith_move_r_ptr_imm(hr, la & ~omask);
} }
*offs = a & omask;
return hr; return hr;
} }
@ -2392,8 +2420,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
void *branch_patch_ptr[MAX_LOCAL_BRANCHES]; void *branch_patch_ptr[MAX_LOCAL_BRANCHES];
u32 branch_patch_pc[MAX_LOCAL_BRANCHES]; u32 branch_patch_pc[MAX_LOCAL_BRANCHES];
int branch_patch_count = 0; int branch_patch_count = 0;
u32 literal_addr[MAX_LITERALS];
int literal_addr_count = 0;
u8 op_flags[BLOCK_INSN_LIMIT]; u8 op_flags[BLOCK_INSN_LIMIT];
struct { struct {
u32 test_irq:1; u32 test_irq:1;
@ -2473,7 +2499,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
{ {
u32 delay_dep_fw = 0, delay_dep_bk = 0; u32 delay_dep_fw = 0, delay_dep_bk = 0;
int tmp3, tmp4; int tmp3, tmp4;
u32 sr; int sr;
opd = &ops[i]; opd = &ops[i];
op = FETCH_OP(pc); op = FETCH_OP(pc);
@ -2487,7 +2513,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
pc, op, sh2dasm_buff); pc, op, sh2dasm_buff);
#endif #endif
if ((op_flags[i] & OF_BTARGET) || pc == base_pc) if (op_flags[i] & OF_BTARGET)
{ {
if (pc != base_pc) if (pc != base_pc)
{ {
@ -2517,6 +2543,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
else { else {
dbg(1, "too many entryp for block #%d,%d pc=%08x", dbg(1, "too many entryp for block #%d,%d pc=%08x",
tcache_id, blkid_main, pc); tcache_id, blkid_main, pc);
break;
} }
} else { } else {
entry = block->entryp; entry = block->entryp;
@ -2537,10 +2564,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
#if (DRC_DEBUG & 0x10) #if (DRC_DEBUG & 0x10)
rcache_get_reg_arg(0, SHR_PC, NULL); rcache_get_reg_arg(0, SHR_PC, NULL);
tmp = emit_memhandler_read(2); tmp = emit_memhandler_read(1);
tmp2 = rcache_get_tmp(); tmp2 = rcache_get_tmp();
tmp3 = rcache_get_tmp(); tmp3 = rcache_get_tmp();
emith_move_r_imm(tmp2, FETCH32(pc)); emith_move_r_imm(tmp2, (s16)FETCH_OP(pc));
emith_move_r_imm(tmp3, 0); emith_move_r_imm(tmp3, 0);
emith_cmp_r_r(tmp, tmp2); emith_cmp_r_r(tmp, tmp2);
EMITH_SJMP_START(DCOND_EQ); EMITH_SJMP_START(DCOND_EQ);
@ -2556,9 +2583,20 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
emith_cmp_r_imm(sr, 0); emith_cmp_r_imm(sr, 0);
emith_jump_cond(DCOND_LE, sh2_drc_exit); emith_jump_cond(DCOND_LE, sh2_drc_exit);
#if (DRC_DEBUG & 32)
// block hit counter
tmp = rcache_get_tmp_arg(0);
tmp2 = rcache_get_tmp_arg(1);
emith_move_r_ptr_imm(tmp, (uptr)entry);
emith_read_r_r_offs(tmp2, tmp, offsetof(struct block_entry, entry_count));
emith_add_r_imm(tmp2, 1);
emith_write_r_r_offs(tmp2, tmp, offsetof(struct block_entry, entry_count));
rcache_free_tmp(tmp);
rcache_free_tmp(tmp2);
#endif
#if (DRC_DEBUG & (8|256|512|1024)) #if (DRC_DEBUG & (8|256|512|1024))
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
FLUSH_CYCLES(sr);
rcache_clean(); rcache_clean();
tmp = rcache_used_hreg_mask(); tmp = rcache_used_hreg_mask();
emith_save_caller_regs(tmp); emith_save_caller_regs(tmp);
@ -2566,7 +2604,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
rcache_get_reg_arg(2, SHR_SR, NULL); rcache_get_reg_arg(2, SHR_SR, NULL);
tmp2 = rcache_get_tmp_arg(0); tmp2 = rcache_get_tmp_arg(0);
tmp3 = rcache_get_tmp_arg(1); tmp3 = rcache_get_tmp_arg(1);
emith_move_r_imm(tmp2, (u32)tcache_ptr); emith_move_r_ptr_imm(tmp2, tcache_ptr);
emith_move_r_r_ptr(tmp3,CONTEXT_REG); emith_move_r_r_ptr(tmp3,CONTEXT_REG);
emith_call(sh2_drc_log_entry); emith_call(sh2_drc_log_entry);
emith_restore_caller_regs(tmp); emith_restore_caller_regs(tmp);
@ -2776,7 +2814,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
if ((opd->imm && opd->imm >= base_pc && opd->imm < end_literals) || if ((opd->imm && opd->imm >= base_pc && opd->imm < end_literals) ||
dr_is_rom(opd->imm)) dr_is_rom(opd->imm))
{ {
ADD_TO_ARRAY(literal_addr, literal_addr_count, opd->imm,);
if (opd->size == 2) if (opd->size == 2)
u = FETCH32(opd->imm); u = FETCH32(opd->imm);
else else
@ -2862,8 +2899,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
case 0x06: // MOV.L Rm,@(R0,Rn) 0000nnnnmmmm0110 case 0x06: // MOV.L Rm,@(R0,Rn) 0000nnnnmmmm0110
emit_indirect_indexed_write(sh2, GET_Rm(), SHR_R0, GET_Rn(), op & 3); emit_indirect_indexed_write(sh2, GET_Rm(), SHR_R0, GET_Rn(), op & 3);
goto end_op; goto end_op;
case 0x07: case 0x07: // MUL.L Rm,Rn 0000nnnnmmmm0111
// MUL.L Rm,Rn 0000nnnnmmmm0111
tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL); tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL);
@ -2941,8 +2977,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
goto default_; goto default_;
///////////////////////////////////////////// /////////////////////////////////////////////
case 0x01: case 0x01: // MOV.L Rm,@(disp,Rn) 0001nnnnmmmmdddd
// MOV.L Rm,@(disp,Rn) 0001nnnnmmmmdddd
emit_memhandler_write_rr(sh2, GET_Rm(), GET_Rn(), (op & 0x0f) * 4, 2); emit_memhandler_write_rr(sh2, GET_Rm(), GET_Rn(), (op & 0x0f) * 4, 2);
goto end_op; goto end_op;
@ -3346,19 +3381,16 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
case 0x09: case 0x09:
switch (GET_Fx()) switch (GET_Fx())
{ {
case 0: case 0: // SHLL2 Rn 0100nnnn00001000
// SHLL2 Rn 0100nnnn00001000 // SHLR2 Rn 0100nnnn00001001
// SHLR2 Rn 0100nnnn00001001
tmp = 2; tmp = 2;
break; break;
case 1: case 1: // SHLL8 Rn 0100nnnn00011000
// SHLL8 Rn 0100nnnn00011000 // SHLR8 Rn 0100nnnn00011001
// SHLR8 Rn 0100nnnn00011001
tmp = 8; tmp = 8;
break; break;
case 2: case 2: // SHLL16 Rn 0100nnnn00101000
// SHLL16 Rn 0100nnnn00101000 // SHLR16 Rn 0100nnnn00101001
// SHLR16 Rn 0100nnnn00101001
tmp = 16; tmp = 16;
break; break;
default: default:
@ -3432,8 +3464,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
} else } else
emit_move_r_r(tmp2, GET_Rn()); emit_move_r_r(tmp2, GET_Rn());
goto end_op; goto end_op;
case 0x0f: case 0x0f: // MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111
// MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111
emit_indirect_read_double(sh2, &tmp, &tmp2, GET_Rn(), GET_Rm(), 1); emit_indirect_read_double(sh2, &tmp, &tmp2, GET_Rn(), GET_Rm(), 1);
sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW, NULL); tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW, NULL);
@ -3446,8 +3477,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
goto default_; goto default_;
///////////////////////////////////////////// /////////////////////////////////////////////
case 0x05: case 0x05: // MOV.L @(disp,Rm),Rn 0101nnnnmmmmdddd
// MOV.L @(disp,Rm),Rn 0101nnnnmmmmdddd
emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), (op & 0x0f) * 4, 2); emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), (op & 0x0f) * 4, 2);
goto end_op; goto end_op;
@ -3519,8 +3549,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
goto default_; goto default_;
///////////////////////////////////////////// /////////////////////////////////////////////
case 0x07: case 0x07: // ADD #imm,Rn 0111nnnniiiiiiii
// ADD #imm,Rn 0111nnnniiiiiiii
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
if (op & 0x80) { // adding negative if (op & 0x80) { // adding negative
emith_sub_r_r_imm(tmp, tmp2, -op & 0xff); emith_sub_r_r_imm(tmp, tmp2, -op & 0xff);
@ -3621,8 +3650,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
goto default_; goto default_;
///////////////////////////////////////////// /////////////////////////////////////////////
case 0x0e: case 0x0e: // MOV #imm,Rn 1110nnnniiiiiiii
// MOV #imm,Rn 1110nnnniiiiiiii
emit_move_r_imm32(GET_Rn(), (s8)op); emit_move_r_imm32(GET_Rn(), (s8)op);
goto end_op; goto end_op;
@ -3886,9 +3914,7 @@ static void sh2_generate_utils(void)
#if BRANCH_CACHE #if BRANCH_CACHE
// check if PC is in branch target cache // check if PC is in branch target cache
emith_and_r_r_imm(arg1, arg0, (ARRAY_SIZE(sh2s->branch_cache)-1)*4); emith_and_r_r_imm(arg1, arg0, (ARRAY_SIZE(sh2s->branch_cache)-1)*4);
// TODO implement emith_add_r_r_r_lsl_ptr, saves one insn on 32bit ARM emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 2 : 1);
emith_lsl(arg1, arg1, sizeof(void *) == 8 ? 2 : 1);
emith_add_r_r_ptr(arg1, CONTEXT_REG);
emith_read_r_r_offs(arg2, arg1, offsetof(SH2, branch_cache)); emith_read_r_r_offs(arg2, arg1, offsetof(SH2, branch_cache));
emith_cmp_r_r(arg2, arg0); emith_cmp_r_r(arg2, arg0);
EMITH_SJMP_START(DCOND_NE); EMITH_SJMP_START(DCOND_NE);
@ -3905,8 +3931,7 @@ static void sh2_generate_utils(void)
EMITH_SJMP_START(DCOND_EQ); EMITH_SJMP_START(DCOND_EQ);
emith_ctx_read_c(DCOND_NE, arg2, SHR_PC * 4); emith_ctx_read_c(DCOND_NE, arg2, SHR_PC * 4);
emith_and_r_r_imm(arg1, arg2, (ARRAY_SIZE(sh2s->branch_cache)-1)*4); emith_and_r_r_imm(arg1, arg2, (ARRAY_SIZE(sh2s->branch_cache)-1)*4);
emith_lsl(arg1, arg1, sizeof(void *) == 8 ? 2 : 1); emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 2 : 1);
emith_add_r_r_ptr(arg1, CONTEXT_REG);
emith_write_r_r_offs_c(DCOND_NE, arg2, arg1, offsetof(SH2, branch_cache)); emith_write_r_r_offs_c(DCOND_NE, arg2, arg1, offsetof(SH2, branch_cache));
emith_write_r_r_offs_ptr_c(DCOND_NE, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *)); emith_write_r_r_offs_ptr_c(DCOND_NE, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *));
EMITH_SJMP_END(DCOND_EQ); EMITH_SJMP_END(DCOND_EQ);
@ -4174,7 +4199,8 @@ int sh2_execute_drc(SH2 *sh2c, int cycles)
static void block_stats(void) static void block_stats(void)
{ {
#if (DRC_DEBUG & 2) #if (DRC_DEBUG & 2)
int c, b, i, total = 0; int c, b, i;
long total = 0;
printf("block stats:\n"); printf("block stats:\n");
for (b = 0; b < ARRAY_SIZE(block_tables); b++) { for (b = 0; b < ARRAY_SIZE(block_tables); b++) {
@ -4185,8 +4211,9 @@ static void block_stats(void)
if (block_tables[b][i].addr != 0) if (block_tables[b][i].addr != 0)
total += block_tables[b][i].refcount; total += block_tables[b][i].refcount;
} }
printf("total: %ld\n",total);
for (c = 0; c < 10; c++) { for (c = 0; c < 20; c++) {
struct block_desc *blk, *maxb = NULL; struct block_desc *blk, *maxb = NULL;
int max = 0; int max = 0;
for (b = 0; b < ARRAY_SIZE(block_tables); b++) { for (b = 0; b < ARRAY_SIZE(block_tables); b++) {
@ -4221,6 +4248,63 @@ static void block_stats(void)
#endif #endif
} }
// Debug statistics, only compiled in with (DRC_DEBUG & 32):
// prints the sum of the hit counters of all block entry points, then lists
// up to 20 entry points with the highest counters (pc, translated code
// pointer, counter, share of the total), and finally clears every counter.
void entry_stats(void)
{
#if (DRC_DEBUG & 32)
  struct block_desc *desc;
  struct block_entry *hot;
  long sum = 0;
  int rank, tbl, idx, ent, best;

  printf("block entry stats:\n");

  // pass 1: add up the counters found in both index ranges of each table
  for (tbl = 0; tbl < ARRAY_SIZE(block_tables); tbl++) {
    for (idx = 0; idx < block_counts[tbl]; idx++) {
      desc = &block_tables[tbl][idx];
      for (ent = 0; ent < desc->entry_count; ent++)
        sum += desc->entryp[ent].entry_count;
    }
    for (idx = block_limit[tbl]; idx < block_max_counts[tbl]; idx++) {
      desc = &block_tables[tbl][idx];
      for (ent = 0; ent < desc->entry_count; ent++)
        sum += desc->entryp[ent].entry_count;
    }
  }
  printf("total: %ld\n",sum);

  // pass 2: repeatedly pick the entry with the largest remaining counter;
  // the strict '>' keeps the first entry found when counters are equal
  rank = 0;
  while (rank < 20) {
    hot = NULL;
    best = 0;

    for (tbl = 0; tbl < ARRAY_SIZE(block_tables); tbl++) {
      for (idx = 0; idx < block_counts[tbl]; idx++) {
        desc = &block_tables[tbl][idx];
        for (ent = 0; ent < desc->entry_count; ent++) {
          if (desc->entryp[ent].entry_count > best) {
            best = desc->entryp[ent].entry_count;
            hot = &desc->entryp[ent];
          }
        }
      }
      for (idx = block_limit[tbl]; idx < block_max_counts[tbl]; idx++) {
        desc = &block_tables[tbl][idx];
        for (ent = 0; ent < desc->entry_count; ent++) {
          if (desc->entryp[ent].entry_count > best) {
            best = desc->entryp[ent].entry_count;
            hot = &desc->entryp[ent];
          }
        }
      }
    }

    // nothing with a non-zero counter is left
    if (hot == NULL)
      break;

    printf("%08x %p %9d %2.3f%%\n", hot->pc, hot->tcache_ptr, hot->entry_count,
      (double)100 * hot->entry_count / sum);
    // clear it so the next round finds the next highest entry
    hot->entry_count = 0;
    rank++;
  }

  // pass 3: reset all counters so the next report starts from zero
  for (tbl = 0; tbl < ARRAY_SIZE(block_tables); tbl++) {
    for (idx = 0; idx < block_counts[tbl]; idx++) {
      desc = &block_tables[tbl][idx];
      for (ent = 0; ent < desc->entry_count; ent++)
        desc->entryp[ent].entry_count = 0;
    }
    for (idx = block_limit[tbl]; idx < block_max_counts[tbl]; idx++) {
      desc = &block_tables[tbl][idx];
      for (ent = 0; ent < desc->entry_count; ent++)
        desc->entryp[ent].entry_count = 0;
    }
  }
#endif
}
static void backtrace(void) static void backtrace(void)
{ {
#if (DRC_DEBUG & 1024) #if (DRC_DEBUG & 1024)
@ -4279,6 +4363,7 @@ void sh2_drc_flush_all(void)
backtrace(); backtrace();
state_dump(); state_dump();
block_stats(); block_stats();
entry_stats();
flush_tcache(0); flush_tcache(0);
flush_tcache(1); flush_tcache(1);
flush_tcache(2); flush_tcache(2);
@ -4364,6 +4449,7 @@ int sh2_drc_init(SH2 *sh2)
hash_collisions = 0; hash_collisions = 0;
#endif #endif
} }
memset(sh2->branch_cache, -1, sizeof(sh2->branch_cache));
return 0; return 0;

View file

@ -214,7 +214,7 @@ int sh2_execute_interpreter(SH2 *sh2, int cycles)
if (sh2->pc < *base_pc || sh2->pc >= *end_pc) { if (sh2->pc < *base_pc || sh2->pc >= *end_pc) {
*base_pc = sh2->pc; *base_pc = sh2->pc;
scan_block(*base_pc, sh2->is_slave, scan_block(*base_pc, sh2->is_slave,
op_flags, end_pc, NULL); op_flags, end_pc, NULL, NULL);
} }
if ((op_flags[(sh2->pc - *base_pc) / 2] if ((op_flags[(sh2->pc - *base_pc) / 2]
& OF_BTARGET) || sh2->pc == *base_pc & OF_BTARGET) || sh2->pc == *base_pc

View file

@ -81,9 +81,9 @@ typedef struct SH2_
#define CYCLE_MULT_SHIFT 10 #define CYCLE_MULT_SHIFT 10
#define C_M68K_TO_SH2(xsh2, c) \ #define C_M68K_TO_SH2(xsh2, c) \
((int)((long long)(c) * (xsh2)->mult_m68k_to_sh2) >> CYCLE_MULT_SHIFT) (int)(((unsigned long long)(c) * (xsh2)->mult_m68k_to_sh2) >> CYCLE_MULT_SHIFT)
#define C_SH2_TO_M68K(xsh2, c) \ #define C_SH2_TO_M68K(xsh2, c) \
((int)((long long)(c+3) * (xsh2)->mult_sh2_to_m68k) >> CYCLE_MULT_SHIFT) (int)(((unsigned long long)(c+3U) * (xsh2)->mult_sh2_to_m68k) >> CYCLE_MULT_SHIFT)
int sh2_init(SH2 *sh2, int is_slave, SH2 *other_sh2); int sh2_init(SH2 *sh2, int is_slave, SH2 *other_sh2);
void sh2_finish(SH2 *sh2); void sh2_finish(SH2 *sh2);

View file

@ -30,7 +30,7 @@ static int REGPARM(2) sh2_irq_cb(SH2 *sh2, int level)
} }
// MUST specify active_sh2 when called from sh2 memhandlers // MUST specify active_sh2 when called from sh2 memhandlers
void p32x_update_irls(SH2 *active_sh2, int m68k_cycles) void p32x_update_irls(SH2 *active_sh2, unsigned int m68k_cycles)
{ {
int irqs, mlvl = 0, slvl = 0; int irqs, mlvl = 0, slvl = 0;
int mrun, srun; int mrun, srun;
@ -50,18 +50,18 @@ void p32x_update_irls(SH2 *active_sh2, int m68k_cycles)
slvl++; slvl++;
slvl *= 2; slvl *= 2;
mrun = sh2_irl_irq(&msh2, mlvl, active_sh2 == &msh2); mrun = sh2_irl_irq(&msh2, mlvl, msh2.state & SH2_STATE_RUN);
if (mrun) { if (mrun) {
p32x_sh2_poll_event(&msh2, SH2_IDLE_STATES, m68k_cycles); p32x_sh2_poll_event(&msh2, SH2_IDLE_STATES, m68k_cycles);
if (active_sh2 == &msh2) if (msh2.state & SH2_STATE_RUN)
sh2_end_run(active_sh2, 1); sh2_end_run(&msh2, 1);
} }
srun = sh2_irl_irq(&ssh2, slvl, active_sh2 == &ssh2); srun = sh2_irl_irq(&ssh2, slvl, ssh2.state & SH2_STATE_RUN);
if (srun) { if (srun) {
p32x_sh2_poll_event(&ssh2, SH2_IDLE_STATES, m68k_cycles); p32x_sh2_poll_event(&ssh2, SH2_IDLE_STATES, m68k_cycles);
if (active_sh2 == &ssh2) if (ssh2.state & SH2_STATE_RUN)
sh2_end_run(active_sh2, 1); sh2_end_run(&ssh2, 1);
} }
elprintf(EL_32X, "update_irls: m %d/%d, s %d/%d", mlvl, mrun, slvl, srun); elprintf(EL_32X, "update_irls: m %d/%d, s %d/%d", mlvl, mrun, slvl, srun);
@ -70,7 +70,7 @@ void p32x_update_irls(SH2 *active_sh2, int m68k_cycles)
// the mask register is inconsistent, CMD is supposed to be a mask, // the mask register is inconsistent, CMD is supposed to be a mask,
// while others are actually irq trigger enables? // while others are actually irq trigger enables?
// TODO: test on hw.. // TODO: test on hw..
void p32x_trigger_irq(SH2 *sh2, int m68k_cycles, unsigned int mask) void p32x_trigger_irq(SH2 *sh2, unsigned int m68k_cycles, unsigned int mask)
{ {
Pico32x.sh2irqs |= mask & P32XI_VRES; Pico32x.sh2irqs |= mask & P32XI_VRES;
Pico32x.sh2irqi[0] |= mask & (Pico32x.sh2irq_mask[0] << 3); Pico32x.sh2irqi[0] |= mask & (Pico32x.sh2irq_mask[0] << 3);
@ -79,7 +79,7 @@ void p32x_trigger_irq(SH2 *sh2, int m68k_cycles, unsigned int mask)
p32x_update_irls(sh2, m68k_cycles); p32x_update_irls(sh2, m68k_cycles);
} }
void p32x_update_cmd_irq(SH2 *sh2, int m68k_cycles) void p32x_update_cmd_irq(SH2 *sh2, unsigned int m68k_cycles)
{ {
if ((Pico32x.sh2irq_mask[0] & 2) && (Pico32x.regs[2 / 2] & 1)) if ((Pico32x.sh2irq_mask[0] & 2) && (Pico32x.regs[2 / 2] & 1))
Pico32x.sh2irqi[0] |= P32XI_CMD; Pico32x.sh2irqi[0] |= P32XI_CMD;
@ -207,8 +207,8 @@ void PicoReset32x(void)
{ {
if (PicoIn.AHW & PAHW_32X) { if (PicoIn.AHW & PAHW_32X) {
p32x_trigger_irq(NULL, SekCyclesDone(), P32XI_VRES); p32x_trigger_irq(NULL, SekCyclesDone(), P32XI_VRES);
p32x_sh2_poll_event(&msh2, SH2_IDLE_STATES, 0); p32x_sh2_poll_event(&msh2, SH2_IDLE_STATES, SekCyclesDone());
p32x_sh2_poll_event(&ssh2, SH2_IDLE_STATES, 0); p32x_sh2_poll_event(&ssh2, SH2_IDLE_STATES, SekCyclesDone());
p32x_pwm_ctl_changed(); p32x_pwm_ctl_changed();
p32x_timers_recalc(); p32x_timers_recalc();
} }
@ -258,7 +258,7 @@ static void p32x_start_blank(void)
p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, SekCyclesDone()); p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, SekCyclesDone());
} }
void p32x_schedule_hint(SH2 *sh2, int m68k_cycles) void p32x_schedule_hint(SH2 *sh2, unsigned int m68k_cycles)
{ {
// rather rough, 32x hint is useless in practice // rather rough, 32x hint is useless in practice
int after; int after;
@ -370,9 +370,9 @@ static void p32x_run_events(unsigned int until)
oldest, event_time_next); oldest, event_time_next);
} }
static void run_sh2(SH2 *sh2, int m68k_cycles) static void run_sh2(SH2 *sh2, unsigned int m68k_cycles)
{ {
int cycles, done; unsigned int cycles, done;
pevt_log_sh2_o(sh2, EVT_RUN_START); pevt_log_sh2_o(sh2, EVT_RUN_START);
sh2->state |= SH2_STATE_RUN; sh2->state |= SH2_STATE_RUN;

View file

@ -6,7 +6,7 @@
@* See COPYING file in the top-level directory. @* See COPYING file in the top-level directory.
@* @*
#include "pico/pico_int_o32.h" #include "pico/pico_int_offs.h"
.extern Pico32x .extern Pico32x
.extern Pico .extern Pico
@ -74,7 +74,7 @@ Pico32xNativePal:
ldr lr,=Pico ldr lr,=Pico
ldr r10,=Pico32x ldr r10,=Pico32x
ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB] ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB]
ldr r10,[r10, #0x40] @ Pico32x.vdp_regs[0] ldrh r10,[r10, #0x40] @ Pico32x.vdp_regs[0]
add r9, lr, #OFS_Pico_est+OFS_EST_HighPal @ palmd add r9, lr, #OFS_Pico_est+OFS_EST_HighPal @ palmd
and r4, r2, #0xff and r4, r2, #0xff
@ -118,6 +118,8 @@ Pico32xNativePal:
mov r7, r7, lsl #1 mov r7, r7, lsl #1
ldreqh r12,[r9, r7] ldreqh r12,[r9, r7]
streqh r12,[r0], #2 @ *dst++ = palmd[*pmd] streqh r12,[r0], #2 @ *dst++ = palmd[*pmd]
.else
addeq r0, r0, #2
.endif .endif
beq 2b @ loop_inner beq 2b @ loop_inner
@ -182,8 +184,8 @@ Pico32xNativePal:
ldrneb r8, [r5, #2]! @ r7,r8 - pixel 0,1 index ldrneb r8, [r5, #2]! @ r7,r8 - pixel 0,1 index
subs r6, r6, #1 subs r6, r6, #1
blt 0b @ loop_outer blt 0b @ loop_outer
@ cmp r7, r8 @ is this really improving things? cmp r7, r8 @ is this really improving things?
@ beq 5f @ check_fill @ +8 beq 5f @ check_fill @ +8
3: @ no_fill: 3: @ no_fill:
mov r12,r7, lsl #1 mov r12,r7, lsl #1
@ -242,7 +244,7 @@ Pico32xNativePal:
beq 6b beq 6b
7: @ count_done 7: @ count_done
sub r5, r5, #4 @ undo readahead sub r5, r5, #4 @ undo readahead
@ fix alignment and check type @ fix alignment and check type
sub r8, r5, lr sub r8, r5, lr
@ -268,14 +270,14 @@ Pico32xNativePal:
b 2b @ loop_inner b 2b @ loop_inner
9: @ bg_mode: 9: @ bg_mode:
ldrb r12,[r11],#1 @ MD pixel ldrb r12,[r11],#1 @ MD pixel 0,1
ldrb lr, [r11],#1 ldrb lr, [r11],#1
cmp r3, lr, lsl #26 @ MD has bg pixel? cmp r3, r12,lsl #26 @ MD pixel 0 has bg?
.if \do_md .if \do_md
mov r12,r12,lsl #1 mov r12,r12,lsl #1
ldrneh r12,[r9, r12] @ t = palmd[*pmd] ldrneh r12,[r9, r12] @ t = palmd[*pmd]
moveq r12,r7 moveq r12,r7
cmp r3, lr, lsl #26 cmp r3, lr, lsl #26 @ MD pixel 1 has bg?
mov lr, lr, lsl #1 mov lr, lr, lsl #1
ldrneh lr, [r9, lr] ldrneh lr, [r9, lr]
moveq lr, r7 moveq lr, r7
@ -283,7 +285,7 @@ Pico32xNativePal:
strh lr, [r0], #2 strh lr, [r0], #2
.else .else
streqh r7, [r0] streqh r7, [r0]
cmp r3, lr, lsl #26 cmp r3, lr, lsl #26 @ MD pixel 1 has bg?
streqh r7, [r0, #2] streqh r7, [r0, #2]
add r0, r0, #4 add r0, r0, #4
.endif .endif

View file

@ -398,9 +398,6 @@ static void p32x_reg_write8(u32 a, u32 d)
p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles);
comreg = 1 << (a & 0x0f) / 2; comreg = 1 << (a & 0x0f) / 2;
Pico32x.comm_dirty |= comreg; Pico32x.comm_dirty |= comreg;
if (cycles - (int)msh2.m68krcycles_done > 120)
p32x_sync_sh2s(cycles);
return; return;
} }
} }
@ -453,6 +450,9 @@ static void p32x_reg_write16(u32 a, u32 d)
int cycles = SekCyclesDone(); int cycles = SekCyclesDone();
int comreg; int comreg;
if (r[a / 2] == d)
return;
p32x_sync_sh2s(cycles); p32x_sync_sh2s(cycles);
r[a / 2] = d; r[a / 2] = d;
@ -685,7 +685,7 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2)
case 0x3f: case 0x3f:
return; return;
pwm_write: pwm_write:
p32x_pwm_write16(a & ~1, d, sh2, 0); p32x_pwm_write16(a & ~1, d, sh2, sh2_cycles_done_m68k(sh2));
return; return;
} }

View file

@ -6,7 +6,7 @@
* See COPYING file in the top-level directory. * See COPYING file in the top-level directory.
*/ */
#include "../pico_int_o32.h" #include "../pico_int_offs.h"
@ 32X bank sizes... TODO this should somehow come from an include file @ 32X bank sizes... TODO this should somehow come from an include file
.equ SH2_ROM_SHIFT, 10 @ 0x003fffff .equ SH2_ROM_SHIFT, 10 @ 0x003fffff
@ -46,92 +46,92 @@
sh2_read8_rom: sh2_read8_rom:
ldr ip, [r1, #OFS_SH2_p_rom] ldr ip, [r1, #OFS_SH2_p_rom]
eor r0, r0, #1 eor r0, r0, #1
lsl r0, #SH2_ROM_SHIFT mov r0, r0, lsl #SH2_ROM_SHIFT
ldrb r0, [ip, r0, lsr #SH2_ROM_SHIFT] ldrb r0, [ip, r0, lsr #SH2_ROM_SHIFT]
bx lr bx lr
sh2_read8_sdram: sh2_read8_sdram:
ldr ip, [r1, #OFS_SH2_p_sdram] ldr ip, [r1, #OFS_SH2_p_sdram]
eor r0, r0, #1 eor r0, r0, #1
lsl r0, #SH2_RAM_SHIFT mov r0, r0, lsl #SH2_RAM_SHIFT
ldrb r0, [ip, r0, lsr #SH2_RAM_SHIFT] ldrb r0, [ip, r0, lsr #SH2_RAM_SHIFT]
bx lr bx lr
sh2_read8_da: sh2_read8_da:
ldr ip, [r1, #OFS_SH2_p_da] ldr ip, [r1, #OFS_SH2_p_da]
eor r0, r0, #1 eor r0, r0, #1
lsl r0, #SH2_DA_SHIFT mov r0, r0, lsl #SH2_DA_SHIFT
ldrb r0, [ip, r0, lsr #SH2_DA_SHIFT] ldrb r0, [ip, r0, lsr #SH2_DA_SHIFT]
bx lr bx lr
sh2_read8_dram: sh2_read8_dram:
ldr ip, [r1, #OFS_SH2_p_dram] ldr ip, [r1, #OFS_SH2_p_dram]
eor r0, r0, #1 eor r0, r0, #1
lsl r0, #SH2_DRAM_SHIFT mov r0, r0, lsl #SH2_DRAM_SHIFT
ldrb r0, [ip, r0, lsr #SH2_DRAM_SHIFT] ldrb r0, [ip, r0, lsr #SH2_DRAM_SHIFT]
bx lr bx lr
sh2_read16_rom: sh2_read16_rom:
ldr ip, [r1, #OFS_SH2_p_rom] ldr ip, [r1, #OFS_SH2_p_rom]
lsl r0, #SH2_ROM_SHIFT mov r0, r0, lsl #SH2_ROM_SHIFT
lsr r0, #SH2_ROM_SHIFT mov r0, r0, lsr #SH2_ROM_SHIFT
ldrh r0, [ip, r0] ldrh r0, [ip, r0]
bx lr bx lr
sh2_read16_sdram: sh2_read16_sdram:
ldr ip, [r1, #OFS_SH2_p_sdram] ldr ip, [r1, #OFS_SH2_p_sdram]
lsl r0, #SH2_RAM_SHIFT mov r0, r0, lsl #SH2_RAM_SHIFT
lsr r0, #SH2_RAM_SHIFT mov r0, r0, lsr #SH2_RAM_SHIFT
ldrh r0, [ip, r0] ldrh r0, [ip, r0]
bx lr bx lr
sh2_read16_da: sh2_read16_da:
ldr ip, [r1, #OFS_SH2_p_da] ldr ip, [r1, #OFS_SH2_p_da]
lsl r0, #SH2_DA_SHIFT mov r0, r0, lsl #SH2_DA_SHIFT
lsr r0, #SH2_DA_SHIFT mov r0, r0, lsr #SH2_DA_SHIFT
ldrh r0, [ip, r0] ldrh r0, [ip, r0]
bx lr bx lr
sh2_read16_dram: sh2_read16_dram:
ldr ip, [r1, #OFS_SH2_p_dram] ldr ip, [r1, #OFS_SH2_p_dram]
lsl r0, #SH2_DRAM_SHIFT mov r0, r0, lsl #SH2_DRAM_SHIFT
lsr r0, #SH2_DRAM_SHIFT mov r0, r0, lsr #SH2_DRAM_SHIFT
ldrh r0, [ip, r0] ldrh r0, [ip, r0]
bx lr bx lr
sh2_read32_rom: sh2_read32_rom:
ldr ip, [r1, #OFS_SH2_p_rom] ldr ip, [r1, #OFS_SH2_p_rom]
lsl r0, #SH2_ROM_SHIFT mov r0, r0, lsl #SH2_ROM_SHIFT
ldr r0, [ip, r0, lsr #SH2_ROM_SHIFT] ldr r0, [ip, r0, lsr #SH2_ROM_SHIFT]
ror r0, r0, #16 mov r0, r0, ror #16
bx lr bx lr
sh2_read32_sdram: sh2_read32_sdram:
ldr ip, [r1, #OFS_SH2_p_sdram] ldr ip, [r1, #OFS_SH2_p_sdram]
lsl r0, #SH2_RAM_SHIFT mov r0, r0, lsl #SH2_RAM_SHIFT
ldr r0, [ip, r0, lsr #SH2_RAM_SHIFT] ldr r0, [ip, r0, lsr #SH2_RAM_SHIFT]
ror r0, r0, #16 mov r0, r0, ror #16
bx lr bx lr
sh2_read32_da: sh2_read32_da:
ldr ip, [r1, #OFS_SH2_p_da] ldr ip, [r1, #OFS_SH2_p_da]
lsl r0, #SH2_DA_SHIFT mov r0, r0, lsl #SH2_DA_SHIFT
ldr r0, [ip, r0, lsr #SH2_DA_SHIFT] ldr r0, [ip, r0, lsr #SH2_DA_SHIFT]
ror r0, r0, #16 mov r0, r0, ror #16
bx lr bx lr
sh2_read32_dram: sh2_read32_dram:
ldr ip, [r1, #OFS_SH2_p_dram] ldr ip, [r1, #OFS_SH2_p_dram]
lsl r0, #SH2_DRAM_SHIFT mov r0, r0, lsl #SH2_DRAM_SHIFT
ldr r0, [ip, r0, lsr #SH2_DRAM_SHIFT] ldr r0, [ip, r0, lsr #SH2_DRAM_SHIFT]
ror r0, r0, #16 mov r0, r0, ror #16
bx lr bx lr
sh2_write8_sdram: sh2_write8_sdram:
@ preserve r0 and r2 for tail call @ preserve r0 and r2 for tail call
ldr ip, [r2, #OFS_SH2_p_sdram] ldr ip, [r2, #OFS_SH2_p_sdram]
eor r3, r0, #1 eor r3, r0, #1
lsl r3, #SH2_RAM_SHIFT mov r3, r3, lsl #SH2_RAM_SHIFT
strb r1, [ip, r3, lsr #SH2_RAM_SHIFT] strb r1, [ip, r3, lsr #SH2_RAM_SHIFT]
#ifdef DRC_SH2 #ifdef DRC_SH2
ldr ip, [r2, #OFS_SH2_p_drcblk_ram] ldr ip, [r2, #OFS_SH2_p_drcblk_ram]
@ -148,7 +148,7 @@ sh2_write8_da:
@ preserve r0 and r2 for tail call @ preserve r0 and r2 for tail call
ldr ip, [r2, #OFS_SH2_p_da] ldr ip, [r2, #OFS_SH2_p_da]
eor r3, r0, #1 eor r3, r0, #1
lsl r3, #SH2_DA_SHIFT mov r3, r3, lsl #SH2_DA_SHIFT
strb r1, [ip, r3, lsr #SH2_DA_SHIFT] strb r1, [ip, r3, lsr #SH2_DA_SHIFT]
#ifdef DRC_SH2 #ifdef DRC_SH2
ldr ip, [r2, #OFS_SH2_p_drcblk_da] ldr ip, [r2, #OFS_SH2_p_drcblk_da]
@ -165,15 +165,15 @@ sh2_write8_dram:
tst r1, #0xff tst r1, #0xff
ldrne ip, [r2, #OFS_SH2_p_dram] ldrne ip, [r2, #OFS_SH2_p_dram]
eorne r3, r0, #1 eorne r3, r0, #1
lslne r3, #SH2_DRAM_SHIFT movne r3, r3, lsl #SH2_DRAM_SHIFT
strneb r1, [ip, r3, lsr #SH2_DRAM_SHIFT] strneb r1, [ip, r3, lsr #SH2_DRAM_SHIFT]
bx lr bx lr
sh2_write16_sdram: sh2_write16_sdram:
@ preserve r0 and r2 for tail call @ preserve r0 and r2 for tail call
ldr ip, [r2, #OFS_SH2_p_sdram] ldr ip, [r2, #OFS_SH2_p_sdram]
lsl r3, r0, #SH2_RAM_SHIFT mov r3, r0, lsl #SH2_RAM_SHIFT
lsr r3, r3, #SH2_RAM_SHIFT mov r3, r3, lsr #SH2_RAM_SHIFT
strh r1, [ip, r3] strh r1, [ip, r3]
#ifdef DRC_SH2 #ifdef DRC_SH2
ldr ip, [r2, #OFS_SH2_p_drcblk_ram] ldr ip, [r2, #OFS_SH2_p_drcblk_ram]
@ -188,8 +188,8 @@ sh2_write16_sdram:
sh2_write16_da: sh2_write16_da:
@ preserve r0 and r2 for tail call @ preserve r0 and r2 for tail call
ldr ip, [r2, #OFS_SH2_p_da] ldr ip, [r2, #OFS_SH2_p_da]
lsl r3, r0, #SH2_DA_SHIFT mov r3, r0, lsl #SH2_DA_SHIFT
lsr r3, r3, #SH2_DA_SHIFT mov r3, r3, lsr #SH2_DA_SHIFT
strh r1, [ip, r3] strh r1, [ip, r3]
#ifdef DRC_SH2 #ifdef DRC_SH2
ldr ip, [r2, #OFS_SH2_p_drcblk_da] ldr ip, [r2, #OFS_SH2_p_drcblk_da]
@ -204,23 +204,23 @@ sh2_write16_da:
sh2_write16_dram: sh2_write16_dram:
ldr ip, [r2, #OFS_SH2_p_dram] ldr ip, [r2, #OFS_SH2_p_dram]
tst r0, #SH2_DRAM_OW tst r0, #SH2_DRAM_OW
lsl r3, r0, #SH2_DRAM_SHIFT mov r3, r0, lsl #SH2_DRAM_SHIFT
lsr r3, r3, #SH2_DRAM_SHIFT mov r3, r3, lsr #SH2_DRAM_SHIFT
streqh r1, [ip, r3] streqh r1, [ip, r3]
bxeq lr bxeq lr
add ip, ip, r3 add ip, ip, r3
tst r1, #0xff tst r1, #0xff
strneb r1, [ip, #0] strneb r1, [ip, #0]
tst r1, #0xff00 tst r1, #0xff00
lsrne r1, r1, #8 movne r1, r1, lsr #8
strneb r1, [ip, #1] strneb r1, [ip, #1]
bx lr bx lr
sh2_write32_sdram: sh2_write32_sdram:
@ preserve r0 and r2 for tail call @ preserve r0 and r2 for tail call
ldr ip, [r2, #OFS_SH2_p_sdram] ldr ip, [r2, #OFS_SH2_p_sdram]
ror r1, r1, #16 mov r1, r1, ror #16
lsl r3, r0, #SH2_RAM_SHIFT mov r3, r0, lsl #SH2_RAM_SHIFT
str r1, [ip, r3, lsr #SH2_RAM_SHIFT] str r1, [ip, r3, lsr #SH2_RAM_SHIFT]
#ifdef DRC_SH2 #ifdef DRC_SH2
ldr ip, [r2, #OFS_SH2_p_drcblk_ram] ldr ip, [r2, #OFS_SH2_p_drcblk_ram]
@ -242,8 +242,8 @@ sh2_write32_sdram:
sh2_write32_da: sh2_write32_da:
@ preserve r0 and r2 for tail call @ preserve r0 and r2 for tail call
ldr ip, [r2, #OFS_SH2_p_da] ldr ip, [r2, #OFS_SH2_p_da]
ror r1, r1, #16 mov r1, r1, ror #16
lsl r3, r0, #SH2_DA_SHIFT mov r3, r0, lsl #SH2_DA_SHIFT
str r1, [ip, r3, lsr #SH2_DA_SHIFT] str r1, [ip, r3, lsr #SH2_DA_SHIFT]
#ifdef DRC_SH2 #ifdef DRC_SH2
ldr ip, [r2, #OFS_SH2_p_drcblk_da] ldr ip, [r2, #OFS_SH2_p_drcblk_da]
@ -265,13 +265,13 @@ sh2_write32_da:
sh2_write32_dram: sh2_write32_dram:
ldr ip, [r2, #OFS_SH2_p_dram] ldr ip, [r2, #OFS_SH2_p_dram]
tst r0, #SH2_DRAM_OW tst r0, #SH2_DRAM_OW
lsl r3, r0, #SH2_DRAM_SHIFT mov r3, r0, lsl #SH2_DRAM_SHIFT
roreq r1, r1, #16 moveq r1, r1, ror #16
streq r1, [ip, r3, lsr #SH2_DRAM_SHIFT] streq r1, [ip, r3, lsr #SH2_DRAM_SHIFT]
bxeq lr bxeq lr
#if 1 #if 1
ldr r0, [ip, r3, lsr #SH2_DRAM_SHIFT] ldr r0, [ip, r3, lsr #SH2_DRAM_SHIFT]
ror r1, r1, #16 mov r1, r1, ror #16
mov r2, #0 mov r2, #0
tst r1, #0x00ff0000 tst r1, #0x00ff0000
orrne r2, r2, #0x00ff0000 orrne r2, r2, #0x00ff0000

View file

@ -10,10 +10,6 @@
#include "cell_map.c" #include "cell_map.c"
#ifndef UTYPES_DEFINED
typedef unsigned short u16;
#endif
// check: Heart of the alien, jaguar xj 220 // check: Heart of the alien, jaguar xj 220
PICO_INTERNAL void DmaSlowCell(unsigned int source, unsigned int a, int len, unsigned char inc) PICO_INTERNAL void DmaSlowCell(unsigned int source, unsigned int a, int len, unsigned char inc)
{ {

View file

@ -6,7 +6,7 @@
@* See COPYING file in the top-level directory. @* See COPYING file in the top-level directory.
@* @*
#include "../pico_int_o32.h" #include "../pico_int_offs.h"
.equiv PCM_STEP_SHIFT, 11 .equiv PCM_STEP_SHIFT, 11

View file

@ -8,7 +8,7 @@
* this is highly specialized, be careful if changing related C code! * this is highly specialized, be careful if changing related C code!
*/ */
#include "pico_int_o32.h" #include "pico_int_offs.h"
@ define these constants in your include file: @ define these constants in your include file:
@ .equiv START_ROW, 1 @ .equiv START_ROW, 1

View file

@ -8,7 +8,7 @@
* this is highly specialized, be careful if changing related C code! * this is highly specialized, be careful if changing related C code!
*/ */
#include "pico_int_o32.h" #include "pico_int_offs.h"
.extern DrawStripInterlace .extern DrawStripInterlace

View file

@ -2,11 +2,6 @@
#include "pico_port.h" #include "pico_port.h"
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef uintptr_t uptr; // unsigned pointer-sized int
#define M68K_MEM_SHIFT 16 #define M68K_MEM_SHIFT 16
// minimum size we can map // minimum size we can map
#define M68K_BANK_SIZE (1 << M68K_MEM_SHIFT) #define M68K_BANK_SIZE (1 << M68K_MEM_SHIFT)

View file

@ -8,7 +8,7 @@
# OUT OF DATE # OUT OF DATE
#include "pico_int_o32.h" #include "pico_int_offs.h"
.set noreorder .set noreorder
.set noat .set noat

View file

@ -6,7 +6,7 @@
* See COPYING file in the top-level directory. * See COPYING file in the top-level directory.
*/ */
#include "pico_int_o32.h" #include "pico_int_offs.h"
.equ SRR_MAPPED, (1 << 0) .equ SRR_MAPPED, (1 << 0)
.equ SRR_READONLY, (1 << 1) .equ SRR_READONLY, (1 << 1)

View file

@ -33,6 +33,14 @@ extern "C" {
#endif #endif
typedef unsigned char u8;
typedef signed char s8;
typedef unsigned short u16;
typedef signed short s16;
typedef unsigned int u32;
typedef signed int s32;
typedef uintptr_t uptr; // unsigned pointer-sized int
// ----------------------- 68000 CPU ----------------------- // ----------------------- 68000 CPU -----------------------
#ifdef EMU_C68K #ifdef EMU_C68K
#include "../cpu/cyclone/Cyclone.h" #include "../cpu/cyclone/Cyclone.h"
@ -427,7 +435,7 @@ struct PicoSound
short psg_line; short psg_line;
}; };
// run tools/mkoffsets pico/pico_int_o32.h if you change these // run tools/mkoffsets pico/pico_int_offs.h if you change these
// careful with savestate compat // careful with savestate compat
struct Pico struct Pico
{ {
@ -905,13 +913,13 @@ void PicoFrame32x(void);
void Pico32xStateLoaded(int is_early); void Pico32xStateLoaded(int is_early);
void p32x_sync_sh2s(unsigned int m68k_target); void p32x_sync_sh2s(unsigned int m68k_target);
void p32x_sync_other_sh2(SH2 *sh2, unsigned int m68k_target); void p32x_sync_other_sh2(SH2 *sh2, unsigned int m68k_target);
void p32x_update_irls(SH2 *active_sh2, int m68k_cycles); void p32x_update_irls(SH2 *active_sh2, unsigned int m68k_cycles);
void p32x_trigger_irq(SH2 *sh2, int m68k_cycles, unsigned int mask); void p32x_trigger_irq(SH2 *sh2, unsigned int m68k_cycles, unsigned int mask);
void p32x_update_cmd_irq(SH2 *sh2, int m68k_cycles); void p32x_update_cmd_irq(SH2 *sh2, unsigned int m68k_cycles);
void p32x_reset_sh2s(void); void p32x_reset_sh2s(void);
void p32x_event_schedule(unsigned int now, enum p32x_event event, int after); void p32x_event_schedule(unsigned int now, enum p32x_event event, int after);
void p32x_event_schedule_sh2(SH2 *sh2, enum p32x_event event, int after); void p32x_event_schedule_sh2(SH2 *sh2, enum p32x_event event, int after);
void p32x_schedule_hint(SH2 *sh2, int m68k_cycles); void p32x_schedule_hint(SH2 *sh2, unsigned int m68k_cycles);
// 32x/memory.c // 32x/memory.c
extern struct Pico32xMem *Pico32xMem; extern struct Pico32xMem *Pico32xMem;

View file

@ -9,6 +9,7 @@ asm_render = 0
asm_ym2612 = 0 asm_ym2612 = 0
asm_misc = 0 asm_misc = 0
asm_cdmemory = 0 asm_cdmemory = 0
asm_32xdraw = 0
asm_mix = 0 asm_mix = 0
endif endif

View file

@ -9,7 +9,7 @@
* to avoid under/overstepping the src region). * to avoid under/overstepping the src region).
* *
* ATTN does dirty aliasing tricks with undefined behaviour by standard. * ATTN does dirty aliasing tricks with undefined behaviour by standard.
* (however, this was needed to improve the generated code). * (however, this improved the generated code).
* ATTN uses struct assignment, which only works if the compiler is inlining * ATTN uses struct assignment, which only works if the compiler is inlining
* this (else it would probably call memcpy :-)). * this (else it would probably call memcpy :-)).
*/ */
@ -33,22 +33,24 @@ void *memcpy(void *dest, const void *src, size_t n)
const int lm = sizeof(uint32_t)-1; const int lm = sizeof(uint32_t)-1;
/* align src to word */ /* align src to word */
while (((unsigned)ss.c & lm) && n > 0) while (((uintptr_t)ss.c & lm) && n > 0)
*ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--;
if (((unsigned)ds.c & lm) == 0) { if (((uintptr_t)ds.c & lm) == 0) {
/* fast copy if pointers have the same alignment */ /* fast copy if pointers have the same alignment */
while (n >= sizeof(struct _16)) /* copy 16 bytes blocks */ while (n >= sizeof(struct _16)) /* copy 16 byte blocks */
*ds.s++ = *ss.s++, n -= sizeof(struct _16); *ds.s++ = *ss.s++, n -= sizeof(struct _16);
if (n >= sizeof(uint64_t)) /* copy leftover 8 byte block */ if (n >= sizeof(uint64_t)) /* copy leftover 8 byte block */
*ds.l++ = *ss.l++, n -= sizeof(uint64_t); *ds.l++ = *ss.l++, n -= sizeof(uint64_t);
// if (n >= sizeof(uint32_t)) /* copy leftover 4 byte block */
// *ds.i++ = *ss.i++, n -= sizeof(uint32_t);
} else if (n >= 2*sizeof(uint32_t)) { } else if (n >= 2*sizeof(uint32_t)) {
/* unaligned data big enough to avoid overstepping src */ /* unaligned data big enough to avoid overstepping src */
uint32_t v1, v2, b, s; uint32_t v1, v2, b, s;
/* align dest to word */ /* align dest to word */
while (((unsigned)ds.c & lm) && n > 0) while (((uintptr_t)ds.c & lm) && n > 0)
*ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--;
/* copy loop: load aligned words and store shifted words */ /* copy loop: load aligned words and store shifted words */
b = (unsigned)ss.c & lm, s = b*8; ss.c -= b; b = (uintptr_t)ss.c & lm, s = b*8; ss.c -= b;
v1 = *ss.i++, v2 = *ss.i++; v1 = *ss.i++, v2 = *ss.i++;
while (n >= 3*sizeof(uint32_t)) { while (n >= 3*sizeof(uint32_t)) {
*ds.i++ = (v1 _L_ s) | (v2 _U_ (32-s)); v1 = *ss.i++; *ds.i++ = (v1 _L_ s) | (v2 _U_ (32-s)); v1 = *ss.i++;
@ -78,28 +80,35 @@ void *memmove (void *dest, const void *src, size_t n)
struct _16 { uint32_t a[4]; }; struct _16 { uint32_t a[4]; };
union { const void *v; uint8_t *c; uint32_t *i; uint64_t *l; struct _16 *s; } union { const void *v; uint8_t *c; uint32_t *i; uint64_t *l; struct _16 *s; }
ss = { src+n }, ds = { dest+n }; ss = { src+n }, ds = { dest+n };
size_t pd = dest > src ? dest - src : src - dest;
const int lm = sizeof(uint32_t)-1; const int lm = sizeof(uint32_t)-1;
if (dest <= src || dest >= src+n) if (dest <= src || dest >= src+n)
return memcpy(dest, src, n); return memcpy(dest, src, n);
/* align src to word */ /* align src to word */
while (((unsigned)ss.c & lm) && n > 0) while (((uintptr_t)ss.c & lm) && n > 0)
*--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--;
if (((unsigned)ds.c & lm) == 0) { /* take care not to copy multi-byte data if it overlaps */
if (((uintptr_t)ds.c & lm) == 0) {
/* fast copy if pointers have the same alignment */ /* fast copy if pointers have the same alignment */
while (n >= sizeof(struct _16)) /* copy 16 byte blocks */ while (n >= sizeof(struct _16) && pd >= sizeof(struct _16))
/* copy 16 byte blocks if no overlap */
*--ds.s = *--ss.s, n -= sizeof(struct _16); *--ds.s = *--ss.s, n -= sizeof(struct _16);
if (n >= sizeof(uint64_t)) /* copy leftover 8 byte block */ while (n >= sizeof(uint64_t) && pd >= sizeof(uint64_t))
/* copy leftover 8 byte blocks if no overlap */
*--ds.l = *--ss.l, n -= sizeof(uint64_t); *--ds.l = *--ss.l, n -= sizeof(uint64_t);
} else if (n >= 2*sizeof(uint32_t)) { while (n >= sizeof(uint32_t) && pd >= sizeof(uint32_t))
/* copy leftover 4 byte blocks if no overlap */
*--ds.i = *--ss.i, n -= sizeof(uint32_t);
} else if (n >= 2*sizeof(uint32_t) && pd >= 2*sizeof(uint32_t)) {
/* unaligned data big enough to avoid understepping src */ /* unaligned data big enough to avoid understepping src */
uint32_t v1, v2, b, s; uint32_t v1, v2, b, s;
/* align dest to word */ /* align dest to word */
while (((unsigned)ds.c & lm) && n > 0) while (((uintptr_t)ds.c & lm) && n > 0)
*--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--;
/* copy loop: load aligned words and store shifted words */ /* copy loop: load aligned words and store shifted words */
b = (unsigned)ss.c & lm, s = b*8; ss.c += b; b = (uintptr_t)ss.c & lm, s = b*8; ss.c += b;
v1 = *--ss.i, v2 = *--ss.i; v1 = *--ss.i, v2 = *--ss.i;
while (n >= 3*sizeof(uint32_t)) { while (n >= 3*sizeof(uint32_t)) {
*--ds.i = (v1 _U_ s) | (v2 _L_ (32-s)); v1 = *--ss.i; *--ds.i = (v1 _U_ s) | (v2 _L_ (32-s)); v1 = *--ss.i;
@ -114,7 +123,7 @@ void *memmove (void *dest, const void *src, size_t n)
} }
ss.c -= b - 2*sizeof(uint32_t); ss.c -= b - 2*sizeof(uint32_t);
} }
/* copy 0-7 leftover bytes */ /* copy 0-7 leftover bytes (or up to everything if ptrs are too close) */
while (n >= 4) { while (n >= 4) {
*--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--;
*--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--;

View file

@ -114,14 +114,12 @@ subs r2, r2, #0x14
blt Lmemcpy_fl32 /* less than 32 bytes (12 from above) */ blt Lmemcpy_fl32 /* less than 32 bytes (12 from above) */
stmdb sp!, {r4, r7, r8, r9, r10} /* borrow r4 */ stmdb sp!, {r4, r7, r8, r9, r10} /* borrow r4 */
/* blat 64 bytes at a time */ /* blat 32 bytes at a time */
/* XXX for really big copies perhaps we should use more registers */ /* XXX for really big copies perhaps we should use more registers */
Lmemcpy_floop32: Lmemcpy_floop32:
ldmia r1!, {r3, r4, r7, r8, r9, r10, r12, lr} ldmia r1!, {r3, r4, r7, r8, r9, r10, r12, lr}
stmia r0!, {r3, r4, r7, r8, r9, r10, r12, lr} stmia r0!, {r3, r4, r7, r8, r9, r10, r12, lr}
ldmia r1!, {r3, r4, r7, r8, r9, r10, r12, lr} subs r2, r2, #0x20
stmia r0!, {r3, r4, r7, r8, r9, r10, r12, lr}
subs r2, r2, #0x40
bge Lmemcpy_floop32 bge Lmemcpy_floop32
cmn r2, #0x10 cmn r2, #0x10
@ -314,14 +312,12 @@ stmdb sp!, {r4, r7, r8, r9, r10, lr}
subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */ subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
blt Lmemcpy_bl32 blt Lmemcpy_bl32
/* blat 64 bytes at a time */ /* blat 32 bytes at a time */
/* XXX for really big copies perhaps we should use more registers */ /* XXX for really big copies perhaps we should use more registers */
Lmemcpy_bloop32: Lmemcpy_bloop32:
ldmdb r1!, {r3, r4, r7, r8, r9, r10, r12, lr} ldmdb r1!, {r3, r4, r7, r8, r9, r10, r12, lr}
stmdb r0!, {r3, r4, r7, r8, r9, r10, r12, lr} stmdb r0!, {r3, r4, r7, r8, r9, r10, r12, lr}
ldmdb r1!, {r3, r4, r7, r8, r9, r10, r12, lr} subs r2, r2, #0x20
stmdb r0!, {r3, r4, r7, r8, r9, r10, r12, lr}
subs r2, r2, #0x40
bge Lmemcpy_bloop32 bge Lmemcpy_bloop32
Lmemcpy_bl32: Lmemcpy_bl32:

View file

@ -49,11 +49,8 @@ get_define () # prefix struct member member...
echo "const int one = 1;" >/tmp/getoffs.c echo "const int one = 1;" >/tmp/getoffs.c
compile_rodata compile_rodata
ENDIAN=$(if [ "$rodata" -eq 1 ]; then echo be; else echo le; fi) ENDIAN=$(if [ "$rodata" -eq 1 ]; then echo be; else echo le; fi)
# determine output file
echo "const int vsz = sizeof(void *);" >/tmp/getoffs.c
compile_rodata
fn="${1:-.}/pico_int_o$((8*$rodata)).h"
# output header # output header
fn="${1:-.}/pico_int_offs.h"
echo "/* autogenerated by mkoffset.sh, do not edit */" >$fn echo "/* autogenerated by mkoffset.sh, do not edit */" >$fn
echo "/* target endianess: $ENDIAN, compiled with: $CC $CFLAGS */" >>$fn echo "/* target endianess: $ENDIAN, compiled with: $CC $CFLAGS */" >>$fn
# output offsets # output offsets