mirror of
https://github.com/RaySollium99/picodrive.git
synced 2025-09-05 15:27:46 -04:00
workaround for all-tiles-hi-priority performance issue
git-svn-id: file:///home/notaz/opt/svn/PicoDrive@237 be3aeb3a-fb24-0410-a615-afba39da0efa
This commit is contained in:
parent
bf53765555
commit
740da8c60b
2 changed files with 153 additions and 42 deletions
69
Pico/Draw.c
69
Pico/Draw.c
|
@ -22,8 +22,8 @@ static int HighCacheS[80+1]; // and sprites
|
|||
static int HighPreSpr[80*2+1]; // slightly preprocessed sprites
|
||||
char HighSprZ[320+8+8]; // Z-buffer for accurate sprites and shadow/hilight mode
|
||||
// (if bit 7 == 0, sh caused by tile; if bit 6 == 0 pixel must be shadowed, else hilighted, if bit5 == 1)
|
||||
// lsb->msb: moved sprites, all window tiles don't use same priority, accurate sprites (copied from PicoOpt), interlace
|
||||
// dirty sprites, sonic mode
|
||||
// lsb->msb: moved sprites, not all window tiles use same priority, accurate sprites (copied from PicoOpt), interlace
|
||||
// dirty sprites, sonic mode, have layer with all hi prio tiles (mk3), layer sh/hi already processed
|
||||
int rendstatus;
|
||||
void *DrawLineDest=DefOutBuff; // pointer to dest buffer where to draw this line to
|
||||
int Scanline=0; // Scanline
|
||||
|
@ -316,6 +316,8 @@ static void DrawStrip(struct TileStrip *ts, int sh)
|
|||
|
||||
// terminate the cache list
|
||||
*ts->hc = 0;
|
||||
// if oldcode was never changed, every tile in this layer was hi priority
|
||||
if (oldcode == -1) rendstatus|=0x40;
|
||||
}
|
||||
|
||||
// this is messy
|
||||
|
@ -381,6 +383,7 @@ void DrawStripVSRam(struct TileStrip *ts, int plane)
|
|||
|
||||
// terminate the cache list
|
||||
*ts->hc = 0;
|
||||
if (oldcode == -1) rendstatus|=0x40;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -578,26 +581,64 @@ static void DrawWindow(int tstart, int tend, int prio, int sh) // int *hcache
|
|||
|
||||
static void DrawTilesFromCache(int *hc, int sh)
|
||||
{
|
||||
int code, addr, zero, dx;
|
||||
int code, addr, dx;
|
||||
int pal;
|
||||
short blank=-1; // The tile we know is blank
|
||||
|
||||
// *ts->hc++ = code | (dx<<16) | (ty<<25); // cache it
|
||||
|
||||
while((code=*hc++)) {
|
||||
if(!sh && (short)code == blank) continue;
|
||||
if (sh && (rendstatus&0xc0))
|
||||
{
|
||||
if (!(rendstatus&0x80))
|
||||
{
|
||||
// since some layer has covered the whole line with hi priority tiles,
|
||||
// we can process the whole line once and then act as if sh/hi mode was off.
|
||||
rendstatus|=0x80;
|
||||
int c = 320/4, *zb = (int *)(HighCol+8);
|
||||
while (c--)
|
||||
{
|
||||
int tmp = *zb;
|
||||
if (!(tmp & 0x80808080)) *zb=tmp&0x3f3f3f3f;
|
||||
else {
|
||||
if(!(tmp&0x00000080)) tmp&=~0x000000c0; if(!(tmp&0x00008000)) tmp&=~0x0000c000;
|
||||
if(!(tmp&0x00800000)) tmp&=~0x00c00000; if(!(tmp&0x80000000)) tmp&=~0xc0000000;
|
||||
*zb=tmp;
|
||||
}
|
||||
zb++;
|
||||
}
|
||||
}
|
||||
sh = 0;
|
||||
}
|
||||
|
||||
if (sh)
|
||||
{
|
||||
while((code=*hc++)) {
|
||||
unsigned char *zb;
|
||||
// Get tile address/2:
|
||||
addr=(code&0x7ff)<<4;
|
||||
addr+=(unsigned int)code>>25; // y offset into tile
|
||||
dx=(code>>16)&0x1ff;
|
||||
if(sh) {
|
||||
unsigned char *zb = HighCol+dx;
|
||||
zb = HighCol+dx;
|
||||
if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++;
|
||||
if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++;
|
||||
if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++;
|
||||
if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++;
|
||||
|
||||
pal=((code>>9)&0x30);
|
||||
|
||||
if (code&0x0800) TileFlip(dx,addr,pal);
|
||||
else TileNorm(dx,addr,pal);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
short blank=-1; // The tile we know is blank
|
||||
while((code=*hc++)) {
|
||||
int zero;
|
||||
if((short)code == blank) continue;
|
||||
// Get tile address/2:
|
||||
addr=(code&0x7ff)<<4;
|
||||
addr+=(unsigned int)code>>25; // y offset into tile
|
||||
dx=(code>>16)&0x1ff;
|
||||
|
||||
pal=((code>>9)&0x30);
|
||||
|
||||
|
@ -607,6 +648,7 @@ static void DrawTilesFromCache(int *hc, int sh)
|
|||
if(zero) blank=(short)code;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------
|
||||
|
||||
|
@ -1189,6 +1231,8 @@ static int DrawDisplay(int sh)
|
|||
int win=0,edge=0,hvwind=0;
|
||||
int maxw, maxcells;
|
||||
|
||||
rendstatus&=~0xc0;
|
||||
|
||||
if(pvid->reg[12]&1) {
|
||||
maxw = 328; maxcells = 40;
|
||||
} else {
|
||||
|
@ -1236,6 +1280,15 @@ static int DrawDisplay(int sh)
|
|||
if(HighCacheA[0]) DrawTilesFromCache(HighCacheA, sh);
|
||||
DrawAllSprites(HighCacheS, maxw, 1, sh);
|
||||
|
||||
#if 0
|
||||
{
|
||||
int *c, a, b;
|
||||
for (a = 0, c = HighCacheA; *c; c++, a++);
|
||||
for (b = 0, c = HighCacheB; *c; c++, b++);
|
||||
printf("%i:%03i: a=%i, b=%i\n", Pico.m.frame_count, Scanline, a, b);
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
100
Pico/Draw.s
100
Pico/Draw.s
|
@ -306,7 +306,7 @@ DrawLayer:
|
|||
|
||||
tst r9, #1<<31
|
||||
mov r3, #0
|
||||
orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|ty)
|
||||
orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|had_output<<21|ty)
|
||||
movne r3, #0x40 @ default to shadowed pal on sh mode
|
||||
|
||||
mvn r9, #0 @ r9=prevcode=-1
|
||||
|
@ -342,6 +342,7 @@ DrawLayer:
|
|||
beq .DrawStrip_samecode @ we know stuff about this tile already
|
||||
|
||||
mov r9, r7 @ remember code
|
||||
orr r10, r10, #1<<21 @ seen non hi-prio tile
|
||||
|
||||
movs r2, r9, lsl #20 @ if (code&0x1000)
|
||||
mov r2, r2, lsl #1
|
||||
|
@ -386,6 +387,20 @@ DrawLayer:
|
|||
strneb r4, [r1], #1 @ have a remaining unaligned pixel?
|
||||
b .dsloop_subr1
|
||||
|
||||
.DrawStrip_hiprio_maybempt:
|
||||
cmp r7, r9
|
||||
beq .dsloop @ must've been empty, otherwise we wouldn't get here
|
||||
movs r2, r7, lsl #20 @ if (code&0x1000)
|
||||
mov r2, r2, lsl #1
|
||||
add r2, r2, r10, lsl #17
|
||||
mov r2, r2, lsr #17
|
||||
eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;
|
||||
ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels
|
||||
mov r9, r7 @ remember code
|
||||
tst r2, r2
|
||||
beq .dsloop
|
||||
orr r10, r10, #1<<22
|
||||
|
||||
.DrawStrip_hiprio:
|
||||
tst r10, #0x00c00000
|
||||
beq .DrawStrip_hiprio_maybempt
|
||||
|
@ -398,24 +413,14 @@ DrawLayer:
|
|||
mov r0, #0xf
|
||||
b .dsloop
|
||||
|
||||
.DrawStrip_hiprio_maybempt:
|
||||
cmp r7, r9
|
||||
beq .dsloop @ must've been empty, otherwise we wouldn't get here
|
||||
movs r2, r7, lsl #20 @ if (code&0x1000)
|
||||
mov r2, r2, lsl #1
|
||||
add r2, r2, r10, lsl #17
|
||||
mov r2, r2, lsr #17
|
||||
eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;
|
||||
ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels
|
||||
mov r9, r7 @ remember code
|
||||
tst r2, r2
|
||||
orrne r10, r10, #1<<22
|
||||
bne .DrawStrip_hiprio
|
||||
b .dsloop
|
||||
|
||||
.dsloop_exit:
|
||||
tst r10, #1<<21 @ seen non hi-prio tile
|
||||
ldreq r1, =rendstatus
|
||||
mov r0, #0
|
||||
ldreq r2, [r1]
|
||||
str r0, [r6] @ terminate the cache list
|
||||
orreq r2, r2, #0x40 @ had a layer with all hi-prio tiles
|
||||
streq r2, [r1]
|
||||
|
||||
ldmfd sp!, {r4-r11,lr}
|
||||
bx lr
|
||||
|
@ -426,7 +431,7 @@ DrawLayer:
|
|||
rsb r8, r3, #0
|
||||
mov r8, r8, lsr #3 @ r8=tilex=(-ts->hscroll)>>3
|
||||
bic r8, r8, #0xff000000
|
||||
orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|tilex[15:0])
|
||||
orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|had_output[24]|tilex[15:0])
|
||||
|
||||
ldr r4, =Scanline
|
||||
orr r5, r1, r10, lsl #24
|
||||
|
@ -463,7 +468,7 @@ DrawLayer:
|
|||
add r10,r10, #0x01000000
|
||||
and r4, r10, #0x003f0000
|
||||
cmp r4, r10, asr #8
|
||||
ble .dsloop_exit
|
||||
ble .dsloop_vs_exit
|
||||
|
||||
@ calc offset and read tileline code to r7, also calc ty
|
||||
add r7, lr, #0x012000
|
||||
|
@ -500,6 +505,7 @@ DrawLayer:
|
|||
beq .DrawStrip_vs_samecode @ we know stuff about this tile already
|
||||
|
||||
mov r9, r7 @ remember code
|
||||
orr r8, r8, #1<<24 @ seen non hi-prio tile
|
||||
|
||||
movs r2, r9, lsl #20 @ if (code&0x1000)
|
||||
mov r2, r2, lsl #1
|
||||
|
@ -571,6 +577,18 @@ DrawLayer:
|
|||
bne .DrawStrip_vs_hiprio
|
||||
b .dsloop_vs
|
||||
|
||||
.dsloop_vs_exit:
|
||||
tst r8, #1<<24 @ seen non hi-prio tile
|
||||
ldreq r1, =rendstatus
|
||||
mov r0, #0
|
||||
ldreq r2, [r1]
|
||||
str r0, [r6] @ terminate the cache list
|
||||
orreq r2, r2, #0x40 @ had a layer with all hi-prio tiles
|
||||
streq r2, [r1]
|
||||
|
||||
ldmfd sp!, {r4-r11,lr}
|
||||
bx lr
|
||||
|
||||
|
||||
@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
||||
|
||||
|
@ -650,14 +668,15 @@ BackFill:
|
|||
DrawTilesFromCache:
|
||||
stmfd sp!, {r4-r8,r11,lr}
|
||||
|
||||
mvn r5, #0 @ r5=prevcode=-1
|
||||
mov r8, r1
|
||||
|
||||
@ cache some stuff to avoid mem access
|
||||
ldr r11,=HighCol
|
||||
ldr lr, =(Pico+0x10000) @ lr=Pico.vram
|
||||
mov r12,#0xf
|
||||
|
||||
mvn r5, #0 @ r5=prevcode=-1
|
||||
movs r8, r1
|
||||
bne .dtfc_check_rendflags
|
||||
|
||||
@ scratch: r4, r7
|
||||
.dtfc_loop:
|
||||
ldr r6, [r0], #4 @ read code
|
||||
|
@ -769,6 +788,45 @@ DrawTilesFromCache:
|
|||
mov r12, #0xf
|
||||
b .dtfc_loop
|
||||
|
||||
@ check if we have detected layer covered with hi-prio tiles:
|
||||
.dtfc_check_rendflags:
|
||||
ldr r1, =rendstatus
|
||||
ldr r2, [r1]
|
||||
tst r2, #0xc0
|
||||
beq .dtfc_loop
|
||||
mov r8, #0 @ sh/hi mode off
|
||||
tst r2, #0x80
|
||||
bne .dtfc_loop @ already processed
|
||||
orr r2, r2, #0x80
|
||||
str r2, [r1]
|
||||
|
||||
add r1, r11,#8
|
||||
mov r3, #320/4
|
||||
mov r7, #0x80
|
||||
orr r7, r7, r7, lsl #8
|
||||
orr r7, r7, r7, lsl #16
|
||||
mov r6, #0x3f
|
||||
orr r6, r6, r6, lsl #8
|
||||
orr r6, r6, r6, lsl #16
|
||||
.dtfc_loop_shprep:
|
||||
subs r3, r3, #1
|
||||
bmi .dtfc_loop @ done
|
||||
ldr r2, [r1]
|
||||
tst r2, r7
|
||||
andeq r2, r2, r6
|
||||
streq r2, [r1], #4
|
||||
beq .dtfc_loop_shprep
|
||||
tst r2, #0x80000000
|
||||
biceq r2, r2, #0xc0000000
|
||||
tst r2, #0x00800000
|
||||
biceq r2, r2, #0x00c00000
|
||||
tst r2, #0x00008000
|
||||
biceq r2, r2, #0x0000c000
|
||||
tst r2, #0x00000080
|
||||
biceq r2, r2, #0x000000c0
|
||||
str r2, [r1], #4
|
||||
b .dtfc_loop_shprep
|
||||
|
||||
.pool
|
||||
|
||||
@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue