workaround for all-tiles-hi-priority performance issue

git-svn-id: file:///home/notaz/opt/svn/PicoDrive@237 be3aeb3a-fb24-0410-a615-afba39da0efa
This commit is contained in:
notaz 2007-09-02 14:52:01 +00:00
parent bf53765555
commit 740da8c60b
2 changed files with 153 additions and 42 deletions

View file

@ -22,8 +22,8 @@ static int HighCacheS[80+1]; // and sprites
static int HighPreSpr[80*2+1]; // slightly preprocessed sprites
char HighSprZ[320+8+8]; // Z-buffer for accurate sprites and shadow/hilight mode
// (if bit 7 == 0, sh caused by tile; if bit 6 == 0 pixel must be shadowed, else hilighted, if bit5 == 1)
// lsb->msb: moved sprites, all window tiles don't use same priority, accurate sprites (copied from PicoOpt), interlace
// dirty sprites, sonic mode
// lsb->msb: moved sprites, not all window tiles use same priority, accurate sprites (copied from PicoOpt), interlace,
// dirty sprites, sonic mode, have layer with all hi prio tiles (mk3) (0x40), layer sh/hi already processed (0x80)
int rendstatus;
void *DrawLineDest=DefOutBuff; // pointer to dest buffer where to draw this line to
int Scanline=0; // Scanline
@ -316,6 +316,8 @@ static void DrawStrip(struct TileStrip *ts, int sh)
// terminate the cache list
*ts->hc = 0;
// if oldcode wasn't changed, it means the whole layer is hi priority
if (oldcode == -1) rendstatus|=0x40;
}
// this is messy
@ -381,6 +383,7 @@ void DrawStripVSRam(struct TileStrip *ts, int plane)
// terminate the cache list
*ts->hc = 0;
if (oldcode == -1) rendstatus|=0x40;
}
#endif
@ -578,26 +581,64 @@ static void DrawWindow(int tstart, int tend, int prio, int sh) // int *hcache
static void DrawTilesFromCache(int *hc, int sh)
{
int code, addr, zero, dx;
int code, addr, dx;
int pal;
short blank=-1; // The tile we know is blank
// *ts->hc++ = code | (dx<<16) | (ty<<25); // cache it
while((code=*hc++)) {
if(!sh && (short)code == blank) continue;
if (sh && (rendstatus&0xc0))
{
if (!(rendstatus&0x80))
{
// as some layer has covered the whole line with hi priority tiles,
// we can process the whole line once and then act as if sh/hi mode was off.
rendstatus|=0x80;
int c = 320/4, *zb = (int *)(HighCol+8);
while (c--)
{
int tmp = *zb;
if (!(tmp & 0x80808080)) *zb=tmp&0x3f3f3f3f;
else {
if(!(tmp&0x00000080)) tmp&=~0x000000c0; if(!(tmp&0x00008000)) tmp&=~0x0000c000;
if(!(tmp&0x00800000)) tmp&=~0x00c00000; if(!(tmp&0x80000000)) tmp&=~0xc0000000;
*zb=tmp;
}
zb++;
}
}
sh = 0;
}
if (sh)
{
while((code=*hc++)) {
unsigned char *zb;
// Get tile address/2:
addr=(code&0x7ff)<<4;
addr+=(unsigned int)code>>25; // y offset into tile
dx=(code>>16)&0x1ff;
if(sh) {
unsigned char *zb = HighCol+dx;
zb = HighCol+dx;
if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++;
if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++;
if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++;
if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++;
pal=((code>>9)&0x30);
if (code&0x0800) TileFlip(dx,addr,pal);
else TileNorm(dx,addr,pal);
}
}
else
{
short blank=-1; // The tile we know is blank
while((code=*hc++)) {
int zero;
if((short)code == blank) continue;
// Get tile address/2:
addr=(code&0x7ff)<<4;
addr+=(unsigned int)code>>25; // y offset into tile
dx=(code>>16)&0x1ff;
pal=((code>>9)&0x30);
@ -607,6 +648,7 @@ static void DrawTilesFromCache(int *hc, int sh)
if(zero) blank=(short)code;
}
}
}
// --------------------------------------------
@ -1189,6 +1231,8 @@ static int DrawDisplay(int sh)
int win=0,edge=0,hvwind=0;
int maxw, maxcells;
rendstatus&=~0xc0;
if(pvid->reg[12]&1) {
maxw = 328; maxcells = 40;
} else {
@ -1236,6 +1280,15 @@ static int DrawDisplay(int sh)
if(HighCacheA[0]) DrawTilesFromCache(HighCacheA, sh);
DrawAllSprites(HighCacheS, maxw, 1, sh);
#if 0
{
int *c, a, b;
for (a = 0, c = HighCacheA; *c; c++, a++);
for (b = 0, c = HighCacheB; *c; c++, b++);
printf("%i:%03i: a=%i, b=%i\n", Pico.m.frame_count, Scanline, a, b);
}
#endif
return 0;
}

View file

@ -306,7 +306,7 @@ DrawLayer:
tst r9, #1<<31
mov r3, #0
orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|ty)
orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|had_output<<21|ty)
movne r3, #0x40 @ default to shadowed pal on sh mode
mvn r9, #0 @ r9=prevcode=-1
@ -342,6 +342,7 @@ DrawLayer:
beq .DrawStrip_samecode @ we know stuff about this tile already
mov r9, r7 @ remember code
orr r10, r10, #1<<21 @ seen non hi-prio tile
movs r2, r9, lsl #20 @ if (code&0x1000)
mov r2, r2, lsl #1
@ -386,6 +387,20 @@ DrawLayer:
strneb r4, [r1], #1 @ have a remaining unaligned pixel?
b .dsloop_subr1
.DrawStrip_hiprio_maybempt:
cmp r7, r9
beq .dsloop @ must've been empty, otherwise we wouldn't get here
movs r2, r7, lsl #20 @ if (code&0x1000)
mov r2, r2, lsl #1
add r2, r2, r10, lsl #17
mov r2, r2, lsr #17
eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;
ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels
mov r9, r7 @ remember code
tst r2, r2
beq .dsloop
orr r10, r10, #1<<22
.DrawStrip_hiprio:
tst r10, #0x00c00000
beq .DrawStrip_hiprio_maybempt
@ -398,24 +413,14 @@ DrawLayer:
mov r0, #0xf
b .dsloop
.DrawStrip_hiprio_maybempt:
cmp r7, r9
beq .dsloop @ must've been empty, otherwise we wouldn't get here
movs r2, r7, lsl #20 @ if (code&0x1000)
mov r2, r2, lsl #1
add r2, r2, r10, lsl #17
mov r2, r2, lsr #17
eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;
ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels
mov r9, r7 @ remember code
tst r2, r2
orrne r10, r10, #1<<22
bne .DrawStrip_hiprio
b .dsloop
.dsloop_exit:
tst r10, #1<<21 @ seen non hi-prio tile
ldreq r1, =rendstatus
mov r0, #0
ldreq r2, [r1]
str r0, [r6] @ terminate the cache list
orreq r2, r2, #0x40 @ had a layer with all hi-prio tiles
streq r2, [r1]
ldmfd sp!, {r4-r11,lr}
bx lr
@ -426,7 +431,7 @@ DrawLayer:
rsb r8, r3, #0
mov r8, r8, lsr #3 @ r8=tilex=(-ts->hscroll)>>3
bic r8, r8, #0xff000000
orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|tilex[15:0])
orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|had_output[24]|tilex[15:0])
ldr r4, =Scanline
orr r5, r1, r10, lsl #24
@ -463,7 +468,7 @@ DrawLayer:
add r10,r10, #0x01000000
and r4, r10, #0x003f0000
cmp r4, r10, asr #8
ble .dsloop_exit
ble .dsloop_vs_exit
@ calc offset and read tileline code to r7, also calc ty
add r7, lr, #0x012000
@ -500,6 +505,7 @@ DrawLayer:
beq .DrawStrip_vs_samecode @ we know stuff about this tile already
mov r9, r7 @ remember code
orr r8, r8, #1<<24 @ seen non hi-prio tile
movs r2, r9, lsl #20 @ if (code&0x1000)
mov r2, r2, lsl #1
@ -571,6 +577,18 @@ DrawLayer:
bne .DrawStrip_vs_hiprio
b .dsloop_vs
.dsloop_vs_exit:
tst r8, #1<<24 @ seen non hi-prio tile
ldreq r1, =rendstatus
mov r0, #0
ldreq r2, [r1]
str r0, [r6] @ terminate the cache list
orreq r2, r2, #0x40 @ had a layer with all hi-prio tiles
streq r2, [r1]
ldmfd sp!, {r4-r11,lr}
bx lr
@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@ -650,14 +668,15 @@ BackFill:
DrawTilesFromCache:
stmfd sp!, {r4-r8,r11,lr}
mvn r5, #0 @ r5=prevcode=-1
mov r8, r1
@ cache some stuff to avoid mem access
ldr r11,=HighCol
ldr lr, =(Pico+0x10000) @ lr=Pico.vram
mov r12,#0xf
mvn r5, #0 @ r5=prevcode=-1
movs r8, r1
bne .dtfc_check_rendflags
@ scratch: r4, r7
.dtfc_loop:
ldr r6, [r0], #4 @ read code
@ -769,6 +788,45 @@ DrawTilesFromCache:
mov r12, #0xf
b .dtfc_loop
@ check if we have detected a layer covered with hi-prio tiles:
.dtfc_check_rendflags:
ldr r1, =rendstatus
ldr r2, [r1]
tst r2, #0xc0
beq .dtfc_loop
mov r8, #0 @ sh/hi mode off
tst r2, #0x80
bne .dtfc_loop @ already processed
orr r2, r2, #0x80
str r2, [r1]
add r1, r11,#8
mov r3, #320/4
mov r7, #0x80
orr r7, r7, r7, lsl #8
orr r7, r7, r7, lsl #16
mov r6, #0x3f
orr r6, r6, r6, lsl #8
orr r6, r6, r6, lsl #16
.dtfc_loop_shprep:
subs r3, r3, #1
bmi .dtfc_loop @ done
ldr r2, [r1]
tst r2, r7
andeq r2, r2, r6
streq r2, [r1], #4
beq .dtfc_loop_shprep
tst r2, #0x80000000
biceq r2, r2, #0xc0000000
tst r2, #0x00800000
biceq r2, r2, #0x00c00000
tst r2, #0x00008000
biceq r2, r2, #0x0000c000
tst r2, #0x00000080
biceq r2, r2, #0x000000c0
str r2, [r1], #4
b .dtfc_loop_shprep
.pool
@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@