vdp sprite handling improvement (SAT cache)

This commit is contained in:
kub 2020-02-16 13:53:50 +01:00
parent daf29df963
commit 25be5c52b0
3 changed files with 173 additions and 193 deletions

View file

@ -45,6 +45,8 @@ static int HighCacheA[41+1]; // caches for high layers
static int HighCacheB[41+1];
static int HighPreSpr[80*2+1]; // slightly preprocessed sprites
unsigned int VdpSATCache[128]; // VDP sprite cache (1st 32 sprite attr bits)
#define LF_PLANE_1 (1 << 0)
#define LF_SH (1 << 1) // must be = 2
#define LF_FORCE (1 << 2)
@ -1124,14 +1126,14 @@ static void DrawSpritesForced(unsigned char *sprited)
// Index + 0 : hhhhvvvv ----hhvv yyyyyyyy yyyyyyyy // v, h: vert./horiz. size
// Index + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8
static NOINLINE void PrepareSprites(int full)
static NOINLINE void PrepareSprites(int max_lines)
{
const struct PicoVideo *pvid=&Pico.video;
const struct PicoEState *est=&Pico.est;
int u,link=0,sh;
int table=0;
int *pd = HighPreSpr;
int max_lines = 224, max_sprites = 80, max_width = 328;
int max_sprites = 80, max_width = 328;
int max_line_sprites = 20; // 20 sprites, 40 tiles
if (!(Pico.video.reg[12]&1))
@ -1139,72 +1141,13 @@ static NOINLINE void PrepareSprites(int full)
if (PicoIn.opt & POPT_DIS_SPRITE_LIM)
max_line_sprites = MAX_LINE_SPRITES;
if (pvid->reg[1]&8) max_lines = 240;
sh = Pico.video.reg[0xC]&8; // shadow/hilight?
table=pvid->reg[5]&0x7f;
if (pvid->reg[12]&1) table&=0x7e; // Lowest bit 0 in 40-cell mode
table<<=8; // Get sprite table address/2
if (!full)
{
int pack;
// updates: tilecode, sx
for (u=0; u < max_sprites && link < max_sprites && (pack = *pd); u++, pd+=2)
{
unsigned int *sprite;
int code2, sx, sy, height, width;
sprite=(unsigned int *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite
// parse sprite info
code2 = sprite[1];
sx = (code2>>16)&0x1ff;
sx -= 0x78; // Get X coordinate + 8
sy = (pack << 16) >> 16;
height = (pack >> 24) & 0xf;
width = (pack >> 28);
if (sy < max_lines &&
sy + (height<<3) > est->DrawScanline) // sprite onscreen (y)?
{
int y = (sy >= est->DrawScanline) ? sy : est->DrawScanline;
int entry = ((pd - HighPreSpr) / 2) | ((code2>>8)&0x80);
for (; y < sy + (height<<3) && y < max_lines; y++)
{
int i, cnt;
cnt = HighLnSpr[y][0];
if (HighLnSpr[y][3] >= max_line_sprites) continue; // sprite limit?
for (i = 0; i < cnt; i++)
if (((HighLnSpr[y][4+i] ^ entry) & 0x7f) == 0) goto found;
// this sprite was previously missing
HighLnSpr[y][3] ++;
if (sx > -24 && sx < max_width) { // onscreen x
HighLnSpr[y][4+cnt] = entry; // XXX wrong sequence?
HighLnSpr[y][5+cnt] = width; // XXX should count tiles for limit
HighLnSpr[y][0] = cnt + 1;
}
found:;
if (entry & 0x80)
HighLnSpr[y][1] |= SPRL_HAVE_HI;
else HighLnSpr[y][1] |= SPRL_HAVE_LO;
}
}
code2 &= ~0xfe000000;
code2 -= 0x00780000; // Get X coordinate + 8 in upper 16 bits
pd[1] = code2;
// Find next sprite
link=(sprite[0]>>16)&0x7f;
if (!link) break; // End of sprites
}
}
else
{
for (u = 0; u < max_lines; u++)
for (u = est->DrawScanline; u < max_lines; u++)
*((int *)&HighLnSpr[u][0]) = 0;
for (u = 0; u < max_sprites && link < max_sprites; u++)
@ -1214,18 +1157,19 @@ found:;
sprite=(unsigned int *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite
// parse sprite info
code = sprite[0];
// parse sprite info. the 1st half comes from the VDPs internal cache,
// the 2nd half is read from VRAM
code = VdpSATCache[link]; // normally but not always equal to sprite[0]
sy = (code&0x1ff)-0x80;
hv = (code>>24)&0xf;
height = (hv&3)+1;
width = (hv>>2)+1;
code2 = sprite[1];
sx = (code2>>16)&0x1ff;
sx -= 0x78; // Get X coordinate + 8
if (sy < max_lines && sy + (height<<3) > est->DrawScanline) // sprite onscreen (y)?
if (sy < max_lines && sy + (height<<3) >= est->DrawScanline) // sprite onscreen (y)?
{
int entry, y, w, sx_min, onscr_x, maybe_op = 0;
@ -1292,7 +1236,6 @@ found:;
printf("\n");
}
#endif
}
}
#ifndef _ASM_DRAW_C
@ -1505,12 +1448,11 @@ static int DrawDisplay(int sh)
int win=0, edge=0, hvwind=0, lflags;
int maxw, maxcells;
if (est->rendstatus & (PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES)) {
// elprintf(EL_STATUS, "PrepareSprites(%i)", (est->rendstatus>>4)&1);
PrepareSprites(est->rendstatus & PDRAW_DIRTY_SPRITES);
est->rendstatus &= ~(PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES);
}
if (!(est->DrawScanline & 15) ||
(est->rendstatus & (PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES)))
PrepareSprites((est->DrawScanline+16) & ~15);
est->rendstatus &= ~(PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES);
est->rendstatus &= ~(PDRAW_SHHI_DONE|PDRAW_PLANE_HI_PRIO);
if (pvid->reg[12]&1) {
@ -1656,8 +1598,6 @@ PICO_INTERNAL void PicoFrameStart(void)
if (PicoIn.opt & POPT_ALT_RENDERER)
return;
PrepareSprites(1);
}
static void DrawBlankedLine(int line, int offs, int sh, int bgc)

View file

@ -676,6 +676,7 @@ extern int (*PicoScanEnd)(unsigned int num);
extern unsigned char HighLnSpr[240][4+MAX_LINE_SPRITES+1];
extern void *DrawLineDestBase;
extern int DrawLineDestIncrement;
extern unsigned int VdpSATCache[128];
// draw2.c
void PicoDraw2Init(void);

View file

@ -15,6 +15,8 @@ extern const unsigned char hcounts_32[];
extern const unsigned char hcounts_40[];
static int blankline; // display disabled for this line
static unsigned sat; // VRAM addr of sprite attribute table
static int satxbits; // index bits in SAT address
int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned int *mask) = NULL;
@ -315,14 +317,37 @@ void PicoVideoFIFOMode(int active)
static __inline void AutoIncrement(void)
{
Pico.video.addr=(unsigned short)(Pico.video.addr+Pico.video.reg[0xf]);
if (Pico.video.addr < Pico.video.reg[0xf]) Pico.video.addr_u ^= 1;
}
static NOINLINE unsigned int VideoWrite128(u32 a, u16 d)
static __inline void UpdateSAT(u32 a, u32 d)
{
Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES;
if (!((a^sat) >> satxbits) && !(a & 4)) {
int num = (a >> 3) & 0x7f;
((u16 *)&VdpSATCache[num])[(a&3) >> 1] = d;
}
}
static NOINLINE void VideoWriteVRAM128(u32 a, u16 d)
{
// nasty
a = ((a & 2) >> 1) | ((a & 0x400) >> 9) | (a & 0x3FC) | ((a & 0x1F800) >> 1);
((u8 *)PicoMem.vram)[a] = d;
return a;
u32 b = ((a & 2) >> 1) | ((a & 0x400) >> 9) | (a & 0x3FC) | ((a & 0x1F800) >> 1);
((u8 *)PicoMem.vram)[b] = d;
if (!((u16)(b^sat) >> satxbits))
Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES;
if (!((u16)(a^sat) >> satxbits))
UpdateSAT(a, d);
}
static void VideoWriteVRAM(u32 a, u16 d)
{
PicoMem.vram [(u16)a >> 1] = d;
if (!((u16)(a^sat) >> satxbits))
UpdateSAT(a, d);
}
static void VideoWrite(u16 d)
@ -333,19 +358,15 @@ static void VideoWrite(u16 d)
{
case 1: if (a & 1)
d = (u16)((d << 8) | (d >> 8));
PicoMem.vram [(a >> 1) & 0x7fff] = d;
if ((unsigned)(a - ((Pico.video.reg[5]&0x7f) << 9)) < 0x400)
Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES;
a |= Pico.video.addr_u << 16;
VideoWriteVRAM(a, d);
break;
case 3: if (PicoMem.cram [(a >> 1) & 0x3f] != d) Pico.m.dirtyPal = 1;
PicoMem.cram [(a >> 1) & 0x3f] = d; break;
case 5: PicoMem.vsram[(a >> 1) & 0x3f] = d; break;
case 0x81: if (a & 1)
d = (u16)((d << 8) | (d >> 8));
PicoMem.cram [(a >> 1) & 0x3f] = d & 0xeee; break;
case 5: PicoMem.vsram[(a >> 1) & 0x3f] = d & 0x7ff; break;
case 0x81:
a |= Pico.video.addr_u << 16;
a = VideoWrite128(a, d);
if ((unsigned)(a - ((Pico.video.reg[5]&0x7f) << 9)) < 0x400)
Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES;
VideoWriteVRAM128(a, d);
break;
//default:elprintf(EL_ANOMALY, "VDP write %04x with bad type %i", d, Pico.video.type); break;
}
@ -363,9 +384,10 @@ static unsigned int VideoRead(void)
switch (Pico.video.type)
{
case 0: d=PicoMem.vram [a & 0x7fff]; break;
case 8: d=(PicoMem.cram [a & 0x003f] & 0x0eee) | (d & ~0x0eee); break;
case 8: d=PicoMem.cram [a & 0x003f] | (d & ~0x0eee); break;
case 4: if ((a & 0x3f) >= 0x28) a = 0;
d=(PicoMem.vsram [a & 0x003f] & 0x07ff) | (d & ~0x07ff); break;
d=PicoMem.vsram [a & 0x003f] | (d & ~0x07ff); break;
case 12:a=PicoMem.vram [a & 0x7fff]; if (Pico.video.addr&1) a >>= 8;
d=(a & 0x00ff) | (d & ~0x00ff); break;
default:elprintf(EL_ANOMALY, "VDP read with bad type %i", Pico.video.type); break;
@ -391,7 +413,7 @@ static int GetDmaLength(void)
static void DmaSlow(int len, unsigned int source)
{
u32 inc = Pico.video.reg[0xf];
u32 a = Pico.video.addr;
u32 a = Pico.video.addr | (Pico.video.addr_u << 16);
u16 *r, *base = NULL;
u32 mask = 0x1ffff;
@ -451,26 +473,28 @@ static void DmaSlow(int len, unsigned int source)
switch (Pico.video.type)
{
case 1: // vram
#if 0
r = PicoMem.vram;
if (inc == 2 && !(a & 1) && a + len * 2 < 0x10000
&& !(((source + len - 1) ^ source) & ~mask))
if (inc == 2 && !(a & 1) && (a >> 16) == ((a + len*2) >> 16) &&
(source & ~mask) == ((source + len-1) & ~mask) &&
(a << 16 >= (sat+0x280) << 16 || (a + len*2) << 16 <= sat << 16))
{
// most used DMA mode
memcpy((char *)r + a, base + (source & mask), len * 2);
a += len * 2;
}
else
#endif
{
for(; len; len--)
{
u16 d = base[source++ & mask];
if(a & 1) d=(d<<8)|(d>>8);
r[a >> 1] = d;
VideoWriteVRAM(a, d);
// AutoIncrement
a = (u16)(a + inc);
a = (a+inc) & ~0x20000;
}
}
Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES;
break;
case 3: // cram
@ -478,9 +502,9 @@ static void DmaSlow(int len, unsigned int source)
r = PicoMem.cram;
for (; len; len--)
{
r[(a / 2) & 0x3f] = base[source++ & mask];
r[(a / 2) & 0x3f] = base[source++ & mask] & 0xeee;
// AutoIncrement
a += inc;
a = (a+inc) & ~0x20000;
}
break;
@ -488,22 +512,20 @@ static void DmaSlow(int len, unsigned int source)
r = PicoMem.vsram;
for (; len; len--)
{
r[(a / 2) & 0x3f] = base[source++ & mask];
r[(a / 2) & 0x3f] = base[source++ & mask] & 0x7ff;
// AutoIncrement
a += inc;
a = (a+inc) & ~0x20000;
}
break;
case 0x81: // vram 128k
a |= Pico.video.addr_u << 16;
for(; len; len--)
{
VideoWrite128(a, base[source++ & mask]);
u16 d = base[source++ & mask];
VideoWriteVRAM128(a, d);
// AutoIncrement
a = (a + inc) & 0x1ffff;
a = (a+inc) & ~0x20000;
}
Pico.video.addr_u = a >> 16;
Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES;
break;
default:
@ -512,12 +534,13 @@ static void DmaSlow(int len, unsigned int source)
break;
}
// remember addr
Pico.video.addr=(u16)a;
Pico.video.addr = a;
Pico.video.addr_u = a >> 16;
}
static void DmaCopy(int len)
{
u16 a = Pico.video.addr;
u32 a = Pico.video.addr | (Pico.video.addr_u << 16);
u8 *vr = (u8 *)PicoMem.vram;
u8 inc = Pico.video.reg[0xf];
int source;
@ -528,21 +551,23 @@ static void DmaCopy(int len)
source =Pico.video.reg[0x15];
source|=Pico.video.reg[0x16]<<8;
// XXX implement VRAM 128k? Is this even working?
// XXX implement VRAM 128k? Is this even working? count still in bytes?
for (; len; len--)
{
vr[a] = vr[source++ & 0xffff];
vr[(u16)a] = vr[(u16)(source++)];
if (!((u16)(a^sat) >> satxbits))
UpdateSAT(a, ((u16 *)vr)[(u16)a >> 1]);
// AutoIncrement
a=(u16)(a+inc);
a = (a+inc) & ~0x20000;
}
// remember addr
Pico.video.addr=a;
Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES;
Pico.video.addr = a;
Pico.video.addr_u = a >> 16;
}
static NOINLINE void DmaFill(int data)
{
u16 a = Pico.video.addr;
u32 a = Pico.video.addr | (Pico.video.addr_u << 16);
u8 *vr = (u8 *)PicoMem.vram;
u8 high = (u8)(data >> 8);
u8 inc = Pico.video.reg[0xf];
@ -561,40 +586,41 @@ static NOINLINE void DmaFill(int data)
for (l = len; l; l--) {
// Write upper byte to adjacent address
// (here we are byteswapped, so address is already 'adjacent')
vr[a] = high;
vr[(u16)a] = high;
if (!((u16)(a^sat) >> satxbits))
UpdateSAT(a, ((u16 *)vr)[(u16)a >> 1]);
// Increment address register
a = (u16)(a + inc);
a = (a+inc) & ~0x20000;
}
Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES;
break;
case 3: // cram
Pico.m.dirtyPal = 1;
data &= 0xeee;
for (l = len; l; l--) {
PicoMem.cram[(a/2) & 0x3f] = data;
// Increment address register
a += inc;
a = (a+inc) & ~0x20000;
}
break;
case 5: { // vsram
data &= 0x7ff;
for (l = len; l; l--) {
PicoMem.vsram[(a/2) & 0x3f] = data;
// Increment address register
a += inc;
a = (a+inc) & ~0x20000;
}
break;
}
case 0x81: // vram 128k
for (l = len; l; l--) {
VideoWrite128(a, data);
VideoWriteVRAM128(a, data);
// Increment address register
a = (a + inc) & 0x1ffff;
a = (a+inc) & ~0x20000;
}
Pico.video.addr_u = a >> 16;
Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES;
break;
default:
a += len * inc;
@ -603,6 +629,7 @@ static NOINLINE void DmaFill(int data)
// remember addr
Pico.video.addr = a;
Pico.video.addr_u = a >> 16;
// register update
Pico.video.reg[0x13] = Pico.video.reg[0x14] = 0;
source = Pico.video.reg[0x15];
@ -779,14 +806,21 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d)
pvid->status |= ((d >> 3) ^ SR_VB) & SR_VB; // forced blanking
goto update_irq;
case 0x05:
//elprintf(EL_STATUS, "spritep moved to %04x", (unsigned)(Pico.video.reg[5]&0x7f) << 9);
case 0x06:
if (d^dold) Pico.est.rendstatus |= PDRAW_SPRITES_MOVED;
break;
case 0x0c:
// renderers should update their palettes if sh/hi mode is changed
if ((d^dold)&8) Pico.m.dirtyPal = 1;
break;
default:
return;
}
sat = ((pvid->reg[5]&0x7f) << 9) | ((pvid->reg[6]&0x20) << 11);
satxbits = 9;
if (Pico.video.reg[12]&1)
sat &= ~0x200, satxbits = 10; // H40, zero lowest SAT bit
//elprintf(EL_STATUS, "spritep moved to %04x", sat);
return;
update_irq:
@ -991,6 +1025,11 @@ void PicoVideoLoad(void)
Pico.m.dma_xfers = 0;
}
sat = ((pv->reg[5]&0x7f) << 9) | ((pv->reg[6]&0x20) << 11);
satxbits = 9;
if (pv->reg[12]&1)
sat &= ~0x200, satxbits = 10; // H40, zero lowest SAT bit
// rebuild SAT cache XXX wrong since cache and memory can differ
for (l = 0; l < 80; l++) {
*((u16 *)VdpSATCache + 2*l ) = PicoMem.vram[(sat>>1) + l*4 ];