vdp renderer, partial sync 8bit fast ARM asm with C code

This commit is contained in:
kub 2021-01-21 19:09:17 +01:00
parent 1cc774814d
commit 959ea39b23
4 changed files with 59 additions and 38 deletions

View file

@ -34,7 +34,7 @@ void (*PicoPrepareCram)()=0; // prepares PicoCramHigh for renderer to
// stuff available in asm:
#ifdef _ASM_DRAW_C
void BackFillFull(void *dst, int reg7);
void BackFillFull(void *dst, int reg7, int lwidth);
void DrawLayerFull(int plane, u32 *hcache, int planestart, int planeend,
struct PicoEState *est);
void DrawTilesFromCacheF(u32 *hc, struct PicoEState *est);
@ -183,7 +183,7 @@ static void DrawWindowFull(int start, int end, int prio, struct PicoEState *est)
nametab += nametab_step*(start-scrstart);
// check priority
code=PicoMem.vram[nametab+tile_start];
code=est->PicoMem_vram[nametab+tile_start];
if ((code>>15) != prio) return; // hack: just assume that whole window uses same priority
scrpos+=8*est->Draw2Width+8;
@ -197,7 +197,7 @@ static void DrawWindowFull(int start, int end, int prio, struct PicoEState *est)
// unsigned short *pal=NULL;
unsigned char pal;
code=PicoMem.vram[nametab+tilex];
code=est->PicoMem_vram[nametab+tilex];
if (code==blank) continue;
// Get tile address/2:
@ -250,7 +250,7 @@ static void DrawLayerFull(int plane, u32 *hcache, int planestart, int planeend,
if(!(pvid->reg[11]&3)) { // full screen scroll
// Get horizontal scroll value
hscroll=PicoMem.vram[htab&0x7fff];
hscroll=est->PicoMem_vram[htab&0x7fff];
htab = 0; // this marks that we don't have to update scroll value
}
@ -297,7 +297,7 @@ static void DrawLayerFull(int plane, u32 *hcache, int planestart, int planeend,
if(htab) {
int htaddr=htab+(trow<<4);
if(trow) htaddr-=(vscroll&7)<<1;
hscroll=PicoMem.vram[htaddr&0x7fff];
hscroll=est->PicoMem_vram[htaddr&0x7fff];
}
// Draw tiles across screen:
@ -323,7 +323,7 @@ static void DrawLayerFull(int plane, u32 *hcache, int planestart, int planeend,
#endif
vsidx++;
code=PicoMem.vram[nametab_row+(tilex&xmask)];
code=est->PicoMem_vram[nametab_row+(tilex&xmask)];
if (code==blank) continue;
if (code>>15) { // high priority tile
@ -442,18 +442,19 @@ static void DrawSpriteFull(unsigned int *sprite, struct PicoEState *est)
pal=(unsigned char)((code>>9)&0x30);
// goto first vertically visible tile
while(sy <= scrstart*8) { sy+=8; tile+=tdeltay; height--; }
sy -= scrstart*8;
while(sy <= 0) { sy+=8; tile+=tdeltay; height--; }
scrpos = est->Draw2FB;
if (est->rendstatus&PDRAW_BORDER_32)
scrpos += 32;
scrpos+=(sy-scrstart*8)*est->Draw2Width;
scrpos+=sy*est->Draw2Width;
for (; height > 0; height--, sy+=8, tile+=tdeltay)
{
int w = width, x=sx, t=tile, s;
if((sy-scrstart*8) >= END_ROW*8+8) return; // offscreen
if(sy >= END_ROW*8+8) return; // offscreen
for (; w; w--,x+=8,t+=tdeltax)
{
@ -502,7 +503,7 @@ static void DrawAllSpritesFull(int prio, int maxwidth, struct PicoEState *est)
unsigned int *sprite=NULL;
int code, code2, sx, sy, height;
sprite=(u32 *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite
sprite=(u32 *)(est->PicoMem_vram+((table+(link<<2))&0x7ffc)); // Find sprite
// get sprite info
code = sprite[0];
@ -559,16 +560,18 @@ static void DrawAllSpritesFull(int prio, int maxwidth, struct PicoEState *est)
}
#ifndef _ASM_DRAW_C
static void BackFillFull(void *dst, int reg7)
static void BackFillFull(unsigned char *dst, int reg7, int lwidth)
{
unsigned int back;
int i;
// Start with a background color:
back=reg7&0x3f;
back|=back<<8;
back|=back<<16;
memset32(dst, back, Pico.est.Draw2Width*(8+(END_ROW-START_ROW)*8)/4);
for (i = 0, dst += 8*lwidth; i < (END_ROW-START_ROW)*8; i++, dst += lwidth)
memset32(dst+8, back, 320/4);
}
#endif
@ -589,19 +592,19 @@ static void DrawDisplayFull(void)
}
if(est->rendstatus & PDRAW_30_ROWS) {
// In 240 line mode, the top and bottom 8 lines are omitted
// since this renderer always renderers 224 lines
// since this renderer always renders 224 lines
scrstart ++, scrend ++;
}
est->Draw2Start = scrstart;
planestart = scrstart, planeend = scrend;
winstart = scrstart, winend = scrend;
// 32C border for centering? (for asm)
est->rendstatus &= ~PDRAW_BORDER_32;
if ((est->rendstatus&PDRAW_32_COLS) && !(PicoIn.opt&POPT_DIS_32C_BORDER))
est->rendstatus |= PDRAW_BORDER_32;
planestart = scrstart, planeend = scrend;
winstart = scrstart, winend = scrend;
// horizontal window?
if ((win=pvid->reg[0x12]))
{
@ -716,7 +719,7 @@ PICO_INTERNAL void PicoFrameFull()
if (PicoPrepareCram) PicoPrepareCram();
// Draw screen:
BackFillFull(Pico.est.Draw2FB, Pico.video.reg[7]);
BackFillFull(Pico.est.Draw2FB, Pico.video.reg[7], Pico.est.Draw2Width);
if (Pico.video.reg[1] & 0x40)
DrawDisplayFull();

View file

@ -6,10 +6,17 @@
* See COPYING file in the top-level directory.
*
* this is highly specialized, be careful if changing related C code!
*
* NB: this only deals with buffers having a line width of 328
*/
#include "pico_int_offs.h"
.equ PDRAW_INTERLACE, (1<<3)
.equ PDRAW_32_COLS, (1<<8)
.equ PDRAW_BORDER_32, (1<<9)
.equ PDRAW_30_ROWS, (1<<11)
@ define these constants in your include file:
@ .equiv START_ROW, 1
@ .equiv END_ROW, 27
@ -24,14 +31,17 @@
.text
.align 2
@ void BackFillFull(void *dst, int reg7)
@ void BackFillFull(unsigned char *dst, int reg7, int lwidth)
.global BackFillFull
BackFillFull:
stmfd sp!, {r4-r9,lr}
stmfd sp!, {r4-r10,lr}
sub r10,r2, #320 @ unused bytes in a line
add lr, r0, #8 @ 8 px overlap area at start of line
add lr, lr, r2, lsl #3 @ 8 lines overlap area at top
add lr, r0, #328*8
mov r0, r1, lsl #26
mov r0, r0, lsr #26
@ -52,7 +62,6 @@ BackFillFull:
@ go go go!
.bff_loop:
add lr, lr, #8
subs r12, r12, #1
stmia lr!, {r0-r9} @ 10*4*8
@ -64,9 +73,10 @@ BackFillFull:
stmia lr!, {r0-r9}
stmia lr!, {r0-r9}
add lr, lr, r10 @ skip unused rest of line
bne .bff_loop
ldmfd sp!, {r4-r9,lr}
ldmfd sp!, {r4-r10,lr}
bx lr
.pool
@ -413,14 +423,15 @@ DrawLayerFull:
orr lr, lr, r4, lsl #13 @ lr|=nametab_bits{3}<<13
ldr r11,[sp, #9*4] @ est
sub r4, r9, #(START_ROW<<24)
ldr r4, [r11, #OFS_EST_Draw2Start]
ldr r7, [r11, #OFS_EST_rendstatus]
ldr r11, [r11, #OFS_EST_Draw2FB]
tst r7, #0x100 @ H32 border mode?
sub r4, r9, r4, lsl #24
tst r7, #PDRAW_BORDER_32 @ H32 border mode?
addne r11, r11, #32
mov r4, r4, asr #24
mov r7, #328*8
mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-START_ROW);
mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-Draw2Start);
@ Get vertical scroll value:
add_c24 r7, r10, (OFS_PMEM_vsram-OFS_PMEM_vram)
@ -588,15 +599,16 @@ DrawLayerFull:
.global DrawTilesFromCacheF
DrawTilesFromCacheF:
stmfd sp!, {r4-r10,lr}
stmfd sp!, {r4-r11,lr}
mov r9, #0xff000000 @ r9=prevcode=-1
mvn r6, #0 @ r6=prevy=-1
ldr r7, [r1, #OFS_EST_rendstatus]
ldr r4, [r1, #OFS_EST_Draw2FB]
ldr r11,[r1, #OFS_EST_Draw2Start]
ldr r2, [r0], #4 @ read y offset
tst r7, #0x100 @ H32 border mode?
tst r7, #PDRAW_BORDER_32 @ H32 border mode?
addne r4, r4, #32
mov r7, #328
mla r2, r7, r2, r4
@ -612,13 +624,14 @@ DrawTilesFromCacheF:
.dtfcf_loop:
ldr r7, [r8], #4 @ read code
movs r1, r7, lsr #16 @ r1=dx;
ldmeqfd sp!, {r4-r10,pc} @ dx is never zero, this must be a terminator, return
ldmeqfd sp!, {r4-r11,pc} @ dx is never zero, this must be a terminator, return
@ row changed?
cmp r6, r7, lsr #27
movne r6, r7, lsr #27
subne r6, r6, r11
movne r4, #328*8
mlane r5, r4, r6, r12 @ r5=pd = scrpos + prevy*328*8
mlane r5, r4, r6, r12 @ r5=pd = scrpos + (prevy-Draw2Start)*328*8
bic r1, r1, #0xf800
add r1, r5, r1 @ r1=pdest (halfwords)
@ -695,6 +708,7 @@ DrawWindowFull:
ldr r4, [r11, #OFS_Pico_video_reg+12]
mov r5, #1 @ nametab_step
ldr r11, [r3, #OFS_EST_Draw2FB]
ldr r6, [r3, #OFS_EST_Draw2Start]
tst r4, #1 @ 40 cell mode?
andne r12, r12, #0xf000 @ 0x3c<<10
movne r5, r5, lsl #7
@ -702,11 +716,12 @@ DrawWindowFull:
ldr r7, [r3, #OFS_EST_rendstatus]
and r12, r12, #0xf800
mov r5, r5, lsl #6 @ nametab_step
tst r7, #0x100
tst r7, #PDRAW_BORDER_32
addne r11, r11, #32 @ center screen in H32 mode
0: and r4, r0, #0xff
mla r12, r5, r4, r12 @ nametab += nametab_step*start;
sub r4, r4, r6
mla r12, r5, r4, r12 @ nametab += nametab_step*(start-Draw2Start);
ldr r10, [r3, #OFS_EST_PicoMem_vram]
mov r4, r0, lsr #16 @ r4=start_cell_h
@ -728,11 +743,11 @@ DrawWindowFull:
and r4, r0, #0xff
add r11, r11, #328*8
sub r4, r4, #START_ROW
sub r4, r4, r6
add r11, r11, #8
mov r7, #328*8
mla r11, r7, r4, r11 @ scrpos+=8*328*(start-START_ROW);
mla r11, r7, r4, r11 @ scrpos+=8*328*(start-Draw2Start);
mov r0, #0xf
.dwfloop_outer:
@ -927,12 +942,13 @@ DrawSpriteFull:
ldr r0, [r1, #OFS_EST_rendstatus]
ldr r11, [r1, #OFS_EST_Draw2FB]
ldr r2, [r1, #OFS_EST_Draw2Start]
ldr r10, [r1, #OFS_EST_PicoMem_vram]
tst r0, #0x100 @ H32 border mode?
tst r0, #PDRAW_BORDER_32 @ H32 border mode?
addne r11, r11, #32
sub r1, r12, #(START_ROW*8)
sub r12, r12, r2, lsl #3
mov r0, #328
mla r11, r1, r0, r11 @ scrpos+=(sy-START_ROW*8)*328;
mla r11, r12, r0, r11 @ scrpos+=(sy-Draw2Start*8)*328;
orr r5, r5, r5, lsl #16 @
orr r5, r6, r5, lsl #8 @ r5=width|(height<<8)|(height<<24)

View file

@ -355,8 +355,8 @@ struct PicoEState
unsigned char *HighCol;
u32 *HighPreSpr;
struct Pico *Pico;
void *PicoMem_vram;
void *PicoMem_cram;
unsigned short *PicoMem_vram;
unsigned short *PicoMem_cram;
unsigned int *PicoOpt;
unsigned char *Draw2FB;
int Draw2Width;

View file

@ -144,6 +144,8 @@ get_define OFS_EST_ PicoEState PicoMem_vram ; echo "$line" >>$fn
get_define OFS_EST_ PicoEState PicoMem_cram ; echo "$line" >>$fn
get_define OFS_EST_ PicoEState PicoOpt ; echo "$line" >>$fn
get_define OFS_EST_ PicoEState Draw2FB ; echo "$line" >>$fn
get_define OFS_EST_ PicoEState Draw2Width ; echo "$line" >>$fn
get_define OFS_EST_ PicoEState Draw2Start ; echo "$line" >>$fn
get_define OFS_EST_ PicoEState HighPal ; echo "$line" >>$fn
get_define OFS_PMEM_ PicoMem vram ; echo "$line" >>$fn