vdp renderer, partial sync 8bit fast ARM asm with C code

This commit is contained in:
kub 2021-01-21 19:09:17 +01:00
parent 1cc774814d
commit 959ea39b23
4 changed files with 59 additions and 38 deletions

View file

@ -34,7 +34,7 @@ void (*PicoPrepareCram)()=0; // prepares PicoCramHigh for renderer to
// stuff available in asm: // stuff available in asm:
#ifdef _ASM_DRAW_C #ifdef _ASM_DRAW_C
void BackFillFull(void *dst, int reg7); void BackFillFull(void *dst, int reg7, int lwidth);
void DrawLayerFull(int plane, u32 *hcache, int planestart, int planeend, void DrawLayerFull(int plane, u32 *hcache, int planestart, int planeend,
struct PicoEState *est); struct PicoEState *est);
void DrawTilesFromCacheF(u32 *hc, struct PicoEState *est); void DrawTilesFromCacheF(u32 *hc, struct PicoEState *est);
@ -183,7 +183,7 @@ static void DrawWindowFull(int start, int end, int prio, struct PicoEState *est)
nametab += nametab_step*(start-scrstart); nametab += nametab_step*(start-scrstart);
// check priority // check priority
code=PicoMem.vram[nametab+tile_start]; code=est->PicoMem_vram[nametab+tile_start];
if ((code>>15) != prio) return; // hack: just assume that whole window uses same priority if ((code>>15) != prio) return; // hack: just assume that whole window uses same priority
scrpos+=8*est->Draw2Width+8; scrpos+=8*est->Draw2Width+8;
@ -197,7 +197,7 @@ static void DrawWindowFull(int start, int end, int prio, struct PicoEState *est)
// unsigned short *pal=NULL; // unsigned short *pal=NULL;
unsigned char pal; unsigned char pal;
code=PicoMem.vram[nametab+tilex]; code=est->PicoMem_vram[nametab+tilex];
if (code==blank) continue; if (code==blank) continue;
// Get tile address/2: // Get tile address/2:
@ -250,7 +250,7 @@ static void DrawLayerFull(int plane, u32 *hcache, int planestart, int planeend,
if(!(pvid->reg[11]&3)) { // full screen scroll if(!(pvid->reg[11]&3)) { // full screen scroll
// Get horizontal scroll value // Get horizontal scroll value
hscroll=PicoMem.vram[htab&0x7fff]; hscroll=est->PicoMem_vram[htab&0x7fff];
htab = 0; // this marks that we don't have to update scroll value htab = 0; // this marks that we don't have to update scroll value
} }
@ -297,7 +297,7 @@ static void DrawLayerFull(int plane, u32 *hcache, int planestart, int planeend,
if(htab) { if(htab) {
int htaddr=htab+(trow<<4); int htaddr=htab+(trow<<4);
if(trow) htaddr-=(vscroll&7)<<1; if(trow) htaddr-=(vscroll&7)<<1;
hscroll=PicoMem.vram[htaddr&0x7fff]; hscroll=est->PicoMem_vram[htaddr&0x7fff];
} }
// Draw tiles across screen: // Draw tiles across screen:
@ -323,7 +323,7 @@ static void DrawLayerFull(int plane, u32 *hcache, int planestart, int planeend,
#endif #endif
vsidx++; vsidx++;
code=PicoMem.vram[nametab_row+(tilex&xmask)]; code=est->PicoMem_vram[nametab_row+(tilex&xmask)];
if (code==blank) continue; if (code==blank) continue;
if (code>>15) { // high priority tile if (code>>15) { // high priority tile
@ -442,18 +442,19 @@ static void DrawSpriteFull(unsigned int *sprite, struct PicoEState *est)
pal=(unsigned char)((code>>9)&0x30); pal=(unsigned char)((code>>9)&0x30);
// goto first vertically visible tile // goto first vertically visible tile
while(sy <= scrstart*8) { sy+=8; tile+=tdeltay; height--; } sy -= scrstart*8;
while(sy <= 0) { sy+=8; tile+=tdeltay; height--; }
scrpos = est->Draw2FB; scrpos = est->Draw2FB;
if (est->rendstatus&PDRAW_BORDER_32) if (est->rendstatus&PDRAW_BORDER_32)
scrpos += 32; scrpos += 32;
scrpos+=(sy-scrstart*8)*est->Draw2Width; scrpos+=sy*est->Draw2Width;
for (; height > 0; height--, sy+=8, tile+=tdeltay) for (; height > 0; height--, sy+=8, tile+=tdeltay)
{ {
int w = width, x=sx, t=tile, s; int w = width, x=sx, t=tile, s;
if((sy-scrstart*8) >= END_ROW*8+8) return; // offscreen if(sy >= END_ROW*8+8) return; // offscreen
for (; w; w--,x+=8,t+=tdeltax) for (; w; w--,x+=8,t+=tdeltax)
{ {
@ -502,7 +503,7 @@ static void DrawAllSpritesFull(int prio, int maxwidth, struct PicoEState *est)
unsigned int *sprite=NULL; unsigned int *sprite=NULL;
int code, code2, sx, sy, height; int code, code2, sx, sy, height;
sprite=(u32 *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite sprite=(u32 *)(est->PicoMem_vram+((table+(link<<2))&0x7ffc)); // Find sprite
// get sprite info // get sprite info
code = sprite[0]; code = sprite[0];
@ -559,16 +560,18 @@ static void DrawAllSpritesFull(int prio, int maxwidth, struct PicoEState *est)
} }
#ifndef _ASM_DRAW_C #ifndef _ASM_DRAW_C
static void BackFillFull(void *dst, int reg7) static void BackFillFull(unsigned char *dst, int reg7, int lwidth)
{ {
unsigned int back; unsigned int back;
int i;
// Start with a background color: // Start with a background color:
back=reg7&0x3f; back=reg7&0x3f;
back|=back<<8; back|=back<<8;
back|=back<<16; back|=back<<16;
memset32(dst, back, Pico.est.Draw2Width*(8+(END_ROW-START_ROW)*8)/4); for (i = 0, dst += 8*lwidth; i < (END_ROW-START_ROW)*8; i++, dst += lwidth)
memset32(dst+8, back, 320/4);
} }
#endif #endif
@ -589,19 +592,19 @@ static void DrawDisplayFull(void)
} }
if(est->rendstatus & PDRAW_30_ROWS) { if(est->rendstatus & PDRAW_30_ROWS) {
// In 240 line mode, the top and bottom 8 lines are omitted // In 240 line mode, the top and bottom 8 lines are omitted
// since this renderer always renderers 224 lines // since this renderer always renders 224 lines
scrstart ++, scrend ++; scrstart ++, scrend ++;
} }
est->Draw2Start = scrstart; est->Draw2Start = scrstart;
planestart = scrstart, planeend = scrend;
winstart = scrstart, winend = scrend;
// 32C border for centering? (for asm) // 32C border for centering? (for asm)
est->rendstatus &= ~PDRAW_BORDER_32; est->rendstatus &= ~PDRAW_BORDER_32;
if ((est->rendstatus&PDRAW_32_COLS) && !(PicoIn.opt&POPT_DIS_32C_BORDER)) if ((est->rendstatus&PDRAW_32_COLS) && !(PicoIn.opt&POPT_DIS_32C_BORDER))
est->rendstatus |= PDRAW_BORDER_32; est->rendstatus |= PDRAW_BORDER_32;
planestart = scrstart, planeend = scrend;
winstart = scrstart, winend = scrend;
// horizontal window? // horizontal window?
if ((win=pvid->reg[0x12])) if ((win=pvid->reg[0x12]))
{ {
@ -716,7 +719,7 @@ PICO_INTERNAL void PicoFrameFull()
if (PicoPrepareCram) PicoPrepareCram(); if (PicoPrepareCram) PicoPrepareCram();
// Draw screen: // Draw screen:
BackFillFull(Pico.est.Draw2FB, Pico.video.reg[7]); BackFillFull(Pico.est.Draw2FB, Pico.video.reg[7], Pico.est.Draw2Width);
if (Pico.video.reg[1] & 0x40) if (Pico.video.reg[1] & 0x40)
DrawDisplayFull(); DrawDisplayFull();

View file

@ -6,10 +6,17 @@
* See COPYING file in the top-level directory. * See COPYING file in the top-level directory.
* *
* this is highly specialized, be careful if changing related C code! * this is highly specialized, be careful if changing related C code!
*
* NB: this only handles buffers with a line width of 328
*/ */
#include "pico_int_offs.h" #include "pico_int_offs.h"
.equ PDRAW_INTERLACE, (1<<3)
.equ PDRAW_32_COLS, (1<<8)
.equ PDRAW_BORDER_32, (1<<9)
.equ PDRAW_30_ROWS, (1<<11)
@ define these constants in your include file: @ define these constants in your include file:
@ .equiv START_ROW, 1 @ .equiv START_ROW, 1
@ .equiv END_ROW, 27 @ .equiv END_ROW, 27
@ -24,14 +31,17 @@
.text .text
.align 2 .align 2
@ void BackFillFull(void *dst, int reg7) @ void BackFillFull(unsigned char *dst, int reg7, int lwidth)
.global BackFillFull .global BackFillFull
BackFillFull: BackFillFull:
stmfd sp!, {r4-r9,lr} stmfd sp!, {r4-r10,lr}
sub r10,r2, #320 @ unused bytes in a line
add lr, r0, #8 @ 8 px overlap area at start of line
add lr, lr, r2, lsl #3 @ 8 lines overlap area at top
add lr, r0, #328*8
mov r0, r1, lsl #26 mov r0, r1, lsl #26
mov r0, r0, lsr #26 mov r0, r0, lsr #26
@ -52,7 +62,6 @@ BackFillFull:
@ go go go! @ go go go!
.bff_loop: .bff_loop:
add lr, lr, #8
subs r12, r12, #1 subs r12, r12, #1
stmia lr!, {r0-r9} @ 10*4*8 stmia lr!, {r0-r9} @ 10*4*8
@ -64,9 +73,10 @@ BackFillFull:
stmia lr!, {r0-r9} stmia lr!, {r0-r9}
stmia lr!, {r0-r9} stmia lr!, {r0-r9}
add lr, lr, r10 @ skip unused rest of line
bne .bff_loop bne .bff_loop
ldmfd sp!, {r4-r9,lr} ldmfd sp!, {r4-r10,lr}
bx lr bx lr
.pool .pool
@ -413,14 +423,15 @@ DrawLayerFull:
orr lr, lr, r4, lsl #13 @ lr|=nametab_bits{3}<<13 orr lr, lr, r4, lsl #13 @ lr|=nametab_bits{3}<<13
ldr r11,[sp, #9*4] @ est ldr r11,[sp, #9*4] @ est
sub r4, r9, #(START_ROW<<24) ldr r4, [r11, #OFS_EST_Draw2Start]
ldr r7, [r11, #OFS_EST_rendstatus] ldr r7, [r11, #OFS_EST_rendstatus]
ldr r11, [r11, #OFS_EST_Draw2FB] ldr r11, [r11, #OFS_EST_Draw2FB]
tst r7, #0x100 @ H32 border mode? sub r4, r9, r4, lsl #24
tst r7, #PDRAW_BORDER_32 @ H32 border mode?
addne r11, r11, #32 addne r11, r11, #32
mov r4, r4, asr #24 mov r4, r4, asr #24
mov r7, #328*8 mov r7, #328*8
mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-START_ROW); mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-Draw2Start);
@ Get vertical scroll value: @ Get vertical scroll value:
add_c24 r7, r10, (OFS_PMEM_vsram-OFS_PMEM_vram) add_c24 r7, r10, (OFS_PMEM_vsram-OFS_PMEM_vram)
@ -588,15 +599,16 @@ DrawLayerFull:
.global DrawTilesFromCacheF .global DrawTilesFromCacheF
DrawTilesFromCacheF: DrawTilesFromCacheF:
stmfd sp!, {r4-r10,lr} stmfd sp!, {r4-r11,lr}
mov r9, #0xff000000 @ r9=prevcode=-1 mov r9, #0xff000000 @ r9=prevcode=-1
mvn r6, #0 @ r6=prevy=-1 mvn r6, #0 @ r6=prevy=-1
ldr r7, [r1, #OFS_EST_rendstatus] ldr r7, [r1, #OFS_EST_rendstatus]
ldr r4, [r1, #OFS_EST_Draw2FB] ldr r4, [r1, #OFS_EST_Draw2FB]
ldr r11,[r1, #OFS_EST_Draw2Start]
ldr r2, [r0], #4 @ read y offset ldr r2, [r0], #4 @ read y offset
tst r7, #0x100 @ H32 border mode? tst r7, #PDRAW_BORDER_32 @ H32 border mode?
addne r4, r4, #32 addne r4, r4, #32
mov r7, #328 mov r7, #328
mla r2, r7, r2, r4 mla r2, r7, r2, r4
@ -612,13 +624,14 @@ DrawTilesFromCacheF:
.dtfcf_loop: .dtfcf_loop:
ldr r7, [r8], #4 @ read code ldr r7, [r8], #4 @ read code
movs r1, r7, lsr #16 @ r1=dx; movs r1, r7, lsr #16 @ r1=dx;
ldmeqfd sp!, {r4-r10,pc} @ dx is never zero, this must be a terminator, return ldmeqfd sp!, {r4-r11,pc} @ dx is never zero, this must be a terminator, return
@ row changed? @ row changed?
cmp r6, r7, lsr #27 cmp r6, r7, lsr #27
movne r6, r7, lsr #27 movne r6, r7, lsr #27
subne r6, r6, r11
movne r4, #328*8 movne r4, #328*8
mlane r5, r4, r6, r12 @ r5=pd = scrpos + prevy*328*8 mlane r5, r4, r6, r12 @ r5=pd = scrpos + (prevy-Draw2Start)*328*8
bic r1, r1, #0xf800 bic r1, r1, #0xf800
add r1, r5, r1 @ r1=pdest (halfwords) add r1, r5, r1 @ r1=pdest (halfwords)
@ -695,6 +708,7 @@ DrawWindowFull:
ldr r4, [r11, #OFS_Pico_video_reg+12] ldr r4, [r11, #OFS_Pico_video_reg+12]
mov r5, #1 @ nametab_step mov r5, #1 @ nametab_step
ldr r11, [r3, #OFS_EST_Draw2FB] ldr r11, [r3, #OFS_EST_Draw2FB]
ldr r6, [r3, #OFS_EST_Draw2Start]
tst r4, #1 @ 40 cell mode? tst r4, #1 @ 40 cell mode?
andne r12, r12, #0xf000 @ 0x3c<<10 andne r12, r12, #0xf000 @ 0x3c<<10
movne r5, r5, lsl #7 movne r5, r5, lsl #7
@ -702,11 +716,12 @@ DrawWindowFull:
ldr r7, [r3, #OFS_EST_rendstatus] ldr r7, [r3, #OFS_EST_rendstatus]
and r12, r12, #0xf800 and r12, r12, #0xf800
mov r5, r5, lsl #6 @ nametab_step mov r5, r5, lsl #6 @ nametab_step
tst r7, #0x100 tst r7, #PDRAW_BORDER_32
addne r11, r11, #32 @ center screen in H32 mode addne r11, r11, #32 @ center screen in H32 mode
0: and r4, r0, #0xff 0: and r4, r0, #0xff
mla r12, r5, r4, r12 @ nametab += nametab_step*start; sub r4, r4, r6
mla r12, r5, r4, r12 @ nametab += nametab_step*(start-Draw2Start);
ldr r10, [r3, #OFS_EST_PicoMem_vram] ldr r10, [r3, #OFS_EST_PicoMem_vram]
mov r4, r0, lsr #16 @ r4=start_cell_h mov r4, r0, lsr #16 @ r4=start_cell_h
@ -728,11 +743,11 @@ DrawWindowFull:
and r4, r0, #0xff and r4, r0, #0xff
add r11, r11, #328*8 add r11, r11, #328*8
sub r4, r4, #START_ROW sub r4, r4, r6
add r11, r11, #8 add r11, r11, #8
mov r7, #328*8 mov r7, #328*8
mla r11, r7, r4, r11 @ scrpos+=8*328*(start-START_ROW); mla r11, r7, r4, r11 @ scrpos+=8*328*(start-Draw2Start);
mov r0, #0xf mov r0, #0xf
.dwfloop_outer: .dwfloop_outer:
@ -927,12 +942,13 @@ DrawSpriteFull:
ldr r0, [r1, #OFS_EST_rendstatus] ldr r0, [r1, #OFS_EST_rendstatus]
ldr r11, [r1, #OFS_EST_Draw2FB] ldr r11, [r1, #OFS_EST_Draw2FB]
ldr r2, [r1, #OFS_EST_Draw2Start]
ldr r10, [r1, #OFS_EST_PicoMem_vram] ldr r10, [r1, #OFS_EST_PicoMem_vram]
tst r0, #0x100 @ H32 border mode? tst r0, #PDRAW_BORDER_32 @ H32 border mode?
addne r11, r11, #32 addne r11, r11, #32
sub r1, r12, #(START_ROW*8) sub r12, r12, r2, lsl #3
mov r0, #328 mov r0, #328
mla r11, r1, r0, r11 @ scrpos+=(sy-START_ROW*8)*328; mla r11, r12, r0, r11 @ scrpos+=(sy-Draw2Start*8)*328;
orr r5, r5, r5, lsl #16 @ orr r5, r5, r5, lsl #16 @
orr r5, r6, r5, lsl #8 @ r5=width|(height<<8)|(height<<24) orr r5, r6, r5, lsl #8 @ r5=width|(height<<8)|(height<<24)

View file

@ -355,8 +355,8 @@ struct PicoEState
unsigned char *HighCol; unsigned char *HighCol;
u32 *HighPreSpr; u32 *HighPreSpr;
struct Pico *Pico; struct Pico *Pico;
void *PicoMem_vram; unsigned short *PicoMem_vram;
void *PicoMem_cram; unsigned short *PicoMem_cram;
unsigned int *PicoOpt; unsigned int *PicoOpt;
unsigned char *Draw2FB; unsigned char *Draw2FB;
int Draw2Width; int Draw2Width;

View file

@ -144,6 +144,8 @@ get_define OFS_EST_ PicoEState PicoMem_vram ; echo "$line" >>$fn
get_define OFS_EST_ PicoEState PicoMem_cram ; echo "$line" >>$fn get_define OFS_EST_ PicoEState PicoMem_cram ; echo "$line" >>$fn
get_define OFS_EST_ PicoEState PicoOpt ; echo "$line" >>$fn get_define OFS_EST_ PicoEState PicoOpt ; echo "$line" >>$fn
get_define OFS_EST_ PicoEState Draw2FB ; echo "$line" >>$fn get_define OFS_EST_ PicoEState Draw2FB ; echo "$line" >>$fn
get_define OFS_EST_ PicoEState Draw2Width ; echo "$line" >>$fn
get_define OFS_EST_ PicoEState Draw2Start ; echo "$line" >>$fn
get_define OFS_EST_ PicoEState HighPal ; echo "$line" >>$fn get_define OFS_EST_ PicoEState HighPal ; echo "$line" >>$fn
get_define OFS_PMEM_ PicoMem vram ; echo "$line" >>$fn get_define OFS_PMEM_ PicoMem vram ; echo "$line" >>$fn