picodrive/pico/draw2_arm.S
2023-11-23 21:11:36 +01:00

982 lines
26 KiB
ArmAsm

/*
* assembly optimized versions of most funtions from draw2.c
* (C) notaz, 2006-2008
*
* This work is licensed under the terms of MAME license.
* See COPYING file in the top-level directory.
*
* this is highly specialized, be careful if changing related C code!
*
* NB: this only deals with buffers having line width at 328
*/
#include "pico_int_offs.h"
.equ PDRAW_INTERLACE, (1<<3)
.equ PDRAW_32_COLS, (1<<8)
.equ PDRAW_BORDER_32, (1<<9)
.equ PDRAW_30_ROWS, (1<<11)
@ define these constants in your include file:
@ .equiv START_ROW, 1
@ .equiv END_ROW, 27
@ one row means 8 pixels. If above example was used, (27-1)*8=208 lines would be rendered.
#ifndef START_ROW
#define START_ROW 0
#endif
#ifndef END_ROW
#define END_ROW 28
#endif
.text
.align 2
@ void BackFillFull(unsigned char *dst, int reg7, int lwidth)
.global BackFillFull
BackFillFull:
stmfd sp!, {r4-r10,lr}
sub r10,r2, #320 @ unused bytes in a line
add lr, r0, #8 @ 8 px overlap area at start of line
add lr, lr, r2, lsl #3 @ 8 lines overlap area at top
mov r0, r1, lsl #26
mov r0, r0, lsr #26
orr r0, r0, r0, lsl #8
orr r0, r0, r0, lsl #16
mov r1, r0 @ 25 opcodes wasted?
mov r2, r0
mov r3, r0
mov r4, r0
mov r5, r0
mov r6, r0
mov r7, r0
mov r8, r0
mov r9, r0
mov r12, #(END_ROW-START_ROW)*8
@ go go go!
.bff_loop:
subs r12, r12, #1
stmia lr!, {r0-r9} @ 10*4*8
stmia lr!, {r0-r9}
stmia lr!, {r0-r9}
stmia lr!, {r0-r9}
stmia lr!, {r0-r9}
stmia lr!, {r0-r9}
stmia lr!, {r0-r9}
stmia lr!, {r0-r9}
add lr, lr, r10 @ skip unused rest of line
bne .bff_loop
ldmfd sp!, {r4-r10,lr}
bx lr
.pool
@ -------- some macros --------
@ helpers
.macro add_c24 d s c
add \d, \s, #(\c & 0x00ff00)
.if \c & 0x0000ff
add \d, \d, #(\c & 0x0000ff)
.endif
.if \c & 0xff0000
add \d, \d, #(\c & 0xff0000)
.endif
.endm
@ TileLineSinglecol (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: pixels8_old
.macro TileLineSinglecol notsinglecol=0
and r2, r2, #0xf @ #0x0000000f
.if !\notsinglecol
cmp r2, r0, lsr #28 @ if these don't match,
bicne r9, r9, #2 @ it is a sign that whole tile is not singlecolor (only it's lines may be)
.endif
orr r4, r3, r2
orr r4, r4, r4, lsl #8
tst r1, #1 @ not aligned?
strneb r4, [r1], #1
streqh r4, [r1], #2
strh r4, [r1], #2
strh r4, [r1], #2
strh r4, [r1], #2
strneb r4, [r1], #1 @ have a remaining unaligned pixel?
sub r1, r1, #8
.if !\notsinglecol
mov r0, #0xf
orr r0, r0, r2, lsl #28 @ we will need the old palindex later
.endif
.endm
@ TileNorm (r1=pdest, r2=pixels8, r3=pal) r0,r4: scratch
.macro TileLineNorm
ands r4, r0, r2, lsr #12 @ #0x0000f000
orrne r4, r3, r4
strneb r4, [r1]
ands r4, r0, r2, lsr #8 @ #0x00000f00
orrne r4, r3, r4
strneb r4, [r1,#1]
ands r4, r0, r2, lsr #4 @ #0x000000f0
orrne r4, r3, r4
strneb r4, [r1,#2]
ands r4, r0, r2 @ #0x0000000f
orrne r4, r3, r4
strneb r4, [r1,#3]
ands r4, r0, r2, lsr #28 @ #0xf0000000
orrne r4, r3, r4
strneb r4, [r1,#4]
ands r4, r0, r2, lsr #24 @ #0x0f000000
orrne r4, r3, r4
strneb r4, [r1,#5]
ands r4, r0, r2, lsr #20 @ #0x00f00000
orrne r4, r3, r4
strneb r4, [r1,#6]
ands r4, r0, r2, lsr #16 @ #0x000f0000
orrne r4, r3, r4
strneb r4, [r1,#7]
.endm
@ TileFlip (r1=pdest, r2=pixels8, r3=pal) r0,r4: scratch
.macro TileLineFlip
ands r4, r0, r2, lsr #16 @ #0x000f0000
orrne r4, r3, r4
strneb r4, [r1]
ands r4, r0, r2, lsr #20 @ #0x00f00000
orrne r4, r3, r4
strneb r4, [r1,#1]
ands r4, r0, r2, lsr #24 @ #0x0f000000
orrne r4, r3, r4
strneb r4, [r1,#2]
ands r4, r0, r2, lsr #28 @ #0xf0000000
orrne r4, r3, r4
strneb r4, [r1,#3]
ands r4, r0, r2 @ #0x0000000f
orrne r4, r3, r4
strneb r4, [r1,#4]
ands r4, r0, r2, lsr #4 @ #0x000000f0
orrne r4, r3, r4
strneb r4, [r1,#5]
ands r4, r0, r2, lsr #8 @ #0x00000f00
orrne r4, r3, r4
strneb r4, [r1,#6]
ands r4, r0, r2, lsr #12 @ #0x0000f000
orrne r4, r3, r4
strneb r4, [r1,#7]
.endm
@ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
.macro Tile hflip vflip
mov r7, r9, lsl #13 @ r9=code<<8; addr=(code&0x7ff)<<4;
add r7, r10, r7, lsr #16
orr r9, r9, #3 @ emptytile=singlecolor=1, r9 must be <code_16> 00000xxx
.if \vflip
@ we read tilecodes in reverse order if we have vflip
add r7, r7, #8*4
.endif
@ loop through 8 lines
orr r9, r9, #(7<<24)
b 1f @ loop_enter
0: @ singlecol_loop
subs r9, r9, #(1<<24)
add r1, r1, #328 @ set pointer to next line
bmi 8f @ loop_exit with r0 restore
1:
.if \vflip
ldr r2, [r7, #-4]! @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels
.else
ldr r2, [r7], #4
.endif
tst r2, r2
beq 2f @ empty line
bic r9, r9, #1
cmp r2, r2, ror #4
bne 3f @ not singlecolor
TileLineSinglecol
b 0b
2:
bic r9, r9, #2
2: @ empty_loop
subs r9, r9, #(1<<24)
add r1, r1, #328 @ set pointer to next line
bmi 8f @ loop_exit with r0 restore
.if \vflip
ldr r2, [r7, #-4]! @ next pack
.else
ldr r2, [r7], #4
.endif
mov r0, #0xf @ singlecol_loop might have messed r0
tst r2, r2
beq 2b
bic r9, r9, #3 @ if we are here, it means we have empty and not empty line
b 5f
3: @ not empty, not singlecol
mov r0, #0xf
bic r9, r9, #3
b 6f
4: @ not empty, not singlecol loop
subs r9, r9, #(1<<24)
add r1, r1, #328 @ set pointer to next line
bmi 9f @ loop_exit
.if \vflip
ldr r2, [r7, #-4]! @ next pack
.else
ldr r2, [r7], #4
.endif
tst r2, r2
beq 4b @ empty line
5:
cmp r2, r2, ror #4
beq 7f @ singlecolor line
6:
.if \hflip
TileLineFlip
.else
TileLineNorm
.endif
b 4b
7:
TileLineSinglecol 1
b 4b
8:
mov r0, #0xf
9: @ loop_exit
add r9, r9, #(1<<24) @ fix r9
sub r1, r1, #328*8 @ restore pdest pointer
.endm
@ TileLineSinglecolAl (r1=pdest, r4,r7=color)
.macro TileLineSinglecolAl0
stmia r1!, {r4,r7}
add r1, r1, #320
.endm
.macro TileLineSinglecolAl1
strb r4, [r1], #1
strh r4, [r1], #2
str r4, [r1], #4
strb r4, [r1], #1+320
@ add r1, r1, #320
.endm
.macro TileLineSinglecolAl2
strh r4, [r1], #2
str r4, [r1], #4
strh r4, [r1], #2
add r1, r1, #320
.endm
.macro TileLineSinglecolAl3
strb r4, [r1], #1
str r4, [r1], #4
strh r4, [r1], #2
strb r4, [r1], #1+320
@ add r1, r1, #320
.endm
@ TileSinglecol (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=0xf
@ kaligned==1, if dest is always aligned
.macro TileSinglecol kaligned=0
and r4, r2, #0xf @ we assume we have good r2 from previous time
orr r4, r4, r3
orr r4, r4, r4, lsl #8
orr r4, r4, r4, lsl #16
mov r7, r4
.if !\kaligned
tst r1, #2 @ not aligned?
bne 2f
tst r1, #1
bne 1f
.endif
TileLineSinglecolAl0
TileLineSinglecolAl0
TileLineSinglecolAl0
TileLineSinglecolAl0
TileLineSinglecolAl0
TileLineSinglecolAl0
TileLineSinglecolAl0
TileLineSinglecolAl0
.if !\kaligned
b 4f
1:
TileLineSinglecolAl1
TileLineSinglecolAl1
TileLineSinglecolAl1
TileLineSinglecolAl1
TileLineSinglecolAl1
TileLineSinglecolAl1
TileLineSinglecolAl1
TileLineSinglecolAl1
b 4f
2:
tst r1, #1
bne 3f
TileLineSinglecolAl2
TileLineSinglecolAl2
TileLineSinglecolAl2
TileLineSinglecolAl2
TileLineSinglecolAl2
TileLineSinglecolAl2
TileLineSinglecolAl2
TileLineSinglecolAl2
b 4f
3:
TileLineSinglecolAl3
TileLineSinglecolAl3
TileLineSinglecolAl3
TileLineSinglecolAl3
TileLineSinglecolAl3
TileLineSinglecolAl3
TileLineSinglecolAl3
TileLineSinglecolAl3
4:
.endif
sub r1, r1, #328*8 @ restore pdest pointer
.endm
@ DrawLayerTiles(*hcache, *scrpos, (cells<<24)|(nametab<<9)|(vscroll&0x3ff)<<11|(shift[width]<<8)|planeend, (ymask<<24)|(planestart<<16)|[htab||hscroll]
@ void DrawLayerFull(int plane, int *hcache, int planestart, int planeend,
@ struct PicoEState *est)
.global DrawLayerFull
DrawLayerFull:
ldr r12,[sp] @ est
stmfd sp!, {r4-r11,lr}
mov r6, r1 @ hcache
ldr r11, [r12, #OFS_EST_Pico]
ldr r10, [r12, #OFS_EST_PicoMem_vram]
ldrb r5, [r11, #OFS_Pico_video_reg+13] @ pvid->reg[13]
ldrb r7, [r11, #OFS_Pico_video_reg+11]
sub lr, r3, r2
and lr, lr, #0x00ff0000 @ lr=cells
mov r5, r5, lsl #10 @ htab=pvid->reg[13]<<9; (halfwords)
add r5, r5, r0, lsl #1 @ htab+=plane
bic r5, r5, #0x00ff0000 @ just in case
tst r7, #3 @ full screen scroll? (if ==0)
ldrb r7, [r11, #OFS_Pico_video_reg+16] @ ??hh??ww
ldreqh r5, [r10, r5]
biceq r5, r5, #0x0000fc00 @ r5=hscroll (0-0x3ff)
movne r5, r5, lsr #1
orrne r5, r5, #0x8000 @ this marks that we have htab pointer, not hscroll here
and r8, r7, #3
orr r5, r5, r7, lsl #1+24
orr r5, r5, #0x1f000000
cmp r8, #1
biclt r5, r5, #0x80000000
biceq r5, r5, #0xc0000000
bicgt r5, r5, #0xe0000000
mov r9, r2, lsl #24
orr r5, r5, r9, lsr #8 @ r5=(ymask<<24)|(trow<<16)|[htab||hscroll]
add r4, r8, #5
cmp r4, #7
subge r4, r4, #1 @ r4=shift[width] (5,6,6,7)
orr lr, lr, r4
orr lr, lr, r3, lsl #24 @ lr=(planeend<<24)|(cells<<16)|shift[width]
@ calculate xmask:
mov r8, r8, lsl #24+5
orr r8, r8, #0x1f000000
@ Find name table:
tst r0, r0
ldreqb r4, [r11, #OFS_Pico_video_reg+2]
moveq r4, r4, lsr #3
ldrneb r4, [r11, #OFS_Pico_video_reg+4]
and r4, r4, #7
orr lr, lr, r4, lsl #13 @ lr|=nametab_bits{3}<<13
ldr r11,[sp, #9*4] @ est
ldr r4, [r11, #OFS_EST_Draw2Start]
ldr r7, [r11, #OFS_EST_rendstatus]
ldr r11, [r11, #OFS_EST_Draw2FB]
sub r4, r9, r4, lsl #24
tst r7, #PDRAW_BORDER_32 @ H32 border mode?
tstne r7, #PDRAW_32_COLS
addne r11, r11, #32
mov r4, r4, asr #24
mov r7, #328*8
mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-Draw2Start);
@ Get vertical scroll value:
add_c24 r7, r10, (OFS_PMEM_vsram-OFS_PMEM_vram)
ldr r7, [r7]
tst r0, r0
moveq r7, r7, lsl #22
movne r7, r7, lsl #6
mov r7, r7, lsr #22 @ r7=vscroll (10 bits)
orr lr, lr, r7, lsl #3
mov lr, lr, ror #24 @ packed: cccccccc nnnvvvvv vvvvvsss pppppppp: cells, nametab, vscroll, shift[width], planeend
ands r7, r7, #7
addne lr, lr, #1 @ we have vertically clipped tiles due to vscroll, so we need 1 more row
rsb r7, r7, #8
str r7, [r6], #4 @ push y-offset to tilecache
mov r4, #328
mla r11, r4, r7, r11 @ scrpos+=(8-(vscroll&7))*328;
mov r9, #0xff000000 @ r9=(prevcode<<8)|flags: 1~tile empty, 2~tile singlecolor
.rtrloop_outer:
mov r4, lr, lsl #11
mov r4, r4, lsr #25 @ r4=vscroll>>3 (7 bits)
add r4, r4, r5, lsr #16 @ +trow
and r4, r4, r5, lsr #24 @ &=ymask
mov r7, lr, lsr #8
and r7, r7, #7 @ shift[width]
mov r0, lr, lsr #9
and r0, r0, #0x7000 @ nametab
add r12,r0, r4, lsl r7 @ nametab_row = nametab + (((trow+(vscroll>>3))&ymask)<<shift[width]);
mov r4, lr, lsr #24
orr r12,r12,r4, lsl #23
mov r12,r12,lsl #1 @ (nametab_row|(cells<<24)) (halfword compliant)
@ htab?
tst r5, #0x8000
moveq r7, r5, lsl #22 @ hscroll (0-3FFh)
moveq r7, r7, lsr #22
beq .rtr_hscroll_done
@ get hscroll from htab
mov r7, r5, lsl #17
ands r4, r5, #0x00ff0000
add r7, r7, r4, lsl #5 @ +=trow<<4
andne r4, lr, #0x3800
subne r7, r7, r4, lsl #7 @ if(trow) htaddr-=(vscroll&7)<<1;
mov r7, r7, lsr #16 @ halfwords
ldrh r7, [r10, r7]
.rtr_hscroll_done:
and r8, r8, #0xff000000
rsb r4, r7, #0 @ r4=tilex=(-ts->hscroll)>>3
mov r4, r4, asr #3
and r4, r4, #0xff
orr r8, r8, r4 @ r8=(xmask<<24)|tilex
sub r7, r7, #1
and r7, r7, #7
add r7, r7, #1 @ r7=dx=((ts->hscroll-1)&7)+1
cmp r7, #8
subeq r12,r12, #0x01000000 @ we will loop cells+1 times, so loop less when there is no hscroll
add r1, r11, r7 @ r1=pdest
mov r0, #0xf
b .rtrloop_enter
@ r4 & r7 are scratch in this loop
.rtrloop: @ 40-41 times
add r1, r1, #8
subs r12,r12, #0x01000000
add r8, r8, #1
bmi .rtrloop_exit
.rtrloop_enter:
and r7, r8, r8, lsr #24
add r7, r10, r7, lsl #1
bic r4, r12, #0xff000000 @ Pico.vram[nametab_row+(tilex&xmask)];
ldrh r7, [r7, r4] @ r7=code (int, but from unsigned, no sign extend)
tst r7, #0x8000
bne .rtr_hiprio
cmp r7, r9, lsr #8
bne .rtr_notsamecode
@ we know stuff about this tile already
tst r9, #1
bne .rtrloop @ empty tile
tst r9, #2
bne .rtr_singlecolor @ singlecolor tile
b .rtr_samecode
.rtr_notsamecode:
and r4, r9, #0x600000
mov r9, r7, lsl #8 @ remember new code
@ update cram
and r7, r7, #0x6000
mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);
.rtr_samecode:
tst r9, #0x100000 @ vflip?
bne .rtr_vflip
tst r9, #0x080000 @ hflip?
bne .rtr_hflip
@ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
Tile 0, 0
b .rtrloop
.rtr_hflip:
Tile 1, 0
b .rtrloop
.rtr_vflip:
tst r9, #0x080000 @ hflip?
bne .rtr_vflip_hflip
Tile 0, 1
b .rtrloop
.rtr_vflip_hflip:
Tile 1, 1
b .rtrloop
.rtr_singlecolor:
TileSinglecol
b .rtrloop
.rtr_hiprio:
@ *(*hcache)++ = code|(dx<<16)|(trow<<27);
sub r4, r1, r11
orr r7, r7, r4, lsl #16
and r4, r5, #0x00ff0000
orr r7, r7, r4, lsl #11 @ (trow<<27)
str r7, [r6], #4 @ cache hi priority tile
b .rtrloop
.rtrloop_exit:
add r5, r5, #0x00010000
mov r4, r5, lsl #8
cmp r4, lr, lsl #24
bge .rtrloop_outer_exit
add r11, r11, #328*8
b .rtrloop_outer
.rtrloop_outer_exit:
@ terminate cache list
mov r0, #0
str r0, [r6] @ save cache pointer
ldmfd sp!, {r4-r11,lr}
bx lr
.pool
@ void DrawTilesFromCacheF(int *hc, struct PicoEState *est)
.global DrawTilesFromCacheF
DrawTilesFromCacheF:
stmfd sp!, {r4-r11,lr}
mov r9, #0xff000000 @ r9=prevcode=-1
mvn r6, #0 @ r6=prevy=-1
ldr r7, [r1, #OFS_EST_rendstatus]
ldr r4, [r1, #OFS_EST_Draw2FB]
ldr r11,[r1, #OFS_EST_Draw2Start]
ldr r2, [r0], #4 @ read y offset
tst r7, #PDRAW_BORDER_32 @ H32 border mode?
tstne r7, #PDRAW_32_COLS
addne r4, r4, #32
mov r7, #328
mla r2, r7, r2, r4
sub r12, r2, #(328*8*START_ROW) @ r12=scrpos
ldr r10, [r1, #OFS_EST_PicoMem_vram]
mov r8, r0 @ hc
mov r0, #0xf
@ scratch: r4, r7
@ *hcache++ = code|(dx<<16)|(trow<<27); // cache it
.dtfcf_loop:
ldr r7, [r8], #4 @ read code
movs r1, r7, lsr #16 @ r1=dx;
ldmeqfd sp!, {r4-r11,pc} @ dx is never zero, this must be a terminator, return
@ row changed?
cmp r6, r7, lsr #27
movne r6, r7, lsr #27
subne r6, r6, r11
movne r4, #328*8
mlane r5, r4, r6, r12 @ r5=pd = scrpos + (prevy-Draw2Start)*328*8
bic r1, r1, #0xf800
add r1, r5, r1 @ r1=pdest (halfwords)
mov r7, r7, lsl #16
mov r7, r7, lsr #16
cmp r7, r9, lsr #8
bne .dtfcf_notsamecode
@ we know stuff about this tile already
tst r9, #1
bne .dtfcf_loop @ empty tile
tst r9, #2
bne .dtfcf_singlecolor @ singlecolor tile
b .dtfcf_samecode
.dtfcf_notsamecode:
and r4, r9, #0x600000
mov r9, r7, lsl #8 @ remember new code
@ update cram val
and r7, r7, #0x6000
mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);
.dtfcf_samecode:
tst r9, #0x100000 @ vflip?
bne .dtfcf_vflip
tst r9, #0x080000 @ hflip?
bne .dtfcf_hflip
@ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
Tile 0, 0
b .dtfcf_loop
.dtfcf_hflip:
Tile 1, 0
b .dtfcf_loop
.dtfcf_vflip:
tst r9, #0x080000 @ hflip?
bne .dtfcf_vflip_hflip
Tile 0, 1
b .dtfcf_loop
.dtfcf_vflip_hflip:
Tile 1, 1
b .dtfcf_loop
.dtfcf_singlecolor:
TileSinglecol
b .dtfcf_loop
.pool
@ @@@@@@@@@@@@@@@
@ (tile_start<<16)|row_start
@ void DrawWindowFull(int start, int end, int prio, struct PicoEState *est)
.global DrawWindowFull
DrawWindowFull:
stmfd sp!, {r4-r11,lr}
ldr r11, [r3, #OFS_EST_Pico]
ldrb r12, [r11, #OFS_Pico_video_reg+3] @ pvid->reg[3]
mov r12, r12, lsl #10
ldr r4, [r11, #OFS_Pico_video_reg+12]
mov r5, #1 @ nametab_step
ldr r11, [r3, #OFS_EST_Draw2FB]
ldr r6, [r3, #OFS_EST_Draw2Start]
tst r4, #1 @ 40 cell mode?
andne r12, r12, #0xf000 @ 0x3c<<10
movne r5, r5, lsl #7
bne 0f
ldr r7, [r3, #OFS_EST_rendstatus]
and r12, r12, #0xf800
mov r5, r5, lsl #6 @ nametab_step
tst r7, #PDRAW_BORDER_32
tstne r7, #PDRAW_32_COLS
addne r11, r11, #32 @ center screen in H32 mode
0: and r4, r0, #0xff
sub r4, r4, r6
mla r12, r5, r4, r12 @ nametab += nametab_step*(start-Draw2Start];
ldr r10, [r3, #OFS_EST_PicoMem_vram]
mov r4, r0, lsr #16 @ r4=start_cell_h
add r7, r12, r4, lsl #1
@ fetch the first code now
ldrh r7, [r10, r7]
cmp r2, r7, lsr #15
ldmnefd sp!, {r4-r11,pc} @ hack: simply assume that whole window uses same priority
rsb r8, r4, r1, lsr #16 @ cells (h)
orr r8, r8, r4, lsl #8
mov r4, r1, lsl #24
sub r4, r4, r0, lsl #24
orr r8, r8, r4, lsr #8 @ r8=cells_h|(start_cell_h<<8)|(cells_v<<16)
sub r8, r8, #0x010000 @ adjust for algo
mov r9, #0xff000000 @ r9=prevcode=-1
and r4, r0, #0xff
add r11, r11, #328*8
sub r4, r4, r6
add r11, r11, #8
mov r7, #328*8
mla r11, r7, r4, r11 @ scrpos+=8*328*(start-Draw2Start);
mov r0, #0xf
.dwfloop_outer:
and r6, r8, #0xff00 @ r6=tilex
add r1, r11, r6, lsr #5 @ r1=pdest
add r6, r12, r6, lsr #7
add r6, r10, r6 @ r6=Pico.vram+nametab+tilex
orr r8, r8, r8, lsl #24
sub r8, r8, #0x01000000 @ cell loop counter
b .dwfloop_enter
@ r4 & r7 are scratch in this loop
.dwfloop:
add r1, r1, #8
subs r8, r8, #0x01000000
bmi .dwfloop_exit
.dwfloop_enter:
ldrh r7, [r6], #2 @ r7=code
cmp r7, r9, lsr #8
bne .dwf_notsamecode
@ we know stuff about this tile already
tst r9, #1
bne .dwfloop @ empty tile
tst r9, #2
bne .dwf_singlecolor @ singlecolor tile
b .dwf_samecode
.dwf_notsamecode:
and r4, r9, #0x600000
mov r9, r7, lsl #8 @ remember new code
@ update cram val
and r7, r7, #0x6000
mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);
.dwf_samecode:
tst r9, #0x100000 @ vflip?
bne .dwf_vflip
tst r9, #0x080000 @ hflip?
bne .dwf_hflip
@ Tile (r1=pdest, r3=pal, r9=prevcode, r10=PicoMem.vram)
@ r2,r4,r7: scratch, r0=0xf
Tile 0, 0
b .dwfloop
.dwf_hflip:
Tile 1, 0
b .dwfloop
.dwf_vflip:
tst r9, #0x080000 @ hflip?
bne .dwf_vflip_hflip
Tile 0, 1
b .dwfloop
.dwf_vflip_hflip:
Tile 1, 1
b .dwfloop
.dwf_singlecolor:
TileSinglecol 1
b .dwfloop
.dwfloop_exit:
bic r8, r8, #0xff000000 @ fix r8
subs r8, r8, #0x010000
ldmmifd sp!, {r4-r11,pc}
add r11, r11, #328*8
add r12, r12, r5 @ nametab+=nametab_step
b .dwfloop_outer
.pool
@ ---------------- sprites ---------------
.macro SpriteLoop hflip vflip
.if \vflip
mov r1, r5, lsr #24 @ height
mov r0, #328*8
mla r11, r1, r0, r11 @ scrpos+=height*328*8;
add r12, r12, r1, lsl #3 @ sy+=height*8
.endif
mov r0, #0xf
.if \hflip
and r1, r5, #0xff
add r8, r8, r1, lsl #3 @ sx+=width*8
58:
cmp r8, #336
blt 51f
add r9, r9, r5, lsr #16
sub r5, r5, #1 @ sub width
sub r8, r8, #8
b 58b
.else
cmp r8, #0 @ skip tiles hidden on the left of screen
bgt 51f
58:
add r9, r9, r5, lsr #16
sub r5, r5, #1
adds r8, r8, #8
ble 58b
b 51f
.endif
50: @ outer
.if !\hflip
add r8, r8, #8 @ sx+=8
.endif
bic r5, r5, #0xff000000 @ fix height
orr r5, r5, r5, lsl #16
51: @ outer_enter
sub r5, r5, #1 @ width--
movs r1, r5, lsl #24
ldmmifd sp!, {r4-r11,pc} @ end of tile
.if \hflip
subs r8, r8, #8 @ sx-=8
ldmlefd sp!, {r4-r11,pc} @ tile offscreen
.else
cmp r8, #328
ldmgefd sp!, {r4-r11,pc} @ tile offscreen
.endif
mov r6, r12 @ r6=sy
add r1, r11, r8 @ pdest=scrpos+sx
b 53f
52: @ inner
add r9, r9, #1<<8 @ tile++
.if !\vflip
add r6, r6, #8 @ sy+=8
add r1, r1, #328*8
.endif
53: @ inner_enter
@ end of sprite?
subs r5, r5, #0x01000000
bmi 50b @ ->outer
.if \vflip
sub r6, r6, #8 @ sy-=8
sub r1, r1, #328*8
.endif
@ offscreen?
cmp r6, #(START_ROW*8)
ble 52b
cmp r6, #(END_ROW*8+8)
bge 52b
@ Tile (r1=pdest, r3=pal, r9=prevcode, r10=PicoMem.vram)
@ r2,r4,r7: scratch, r0=0xf
Tile \hflip, \vflip
b 52b
.endm
@ void DrawSpriteFull(unsigned int *sprite, struct PicoEState *est)
.global DrawSpriteFull
DrawSpriteFull:
stmfd sp!, {r4-r11,lr}
ldr r3, [r0] @ sprite[0]
mov r5, r3, lsl #4
mov r6, r5, lsr #30
add r6, r6, #1 @ r6=width
mov r5, r5, lsl #2
mov r5, r5, lsr #30
add r5, r5, #1 @ r5=height
mov r12, r3, lsl #23
mov r12, r12, lsr #23
ldr lr, [r0, #4] @ lr=code
sub r12, r12, #0x78 @ r12=sy
mov r8, lr, lsl #7
mov r8, r8, lsr #23
sub r8, r8, #0x78 @ r8=sx
mov r9, lr, lsl #21
mov r9, r9, lsr #13 @ r9=tile<<8
and r3, lr, #0x6000
mov r3, r3, lsr #9 @ r3=pal=((code>>9)&0x30);
ldr r0, [r1, #OFS_EST_rendstatus]
ldr r11, [r1, #OFS_EST_Draw2FB]
ldr r2, [r1, #OFS_EST_Draw2Start]
ldr r10, [r1, #OFS_EST_PicoMem_vram]
tst r0, #PDRAW_BORDER_32 @ H32 border mode?
tstne r0, #PDRAW_32_COLS
addne r11, r11, #32
sub r12, r12, r2, lsl #3
mov r0, #328
mla r11, r12, r0, r11 @ scrpos+=(sy-Draw2Start*8)*328;
orr r5, r5, r5, lsl #16 @
orr r5, r6, r5, lsl #8 @ r5=width|(height<<8)|(height<<24)
tst lr, #0x1000 @ vflip?
bne .dsf_vflip
tst lr, #0x0800 @ hflip?
bne .dsf_hflip
SpriteLoop 0, 0
.dsf_hflip:
SpriteLoop 1, 0
.dsf_vflip:
tst lr, #0x0800 @ hflip?
bne .dsf_vflip_hflip
SpriteLoop 0, 1
.dsf_vflip_hflip:
SpriteLoop 1, 1
.pool
@ vim:filetype=armasm