mirror of
				https://github.com/RaySollium99/picodrive.git
				synced 2025-10-27 21:48:50 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			1870 lines
		
	
	
	
		
			53 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			1870 lines
		
	
	
	
		
			53 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| /*
 | |
|  * assembly optimized versions of most functions from draw.c
 | |
|  * (C) notaz, 2006-2010,2017
 | |
|  * (C) kub, 2020
 | |
|  *
 | |
|  * This work is licensed under the terms of MAME license.
 | |
|  * See COPYING file in the top-level directory.
 | |
|  *
 | |
|  * this is highly specialized, be careful if changing related C code!
 | |
|  */
 | |
| 
 | |
| #include "pico_int_offs.h"
 | |
| 
 | |
| .extern DrawStripInterlace
 | |
| 
 | |
| .equ PDRAW_SPRITES_MOVED, (1<<0)
 | |
| .equ PDRAW_WND_DIFF_PRIO, (1<<1)
 | |
| .equ PDRAW_PARSE_SPRITES, (1<<2)
 | |
| .equ PDRAW_DIRTY_SPRITES, (1<<4)
 | |
| .equ PDRAW_PLANE_HI_PRIO, (1<<6)
 | |
| .equ PDRAW_SHHI_DONE,     (1<<7)
 | |
| 
 | |
| @ helpers
 | |
| .macro add_c24 d s c            @ d = s + c for a constant c of up to 24 bits, added one byte lane at a time
 | |
|     add     \d, \s, #(\c & 0x00ff00)    @ bits 8-15; always emitted, so it also copies s into d
 | |
| .if \c & 0x0000ff
 | |
|     add     \d, \d, #(\c & 0x0000ff)    @ bits 0-7, emitted only when non-zero
 | |
| .endif
 | |
| .if \c & 0xff0000
 | |
|     add     \d, \d, #(\c & 0xff0000)    @ bits 16-23, emitted only when non-zero
 | |
| .endif
 | |
| .endm
 | |
| 
 | |
| .macro TilePixel pat lsrr offs  @ plot one pixel; uses r1=pdest, r2=pixels8, r3=pal; clobbers r4 and the flags
 | |
| .if !\lsrr
 | |
|     ands    r4, \pat, r2              @ r4 = pixel bits selected by pat, Z set when they are 0
 | |
| .else
 | |
|     ands    r4, \pat, r2, lsr #\lsrr  @ r4 = pixel bits selected by pat after the shift, Z set when 0
 | |
| .endif
 | |
|     orrne   r4, r3, r4                @ non-zero pixel: merge with pal
 | |
|     strneb  r4, [r1,#\offs]           @ ... and store it; a zero pixel leaves the destination byte untouched
 | |
| .endm
 | |
| 
 | |
| @ TileNorm (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf; plots the 8 nibbles of r2 to [r1+0..7], zero nibbles are skipped
 | |
| .macro TileNorm pat
 | |
|     TilePixel \pat, 12, 0         @ #0x0000f000
 | |
|     TilePixel \pat,  8, 1         @ #0x00000f00
 | |
|     TilePixel \pat,  4, 2         @ #0x000000f0
 | |
|     TilePixel \pat,  0, 3         @ #0x0000000f
 | |
|     TilePixel \pat, 28, 4         @ #0xf0000000
 | |
|     TilePixel \pat, 24, 5         @ #0x0f000000
 | |
|     TilePixel \pat, 20, 6         @ #0x00f00000
 | |
|     TilePixel \pat, 16, 7         @ #0x000f0000
 | |
| .endm
 | |
| 
 | |
| @ TileFlip (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf; like TileNorm but the nibbles go to [r1+0..7] in the opposite order
 | |
| .macro TileFlip pat
 | |
|     TilePixel \pat, 16, 0         @ #0x000f0000
 | |
|     TilePixel \pat, 20, 1         @ #0x00f00000
 | |
|     TilePixel \pat, 24, 2         @ #0x0f000000
 | |
|     TilePixel \pat, 28, 3         @ #0xf0000000
 | |
|     TilePixel \pat,  0, 4         @ #0x0000000f
 | |
|     TilePixel \pat,  4, 5         @ #0x000000f0
 | |
|     TilePixel \pat,  8, 6         @ #0x00000f00
 | |
|     TilePixel \pat, 12, 7         @ #0x0000f000
 | |
| .endm
 | |
| 
 | |
| @ shadow/hilight mode
 | |
| 
 | |
| @ this one is for hi priority layer: one pixel, uses r1=pdest, r2=pixels8, r3=pal, r12=0xf; clobbers r4 and the flags
 | |
| .macro TilePixelShHP lsrr offs
 | |
| .if !\lsrr
 | |
|     ands    r4, r12, r2               @ r4 = pixel nibble, Z set when it is 0
 | |
| .else
 | |
|     ands    r4, r12, r2, lsr #\lsrr   @ r4 = pixel nibble, Z set when it is 0
 | |
| .endif
 | |
|     ldreqb  r4, [r1,#\offs]           @ zero pixel: fetch the byte already in the line buffer
 | |
|     orrne   r4, r3, r4                @ non-zero pixel: pal | nibble
 | |
|     andeq   r4, r4, #0x7f             @ zero pixel: clear bit 7 of the existing byte (presumably the shadow flag - confirm)
 | |
|     strb    r4, [r1,#\offs]           @ the store is unconditional, both cases write the byte back
 | |
| .endm
 | |
| 
 | |
| @ TileNormShHP (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: register with helper pattern 0xf. NOTE(review): the old "touches r3 high bits" remark looks stale, TilePixelShHP only writes r4
 | |
| .macro TileNormShHP
 | |
|     TilePixelShHP 12, 0         @ #0x0000f000
 | |
|     TilePixelShHP  8, 1         @ #0x00000f00
 | |
|     TilePixelShHP  4, 2         @ #0x000000f0
 | |
|     TilePixelShHP  0, 3         @ #0x0000000f
 | |
|     TilePixelShHP 28, 4         @ #0xf0000000
 | |
|     TilePixelShHP 24, 5         @ #0x0f000000
 | |
|     TilePixelShHP 20, 6         @ #0x00f00000
 | |
|     TilePixelShHP 16, 7         @ #0x000f0000
 | |
| .endm
 | |
| 
 | |
| @ TileFlipShHP (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: register with helper pattern 0xf (the macro takes no parameter); nibble order reversed compared to TileNormShHP
 | |
| .macro TileFlipShHP
 | |
|     TilePixelShHP 16, 0         @ #0x000f0000
 | |
|     TilePixelShHP 20, 1         @ #0x00f00000
 | |
|     TilePixelShHP 24, 2         @ #0x0f000000
 | |
|     TilePixelShHP 28, 3         @ #0xf0000000
 | |
|     TilePixelShHP  0, 4         @ #0x0000000f
 | |
|     TilePixelShHP  4, 5         @ #0x000000f0
 | |
|     TilePixelShHP  8, 6         @ #0x00000f00
 | |
|     TilePixelShHP 12, 7         @ #0x0000f000
 | |
| .endm
 | |
| 
 | |
| 
 | |
| @ TileSingleSh (r1=pdest) r4,r7: scratch, r0=sx (bit 0 selects the unaligned path); r12 is used as scratch and reloaded with 0xf. ORs 0x80 into the 8 bytes at r1 and leaves r1 advanced by 8
 | |
| .macro TileSingleSh
 | |
|     tst     r0, #1              @ not aligned? (these flags steer every conditional op below)
 | |
|     mov     r7, #0x008000
 | |
|     orr     r7, r7, #0x80       @ r7=0x8080, the marker bit for two pixels at once
 | |
|     ldrneb  r4,  [r1], #1       @ unaligned: first pixel is handled as a single byte
 | |
|     ldreqh  r4,  [r1], #2        @ 1ci
 | |
|     ldrh    r12, [r1], #2       @ loads run one step ahead of the stores, hence the negative store offsets
 | |
|     orr     r4,  r4,  r7
 | |
|     strneb  r4,  [r1, #-3]
 | |
|     streqh  r4,  [r1, #-4]
 | |
|     ldrh    r4,  [r1], #2
 | |
|     orr     r12, r12, r7
 | |
|     strh    r12, [r1, #-4]
 | |
|     ldrh    r12, [r1], #2
 | |
|     orr     r4,  r4,  r7
 | |
|     strh    r4,  [r1, #-4]
 | |
|     ldrneb  r4,  [r1]           @ unaligned: one trailing byte is left over
 | |
|     orr     r12, r12, r7
 | |
|     strh    r12, [r1, #-2]
 | |
|     orrne   r4,  r4,  r7
 | |
|     strneb  r4,  [r1], #1
 | |
|     mov     r12, #0xf           @ restore the helper pattern
 | |
| .endm
 | |
| 
 | |
| @ TileSingleHi (r1=pdest) r4,r7: scratch; r12 is used as scratch and reloaded with 0xf. ORs 0x40 into the 8 bytes at r1 and leaves r1 advanced by 8. NOTE(review): alignment is tested on r1 here, while TileSingleSh tests r0 (sx)
 | |
| .macro TileSingleHi
 | |
|     tst     r1,  #1              @ not aligned? (these flags steer every conditional op below)
 | |
|     mov     r7,  #0x004000
 | |
|     orr     r7,  r7, #0x40       @ r7=0x4040, the marker bit for two pixels at once
 | |
|     ldrneb  r4,  [r1], #1        @ unaligned: first pixel is handled as a single byte
 | |
|     ldreqh  r4,  [r1], #2        @ 1ci
 | |
|     ldrh    r12, [r1], #2        @ loads run one step ahead of the stores, hence the negative store offsets
 | |
|     orr     r4,  r4,  r7
 | |
|     strneb  r4,  [r1, #-3]
 | |
|     streqh  r4,  [r1, #-4]
 | |
|     ldrh    r4,  [r1], #2
 | |
|     orr     r12, r12, r7
 | |
|     strh    r12, [r1, #-4]
 | |
|     ldrh    r12, [r1], #2
 | |
|     orr     r4,  r4,  r7
 | |
|     strh    r4,  [r1, #-4]
 | |
|     ldrneb  r4,  [r1]            @ unaligned: one trailing byte is left over
 | |
|     orr     r12, r12, r7
 | |
|     strh    r12, [r1, #-2]
 | |
|     orrne   r4,  r4,  r7
 | |
|     strneb  r4,  [r1], #1
 | |
|     mov     r12, #0xf            @ restore the helper pattern
 | |
| .endm
 | |
| 
 | |
| .macro TileDoShGenPixel shift ofs   @ one pixel with operator colors; uses r1=pdest, r2=pixels8, r3=pal, r12=0xf; clobbers r4, r7, flags
 | |
| .if \shift
 | |
|     ands    r4, r12, r2, lsr #\shift
 | |
| .else
 | |
|     ands    r4, r12, r2
 | |
| .endif
 | |
|     beq     0f                    @ zero pixel: nothing to draw
 | |
|     cmp     r4, #0xe
 | |
|     ldrgeb  r7, [r1,#\ofs]        @ colors 0xe/0xf are operators: start from the existing byte
 | |
|     orrlt   r7, r3, r4            @ normal
 | |
| 
 | |
|     subge   r4, r4, #1            @ 0xe -> 0xd, 0xf -> 0xe
 | |
|     orrge   r7, r7, r4, lsl #6    @ after the byte store this sets bit 6 for color 0xe and bit 7 for color 0xf
 | |
|     strb    r7, [r1,#\ofs]
 | |
| 0:
 | |
| .endm
 | |
| 
 | |
| @ TileFlipSh (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r12: register with helper pattern 0xf; 8 pixels via TileDoShGenPixel, flipped nibble order
 | |
| .macro TileFlipSh
 | |
|     TileDoShGenPixel 16,  0 @ #0x000f0000
 | |
|     TileDoShGenPixel 20,  1 @ #0x00f00000
 | |
|     TileDoShGenPixel 24,  2 @ #0x0f000000
 | |
|     TileDoShGenPixel 28,  3 @ #0xf0000000
 | |
|     TileDoShGenPixel  0,  4 @ #0x0000000f
 | |
|     TileDoShGenPixel  4,  5 @ #0x000000f0
 | |
|     TileDoShGenPixel  8,  6 @ #0x00000f00
 | |
|     TileDoShGenPixel 12,  7 @ #0x0000f000
 | |
| .endm
 | |
| 
 | |
| @ TileNormSh (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r12: register with helper pattern 0xf; 8 pixels via TileDoShGenPixel, normal nibble order
 | |
| .macro TileNormSh
 | |
|     TileDoShGenPixel 12,  0 @ #0x0000f000
 | |
|     TileDoShGenPixel  8,  1 @ #0x00000f00
 | |
|     TileDoShGenPixel  4,  2 @ #0x000000f0
 | |
|     TileDoShGenPixel  0,  3 @ #0x0000000f
 | |
|     TileDoShGenPixel 28,  4 @ #0xf0000000
 | |
|     TileDoShGenPixel 24,  5 @ #0x0f000000
 | |
|     TileDoShGenPixel 20,  6 @ #0x00f00000
 | |
|     TileDoShGenPixel 16,  7 @ #0x000f0000
 | |
| .endm
 | |
| 
 | |
| .macro TileDoShGenPixel_markop shift ofs   @ one pixel; operator colors only leave a mark; uses r1, r2, r3, r12=0xf; clobbers r4, flags
 | |
| .if \shift
 | |
|     ands    r4, r12, r2, lsr #\shift
 | |
| .else
 | |
|     ands    r4, r12, r2
 | |
| .endif
 | |
|     beq     0f                    @ zero pixel: nothing to draw
 | |
|     cmp     r4, #0xe
 | |
|     ldrgeb  r4, [r1,#\ofs]        @ color 0xe/0xf: keep the existing byte ...
 | |
|     orrlt   r4, r3, r4            @ any other color: pal | nibble
 | |
|     orrge   r4, r4, #0x40         @ ... and only set bit 6 on it as a mark
 | |
|     strb    r4, [r1,#\ofs]
 | |
| 0:
 | |
| .endm
 | |
| 
 | |
| .macro TileFlipSh_markop        @ 8 pixels via TileDoShGenPixel_markop, flipped nibble order
 | |
|     TileDoShGenPixel_markop 16,  0 @ #0x000f0000
 | |
|     TileDoShGenPixel_markop 20,  1 @ #0x00f00000
 | |
|     TileDoShGenPixel_markop 24,  2 @ #0x0f000000
 | |
|     TileDoShGenPixel_markop 28,  3 @ #0xf0000000
 | |
|     TileDoShGenPixel_markop  0,  4 @ #0x0000000f
 | |
|     TileDoShGenPixel_markop  4,  5 @ #0x000000f0
 | |
|     TileDoShGenPixel_markop  8,  6 @ #0x00000f00
 | |
|     TileDoShGenPixel_markop 12,  7 @ #0x0000f000
 | |
| .endm
 | |
| 
 | |
| .macro TileNormSh_markop        @ 8 pixels via TileDoShGenPixel_markop, normal nibble order
 | |
|     TileDoShGenPixel_markop 12,  0 @ #0x0000f000
 | |
|     TileDoShGenPixel_markop  8,  1 @ #0x00000f00
 | |
|     TileDoShGenPixel_markop  4,  2 @ #0x000000f0
 | |
|     TileDoShGenPixel_markop  0,  3 @ #0x0000000f
 | |
|     TileDoShGenPixel_markop 28,  4 @ #0xf0000000
 | |
|     TileDoShGenPixel_markop 24,  5 @ #0x0f000000
 | |
|     TileDoShGenPixel_markop 20,  6 @ #0x00f00000
 | |
|     TileDoShGenPixel_markop 16,  7 @ #0x000f0000
 | |
| .endm
 | |
| 
 | |
| .macro TileDoShGenPixel_onlyop_lp shift ofs   @ one pixel, operator colors only, applied where bit 6 is set; uses r1, r2, r12=0xf; clobbers r4, r7, flags
 | |
| .if \shift
 | |
|     ands    r7, r12, r2, lsr #\shift
 | |
| .else
 | |
|     ands    r7, r12, r2
 | |
| .endif
 | |
|     ldrneb  r4, [r1,#\ofs]        @ fetch the existing byte for any non-zero pixel
 | |
|     cmp     r7, #0xe
 | |
|     blt     0f                    @ zero and ordinary colors are skipped, only 0xe/0xf continue
 | |
| 
 | |
|     tst     r4, #0x40             @ act only when the existing byte has bit 6 set
 | |
|     bicne   r4, r4, #0x40         @ drop that bit ...
 | |
|     subne   r7, r7, #1            @ 0xe -> 0xd, 0xf -> 0xe
 | |
|     orrne   r4, r4, r7, lsl #6    @ ... and set bit 6 (color 0xe) or bit 7 (color 0xf) in the stored byte
 | |
|     strneb  r4, [r1,#\ofs]
 | |
| 0:
 | |
| .endm
 | |
| 
 | |
| .macro TileFlipSh_onlyop_lp     @ 8 pixels via TileDoShGenPixel_onlyop_lp, flipped nibble order
 | |
|     TileDoShGenPixel_onlyop_lp 16,  0 @ #0x000f0000
 | |
|     TileDoShGenPixel_onlyop_lp 20,  1 @ #0x00f00000
 | |
|     TileDoShGenPixel_onlyop_lp 24,  2 @ #0x0f000000
 | |
|     TileDoShGenPixel_onlyop_lp 28,  3 @ #0xf0000000
 | |
|     TileDoShGenPixel_onlyop_lp  0,  4 @ #0x0000000f
 | |
|     TileDoShGenPixel_onlyop_lp  4,  5 @ #0x000000f0
 | |
|     TileDoShGenPixel_onlyop_lp  8,  6 @ #0x00000f00
 | |
|     TileDoShGenPixel_onlyop_lp 12,  7 @ #0x0000f000
 | |
| .endm
 | |
| 
 | |
| .macro TileNormSh_onlyop_lp     @ 8 pixels via TileDoShGenPixel_onlyop_lp, normal nibble order
 | |
|     TileDoShGenPixel_onlyop_lp 12,  0 @ #0x0000f000
 | |
|     TileDoShGenPixel_onlyop_lp  8,  1 @ #0x00000f00
 | |
|     TileDoShGenPixel_onlyop_lp  4,  2 @ #0x000000f0
 | |
|     TileDoShGenPixel_onlyop_lp  0,  3 @ #0x0000000f
 | |
|     TileDoShGenPixel_onlyop_lp 28,  4 @ #0xf0000000
 | |
|     TileDoShGenPixel_onlyop_lp 24,  5 @ #0x0f000000
 | |
|     TileDoShGenPixel_onlyop_lp 20,  6 @ #0x00f00000
 | |
|     TileDoShGenPixel_onlyop_lp 16,  7 @ #0x000f0000
 | |
| .endm
 | |
| 
 | |
| 
 | |
| @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 | |
| 
 | |
| @ struct TileStrip
 | |
| @ {
 | |
| @   int nametab; // 0x00
 | |
| @   int line;    // 0x04
 | |
| @   int hscroll; // 0x08
 | |
| @   int xmask;   // 0x0C
 | |
| @   int *hc;     // 0x10 (pointer to cache buffer)
 | |
| @   int cells;   // 0x14
 | |
| @ };
 | |
| 
 | |
| @ void DrawLayer(int lflags, int *hcache, int cellskip, int maxcells,
 | |
| @                struct PicoEState *est)
 | |
| 
 | |
| .global DrawLayer
 | |
| 
 | |
| DrawLayer:                        @ r0=lflags (bit0 plane, bit1 sh, bit2 force) r1=hcache r2=cellskip r3=maxcells [sp]=est
 | |
|     ldr     r12, [sp]             @ est (5th argument, read before the push moves sp)
 | |
|     stmfd   sp!, {r4-r11,lr}      @ 9 registers, so est is at [sp, #9*4] from here on
 | |
| 
 | |
|     ldr     r11, [r12, #OFS_EST_Pico]
 | |
|     mov     r8, #1                @ constant 1, shifted below to build xmask
 | |
| 
 | |
|     ldrb    r7, [r11, #OFS_Pico_video_reg+16] @ ??vv??hh
 | |
| 
 | |
|     mov     r6, r1                @ hcache
 | |
|     orr     r9, r3, r0, lsl #29   @ r9=force[31]|sh[30]|plane[29]
 | |
|     orr     r9, r9, r2, lsl #8    @    |cellskip[15:8]|maxcells[7:0]  (tmp)
 | |
| 
 | |
|     mov     r1, r7, lsl #4        @ moves the vv field up to bits 8-9
 | |
|     orr     r1, r1, #0x00ff
 | |
| 
 | |
|     and     r10, r7,  #3          @ r10=width code (hh)
 | |
|     cmp     r10, #1
 | |
|     biclt   r1,  r1, #0xfc00
 | |
|     biceq   r1,  r1, #0xfe00
 | |
|     cmp     r10, #2
 | |
|     moveq   r1,      #0x0007
 | |
|     movgt   r1,      #0x00ff      @ r1=ymask=(height<<8)|0xff; ...; // Y Mask in pixels
 | |
| 
 | |
|     cmp     r10, #2
 | |
|     addlt   r10, r10, #5
 | |
|     moveq   r10, #5
 | |
|     movgt   r10, #7               @ r10=shift[width] (5,6,5,7)
 | |
| 
 | |
|     ldr     r2, [r12, #OFS_EST_DrawScanline]
 | |
|     ldr     lr, [r12, #OFS_EST_PicoMem_vram]
 | |
| 
 | |
|     @ Find name table:
 | |
|     ands    r0,  r0, #1           @ r0=plane; these flags are still used by the moveq/movne below
 | |
|     ldreqb  r12, [r11, #OFS_Pico_video_reg+2]
 | |
|     ldrneb  r12, [r11, #OFS_Pico_video_reg+4]
 | |
| 
 | |
|     @ calculate xmask:
 | |
|     mov     r5, r8, lsl r10
 | |
|     sub     r5, r5, #1            @ r5=xmask
 | |
| 
 | |
|     moveq   r12, r12, lsl #10
 | |
|     movne   r12, r12, lsl #13
 | |
|     and     r12, r12, #(7<<13)    @ r12=(ts->nametab<<1) (halfword compliant)
 | |
| 
 | |
|     ldrh    r8, [r11, #OFS_Pico_video_reg+12] @ r8=reg[12]|(reg[13]<<8)
 | |
|     ldrb    r7, [r11, #OFS_Pico_video_reg+11]
 | |
| 
 | |
|     mov     r4, r8, lsr #8        @ pvid->reg[13]
 | |
|     mov     r4, r4, lsl #10       @ htab=pvid->reg[13]<<9; (halfwords)
 | |
| 
 | |
|     ands    r3, r7, #0x03         @ hscroll mode bits of reg[11]; 0 skips the per-line offset
 | |
|     beq     0f
 | |
|     cmp     r3, #2
 | |
|     mov     r3, r2, lsl #2        @ htab+=DrawScanline<<1; // Offset by line
 | |
|     biceq   r3, #0x1f             @ htab&=~0xf; // Offset by tile
 | |
|     andlt   r3, #0x1f             @ mode 1: only the low 5 bits of the byte offset are kept
 | |
|     add     r4, r4, r3
 | |
| 0:  add     r4, r4, r0, lsl #1    @ htab+=plane
 | |
|     bic     r4, r4, #0x00ff0000   @ just in case
 | |
|     ldrh    r3, [lr, r4]          @ r3=hscroll
 | |
| 
 | |
|     tst     r7, #4                @ reg[11] bit 2 selects the .DrawStrip_vsscroll path
 | |
|     bne     .DrawStrip_vsscroll
 | |
| 
 | |
|     @ Get vertical scroll value:
 | |
|     add_c24 r7, lr, (OFS_PMEM_vsram-OFS_PMEM_vram)
 | |
|     ldr     r7, [r7]              @ first two vsram halfwords; the plane picks one of them below
 | |
| 
 | |
|     tst     r8, #2
 | |
|     tstne   r8, #4
 | |
|     bne     .DrawStrip_interlace  @ taken only when reg[12] bits 1 and 2 are both set
 | |
| 
 | |
|     tst     r0, r0
 | |
|     moveq   r7, r7, lsl #16       @ plane 0: low halfword, plane 1: high halfword
 | |
|     mov     r7, r7, lsr #16
 | |
| 
 | |
|     @ Find the line in the name table
 | |
|     add     r2, r2, r7
 | |
|     and     r2, r2, r1
 | |
|     mov     r4, r2, lsr #3
 | |
|     add     r10, r10, #1           @ shift[width]++
 | |
|     add     r12, r12, r4, lsl r10  @ nametab+=(ts.line>>3)<<shift[width];
 | |
| 
 | |
|     @ ldmia   r0, {r1,r2,r3,r5,r6,r9} @ r2=line, r3=ts->hscroll, r5=ts->xmask, r6=ts->hc, r9=ts->cells
 | |
| 
 | |
|     and     r10,r2,  #7
 | |
|     mov     r10,r10, lsl #1 @ r10=ty=(ts->line&7)<<1;
 | |
|     orr     r10,r10, r9, lsl #24
 | |
| 
 | |
|     rsb     r8, r3, #0
 | |
|     mov     r8, r8, lsr #3  @ r8=tilex=(-ts->hscroll)>>3
 | |
| 
 | |
|     sub     r1, r3, #1
 | |
|     and     r1, r1, #7
 | |
|     add     r7, r1, #1      @ r7=dx=((ts->hscroll-1)&7)+1
 | |
| 
 | |
|     movs    r3, r9, lsl #1  @ (force[31]|sh[30]) << 1
 | |
|     mov     r3, #0          @ r3=pal=0; plain mov keeps the N flag (sh) for the conditional ops below
 | |
|     orrmi   r10,r10, #1<<23 @ r10=cells[31:24]|sh[23]|hi_not_empty[22]
 | |
| @    orrcc   r10,r10, #1<<20 @    |had_output[21]|!force[20]|ty[15:0]
 | |
|     movmi   r3, #0x80       @ default to shadowed pal on sh mode
 | |
| 
 | |
|     cmp     r7, #8
 | |
|     addne   r10,r10, #0x01000000 @ we will loop cells+1 times if there is scroll
 | |
| 
 | |
|     and     r9, r9, #0xff00
 | |
|     add     r8, r8, r9, lsr #8   @ tilex+=cellskip
 | |
|     add     r7, r7, r9, lsr #5   @ dx+=cellskip<<3;
 | |
|     sub     r10,r10,r9, lsl #16  @ cells-=cellskip
 | |
| 
 | |
|     @ cache some stuff to avoid mem access
 | |
|     ldr     r11,[sp, #9*4]       @ est
 | |
|     mov     r0, #0xf
 | |
|     ldr     r11,[r11, #OFS_EST_HighCol]
 | |
| 
 | |
|     mvn     r9, #0               @ r9=prevcode=-1
 | |
|     add     r1, r11, r7          @ r1=pdest
 | |
| 
 | |
|     @ r10=cells[31:24]|sh[23]|hi_not_empty[22]|had_output[21]|!force[20]|ty[15:0]
 | |
|     @ r1=pd+dx r2=pack r3=pal r5=xmask r6=hc r8=tilex r9=prevcode r11=HighCol r12=nametab lr=vram
 | |
|     @ r4 & r7 are scratch in this loop
 | |
| .dsloop_subr1:
 | |
|     sub     r1, r1, #8      @ cancels the add #8 at .dsloop_enter (first entry, and after the single color path moved r1 by 8)
 | |
| .dsloop: @ 40-41 times
 | |
|     subs    r10,r10, #0x01000000 @ cells--
 | |
|     bmi     .dsloop_exit
 | |
| 
 | |
| .dsloop_enter:
 | |
|     and     r7, r5, r8
 | |
|     add     r7, lr, r7, lsl #1 @ Pico.vram+((tilex&ts->xmask) as halfwords)
 | |
|     ldrh    r7, [r7, r12]      @ r7=code (int, but from unsigned, no sign extend)
 | |
| 
 | |
|     add     r1, r1, #8
 | |
|     add     r8, r8, #1
 | |
| 
 | |
|     cmp     r7, r9
 | |
|     beq     .DrawStrip_samecode @ we know stuff about this tile already
 | |
| 
 | |
|     mov     r9, r7          @ remember code
 | |
| 
 | |
|     movs    r2, r9, lsl #20 @ if (code&0x1000)
 | |
|     mov     r2, r2, lsl #1
 | |
|     add     r2, r2, r10, lsl #17
 | |
|     mov     r2, r2, lsr #17
 | |
|     eorcs   r2, r2, #0x0e   @ if (code&0x1000) addr^=0xe;
 | |
| 
 | |
|     ldr     r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels
 | |
| 
 | |
| .DrawStrip_samecode:
 | |
|     tst     r9, #0x8000     @ code bit 15 set: tile goes to the hi priority cache instead of being drawn
 | |
| @    tstne   r10, #1<<20     @ !force[20]
 | |
|     bne     .DrawStrip_hiprio
 | |
| 
 | |
|     orr     r10, r10, #1<<21 @ seen non hi-prio tile
 | |
|     tst     r2, r2
 | |
|     beq     .dsloop              @ tileline blank
 | |
| 
 | |
|     bic     r7, r3, #0x7f      @ keep only bit 7 and above of the old pal (the sh default 0x80)
 | |
|     and     r3, r9, #0x6000
 | |
|     add     r3, r7, r3, lsr #9 @ r3=pal=((code&0x6000)>>9);
 | |
| 
 | |
|     cmp     r2, r2, ror #4     @ equal only when all 8 nibbles of pack are the same
 | |
|     beq     .DrawStrip_SingleColor @ tileline singlecolor 
 | |
| 
 | |
|     tst     r9, #0x0800
 | |
|     bne     .DrawStrip_TileFlip
 | |
| 
 | |
|     @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern
 | |
| .DrawStrip_TileNorm:
 | |
|     TileNorm r0
 | |
|     b       .dsloop
 | |
| 
 | |
| .DrawStrip_TileFlip:
 | |
|     TileFlip r0
 | |
|     b       .dsloop
 | |
| 
 | |
| .DrawStrip_SingleColor:
 | |
|     and     r4, r2, #0xf
 | |
|     orr     r4, r3, r4
 | |
|     orr     r4, r4, r4, lsl #8 @ same pixel in both bytes of a halfword
 | |
|     tst     r1, #1             @ not aligned?
 | |
|     strneb  r4, [r1], #1
 | |
|     streqh  r4, [r1], #2
 | |
|     strh    r4, [r1], #2
 | |
|     strh    r4, [r1], #2
 | |
|     strh    r4, [r1], #2
 | |
|     strneb  r4, [r1], #1       @ have a remaining unaligned pixel?
 | |
|     b       .dsloop_subr1      @ r1 has moved by 8 on either path, so take the entry that subtracts it again
 | |
| 
 | |
| .DrawStrip_hiprio:
 | |
|     tst     r10, #(1<<23)   @ sh[23]
 | |
|     tsteq   r2, r2          @ if (!sh[23] && pack==blank) continue
 | |
|     beq     .dsloop
 | |
| 
 | |
| @    orr     r10, r10, #1<<22 @ hi_not_empty[22]
 | |
|     sub     r7, r1, r11     @ r7=dx (offset of pdest within HighCol)
 | |
|     orr     r7, r9, r7,  lsl #16 @ cval=code|(dx<<16)
 | |
|     orr     r7, r7, r10, lsl #25 @ (ty<<25)
 | |
|     tst     r9, #0x1000
 | |
|     eorne   r7, r7, #0xe<<25 @ if(code&0x1000) cval^=0xe<<25;
 | |
|     str     r7, [r6], #4    @ cache hi priority tile code
 | |
|     str     r2, [r6], #4    @ cache hi priority tile data
 | |
|     b       .dsloop
 | |
| 
 | |
| .dsloop_exit:
 | |
|     tst     r10, #1<<21 @ seen non hi-prio tile
 | |
|     ldr     r1, [sp, #9*4]  @ est
 | |
|     mov     r0, #0
 | |
|     ldreq   r2, [r1, #OFS_EST_rendstatus]
 | |
|     str     r0, [r6]    @ terminate the cache list
 | |
|     orreq   r2, r2, #PDRAW_PLANE_HI_PRIO @ had a layer with all hi-prio tiles
 | |
|     streq   r2, [r1, #OFS_EST_rendstatus]
 | |
| 
 | |
|     ldmfd   sp!, {r4-r11,pc}
 | |
| 
 | |
| @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 | |
| 
 | |
| .DrawStrip_vsscroll:              @ in: r0=plane r1=ymask r3=hscroll r5=xmask r6=hc r8=reg[12]|(reg[13]<<8) r9=flags|cellskip|maxcells r10=shift[width] r12=nametab<<1 lr=vram
 | |
|     tst     r8, #1                @ if h40: lflags |= 0x10000
 | |
|     orrne   r0, r0, #0x10000
 | |
| 
 | |
|     rsb     r8, r3, #0
 | |
|     mov     r8, r8, lsr #3        @ r8=tilex=(-ts->hscroll)>>3
 | |
|     bic     r8, r8, #0x3fc00000   @ make room for the fields packed above tilex
 | |
|     orr     r8, r8, r5, lsl #25   @ r8=(xmask[31:25]|had_output[24]|!force[23]|tilex[21:0])
 | |
| 
 | |
|     ldr     r11, [sp, #9*4]       @ est
 | |
|     orr     r5, r1, r10, lsl #24
 | |
|     ldr     r4, [r11, #OFS_EST_DrawScanline]
 | |
|     sub     r1, r3, #1
 | |
|     orr     r5, r5, r4, lsl #16   @ r5=(shift_width[31:24]|scanline[23:16]|ymask[15:0])
 | |
|     and     r1, r1, #7
 | |
|     add     r7, r1, #1            @ r7=dx=((ts->hscroll-1)&7)+1
 | |
| 
 | |
|     mov     r10,r9, lsl #16       @ maxcells moves to bits 16-23
 | |
|     orr     r10,r10, #0xff000000  @ will be adjusted on entering loop
 | |
|     tst     r0, #1
 | |
|     orrne   r10,r10, #0x8000      @ plane[15]
 | |
|     tst     r3, #0x0f             @ hscroll & 0x0f?
 | |
|     beq     0f
 | |
|     eor     r3, r3, r7
 | |
|     sub     r10,r10, #1<<24       @ cell--  // start from negative for hscroll
 | |
|     tst     r3, #0x08
 | |
|     subne   r10,r10, #1<<16       @ cells--
 | |
|     subne   r10,r10, #1<<24       @ cell--  // even more negative
 | |
| 
 | |
|     add_c24 r1, lr, (OFS_PMEM_vsram-OFS_PMEM_vram)
 | |
|     tst     r0, #0x10000          @ h40?
 | |
|     ldrne   r3, [r1, #0x00]       @ r3=vsram[0x00..0x01]
 | |
|     ldreq   r3, [r1, #0x40]       @ r3=vsram[0x20..0x21]
 | |
|     str     r3, [r1, #0x7c]       @ vsram[0x3e..0x3f]=r3
 | |
| 0:
 | |
|     tst     r9, #1<<30
 | |
|     mov     r3, #0
 | |
|     orrne   r10,r10, #1<<23 @ r10=(cell[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0])
 | |
|     movne   r3, #0x80       @ default to shadowed pal on sh mode
 | |
| @    tst     r9, #1<<31
 | |
| @    orreq   r8, r8, #1<<23
 | |
| 
 | |
|     and     r9, r9, #0xff00
 | |
|     add     r8, r8, r9, lsr #8   @ tilex+=cellskip
 | |
|     add     r7, r7, r9, lsr #5   @ dx+=cellskip<<3;
 | |
|     add     r10,r10,r9, lsl #16  @ cell+=cellskip
 | |
| 
 | |
|     @ cache some stuff to avoid mem access
 | |
|     ldr     r11,[sp, #9*4]       @ est
 | |
|     mov     r0, #0xf
 | |
|     ldr     r11,[r11, #OFS_EST_HighCol]
 | |
| 
 | |
|     mvn     r9, #0               @ r9=prevcode=-1
 | |
|     add     r1, r11, r7          @ r1=pdest
 | |
| 
 | |
|     @ r10=cell[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0]
 | |
|     @ r8=xmask[31:25]|had_output[24]|!force[23]|tilex[21:0]
 | |
|     @ r5=shift_width[31:24]|scanline[23:16]|ymask[15:0]
 | |
|     @ r3=nametabadd[31:16]|must_be_0[15:8]|pal[7:0]
 | |
|     @ r1=pd+dx r2=pack r6=hc r9=prevcode r11=HighCol r12=nametab lr=vram
 | |
|     @ r4 & r7 are scratch in this loop
 | |
| 
 | |
|     @ need to calc new ty?
 | |
|     lsls    r7, r10, #7           @ (cell&1)?
 | |
|     bmi     .dsloop_vs_subr1
 | |
| 
 | |
|     @ calc offset and read tileline code to r7, also calc ty
 | |
|     add_c24 r7, lr, (OFS_PMEM_vsram-OFS_PMEM_vram)
 | |
|     and     r4, r10, #0x3e000000
 | |
|     add     r7, r7, r4, asr #23   @ vsram + ((cell&0x3e)<<1)
 | |
|     tst     r10,#0x8000           @ plane1?
 | |
|     addne   r7, r7, #2
 | |
|     ldrh    r7, [r7]              @ r7=vscroll
 | |
| 
 | |
|     bic     r10,r10,#0xff         @ clear old ty
 | |
|     and     r4, r5, #0xff0000     @ scanline
 | |
|     add     r4, r4, r7, lsl #16   @ ... += vscroll
 | |
|     and     r4, r4, r5, lsl #16   @ ... &= ymask
 | |
|     and     r7, r4, #0x70000
 | |
|     orr     r10,r10,r7, lsr #15   @ new ty
 | |
| 
 | |
|     mov     r4, r4, lsr #19
 | |
|     mov     r7, r5, lsr #24
 | |
|     mov     r4, r4, lsl r7        @ nametabadd
 | |
|     and     r3, r3, #0xff
 | |
|     orr     r3, r3, r4, lsl #16   @ r3=(nametabadd[31:16],pal[7:0])
 | |
| 
 | |
| .dsloop_vs_subr1:
 | |
|     sub     r1, r1, #8            @ cancels the add #8 done further down (first entry, and after the single color path)
 | |
| .dsloop_vs: @ 40-41 times
 | |
|     add     r10,r10, #0x01000000  @ cell++
 | |
|     and     r4, r10, #0x003f0000
 | |
|     cmp     r4, r10, asr #8       @ cells_max<<16 against cell<<16, signed
 | |
|     ble     .dsloop_vs_exit
 | |
| 
 | |
|     @ need to calc new ty?
 | |
|     lsls    r7, r10, #7           @ (cell&1)?
 | |
|     bmi     0f
 | |
| 
 | |
|     @ calc offset and read tileline code to r7, also calc ty
 | |
|     add_c24 r7, lr, (OFS_PMEM_vsram-OFS_PMEM_vram)
 | |
|     and     r4, r10, #0x3e000000
 | |
|     add     r7, r7, r4, asr #23   @ vsram + ((cell&0x3e)<<1)
 | |
|     tst     r10,#0x8000           @ plane1?
 | |
|     addne   r7, r7, #2
 | |
|     ldrh    r7, [r7]              @ r7=vscroll
 | |
| 
 | |
|     bic     r10,r10,#0xff         @ clear old ty
 | |
|     and     r4, r5, #0xff0000     @ scanline
 | |
|     add     r4, r4, r7, lsl #16   @ ... += vscroll
 | |
|     and     r4, r4, r5, lsl #16   @ ... &= ymask
 | |
|     and     r7, r4, #0x70000
 | |
|     orr     r10,r10,r7, lsr #15   @ new ty
 | |
| 
 | |
|     mov     r4, r4, lsr #19
 | |
|     mov     r7, r5, lsr #24
 | |
|     mov     r4, r4, lsl r7        @ nametabadd
 | |
|     and     r3, r3, #0xff
 | |
|     orr     r3, r3, r4, lsl #16   @ r3=(nametabadd[31:16],pal[7:0])
 | |
| 0:
 | |
|     and     r7, r8, r8, lsr #25   @ tilex&xmask, the xmask sits in r8[31:25]
 | |
|     add     r7, lr, r7, lsl #1    @ PicoMem.vram+((tilex&ts->xmask) as halfwords)
 | |
|     add     r7, r7, r3, lsr #15   @ + nametabadd as halfwords
 | |
|     ldrh    r7, [r7, r12]         @ r7=code (int, but from unsigned, no sign extend)
 | |
| 
 | |
|     add     r1, r1, #8
 | |
|     add     r8, r8, #1
 | |
| 
 | |
|     orr     r7, r7, r10, lsl #24  @ code | (ty << 24)
 | |
|     cmp     r7, r9
 | |
|     beq     .DrawStrip_vs_samecode @ we know stuff about this tile already
 | |
| 
 | |
|     mov     r9, r7          @ remember code
 | |
| 
 | |
|     movs    r2, r9, lsl #20 @ if (code&0x1000)
 | |
|     mov     r2, r2, lsl #1
 | |
|     add     r2, r2, r10, lsl #17
 | |
|     mov     r2, r2, lsr #17
 | |
|     eorcs   r2, r2, #0x0e   @ if (code&0x1000) addr^=0xe;
 | |
| 
 | |
|     ldr     r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels
 | |
| 
 | |
| .DrawStrip_vs_samecode:
 | |
|     tst     r9, #0x8000     @ code bit 15 set: tile goes to the hi priority cache instead of being drawn
 | |
| @    tstne   r8, #1<<23     @ !force[23]
 | |
|     bne     .DrawStrip_vs_hiprio
 | |
| 
 | |
|     orr     r8, r8, #(1<<24)@ seen non hi-prio tile
 | |
|     tst     r2, r2
 | |
|     beq     .dsloop_vs              @ tileline blank
 | |
| 
 | |
|     bic     r7, r3, #0x7f      @ keeps nametabadd and bit 7 of the old pal
 | |
|     and     r3, r9, #0x6000
 | |
|     add     r3, r7, r3, lsr #9 @ r3=pal=((code&0x6000)>>9);
 | |
| 
 | |
|     cmp     r2, r2, ror #4     @ equal only when all 8 nibbles of pack are the same
 | |
|     beq     .DrawStrip_vs_SingleColor @ tileline singlecolor 
 | |
| 
 | |
|     tst     r9, #0x0800
 | |
|     bne     .DrawStrip_vs_TileFlip
 | |
| 
 | |
|     @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern
 | |
| .DrawStrip_vs_TileNorm:
 | |
|     TileNorm r0
 | |
|     b       .dsloop_vs
 | |
| 
 | |
| .DrawStrip_vs_TileFlip:
 | |
|     TileFlip r0
 | |
|     b       .dsloop_vs
 | |
| 
 | |
| .DrawStrip_vs_SingleColor:
 | |
|     and     r4, r2, #0xf
 | |
|     orr     r4, r3, r4
 | |
|     orr     r4, r4, r4, lsl #8 @ same pixel in both bytes of a halfword (bits above 15 are never stored)
 | |
|     tst     r1, #1             @ not aligned?
 | |
|     strneb  r4, [r1], #1
 | |
|     streqh  r4, [r1], #2
 | |
|     strh    r4, [r1], #2
 | |
|     strh    r4, [r1], #2
 | |
|     strh    r4, [r1], #2
 | |
|     strneb  r4, [r1], #1       @ have a remaining unaligned pixel?
 | |
|     b       .dsloop_vs_subr1   @ r1 has moved by 8 on either path, so take the entry that subtracts it again
 | |
| 
 | |
| .DrawStrip_vs_hiprio:
 | |
|     tst     r10, #(1<<23)   @ sh[23]
 | |
|     tsteq   r2, r2          @ if (!sh[23] && pack==blank) continue
 | |
|     beq     .dsloop_vs
 | |
| 
 | |
| @    orr     r10, r10, #1<<22 @ hi_not_empty[22]
 | |
|     sub     r7, r1, r11     @ r7=dx (offset of pdest within HighCol)
 | |
|     orr     r7, r9, r7,  lsl #16 @ cval=code|(dx<<16); r9 also still carries ty<<24 here
 | |
|     orr     r7, r7, r10, lsl #25 @ (ty<<25)
 | |
|     tst     r9, #0x1000
 | |
|     eorne   r7, r7, #7<<26  @ if(code&0x1000) cval^=7<<26;
 | |
|     str     r7, [r6], #4    @ cache hi priority tile code
 | |
|     str     r2, [r6], #4    @ cache hi priority tile data
 | |
|     b       .dsloop_vs
 | |
| 
 | |
| .dsloop_vs_exit:
 | |
|     tst     r8, #(1<<24) @ seen non hi-prio tile
 | |
|     ldr     r1, [sp, #9*4]  @ est
 | |
|     mov     r0, #0
 | |
|     ldreq   r2, [r1, #OFS_EST_rendstatus]
 | |
|     str     r0, [r6]    @ terminate the cache list
 | |
|     orreq   r2, r2, #PDRAW_PLANE_HI_PRIO @ had a layer with all hi-prio tiles
 | |
|     streq   r2, [r1, #OFS_EST_rendstatus]
 | |
| 
 | |
|     ldmfd   sp!, {r4-r11,pc}
 | |
| 
 | |
| 
 | |
| @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 | |
| 
 | |
| @ interlace mode 2 (reg[12] bits 1 and 2 both set): build a struct TileStrip on the
 | |
| @ stack and hand the strip to the C function DrawStripInterlace(ts, plane_sh).
 | |
| @ in: r0=plane r1=ymask r2=DrawScanline r3=hscroll r5=xmask r6=hc r7=vsram[0..1]
 | |
| @     r9=force[31]|sh[30]|plane[29]|cellskip[15:8]|maxcells[7:0] r10=shift[width]
 | |
| @     r12=nametab<<1
 | |
| .DrawStrip_interlace:
 | |
|     tst     r0, r0
 | |
|     movne   r7, r7, lsr #16        @ plane 1 uses the upper vsram halfword
 | |
|     mov     r7, r7, lsl #21        @ keep 11 bits of vscroll in r7[31:21]
 | |
| 
 | |
|     @ Find the line in the name table
 | |
|     add     r2, r7, r2, lsl #22    @ r2=(vscroll+(DrawScanline<<1))<<21 (11 bits);
 | |
|     orr     r1, r1, #0x80000000
 | |
|     and     r2, r2, r1, ror #10    @ &((ymask<<1)|1)<<21;
 | |
|     mov     r2, r2, lsr #21
 | |
|     mov     r4, r2, lsr #4
 | |
|     mov     r12, r12, lsr #1       @ halfwords
 | |
|     add     r0, r12, r4, lsl r10   @ nametab+=(ts.line>>4)<<shift[width];
 | |
|     mov     r1, r9, lsr #29        @ r1=plane_sh=lflags&7; must be taken BEFORE r9 is masked,
 | |
|                                    @ otherwise the callee always receives 0
 | |
|     and     r9, r9, #0xff          @ r9=ts->cells=maxcells
 | |
| 
 | |
|     @ 6 words for struct TileStrip plus 1 pad word: together with the 9 registers
 | |
|     @ pushed on entry this keeps sp 8-byte aligned for the call into C (AAPCS)
 | |
|     sub     sp, sp, #7*4
 | |
|     stmia   sp, {r0,r2,r3,r5,r6,r9} @ nametab, line, hscroll, xmask, hc, cells
 | |
| 
 | |
|     mov     r0, sp
 | |
|     bl      DrawStripInterlace @ struct TileStrip *ts, int plane_sh
 | |
| 
 | |
|     add     sp, sp, #7*4
 | |
|     ldmfd   sp!, {r4-r11,pc}
 | |
| 
 | |
| .pool
 | |
| 
 | |
| @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 | |
| 
 | |
| @ void BackFill(int reg7, int sh, struct PicoEState *est)
 | |
| @ Fills 320 bytes starting at est->HighCol+8 with the byte (reg7 & 0x3f) | (sh << 7).
 | |
| 
 | |
| .global BackFill
 | |
| 
 | |
| BackFill:
 | |
|     stmfd   sp!, {r4-r9,lr}
 | |
| 
 | |
|     ldr     lr, [r2, #OFS_EST_HighCol]
 | |
|     and     r0, r0, #0x3f          @ only the low 6 bits of reg7 are used
 | |
|     orr     r0, r0, r1, lsl #7     @ sh goes to bit 7
 | |
|     add     lr, lr, #8             @ the fill starts 8 bytes into the line buffer
 | |
| 
 | |
|     @ replicate the byte into all four lanes of r0, then into r1-r7
 | |
|     orr     r0, r0, r0, lsl #8
 | |
|     orr     r0, r0, r0, lsl #16
 | |
|     mov     r7, r0
 | |
|     mov     r6, r0
 | |
|     mov     r5, r0
 | |
|     mov     r4, r0
 | |
|     mov     r3, r0
 | |
|     mov     r2, r0
 | |
|     mov     r1, r0
 | |
| 
 | |
|     @ 10 bursts of 8 words = 10*8*4 bytes
 | |
| .rept 10
 | |
|     stmia   lr!, {r0-r7}
 | |
| .endr
 | |
| 
 | |
|     ldmfd   sp!, {r4-r9,pc}
 | |
| 
 | |
| 
 | |
| @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 | |
| 
 | |
| @ void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est)
 | |
| 
 | |
| .global DrawTilesFromCache
 | |
| @ in (as used below): r0=list of cached entries (code word + pixel word each), r1=sh flag (bit 0), r2=x limit, r3=est
 | |
| DrawTilesFromCache:
 | |
|     stmfd   sp!, {r4-r9,r11,lr}
 | |
| 
 | |
|     @ cache some stuff to avoid mem access
 | |
|     ldr     r11,[r3, #OFS_EST_HighCol]
 | |
|     mov     r12,#0xf       @ nibble mask handed to the Tile* macros
 | |
|     ldr     lr, [r3, #OFS_EST_PicoMem_vram]
 | |
|     mov     r9, r3         @ est
 | |
| 
 | |
|     ands    r8, r1, #1     @ Z is set when sh is off
 | |
|     orr     r8, r8, r2, lsl #1 @ r8=sh|(limit<<1); orr leaves the flags from ands intact
 | |
|     bne     .dtfc_check_rendflags
 | |
| 
 | |
|     @ scratch: r4, r7
 | |
| .dtfc_loop:
 | |
|     ldr     r6, [r0], #4    @ read code
 | |
|     movs    r1, r6, lsr #16 @ r1=dx;
 | |
|     ldmeqfd sp!, {r4-r9,r11,pc} @ dx is never zero, this must be a terminator, return
 | |
|     bic     r4, r1, #0xfe00 @ r4=dx&0x1ff
 | |
|     add     r1, r11, r4     @ r1=pdest
 | |
| 
 | |
|     ldr     r2, [r0], #4    @ read pixel data
 | |
| 
 | |
|     and     r3, r6, #0x6000
 | |
|     mov     r3, r3, lsr #9  @ r3=pal=((code&0x6000)>>9);
 | |
| 
 | |
|     rsbs    r4, r4, r8, lsr #1 @ r4=limit-dx
 | |
|     bmi     .dtfc_cut_tile  @ dx > limit: some pixels of this tile have to be masked off
 | |
| 
 | |
|     tst     r8, #1
 | |
|     bne     .dtfc_shadow
 | |
| 
 | |
|     tst     r2, r2
 | |
|     beq     .dtfc_loop      @ all 8 pixels transparent
 | |
| 
 | |
|     cmp     r2, r2, ror #4  @ equal only if all 8 nibbles are the same
 | |
|     beq     .dtfc_SingleColor @ tileline singlecolor 
 | |
| 
 | |
|     tst     r6, #0x0800     @ flip bit in code
 | |
|     bne     .dtfc_TileFlip
 | |
| 
 | |
|     @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern
 | |
| .dtfc_TileNorm:
 | |
|     TileNorm r12
 | |
|     b       .dtfc_loop
 | |
| 
 | |
| .dtfc_TileFlip:
 | |
|     TileFlip r12
 | |
|     b       .dtfc_loop
 | |
| 
 | |
| .dtfc_SingleColor:
 | |
|     and     r4, r2, #0xf
 | |
|     orr     r4, r3, r4          @ r4=pal|color
 | |
|     orr     r4, r4, r4, lsl #8  @ same pixel in both bytes of the halfword
 | |
|     tst     r1, #1              @ not aligned?
 | |
|     strneb  r4, [r1], #1
 | |
|     streqh  r4, [r1], #2
 | |
|     strh    r4, [r1], #2
 | |
|     strh    r4, [r1], #2
 | |
|     strh    r4, [r1], #2
 | |
|     strneb  r4, [r1], #1        @ have a remaining unaligned pixel?
 | |
|     b       .dtfc_loop
 | |
| 
 | |
| .dtfc_shadow:
 | |
|     tst     r2, r2
 | |
|     beq     .dtfc_shadow_blank
 | |
| 
 | |
|     cmp     r2, r2, ror #4
 | |
|     beq     .dtfc_SingleColor @ tileline singlecolor 
 | |
| 
 | |
|     tst     r6, #0x0800
 | |
|     bne     .dtfc_TileFlipShHP
 | |
| 
 | |
|     @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern
 | |
| .dtfc_TileNormShHP:
 | |
|     TileNormShHP
 | |
|     b       .dtfc_loop
 | |
| 
 | |
| .dtfc_TileFlipShHP:
 | |
|     TileFlipShHP
 | |
|     b       .dtfc_loop
 | |
| 
 | |
| .dtfc_shadow_blank:         @ blank tileline in sh mode: clear bit 7 of the 8 destination bytes
 | |
|     tst     r1, #1          @ these flags are used by every conditional op down to the final stores
 | |
|     ldrneb  r4, [r1]
 | |
|     mov     r6, #0x7f       @ r6 (code) is no longer needed for this entry
 | |
|     and     r4, r4, r6
 | |
|     strneb  r4, [r1], #1
 | |
|     ldrh    r4, [r1]
 | |
|     orr     r6, r6, r6, lsl #8 @ r6=0x7f7f
 | |
|     and     r4, r4, r6
 | |
|     strh    r4, [r1], #2
 | |
|     ldrh    r4, [r1]
 | |
|     and     r4, r4, r6
 | |
|     strh    r4, [r1], #2
 | |
|     ldrh    r4, [r1]
 | |
|     and     r4, r4, r6
 | |
|     strh    r4, [r1], #2
 | |
|     ldrh    r4, [r1]
 | |
|     and     r4, r4, r6
 | |
|     streqh  r4, [r1]        @ aligned start: last two pixels
 | |
|     strneb  r4, [r1]        @ unaligned start: only one pixel remains
 | |
|     b       .dtfc_loop
 | |
| 
 | |
| .dtfc_cut_tile:
 | |
|     add     r4, r4, #7      @ 0-6
 | |
|     mov     r4, r4, lsl #2  @ nibbles -> bits
 | |
|     mov     r12,#0xf<<28
 | |
|     mov     r12,r12,asr r4  @ sign-extending shift: mask of the top (r4/4)+1 nibbles
 | |
|     mov     r2, r2, ror #16 @ swap halfwords before masking
 | |
|     tst     r6, #0x0800     @ flipped?
 | |
|     mvnne   r12,r12         @ flipped tiles use the inverted mask
 | |
|     and     r2, r2, r12
 | |
|     mov     r2, r2, ror #16 @ swap halfwords back
 | |
|     mov     r12,#0xf        @ restore the helper pattern
 | |
|     tst     r8, #1
 | |
|     bne     .dtfc_shadow
 | |
|     tst     r2, r2
 | |
|     beq     .dtfc_loop
 | |
|     tst     r6, #0x0800
 | |
|     beq     .dtfc_TileNorm
 | |
|     b       .dtfc_TileFlip
 | |
| 
 | |
| @ check if we have detected layer covered with hi-prio tiles:
 | |
| .dtfc_check_rendflags:
 | |
|     ldr     r2, [r9, #OFS_EST_rendstatus]
 | |
|     tst     r2, #(PDRAW_PLANE_HI_PRIO|PDRAW_SHHI_DONE)
 | |
|     beq     .dtfc_loop
 | |
|     bic     r8, r8, #1      @ sh/hi mode off
 | |
|     tst     r2, #PDRAW_SHHI_DONE
 | |
|     bne     .dtfc_loop      @ already processed
 | |
|     orr     r2, r2, #PDRAW_SHHI_DONE
 | |
|     str     r2, [r9, #OFS_EST_rendstatus]
 | |
| 
 | |
|     add     r1, r11,#8      @ HighCol+8
 | |
|     mov     r3, #320/4/4    @ 20 iterations of 16 bytes = 320 bytes
 | |
|     mov     r6, #0x7f
 | |
|     orr     r6, r6, r6, lsl #8
 | |
|     orr     r6, r6, r6, lsl #16 @ r6=0x7f7f7f7f
 | |
| .dtfc_loop_shprep:          @ clear bit 7 of all 320 bytes
 | |
|     ldmia   r1, {r2,r4,r5,r7}
 | |
|     subs    r3, r3, #1
 | |
|     and     r2, r2, r6
 | |
|     and     r4, r4, r6
 | |
|     and     r5, r5, r6
 | |
|     and     r7, r7, r6
 | |
|     stmia   r1!,{r2,r4,r5,r7}
 | |
|     bne     .dtfc_loop_shprep
 | |
| 
 | |
|     b       .dtfc_loop
 | |
| 
 | |
| .pool
 | |
| 
 | |
| @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 | |
| 
 | |
| 
 | |
| @ void DrawSpritesSHi(unsigned char *sprited, struct PicoEState *est)
 | |
| @ Walks the sprite index list in sprited backwards, drawing pal3 "operator" and hi-prio sprites into HighCol.
 | |
| .global DrawSpritesSHi
 | |
| @ in: r0=sprited, r1=est
 | |
| DrawSpritesSHi:
 | |
|     ldrb    r3, [r0]
 | |
|     mov     r12,#0xff
 | |
|     ands    r3, r3, #0x7f   @ r3=number of list entries (low 7 bits of sprited[0])
 | |
|     bxeq    lr              @ empty list
 | |
| 
 | |
|     stmfd   sp!, {r1,r3-r11,lr} @ +est
 | |
|     strb    r12,[r0,#3]     @ set end marker
 | |
|     ldrb    r12,[r0,#1]
 | |
|     add     r10,r0, #4      @ r10=start of the index list (sprited+4)
 | |
|     mvn     r12,r12
 | |
|     tst     r12,#0x6        @ masking in slot 1 and tile ovfl?
 | |
|     ldmeqfd sp!, {r1,r3-r11,pc}
 | |
|     add     r10,r10,r3      @ r10=HighLnSpr end
 | |
| 
 | |
|     ldrb    r12,[r10,#0]    @ width of last sprite
 | |
|     ldr     r11,[r1, #OFS_EST_HighCol]
 | |
|     str     r12,[sp, #4]    @ kept in the stack slot of the saved r3
 | |
|     mov     r12,#0xf
 | |
|     ldr     lr, [r1, #OFS_EST_PicoMem_vram]
 | |
| 
 | |
| 
 | |
| DrawSpriteSHi:
 | |
|     @ draw next sprite
 | |
|     ldr     r7, [sp]        @ est
 | |
|     ldrb    r0, [r10,#-1]!  @ list is consumed from its end towards the 0xff marker
 | |
|     ldr     r1, [r7, #OFS_EST_HighPreSpr]
 | |
|     cmp     r0, #0xff
 | |
|     ldmeqfd sp!, {r1,r3-r11,pc} @ end of list
 | |
|     and     r0, r0, #0x7f
 | |
|     add     r0, r1, r0, lsl #3 @ 8 bytes per HighPreSpr entry
 | |
| 
 | |
|     ldr     r9, [r0, #4]    @ sprite[1]
 | |
|     mov     r2, r9, asr #16 @ r2=sx
 | |
| 
 | |
|     mov     r9, r9, lsl #16
 | |
|     mov     r3, r9, lsr #31 @ priority
 | |
|     mov     r9, r9, lsr #16
 | |
| @    orr     r9, r9, r8, lsl #31 @ r9=code|sh[31]   @@ sh is always on here now
 | |
|     and     r4, r9, #0x6000
 | |
|     orr     r9, r9, r4, lsl #16
 | |
|     orr     r9, r9, #0x90000000 @ r9=scc1 ???? ... <code> (s=shadow/hilight, cc=pal)
 | |
|     cmp     r12,r9, lsr #28 @ sh/hi with pal3?
 | |
|     cmpne   r3, #1          @ if not, is it hi prio?
 | |
|     strne   r3, [sp, #4]    @ reset last sprite width
 | |
|     bne     DrawSpriteSHi   @ non-operator low sprite, already drawn
 | |
| 
 | |
|     ldr     r3, [r0]        @ sprite[0]
 | |
|     mov     r6, r3, lsr #28
 | |
|     sub     r6, r6, #1      @ r6=width-1 (inc later)
 | |
|     mov     r5, r3, lsr #24
 | |
|     and     r5, r5, #7      @ r5=height
 | |
| 
 | |
|     ldr     r7, [r7, #OFS_EST_DrawScanline]
 | |
|     mov     r0, r3, lsl #16 @ r0=sy<<16 (tmp)
 | |
| 
 | |
|     sub     r7, r7, r0, asr #16 @ r7=row=DrawScanline-sy
 | |
| 
 | |
|     tst     r9, #0x1000
 | |
|     movne   r0, r5, lsl #3
 | |
|     subne   r0, r0, #1
 | |
|     subne   r7, r0, r7      @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y
 | |
| 
 | |
|     add     r8, r9, r7, lsr #3 @ tile+=row>>3; // Tile number increases going down
 | |
|     tst     r9, #0x0800
 | |
|     mlane   r8, r5, r6, r8  @ if (code&0x0800) { tile+=delta*(width-1);
 | |
|     rsbne   r5, r5, #0      @ delta=-delta; } // r5=delta now
 | |
| 
 | |
|     mov     r8, r8, lsl #21
 | |
|     mov     r8, r8, lsr #17 @ r8=(tile&0x7ff)<<4
 | |
|     and     r7, r7, #7
 | |
|     add     r8, r8, r7, lsl #1 @ tile+=(row&7)<<1; // Tile address
 | |
| 
 | |
|     ldr     r0, [sp, #4]
 | |
|     add     r6, r6, #1         @ inc now
 | |
|     cmp     r0, #0             @ check width of last sprite
 | |
|     movne   r6, r0             @ a nonzero stored width overrides this sprite's width ...
 | |
|     movne   r0, #0
 | |
|     strne   r0, [sp, #4]       @ ... and is consumed
 | |
| 
 | |
|     mov     r5, r5, lsl #4     @ delta<<=4; // Delta of address
 | |
|     mov     r3, r4, lsr #9     @ r3=pal=((code>>9)&0x30);
 | |
| 
 | |
|     adds    r0, r2, #0         @ mov sx to r0 and set ZV flags
 | |
|     b       .dsprShi_loop_enter
 | |
| 
 | |
| .dsprShi_loop:
 | |
|     subs    r6, r6, #1         @ width--
 | |
|     beq     DrawSpriteSHi
 | |
|     adds    r0, r0, #8         @ sx+=8
 | |
|     add     r8, r8, r5         @ tile+=delta
 | |
| 
 | |
| .dsprShi_loop_enter:        @ expects flags from the preceding "adds r0, ..."
 | |
|     ble     .dsprShi_loop     @ sx <= 0
 | |
|     cmp     r0, #328
 | |
|     bge     DrawSpriteSHi
 | |
| 
 | |
|     mov     r8, r8, lsl #17
 | |
|     mov     r8, r8, lsr #17    @ tile&=0x7fff; // Clip tile address
 | |
| 
 | |
|     ldr     r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels
 | |
|     add     r1, r11, r0        @ r1=pdest
 | |
|     tst     r2, r2
 | |
|     beq     .dsprShi_loop
 | |
| 
 | |
|     cmp     r12, r9, lsr #28   @ top nibble of r9 is 0xf only for pal3
 | |
|     beq     .dsprShi_shadow
 | |
| 
 | |
|     cmp     r2, r2, ror #4     @ equal only if all 8 nibbles are the same
 | |
|     beq     .dsprShi_SingleColor @ tileline singlecolor 
 | |
| 
 | |
|     tst     r9, #0x0800
 | |
|     bne     .dsprShi_TileFlip
 | |
| 
 | |
|     @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern
 | |
| @ scratch: r4, r7
 | |
| .dsprShi_TileNorm:
 | |
|     TileNorm r12
 | |
|     b       .dsprShi_loop
 | |
| 
 | |
| .dsprShi_TileFlip:
 | |
|     TileFlip r12
 | |
|     b       .dsprShi_loop
 | |
| 
 | |
| .dsprShi_SingleColor:
 | |
|     and     r4, r2, #0xf
 | |
|     orr     r4, r3, r4
 | |
|     orr     r4, r4, r4, lsl #8
 | |
|     tst     r0, #1              @ not aligned?
 | |
|     strneb  r4, [r1], #1
 | |
|     streqh  r4, [r1], #2
 | |
|     strh    r4, [r1], #2
 | |
|     strh    r4, [r1], #2
 | |
|     strh    r4, [r1], #2
 | |
|     strneb  r4, [r1], #1
 | |
|     b       .dsprShi_loop
 | |
| 
 | |
| .dsprShi_shadow:
 | |
|     tst     r9, #0x8000        @ priority bit of code
 | |
|     beq     .dsprShi_shadow_lowpri
 | |
| 
 | |
|     cmp     r2, r2, ror #4
 | |
|     beq     .dsprShi_singlec_sh
 | |
| 
 | |
|     tst     r9, #0x0800
 | |
|     bne     .dsprShi_TileFlip_sh
 | |
| 
 | |
|     @ (r1=pdest, r2=pixels8, r3=pal) r4, r7: scratch, r12: helper pattern
 | |
| .dsprShi_TileNorm_sh:
 | |
|     TileNormSh
 | |
|     b       .dsprShi_loop
 | |
| 
 | |
| .dsprShi_TileFlip_sh:
 | |
|     TileFlipSh
 | |
|     b       .dsprShi_loop
 | |
| 
 | |
| .dsprShi_singlec_sh:
 | |
|     cmp     r2, #0xe0000000
 | |
|     bcc     .dsprShi_SingleColor   @ normal singlecolor tileline (carry inverted in ARM)
 | |
|     tst     r2, #0x10000000        @ all nibbles are 0xe or 0xf here; bit 28 tells them apart
 | |
|     bne     .dsprShi_sh_sh
 | |
|     TileSingleHi
 | |
|     b       .dsprShi_loop
 | |
| 
 | |
| .dsprShi_sh_sh:
 | |
|     TileSingleSh
 | |
|     b       .dsprShi_loop
 | |
| 
 | |
| .dsprShi_shadow_lowpri:
 | |
|     tst     r9, #0x800
 | |
|     bne     .dsprShi_TileFlip_sh_lp
 | |
| 
 | |
| .dsprShi_TileNorm_sh_lp:
 | |
|     TileNormSh_onlyop_lp
 | |
|     b       .dsprShi_loop
 | |
| 
 | |
| .dsprShi_TileFlip_sh_lp:
 | |
|     TileFlipSh_onlyop_lp
 | |
|     b       .dsprShi_loop
 | |
| 
 | |
| .pool
 | |
| 
 | |
| @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 | |
| 
 | |
| @ void DrawAllSprites(unsigned char *sprited, int prio, int sh,
 | |
| @                     struct PicoEState *est)
 | |
| @ Draws every listed sprite whose priority bit matches prio into HighCol; the list is walked from its end.
 | |
| .global DrawAllSprites
 | |
| @ in: r0=sprited, r1=prio, r2=sh, r3=est
 | |
| DrawAllSprites:
 | |
|     orr     r1, r2, r1, lsl #1 @ r1=sh|prio<<1
 | |
|     ldr     r2, [r0]        @ NOTE(review): word load here, DrawSpritesSHi reads this count with ldrb
 | |
|     ands    r2, r2, #0x7f   @ r2=number of list entries
 | |
|     bxeq    lr
 | |
| 
 | |
|     @ time to do some real work
 | |
|     stmfd   sp!, {r1,r3-r11,lr} @ +sh|prio<<1 +est
 | |
|     mov     r12,#0xff
 | |
|     strb    r12,[r0,#3]     @ set end marker
 | |
|     ldrb    r12,[r0,#1]
 | |
|     add     r10,r0 ,#4      @ r10=start of the index list (sprited+4)
 | |
|     mvn     r12,r12
 | |
|     tst     r12,#0x6        @ masking in slot 1 and tile ovfl?
 | |
|     ldmeqfd sp!, {r1,r3-r11,pc}
 | |
|     add     r10,r10,r2      @ r10=HighLnSpr end
 | |
| 
 | |
|     ldrb    r12,[r10,#0]    @ width of last sprite
 | |
|     ldr     r11,[r3, #OFS_EST_HighCol]
 | |
|     orr     r1 ,r1 ,r12,lsl #24
 | |
|     str     r1, [sp]        @ [sp]=sh|prio<<1|lastw<<24
 | |
|     mov     r12,#0xf
 | |
|     ldr     lr, [r3, #OFS_EST_PicoMem_vram]
 | |
| 
 | |
| @ + 0  :    hhhhvvvv ----hhvv yyyyyyyy yyyyyyyy // v, h: horiz. size
 | |
| @ + 4  :    xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8
 | |
| 
 | |
| DrawSprite:
 | |
|     @ draw next sprite
 | |
|     ldrb    r0, [r10,#-1]!
 | |
|     ldr     r4, [sp]        @ sh|prio<<1|lastw<<24
 | |
|     ldr     r7, [sp, #4]    @ est
 | |
|     mov     r2, r0, lsl #24 @ bit 7 of the list entry -> bit 31
 | |
|     cmp     r0, #0xff
 | |
|     ldmeqfd sp!, {r1,r3-r11,pc} @ end of list
 | |
|     eors    r2, r2, r4, lsl #30 @ N set when entry bit 7 differs from the requested prio
 | |
|     bic     r2, r4, #0xff000000
 | |
|     str     r2, [sp]        @ lastw is cleared in the stack copy; r4 still holds it for this pass
 | |
|     bmi     DrawSprite      @ wrong priority
 | |
|     ldr     r1, [r7, #OFS_EST_HighPreSpr]
 | |
|     and     r0, r0, #0x7f
 | |
|     add     r0, r1, r0, lsl #3 @ 8 bytes per HighPreSpr entry
 | |
| 
 | |
|     ldr     r3, [r0]        @ sprite[0]
 | |
|     ldr     r7, [r7, #OFS_EST_DrawScanline]
 | |
|     mov     r6, r3, lsr #28
 | |
|     sub     r6, r6, #1      @ r6=width-1 (inc later)
 | |
|     mov     r5, r3, lsr #24
 | |
|     and     r5, r5, #7      @ r5=height
 | |
| 
 | |
|     mov     r8, r3, lsl #16 @ r8=sy<<16 (tmp)
 | |
| 
 | |
|     ldr     r9, [r0, #4]
 | |
|     sub     r7, r7, r8, asr #16 @ r7=row=DrawScanline-sy
 | |
| 
 | |
|     mov     r2, r9, asr #16 @ r2=sx
 | |
|     mov     r9, r9, lsl #16
 | |
|     mov     r9, r9, lsr #16
 | |
|     orr     r9, r9, r4, lsl #31 @ r9=code|sh[31]
 | |
| 
 | |
|     tst     r9, #0x1000
 | |
|     movne   r8, r5, lsl #3
 | |
|     subne   r8, r8, #1
 | |
|     subne   r7, r8, r7      @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y
 | |
| 
 | |
|     add     r8, r9, r7, lsr #3 @ tile+=row>>3; // Tile number increases going down
 | |
|     tst     r9, #0x0800
 | |
|     mlane   r8, r5, r6, r8  @ if (code&0x0800) { tile+=delta*(width-1);
 | |
|     rsbne   r5, r5, #0      @ delta=-delta; } // r5=delta now
 | |
| 
 | |
|     mov     r8, r8, lsl #21
 | |
|     mov     r8, r8, lsr #17 @ r8=(tile&0x7ff)<<4
 | |
|     and     r7, r7, #7
 | |
|     add     r8, r8, r7, lsl #1 @ tile+=(row&7)<<1; // Tile address
 | |
| 
 | |
|     add     r6, r6, #1         @ inc now
 | |
|     cmp     r4, #0x1000000     @ check width of last sprite
 | |
|     movhs   r6, r4, lsr #24    @ nonzero lastw overrides this sprite's width
 | |
| 
 | |
|     @ cache some stuff to avoid mem access
 | |
|     mov     r5, r5, lsl #4     @ delta<<=4; // Delta of address
 | |
|     and     r4, r9, #0x6000
 | |
|     orr     r9, r9, r4, lsl #16
 | |
|     orrs    r9, r9, #0x10000000 @ r9=scc1 ???? ... <code> (s=shadow/hilight, cc=pal)
 | |
| 
 | |
|     mov     r3, r4, lsr #9     @ r3=pal=((code>>9)&0x30);
 | |
|     orrmi   r3, r3, #0x80      @ for sh/hi
 | |
| 
 | |
|     adds    r0, r2, #0         @ mov sx to r0 and set ZV flags
 | |
|     b       .dspr_loop_enter
 | |
| 
 | |
| .dspr_loop:
 | |
|     subs    r6, r6, #1         @ width--
 | |
|     beq     DrawSprite
 | |
|     adds    r0, r0, #8         @ sx+=8
 | |
|     add     r8, r8, r5         @ tile+=delta
 | |
| 
 | |
| .dspr_loop_enter:           @ expects flags from the preceding "adds r0, ..."
 | |
|     ble     .dspr_loop         @ sx <= 0
 | |
|     cmp     r0, #328
 | |
|     bge     DrawSprite
 | |
| 
 | |
|     mov     r8, r8, lsl #17
 | |
|     mov     r8, r8, lsr #17    @ tile&=0x7fff; // Clip tile address
 | |
| 
 | |
|     ldr     r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels
 | |
|     add     r1, r11, r0        @ r1=pdest
 | |
|     tst     r2, r2
 | |
|     beq     .dspr_loop
 | |
| 
 | |
|     cmp     r12, r9, lsr #28   @ top nibble of r9 is 0xf only for sh mode with pal3
 | |
|     beq     .dspr_shadow
 | |
| 
 | |
|     cmp     r2, r2, ror #4     @ equal only if all 8 nibbles are the same
 | |
|     beq     .dspr_SingleColor @ tileline singlecolor 
 | |
| 
 | |
|     tst     r9, #0x0800
 | |
|     bne     .dspr_TileFlip
 | |
| 
 | |
|     @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern
 | |
| @ scratch: r4, r7
 | |
| .dspr_TileNorm:
 | |
|     TileNorm r12
 | |
|     b       .dspr_loop
 | |
| 
 | |
| .dspr_TileFlip:
 | |
|     TileFlip r12
 | |
|     b       .dspr_loop
 | |
| 
 | |
| .dspr_singlec_sh:
 | |
|     cmp     r2, #0xe0000000
 | |
|     bcs     .dspr_TileNorm_sh   @ op. tileline, markop. XXX: maybe add a spec. handler?
 | |
| @ otherwise fall through to the plain single-color path
 | |
| .dspr_SingleColor:
 | |
|     and     r4, r2, #0xf
 | |
|     orr     r4, r3, r4
 | |
|     orr     r4, r4, r4, lsl #8
 | |
|     tst     r0, #1              @ not aligned?
 | |
|     strneb  r4, [r1], #1
 | |
|     streqh  r4, [r1], #2
 | |
|     strh    r4, [r1], #2
 | |
|     strh    r4, [r1], #2
 | |
|     strh    r4, [r1], #2
 | |
|     strneb  r4, [r1], #1
 | |
|     b       .dspr_loop
 | |
| 
 | |
| .dspr_shadow:
 | |
|     cmp     r2, r2, ror #4
 | |
|     beq     .dspr_singlec_sh
 | |
| 
 | |
|     tst     r9, #0x0800
 | |
|     bne     .dspr_TileFlip_sh
 | |
| 
 | |
|     @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern
 | |
| .dspr_TileNorm_sh:
 | |
|     TileNormSh_markop
 | |
|     b       .dspr_loop
 | |
| 
 | |
| .dspr_TileFlip_sh:
 | |
|     TileFlipSh_markop
 | |
|     b       .dspr_loop
 | |
| 
 | |
| 
 | |
| @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 | |
| 
 | |
| @ void DrawWindow(int tstart, int tend, int prio, int sh
 | |
| @                 struct PicoEState *est)
 | |
| @ Draws window cells of the current scanline into HighCol and records in rendstatus if a cell had the other priority.
 | |
| .global DrawWindow
 | |
| @ in: r0=tstart, r1=tend, r2=prio, r3=sh, [sp]=est
 | |
| DrawWindow:
 | |
|     ldr     r12, [sp]             @ est
 | |
|     stmfd   sp!, {r4-r11,lr}
 | |
| 
 | |
|     ldr     r6,  [r12, #OFS_EST_Pico]
 | |
|     ldr     r10, [r12, #OFS_EST_DrawScanline]
 | |
|     mov     r11, r12              @ est
 | |
|     ldrb    r12, [r6, #OFS_Pico_video_reg+3] @ pvid->reg[3]
 | |
| 
 | |
|     ldr     r4,  [r6, #OFS_Pico_video_reg+12]
 | |
|     mov     r5,  r10, lsr #3      @ r5=DrawScanline>>3 (cell row)
 | |
|     and     r10, r10, #7
 | |
|     mov     r10, r10, lsl #1      @ r10=ty
 | |
| 
 | |
|     mov     r12, r12, lsl #10
 | |
| 
 | |
|     tst     r4, #1                @ 40 cell mode?
 | |
|     andne   r12, r12, #0xf000     @ 0x3c<<10
 | |
|     andeq   r12, r12, #0xf800
 | |
|     addne   r12, r12, r5, lsl #7
 | |
|     addeq   r12, r12, r5, lsl #6  @ nametab
 | |
|     add     r12, r12, r0, lsl #2  @ +starttile
 | |
| 
 | |
|     ldr     lr, [r11, #OFS_EST_PicoMem_vram]
 | |
|     ldr     r6, [r11, #OFS_EST_rendstatus]
 | |
| 
 | |
|     @ fetch the first code now
 | |
|     ldrh    r7, [lr, r12]
 | |
| 
 | |
|     ands    r6, r6, #PDRAW_WND_DIFF_PRIO
 | |
|     cmpeq   r2, #1                @ prio && !(rendstatus & WND_DIFF_PRIO)?
 | |
|     ldmeqfd sp!, {r4-r11,pc}      @ yes, assume that whole window uses same priority
 | |
| 
 | |
|     orr     r6, r6, r2            @ r6 bit 0=prio, bit 1=WND_DIFF_PRIO
 | |
|     orr     r6, r6, r3, lsl #8    @ shadow mode
 | |
| 
 | |
|     sub     r8, r1, r0
 | |
| 
 | |
|     @ cache some stuff to avoid mem access
 | |
|     ldr     r11, [r11, #OFS_EST_HighCol]
 | |
|     mov     r8, r8, lsl #1        @ cells
 | |
|     add     r11,r11,#8
 | |
|     mvn     r9, #0                @ r9=prevcode=-1
 | |
|     add     r1, r11, r0, lsl #4   @ r1=pdest
 | |
|     mov     r0, #0xf              @ r0=helper pattern for the Tile* macros
 | |
|     b       .dwloop_enter
 | |
| 
 | |
|     @ r4,r5 are scratch in this loop
 | |
| .dwloop:
 | |
|     add     r1, r1, #8
 | |
| .dwloop_nor1:
 | |
|     add     r12, r12, #2    @ halfwords
 | |
|     ldrh    r7, [lr, r12]   @ r7=code (int, but from unsigned, no sign extend)
 | |
|     subs    r8, r8, #1      @ the code was fetched before this check, so one extra entry is read at the end
 | |
|     beq     .dwloop_end     @ done
 | |
| 
 | |
|     eor     r5, r6, r7, lsr #15 @ bit 0 = requested prio xor code priority bit
 | |
|     tst     r5, #1
 | |
|     orrne   r6, r6, #PDRAW_WND_DIFF_PRIO @ wrong pri
 | |
|     bne     .dwloop
 | |
| 
 | |
|     cmp     r7, r9
 | |
|     beq     .dw_samecode    @ we know stuff about this tile already
 | |
| 
 | |
| .dwloop_enter:
 | |
|     mov     r9, r7          @ remember code
 | |
| 
 | |
|     movs    r2, r9, lsl #20 @ if (code&0x1000)
 | |
|     mov     r2, r2, lsl #1
 | |
|     add     r2, r10, r2, lsr #17 @ r2=addr=(code&0x7ff)<<4; addr+=ty
 | |
|     eorcs   r2, r2, #0x0e   @ if (code&0x1000) addr^=0xe;
 | |
| 
 | |
|     and     r3, r9, #0x6000
 | |
|     mov     r3, r3, lsr #9  @ r3=pal=((code&0x6000)>>9);
 | |
| 
 | |
|     ldr     r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels
 | |
| 
 | |
| .dw_samecode:
 | |
|     tst     r6, #0x100      @ shadow mode?
 | |
|     bne     .dw_shadow
 | |
| .dw_shadow_done:
 | |
|     tst     r2, r2
 | |
|     beq     .dwloop              @ tileline blank
 | |
| 
 | |
|     cmp     r2, r2, ror #4       @ equal only if all 8 nibbles are the same
 | |
|     beq     .dw_SingleColor @ tileline singlecolor 
 | |
| 
 | |
|     tst     r9, #0x0800
 | |
|     bne     .dw_TileFlip
 | |
| 
 | |
|     @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern
 | |
| .dw_TileNorm:
 | |
|     TileNorm r0
 | |
|     b       .dwloop
 | |
| 
 | |
| .dw_TileFlip:
 | |
|     TileFlip r0
 | |
|     b       .dwloop
 | |
| 
 | |
| .dw_SingleColor:
 | |
|     and     r4, r0, r2         @ #0x0000000f
 | |
|     orr     r4, r3, r4
 | |
|     orr     r4, r4, r4, lsl #8
 | |
|     orr     r4, r4, r4, lsl #16
 | |
|     mov     r5, r4
 | |
|     stmia   r1!, {r4,r5}       @ 8 identical pixels as two word stores
 | |
|     b       .dwloop_nor1       @ we incremented r1 ourselves
 | |
| 
 | |
| .dw_shadow:
 | |
|     tst     r6, #1             @ hi pri?
 | |
|     orreq   r3, r3, #0x80      @ low prio: set bit 7 in the palette bits
 | |
|     beq     .dw_shadow_done
 | |
|     ldr     r4, [r1]           @ hi prio: clear bit 7 of the 8 destination pixels first
 | |
|     mov     r5, #0x7f
 | |
|     orr     r5, r5, r5, lsl #8
 | |
|     orr     r5, r5, r5, lsl #16
 | |
|     and     r4, r4, r5
 | |
|     str     r4, [r1]
 | |
|     ldr     r4, [r1,#4]
 | |
|     and     r4, r4, r5
 | |
|     str     r4, [r1,#4]
 | |
|     b       .dw_shadow_done
 | |
| 
 | |
| .dwloop_end:
 | |
|     and     r2, r6, #PDRAW_WND_DIFF_PRIO
 | |
|     ldmfd   sp!, {r4-r11,lr}
 | |
|     ldr     r0, [sp]           @ est (stack argument again on top after the pop)
 | |
|     ldr     r1, [r0, #OFS_EST_rendstatus]
 | |
|     orr     r1, r1, r2         @ rendstatus |= WND_DIFF_PRIO if it was set during this call
 | |
|     str     r1, [r0, #OFS_EST_rendstatus]
 | |
| 
 | |
|     bx      lr
 | |
| 
 | |
| 
 | |
| @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 | |
| 
 | |
| 
 | |
| @ Convert 0000bbb0 ggg0rrr0
 | |
| @ to      rrrrrggg gggbbbbb
 | |
| @ \reg holds two 16bit colors; both halfwords are converted at once
 | |
| @ r2,r3 - scratch, lr = 0x001c001c, r8 = 0x08610861
 | |
| .macro convRGB565 reg
 | |
|     and     r2,   lr,   \reg,lsr #7  @ b
 | |
|     and     r3,   lr,   \reg,lsr #3  @ g
 | |
|     and     \reg, lr,   \reg,lsl #1  @ r
 | |
|     orr     r2,   r2,   r3,  lsl #6  @ g moves to bits 8-10
 | |
|     orr     \reg, r2,   \reg,lsl #11 @ r moves to bits 13-15: rrr00ggg 000bbb00
 | |
| @ copy the top bits of each component into its empty low bits (bits 11, 6, 5 and 0)
 | |
|     and     r2,   r8,   \reg,lsr #4
 | |
|     orr     \reg, \reg, r2
 | |
| .endm
 | |
| 
 | |
| @ in: r0=dst, r1=src, r2=pixel count, r8 = 0x08610861 (input for convRGB565, not modified)
 | |
| @ trashes: r2-r7,r12,lr; r0,r1 are advanced; the loop body runs at least once, 8 pixels per pass
 | |
| .macro vidConvCpyRGB565_local
 | |
|     mov     r12, r2, lsr #3  @ repeats
 | |
|     mov     lr, #0x001c0000
 | |
|     orr     lr, lr,  #0x01c  @ lr == pattern 0x001c001c
 | |
| 
 | |
| 0:
 | |
|     ldmia   r1!, {r4-r7}     @ 4 words = 8 source colors
 | |
|     subs    r12, r12, #1     @ flags survive until the bgt: nothing below sets them
 | |
|     convRGB565 r4
 | |
|     str     r4, [r0], #4
 | |
|     convRGB565 r5
 | |
|     str     r5, [r0], #4
 | |
|     convRGB565 r6
 | |
|     str     r6, [r0], #4
 | |
|     convRGB565 r7
 | |
|     str     r7, [r0], #4
 | |
| 
 | |
|     bgt     0b
 | |
| .endm
 | |
| 
 | |
| 
 | |
| .global vidConvCpyRGB565
 | |
| @ Converts a run of 16bit colors from r1 to r0; pixel count in r2, handled 8 at a time by the macro.
 | |
| vidConvCpyRGB565: @ void *to, void *from, int pixels
 | |
|     stmdb   sp!, {r4-r9,lr}
 | |
|     mov     r8,     #0x0800
 | |
|     orr     r8, r8, #0x0061       @ r8 = 0x0861
 | |
|     orr     r8, r8, r8, lsl #16   @ r8 = 0x08610861, fill-in mask expected by convRGB565
 | |
|     vidConvCpyRGB565_local
 | |
|     ldmia   sp!, {r4-r9,pc}
 | |
| 
 | |
| 
 | |
| @ void PicoDoHighPal555(int sh, int line, struct PicoEState *est)
 | |
| @ Converts the 0x40 cram colors into HighPal; if sh is nonzero also fills the hilight/shadow/normal copies.
 | |
| .global PicoDoHighPal555
 | |
| @ in: r0=sh, r2=est (r1 is not read)
 | |
| PicoDoHighPal555:
 | |
|     stmfd   sp!, {r4-r10,lr}
 | |
|     mov     r10,r2               @ est
 | |
|     ldr     r8, [r10, #OFS_EST_Pico]
 | |
| 
 | |
|     mov     r9, r0               @ r9=sh
 | |
| 
 | |
|     add     r0, r10, #OFS_EST_HighPal @ r0=destination for the conversion
 | |
| 
 | |
|     mov     r1, #0
 | |
|     strb    r1, [r8, #OFS_Pico_m_dirtyPal] @ clear m.dirtyPal
 | |
| 
 | |
|     ldr     r1, [r10, #OFS_EST_PicoMem_cram]
 | |
|     mov     r2, #0x40            @ number of colors
 | |
|     mov     r8,     #0x0061
 | |
|     orr     r8, r8, #0x0800
 | |
|     orr     r8, r8, r8, lsl #16  @ r8=0x08610861
 | |
| 
 | |
|     vidConvCpyRGB565_local
 | |
| 
 | |
|     cmp     r9, #0
 | |
|     beq     PicoDoHighPal555_end @ no sh mode: only the base 0x40 entries are needed
 | |
| 
 | |
|     add     r3, r10, #OFS_EST_HighPal
 | |
|     add     r4, r3, #0x40*2
 | |
| 
 | |
|     @ hilighted pixels (0x40-0x7f):
 | |
|     @  t = ((dpal[i] >> 1) & 0x738e738e) + 0x738e738e;
 | |
|     @  t |= (t >> 4) & 0x08610861;
 | |
|     @ r8=0x08610861
 | |
|     mov     r12,    #0x008e
 | |
|     orr     r12,r12,#0x7300
 | |
|     orr     r12,r12,r12,lsl #16  @ r12=0x738e738e
 | |
|     mov     lr, #0x40/4          @ 4 colors (2 words) per pass
 | |
| .fl_loopcpRGB555_hi:
 | |
|     ldmia   r3!, {r1,r6}
 | |
|     and     r1, r12, r1, lsr #1
 | |
|     and     r6, r12, r6, lsr #1
 | |
|     add     r1, r12, r1
 | |
|     add     r6, r12, r6
 | |
|     and     r5, r8, r1, lsr #4
 | |
|     and     r7, r8, r6, lsr #4
 | |
|     orr     r1, r1, r5
 | |
|     orr     r6, r6, r7
 | |
|     stmia   r4!, {r1,r6}
 | |
|     subs    lr, lr, #1
 | |
|     bne     .fl_loopcpRGB555_hi
 | |
| 
 | |
|     sub     r3, r3, #0x40*2      @ r3 back to entry 0; r4 now points at entry 0x80
 | |
|     @ shadowed (0x80-0xbf), shadow|hilight (aka normal, 0xc0-0xff) pixels:
 | |
|     add     r5, r3, #0xc0*2
 | |
|     mov     lr, #0x40/4
 | |
| .fl_loopcpRGB555_sh:
 | |
|     ldmia   r3!, {r1,r6}
 | |
|     subs    lr, lr, #1
 | |
|     stmia   r5!, {r1,r6} @ 0xc0, normal
 | |
|     and     r1, r12, r1, lsr #1  @ shadow = (color >> 1) & 0x738e738e
 | |
|     and     r6, r12, r6, lsr #1
 | |
|     stmia   r4!, {r1,r6}
 | |
|     bne     .fl_loopcpRGB555_sh
 | |
| 
 | |
|     @ fixup shadowed color 14 in palette 0,1,2 (always normal)
 | |
|     sub     r4, r3, #0x40*2      @ r3 points at entry 0x40 here, r4 at entry 0
 | |
|     ldrh    r1, [r4, #0x0e*2] @ 0x0e, 0x1e, 0x2e
 | |
|     ldrh    r5, [r4, #0x1e*2]
 | |
|     ldrh    r6, [r4, #0x2e*2]
 | |
|     strh    r1, [r3, #0x4e*2] @ 0x8e, 0x9e, 0xae
 | |
|     strh    r5, [r3, #0x5e*2]
 | |
|     strh    r6, [r3, #0x6e*2]
 | |
| 
 | |
|     mov     r0, #1
 | |
| PicoDoHighPal555_end:
 | |
|     ldmfd   sp!, {r4-r10,pc}
 | |
| 
 | |
| 
 | |
| @ void FinalizeLine555(int sh, int line, struct PicoEState *est)
 | |
| @ Translates the 8bit pixels in HighCol+8 through HighPal into 16bit pixels at DrawLineDest.
 | |
| .global FinalizeLine555
 | |
| @ in: r2=est; r0 and r1 are left untouched for the PicoDrawUpdateHighPal call
 | |
| FinalizeLine555:
 | |
|     stmfd   sp!, {r4-r10,lr}
 | |
|     mov     r10,r2               @ est
 | |
|     ldr     r8, [r10, #OFS_EST_Pico]
 | |
| 
 | |
|     bl      PicoDrawUpdateHighPal
 | |
| 
 | |
|     add     r3, r10, #OFS_EST_HighPal
 | |
| 
 | |
|     mov     lr, #0xff
 | |
| 
 | |
|     ldr     r1, [r10, #OFS_EST_HighCol]
 | |
|     ldr     r0, [r10, #OFS_EST_DrawLineDest]
 | |
|     add     r1, r1, #8
 | |
| 
 | |
|     ldrb    r12, [r8, #OFS_Pico_video_reg+12]
 | |
|     mov     lr, lr, lsl #1       @ lr=0x1fe: mask giving pixel*2, the byte offset into HighPal
 | |
| 
 | |
|     tst     r12, #1              @ reg[12] bit 0 set: 320 pixel line
 | |
|     movne   r2, #320/8           @ len
 | |
|     bne     .fl_no32colRGB555
 | |
|     ldr     r4, [r10, #OFS_EST_PicoOpt]
 | |
|     mov     r2, #256/8
 | |
|     ldr     r4, [r4]
 | |
|     tst     r4, #0x4000          @ option bit 0x4000 selects the 8->10 pixel scaling loop
 | |
|     bne     .fl_32scale_RGB555
 | |
|     tst     r4, #0x0100
 | |
|     addeq   r0, r0, #32*2        @ option bit 0x0100 clear: start 32 pixels into the line
 | |
| 
 | |
| .fl_no32colRGB555:
 | |
| 
 | |
| #ifdef UNALIGNED_DRAWLINEDEST
 | |
|     @ this is basically for Gizmondo, which has unaligned odd lines in the framebuffer
 | |
|     tst     r0, #2
 | |
|     bne     .fl_RGB555u
 | |
| #endif
 | |
| 
 | |
| .fl_loopRGB555:             @ 8 pixels per pass: two words in, four words out
 | |
|     ldr     r12, [r1], #4
 | |
|     ldr     r7,  [r1], #4
 | |
| 
 | |
|     and     r4, lr, r12, lsl #1
 | |
|     ldrh    r4, [r3, r4]
 | |
|     and     r5, lr, r12, lsr #7
 | |
|     ldrh    r5, [r3, r5]
 | |
|     and     r6, lr, r12, lsr #15
 | |
|     ldrh    r6, [r3, r6]
 | |
|     orr     r4, r4, r5, lsl #16
 | |
| 
 | |
|     and     r5, lr, r12, lsr #23
 | |
|     ldrh    r5, [r3, r5]
 | |
|     and     r8, lr, r7, lsl #1
 | |
|     ldrh    r8, [r3, r8]
 | |
|     orr     r5, r6, r5, lsl #16
 | |
| 
 | |
|     and     r6, lr, r7, lsr #7
 | |
|     ldrh    r6, [r3, r6]
 | |
|     and     r12,lr, r7, lsr #15
 | |
|     ldrh    r12,[r3, r12]
 | |
|     and     r7, lr, r7, lsr #23
 | |
|     ldrh    r7, [r3, r7]
 | |
|     orr     r8, r8, r6, lsl #16
 | |
| 
 | |
|     subs    r2, r2, #1
 | |
|     orr     r12,r12, r7, lsl #16
 | |
| 
 | |
|     stmia   r0!, {r4,r5,r8,r12}
 | |
|     bne     .fl_loopRGB555
 | |
| 
 | |
|     ldmfd   sp!, {r4-r10,pc}
 | |
| 
 | |
| 
 | |
| .fl_32scale_RGB555:
 | |
|     mov     r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
 | |
|     orr     r9, r9, #0x00e7 @ r9=0x39e7; used as r9<<2 = 0xe79c to drop the low bits of each component
 | |
| 
 | |
| #ifdef UNALIGNED_DRAWLINEDEST
 | |
|     tst     r0, #2
 | |
|     bne     .fl_32scale_RGB555u
 | |
| #endif
 | |
| 
 | |
| .fl_loop32scale_RGB555:     @ 8 source pixels become 10 destination pixels (5 words) per pass
 | |
|     ldr     r12, [r1], #4
 | |
|     ldr     r7,  [r1], #4
 | |
| 
 | |
|     and     r4, lr, r12,lsl #1
 | |
|     ldrh    r4, [r3, r4]
 | |
|     and     r5, lr, r12,lsr #7
 | |
|     ldrh    r5, [r3, r5]
 | |
|     and     r4, r4, r9, lsl #2
 | |
|     orr     r4, r4, r4, lsl #14       @ r4[31:16] = 1/4 pix_s 0
 | |
|     and     r5, r5, r9, lsl #2
 | |
|     sub     r6, r5, r5, lsr #2        @ r6 = 3/4 pix_s 1
 | |
|     add     r4, r4, r6, lsl #16       @ pix_d 0, 1
 | |
|     and     r6, lr, r12,lsr #15
 | |
|     ldrh    r6, [r3, r6]
 | |
|     and     r12,lr, r12,lsr #23
 | |
|     ldrh    r12,[r3, r12]
 | |
|     and     r6, r6, r9, lsl #2
 | |
|     add     r5, r5, r6
 | |
|     mov     r5, r5, lsr #1
 | |
|     sub     r6, r6, r6, lsr #2        @ r6 = 3/4 pix_s 2
 | |
|     orr     r5, r5, r6, lsl #16
 | |
| 
 | |
|     and     r6, lr, r7, lsl #1
 | |
|     ldrh    r6, [r3, r6]
 | |
|     and     r12,r12,r9, lsl #2
 | |
|     add     r5, r5, r12,lsl #14       @ pix_d 2, 3
 | |
|     and     r6, r6, r9, lsl #2
 | |
|     orr     r6, r12,r6, lsl #16       @ pix_d 4, 5
 | |
| 
 | |
|     and     r12,lr, r7, lsr #7
 | |
|     ldrh    r12,[r3, r12]
 | |
|     and     r10,lr, r7, lsr #15       @ r10 (est) is not needed any more and serves as scratch
 | |
|     ldrh    r10,[r3, r10]
 | |
|     and     r12,r12,r9, lsl #2
 | |
|     sub     r8, r12,r12,lsr #2        @ r8 = 3/4 pix_s 5
 | |
|     add     r8, r8, r6, lsr #18
 | |
|     and     r7, lr, r7, lsr #23
 | |
|     ldrh    r7, [r3, r7]
 | |
|     and     r10,r10,r9, lsl #2
 | |
|     orr     r8, r8, r10,lsl #15
 | |
|     add     r8, r8, r12,lsl #15       @ pix_d 6, 7
 | |
|     sub     r10,r10,r10,lsr #2        @ r10= 3/4 pix_s 6
 | |
|     and     r7, r7, r9, lsl #2
 | |
|     add     r10,r10,r7, lsr #2        @ += 1/4 pix_s 7
 | |
|     orr     r10,r10,r7, lsl #16       @ pix_d 8, 9
 | |
| 
 | |
|     subs    r2, r2, #1
 | |
| 
 | |
|     stmia   r0!, {r4,r5,r6,r8,r10}
 | |
|     bne     .fl_loop32scale_RGB555
 | |
| 
 | |
|     ldmfd   sp!, {r4-r10,pc}
 | |
| 
 | |
| #ifdef UNALIGNED_DRAWLINEDEST
 | |
|     @ unaligned versions of loops
 | |
|     @ warning: starts drawing 2bytes before dst
 | |
| 
 | |
| .fl_RGB555u:
 | |
|     sub     r0, r0, #2              @ initial adjustment
 | |
|     mov     r8, #0                  @ r8 carries the pending pixel for the low half of the next word
 | |
| 
 | |
| .fl_loopRGB555u:
 | |
|     ldr     r12, [r1], #4
 | |
|     ldr     r7,  [r1], #4
 | |
| 
 | |
|     and     r6, lr, r12,lsl #1
 | |
|     ldrh    r6, [r3, r6]
 | |
|     and     r5, lr, r12,lsr #7
 | |
|     ldrh    r5, [r3, r5]
 | |
|     orr     r4, r8, r6, lsl #16
 | |
| 
 | |
|     and     r6, lr, r12,lsr #15
 | |
|     ldrh    r6, [r3, r6]
 | |
|     and     r8, lr, r12,lsr #23
 | |
|     ldrh    r8, [r3, r8]
 | |
|     orr     r5, r5, r6, lsl #16
 | |
| 
 | |
|     and     r6, lr, r7, lsl #1
 | |
|     ldrh    r6, [r3, r6]
 | |
|     and     r12,lr, r7, lsr #7
 | |
|     ldrh    r12,[r3, r12]
 | |
|     orr     r6, r8, r6, lsl #16
 | |
| 
 | |
|     and     r8, lr, r7, lsr #15
 | |
|     ldrh    r8, [r3, r8]
 | |
|     and     r7, lr, r7, lsr #23
 | |
| 
 | |
|     subs    r2, r2, #1
 | |
|     orr     r12,r12,r8, lsl #16
 | |
|     ldrh    r8, [r3, r7]            @ 8th pixel stays in r8 for the next pass
 | |
| 
 | |
|     stmia   r0!, {r4,r5,r6,r12}
 | |
|     bne     .fl_loopRGB555u
 | |
| 
 | |
|     strh    r8, [r0], #2            @ flush the last pending pixel
 | |
| 
 | |
|     ldmfd   sp!, {r4-r10,pc}
 | |
| 
 | |
| 
 | |
| .fl_32scale_RGB555u:
 | |
|     sub     r0, r0, #2              @ initial adjustment
 | |
|     mov     r4, #0                  @ r4 low half carries the pending pixel between passes
 | |
| 
 | |
|     @ r9  f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
 | |
| .fl_loop32scale_RGB555u:
 | |
|     ldr     r12, [r1], #4
 | |
|     ldr     r7,  [r1], #4
 | |
| 
 | |
|     and     r6, lr, r12,lsl #1
 | |
|     ldrh    r6, [r3, r6]
 | |
|     and     r5, lr, r12,lsr #7
 | |
|     ldrh    r5, [r3, r5]
 | |
|     and     r6, r6, r9, lsl #2
 | |
|     orr     r4, r4, r6, lsl #16       @ r4 = pix_d -1, 0
 | |
| 
 | |
|     and     r5, r5, r9, lsl #2
 | |
|     sub     r8, r5, r5, lsr #2        @ r8 = 3/4 pix_s 1
 | |
|     add     r6, r8, r6, lsr #2        @ r6 = (1/4 pix_s 0) + (3/4 pix_s 1)
 | |
|     orr     r5, r6, r5, lsl #15
 | |
| 
 | |
|     and     r6, lr, r12,lsr #15
 | |
|     ldrh    r6, [r3, r6]
 | |
|     and     r12,lr, r12,lsr #23
 | |
|     ldrh    r12,[r3, r12]
 | |
|     and     r6, r6, r9, lsl #2
 | |
|     add     r5, r5, r6, lsl #15       @ r5 = pix_d 1, 2
 | |
| 
 | |
|     and     r8, lr, r7, lsl #1
 | |
|     ldrh    r8, [r3, r8]
 | |
|     and     r10,lr, r7, lsr #7
 | |
|     ldrh    r10,[r3, r10]
 | |
|     and     r12,r12,r9, lsl #2
 | |
|     sub     r6, r6, r6, lsr #2        @ r6 = 3/4 pix_s 2
 | |
|     add     r6, r6, r12,lsr #2
 | |
|     orr     r6, r6, r12,lsl #16       @ r6 = pix_d 3, 4
 | |
| 
 | |
|     and     r8, r8, r9, lsl #2
 | |
|     and     r10,r10,r9, lsl #2
 | |
|     sub     r12,r10,r10,lsr #2        @ r12 = 3/4 pix_s 5
 | |
|     orr     r8, r8, r8, lsl #14
 | |
|     add     r8, r8, r12,lsl #16       @ r8 = pix_d 5, 6
 | |
|     and     r12,lr, r7, lsr #15
 | |
|     ldrh    r12,[r3, r12]
 | |
|     and     r7, lr, r7, lsr #23
 | |
|     ldrh    r7, [r3, r7]
 | |
|     and     r12,r12,r9, lsl #2
 | |
|     add     r10,r10,r12
 | |
|     mov     r10,r10,    lsr #1
 | |
|     sub     r12,r12,r12,lsr #2        @ r12 = 3/4 pix_s 6
 | |
|     orr     r10,r10,r12,lsl #16
 | |
|     and     r7, r7, r9, lsl #2
 | |
|     add     r10,r10,r7, lsl #14       @ r10 = pix_d 7, 8
 | |
| 
 | |
|     subs    r2, r2, #1
 | |
| 
 | |
|     stmia   r0!, {r4,r5,r6,r8,r10}
 | |
|     mov     r4, r7                    @ masked pix_s 7 becomes the pending pixel; mov keeps the flags for bne
 | |
|     bne     .fl_loop32scale_RGB555u
 | |
| 
 | |
|     strh    r4, [r0], #2              @ flush the last pending pixel
 | |
| 
 | |
|     ldmfd   sp!, {r4-r10,pc}
 | |
| 
 | |
| #endif /* UNALIGNED_DRAWLINEDEST */
 | |
| 
 | |
| 
 | |
| @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 | |
| 
 | |
| @ utility
 | |
| .global blockcpy @ void *dst, void *src, size_t n
 | |
| @ Copies n bytes from src to dst in 16-byte chunks using word transfers; n is rounded down to a multiple of 16.
 | |
| @ in: r0=dst, r1=src, r2=n; r0 and r1 are advanced; r3, r12 are scratch
 | |
| blockcpy:
 | |
|     movs    r2, r2, lsr #4      @ r2=number of 16-byte chunks
 | |
|     bxeq    lr                  @ n < 16: return, a zero count would wrap in the subs/bne loop below
 | |
|     stmfd   sp!, {r4,r5}
 | |
| blockcpy_loop:
 | |
|     ldmia   r1!, {r3-r5,r12}
 | |
|     subs    r2, r2, #1
 | |
|     stmia   r0!, {r3-r5,r12}    @ block transfers leave the flags from subs untouched
 | |
|     bne     blockcpy_loop
 | |
|     ldmfd   sp!, {r4,r5}
 | |
|     bx      lr
 | |
| 
 | |
| 
 | |
| .global blockcpy_or @ void *dst, void *src, size_t n, int pat
 | |
| @ Like blockcpy, but every copied byte is ORed with pat; n is rounded down to a multiple of 16.
 | |
| @ in: r0=dst, r1=src, r2=n, r3=pat (replicated from its low byte, so it is expected to fit in 8 bits)
 | |
| blockcpy_or:
 | |
|     movs    r2, r2, lsr #4      @ r2=number of 16-byte chunks
 | |
|     bxeq    lr                  @ n < 16: return, a zero count would wrap in the subs/bne loop below
 | |
|     stmfd   sp!, {r4-r6}
 | |
|     orr     r3, r3, r3, lsl #8
 | |
|     orr     r3, r3, r3, lsl #16 @ r3=pat in all four bytes
 | |
| blockcpy_loop_or:
 | |
|     ldmia   r1!, {r4-r6,r12}
 | |
|     subs    r2, r2, #1
 | |
|     orr     r4, r4, r3          @ plain orr: flags from subs stay valid for the bne
 | |
|     orr     r5, r5, r3
 | |
|     orr     r6, r6, r3
 | |
|     orr     r12,r12,r3
 | |
|     stmia   r0!, {r4-r6,r12}
 | |
|     bne     blockcpy_loop_or
 | |
|     ldmfd   sp!, {r4-r6}
 | |
|     bx      lr
 | |
| 
 | |
| @ vim:filetype=armasm
 | 
