mirror of
https://github.com/RaySollium99/picodrive.git
synced 2025-09-05 15:27:46 -04:00
ym2612 ARM optimisations
This commit is contained in:
parent
b061bc166c
commit
c918379137
1 changed file with 83 additions and 117 deletions
|
@ -15,8 +15,8 @@
|
|||
|
||||
#include "../arm_features.h"
|
||||
|
||||
@ very simple adaptation of YM2612 output rate to sample rate (~1M cycles @44100)
|
||||
//#define INTERPOL
|
||||
@ very simple YM2612 output rate to sample rate adaptation (~500k cycles @44100)
|
||||
#define INTERPOL
|
||||
|
||||
.equiv SLOT1, 0
|
||||
.equiv SLOT2, 2
|
||||
|
@ -44,7 +44,7 @@
|
|||
|
||||
@ r5=slot, r1=eg_cnt, trashes: r0,r2,r3
|
||||
@ writes output to routp, but only if vol_out changes
|
||||
.macro update_eg_phase_slot slot
|
||||
.macro update_eg_phase_slot
|
||||
#if defined(INTERPOL)
|
||||
ldrh r0, [r5,#0x34] @ vol_out
|
||||
#endif
|
||||
|
@ -190,21 +190,6 @@
|
|||
ldrh r3, [r5,#0x18] @ tl
|
||||
add r0, r0, r3 @ volume += tl
|
||||
strh r0, [r5,#0x34] @ vol_out
|
||||
.if \slot == SLOT1
|
||||
mov r6, r6, lsr #16
|
||||
orr r6, r0, r6, lsl #16
|
||||
.elseif \slot == SLOT2
|
||||
mov r6, r6, lsl #16
|
||||
mov r0, r0, lsl #16
|
||||
orr r6, r0, r6, lsr #16
|
||||
.elseif \slot == SLOT3
|
||||
mov r7, r7, lsr #16
|
||||
orr r7, r0, r7, lsl #16
|
||||
.elseif \slot == SLOT4
|
||||
mov r7, r7, lsl #16
|
||||
mov r0, r0, lsl #16
|
||||
orr r7, r0, r7, lsr #16
|
||||
.endif
|
||||
|
||||
0: @ EG_OFF
|
||||
.endm
|
||||
|
@ -672,24 +657,16 @@ chan_render_loop:
|
|||
mov r11, r1
|
||||
and r0, r0, #7
|
||||
orr r4, r4, r0 @ (length<<8)|algo
|
||||
add r0, lr, #0x44
|
||||
ldmia r0, {r8,r9} @ eg_timer, eg_timer_add
|
||||
ldr r8, [lr, #0x44] @ eg_timer
|
||||
ldr r9, [lr, #0x48] @ eg_timer_add
|
||||
ldr r10, [lr, #0x54] @ op1_out
|
||||
@ ldmia lr, {r6,r7} @ load volumes
|
||||
ldr r5, [lr, #0x40] @ CH
|
||||
ldrh r6, [r5, #0x34] @ vol_out values for all slots
|
||||
ldrh r2, [r5, #0x34+SLOT_STRUCT_SIZE*2]
|
||||
ldrh r7, [r5, #0x34+SLOT_STRUCT_SIZE]
|
||||
ldrh r3, [r5, #0x34+SLOT_STRUCT_SIZE*3]
|
||||
orr r6, r6, r2, lsl #16
|
||||
orr r7, r7, r3, lsl #16
|
||||
|
||||
tst r12, #8 @ lfo?
|
||||
beq crl_loop
|
||||
|
||||
crl_loop_lfo:
|
||||
add r0, lr, #0x30
|
||||
ldmia r0, {r1,r2} @ lfo_cnt, lfo_inc
|
||||
ldr r1, [lr, #0x30] @ lfo_cnt
|
||||
ldr r2, [lr, #0x34] @ lfo_inc
|
||||
|
||||
subs r4, r4, #0x100
|
||||
bmi crl_loop_end
|
||||
|
@ -707,37 +684,48 @@ crl_loop:
|
|||
bmi crl_loop_end
|
||||
|
||||
@ -- SSG --
|
||||
add r0, lr, #0x3c
|
||||
ldmia r0, {r1,r5} @ eg_cnt, CH
|
||||
ldr r5, [lr, #0x40] @ CH
|
||||
|
||||
@ r5=slot, trashes: r0,r2,r3
|
||||
mov r6, #4
|
||||
ssg_upd_loop:
|
||||
update_ssg_eg
|
||||
add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT2 (2)
|
||||
update_ssg_eg
|
||||
sub r5, r5, #SLOT_STRUCT_SIZE @ SLOT3 (1)
|
||||
update_ssg_eg
|
||||
add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT4 (3)
|
||||
#if 0
|
||||
subs r6, r6, #1
|
||||
addne r5, r5, #SLOT_STRUCT_SIZE
|
||||
#else
|
||||
add r5, r5, #SLOT_STRUCT_SIZE*2
|
||||
update_ssg_eg
|
||||
subs r6, r6, #2
|
||||
subne r5, r5, #SLOT_STRUCT_SIZE
|
||||
#endif
|
||||
bne ssg_upd_loop
|
||||
sub r5, r5, #SLOT_STRUCT_SIZE*3
|
||||
|
||||
@ -- EG --
|
||||
add r8, r8, r9
|
||||
cmp r8, #EG_TIMER_OVERFLOW
|
||||
bcc eg_done
|
||||
ldr r1, [lr, #0x3c] @ eg_cnt
|
||||
eg_loop:
|
||||
sub r8, r8, #EG_TIMER_OVERFLOW
|
||||
add r1, r1, #1
|
||||
cmp r1, #4096
|
||||
movge r1, #1
|
||||
@ SLOT1 (0)
|
||||
@ r5=slot, r1=eg_cnt, trashes: r0,r2,r3
|
||||
update_eg_phase_slot SLOT1
|
||||
add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT2 (2)
|
||||
update_eg_phase_slot SLOT2
|
||||
sub r5, r5, #SLOT_STRUCT_SIZE @ SLOT3 (1)
|
||||
update_eg_phase_slot SLOT3
|
||||
add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT4 (3)
|
||||
update_eg_phase_slot SLOT4
|
||||
|
||||
mov r6, #4
|
||||
eg_upd_loop:
|
||||
update_eg_phase_slot
|
||||
#if 1
|
||||
subs r6, r6, #1
|
||||
addne r5, r5, #SLOT_STRUCT_SIZE
|
||||
#else
|
||||
add r5, r5, #SLOT_STRUCT_SIZE*2
|
||||
update_eg_phase_slot
|
||||
subs r6, r6, #2
|
||||
subne r5, r5, #SLOT_STRUCT_SIZE
|
||||
#endif
|
||||
bne eg_upd_loop
|
||||
|
||||
cmp r8, #EG_TIMER_OVERFLOW
|
||||
sub r5, r5, #SLOT_STRUCT_SIZE*3
|
||||
|
@ -754,64 +742,49 @@ eg_done:
|
|||
beq crl_loop
|
||||
|
||||
@ output interpolation
|
||||
#if 0 // too expensive on slow platforms
|
||||
#if defined(INTERPOL)
|
||||
#if 1 // possibly too expensive for slow platforms?
|
||||
@ basic interpolator, interpolate in middle region, else use closer value
|
||||
mov r3, r8, lsr #EG_SH @ eg_timer, [0..3<<EG_SH) after loop
|
||||
cmp r3, #(EG_TIMER_OVERFLOW>>EG_SH)/2
|
||||
bgt 0f @ mix is vol_out
|
||||
bne 0f @ mix is vol_out
|
||||
|
||||
ldrh r0, [r5,#0x36] @ SLOT1 vol_ipol
|
||||
lsleq r2, r6, #16
|
||||
addeq r0, r0, r2, lsr #16
|
||||
lsreq r0, r0, #1
|
||||
mov r6, r6, lsr #16
|
||||
orr r6, r0, r6, lsl #16
|
||||
|
||||
ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE*2] @ SLOT2 vol_ipol
|
||||
addeq r0, r0, r6, lsr #16
|
||||
lsreq r0, r0, #1
|
||||
mov r6, r6, lsl #16
|
||||
orr r6, r6, r0
|
||||
ror r6, r6, #16
|
||||
|
||||
ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE] @ SLOT3 vol_ipol
|
||||
lsleq r2, r7, #16
|
||||
addeq r0, r0, r2, lsr #16
|
||||
lsreq r0, r0, #1
|
||||
mov r7, r7, lsr #16
|
||||
orr r7, r0, r7, lsl #16
|
||||
|
||||
ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE*3] @ SLOT4 vol_ipol
|
||||
addeq r0, r0, r7, lsr #16
|
||||
lsreq r0, r0, #1
|
||||
mov r7, r7, lsl #16
|
||||
orr r7, r7, r0
|
||||
ror r7, r7, #16
|
||||
#elif defined(INTERPOL)
|
||||
ldr r6, [r5, #0x34] @ vol_out, vol_ipol for all slots
|
||||
ldr r2, [r5, #0x34+SLOT_STRUCT_SIZE*2]
|
||||
ldr r7, [r5, #0x34+SLOT_STRUCT_SIZE]
|
||||
ldr r3, [r5, #0x34+SLOT_STRUCT_SIZE*3]
|
||||
add r6, r6, r6, lsl #16
|
||||
lsr r6, r6, #17
|
||||
add r2, r2, r2, lsl #16
|
||||
lsr r2, r2, #17
|
||||
add r7, r7, r7, lsl #16
|
||||
lsr r7, r7, #17
|
||||
add r3, r3, r3, lsl #16
|
||||
lsr r3, r3, #17
|
||||
b 1f
|
||||
#else
|
||||
@ super-basic... just take value closest to sample point
|
||||
mov r3, r8, lsr #EG_SH-1 @ eg_timer, [0..3<<EG_SH) after loop
|
||||
cmp r3, #(EG_TIMER_OVERFLOW>>EG_SH)
|
||||
bge 0f @ mix is vol_out
|
||||
|
||||
ldrh r0, [r5,#0x36] @ SLOT1 vol_ipol
|
||||
mov r6, r6, lsr #16
|
||||
orr r6, r0, r6, lsl #16
|
||||
|
||||
ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE*2] @ SLOT2 vol_ipol
|
||||
mov r6, r6, lsl #16
|
||||
orr r6, r6, r0
|
||||
ror r6, r6, #16
|
||||
|
||||
ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE] @ SLOT3 vol_ipol
|
||||
mov r7, r7, lsr #16
|
||||
orr r7, r0, r7, lsl #16
|
||||
|
||||
ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE*3] @ SLOT4 vol_ipol
|
||||
mov r7, r7, lsl #16
|
||||
orr r7, r7, r0
|
||||
ror r7, r7, #16
|
||||
#endif
|
||||
0:
|
||||
|
||||
0: ldrgeh r6, [r5, #0x34] @ vol_out values for all slots
|
||||
ldrlth r6, [r5, #0x36] @ vol_ipol values for all slots
|
||||
ldrgeh r2, [r5, #0x34+SLOT_STRUCT_SIZE*2]
|
||||
ldrlth r2, [r5, #0x36+SLOT_STRUCT_SIZE*2]
|
||||
ldrgeh r7, [r5, #0x34+SLOT_STRUCT_SIZE]
|
||||
ldrlth r7, [r5, #0x36+SLOT_STRUCT_SIZE]
|
||||
ldrgeh r3, [r5, #0x34+SLOT_STRUCT_SIZE*3]
|
||||
ldrlth r3, [r5, #0x36+SLOT_STRUCT_SIZE*3]
|
||||
|
||||
#else
|
||||
ldrh r6, [r5, #0x34] @ vol_out values for all slots
|
||||
ldrh r2, [r5, #0x34+SLOT_STRUCT_SIZE*2]
|
||||
ldrh r7, [r5, #0x34+SLOT_STRUCT_SIZE]
|
||||
ldrh r3, [r5, #0x34+SLOT_STRUCT_SIZE*3]
|
||||
#endif
|
||||
1: orr r6, r6, r2, lsl #16
|
||||
orr r7, r7, r3, lsl #16
|
||||
|
||||
@ -- SLOT1 --
|
||||
PIC_LDR(r3, r2, ym_tl_tab)
|
||||
|
@ -893,34 +866,28 @@ crl_algo_done:
|
|||
strne r1, [r11], #4
|
||||
b crl_do_phase
|
||||
|
||||
ctl_sample_skip:
|
||||
and r1, r12, #1
|
||||
add r1, r1, #1
|
||||
add r11,r11, r1, lsl #2
|
||||
b crl_do_phase
|
||||
|
||||
ctl_sample_mono:
|
||||
ldr r1, [r11]
|
||||
add r1, r0, r1
|
||||
str r1, [r11], #4
|
||||
b crl_do_phase
|
||||
|
||||
ctl_sample_skip:
|
||||
and r1, r12, #1
|
||||
add r1, r1, #1
|
||||
add r11,r11, r1, lsl #2
|
||||
|
||||
crl_do_phase:
|
||||
@ -- PHASE UPDATE --
|
||||
add r5, lr, #0x10
|
||||
ldmia r5, {r0-r1}
|
||||
add r5, lr, #0x20
|
||||
ldmia r5, {r2-r3}
|
||||
add r5, lr, #0x10
|
||||
add r0, r0, r2
|
||||
add r1, r1, r3
|
||||
stmia r5!,{r0-r1}
|
||||
ldmia r5, {r0-r1}
|
||||
add r5, lr, #0x28
|
||||
ldmia r5, {r2-r3}
|
||||
add r5, lr, #0x18
|
||||
add r0, r0, r2
|
||||
add r1, r1, r3
|
||||
stmia r5, {r0-r1}
|
||||
ldmia r5, {r0-r3,r6-r7}
|
||||
add r0, r0, r6
|
||||
add r1, r1, r7
|
||||
ldr r6, [r5, #0x18]
|
||||
ldr r7, [r5, #0x1c]
|
||||
add r2, r2, r6
|
||||
add r3, r3, r7
|
||||
stmia r5, {r0-r3}
|
||||
|
||||
tst r12, #8
|
||||
bne crl_loop_lfo
|
||||
|
@ -928,7 +895,6 @@ crl_do_phase:
|
|||
|
||||
|
||||
crl_loop_end:
|
||||
@ stmia lr, {r6,r7} @ save volumes (for debug)
|
||||
str r8, [lr, #0x44] @ eg_timer
|
||||
str r12, [lr, #0x4c] @ pack (for lfo_ampm)
|
||||
str r4, [lr, #0x50] @ was_update
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue