sound: improve YM2612 accuracy (NB: noticeably slower at low bitrates)

This commit is contained in:
kub 2021-12-23 00:42:11 +01:00
parent c3fcdf3f8d
commit d127b3f3d5
3 changed files with 361 additions and 438 deletions

View file

@ -16,7 +16,6 @@
#include <pico/arm_features.h>
@ very simple YM2612 output rate to sample rate adaption (~500k cycles @44100)
#define INTERPOL
#define SSG_EG
.equiv SLOT1, 0
@ -34,7 +33,6 @@
.equiv EG_OFF, 0
.equiv EG_SH, 16 @ 16.16 fixed point (envelope generator timing)
.equiv EG_TIMER_OVERFLOW, (3*(1<<EG_SH)) @ envelope generator timer overflows every 3 samples (on real chip)
.equiv LFO_SH, 24 /* 8.24 fixed point (LFO calculations) */
.equiv ENV_QUIET, (2*13*256/8)
@ -46,14 +44,8 @@
@ r5=slot, r1=eg_cnt, trashes: r0,r2,r3
@ writes output to routp, but only if vol_out changes
.macro update_eg_phase_slot
#if defined(INTERPOL)
ldrh r0, [r5,#0x34] @ vol_out
#endif
ldrb r2, [r5,#0x17] @ state
add r3, r5, #0x1c
#if defined(INTERPOL)
strh r0, [r5,#0x36] @ vol_ipol
#endif
tst r2, r2
beq 0f @ EG_OFF
@ -211,7 +203,6 @@
cmp r2, #EG_REL+1 @ state > EG_REL &&
cmpge r3, #0x200 @ volume >= 0x200?
blt 9f
orr r4, r4, #0x10 @ ssg_update
tst r0, #0x01
beq 1f
@ -259,14 +250,8 @@
@ r5=slot, trashes: r0,r2,r3
.macro recalc_volout
#if defined(INTERPOL)
ldrh r0, [r5,#0x34] @ vol_out
#endif
ldrb r2, [r5,#0x30] @ ssg
ldrb r3, [r5,#0x17] @ state
#if defined(INTERPOL)
strh r0, [r5,#0x36] @ vol_ipol
#endif
ldrh r0, [r5,#0x1a] @ volume
@ and r2, r2, #0x0c
@ -342,6 +327,9 @@
@ r0-r2=scratch, r3=sin_tab, r5=scratch, r6-r7=vol_out[4], r10=op1_out
.macro upd_algo0_m
cmp r8, #(1<<EG_SH)
bge 1f
@ SLOT3
make_eg_out SLOT3
cmp r1, #ENV_QUIET
@ -383,6 +371,9 @@
.macro upd_algo1_m
cmp r8, #(1<<EG_SH)
bge 1f
@ SLOT3
make_eg_out SLOT3
cmp r1, #ENV_QUIET
@ -423,6 +414,9 @@
.macro upd_algo2_m
cmp r8, #(1<<EG_SH)
bge 1f
@ SLOT3
make_eg_out SLOT3
cmp r1, #ENV_QUIET
@ -464,6 +458,9 @@
.macro upd_algo3_m
cmp r8, #(1<<EG_SH)
bge 1f
@ SLOT3
make_eg_out SLOT3
cmp r1, #ENV_QUIET
@ -505,6 +502,9 @@
.macro upd_algo4_m
cmp r8, #(1<<EG_SH)
bge 2f
@ SLOT3
make_eg_out SLOT3
cmp r1, #ENV_QUIET
@ -542,6 +542,9 @@
.macro upd_algo5_m
cmp r8, #(1<<EG_SH)
bge 2f
@ SLOT3
make_eg_out SLOT3
cmp r1, #ENV_QUIET
@ -582,6 +585,9 @@
.macro upd_algo6_m
cmp r8, #(1<<EG_SH)
bge 2f
@ SLOT3
make_eg_out SLOT3
cmp r1, #ENV_QUIET
@ -617,6 +623,9 @@
.macro upd_algo7_m
cmp r8, #(1<<EG_SH)
bge 2f
@ SLOT3
make_eg_out SLOT3
cmp r1, #ENV_QUIET
@ -677,7 +686,7 @@
@ lr=context, r12=pack (stereo, ssg_enabled, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16])
@ r0-r2=scratch, r3=sin_tab/scratch, r4=(length<<8)|dac,unused[2],ssg_update,was_update,algo[3], r5=tl_tab/slot,
@ r0-r2=scratch, r3=sin_tab/scratch, r4=(length<<8)|dac,upd_cnt[3],was_update,algo[3], r5=tl_tab/slot,
@ r6-r7=vol_out[4], r8=eg_timer, r9=eg_timer_add[31:16], r10=op1_out, r11=buffer
.global chan_render_loop @ chan_rend_context *ct, int *buffer, int length
@ -688,47 +697,48 @@ chan_render_loop:
ldr r12, [lr, #0x4c]
ldr r0, [lr, #0x50]
mov r11, r1
and r0, r0, #0x87
orr r4, r4, r0 @ (length<<8)|dac,unused[4],algo[3]
and r0, r0, #0xf7
orr r4, r4, r0 @ (length<<8)|dac,upd_cnt[2],unused,algo[3]
ldr r8, [lr, #0x44] @ eg_timer
ldr r9, [lr, #0x48] @ eg_timer_add
ldr r10, [lr, #0x54] @ op1_out
tst r12, #8 @ lfo?
beq crl_loop
crl_loop_lfo:
ldr r1, [lr, #0x30] @ lfo_cnt
ldr r2, [lr, #0x34] @ lfo_inc
crl_loop:
subs r4, r4, #0x100
bmi crl_loop_end
mov r0, #0
add r8, r8, r9
subs r8, r8, #(1<<EG_SH)
blt crl_smp_loop_end
@ r12=lfo_ampm[31:16], r1=lfo_cnt_old, r2=lfo_cnt
advance_lfo_m
crl_smp_loop:
tst r12, #8 @ lfo?
beq lfo_done
ldr r2, [lr, #0x34] @ lfo_inc
ldr r1, [lr, #0x30] @ lfo_cnt
add r2, r2, r1
str r2, [lr, #0x30]
@ r12=lfo_ampm[31:16], r1=lfo_cnt_old, r2=lfo_cnt
advance_lfo_m
add r4, r4, #0x100
crl_loop:
subs r4, r4, #0x100
bmi crl_loop_end
lfo_done:
ldr r5, [lr, #0x40] @ CH
#if defined(SSG_EG)
tst r12, #0x02 @ ssg_enabled?
beq ssg_done
@ -- SSG --
lsl r7, r8, #EG_SH
add r7, r9, r7, lsr #EG_SH
subs r7, r7, #1<<EG_SH
blt ssg_done
@ -- SSG --
ssg_loop:
mov r6, #4
bic r4, r4, #0x10 @ ssg_update
ssg_upd_loop:
@ use lr as a pointer to the slot phases stored in the context
update_ssg_eg
@ -748,26 +758,25 @@ ssg_upd_loop:
sub lr, lr, #4*3
sub r5, r5, #SLOT_STRUCT_SIZE*3
subs r7, r7, #1<<EG_SH
bge ssg_loop
ssg_done:
#endif
@ -- EG --
add r8, r8, r9
cmp r8, #EG_TIMER_OVERFLOW
blo volout_upd
ldr r1, [lr, #0x3c] @ eg_cnt
eg_loop:
sub r8, r8, #EG_TIMER_OVERFLOW
tst r4, #0x30
subnes r4, r4, #0x10
bne eg_done
orr r4, r4, #0x30
ldr r1, [lr, #0x3c] @ eg_cnt
add r1, r1, #1
cmp r1, #4096
movge r1, #1
str r1, [lr, #0x3c]
mov r6, #4
eg_upd_loop:
update_eg_phase_slot
#if 1
#if 0
subs r6, r6, #1
addne r5, r5, #SLOT_STRUCT_SIZE
#else
@ -777,96 +786,38 @@ eg_upd_loop:
subne r5, r5, #SLOT_STRUCT_SIZE
#endif
bne eg_upd_loop
cmp r8, #EG_TIMER_OVERFLOW
sub r5, r5, #SLOT_STRUCT_SIZE*3
bhs eg_loop
str r1, [lr, #0x3c]
b eg_done
volout_upd:
#if defined(SSG_EG)
tst r4, #0x10 @ ssg_update?
beq eg_done
@ recalc vol_out
mov r6, #4
volout_loop:
recalc_volout
#if 0
subs r6, r6, #1
addne r5, r5, #SLOT_STRUCT_SIZE
#else
add r5, r5, #SLOT_STRUCT_SIZE*2
recalc_volout
subs r6, r6, #2
subne r5, r5, #SLOT_STRUCT_SIZE
#endif
bne volout_loop
sub r5, r5, #SLOT_STRUCT_SIZE*3
#endif
eg_done:
@ -- disabled? --
and r0, r12, #0xC
cmp r0, #0xC
beq crl_loop_lfo
cmp r0, #0x4
beq crl_loop
tst r12, #0x4
mov r0, #0
bne crl_algo_done
@ output interpolation
#if defined(INTERPOL)
#if 1 // possibly too expensive for slow platforms?
@ basic interpolator, interpolate in middle region, else use closer value
mov r3, r8, lsr #EG_SH @ eg_timer, [0..3<<EG_SH) after loop
cmp r3, #(EG_TIMER_OVERFLOW>>EG_SH)/2
bne 0f @ mix is vol_out
cmp r8, #(2<<EG_SH) @ calculate only for operator memory, sample,
tstge r12, #0xf000 @ ...feedback
beq crl_algo_done
ldr r6, [r5, #0x34] @ vol_out, vol_ipol for all slots
ldr r2, [r5, #0x34+SLOT_STRUCT_SIZE*2]
ldr r7, [r5, #0x34+SLOT_STRUCT_SIZE]
ldr r3, [r5, #0x34+SLOT_STRUCT_SIZE*3]
add r6, r6, r6, lsl #16
lsr r6, r6, #17
add r2, r2, r2, lsl #16
lsr r2, r2, #17
add r7, r7, r7, lsl #16
lsr r7, r7, #17
add r3, r3, r3, lsl #16
lsr r3, r3, #17
b 1f
#else
@ super-basic... just take value closest to sample point
mov r3, r8, lsr #EG_SH-1 @ eg_timer, [0..3<<EG_SH) after loop
cmp r3, #(EG_TIMER_OVERFLOW>>EG_SH)
#endif
0: ldrgeh r6, [r5, #0x34] @ vol_out values for all slots
ldrlth r6, [r5, #0x36] @ vol_ipol values for all slots
ldrgeh r2, [r5, #0x34+SLOT_STRUCT_SIZE*2]
ldrlth r2, [r5, #0x36+SLOT_STRUCT_SIZE*2]
ldrgeh r7, [r5, #0x34+SLOT_STRUCT_SIZE]
ldrlth r7, [r5, #0x36+SLOT_STRUCT_SIZE]
ldrgeh r3, [r5, #0x34+SLOT_STRUCT_SIZE*3]
ldrlth r3, [r5, #0x36+SLOT_STRUCT_SIZE*3]
#else
ldrh r6, [r5, #0x34] @ vol_out values for all slots
ldrh r2, [r5, #0x34+SLOT_STRUCT_SIZE*2]
ldrh r7, [r5, #0x34+SLOT_STRUCT_SIZE]
ldrh r3, [r5, #0x34+SLOT_STRUCT_SIZE*3]
#endif
1: orr r6, r6, r2, lsl #16
orr r6, r6, r2, lsl #16
orr r7, r7, r3, lsl #16
@ -- SLOT1 --
PIC_LDR(r3, r2, ym_tl_tab)
@ lr=context, r12=pack (stereo, ssg_enabled, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16])
@ r0-r2=scratch, r3=tl_tab, r5=scratch, r6-r7=vol_out[4], r10=op1_out
@ -- SLOT1 --
upd_slot1_m
@ -- SLOT2+ --
cmp r8, #(2<<EG_SH) @ op mem or sample?
bge crl_algo_done
and r0, r4, #7
PIC_XB(,r0, lsl #2)
nop
@ -920,6 +871,23 @@ crl_algo7:
crl_algo_done:
@ -- PHASE UPDATE --
add lr, lr, #0x10
ldmia lr, {r1-r3,r5-r7}
add r1, r1, r6
add r2, r2, r7
ldr r6, [lr, #0x18]
ldr r7, [lr, #0x1c]
add r3, r3, r6
add r5, r5, r7
stmia lr, {r1-r3,r5}
sub lr, lr, #0x10
subs r8, r8, #(1<<EG_SH)
bge crl_smp_loop
crl_smp_loop_end:
add r8, r8, #(1<<EG_SH)
@ -- WRITE SAMPLE --
tst r0, r0
beq ctl_sample_skip
@ -944,36 +912,20 @@ crl_algo_done:
addeq r11, r11, #4
addne r1, r0, r1
strne r1, [r11], #4
b crl_do_phase
b crl_loop
ctl_sample_mono:
ldr r1, [r11]
add r1, r0, r1
str r1, [r11], #4
b crl_do_phase
b crl_loop
ctl_sample_skip:
and r1, r12, #1
add r1, r1, #1
add r11,r11, r1, lsl #2
crl_do_phase:
@ -- PHASE UPDATE --
add r5, lr, #0x10
ldmia r5, {r0-r3,r6-r7}
add r0, r0, r6
add r1, r1, r7
ldr r6, [r5, #0x18]
ldr r7, [r5, #0x1c]
add r2, r2, r6
add r3, r3, r7
stmia r5, {r0-r3}
tst r12, #8
bne crl_loop_lfo
b crl_loop
crl_loop_end:
str r8, [lr, #0x44] @ eg_timer
str r12, [lr, #0x4c] @ pack (for lfo_ampm)