audio: fixes and optimizations for SSG-EG

This commit is contained in:
kub 2020-04-22 20:48:03 +02:00
parent 09b96f9940
commit 1dbda5f894
3 changed files with 153 additions and 46 deletions

View file

@ -128,7 +128,7 @@ extern YM2612 *ym2612_940;
#endif
void memset32(int *dest, int c, int count);
void memset32(void *dest, int c, int count);
#ifndef __GNUC__
@ -511,7 +511,7 @@ static INT32 lfo_pm_table[128*8*32]; /* 128 combinations of 7 bits meaningful (o
but LFO works with one more bit of a precision so we really need 4096 elements */
static UINT32 fn_table[4096]; /* fnumber->increment counter */
static int g_lfo_ampm = 0;
static int g_lfo_ampm;
/* register number to channel number , slot offset */
#define OPN_CHAN(N) (N&3)
@ -569,7 +569,7 @@ INLINE void FM_KEYON(int c , int s )
} else {
SLOT->volume = MIN_ATT_INDEX;
}
recalc_volout(SLOT);
// recalc_volout(SLOT);
ym2612.slot_mask |= (1<<s) << (c*4);
}
}
@ -608,8 +608,8 @@ INLINE void set_det_mul(FM_CH *CH, FM_SLOT *SLOT, int v)
/*
 * Store the total level (TL) of one slot.
 *
 * SLOT: slot whose tl field is written.
 * v:    raw value; only its low 7 bits are used.
 */
INLINE void set_tl(FM_SLOT *SLOT, int v)
{
/* keep the low 7 bits of v and scale them up by (ENV_BITS-7) bits */
SLOT->tl = (v&0x7f)<<(ENV_BITS-7); /* 7bit TL */
/* NOTE(review): this listing is a commit diff shown without +/- markers; the
   live statement pair below and its commented-out twin look like the
   before/after of the same change - confirm which one the current file keeps.
   recalc_volout() is defined outside this view. */
if (SLOT->state > EG_REL)
recalc_volout(SLOT);
// if (SLOT->state > EG_REL)
// recalc_volout(SLOT);
}
/* set attack rate & key scale */
@ -761,7 +761,7 @@ INLINE int advance_lfo(int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt)
return lfo_ampm;
}
INLINE void update_eg_phase(FM_SLOT *SLOT, UINT32 eg_cnt)
INLINE void update_eg_phase(FM_SLOT *SLOT, UINT32 eg_cnt, UINT32 ssg_en)
{
INT32 volume = SLOT->volume;
UINT32 pack = SLOT->eg_pack[SLOT->state - 1];
@ -774,7 +774,7 @@ INLINE void update_eg_phase(FM_SLOT *SLOT, UINT32 eg_cnt)
eg_inc_val = pack >> ((eg_cnt >> shift) & 7) * 3;
eg_inc_val = (1 << (eg_inc_val & 7)) >> 1;
if (SLOT->ssg&0x08) {
if ((SLOT->ssg&0x08) && ssg_en) {
switch (SLOT->state)
{
case EG_ATT: /* attack phase */
@ -854,7 +854,7 @@ INLINE void update_eg_phase(FM_SLOT *SLOT, UINT32 eg_cnt)
SLOT->volume = volume;
}
INLINE void update_ssg_eg_phase(FM_SLOT *SLOT)
INLINE UINT32 update_ssg_eg_phase(FM_SLOT *SLOT, UINT32 phase)
{
if (SLOT->ssg&0x01) {
if (SLOT->ssg&0x02) {
@ -869,7 +869,7 @@ INLINE void update_ssg_eg_phase(FM_SLOT *SLOT)
SLOT->ssg ^= 4;
SLOT->ssgn ^= 4;
} else
SLOT->phase = 0;
phase = 0;
if (SLOT->state != EG_ATT) {
SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC;
@ -880,7 +880,8 @@ INLINE void update_ssg_eg_phase(FM_SLOT *SLOT)
}
}
}
recalc_volout(SLOT);
// recalc_volout(SLOT);
return phase;
}
#endif
@ -927,15 +928,23 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length)
int smp = 0; /* produced sample */
unsigned int eg_out, eg_out2, eg_out4;
FM_SLOT *SLOT;
UINT32 cnt = ct->eg_timer_add+(ct->eg_timer & ((1<<EG_SH)-1));
if (ct->pack & 2) while (cnt >= 1<<EG_SH) {
cnt -= 1<<EG_SH;
SLOT = &ct->CH->SLOT[SLOT1];
if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) update_ssg_eg_phase(SLOT);
if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200)
ct->phase1 = update_ssg_eg_phase(SLOT, ct->phase1);
SLOT = &ct->CH->SLOT[SLOT2];
if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) update_ssg_eg_phase(SLOT);
if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200)
ct->phase2 = update_ssg_eg_phase(SLOT, ct->phase2);
SLOT = &ct->CH->SLOT[SLOT3];
if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) update_ssg_eg_phase(SLOT);
if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200)
ct->phase3 = update_ssg_eg_phase(SLOT, ct->phase3);
SLOT = &ct->CH->SLOT[SLOT4];
if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) update_ssg_eg_phase(SLOT);
if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200)
ct->phase4 = update_ssg_eg_phase(SLOT, ct->phase4);
}
if (ct->pack & 8) { /* LFO enabled ? (test Earthworm Jim in between demo 1 and 2) */
ct->pack = (ct->pack&0xffff) | (advance_lfo(ct->pack >> 16, ct->lfo_cnt, ct->lfo_cnt + ct->lfo_inc) << 16);
@ -943,7 +952,21 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length)
}
ct->eg_timer += ct->eg_timer_add;
while (ct->eg_timer >= EG_TIMER_OVERFLOW)
if (ct->eg_timer < EG_TIMER_OVERFLOW) {
SLOT = &ct->CH->SLOT[SLOT1];
SLOT->vol_ipol = SLOT->vol_out;
if (SLOT->state > EG_REL) recalc_volout(SLOT);
SLOT = &ct->CH->SLOT[SLOT2];
SLOT->vol_ipol = SLOT->vol_out;
if (SLOT->state > EG_REL) recalc_volout(SLOT);
SLOT = &ct->CH->SLOT[SLOT3];
SLOT->vol_ipol = SLOT->vol_out;
if (SLOT->state > EG_REL) recalc_volout(SLOT);
SLOT = &ct->CH->SLOT[SLOT4];
SLOT->vol_ipol = SLOT->vol_out;
if (SLOT->state > EG_REL) recalc_volout(SLOT);
}
else while (ct->eg_timer >= EG_TIMER_OVERFLOW)
{
ct->eg_timer -= EG_TIMER_OVERFLOW;
ct->eg_cnt++;
@ -951,17 +974,18 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length)
SLOT = &ct->CH->SLOT[SLOT1];
SLOT->vol_ipol = SLOT->vol_out;
if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt);
if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt, ct->pack & 2);
SLOT = &ct->CH->SLOT[SLOT2];
SLOT->vol_ipol = SLOT->vol_out;
if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt);
if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt, ct->pack & 2);
SLOT = &ct->CH->SLOT[SLOT3];
SLOT->vol_ipol = SLOT->vol_out;
if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt);
if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt, ct->pack & 2);
SLOT = &ct->CH->SLOT[SLOT4];
SLOT->vol_ipol = SLOT->vol_out;
if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt);
if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt, ct->pack & 2);
}
#if 0
UINT32 ifrac0 = ct->eg_timer / (EG_TIMER_OVERFLOW>>EG_SH);
UINT32 ifrac1 = (1<<EG_SH) - ifrac0;
@ -997,6 +1021,7 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length)
ct->CH->SLOT[SLOT3].vol_out) >> 1;
ct->vol_out4 = (ct->CH->SLOT[SLOT4].vol_ipol +
ct->CH->SLOT[SLOT4].vol_out) >> 1;
break;
}
#elif 0
if (ct->eg_timer >> (EG_SH-1) < EG_TIMER_OVERFLOW >> EG_SH) {
@ -1272,7 +1297,7 @@ static int chan_render(int *buffer, int length, int c, UINT32 flags) // flags: s
crct.mem = crct.CH->mem_value; /* one sample delay memory */
crct.lfo_cnt = ym2612.OPN.lfo_cnt;
flags &= 0x35;
flags &= 0x37;
if (crct.lfo_inc) {
flags |= 8;
@ -1453,6 +1478,7 @@ static void reset_channels(FM_CH *CH)
CH[c].mem_value = CH[c].op1_out = 0;
}
ym2612.slot_mask = 0;
ym2612.ssg_mask = 0;
}
/* initialize generic tables */
@ -1655,8 +1681,10 @@ static int OPNWriteReg(int r, int v)
case 0x90: /* SSG-EG */
SLOT->ssg = v&0x0f;
SLOT->ssg ^= SLOT->ssgn;
if (SLOT->state > EG_REL)
recalc_volout(SLOT);
if (v&0x08) ym2612.ssg_mask |= 1<<(OPN_SLOT(r) + c*4);
else ym2612.ssg_mask &= ~(1<<(OPN_SLOT(r) + c*4));
// if (SLOT->state > EG_REL)
// recalc_volout(SLOT);
break;
case 0xa0:
@ -1751,6 +1779,7 @@ int YM2612UpdateOne_(int *buffer, int length, int stereo, int is_buf_empty)
{
int pan;
int active_chs = 0;
int flags = stereo ? 1:0;
// if !is_buf_empty, it means it has valid samples to mix with, else it may contain trash
if (is_buf_empty) memset32(buffer, 0, length<<stereo);
@ -1786,17 +1815,24 @@ int YM2612UpdateOne_(int *buffer, int length, int stereo, int is_buf_empty)
refresh_fc_eg_chan( &ym2612.CH[5] );
pan = ym2612.OPN.pan;
if (stereo) stereo = 1;
/* mix to 32bit dest */
// flags: stereo, ?, disabled, ?, pan_r, pan_l
// flags: stereo, ssg_enabled, disabled, _, pan_r, pan_l
chan_render_prep();
if (ym2612.slot_mask & 0x00000f) active_chs |= chan_render(buffer, length, 0, stereo|((pan&0x003)<<4)) << 0;
if (ym2612.slot_mask & 0x0000f0) active_chs |= chan_render(buffer, length, 1, stereo|((pan&0x00c)<<2)) << 1;
if (ym2612.slot_mask & 0x000f00) active_chs |= chan_render(buffer, length, 2, stereo|((pan&0x030) )) << 2;
if (ym2612.slot_mask & 0x00f000) active_chs |= chan_render(buffer, length, 3, stereo|((pan&0x0c0)>>2)) << 3;
if (ym2612.slot_mask & 0x0f0000) active_chs |= chan_render(buffer, length, 4, stereo|((pan&0x300)>>4)) << 4;
if (ym2612.slot_mask & 0xf00000) active_chs |= chan_render(buffer, length, 5, stereo|((pan&0xc00)>>6)|(ym2612.dacen<<2)) << 5;
/* BIT_IF(v,b,c): clear bit b of the lvalue v, then set it again when c is
 * non-zero. Wrapped in do/while(0) so that "BIT_IF(...);" is exactly one
 * statement and stays valid in an un-braced if/else arm. v and b may be
 * evaluated more than once, so pass side-effect-free expressions. */
#define BIT_IF(v,b,c) do { (v) &= ~(1<<(b)); if (c) (v) |= 1<<(b); } while (0)
BIT_IF(flags, 1, (ym2612.ssg_mask & 0x00000f));
if (ym2612.slot_mask & 0x00000f) active_chs |= chan_render(buffer, length, 0, flags|((pan&0x003)<<4)) << 0;
BIT_IF(flags, 1, (ym2612.ssg_mask & 0x0000f0));
if (ym2612.slot_mask & 0x0000f0) active_chs |= chan_render(buffer, length, 1, flags|((pan&0x00c)<<2)) << 1;
BIT_IF(flags, 1, (ym2612.ssg_mask & 0x000f00));
if (ym2612.slot_mask & 0x000f00) active_chs |= chan_render(buffer, length, 2, flags|((pan&0x030) )) << 2;
BIT_IF(flags, 1, (ym2612.ssg_mask & 0x00f000));
if (ym2612.slot_mask & 0x00f000) active_chs |= chan_render(buffer, length, 3, flags|((pan&0x0c0)>>2)) << 3;
BIT_IF(flags, 1, (ym2612.ssg_mask & 0x0f0000));
if (ym2612.slot_mask & 0x0f0000) active_chs |= chan_render(buffer, length, 4, flags|((pan&0x300)>>4)) << 4;
BIT_IF(flags, 1, (ym2612.ssg_mask & 0xf00000));
if (ym2612.slot_mask & 0xf00000) active_chs |= chan_render(buffer, length, 5, flags|((pan&0xc00)>>6)|(!!ym2612.dacen<<2)) << 5;
#undef BIT_IF
chan_render_finish();
return active_chs; // 1 if buffer updated

View file

@ -153,6 +153,7 @@ typedef struct
FM_OPN OPN; /* OPN state */
UINT32 slot_mask; /* active slot mask (performance hack) */
UINT32 ssg_mask; /* active ssg mask (performance hack) */
} YM2612;
#endif

View file

@ -17,6 +17,7 @@
@ very simple YM2612 output rate to sample rate adaption (~500k cycles @44100)
#define INTERPOL
#define SSG_EG
.equiv SLOT1, 0
.equiv SLOT2, 2
@ -73,8 +74,11 @@
and r3, r3, #7 @ eg_inc_val shift, may be 0
ldrb r2, [r5,#0x17] @ state
#if defined(SSG_EG)
tst r0, #0x08 @ ssg enabled?
tstne r12, #0x02
bne 9f
#endif
@ non-SSG-EG mode
cmp r2, #4 @ EG_ATT
@ -127,7 +131,9 @@
strgeb r3, [r5,#0x17] @ state
10: @ finish
ldrh r3, [r5,#0x18] @ tl
strh r0, [r5,#0x1a] @ volume
#if defined(SSG_EG)
b 11f
9: @ SSG-EG mode
@ -140,7 +146,7 @@
movlt r3, r0, lsl r3
ldrlth r0, [r5,#0x1a] @ volume, unsigned (0-1023)
movlt r3, r3, lsr #1 @ eg_inc_val
addlt r0, r0, r3, lsr #2
addlt r0, r0, r3, lsl #2
cmp r2, #2
blt 1f @ EG_REL
@ -182,18 +188,20 @@
strh r0, [r5,#0x1a] @ volume
cmp r2, #0x0c @ if ( ssg&0x04 && state > EG_REL )
cmpge r3, #EG_REL+1
ldrh r3, [r5,#0x18] @ tl
rsbge r0, r0, #0x200 @ volume = (0x200-volume) & MAX_ATT
lslge r0, r0, #10
lsrge r0, r0, #10
lslge r0, r0, #22
lsrge r0, r0, #22
11:
ldrh r3, [r5,#0x18] @ tl
#endif
add r0, r0, r3 @ volume += tl
strh r0, [r5,#0x34] @ vol_out
0: @ EG_OFF
.endm
#if defined(SSG_EG)
@ r5=slot, trashes: r0,r2,r3
.macro update_ssg_eg
ldrh r0, [r5,#0x30] @ ssg+ssgn
@ -204,6 +212,7 @@
cmp r2, #EG_REL+1 @ state > EG_REL &&
cmpge r3, #0x200 @ volume >= 0x200?
blt 9f
orr r4, r4, #0x10 @ ssg_update
tst r0, #0x01
beq 1f
@ -249,6 +258,33 @@
9:
.endm
@ recalc_volout: recompute a slot's vol_out from its volume, tl, ssg and state.
@ With INTERPOL defined, the previous vol_out is first copied to vol_ipol.
@ r5=slot, trashes: r0,r2,r3 and the condition flags
.macro recalc_volout
#if defined(INTERPOL)
ldrh r0, [r5,#0x34] @ vol_out
#endif
ldrb r2, [r5,#0x30] @ ssg
ldrb r3, [r5,#0x17] @ state
#if defined(INTERPOL)
strh r0, [r5,#0x36] @ vol_ipol = previous vol_out
#endif
ldrh r0, [r5,#0x1a] @ volume
@ and r2, r2, #0x0c
cmp r2, #0x0c @ if ( ssg >= 0x0c && state > EG_REL )
cmpge r3, #EG_REL+1
ldrh r3, [r5,#0x18] @ tl (a load leaves the flags of the compares intact)
rsbge r0, r0, #0x200 @ volume = (0x200-volume) & MAX_ATT
lslge r0, r0, #22 @ the shift pair keeps only the low 10 bits
lsrge r0, r0, #22
@ r0/r3 are deliberately not reloaded here: reloading volume would discard
@ the inverted value computed just above
add r0, r0, r3 @ volume += tl
strh r0, [r5,#0x34] @ vol_out
.endm
#endif
@ r12=lfo_ampm[31:16], r1=lfo_cnt_old, r2=lfo_cnt, r3=scratch
.macro advance_lfo_m
mov r2, r2, lsr #LFO_SH
@ -305,7 +341,7 @@
.endm
@ lr=context, r12=pack (stereo, lastchan, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16])
@ lr=context, r12=pack (stereo, ssg_enabled, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16])
@ r0-r2=scratch, r3=sin_tab, r5=scratch, r6-r7=vol_out[4], r10=op1_out
.macro upd_algo0_m
@ -643,8 +679,8 @@
.endm
@ lr=context, r12=pack (stereo, lastchan, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16])
@ r0-r2=scratch, r3=sin_tab/scratch, r4=(length<<8)|unused[4],was_update,algo[3], r5=tl_tab/slot,
@ lr=context, r12=pack (stereo, ssg_enabled, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16])
@ r0-r2=scratch, r3=sin_tab/scratch, r4=(length<<8)|unused[3],ssg_update,was_update,algo[3], r5=tl_tab/slot,
@ r6-r7=vol_out[4], r8=eg_timer, r9=eg_timer_add[31:16], r10=op1_out, r11=buffer
.global chan_render_loop @ chan_rend_context *ct, int *buffer, int length
@ -683,10 +719,17 @@ crl_loop:
subs r4, r4, #0x100
bmi crl_loop_end
@ -- SSG --
ldr r5, [lr, #0x40] @ CH
#if defined(SSG_EG)
tst r12, #0x02 @ ssg_enabled?
beq ssg_done
@ -- SSG --
lsl r7, r8, #EG_SH
add r7, r9, r7, lsr #EG_SH
subs r7, r7, #1<<EG_SH
blt ssg_done
@ r5=slot, trashes: r0,r2,r3
ssg_loop:
mov r6, #4
ssg_upd_loop:
update_ssg_eg
@ -702,10 +745,15 @@ ssg_upd_loop:
bne ssg_upd_loop
sub r5, r5, #SLOT_STRUCT_SIZE*3
subs r7, r7, #1<<EG_SH
bge ssg_loop
ssg_done:
#endif
@ -- EG --
add r8, r8, r9
cmp r8, #EG_TIMER_OVERFLOW
bcc eg_done
bcc volout_upd
ldr r1, [lr, #0x3c] @ eg_cnt
eg_loop:
sub r8, r8, #EG_TIMER_OVERFLOW
@ -731,9 +779,31 @@ eg_upd_loop:
sub r5, r5, #SLOT_STRUCT_SIZE*3
bhs eg_loop
str r1, [lr, #0x3c]
b eg_done
volout_upd:
#if defined(SSG_EG)
tst r4, #0x10 @ ssg_update?
beq eg_done
@ recalc vol_out
mov r6, #4
volout_loop:
recalc_volout
#if 0
subs r6, r6, #1
addne r5, r5, #SLOT_STRUCT_SIZE
#else
add r5, r5, #SLOT_STRUCT_SIZE*2
recalc_volout
subs r6, r6, #2
subne r5, r5, #SLOT_STRUCT_SIZE
#endif
bne volout_loop
sub r5, r5, #SLOT_STRUCT_SIZE*3
#endif
eg_done:
@ -- disabled? --
and r0, r12, #0xC
cmp r0, #0xC
@ -789,7 +859,7 @@ eg_done:
@ -- SLOT1 --
PIC_LDR(r3, r2, ym_tl_tab)
@ lr=context, r12=pack (stereo, lastchan, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16])
@ lr=context, r12=pack (stereo, ssg_enabled, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16])
@ r0-r2=scratch, r3=tl_tab, r5=scratch, r6-r7=vol_out[4], r10=op1_out
upd_slot1_m