fix ym2612 asm, rework EG

This should have been split into separate commits (the asm fix and the EG rework), but I'm lazy.
The envelope generator (EG) rework saves ~900 bytes.
This commit is contained in:
notaz 2017-09-30 00:19:55 +03:00
parent e0bcb7a90d
commit 6d28fb5023
3 changed files with 117 additions and 157 deletions

View file

@ -739,83 +739,57 @@ INLINE int advance_lfo(int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt)
return lfo_ampm; return lfo_ampm;
} }
#define EG_INC_VAL() \ INLINE void update_eg_phase(UINT16 *vol_out, FM_SLOT *SLOT, UINT32 eg_cnt)
((1 << ((pack >> ((eg_cnt>>shift)&7)*3)&7)) >> 1)
INLINE UINT32 update_eg_phase(FM_SLOT *SLOT, UINT32 eg_cnt)
{ {
INT32 volume = SLOT->volume; INT32 volume = SLOT->volume;
UINT32 pack = SLOT->eg_pack[SLOT->state - 1];
UINT32 shift = pack >> 24;
INT32 eg_inc_val;
switch(SLOT->state) if (eg_cnt & ((1 << shift) - 1))
return;
eg_inc_val = pack >> ((eg_cnt >> shift) & 7) * 3;
eg_inc_val = (1 << (eg_inc_val & 7)) >> 1;
switch (SLOT->state)
{ {
case EG_ATT: /* attack phase */ case EG_ATT: /* attack phase */
volume += ( ~volume * eg_inc_val ) >> 4;
if ( volume <= MIN_ATT_INDEX )
{ {
UINT32 pack = SLOT->eg_pack_ar; volume = MIN_ATT_INDEX;
UINT32 shift = pack>>24; SLOT->state = EG_DEC;
if ( !(eg_cnt & ((1<<shift)-1) ) )
{
volume += ( ~volume * EG_INC_VAL() ) >>4;
if (volume <= MIN_ATT_INDEX)
{
volume = MIN_ATT_INDEX;
SLOT->state = EG_DEC;
}
}
break;
} }
break;
case EG_DEC: /* decay phase */ case EG_DEC: /* decay phase */
volume += eg_inc_val;
if ( volume >= (INT32) SLOT->sl )
SLOT->state = EG_SUS;
break;
case EG_SUS: /* sustain phase */
volume += eg_inc_val;
if ( volume >= MAX_ATT_INDEX )
{ {
UINT32 pack = SLOT->eg_pack_d1r; volume = MAX_ATT_INDEX;
UINT32 shift = pack>>24; /* do not change SLOT->state (verified on real chip) */
if ( !(eg_cnt & ((1<<shift)-1) ) )
{
volume += EG_INC_VAL();
if ( volume >= (INT32) SLOT->sl )
SLOT->state = EG_SUS;
}
break;
} }
break;
case EG_SUS: /* sustain phase */ case EG_REL: /* release phase */
volume += eg_inc_val;
if ( volume >= MAX_ATT_INDEX )
{ {
UINT32 pack = SLOT->eg_pack_d2r; volume = MAX_ATT_INDEX;
UINT32 shift = pack>>24; SLOT->state = EG_OFF;
if ( !(eg_cnt & ((1<<shift)-1) ) )
{
volume += EG_INC_VAL();
if ( volume >= MAX_ATT_INDEX )
{
volume = MAX_ATT_INDEX;
/* do not change SLOT->state (verified on real chip) */
}
}
break;
}
case EG_REL: /* release phase */
{
UINT32 pack = SLOT->eg_pack_rr;
UINT32 shift = pack>>24;
if ( !(eg_cnt & ((1<<shift)-1) ) )
{
volume += EG_INC_VAL();
if ( volume >= MAX_ATT_INDEX )
{
volume = MAX_ATT_INDEX;
SLOT->state = EG_OFF;
}
}
break;
} }
break;
} }
SLOT->volume = volume; SLOT->volume = volume;
return SLOT->tl + ((UINT32)volume); /* tl is 7bit<<3, volume 0-1023 (0-2039 total) */ *vol_out = SLOT->tl + volume; /* tl is 7bit<<3, volume 0-1023 (0-2039 total) */
} }
#endif #endif
@ -873,10 +847,10 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length)
ct->eg_timer -= EG_TIMER_OVERFLOW; ct->eg_timer -= EG_TIMER_OVERFLOW;
ct->eg_cnt++; ct->eg_cnt++;
if (ct->CH->SLOT[SLOT1].state != EG_OFF) ct->vol_out1 = update_eg_phase(&ct->CH->SLOT[SLOT1], ct->eg_cnt); if (ct->CH->SLOT[SLOT1].state != EG_OFF) update_eg_phase(&ct->vol_out1, &ct->CH->SLOT[SLOT1], ct->eg_cnt);
if (ct->CH->SLOT[SLOT2].state != EG_OFF) ct->vol_out2 = update_eg_phase(&ct->CH->SLOT[SLOT2], ct->eg_cnt); if (ct->CH->SLOT[SLOT2].state != EG_OFF) update_eg_phase(&ct->vol_out2, &ct->CH->SLOT[SLOT2], ct->eg_cnt);
if (ct->CH->SLOT[SLOT3].state != EG_OFF) ct->vol_out3 = update_eg_phase(&ct->CH->SLOT[SLOT3], ct->eg_cnt); if (ct->CH->SLOT[SLOT3].state != EG_OFF) update_eg_phase(&ct->vol_out3, &ct->CH->SLOT[SLOT3], ct->eg_cnt);
if (ct->CH->SLOT[SLOT4].state != EG_OFF) ct->vol_out4 = update_eg_phase(&ct->CH->SLOT[SLOT4], ct->eg_cnt); if (ct->CH->SLOT[SLOT4].state != EG_OFF) update_eg_phase(&ct->vol_out4, &ct->CH->SLOT[SLOT4], ct->eg_cnt);
} }
if (ct->pack & 4) continue; /* output disabled */ if (ct->pack & 4) continue; /* output disabled */
@ -1071,7 +1045,7 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length)
} else { } else {
buffer[scounter] += smp; buffer[scounter] += smp;
} }
ct->algo = 8; // algo is only used in asm, here only bit3 is used ct->algo |= 8;
} }
/* update phase counters AFTER output calculations */ /* update phase counters AFTER output calculations */

View file

@ -43,10 +43,16 @@ typedef struct
INT16 volume; /* #0x1a envelope counter | need_save */ INT16 volume; /* #0x1a envelope counter | need_save */
UINT32 sl; /* #0x1c sustain level:sl_table[SL] */ UINT32 sl; /* #0x1c sustain level:sl_table[SL] */
UINT32 eg_pack_ar; /* #0x20 (attack state) */ /* asm relies on this order: */
UINT32 eg_pack_d1r; /* #0x24 (decay state) */ union {
UINT32 eg_pack_d2r; /* #0x28 (sustain state) */ struct {
UINT32 eg_pack_rr; /* #0x2c (release state) */ UINT32 eg_pack_rr; /* #0x20 1 (release state) */
UINT32 eg_pack_d2r; /* #0x24 2 (sustain state) */
UINT32 eg_pack_d1r; /* #0x28 3 (decay state) */
UINT32 eg_pack_ar; /* #0x2c 4 (attack state) */
};
UINT32 eg_pack[4];
};
} FM_SLOT; } FM_SLOT;

View file

@ -30,103 +30,73 @@
.equiv EG_TIMER_OVERFLOW, (3*(1<<EG_SH)) @ envelope generator timer overflows every 3 samples (on real chip) .equiv EG_TIMER_OVERFLOW, (3*(1<<EG_SH)) @ envelope generator timer overflows every 3 samples (on real chip)
.equiv LFO_SH, 25 /* 7.25 fixed point (LFO calculations) */ .equiv LFO_SH, 25 /* 7.25 fixed point (LFO calculations) */
.equiv ENV_QUIET, (2*13*256/8)/2 .equiv ENV_QUIET, (2*13*256/8)
.text
.align 2
@ r5=slot, r1=eg_cnt, trashes: r0,r2,r3 @ r5=slot, r1=eg_cnt, trashes: r0,r2,r3
@ writes output to routp, but only if vol_out changes @ writes output to routp, but only if vol_out changes
.macro update_eg_phase_slot slot .macro update_eg_phase_slot slot
ldrb r2, [r5,#0x17] @ state ldrb r2, [r5,#0x17] @ state
mov r3, #1 @ 1ci add r3, r5, #0x1c
cmp r2, #1 tst r2, r2
blt 5f @ EG_OFF beq 0f @ EG_OFF
beq 3f @ EG_REL
cmp r2, #3
blt 2f @ EG_SUS
beq 1f @ EG_DEC
0: @ EG_ATT ldr r2, [r3, r2, lsl #2] @ pack
ldr r2, [r5,#0x20] @ eg_pack_ar (1ci) mov r3, #1
mov r0, r2, lsr #24 mov r0, r2, lsr #24 @ shift
mov r3, r3, lsl r0 mov r3, r3, lsl r0
sub r3, r3, #1 sub r3, r3, #1
tst r1, r3 tst r1, r3
bne 5f @ do smth for tl problem (set on init?) bne 0f @ no volume change
mov r3, r1, lsr r0 mov r3, r1, lsr r0
ldrh r0, [r5,#0x1a] @ volume, unsigned (0-1023)
and r3, r3, #7 and r3, r3, #7
add r3, r3, r3, lsl #1 add r3, r3, r3, lsl #1
mov r3, r2, lsr r3 mov r3, r2, lsr r3
and r3, r3, #7 @ shift for eg_inc calculation and r3, r3, #7 @ eg_inc_val shift, may be 0
mvn r2, r0 ldrb r2, [r5,#0x17] @ state
ldrh r0, [r5,#0x1a] @ volume, unsigned (0-1023)
cmp r2, #4 @ EG_ATT
beq 4f
cmp r2, #2
mov r2, #1
mov r2, r2, lsl r3 mov r2, r2, lsl r3
add r0, r0, r2, asr #5 mov r2, r2, lsr #1 @ eg_inc_val
add r0, r0, r2
blt 1f @ EG_REL
beq 2f @ EG_SUS
3: @ EG_DEC
ldr r2, [r5,#0x1c] @ sl (can be 16bit?)
mov r3, #EG_SUS
cmp r0, r2 @ if ( volume >= (INT32) SLOT->sl )
strgeb r3, [r5,#0x17] @ state
b 10f
4: @ EG_ATT
subs r3, r3, #1 @ eg_inc_val_shift - 1
mov r2, #0
mvnpl r2, r0
mov r2, r2, lsl r3
add r0, r0, r2, asr #4
cmp r0, #0 @ if (volume <= MIN_ATT_INDEX) cmp r0, #0 @ if (volume <= MIN_ATT_INDEX)
movle r3, #EG_DEC movle r3, #EG_DEC
strleb r3, [r5,#0x17] @ state strleb r3, [r5,#0x17] @ state
movle r0, #0 movle r0, #0
b 4f b 10f
1: @ EG_DEC
ldr r2, [r5,#0x24] @ eg_pack_d1r (1ci)
mov r0, r2, lsr #24
mov r3, r3, lsl r0
sub r3, r3, #1
tst r1, r3
bne 5f @ do smth for tl problem (set on init?)
mov r3, r1, lsr r0
ldrh r0, [r5,#0x1a] @ volume
and r3, r3, #7
add r3, r3, r3, lsl #1
mov r3, r2, lsr r3
and r3, r3, #7 @ shift for eg_inc calculation
mov r2, #1
mov r3, r2, lsl r3
ldr r2, [r5,#0x1c] @ sl (can be 16bit?)
add r0, r0, r3, asr #1
cmp r0, r2 @ if ( volume >= (INT32) SLOT->sl )
movge r3, #EG_SUS
strgeb r3, [r5,#0x17] @ state
b 4f
2: @ EG_SUS 2: @ EG_SUS
ldr r2, [r5,#0x28] @ eg_pack_d2r (1ci)
mov r0, r2, lsr #24
mov r3, r3, lsl r0
sub r3, r3, #1
tst r1, r3
bne 5f @ do smth for tl problem (set on init?)
mov r3, r1, lsr r0
ldrh r0, [r5,#0x1a] @ volume
and r3, r3, #7
add r3, r3, r3, lsl #1
mov r3, r2, lsr r3
and r3, r3, #7 @ shift for eg_inc calculation
mov r2, #1
mov r3, r2, lsl r3
add r0, r0, r3, asr #1
mov r2, #1024 mov r2, #1024
sub r2, r2, #1 @ r2 = MAX_ATT_INDEX sub r2, r2, #1 @ r2 = MAX_ATT_INDEX
cmp r0, r2 @ if ( volume >= MAX_ATT_INDEX ) cmp r0, r2 @ if ( volume >= MAX_ATT_INDEX )
movge r0, r2 movge r0, r2
b 4f b 10f
3: @ EG_REL 1: @ EG_REL
ldr r2, [r5,#0x2c] @ eg_pack_rr (1ci)
mov r0, r2, lsr #24
mov r3, r3, lsl r0
sub r3, r3, #1
tst r1, r3
bne 5f @ do smth for tl problem (set on init?)
mov r3, r1, lsr r0
ldrh r0, [r5,#0x1a] @ volume
and r3, r3, #7
add r3, r3, r3, lsl #1
mov r3, r2, lsr r3
and r3, r3, #7 @ shift for eg_inc calculation
mov r2, #1
mov r3, r2, lsl r3
add r0, r0, r3, asr #1
mov r2, #1024 mov r2, #1024
sub r2, r2, #1 @ r2 = MAX_ATT_INDEX sub r2, r2, #1 @ r2 = MAX_ATT_INDEX
cmp r0, r2 @ if ( volume >= MAX_ATT_INDEX ) cmp r0, r2 @ if ( volume >= MAX_ATT_INDEX )
@ -134,7 +104,7 @@
movge r3, #EG_OFF movge r3, #EG_OFF
strgeb r3, [r5,#0x17] @ state strgeb r3, [r5,#0x17] @ state
4: 10: @ finish
ldrh r3, [r5,#0x18] @ tl ldrh r3, [r5,#0x18] @ tl
strh r0, [r5,#0x1a] @ volume strh r0, [r5,#0x1a] @ volume
.if \slot == SLOT1 .if \slot == SLOT1
@ -157,7 +127,7 @@
orr r7, r0, r7, lsr #16 orr r7, r0, r7, lsr #16
.endif .endif
5: 0: @ EG_OFF
.endm .endm
@ -187,28 +157,30 @@
tstne r12, #(1<<(\slot+8)) tstne r12, #(1<<(\slot+8))
.if \slot == SLOT1 .if \slot == SLOT1
mov r1, r6, lsl #16 mov r1, r6, lsl #16
mov r1, r1, lsr #17 mov r1, r1, lsr #16
.elseif \slot == SLOT2 .elseif \slot == SLOT2
mov r1, r6, lsr #17 mov r1, r6, lsr #16
.elseif \slot == SLOT3 .elseif \slot == SLOT3
mov r1, r7, lsl #16 mov r1, r7, lsl #16
mov r1, r1, lsr #17 mov r1, r1, lsr #16
.elseif \slot == SLOT4 .elseif \slot == SLOT4
mov r1, r7, lsr #17 mov r1, r7, lsr #16
.endif .endif
andne r2, r12, #0xc0 andne r2, r12, #0xc0
movne r2, r2, lsr #6 movne r2, r2, lsr #6
addne r2, r2, #24 addne r2, r2, #24
addne r1, r1, r12, lsr r2 addne r1, r1, r12, lsr r2
bic r1, r1, #1
.endm .endm
@ \r=sin/result, r1=env, r3=ym_tl_tab
.macro lookup_tl r .macro lookup_tl r
tst \r, #0x100 tst \r, #0x100
eorne \r, \r, #0xff @ if (sin & 0x100) sin = 0xff - (sin&0xff); eorne \r, \r, #0xff @ if (sin & 0x100) sin = 0xff - (sin&0xff);
tst \r, #0x200 tst \r, #0x200
and \r, \r, #0xff and \r, \r, #0xff
orr \r, \r, r1, lsl #8 orr \r, \r, r1, lsl #7
mov \r, \r, lsl #1 mov \r, \r, lsl #1
ldrh \r, [r3, \r] @ 2ci if ne ldrh \r, [r3, \r] @ 2ci if ne
rsbne \r, \r, #0 rsbne \r, \r, #0
@ -345,9 +317,9 @@
make_eg_out SLOT3 make_eg_out SLOT3
cmp r1, #ENV_QUIET cmp r1, #ENV_QUIET
ldr r2, [lr, #0x38] @ mem (for future) ldr r2, [lr, #0x38] @ mem (for future)
movcs r0, r2 mov r0, #0
bcs 0f bcs 0f
ldr r0, [lr, #0x18] @ 1ci ldr r0, [lr, #0x18] @ phase3
mov r0, r0, lsr #16 mov r0, r0, lsr #16
lookup_tl r0 @ r0=c2 lookup_tl r0 @ r0=c2
@ -370,13 +342,13 @@
cmp r1, #ENV_QUIET cmp r1, #ENV_QUIET
movcs r2, #0 movcs r2, #0
bcs 2f bcs 2f
ldr r2, [lr, #0x14] ldr r2, [lr, #0x14] @ phase2
mov r5, r10, lsr #17 mov r5, r10, lsr #17
add r2, r5, r2, lsr #16 add r2, r5, r2, lsr #16
lookup_tl r2 @ r2=mem lookup_tl r2 @ r2=mem
2: 2:
str r2, [lr, #0x38] @ mem str r2, [lr, #0x38] @ mem
.endm .endm
@ -541,9 +513,9 @@
movne r0, r0, asr #16 movne r0, r0, asr #16
movne r0, r0, lsl r2 movne r0, r0, lsl r2
ldr r2, [lr, #0x10] ldr r2, [lr, #0x10] @ phase1
add r0, r0, r2
mov r0, r0, lsr #16 mov r0, r0, lsr #16
add r0, r0, r2, lsr #16
lookup_tl r0 lookup_tl r0
mov r10,r10,lsl #16 @ ct->op1_out <<= 16; mov r10,r10,lsl #16 @ ct->op1_out <<= 16;
mov r0, r0, lsl #16 mov r0, r0, lsl #16
@ -759,11 +731,18 @@ chan_render_loop:
crl_loop_lfo: crl_loop_lfo:
add r0, lr, #0x30 add r0, lr, #0x30
ldmia r0, {r1,r2} ldmia r0, {r1,r2}
subs r4, r4, #0x100
bmi crl_loop_end
add r2, r2, r1 add r2, r2, r1
str r2, [lr, #0x30] str r2, [lr, #0x30]
@ r12=lfo_ampm[31:16], r1=lfo_cnt_old, r2=lfo_cnt @ r12=lfo_ampm[31:16], r1=lfo_cnt_old, r2=lfo_cnt
advance_lfo_m advance_lfo_m
add r4, r4, #0x100
crl_loop: crl_loop:
subs r4, r4, #0x100 subs r4, r4, #0x100
bmi crl_loop_end bmi crl_loop_end
@ -859,7 +838,6 @@ crl_algo6:
crl_algo7: crl_algo7:
upd_algo7_m upd_algo7_m
.pool
crl_algo_done: crl_algo_done:
@ -917,6 +895,7 @@ crl_do_phase:
crl_loop_end: crl_loop_end:
@ stmia lr, {r6,r7} @ save volumes (for debug)
str r8, [lr, #0x44] @ eg_timer str r8, [lr, #0x44] @ eg_timer
str r12, [lr, #0x4c] @ pack (for lfo_ampm) str r12, [lr, #0x4c] @ pack (for lfo_ampm)
str r4, [lr, #0x50] @ was_update str r4, [lr, #0x50] @ was_update
@ -925,3 +904,4 @@ crl_loop_end:
.pool .pool
@ vim:filetype=armasm