code940 now plays mp3s

git-svn-id: file:///home/notaz/opt/svn/PicoDrive@22 be3aeb3a-fb24-0410-a615-afba39da0efa
This commit is contained in:
notaz 2007-01-20 23:11:02 +00:00
parent b837b69b3f
commit 42c7b14797
26 changed files with 1061 additions and 75 deletions

View file

@ -7,7 +7,7 @@
#include <fcntl.h>
#include <errno.h>
#include "940shared.h"
#include "code940/940shared.h"
#include "gp2x.h"
#include "emu.h"
#include "menu.h"
@ -235,11 +235,11 @@ static void wait_busy_940(void)
printf("wait iterations: %i\n", i);
#else
for (i = 0; shared_ctl->busy && i < 0x10000; i++)
spend_cycles(4*1024);
spend_cycles(8*1024); // tested to be best for mp3 dec
if (i < 0x10000) return;
/* 940 crashed */
printf("940 crashed (cnt: %i, wc: %i, ve: ", shared_ctl->loopc, shared_ctl->waitc);
printf("940 crashed (cnt: %i, ve: ", shared_ctl->loopc);
for (i = 0; i < 8; i++)
printf("%i ", shared_ctl->vstarts[i]);
printf(")\n");
@ -373,7 +373,7 @@ void YM2612Init_940(int baseclock, int rate)
/* now cause 940 to init it's ym2612 stuff */
shared_ctl->baseclock = baseclock;
shared_ctl->rate = rate;
shared_ctl->jobs[0] = JOB940_YM2612INIT;
shared_ctl->jobs[0] = JOB940_INITALL;
shared_ctl->jobs[1] = 0;
shared_ctl->busy = 1;

View file

@ -1,8 +1,6 @@
# you may or may not need to change this
#devkit_path = x:/stuff/dev/devkitgp2x/
devkit_path = /usr/local/devkitPro/devkitGP2X/
lgcc_path = $(devkit_path)lib/gcc/arm-linux/4.0.3/
CROSS = arm-linux-
#CROSS = $(devkit_path)bin/arm-linux-
@ -17,7 +15,7 @@ asm_ym2612 = 1
#use_musashi = 1
#up = 1
DEFINC = -I../.. -I. -D__GP2X__ -D_UNZIP_SUPPORT # -DBENCHMARK
DEFINC = -I../.. -I. -DARM -D__GP2X__ -D_UNZIP_SUPPORT # -DBENCHMARK
COPT_COMMON = -static -s -O3 -ftracer -fstrength-reduce -Wall -funroll-loops -fomit-frame-pointer -fstrict-aliasing -ffast-math
ifeq "$(profile)" "1"
COPT_COMMON += -fprofile-generate
@ -80,7 +78,7 @@ DEFINC += -D_USE_DRZ80
OBJS += ../../cpu/DrZ80/drz80.o
endif
all: PicoDrive.gpe code940.bin
all: PicoDrive.gpe
PicoDrive.gpe : $(OBJS)
@echo $@
@ -92,15 +90,11 @@ ifeq "$(up)" "1"
@cmd //C copy $@ \\\\10.0.1.2\\gp2x\\mnt\\sd\\games\\PicoDrive\\
endif
up: # up940
up:
@cp -v PicoDrive.gpe /mnt/gp2x/mnt/sd/games/PicoDrive/
# @cmd //C copy PicoDrive.gpe \\\\10.0.1.2\\gp2x\\mnt\\sd\\games\\PicoDrive\\
up940:
@cp -v code940.bin /mnt/gp2x/mnt/sd/games/PicoDrive/
# @cmd //C copy code940.bin \\\\10.0.1.2\\gp2x\\mnt\\sd\\games\\PicoDrive\\
testrefr.gpe : test.o gp2x.o asmutils.o
@echo $@
@ -133,45 +127,13 @@ testrefr.gpe : test.o gp2x.o asmutils.o
@make -C ../../cpu/Cyclone/proj -f Makefile.linux
# stuff for 940 core
# init, emu_control, emu
OBJS940 += 940init.o 940.o 940ym2612.o
# the asm code seems to be faster when run on 920, but not on 940 for some reason
# OBJS940 += ../../Pico/sound/ym2612_asm.o
# uClibc library code
OBJS940 += uClibc/memset.o uClibc/s_floor.o uClibc/e_pow.o uClibc/e_sqrt.o uClibc/s_fabs.o
OBJS940 += uClibc/s_scalbn.o uClibc/s_copysign.o uClibc/k_sin.o uClibc/k_cos.o uClibc/s_sin.o
OBJS940 += uClibc/e_rem_pio2.o uClibc/k_rem_pio2.o uClibc/e_log.o uClibc/wrappers.o
code940.bin : code940.gpe
@echo $@
@$(OBJCOPY) -O binary $< $@
code940.gpe : $(OBJS940)
@echo $@
@$(LD) -static -e code940 -Ttext 0x0 $^ -L$(lgcc_path) -lgcc -o $@
940ym2612.o : ../../Pico/sound/ym2612.c
@echo $@
@$(GCC) $(COPT_COMMON) -mtune=arm940t $(DEFINC) -DEXTERNAL_YM2612 -c $< -o $@
# cleanup
clean: clean_pd clean_940
tidy: tidy_pd tidy_940
clean_pd: tidy_pd
clean: tidy
@$(RM) PicoDrive.gpe
tidy_pd:
tidy:
@$(RM) $(OBJS)
# @make -C ../../cpu/Cyclone/proj -f Makefile.linux clean
clean_940: tidy_940
@$(RM) code940.bin
tidy_940:
@$(RM) code940.gpe $(OBJS940)
clean_prof:
find ../.. -name '*.gcno' -delete
@ -190,10 +152,3 @@ usbjoy.o : usbjoy.c
@echo $<
@$(GCC) $(COPT) $(DEFINC) -fno-profile-generate -c $< -o $@
uClibc/e_pow.o : uClibc/e_pow.c
@echo $<
@$(GCC) $(COPT) $(DEFINC) -fno-profile-generate -c $< -o $@
uClibc/e_sqrt.o : uClibc/e_sqrt.c
@echo $<
@$(GCC) $(COPT) $(DEFINC) -fno-profile-generate -c $< -o $@

View file

@ -1,7 +1,8 @@
#include "940shared.h"
static _940_data_t *shared_data = (_940_data_t *) 0x100000;
static _940_ctl_t *shared_ctl = (_940_ctl_t *) 0x200000;
static _940_data_t *shared_data = (_940_data_t *) 0x00100000;
static _940_ctl_t *shared_ctl = (_940_ctl_t *) 0x00200000;
static unsigned char *mp3_data = (unsigned char *) 0x01000000;
YM2612 *ym2612_940;
int *mix_buffer;
@ -22,17 +23,17 @@ void Main940(int startvector)
// debug
shared_ctl->vstarts[startvector]++;
asm volatile ("mcr p15, 0, r0, c7, c10, 4" ::: "r0");
// asm volatile ("mcr p15, 0, r0, c7, c10, 4" ::: "r0");
for (;; shared_ctl->loopc++)
for (;;)
{
int job_num;
int job_num = 0;
/*
while (!shared_ctl->busy)
{
//shared_ctl->waitc++;
spend_cycles(256);
spend_cycles(8*1024);
}
*/
if (!shared_ctl->busy)
@ -44,9 +45,12 @@ void Main940(int startvector)
{
switch (shared_ctl->jobs[job_num])
{
case JOB940_YM2612INIT:
case JOB940_INITALL:
/* ym2612 */
shared_ctl->writebuff0[0] = shared_ctl->writebuff1[0] = 0xffff;
YM2612Init_(shared_ctl->baseclock, shared_ctl->rate);
/* Helix mp3 decoder */
shared_data->mp3dec = MP3InitDecoder();
break;
case JOB940_YM2612RESETCHIP:
@ -80,14 +84,51 @@ void Main940(int startvector)
YM2612UpdateOne_(0, shared_ctl->length, shared_ctl->stereo);
break;
}
case JOB940_MP3DECODE: {
int mp3_offs = shared_ctl->mp3_offs;
unsigned char *readPtr = mp3_data + mp3_offs;
int bytesLeft = shared_ctl->mp3_len - mp3_offs;
int offset; // frame offset from readPtr
int err;
if (bytesLeft <= 0) break; // EOF, nothing to do
offset = MP3FindSyncWord(readPtr, bytesLeft);
if (offset < 0) {
shared_ctl->mp3_offs = shared_ctl->mp3_len;
break; // EOF
}
readPtr += offset;
bytesLeft -= offset;
err = MP3Decode(shared_data->mp3dec, &readPtr, &bytesLeft,
shared_data->mp3_buffer[shared_ctl->mp3_buffsel], 0);
if (err) {
if (err == ERR_MP3_INDATA_UNDERFLOW) {
shared_ctl->mp3_offs = shared_ctl->mp3_len; // EOF
break;
} else if (err <= -6 && err >= -12) {
// ERR_MP3_INVALID_FRAMEHEADER, ERR_MP3_INVALID_*
// just try to skip the offending frame..
readPtr++;
}
shared_ctl->mp3_errors++;
shared_ctl->mp3_lasterr = err;
}
shared_ctl->mp3_offs = readPtr - mp3_data;
break;
}
}
}
shared_ctl->busy = 0;
// cache_clean_flush();
cache_clean();
// asm volatile ("mov r0, #0" ::: "r0");
// asm volatile ("mcr p15, 0, r0, c7, c10, 4" ::: "r0"); /* drain write buffer, should be done on nonbuffered write */
// cache_clean_flush();
shared_ctl->loopc++;
shared_ctl->busy = 0;
}
}

View file

@ -73,25 +73,34 @@ code940: @ interrupt table:
mcr p15, 0, r0, c6, c3, 0
mcr p15, 0, r0, c6, c3, 1
@ set region 1 to be cacheable (so the first 2M will be cacheable)
mov r0, #2
@ set up region 4: 16M 0x01000000-0x02000000 (mp3 area)
mov r0, #(0x17<<1)|1
orr r0, r0, #0x01000000
mcr p15, 0, r0, c6, c4, 0
mcr p15, 0, r0, c6, c4, 1
@ set regions 1 and 4 to be cacheable (so the first 2M and mp3 area will be cacheable)
mov r0, #(1<<1)|(1<<4)
mcr p15, 0, r0, c2, c0, 0
mcr p15, 0, r0, c2, c0, 1
@ set region 1 to be bufferable too (only data)
mov r0, #(1<<1)
mcr p15, 0, r0, c3, c0, 0
@ set protection, allow accsess only to regions 1 and 2
mov r0, #(3<<6)|(3<<4)|(3<<2)|(0) @ data: [full, full, full, no access] for regions [3 2 1 0]
mov r0, #(3<<8)|(3<<6)|(3<<4)|(3<<2)|(0) @ data: [full, full, full, full, no access] for regions [4 3 2 1 0]
mcr p15, 0, r0, c5, c0, 0
mov r0, #(0<<6)|(0<<4)|(3<<2)|(0) @ instructions: [no access, no, full, no]
mov r0, #(0<<8)|(0<<6)|(0<<4)|(3<<2)|(0) @ instructions: [no access, no, no, full, no]
mcr p15, 0, r0, c5, c0, 1
mrc p15, 0, r0, c1, c0, 0 @ fetch current control reg
orr r0, r0, #1 @ 0x00000001: enable protection unit
orr r0, r0, #4 @ 0x00000004: enable D cache
orr r0, r0, #0x1000 @ 0x00001000: enable I cache
orr r0, r0, #0xC0000000 @ 0xC0000000: async+fastbus
bic r0, r0, #0xC0000000
orr r0, r0, #0x40000000 @ 0x40000000: synchronous, faster?
@ orr r0, r0, #0xC0000000 @ 0xC0000000: async
mcr p15, 0, r0, c1, c0, 0 @ set control reg
@ flush (invalidate) the cache (just in case)
@ -173,4 +182,4 @@ wait_irq:
.pool
@ vim:filetype=ignored:
@ vim:filetype=armasm:

View file

@ -1,10 +1,12 @@
#include "../../Pico/sound/ym2612.h"
#include "../../../Pico/sound/ym2612.h"
#include "../helix/pub/mp3dec.h"
enum _940_job_t {
JOB940_YM2612INIT = 1,
JOB940_INITALL = 1,
JOB940_YM2612RESETCHIP,
JOB940_YM2612UPDATEONE,
JOB940_PICOSTATELOAD,
JOB940_MP3DECODE,
JOB940_NUMJOBS
};
@ -12,9 +14,10 @@ enum _940_job_t {
typedef struct
{
YM2612 ym2612; /* current state of the emulated YM2612 */
int mix_buffer[44100/50*2]; /* this is where the YM2612 samples will be mixed to */
short mp3_buffer[2][1152*2]; /* buffer for mp3 decoder's output */
YM2612 ym2612; /* current state of the emulated YM2612 */
HMP3Decoder mp3dec; /* mp3 decoder's handle */
int mix_buffer[44100/50*2]; /* this is where the YM2612 samples will be mixed to */
short mp3_buffer[2][1152*2]; /* buffers for mp3 decoder's output */
} _940_data_t;
@ -29,7 +32,11 @@ typedef struct
int writebuffsel; /* which write buffer to use (from 940 side) */
UINT16 writebuff0[2048]; /* list of writes to ym2612, 1024 for savestates, 1024 extra */
UINT16 writebuff1[2048];
int mp3_len; /* data len of loaded mp3 */
int mp3_offs; /* current playback offset (just after last decoded frame) */
int mp3_buffsel; /* which output buffer to decode to */
int vstarts[8]; /* debug: number of starts from each of 8 vectors */
int loopc; /* debug: main loop counter */
int waitc; /* debug: wait loop counter */
int mp3_errors; /* debug: mp3 decoder's error counter */
int mp3_lasterr; /* debug: mp3 decoder's last error */
} _940_ctl_t;

View file

@ -0,0 +1,87 @@
# you may or may not need to change this
#devkit_path = x:/stuff/dev/devkitgp2x/
devkit_path = /usr/local/devkitPro/devkitGP2X/
lgcc_path = $(devkit_path)lib/gcc/arm-linux/4.0.3/
CROSS = arm-linux-
#CROSS = $(devkit_path)bin/arm-linux-
# settings
#up = 1
DEFINC = -I../.. -I. -D__GP2X__ -DARM # -DBENCHMARK
COPT_COMMON = -static -s -O3 -ftracer -fstrength-reduce -Wall -funroll-loops -fomit-frame-pointer -fstrict-aliasing -ffast-math
COPT = $(COPT_COMMON) -mtune=arm920t
GCC = $(CROSS)gcc
STRIP = $(CROSS)strip
AS = $(CROSS)as
LD = $(CROSS)ld
OBJCOPY = $(CROSS)objcopy
all: code940.bin
up940:
@cp -v code940.bin /mnt/gp2x/mnt/sd/games/PicoDrive/
# @cmd //C copy code940.bin \\\\10.0.1.2\\gp2x\\mnt\\sd\\games\\PicoDrive\\
.c.o:
@echo $<
$(GCC) $(COPT) $(DEFINC) -c $< -o $@
.s.o:
@echo $<
$(GCC) $(COPT) $(DEFINC) -c $< -o $@
# stuff for 940 core
# init, emu_control, emu
OBJS940 += 940init.o 940.o 940ym2612.o memcpy.o
# the asm code seems to be faster when run on 920, but not on 940 for some reason
# OBJS940 += ../../Pico/sound/ym2612_asm.o
# uClibc library code
OBJS940 += uClibc/memset.o uClibc/s_floor.o uClibc/e_pow.o uClibc/e_sqrt.o uClibc/s_fabs.o
OBJS940 += uClibc/s_scalbn.o uClibc/s_copysign.o uClibc/k_sin.o uClibc/k_cos.o uClibc/s_sin.o
OBJS940 += uClibc/e_rem_pio2.o uClibc/k_rem_pio2.o uClibc/e_log.o uClibc/wrappers.o
code940.bin : code940.gpe
@echo $@
@$(OBJCOPY) -O binary $< $@
code940.gpe : $(OBJS940) ../helix/helix_mp3.a
@echo $@
@$(LD) -static -e code940 -Ttext 0x0 $^ -L$(lgcc_path) -lgcc -o $@
940ym2612.o : ../../../Pico/sound/ym2612.c
@echo $@
@$(GCC) $(COPT_COMMON) -mtune=arm940t $(DEFINC) -DEXTERNAL_YM2612 -c $< -o $@
# Build the Helix mp3 decoder library in its own directory.
# $(MAKE) is used instead of a literal "make" so that flags such as -j, -n
# and command-line variable overrides are handed down to the sub-make.
../helix/helix_mp3.a:
	@$(MAKE) -C ../helix/
# cleanup
clean: tidy
@$(RM) code940.bin
tidy:
@$(RM) code940.gpe $(OBJS940)
OBJSMP3T = mp3test.o ../gp2x.o ../asmutils.o ../usbjoy.o
mp3test.gpe : $(OBJSMP3T) ../helix/helix_mp3.a
$(GCC) -static -o $@ $^
$(STRIP) $@
@cp -v $@ /mnt/gp2x/mnt/sd
cleanmp3test:
$(RM) $(OBJSMP3T) mp3test.gpe
# uClibc/e_pow.o : uClibc/e_pow.c
# @echo $<
# @$(GCC) $(COPT) $(DEFINC) -fno-profile-generate -c $< -o $@
# uClibc/e_sqrt.o : uClibc/e_sqrt.c
# @echo $<
# @$(GCC) $(COPT) $(DEFINC) -fno-profile-generate -c $< -o $@

View file

@ -0,0 +1,500 @@
/* $NetBSD: memcpy.S,v 1.3 1997/11/22 03:27:12 mark Exp $ */
/*-
* Copyright (c) 1997 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Neil A. Carson and Mark Brinicombe
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS\'\' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/* This was modified by Jay Monkman <jmonkman@smoothsmoothie.com> to
* save and restore r12. This is necessary for RTEMS.
*/
/* #include <machine/asm.h>*/
#define ENTRY(_LABEL) \
.global _LABEL; _LABEL:
@ memcpy entry stub.
@ Saves r0, r12 and lr, runs the shared copy core (_memcpy), then reloads
@ the saved r0 and r12 and returns by popping the saved lr into pc.
@ Reloading r0 means the caller gets back the dest pointer it passed in,
@ whatever the core left in r0.
.globl memcpy
memcpy:
@ ENTRY(gp2x_memcpy)
stmfd sp!, {r0, r12, lr}
@ bl _gp2x_memcpy
bl _memcpy
ldmfd sp!, {r0, r12, pc}
@ memmove entry stub.
@ Identical to the memcpy stub: both call the same core, which picks the
@ copy direction itself by comparing the two pointers.
.globl memmove
memmove:
@ ENTRY(gp2x_memmove)
stmfd sp!, {r0, r12, lr}
@ bl _gp2x_memcpy
bl _memcpy
ldmfd sp!, {r0, r12, pc}
/*
* This is one fun bit of code ...
* Some easy listening music is suggested while trying to understand this
* code e.g. Iron Maiden
*
* For anyone attempting to understand it :
*
* The core code is implemented here with simple stubs for memcpy()
* memmove() and bcopy().
*
* All local labels are prefixed with Lmemcpy_
* Following the prefix a label starting f is used in the forward copy code
* while a label using b is used in the backwards copy code
* The source and destination addresses determine whether a forward or
* backward copy is performed.
* Separate bits of code are used to deal with the following situations
* for both the forward and backwards copy.
* unaligned source address
* unaligned destination address
* Separate copy routines are used to produce an optimised result for each
* of these cases.
* The copy code will use LDM/STM instructions to copy up to 32 bytes at
* a time where possible.
*
* Note: r12 (aka ip) can be trashed during the function along with
* r0-r3 although r0-r2 have defined uses i.e. src, dest, len through out.
* Additional registers are preserved prior to use i.e. r4, r5 & lr
*
* Apologies for the state of the comments;-)
*/
@ Copy core shared by the memcpy and memmove stubs.
@ In: r0 = dest, r1 = src, r2 = length in bytes.
@ Chooses a forward or backward copy from the pointer order, then
@ dispatches on length and on the alignment of dest and src.
_memcpy:
@ ENTRY(_gp2x_memcpy)
/* Determine copy direction */
cmp r1, r0
bcc Lmemcpy_backwards /* src below dest: copy from the end backwards */
moveq r0, #0 /* Quick abort when src == dest (flags are from the cmp above, not from len) */
moveq pc, lr
stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
subs r2, r2, #4 /* from here on r2 is biased: r2 = len - 4 */
blt Lmemcpy_fl4 /* less than 4 bytes */
ands r12, r0, #3 /* r12 = dest & 3 */
bne Lmemcpy_fdestul /* oh unaligned destination addr */
ands r12, r1, #3 /* r12 = src & 3 */
bne Lmemcpy_fsrcul /* oh unaligned source addr */
/*
 * Forward copy, source and destination both word aligned.
 * On entry r2 = (bytes left) - 4.
 * On exit to Lmemcpy_fl32, r2 = (bytes left) - 32 with fewer than 16 bytes left.
 *
 * The bulk loop moves 64 bytes per pass, so it may only be entered while
 * at least 64 bytes remain. The previous code entered it with as few as
 * 32 bytes left and then read and wrote up to 32 bytes past both buffers.
 */
Lmemcpy_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4, r7, r8, r9, r10}	/* borrow r4, r7-r10 */
	subs	r2, r2, #0x20
	blt	Lmemcpy_fl64		/* less than 64 bytes (32 from above) */

	/* blat 64 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
Lmemcpy_floop32:
	ldmia	r1!, {r3, r4, r7, r8, r9, r10, r12, lr}
	stmia	r0!, {r3, r4, r7, r8, r9, r10, r12, lr}
	ldmia	r1!, {r3, r4, r7, r8, r9, r10, r12, lr}
	stmia	r0!, {r3, r4, r7, r8, r9, r10, r12, lr}
	subs	r2, r2, #0x40
	bge	Lmemcpy_floop32

Lmemcpy_fl64:
	/* here r2 = (bytes left) - 64, with 0..63 bytes left */
	adds	r2, r2, #0x20		/* r2 = (bytes left) - 32 */
	ldmgeia	r1!, {r3, r4, r7, r8, r9, r10, r12, lr}	/* blat a remaining 32 bytes */
	stmgeia	r0!, {r3, r4, r7, r8, r9, r10, r12, lr}
	subge	r2, r2, #0x20

	cmn	r2, #0x10
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4, r7, r8, r9, r10}	/* return r4, r7-r10 */
Lmemcpy_fl32:
adds r2, r2, #0x14
/* blat 12 bytes at a time */
Lmemcpy_floop12:
ldmgeia r1!, {r3, r12, lr}
stmgeia r0!, {r3, r12, lr}
subges r2, r2, #0x0c
bge Lmemcpy_floop12
Lmemcpy_fl12:
adds r2, r2, #8
blt Lmemcpy_fl4
subs r2, r2, #4
ldrlt r3, [r1], #4
strlt r3, [r0], #4
ldmgeia r1!, {r3, r12}
stmgeia r0!, {r3, r12}
subge r2, r2, #4
Lmemcpy_fl4:
/* less than 4 bytes to go */
adds r2, r2, #4
ldmeqia sp!, {r0, pc} /* done */
/* copy the crud byte at a time */
cmp r2, #2
ldrb r3, [r1], #1
strb r3, [r0], #1
ldrgeb r3, [r1], #1
strgeb r3, [r0], #1
ldrgtb r3, [r1], #1
strgtb r3, [r0], #1
ldmia sp!, {r0, pc}
/* erg - unaligned destination */
Lmemcpy_fdestul:
rsb r12, r12, #4
cmp r12, #2
/* align destination with byte copies */
ldrb r3, [r1], #1
strb r3, [r0], #1
ldrgeb r3, [r1], #1
strgeb r3, [r0], #1
ldrgtb r3, [r1], #1
strgtb r3, [r0], #1
subs r2, r2, r12
blt Lmemcpy_fl4 /* less the 4 bytes */
ands r12, r1, #3
beq Lmemcpy_ft8 /* we have an aligned source */
/* erg - unaligned source */
/* This is where it gets nasty ... */
Lmemcpy_fsrcul:
bic r1, r1, #3
ldr lr, [r1], #4
cmp r12, #2
bgt Lmemcpy_fsrcul3
beq Lmemcpy_fsrcul2
cmp r2, #0x0c
blt Lmemcpy_fsrcul1loop4
sub r2, r2, #0x0c
stmdb sp!, {r4, r5}
Lmemcpy_fsrcul1loop16:
mov r3, lr, lsr #8
ldmia r1!, {r4, r5, r12, lr}
orr r3, r3, r4, lsl #24
mov r4, r4, lsr #8
orr r4, r4, r5, lsl #24
mov r5, r5, lsr #8
orr r5, r5, r12, lsl #24
mov r12, r12, lsr #8
orr r12, r12, lr, lsl #24
stmia r0!, {r3-r5, r12}
subs r2, r2, #0x10
bge Lmemcpy_fsrcul1loop16
ldmia sp!, {r4, r5}
adds r2, r2, #0x0c
blt Lmemcpy_fsrcul1l4
Lmemcpy_fsrcul1loop4:
mov r12, lr, lsr #8
ldr lr, [r1], #4
orr r12, r12, lr, lsl #24
str r12, [r0], #4
subs r2, r2, #4
bge Lmemcpy_fsrcul1loop4
Lmemcpy_fsrcul1l4:
sub r1, r1, #3
b Lmemcpy_fl4
Lmemcpy_fsrcul2:
cmp r2, #0x0c
blt Lmemcpy_fsrcul2loop4
sub r2, r2, #0x0c
stmdb sp!, {r4, r5}
Lmemcpy_fsrcul2loop16:
mov r3, lr, lsr #16
ldmia r1!, {r4, r5, r12, lr}
orr r3, r3, r4, lsl #16
mov r4, r4, lsr #16
orr r4, r4, r5, lsl #16
mov r5, r5, lsr #16
orr r5, r5, r12, lsl #16
mov r12, r12, lsr #16
orr r12, r12, lr, lsl #16
stmia r0!, {r3-r5, r12}
subs r2, r2, #0x10
bge Lmemcpy_fsrcul2loop16
ldmia sp!, {r4, r5}
adds r2, r2, #0x0c
blt Lmemcpy_fsrcul2l4
Lmemcpy_fsrcul2loop4:
mov r12, lr, lsr #16
ldr lr, [r1], #4
orr r12, r12, lr, lsl #16
str r12, [r0], #4
subs r2, r2, #4
bge Lmemcpy_fsrcul2loop4
Lmemcpy_fsrcul2l4:
sub r1, r1, #2
b Lmemcpy_fl4
Lmemcpy_fsrcul3:
cmp r2, #0x0c
blt Lmemcpy_fsrcul3loop4
sub r2, r2, #0x0c
stmdb sp!, {r4, r5}
Lmemcpy_fsrcul3loop16:
mov r3, lr, lsr #24
ldmia r1!, {r4, r5, r12, lr}
orr r3, r3, r4, lsl #8
mov r4, r4, lsr #24
orr r4, r4, r5, lsl #8
mov r5, r5, lsr #24
orr r5, r5, r12, lsl #8
mov r12, r12, lsr #24
orr r12, r12, lr, lsl #8
stmia r0!, {r3-r5, r12}
subs r2, r2, #0x10
bge Lmemcpy_fsrcul3loop16
ldmia sp!, {r4, r5}
adds r2, r2, #0x0c
blt Lmemcpy_fsrcul3l4
Lmemcpy_fsrcul3loop4:
mov r12, lr, lsr #24
ldr lr, [r1], #4
orr r12, r12, lr, lsl #8
str r12, [r0], #4
subs r2, r2, #4
bge Lmemcpy_fsrcul3loop4
Lmemcpy_fsrcul3l4:
sub r1, r1, #1
b Lmemcpy_fl4
Lmemcpy_backwards:
add r1, r1, r2
add r0, r0, r2
subs r2, r2, #4
blt Lmemcpy_bl4 /* less than 4 bytes */
ands r12, r0, #3
bne Lmemcpy_bdestul /* oh unaligned destination addr */
ands r12, r1, #3
bne Lmemcpy_bsrcul /* oh unaligned source addr */
/*
 * Backward copy, source and destination both word aligned.
 * r0 and r1 point one past the end of the bytes still to be copied.
 * On entry r2 = (bytes left) - 4.
 * On exit to Lmemcpy_bl12, r2 = (bytes left) - 12.
 *
 * The bulk loop moves 64 bytes per pass, so it may only be entered while
 * at least 64 bytes remain. The previous code entered it with as few as
 * 32 bytes left and then read and wrote up to 32 bytes below both buffers.
 */
Lmemcpy_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	Lmemcpy_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, r7, r8, r9, r10, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	Lmemcpy_bl32
	subs	r2, r2, #0x20		/* less than 64 bytes (32 from above) */
	blt	Lmemcpy_bl64

	/* blat 64 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
Lmemcpy_bloop32:
	ldmdb	r1!, {r3, r4, r7, r8, r9, r10, r12, lr}
	stmdb	r0!, {r3, r4, r7, r8, r9, r10, r12, lr}
	ldmdb	r1!, {r3, r4, r7, r8, r9, r10, r12, lr}
	stmdb	r0!, {r3, r4, r7, r8, r9, r10, r12, lr}
	subs	r2, r2, #0x40
	bge	Lmemcpy_bloop32

Lmemcpy_bl64:
	/* here r2 = (bytes left) - 64, with 0..63 bytes left */
	adds	r2, r2, #0x20		/* r2 = (bytes left) - 32 */
	ldmgedb	r1!, {r3, r4, r7, r8, r9, r10, r12, lr}	/* blat a remaining 32 bytes */
	stmgedb	r0!, {r3, r4, r7, r8, r9, r10, r12, lr}
	subge	r2, r2, #0x20

Lmemcpy_bl32:
	cmn	r2, #0x10
	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgedb	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmgedb	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, r7, r8, r9, r10, lr}
Lmemcpy_bl12:
adds r2, r2, #8
blt Lmemcpy_bl4
subs r2, r2, #4
ldrlt r3, [r1, #-4]!
strlt r3, [r0, #-4]!
ldmgedb r1!, {r3, r12}
stmgedb r0!, {r3, r12}
subge r2, r2, #4
Lmemcpy_bl4:
/* less than 4 bytes to go */
adds r2, r2, #4
moveq pc, lr /* done */
/* copy the crud byte at a time */
cmp r2, #2
ldrb r3, [r1, #-1]!
strb r3, [r0, #-1]!
ldrgeb r3, [r1, #-1]!
strgeb r3, [r0, #-1]!
ldrgtb r3, [r1, #-1]!
strgtb r3, [r0, #-1]!
mov pc, lr
/* erg - unaligned destination */
Lmemcpy_bdestul:
cmp r12, #2
/* align destination with byte copies */
ldrb r3, [r1, #-1]!
strb r3, [r0, #-1]!
ldrgeb r3, [r1, #-1]!
strgeb r3, [r0, #-1]!
ldrgtb r3, [r1, #-1]!
strgtb r3, [r0, #-1]!
subs r2, r2, r12
blt Lmemcpy_bl4 /* less than 4 bytes to go */
ands r12, r1, #3
beq Lmemcpy_bt8 /* we have an aligned source */
/* erg - unaligned source */
/* This is where it gets nasty ... */
Lmemcpy_bsrcul:
bic r1, r1, #3
ldr r3, [r1, #0]
cmp r12, #2
blt Lmemcpy_bsrcul1
beq Lmemcpy_bsrcul2
cmp r2, #0x0c
blt Lmemcpy_bsrcul3loop4
sub r2, r2, #0x0c
stmdb sp!, {r4, r5, lr}
Lmemcpy_bsrcul3loop16:
mov lr, r3, lsl #8
ldmdb r1!, {r3-r5, r12}
orr lr, lr, r12, lsr #24
mov r12, r12, lsl #8
orr r12, r12, r5, lsr #24
mov r5, r5, lsl #8
orr r5, r5, r4, lsr #24
mov r4, r4, lsl #8
orr r4, r4, r3, lsr #24
stmdb r0!, {r4, r5, r12, lr}
subs r2, r2, #0x10
bge Lmemcpy_bsrcul3loop16
ldmia sp!, {r4, r5, lr}
adds r2, r2, #0x0c
blt Lmemcpy_bsrcul3l4
Lmemcpy_bsrcul3loop4:
mov r12, r3, lsl #8
ldr r3, [r1, #-4]!
orr r12, r12, r3, lsr #24
str r12, [r0, #-4]!
subs r2, r2, #4
bge Lmemcpy_bsrcul3loop4
Lmemcpy_bsrcul3l4:
add r1, r1, #3
b Lmemcpy_bl4
Lmemcpy_bsrcul2:
cmp r2, #0x0c
blt Lmemcpy_bsrcul2loop4
sub r2, r2, #0x0c
stmdb sp!, {r4, r5, lr}
Lmemcpy_bsrcul2loop16:
mov lr, r3, lsl #16
ldmdb r1!, {r3-r5, r12}
orr lr, lr, r12, lsr #16
mov r12, r12, lsl #16
orr r12, r12, r5, lsr #16
mov r5, r5, lsl #16
orr r5, r5, r4, lsr #16
mov r4, r4, lsl #16
orr r4, r4, r3, lsr #16
stmdb r0!, {r4, r5, r12, lr}
subs r2, r2, #0x10
bge Lmemcpy_bsrcul2loop16
ldmia sp!, {r4, r5, lr}
adds r2, r2, #0x0c
blt Lmemcpy_bsrcul2l4
Lmemcpy_bsrcul2loop4:
mov r12, r3, lsl #16
ldr r3, [r1, #-4]!
orr r12, r12, r3, lsr #16
str r12, [r0, #-4]!
subs r2, r2, #4
bge Lmemcpy_bsrcul2loop4
Lmemcpy_bsrcul2l4:
add r1, r1, #2
b Lmemcpy_bl4
Lmemcpy_bsrcul1:
cmp r2, #0x0c
blt Lmemcpy_bsrcul1loop4
sub r2, r2, #0x0c
stmdb sp!, {r4, r5, lr}
Lmemcpy_bsrcul1loop32:
mov lr, r3, lsl #24
ldmdb r1!, {r3-r5, r12}
orr lr, lr, r12, lsr #8
mov r12, r12, lsl #24
orr r12, r12, r5, lsr #8
mov r5, r5, lsl #24
orr r5, r5, r4, lsr #8
mov r4, r4, lsl #24
orr r4, r4, r3, lsr #8
stmdb r0!, {r4, r5, r12, lr}
subs r2, r2, #0x10
bge Lmemcpy_bsrcul1loop32
ldmia sp!, {r4, r5, lr}
adds r2, r2, #0x0c
blt Lmemcpy_bsrcul1l4
Lmemcpy_bsrcul1loop4:
mov r12, r3, lsl #24
ldr r3, [r1, #-4]!
orr r12, r12, r3, lsr #8
str r12, [r0, #-4]!
subs r2, r2, #4
bge Lmemcpy_bsrcul1loop4
Lmemcpy_bsrcul1l4:
add r1, r1, #1
b Lmemcpy_bl4

View file

@ -0,0 +1,383 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <fcntl.h>
#include <errno.h>
#include "940shared.h"
#include "../gp2x.h"
//#include "emu.h"
//#include "menu.h"
#include "../asmutils.h"
#include "../helix/pub/mp3dec.h"
/* we will need some gp2x internals here */
extern volatile unsigned short *gp2x_memregs; /* from minimal library rlyeh */
extern volatile unsigned long *gp2x_memregl;
static unsigned char *shared_mem = 0;
static _940_data_t *shared_data = 0;
static _940_ctl_t *shared_ctl = 0;
static unsigned char *mp3_mem = 0;
#define MP3_SIZE_MAX (0x1000000 - 4*640*480)
int crashed_940 = 0;
/***********************************************************/
#define MAXOUT (+32767)
#define MINOUT (-32768)
/* limiter: clamps val in place so that it ends up within [min, max].
 * NOTE(review): the arguments are not parenthesized and can be evaluated
 * more than once, and the expansion is a bare { } block rather than
 * do { } while (0), so pass only plain variables/constants. */
#define Limit(val, max,min) { \
if ( val > max ) val = max; \
else if ( val < min ) val = min; \
}
/*
 * Wait until the 940 clears shared_ctl->busy.
 *
 * Polls the flag up to 0x10000 times, burning 8*1024 cycles between polls.
 * If the flag is still set after that, the 940 is assumed to have crashed:
 * the debug counters are printed and crashed_940 is set.
 */
void wait_busy_940(void)
{
	int i;

#if 0
	/* debug variant: no timeout, reports how many polls the wait took.
	 * (The old "wc"/waitc counter is no longer part of _940_ctl_t, so it
	 * is not printed here any more.) */
	printf("940 busy, entering wait loop.. (cnt: %i, ve: ", shared_ctl->loopc);
	for (i = 0; i < 8; i++)
		printf("%i ", shared_ctl->vstarts[i]);
	printf(")\n");

	for (i = 0; shared_ctl->busy; i++)
	{
		spend_cycles(1024); /* needs tuning */
	}
	printf("wait iterations: %i\n", i);
#else
	for (i = 0; shared_ctl->busy && i < 0x10000; i++)
		spend_cycles(8*1024);
	if (i < 0x10000) return;

	/* 940 crashed */
	printf("940 crashed (cnt: %i, ve: ", shared_ctl->loopc);
	for (i = 0; i < 8; i++)
		printf("%i ", shared_ctl->vstarts[i]);
	printf(")\n");
	crashed_940 = 1;
#endif
}
/*
 * Hand up to two jobs to the 940 and wake it up.
 * Writes job0/job1 into shared_ctl->jobs[0..1], raises the busy flag and
 * then triggers an interrupt for the 940. It does not wait for completion.
 * NOTE(review): callers in this file pass 0 as job1, presumably meaning
 * "no second job" - confirm against the 940 side.
 */
void add_job_940(int job0, int job1)
{
shared_ctl->jobs[0] = job0;
shared_ctl->jobs[1] = job1;
shared_ctl->busy = 1; /* set only after both job slots are filled in */
gp2x_memregs[0x3B3E>>1] = 0xffff; // cause an IRQ for 940
}
/*
 * Copy the rest of stream f into dest, going through the bounce buffer
 * tmpbuf (tmpsize bytes) one chunk at a time.
 * Returns the total number of bytes stored at dest.
 * dest is not bounds-checked: the caller must make sure it is large enough.
 */
static int read_to_upper(void *dest, void *tmpbuf, int tmpsize, FILE *f)
{
	unsigned char *out = (unsigned char *)dest;
	int total = 0;
	int chunk;

	while ((chunk = fread(tmpbuf, 1, tmpsize, f)) > 0)
	{
		memcpy(out + total, tmpbuf, chunk);
		total += chunk;
	}

	return total;
}
/*
 * Busy-wait until the microsecond clock reaches lim_time.
 * lim_time is measured in microseconds from the start of second thissec;
 * once the wall clock has moved on to a different second, one second's
 * worth of microseconds is added to the reading before comparing.
 * Always spins at least once.
 */
static void simpleWait(int thissec, int lim_time)
{
	struct timeval now;

	do
	{
		spend_cycles(1024);
		gettimeofday(&now, 0);
		if (now.tv_sec != thissec)
			now.tv_usec += 1000000;
	}
	while (now.tv_usec < lim_time);
}
char **g_argv;
/* none of the functions in this file should be called before this one */
/*
 * Bring up the 940 core and ask it to initialise itself.
 *
 * Holds the 940 in reset, sets up the DUALCPU interrupt registers, maps the
 * shared memory windows on first use, (re)loads code940.bin from the
 * directory of the running executable if the 940 is not known to be running
 * good code, clears the shared areas, queues JOB940_INITALL with the given
 * baseclock/rate and finally releases the 940.
 *
 * Exits the process on any mapping or loading failure.
 */
void YM2612Init_940(int baseclock, int rate)
{
	printf("YM2612Init_940()\n");
	/* sizeof yields size_t; cast so the arguments really match %i */
	printf("Mem usage: shared_data: %i, shared_ctl: %i\n", (int) sizeof(*shared_data), (int) sizeof(*shared_ctl));

	Reset940(1, 2);
	Pause940(1);

	gp2x_memregs[0x3B46>>1] = 0xffff; // clear pending DUALCPU interrupts for 940
	gp2x_memregs[0x3B42>>1] = 0xffff; // enable DUALCPU interrupts for 940

	gp2x_memregl[0x4508>>2] = ~(1<<26); // unmask DUALCPU ints in the undocumented 940's interrupt controller

	if (shared_mem == NULL)
	{
		shared_mem = (unsigned char *) mmap(0, 0x210000, PROT_READ|PROT_WRITE, MAP_SHARED, memdev, 0x2000000);
		if(shared_mem == MAP_FAILED)
		{
			printf("mmap(shared_data) failed with %i\n", errno);
			exit(1);
		}
		shared_data = (_940_data_t *) (shared_mem+0x100000);
		/* this area must not get buffered on either side */
		shared_ctl = (_940_ctl_t *) (shared_mem+0x200000);
		mp3_mem = (unsigned char *) mmap(0, MP3_SIZE_MAX, PROT_READ|PROT_WRITE, MAP_SHARED, memdev, 0x3000000);
		if (mp3_mem == MAP_FAILED)
		{
			printf("mmap(mp3_mem) failed with %i\n", errno);
			exit(1);
		}
		/* first call: nothing has been uploaded yet, force the load below */
		crashed_940 = 1;
	}

	if (crashed_940)
	{
		unsigned char ucData[1024];
		char binpath[1024];
		const char *slash;
		int n;
		FILE *fp;

		/* code940.bin is expected next to the executable; build the path
		 * with a bounded formatter so an overlong argv[0] cannot overflow
		 * binpath, and fall back to the current directory when argv[0]
		 * carries no directory part at all */
		slash = strrchr(g_argv[0], '/');
		if (slash != NULL)
			n = snprintf(binpath, sizeof(binpath), "%.*s/code940.bin", (int) (slash - g_argv[0]), g_argv[0]);
		else
			n = snprintf(binpath, sizeof(binpath), "code940.bin");
		if (n < 0 || n >= (int) sizeof(binpath))
		{
			printf("path to code940.bin is too long\n");
			exit(1);
		}

		fp = fopen(binpath, "rb");
		if(!fp)
		{
			printf("failed to open %s\n", binpath);
			exit(1);
		}

		n = read_to_upper(shared_mem, ucData, sizeof(ucData), fp);
		fclose(fp);
		if (n <= 0)
		{
			/* starting the 940 with no code uploaded would only crash it */
			printf("failed to read %s\n", binpath);
			exit(1);
		}
		crashed_940 = 0;
	}

	memset(shared_data, 0, sizeof(*shared_data));
	memset(shared_ctl, 0, sizeof(*shared_ctl));

	/* now cause 940 to init its ym2612 stuff (and the mp3 decoder) */
	shared_ctl->baseclock = baseclock;
	shared_ctl->rate = rate;
	shared_ctl->jobs[0] = JOB940_INITALL;
	shared_ctl->jobs[1] = 0;
	shared_ctl->busy = 1;

	/* start the 940 */
	Reset940(0, 2);
	Pause940(0);
}
unsigned char *mp3_data = 0;
/*
 * Decode one mp3 frame on this CPU (used when USE_LOCAL is enabled).
 *
 * Reads from mp3_data starting at shared_ctl->mp3_offs, writes the output
 * to shared_data->mp3_buffer[shared_ctl->mp3_buffsel] and stores the new
 * offset back in shared_ctl->mp3_offs. When no sync word is found, or the
 * decoder reports input underflow, the offset is moved to mp3_len (EOF).
 * Other decoder errors are counted in mp3_errors and kept in mp3_lasterr.
 */
void local_decode(void)
{
	const int start_offs = shared_ctl->mp3_offs;
	unsigned char *cursor = mp3_data + start_offs;
	int remaining = shared_ctl->mp3_len - start_offs;
	int sync_offs; // distance from cursor to the next frame
	int rc = 0;

	if (remaining <= 0)
		return; // EOF, nothing to do

	sync_offs = MP3FindSyncWord(cursor, remaining);
	if (sync_offs < 0)
	{
		shared_ctl->mp3_offs = shared_ctl->mp3_len;
		return; // EOF
	}
	cursor += sync_offs;
	remaining -= sync_offs;

	rc = MP3Decode(shared_data->mp3dec, &cursor, &remaining,
			shared_data->mp3_buffer[shared_ctl->mp3_buffsel], 0);

	if (rc == ERR_MP3_INDATA_UNDERFLOW)
	{
		shared_ctl->mp3_offs = shared_ctl->mp3_len; // EOF
		return;
	}

	if (rc != 0)
	{
		// ERR_MP3_INVALID_FRAMEHEADER, ERR_MP3_INVALID_*
		// just try to skip the offending frame..
		if (rc >= -12 && rc <= -6)
			cursor++;

		shared_ctl->mp3_errors++;
		shared_ctl->mp3_lasterr = rc;
	}

	shared_ctl->mp3_offs = cursor - mp3_data;
}
void gp2x_sound_sync(void);
#define USE_LOCAL 0
#define BENCHMARK 0
/*
 * mp3 playback test for the 940 decoder.
 *
 * Usage: <prog> <mp3file>
 *
 * Initialises the GP2X and the 940, loads the whole mp3 (at most
 * MP3_SIZE_MAX bytes) into the memory window shared with the 940, then
 * loops forever at DESIRED_FPS frames per second: each frame it copies
 * 44100/DESIRED_FPS stereo samples out of the decoder's two output buffers
 * to the sound device and, whenever fewer than one frame's worth of samples
 * would be left, flips mp3_buffsel and queues another JOB940_MP3DECODE
 * (each decode is accounted as 1152 samples).
 *
 * The loop never terminates; the program only returns on a startup error.
 */
int main(int argc, char *argv[])
{
	FILE *f;
	int size;
	struct timeval tval; // timing
	int thissec = 0, fps = 0;
	int target_frametime, frame_samples, samples_ready, mp3_buffer_offs, play_bufsel;
	unsigned char play_buffer[44100/50*2*2];

	if (argc != 2) {
		printf("usage: %s <mp3file>\n", argv[0]);
		return 1;
	}

	g_argv = argv;

	gp2x_init();
	YM2612Init_940(123, 44100);

	// load a mp3
	f = fopen(argv[1], "rb");
	if (!f) {
		printf("can't open %s\n", argv[1]);
		return 1;
	}

	/* ftell() returns -1 on failure; letting that through would turn into
	 * a huge size_t in the fread() below and overrun the mp3 window */
	size = -1;
	if (fseek(f, 0, SEEK_END) == 0)
		size = (int) ftell(f);
	if (size < 0) {
		printf("can't get size of %s, errno=%i\n", argv[1], errno);
		fclose(f);
		return 1;
	}
	if (size > MP3_SIZE_MAX) {
		printf("size %i > %i\n", size, MP3_SIZE_MAX);
		size = MP3_SIZE_MAX;
	}

	fseek(f, 0, SEEK_SET);
	if (fread(mp3_mem, 1, size, f) != (size_t) size) {
		printf("read failed, errno=%i\n", errno);
		fclose(f);
		exit(1);
	}
	fclose(f);
	shared_ctl->mp3_len = size;

#if USE_LOCAL
	shared_data->mp3dec = MP3InitDecoder();
	mp3_data = malloc(size);
	printf("init: dec: %p ptr: %p\n", shared_data->mp3dec, mp3_data);
	if (!mp3_data) {
		printf("low mem\n");
		exit(1);
	}
	memcpy(mp3_data, mp3_mem, size);
#else
	//printf("YM2612UpdateOne_940()\n");
	/* let the 940 finish the init job queued by YM2612Init_940() */
	if (shared_ctl->busy) wait_busy_940();
#endif

	gp2x_start_sound(44100, 16, 1);

#define DESIRED_FPS 50
	target_frametime = 1000000/DESIRED_FPS;
	frame_samples = 44100/DESIRED_FPS;
	samples_ready = mp3_buffer_offs = 0;
	play_bufsel = 1;

	for (;; fps++)
	{
		int lim_time;

		/* once per wall-clock second: report the frame rate, restart the count */
		gettimeofday(&tval, 0);
		if (tval.tv_sec != thissec)
		{
			printf("fps: %i\n", fps);
			thissec = tval.tv_sec;
			fps = 0;
#if BENCHMARK
			shared_ctl->mp3_offs = 0;
#endif
		}

#if 0
		/* alternative mode: one decoded mp3 frame per iteration, paced by the sound device */
		// decode
#if USE_LOCAL
		shared_ctl->mp3_buffsel ^= 1;
		local_decode();
#else
		wait_busy_940();
		shared_ctl->mp3_buffsel ^= 1;
		add_job_940(JOB940_MP3DECODE, 0);
#endif

		if (shared_ctl->mp3_lasterr) {
			printf("mp3_lasterr #%i: %i size: %i offs: %i\n", shared_ctl->mp3_errors, shared_ctl->mp3_lasterr,
				shared_ctl->mp3_len, shared_ctl->mp3_offs);
			printf("loopc: %i bytes: %08x\n",
				shared_ctl->loopc, *(int *)(mp3_mem+shared_ctl->mp3_offs));
			shared_ctl->mp3_lasterr = 0;
		}

#if !BENCHMARK
		// play
		gp2x_sound_sync();
		gp2x_sound_write(shared_data->mp3_buffer[shared_ctl->mp3_buffsel^1], 1152*2*2);
#endif
#else
		lim_time = (fps+1) * target_frametime;

		/* the previously queued decode must be done before its buffer is read */
		wait_busy_940();

		// decode, play
		if (samples_ready >= frame_samples) {
			if (1152 - mp3_buffer_offs >= frame_samples) {
				/* the whole frame fits in what is left of the current buffer */
				memcpy(play_buffer, shared_data->mp3_buffer[play_bufsel] + mp3_buffer_offs*2,
					frame_samples*2*2);
				mp3_buffer_offs += frame_samples;
			} else {
				// collect from both buffers..
				int left = 1152 - mp3_buffer_offs;
				memcpy(play_buffer, shared_data->mp3_buffer[play_bufsel] + mp3_buffer_offs*2,
					left*2*2);
				play_bufsel ^= 1;
				mp3_buffer_offs = frame_samples - left;
				memcpy(play_buffer + left*2*2, shared_data->mp3_buffer[play_bufsel],
					mp3_buffer_offs*2*2);
			}
			gp2x_sound_write(play_buffer, frame_samples*2*2);
			samples_ready -= frame_samples;
		}

		// make sure we will have enough samples next frame
		if (samples_ready < frame_samples) {
//			wait_busy_940();
			shared_ctl->mp3_buffsel ^= 1;
			add_job_940(JOB940_MP3DECODE, 0);
			samples_ready += 1152;
		}

		gettimeofday(&tval, 0);
		if(thissec != tval.tv_sec) tval.tv_usec+=1000000;
		if(tval.tv_usec < lim_time)
		{
			// we are too fast
			simpleWait(thissec, lim_time);
		}
#endif
	}

	return 0;
}

View file

@ -204,6 +204,10 @@ void gp2x_sound_write(void *buff, int len)
write(sounddev, buff, len);
}
/* Issues the SOUND_PCM_SYNC ioctl on the sound device (sounddev).
 * NOTE(review): presumably this blocks until all queued audio has been
 * played - confirm against the OSS documentation. The ioctl result is
 * not checked. */
void gp2x_sound_sync(void)
{
ioctl(sounddev, SOUND_PCM_SYNC, 0);
}
void gp2x_sound_volume(int l, int r)
{