mirror of
https://github.com/RaySollium99/picodrive.git
synced 2025-09-05 15:27:46 -04:00
code940 now plays mp3s
git-svn-id: file:///home/notaz/opt/svn/PicoDrive@22 be3aeb3a-fb24-0410-a615-afba39da0efa
This commit is contained in:
parent
b837b69b3f
commit
42c7b14797
26 changed files with 1061 additions and 75 deletions
134
platform/gp2x/code940/940.c
Normal file
134
platform/gp2x/code940/940.c
Normal file
|
@ -0,0 +1,134 @@
|
|||
#include "940shared.h"
|
||||
|
||||
static _940_data_t *shared_data = (_940_data_t *) 0x00100000;
|
||||
static _940_ctl_t *shared_ctl = (_940_ctl_t *) 0x00200000;
|
||||
static unsigned char *mp3_data = (unsigned char *) 0x01000000;
|
||||
YM2612 *ym2612_940;
|
||||
int *mix_buffer;
|
||||
|
||||
// from init.s
|
||||
void wait_irq(void);
|
||||
void spend_cycles(int c);
|
||||
void cache_clean(void);
|
||||
void cache_clean_flush(void);
|
||||
|
||||
// asm volatile ("mov r0, #0" ::: "r0");
|
||||
// asm volatile ("mcr p15, 0, r0, c7, c6, 0" ::: "r0"); /* flush dcache */
|
||||
// asm volatile ("mcr p15, 0, r0, c7, c10, 4" ::: "r0"); /* drain write buffer */
|
||||
|
||||
void Main940(int startvector)
|
||||
{
|
||||
ym2612_940 = &shared_data->ym2612;
|
||||
mix_buffer = shared_data->mix_buffer;
|
||||
|
||||
// debug
|
||||
shared_ctl->vstarts[startvector]++;
|
||||
// asm volatile ("mcr p15, 0, r0, c7, c10, 4" ::: "r0");
|
||||
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int job_num = 0;
|
||||
/*
|
||||
while (!shared_ctl->busy)
|
||||
{
|
||||
//shared_ctl->waitc++;
|
||||
spend_cycles(8*1024);
|
||||
}
|
||||
*/
|
||||
if (!shared_ctl->busy)
|
||||
{
|
||||
wait_irq();
|
||||
}
|
||||
|
||||
for (job_num = 0; job_num < MAX_940JOBS; job_num++)
|
||||
{
|
||||
switch (shared_ctl->jobs[job_num])
|
||||
{
|
||||
case JOB940_INITALL:
|
||||
/* ym2612 */
|
||||
shared_ctl->writebuff0[0] = shared_ctl->writebuff1[0] = 0xffff;
|
||||
YM2612Init_(shared_ctl->baseclock, shared_ctl->rate);
|
||||
/* Helix mp3 decoder */
|
||||
shared_data->mp3dec = MP3InitDecoder();
|
||||
break;
|
||||
|
||||
case JOB940_YM2612RESETCHIP:
|
||||
YM2612ResetChip_();
|
||||
break;
|
||||
|
||||
case JOB940_PICOSTATELOAD:
|
||||
YM2612PicoStateLoad_();
|
||||
break;
|
||||
|
||||
case JOB940_YM2612UPDATEONE: {
|
||||
int i, dw, *wbuff;
|
||||
if (shared_ctl->writebuffsel == 1) {
|
||||
wbuff = (int *) shared_ctl->writebuff1;
|
||||
} else {
|
||||
wbuff = (int *) shared_ctl->writebuff0;
|
||||
}
|
||||
|
||||
/* playback all writes */
|
||||
for (i = 2048/2; i > 0; i--) {
|
||||
UINT16 d;
|
||||
dw = *wbuff++;
|
||||
d = dw;
|
||||
if (d == 0xffff) break;
|
||||
YM2612Write_(d >> 8, d);
|
||||
d = (dw>>16);
|
||||
if (d == 0xffff) break;
|
||||
YM2612Write_(d >> 8, d);
|
||||
}
|
||||
|
||||
YM2612UpdateOne_(0, shared_ctl->length, shared_ctl->stereo);
|
||||
break;
|
||||
}
|
||||
|
||||
case JOB940_MP3DECODE: {
|
||||
int mp3_offs = shared_ctl->mp3_offs;
|
||||
unsigned char *readPtr = mp3_data + mp3_offs;
|
||||
int bytesLeft = shared_ctl->mp3_len - mp3_offs;
|
||||
int offset; // frame offset from readPtr
|
||||
int err;
|
||||
|
||||
if (bytesLeft <= 0) break; // EOF, nothing to do
|
||||
|
||||
offset = MP3FindSyncWord(readPtr, bytesLeft);
|
||||
if (offset < 0) {
|
||||
shared_ctl->mp3_offs = shared_ctl->mp3_len;
|
||||
break; // EOF
|
||||
}
|
||||
readPtr += offset;
|
||||
bytesLeft -= offset;
|
||||
|
||||
err = MP3Decode(shared_data->mp3dec, &readPtr, &bytesLeft,
|
||||
shared_data->mp3_buffer[shared_ctl->mp3_buffsel], 0);
|
||||
if (err) {
|
||||
if (err == ERR_MP3_INDATA_UNDERFLOW) {
|
||||
shared_ctl->mp3_offs = shared_ctl->mp3_len; // EOF
|
||||
break;
|
||||
} else if (err <= -6 && err >= -12) {
|
||||
// ERR_MP3_INVALID_FRAMEHEADER, ERR_MP3_INVALID_*
|
||||
// just try to skip the offending frame..
|
||||
readPtr++;
|
||||
}
|
||||
shared_ctl->mp3_errors++;
|
||||
shared_ctl->mp3_lasterr = err;
|
||||
}
|
||||
shared_ctl->mp3_offs = readPtr - mp3_data;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cache_clean();
|
||||
// asm volatile ("mov r0, #0" ::: "r0");
|
||||
// asm volatile ("mcr p15, 0, r0, c7, c10, 4" ::: "r0"); /* drain write buffer, should be done on nonbuffered write */
|
||||
// cache_clean_flush();
|
||||
|
||||
shared_ctl->loopc++;
|
||||
shared_ctl->busy = 0;
|
||||
}
|
||||
}
|
||||
|
185
platform/gp2x/code940/940init.s
Normal file
185
platform/gp2x/code940/940init.s
Normal file
|
@ -0,0 +1,185 @@
|
|||
.global code940
|
||||
|
||||
code940: @ interrupt table:
|
||||
b .b_reset @ reset
|
||||
b .b_undef @ undefined instructions
|
||||
b .b_swi @ software interrupt
|
||||
b .b_pabort @ prefetch abort
|
||||
b .b_dabort @ data abort
|
||||
b .b_reserved @ reserved
|
||||
b .b_irq @ IRQ
|
||||
b .b_fiq @ FIQ
|
||||
|
||||
@ test
|
||||
.b_reset:
|
||||
mov r12, #0
|
||||
b .Begin
|
||||
.b_undef:
|
||||
mov r12, #1
|
||||
b .Begin
|
||||
.b_swi:
|
||||
mov r12, #2
|
||||
b .Begin
|
||||
.b_pabort:
|
||||
mov r12, #3
|
||||
b .Begin
|
||||
.b_dabort:
|
||||
mov r12, #4
|
||||
b .Begin
|
||||
.b_reserved:
|
||||
mov r12, #5
|
||||
b .Begin
|
||||
.b_irq:
|
||||
mov r12, #6
|
||||
mov sp, #0x100000 @ reset stack
|
||||
sub sp, sp, #4
|
||||
mov r1, #0xbe000000 @ assume we live @ 0x2000000 bank
|
||||
orr r2, r1, #0x3B00
|
||||
orr r2, r2, #0x0046
|
||||
mvn r3, #0
|
||||
strh r3, [r2] @ clear any pending interrupts from the DUALCPU unit
|
||||
orr r2, r1, #0x4500
|
||||
str r3, [r2] @ clear all pending interrupts in irq controller's SRCPND register
|
||||
orr r2, r2, #0x0010
|
||||
str r3, [r2] @ clear all pending interrupts in irq controller's INTPND register
|
||||
b .Enter
|
||||
.b_fiq:
|
||||
mov r12, #7
|
||||
b .Begin
|
||||
|
||||
.Begin:
|
||||
mov sp, #0x100000 @ set the stack top (1M)
|
||||
sub sp, sp, #4 @ minus 4
|
||||
|
||||
@ set up memory region 0 -- the whole 4GB address space
|
||||
mov r0, #(0x1f<<1)|1 @ region data
|
||||
mcr p15, 0, r0, c6, c0, 0 @ opcode2 ~ data/instr
|
||||
mcr p15, 0, r0, c6, c0, 1
|
||||
|
||||
@ set up region 1 which is the first 2 megabytes.
|
||||
mov r0, #(0x14<<1)|1 @ region data
|
||||
mcr p15, 0, r0, c6, c1, 0
|
||||
mcr p15, 0, r0, c6, c1, 1
|
||||
|
||||
@ set up region 2: 64k 0x200000-0x210000
|
||||
mov r0, #(0x0f<<1)|1
|
||||
orr r0, r0, #0x200000
|
||||
mcr p15, 0, r0, c6, c2, 0
|
||||
mcr p15, 0, r0, c6, c2, 1
|
||||
|
||||
@ set up region 3: 64k 0xbe000000-0xbe010000 (hw control registers)
|
||||
mov r0, #(0x0f<<1)|1
|
||||
orr r0, r0, #0xbe000000
|
||||
mcr p15, 0, r0, c6, c3, 0
|
||||
mcr p15, 0, r0, c6, c3, 1
|
||||
|
||||
@ set up region 4: 16M 0x01000000-0x02000000 (mp3 area)
|
||||
mov r0, #(0x17<<1)|1
|
||||
orr r0, r0, #0x01000000
|
||||
mcr p15, 0, r0, c6, c4, 0
|
||||
mcr p15, 0, r0, c6, c4, 1
|
||||
|
||||
@ set regions 1 and 4 to be cacheable (so the first 2M and mp3 area will be cacheable)
|
||||
mov r0, #(1<<1)|(1<<4)
|
||||
mcr p15, 0, r0, c2, c0, 0
|
||||
mcr p15, 0, r0, c2, c0, 1
|
||||
|
||||
@ set region 1 to be bufferable too (only data)
|
||||
mov r0, #(1<<1)
|
||||
mcr p15, 0, r0, c3, c0, 0
|
||||
|
||||
@ set protection, allow accsess only to regions 1 and 2
|
||||
mov r0, #(3<<8)|(3<<6)|(3<<4)|(3<<2)|(0) @ data: [full, full, full, full, no access] for regions [4 3 2 1 0]
|
||||
mcr p15, 0, r0, c5, c0, 0
|
||||
mov r0, #(0<<8)|(0<<6)|(0<<4)|(3<<2)|(0) @ instructions: [no access, no, no, full, no]
|
||||
mcr p15, 0, r0, c5, c0, 1
|
||||
|
||||
mrc p15, 0, r0, c1, c0, 0 @ fetch current control reg
|
||||
orr r0, r0, #1 @ 0x00000001: enable protection unit
|
||||
orr r0, r0, #4 @ 0x00000004: enable D cache
|
||||
orr r0, r0, #0x1000 @ 0x00001000: enable I cache
|
||||
bic r0, r0, #0xC0000000
|
||||
orr r0, r0, #0x40000000 @ 0x40000000: synchronous, faster?
|
||||
@ orr r0, r0, #0xC0000000 @ 0xC0000000: async
|
||||
mcr p15, 0, r0, c1, c0, 0 @ set control reg
|
||||
|
||||
@ flush (invalidate) the cache (just in case)
|
||||
mov r0, #0
|
||||
mcr p15, 0, r0, c7, c6, 0
|
||||
|
||||
.Enter:
|
||||
mov r0, r12
|
||||
bl Main940
|
||||
|
||||
@ we should never get here
|
||||
.b_deadloop:
|
||||
b .b_deadloop
|
||||
|
||||
|
||||
|
||||
@ so asm utils are also defined here:
|
||||
.global spend_cycles @ c
|
||||
|
||||
spend_cycles:
|
||||
mov r0, r0, lsr #2 @ 4 cycles/iteration
|
||||
sub r0, r0, #2 @ entry/exit/init
|
||||
.sc_loop:
|
||||
subs r0, r0, #1
|
||||
bpl .sc_loop
|
||||
|
||||
bx lr
|
||||
|
||||
|
||||
@ clean-flush function from ARM940T technical reference manual
|
||||
.global cache_clean_flush
|
||||
|
||||
cache_clean_flush:
|
||||
mov r1, #0 @ init line counter
|
||||
ccf_outer_loop:
|
||||
mov r0, #0 @ segment counter
|
||||
ccf_inner_loop:
|
||||
orr r2, r1, r0 @ make segment and line address
|
||||
mcr p15, 0, r2, c7, c14, 2 @ clean and flush that line
|
||||
add r0, r0, #0x10 @ incremet secment counter
|
||||
cmp r0, #0x40 @ complete all 4 segments?
|
||||
bne ccf_inner_loop
|
||||
add r1, r1, #0x04000000 @ increment line counter
|
||||
cmp r1, #0 @ complete all lines?
|
||||
bne ccf_outer_loop
|
||||
bx lr
|
||||
|
||||
|
||||
@ clean-only version
|
||||
.global cache_clean
|
||||
|
||||
cache_clean:
|
||||
mov r1, #0 @ init line counter
|
||||
cf_outer_loop:
|
||||
mov r0, #0 @ segment counter
|
||||
cf_inner_loop:
|
||||
orr r2, r1, r0 @ make segment and line address
|
||||
mcr p15, 0, r2, c7, c10, 2 @ clean that line
|
||||
add r0, r0, #0x10 @ incremet secment counter
|
||||
cmp r0, #0x40 @ complete all 4 segments?
|
||||
bne cf_inner_loop
|
||||
add r1, r1, #0x04000000 @ increment line counter
|
||||
cmp r1, #0 @ complete all lines?
|
||||
bne cf_outer_loop
|
||||
bx lr
|
||||
|
||||
|
||||
.global wait_irq
|
||||
|
||||
wait_irq:
|
||||
mrs r0, cpsr
|
||||
bic r0, r0, #0x80
|
||||
msr cpsr_c, r0 @ enable interrupts
|
||||
|
||||
mov r0, #0
|
||||
mcr p15, 0, r0, c7, c0, 4 @ wait for IRQ
|
||||
@ mcr p15, 0, r0, c15, c8, 2
|
||||
b .b_reserved
|
||||
|
||||
.pool
|
||||
|
||||
@ vim:filetype=armasm:
|
42
platform/gp2x/code940/940shared.h
Normal file
42
platform/gp2x/code940/940shared.h
Normal file
|
@ -0,0 +1,42 @@
|
|||
#include "../../../Pico/sound/ym2612.h"
|
||||
#include "../helix/pub/mp3dec.h"
|
||||
|
||||
enum _940_job_t {
|
||||
JOB940_INITALL = 1,
|
||||
JOB940_YM2612RESETCHIP,
|
||||
JOB940_YM2612UPDATEONE,
|
||||
JOB940_PICOSTATELOAD,
|
||||
JOB940_MP3DECODE,
|
||||
JOB940_NUMJOBS
|
||||
};
|
||||
|
||||
#define MAX_940JOBS 2
|
||||
|
||||
typedef struct
|
||||
{
|
||||
YM2612 ym2612; /* current state of the emulated YM2612 */
|
||||
HMP3Decoder mp3dec; /* mp3 decoder's handle */
|
||||
int mix_buffer[44100/50*2]; /* this is where the YM2612 samples will be mixed to */
|
||||
short mp3_buffer[2][1152*2]; /* buffers for mp3 decoder's output */
|
||||
} _940_data_t;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int jobs[MAX_940JOBS]; /* jobs for second core */
|
||||
int busy; /* busy status of the 940 core */
|
||||
int length; /* number of samples to mix (882 max) */
|
||||
int stereo; /* mix samples as stereo, doubles sample count automatically */
|
||||
int baseclock; /* ym2612 settings */
|
||||
int rate;
|
||||
int writebuffsel; /* which write buffer to use (from 940 side) */
|
||||
UINT16 writebuff0[2048]; /* list of writes to ym2612, 1024 for savestates, 1024 extra */
|
||||
UINT16 writebuff1[2048];
|
||||
int mp3_len; /* data len of loaded mp3 */
|
||||
int mp3_offs; /* current playback offset (just after last decoded frame) */
|
||||
int mp3_buffsel; /* which output buffer to decode to */
|
||||
int vstarts[8]; /* debug: number of starts from each of 8 vectors */
|
||||
int loopc; /* debug: main loop counter */
|
||||
int mp3_errors; /* debug: mp3 decoder's error counter */
|
||||
int mp3_lasterr; /* debug: mp3 decoder's last error */
|
||||
} _940_ctl_t;
|
87
platform/gp2x/code940/Makefile
Normal file
87
platform/gp2x/code940/Makefile
Normal file
|
@ -0,0 +1,87 @@
|
|||
|
||||
# you may or may not need to change this
|
||||
#devkit_path = x:/stuff/dev/devkitgp2x/
|
||||
devkit_path = /usr/local/devkitPro/devkitGP2X/
|
||||
lgcc_path = $(devkit_path)lib/gcc/arm-linux/4.0.3/
|
||||
CROSS = arm-linux-
|
||||
#CROSS = $(devkit_path)bin/arm-linux-
|
||||
|
||||
# settings
|
||||
#up = 1
|
||||
|
||||
DEFINC = -I../.. -I. -D__GP2X__ -DARM # -DBENCHMARK
|
||||
COPT_COMMON = -static -s -O3 -ftracer -fstrength-reduce -Wall -funroll-loops -fomit-frame-pointer -fstrict-aliasing -ffast-math
|
||||
COPT = $(COPT_COMMON) -mtune=arm920t
|
||||
GCC = $(CROSS)gcc
|
||||
STRIP = $(CROSS)strip
|
||||
AS = $(CROSS)as
|
||||
LD = $(CROSS)ld
|
||||
OBJCOPY = $(CROSS)objcopy
|
||||
|
||||
all: code940.bin
|
||||
|
||||
up940:
|
||||
@cp -v code940.bin /mnt/gp2x/mnt/sd/games/PicoDrive/
|
||||
|
||||
# @cmd //C copy code940.bin \\\\10.0.1.2\\gp2x\\mnt\\sd\\games\\PicoDrive\\
|
||||
|
||||
.c.o:
|
||||
@echo $<
|
||||
$(GCC) $(COPT) $(DEFINC) -c $< -o $@
|
||||
.s.o:
|
||||
@echo $<
|
||||
$(GCC) $(COPT) $(DEFINC) -c $< -o $@
|
||||
|
||||
|
||||
# stuff for 940 core
|
||||
|
||||
# init, emu_control, emu
|
||||
OBJS940 += 940init.o 940.o 940ym2612.o memcpy.o
|
||||
# the asm code seems to be faster when run on 920, but not on 940 for some reason
|
||||
# OBJS940 += ../../Pico/sound/ym2612_asm.o
|
||||
|
||||
# uClibc library code
|
||||
OBJS940 += uClibc/memset.o uClibc/s_floor.o uClibc/e_pow.o uClibc/e_sqrt.o uClibc/s_fabs.o
|
||||
OBJS940 += uClibc/s_scalbn.o uClibc/s_copysign.o uClibc/k_sin.o uClibc/k_cos.o uClibc/s_sin.o
|
||||
OBJS940 += uClibc/e_rem_pio2.o uClibc/k_rem_pio2.o uClibc/e_log.o uClibc/wrappers.o
|
||||
|
||||
code940.bin : code940.gpe
|
||||
@echo $@
|
||||
@$(OBJCOPY) -O binary $< $@
|
||||
|
||||
code940.gpe : $(OBJS940) ../helix/helix_mp3.a
|
||||
@echo $@
|
||||
@$(LD) -static -e code940 -Ttext 0x0 $^ -L$(lgcc_path) -lgcc -o $@
|
||||
|
||||
940ym2612.o : ../../../Pico/sound/ym2612.c
|
||||
@echo $@
|
||||
@$(GCC) $(COPT_COMMON) -mtune=arm940t $(DEFINC) -DEXTERNAL_YM2612 -c $< -o $@
|
||||
|
||||
../helix/helix_mp3.a:
|
||||
@make -C ../helix/
|
||||
|
||||
|
||||
# cleanup
|
||||
clean: tidy
|
||||
@$(RM) code940.bin
|
||||
tidy:
|
||||
@$(RM) code940.gpe $(OBJS940)
|
||||
|
||||
|
||||
OBJSMP3T = mp3test.o ../gp2x.o ../asmutils.o ../usbjoy.o
|
||||
|
||||
mp3test.gpe : $(OBJSMP3T) ../helix/helix_mp3.a
|
||||
$(GCC) -static -o $@ $^
|
||||
$(STRIP) $@
|
||||
@cp -v $@ /mnt/gp2x/mnt/sd
|
||||
|
||||
cleanmp3test:
|
||||
$(RM) $(OBJSMP3T) mp3test.gpe
|
||||
|
||||
# uClibc/e_pow.o : uClibc/e_pow.c
|
||||
# @echo $<
|
||||
# @$(GCC) $(COPT) $(DEFINC) -fno-profile-generate -c $< -o $@
|
||||
|
||||
# uClibc/e_sqrt.o : uClibc/e_sqrt.c
|
||||
# @echo $<
|
||||
# @$(GCC) $(COPT) $(DEFINC) -fno-profile-generate -c $< -o $@
|
500
platform/gp2x/code940/memcpy.s
Normal file
500
platform/gp2x/code940/memcpy.s
Normal file
|
@ -0,0 +1,500 @@
|
|||
/* $NetBSD: memcpy.S,v 1.3 1997/11/22 03:27:12 mark Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1997 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Neil A. Carson and Mark Brinicombe
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the NetBSD
|
||||
* Foundation, Inc. and its contributors.
|
||||
* 4. Neither the name of The NetBSD Foundation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS\'\' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* This was modified by Jay Monkman <jmonkman@smoothsmoothie.com> to
|
||||
* save and restore r12. This is necessary for RTEMS.
|
||||
*/
|
||||
/* #include <machine/asm.h>*/
|
||||
|
||||
#define ENTRY(_LABEL) \
|
||||
.global _LABEL; _LABEL:
|
||||
|
||||
.globl memcpy
|
||||
memcpy:
|
||||
|
||||
@ ENTRY(gp2x_memcpy)
|
||||
stmfd sp!, {r0, r12, lr}
|
||||
@ bl _gp2x_memcpy
|
||||
bl _memcpy
|
||||
ldmfd sp!, {r0, r12, pc}
|
||||
|
||||
|
||||
|
||||
.globl memmove
|
||||
memmove:
|
||||
|
||||
@ ENTRY(gp2x_memmove)
|
||||
stmfd sp!, {r0, r12, lr}
|
||||
@ bl _gp2x_memcpy
|
||||
bl _memcpy
|
||||
ldmfd sp!, {r0, r12, pc}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* This is one fun bit of code ...
|
||||
* Some easy listening music is suggested while trying to understand this
|
||||
* code e.g. Iron Maiden
|
||||
*
|
||||
* For anyone attempting to understand it :
|
||||
*
|
||||
* The core code is implemented here with simple stubs for memcpy()
|
||||
* memmove() and bcopy().
|
||||
*
|
||||
* All local labels are prefixed with Lmemcpy_
|
||||
* Following the prefix a label starting f is used in the forward copy code
|
||||
* while a label using b is used in the backwards copy code
|
||||
* The source and destination addresses determine whether a forward or
|
||||
* backward copy is performed.
|
||||
* Separate bits of code are used to deal with the following situations
|
||||
* for both the forward and backwards copy.
|
||||
* unaligned source address
|
||||
* unaligned destination address
|
||||
* Separate copy routines are used to produce an optimised result for each
|
||||
* of these cases.
|
||||
* The copy code will use LDM/STM instructions to copy up to 32 bytes at
|
||||
* a time where possible.
|
||||
*
|
||||
* Note: r12 (aka ip) can be trashed during the function along with
|
||||
* r0-r3 although r0-r2 have defined uses i.e. src, dest, len through out.
|
||||
* Additional registers are preserved prior to use i.e. r4, r5 & lr
|
||||
*
|
||||
* Apologies for the state of the comments;-)
|
||||
*/
|
||||
|
||||
|
||||
|
||||
_memcpy:
|
||||
|
||||
@ ENTRY(_gp2x_memcpy)
|
||||
/* Determine copy direction */
|
||||
cmp r1, r0
|
||||
bcc Lmemcpy_backwards
|
||||
|
||||
moveq r0, #0 /* Quick abort for len=0 */
|
||||
moveq pc, lr
|
||||
|
||||
stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
|
||||
subs r2, r2, #4
|
||||
blt Lmemcpy_fl4 /* less than 4 bytes */
|
||||
ands r12, r0, #3
|
||||
bne Lmemcpy_fdestul /* oh unaligned destination addr */
|
||||
ands r12, r1, #3
|
||||
bne Lmemcpy_fsrcul /* oh unaligned source addr */
|
||||
|
||||
Lmemcpy_ft8:
|
||||
/* We have aligned source and destination */
|
||||
subs r2, r2, #8
|
||||
blt Lmemcpy_fl12 /* less than 12 bytes (4 from above) */
|
||||
subs r2, r2, #0x14
|
||||
blt Lmemcpy_fl32 /* less than 32 bytes (12 from above) */
|
||||
stmdb sp!, {r4, r7, r8, r9, r10} /* borrow r4 */
|
||||
|
||||
/* blat 64 bytes at a time */
|
||||
/* XXX for really big copies perhaps we should use more registers */
|
||||
Lmemcpy_floop32:
|
||||
ldmia r1!, {r3, r4, r7, r8, r9, r10, r12, lr}
|
||||
stmia r0!, {r3, r4, r7, r8, r9, r10, r12, lr}
|
||||
ldmia r1!, {r3, r4, r7, r8, r9, r10, r12, lr}
|
||||
stmia r0!, {r3, r4, r7, r8, r9, r10, r12, lr}
|
||||
subs r2, r2, #0x40
|
||||
bge Lmemcpy_floop32
|
||||
|
||||
cmn r2, #0x10
|
||||
ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
|
||||
stmgeia r0!, {r3, r4, r12, lr}
|
||||
subge r2, r2, #0x10
|
||||
ldmia sp!, {r4, r7, r8, r9, r10} /* return r4 */
|
||||
|
||||
Lmemcpy_fl32:
|
||||
adds r2, r2, #0x14
|
||||
|
||||
/* blat 12 bytes at a time */
|
||||
Lmemcpy_floop12:
|
||||
ldmgeia r1!, {r3, r12, lr}
|
||||
stmgeia r0!, {r3, r12, lr}
|
||||
subges r2, r2, #0x0c
|
||||
bge Lmemcpy_floop12
|
||||
|
||||
Lmemcpy_fl12:
|
||||
adds r2, r2, #8
|
||||
blt Lmemcpy_fl4
|
||||
|
||||
subs r2, r2, #4
|
||||
ldrlt r3, [r1], #4
|
||||
strlt r3, [r0], #4
|
||||
ldmgeia r1!, {r3, r12}
|
||||
stmgeia r0!, {r3, r12}
|
||||
subge r2, r2, #4
|
||||
|
||||
Lmemcpy_fl4:
|
||||
/* less than 4 bytes to go */
|
||||
adds r2, r2, #4
|
||||
ldmeqia sp!, {r0, pc} /* done */
|
||||
|
||||
/* copy the crud byte at a time */
|
||||
cmp r2, #2
|
||||
ldrb r3, [r1], #1
|
||||
strb r3, [r0], #1
|
||||
ldrgeb r3, [r1], #1
|
||||
strgeb r3, [r0], #1
|
||||
ldrgtb r3, [r1], #1
|
||||
strgtb r3, [r0], #1
|
||||
ldmia sp!, {r0, pc}
|
||||
|
||||
/* erg - unaligned destination */
|
||||
Lmemcpy_fdestul:
|
||||
rsb r12, r12, #4
|
||||
cmp r12, #2
|
||||
|
||||
/* align destination with byte copies */
|
||||
ldrb r3, [r1], #1
|
||||
strb r3, [r0], #1
|
||||
ldrgeb r3, [r1], #1
|
||||
strgeb r3, [r0], #1
|
||||
ldrgtb r3, [r1], #1
|
||||
strgtb r3, [r0], #1
|
||||
subs r2, r2, r12
|
||||
blt Lmemcpy_fl4 /* less the 4 bytes */
|
||||
|
||||
ands r12, r1, #3
|
||||
beq Lmemcpy_ft8 /* we have an aligned source */
|
||||
|
||||
/* erg - unaligned source */
|
||||
/* This is where it gets nasty ... */
|
||||
Lmemcpy_fsrcul:
|
||||
bic r1, r1, #3
|
||||
ldr lr, [r1], #4
|
||||
cmp r12, #2
|
||||
bgt Lmemcpy_fsrcul3
|
||||
beq Lmemcpy_fsrcul2
|
||||
cmp r2, #0x0c
|
||||
blt Lmemcpy_fsrcul1loop4
|
||||
sub r2, r2, #0x0c
|
||||
stmdb sp!, {r4, r5}
|
||||
|
||||
Lmemcpy_fsrcul1loop16:
|
||||
mov r3, lr, lsr #8
|
||||
ldmia r1!, {r4, r5, r12, lr}
|
||||
orr r3, r3, r4, lsl #24
|
||||
mov r4, r4, lsr #8
|
||||
orr r4, r4, r5, lsl #24
|
||||
mov r5, r5, lsr #8
|
||||
orr r5, r5, r12, lsl #24
|
||||
mov r12, r12, lsr #8
|
||||
orr r12, r12, lr, lsl #24
|
||||
stmia r0!, {r3-r5, r12}
|
||||
subs r2, r2, #0x10
|
||||
bge Lmemcpy_fsrcul1loop16
|
||||
ldmia sp!, {r4, r5}
|
||||
adds r2, r2, #0x0c
|
||||
blt Lmemcpy_fsrcul1l4
|
||||
|
||||
Lmemcpy_fsrcul1loop4:
|
||||
mov r12, lr, lsr #8
|
||||
ldr lr, [r1], #4
|
||||
orr r12, r12, lr, lsl #24
|
||||
str r12, [r0], #4
|
||||
subs r2, r2, #4
|
||||
bge Lmemcpy_fsrcul1loop4
|
||||
|
||||
Lmemcpy_fsrcul1l4:
|
||||
sub r1, r1, #3
|
||||
b Lmemcpy_fl4
|
||||
|
||||
Lmemcpy_fsrcul2:
|
||||
cmp r2, #0x0c
|
||||
blt Lmemcpy_fsrcul2loop4
|
||||
sub r2, r2, #0x0c
|
||||
stmdb sp!, {r4, r5}
|
||||
|
||||
Lmemcpy_fsrcul2loop16:
|
||||
mov r3, lr, lsr #16
|
||||
ldmia r1!, {r4, r5, r12, lr}
|
||||
orr r3, r3, r4, lsl #16
|
||||
mov r4, r4, lsr #16
|
||||
orr r4, r4, r5, lsl #16
|
||||
mov r5, r5, lsr #16
|
||||
orr r5, r5, r12, lsl #16
|
||||
mov r12, r12, lsr #16
|
||||
orr r12, r12, lr, lsl #16
|
||||
stmia r0!, {r3-r5, r12}
|
||||
subs r2, r2, #0x10
|
||||
bge Lmemcpy_fsrcul2loop16
|
||||
ldmia sp!, {r4, r5}
|
||||
adds r2, r2, #0x0c
|
||||
blt Lmemcpy_fsrcul2l4
|
||||
|
||||
Lmemcpy_fsrcul2loop4:
|
||||
mov r12, lr, lsr #16
|
||||
ldr lr, [r1], #4
|
||||
orr r12, r12, lr, lsl #16
|
||||
str r12, [r0], #4
|
||||
subs r2, r2, #4
|
||||
bge Lmemcpy_fsrcul2loop4
|
||||
|
||||
Lmemcpy_fsrcul2l4:
|
||||
sub r1, r1, #2
|
||||
b Lmemcpy_fl4
|
||||
|
||||
Lmemcpy_fsrcul3:
|
||||
cmp r2, #0x0c
|
||||
blt Lmemcpy_fsrcul3loop4
|
||||
sub r2, r2, #0x0c
|
||||
stmdb sp!, {r4, r5}
|
||||
|
||||
Lmemcpy_fsrcul3loop16:
|
||||
mov r3, lr, lsr #24
|
||||
ldmia r1!, {r4, r5, r12, lr}
|
||||
orr r3, r3, r4, lsl #8
|
||||
mov r4, r4, lsr #24
|
||||
orr r4, r4, r5, lsl #8
|
||||
mov r5, r5, lsr #24
|
||||
orr r5, r5, r12, lsl #8
|
||||
mov r12, r12, lsr #24
|
||||
orr r12, r12, lr, lsl #8
|
||||
stmia r0!, {r3-r5, r12}
|
||||
subs r2, r2, #0x10
|
||||
bge Lmemcpy_fsrcul3loop16
|
||||
ldmia sp!, {r4, r5}
|
||||
adds r2, r2, #0x0c
|
||||
blt Lmemcpy_fsrcul3l4
|
||||
|
||||
Lmemcpy_fsrcul3loop4:
|
||||
mov r12, lr, lsr #24
|
||||
ldr lr, [r1], #4
|
||||
orr r12, r12, lr, lsl #8
|
||||
str r12, [r0], #4
|
||||
subs r2, r2, #4
|
||||
bge Lmemcpy_fsrcul3loop4
|
||||
|
||||
Lmemcpy_fsrcul3l4:
|
||||
sub r1, r1, #1
|
||||
b Lmemcpy_fl4
|
||||
|
||||
Lmemcpy_backwards:
|
||||
add r1, r1, r2
|
||||
add r0, r0, r2
|
||||
subs r2, r2, #4
|
||||
blt Lmemcpy_bl4 /* less than 4 bytes */
|
||||
ands r12, r0, #3
|
||||
bne Lmemcpy_bdestul /* oh unaligned destination addr */
|
||||
ands r12, r1, #3
|
||||
bne Lmemcpy_bsrcul /* oh unaligned source addr */
|
||||
|
||||
Lmemcpy_bt8:
|
||||
/* We have aligned source and destination */
|
||||
subs r2, r2, #8
|
||||
blt Lmemcpy_bl12 /* less than 12 bytes (4 from above) */
|
||||
stmdb sp!, {r4, r7, r8, r9, r10, lr}
|
||||
subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
|
||||
blt Lmemcpy_bl32
|
||||
|
||||
/* blat 64 bytes at a time */
|
||||
/* XXX for really big copies perhaps we should use more registers */
|
||||
Lmemcpy_bloop32:
|
||||
ldmdb r1!, {r3, r4, r7, r8, r9, r10, r12, lr}
|
||||
stmdb r0!, {r3, r4, r7, r8, r9, r10, r12, lr}
|
||||
ldmdb r1!, {r3, r4, r7, r8, r9, r10, r12, lr}
|
||||
stmdb r0!, {r3, r4, r7, r8, r9, r10, r12, lr}
|
||||
subs r2, r2, #0x40
|
||||
bge Lmemcpy_bloop32
|
||||
|
||||
Lmemcpy_bl32:
|
||||
cmn r2, #0x10
|
||||
ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
|
||||
stmgedb r0!, {r3, r4, r12, lr}
|
||||
subge r2, r2, #0x10
|
||||
adds r2, r2, #0x14
|
||||
ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
|
||||
stmgedb r0!, {r3, r12, lr}
|
||||
subge r2, r2, #0x0c
|
||||
ldmia sp!, {r4, r7, r8, r9, r10, lr}
|
||||
|
||||
Lmemcpy_bl12:
|
||||
adds r2, r2, #8
|
||||
blt Lmemcpy_bl4
|
||||
subs r2, r2, #4
|
||||
ldrlt r3, [r1, #-4]!
|
||||
strlt r3, [r0, #-4]!
|
||||
ldmgedb r1!, {r3, r12}
|
||||
stmgedb r0!, {r3, r12}
|
||||
subge r2, r2, #4
|
||||
|
||||
Lmemcpy_bl4:
|
||||
/* less than 4 bytes to go */
|
||||
adds r2, r2, #4
|
||||
moveq pc, lr /* done */
|
||||
|
||||
/* copy the crud byte at a time */
|
||||
cmp r2, #2
|
||||
ldrb r3, [r1, #-1]!
|
||||
strb r3, [r0, #-1]!
|
||||
ldrgeb r3, [r1, #-1]!
|
||||
strgeb r3, [r0, #-1]!
|
||||
ldrgtb r3, [r1, #-1]!
|
||||
strgtb r3, [r0, #-1]!
|
||||
mov pc, lr
|
||||
|
||||
/* erg - unaligned destination */
|
||||
Lmemcpy_bdestul:
|
||||
cmp r12, #2
|
||||
|
||||
/* align destination with byte copies */
|
||||
ldrb r3, [r1, #-1]!
|
||||
strb r3, [r0, #-1]!
|
||||
ldrgeb r3, [r1, #-1]!
|
||||
strgeb r3, [r0, #-1]!
|
||||
ldrgtb r3, [r1, #-1]!
|
||||
strgtb r3, [r0, #-1]!
|
||||
subs r2, r2, r12
|
||||
blt Lmemcpy_bl4 /* less than 4 bytes to go */
|
||||
ands r12, r1, #3
|
||||
beq Lmemcpy_bt8 /* we have an aligned source */
|
||||
|
||||
/* erg - unaligned source */
|
||||
/* This is where it gets nasty ... */
|
||||
Lmemcpy_bsrcul:
|
||||
bic r1, r1, #3
|
||||
ldr r3, [r1, #0]
|
||||
cmp r12, #2
|
||||
blt Lmemcpy_bsrcul1
|
||||
beq Lmemcpy_bsrcul2
|
||||
cmp r2, #0x0c
|
||||
blt Lmemcpy_bsrcul3loop4
|
||||
sub r2, r2, #0x0c
|
||||
stmdb sp!, {r4, r5, lr}
|
||||
|
||||
Lmemcpy_bsrcul3loop16:
|
||||
mov lr, r3, lsl #8
|
||||
ldmdb r1!, {r3-r5, r12}
|
||||
orr lr, lr, r12, lsr #24
|
||||
mov r12, r12, lsl #8
|
||||
orr r12, r12, r5, lsr #24
|
||||
mov r5, r5, lsl #8
|
||||
orr r5, r5, r4, lsr #24
|
||||
mov r4, r4, lsl #8
|
||||
orr r4, r4, r3, lsr #24
|
||||
stmdb r0!, {r4, r5, r12, lr}
|
||||
subs r2, r2, #0x10
|
||||
bge Lmemcpy_bsrcul3loop16
|
||||
ldmia sp!, {r4, r5, lr}
|
||||
adds r2, r2, #0x0c
|
||||
blt Lmemcpy_bsrcul3l4
|
||||
|
||||
Lmemcpy_bsrcul3loop4:
|
||||
mov r12, r3, lsl #8
|
||||
ldr r3, [r1, #-4]!
|
||||
orr r12, r12, r3, lsr #24
|
||||
str r12, [r0, #-4]!
|
||||
subs r2, r2, #4
|
||||
bge Lmemcpy_bsrcul3loop4
|
||||
|
||||
Lmemcpy_bsrcul3l4:
|
||||
add r1, r1, #3
|
||||
b Lmemcpy_bl4
|
||||
|
||||
Lmemcpy_bsrcul2:
|
||||
cmp r2, #0x0c
|
||||
blt Lmemcpy_bsrcul2loop4
|
||||
sub r2, r2, #0x0c
|
||||
stmdb sp!, {r4, r5, lr}
|
||||
|
||||
Lmemcpy_bsrcul2loop16:
|
||||
mov lr, r3, lsl #16
|
||||
ldmdb r1!, {r3-r5, r12}
|
||||
orr lr, lr, r12, lsr #16
|
||||
mov r12, r12, lsl #16
|
||||
orr r12, r12, r5, lsr #16
|
||||
mov r5, r5, lsl #16
|
||||
orr r5, r5, r4, lsr #16
|
||||
mov r4, r4, lsl #16
|
||||
orr r4, r4, r3, lsr #16
|
||||
stmdb r0!, {r4, r5, r12, lr}
|
||||
subs r2, r2, #0x10
|
||||
bge Lmemcpy_bsrcul2loop16
|
||||
ldmia sp!, {r4, r5, lr}
|
||||
adds r2, r2, #0x0c
|
||||
blt Lmemcpy_bsrcul2l4
|
||||
|
||||
Lmemcpy_bsrcul2loop4:
|
||||
mov r12, r3, lsl #16
|
||||
ldr r3, [r1, #-4]!
|
||||
orr r12, r12, r3, lsr #16
|
||||
str r12, [r0, #-4]!
|
||||
subs r2, r2, #4
|
||||
bge Lmemcpy_bsrcul2loop4
|
||||
|
||||
Lmemcpy_bsrcul2l4:
|
||||
add r1, r1, #2
|
||||
b Lmemcpy_bl4
|
||||
|
||||
Lmemcpy_bsrcul1:
|
||||
cmp r2, #0x0c
|
||||
blt Lmemcpy_bsrcul1loop4
|
||||
sub r2, r2, #0x0c
|
||||
stmdb sp!, {r4, r5, lr}
|
||||
|
||||
Lmemcpy_bsrcul1loop32:
|
||||
mov lr, r3, lsl #24
|
||||
ldmdb r1!, {r3-r5, r12}
|
||||
orr lr, lr, r12, lsr #8
|
||||
mov r12, r12, lsl #24
|
||||
orr r12, r12, r5, lsr #8
|
||||
mov r5, r5, lsl #24
|
||||
orr r5, r5, r4, lsr #8
|
||||
mov r4, r4, lsl #24
|
||||
orr r4, r4, r3, lsr #8
|
||||
stmdb r0!, {r4, r5, r12, lr}
|
||||
subs r2, r2, #0x10
|
||||
bge Lmemcpy_bsrcul1loop32
|
||||
ldmia sp!, {r4, r5, lr}
|
||||
adds r2, r2, #0x0c
|
||||
blt Lmemcpy_bsrcul1l4
|
||||
|
||||
Lmemcpy_bsrcul1loop4:
|
||||
mov r12, r3, lsl #24
|
||||
ldr r3, [r1, #-4]!
|
||||
orr r12, r12, r3, lsr #8
|
||||
str r12, [r0, #-4]!
|
||||
subs r2, r2, #4
|
||||
bge Lmemcpy_bsrcul1loop4
|
||||
|
||||
Lmemcpy_bsrcul1l4:
|
||||
add r1, r1, #1
|
||||
b Lmemcpy_bl4
|
383
platform/gp2x/code940/mp3test.c
Normal file
383
platform/gp2x/code940/mp3test.c
Normal file
|
@ -0,0 +1,383 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/time.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "940shared.h"
|
||||
#include "../gp2x.h"
|
||||
//#include "emu.h"
|
||||
//#include "menu.h"
|
||||
#include "../asmutils.h"
|
||||
#include "../helix/pub/mp3dec.h"
|
||||
|
||||
/* we will need some gp2x internals here */
|
||||
extern volatile unsigned short *gp2x_memregs; /* from minimal library rlyeh */
|
||||
extern volatile unsigned long *gp2x_memregl;
|
||||
|
||||
static unsigned char *shared_mem = 0;
|
||||
static _940_data_t *shared_data = 0;
|
||||
static _940_ctl_t *shared_ctl = 0;
|
||||
static unsigned char *mp3_mem = 0;
|
||||
|
||||
#define MP3_SIZE_MAX (0x1000000 - 4*640*480)
|
||||
|
||||
int crashed_940 = 0;
|
||||
|
||||
|
||||
/***********************************************************/
|
||||
|
||||
#define MAXOUT (+32767)
|
||||
#define MINOUT (-32768)
|
||||
|
||||
/* limitter */
|
||||
#define Limit(val, max,min) { \
|
||||
if ( val > max ) val = max; \
|
||||
else if ( val < min ) val = min; \
|
||||
}
|
||||
|
||||
|
||||
void wait_busy_940(void)
|
||||
{
|
||||
int i;
|
||||
#if 0
|
||||
printf("940 busy, entering wait loop.. (cnt: %i, wc: %i, ve: ", shared_ctl->loopc, shared_ctl->waitc);
|
||||
for (i = 0; i < 8; i++)
|
||||
printf("%i ", shared_ctl->vstarts[i]);
|
||||
printf(")\n");
|
||||
|
||||
for (i = 0; shared_ctl->busy; i++)
|
||||
{
|
||||
spend_cycles(1024); /* needs tuning */
|
||||
}
|
||||
printf("wait iterations: %i\n", i);
|
||||
#else
|
||||
for (i = 0; shared_ctl->busy && i < 0x10000; i++)
|
||||
spend_cycles(8*1024);
|
||||
if (i < 0x10000) return;
|
||||
|
||||
/* 940 crashed */
|
||||
printf("940 crashed (cnt: %i, ve: ", shared_ctl->loopc);
|
||||
for (i = 0; i < 8; i++)
|
||||
printf("%i ", shared_ctl->vstarts[i]);
|
||||
printf(")\n");
|
||||
crashed_940 = 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void add_job_940(int job0, int job1)
|
||||
{
|
||||
shared_ctl->jobs[0] = job0;
|
||||
shared_ctl->jobs[1] = job1;
|
||||
shared_ctl->busy = 1;
|
||||
gp2x_memregs[0x3B3E>>1] = 0xffff; // cause an IRQ for 940
|
||||
}
|
||||
|
||||
|
||||
static int read_to_upper(void *dest, void *tmpbuf, int tmpsize, FILE *f)
|
||||
{
|
||||
int nRead, nLen = 0;
|
||||
|
||||
while(1)
|
||||
{
|
||||
nRead = fread(tmpbuf, 1, tmpsize, f);
|
||||
if(nRead <= 0)
|
||||
break;
|
||||
memcpy((unsigned char *)dest + nLen, tmpbuf, nRead);
|
||||
nLen += nRead;
|
||||
}
|
||||
|
||||
return nLen;
|
||||
}
|
||||
|
||||
static void simpleWait(int thissec, int lim_time)
|
||||
{
|
||||
struct timeval tval;
|
||||
|
||||
spend_cycles(1024);
|
||||
gettimeofday(&tval, 0);
|
||||
if(thissec != tval.tv_sec) tval.tv_usec+=1000000;
|
||||
|
||||
while(tval.tv_usec < lim_time)
|
||||
{
|
||||
spend_cycles(1024);
|
||||
gettimeofday(&tval, 0);
|
||||
if(thissec != tval.tv_sec) tval.tv_usec+=1000000;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
char **g_argv;
|
||||
|
||||
/* none of the functions in this file should be called before this one */
|
||||
void YM2612Init_940(int baseclock, int rate)
|
||||
{
|
||||
printf("YM2612Init_940()\n");
|
||||
printf("Mem usage: shared_data: %i, shared_ctl: %i\n", sizeof(*shared_data), sizeof(*shared_ctl));
|
||||
|
||||
Reset940(1, 2);
|
||||
Pause940(1);
|
||||
|
||||
gp2x_memregs[0x3B46>>1] = 0xffff; // clear pending DUALCPU interrupts for 940
|
||||
gp2x_memregs[0x3B42>>1] = 0xffff; // enable DUALCPU interrupts for 940
|
||||
|
||||
gp2x_memregl[0x4508>>2] = ~(1<<26); // unmask DUALCPU ints in the undocumented 940's interrupt controller
|
||||
|
||||
if (shared_mem == NULL)
|
||||
{
|
||||
shared_mem = (unsigned char *) mmap(0, 0x210000, PROT_READ|PROT_WRITE, MAP_SHARED, memdev, 0x2000000);
|
||||
if(shared_mem == MAP_FAILED)
|
||||
{
|
||||
printf("mmap(shared_data) failed with %i\n", errno);
|
||||
exit(1);
|
||||
}
|
||||
shared_data = (_940_data_t *) (shared_mem+0x100000);
|
||||
/* this area must not get buffered on either side */
|
||||
shared_ctl = (_940_ctl_t *) (shared_mem+0x200000);
|
||||
mp3_mem = (unsigned char *) mmap(0, MP3_SIZE_MAX, PROT_READ|PROT_WRITE, MAP_SHARED, memdev, 0x3000000);
|
||||
if (mp3_mem == MAP_FAILED)
|
||||
{
|
||||
printf("mmap(mp3_mem) failed with %i\n", errno);
|
||||
exit(1);
|
||||
}
|
||||
crashed_940 = 1;
|
||||
}
|
||||
|
||||
if (crashed_940)
|
||||
{
|
||||
unsigned char ucData[1024];
|
||||
int i;
|
||||
char binpath[1024];
|
||||
FILE *fp;
|
||||
|
||||
strncpy(binpath, g_argv[0], 1023);
|
||||
binpath[1023] = 0;
|
||||
for (i = strlen(binpath); i > 0; i--)
|
||||
if (binpath[i] == '/') { binpath[i] = 0; break; }
|
||||
strcat(binpath, "/code940.bin");
|
||||
|
||||
fp = fopen(binpath, "rb");
|
||||
if(!fp)
|
||||
{
|
||||
printf("failed to open %s\n", binpath);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
read_to_upper(shared_mem, ucData, sizeof(ucData), fp);
|
||||
fclose(fp);
|
||||
crashed_940 = 0;
|
||||
}
|
||||
|
||||
memset(shared_data, 0, sizeof(*shared_data));
|
||||
memset(shared_ctl, 0, sizeof(*shared_ctl));
|
||||
|
||||
/* now cause 940 to init it's ym2612 stuff */
|
||||
shared_ctl->baseclock = baseclock;
|
||||
shared_ctl->rate = rate;
|
||||
shared_ctl->jobs[0] = JOB940_INITALL;
|
||||
shared_ctl->jobs[1] = 0;
|
||||
shared_ctl->busy = 1;
|
||||
|
||||
/* start the 940 */
|
||||
Reset940(0, 2);
|
||||
Pause940(0);
|
||||
}
|
||||
|
||||
|
||||
unsigned char *mp3_data = 0;
|
||||
|
||||
void local_decode(void)
|
||||
{
|
||||
int mp3_offs = shared_ctl->mp3_offs;
|
||||
unsigned char *readPtr = mp3_data + mp3_offs;
|
||||
int bytesLeft = shared_ctl->mp3_len - mp3_offs;
|
||||
int offset; // frame offset from readPtr
|
||||
int err = 0;
|
||||
|
||||
if (bytesLeft <= 0) return; // EOF, nothing to do
|
||||
|
||||
offset = MP3FindSyncWord(readPtr, bytesLeft);
|
||||
if (offset < 0) {
|
||||
shared_ctl->mp3_offs = shared_ctl->mp3_len;
|
||||
return; // EOF
|
||||
}
|
||||
readPtr += offset;
|
||||
bytesLeft -= offset;
|
||||
|
||||
err = MP3Decode(shared_data->mp3dec, &readPtr, &bytesLeft,
|
||||
shared_data->mp3_buffer[shared_ctl->mp3_buffsel], 0);
|
||||
if (err) {
|
||||
if (err == ERR_MP3_INDATA_UNDERFLOW) {
|
||||
shared_ctl->mp3_offs = shared_ctl->mp3_len; // EOF
|
||||
return;
|
||||
} else if (err <= -6 && err >= -12) {
|
||||
// ERR_MP3_INVALID_FRAMEHEADER, ERR_MP3_INVALID_*
|
||||
// just try to skip the offending frame..
|
||||
readPtr++;
|
||||
}
|
||||
shared_ctl->mp3_errors++;
|
||||
shared_ctl->mp3_lasterr = err;
|
||||
}
|
||||
shared_ctl->mp3_offs = readPtr - mp3_data;
|
||||
}
|
||||
|
||||
|
||||
void gp2x_sound_sync(void);
|
||||
|
||||
#define USE_LOCAL 0
|
||||
#define BENCHMARK 0
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
FILE *f;
|
||||
int size;
|
||||
struct timeval tval; // timing
|
||||
int thissec = 0, fps = 0;
|
||||
int target_frametime, frame_samples, samples_ready, mp3_buffer_offs, play_bufsel;
|
||||
unsigned char play_buffer[44100/50*2*2];
|
||||
|
||||
if (argc != 2) {
|
||||
printf("usage: %s <mp3file>\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
g_argv = argv;
|
||||
|
||||
gp2x_init();
|
||||
YM2612Init_940(123, 44100);
|
||||
|
||||
// load a mp3
|
||||
f = fopen(argv[1], "rb");
|
||||
if (!f) {
|
||||
printf("can't open %s\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
fseek(f, 0, SEEK_END);
|
||||
size = (int) ftell(f);
|
||||
if (size > MP3_SIZE_MAX) {
|
||||
printf("size %i > %i\n", size, MP3_SIZE_MAX);
|
||||
size = MP3_SIZE_MAX;
|
||||
}
|
||||
|
||||
fseek(f, 0, SEEK_SET);
|
||||
if (fread(mp3_mem, 1, size, f) != size) {
|
||||
printf("read failed, errno=%i\n", errno);
|
||||
fclose(f);
|
||||
exit(1);
|
||||
}
|
||||
fclose(f);
|
||||
shared_ctl->mp3_len = size;
|
||||
|
||||
#if USE_LOCAL
|
||||
shared_data->mp3dec = MP3InitDecoder();
|
||||
mp3_data = malloc(size);
|
||||
printf("init: dec: %p ptr: %p\n", shared_data->mp3dec, mp3_data);
|
||||
if (!mp3_data) {
|
||||
printf("low mem\n");
|
||||
exit(1);
|
||||
}
|
||||
memcpy(mp3_data, mp3_mem, size);
|
||||
#else
|
||||
//printf("YM2612UpdateOne_940()\n");
|
||||
if (shared_ctl->busy) wait_busy_940();
|
||||
#endif
|
||||
|
||||
gp2x_start_sound(44100, 16, 1);
|
||||
|
||||
#define DESIRED_FPS 50
|
||||
target_frametime = 1000000/DESIRED_FPS;
|
||||
frame_samples = 44100/DESIRED_FPS;
|
||||
samples_ready = mp3_buffer_offs = 0;
|
||||
play_bufsel = 1;
|
||||
|
||||
for (;; fps++)
|
||||
{
|
||||
int lim_time;
|
||||
|
||||
gettimeofday(&tval, 0);
|
||||
if (tval.tv_sec != thissec)
|
||||
{
|
||||
printf("fps: %i\n", fps);
|
||||
thissec = tval.tv_sec;
|
||||
fps = 0;
|
||||
#if BENCHMARK
|
||||
shared_ctl->mp3_offs = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if 0
|
||||
// decode
|
||||
#if USE_LOCAL
|
||||
shared_ctl->mp3_buffsel ^= 1;
|
||||
local_decode();
|
||||
#else
|
||||
wait_busy_940();
|
||||
shared_ctl->mp3_buffsel ^= 1;
|
||||
add_job_940(JOB940_MP3DECODE, 0);
|
||||
#endif
|
||||
|
||||
if (shared_ctl->mp3_lasterr) {
|
||||
printf("mp3_lasterr #%i: %i size: %i offs: %i\n", shared_ctl->mp3_errors, shared_ctl->mp3_lasterr,
|
||||
shared_ctl->mp3_len, shared_ctl->mp3_offs);
|
||||
printf("loopc: %i bytes: %08x\n",
|
||||
shared_ctl->loopc, *(int *)(mp3_mem+shared_ctl->mp3_offs));
|
||||
shared_ctl->mp3_lasterr = 0;
|
||||
}
|
||||
|
||||
#if !BENCHMARK
|
||||
// play
|
||||
gp2x_sound_sync();
|
||||
gp2x_sound_write(shared_data->mp3_buffer[shared_ctl->mp3_buffsel^1], 1152*2*2);
|
||||
#endif
|
||||
#else
|
||||
lim_time = (fps+1) * target_frametime;
|
||||
|
||||
wait_busy_940();
|
||||
|
||||
// decode, play
|
||||
if (samples_ready >= frame_samples) {
|
||||
if (1152 - mp3_buffer_offs >= frame_samples) {
|
||||
memcpy(play_buffer, shared_data->mp3_buffer[play_bufsel] + mp3_buffer_offs*2,
|
||||
frame_samples*2*2);
|
||||
mp3_buffer_offs += frame_samples;
|
||||
} else {
|
||||
// collect from both buffers..
|
||||
int left = 1152 - mp3_buffer_offs;
|
||||
memcpy(play_buffer, shared_data->mp3_buffer[play_bufsel] + mp3_buffer_offs*2,
|
||||
left*2*2);
|
||||
play_bufsel ^= 1;
|
||||
mp3_buffer_offs = frame_samples - left;
|
||||
memcpy(play_buffer + left*2*2, shared_data->mp3_buffer[play_bufsel],
|
||||
mp3_buffer_offs*2*2);
|
||||
}
|
||||
gp2x_sound_write(play_buffer, frame_samples*2*2);
|
||||
samples_ready -= frame_samples;
|
||||
}
|
||||
|
||||
// make sure we will have enough samples next frame
|
||||
if (samples_ready < frame_samples) {
|
||||
// wait_busy_940();
|
||||
shared_ctl->mp3_buffsel ^= 1;
|
||||
add_job_940(JOB940_MP3DECODE, 0);
|
||||
samples_ready += 1152;
|
||||
}
|
||||
|
||||
gettimeofday(&tval, 0);
|
||||
if(thissec != tval.tv_sec) tval.tv_usec+=1000000;
|
||||
if(tval.tv_usec < lim_time)
|
||||
{
|
||||
// we are too fast
|
||||
simpleWait(thissec, lim_time);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
16
platform/gp2x/code940/uClibc/README
Normal file
16
platform/gp2x/code940/uClibc/README
Normal file
|
@ -0,0 +1,16 @@
|
|||
The routines included in this math library are derived from the
|
||||
math library for Apple's MacOS X/Darwin math library, which was
|
||||
itself swiped from FreeBSD. The original copyright information
|
||||
is as follows:
|
||||
|
||||
Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
|
||||
Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
Permission to use, copy, modify, and distribute this
|
||||
software is freely granted, provided that this notice
|
||||
is preserved.
|
||||
|
||||
It has been ported to work with uClibc and generally behave
|
||||
by Erik Andersen <andersen@codepoet.org>
|
||||
22 May, 2001
|
||||
|
147
platform/gp2x/code940/uClibc/e_log.c
Normal file
147
platform/gp2x/code940/uClibc/e_log.c
Normal file
|
@ -0,0 +1,147 @@
|
|||
/* @(#)e_log.c 5.1 93/09/24 */
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#if defined(LIBM_SCCS) && !defined(lint)
|
||||
static char rcsid[] = "$NetBSD: e_log.c,v 1.8 1995/05/10 20:45:49 jtc Exp $";
|
||||
#endif
|
||||
|
||||
/* __ieee754_log(x)
|
||||
* Return the logrithm of x
|
||||
*
|
||||
* Method :
|
||||
* 1. Argument Reduction: find k and f such that
|
||||
* x = 2^k * (1+f),
|
||||
* where sqrt(2)/2 < 1+f < sqrt(2) .
|
||||
*
|
||||
* 2. Approximation of log(1+f).
|
||||
* Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
|
||||
* = 2s + 2/3 s**3 + 2/5 s**5 + .....,
|
||||
* = 2s + s*R
|
||||
* We use a special Reme algorithm on [0,0.1716] to generate
|
||||
* a polynomial of degree 14 to approximate R The maximum error
|
||||
* of this polynomial approximation is bounded by 2**-58.45. In
|
||||
* other words,
|
||||
* 2 4 6 8 10 12 14
|
||||
* R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s
|
||||
* (the values of Lg1 to Lg7 are listed in the program)
|
||||
* and
|
||||
* | 2 14 | -58.45
|
||||
* | Lg1*s +...+Lg7*s - R(z) | <= 2
|
||||
* | |
|
||||
* Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
|
||||
* In order to guarantee error in log below 1ulp, we compute log
|
||||
* by
|
||||
* log(1+f) = f - s*(f - R) (if f is not too large)
|
||||
* log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
|
||||
*
|
||||
* 3. Finally, log(x) = k*ln2 + log(1+f).
|
||||
* = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
|
||||
* Here ln2 is split into two floating point number:
|
||||
* ln2_hi + ln2_lo,
|
||||
* where n*ln2_hi is always exact for |n| < 2000.
|
||||
*
|
||||
* Special cases:
|
||||
* log(x) is NaN with signal if x < 0 (including -INF) ;
|
||||
* log(+INF) is +INF; log(0) is -INF with signal;
|
||||
* log(NaN) is that NaN with no signal.
|
||||
*
|
||||
* Accuracy:
|
||||
* according to an error analysis, the error is always less than
|
||||
* 1 ulp (unit in the last place).
|
||||
*
|
||||
* Constants:
|
||||
* The hexadecimal values are the intended ones for the following
|
||||
* constants. The decimal values may be used, provided that the
|
||||
* compiler will convert from decimal to binary accurately enough
|
||||
* to produce the hexadecimal values shown.
|
||||
*/
|
||||
|
||||
#include "math.h"
|
||||
#include "math_private.h"
|
||||
|
||||
#ifdef __STDC__
|
||||
static const double
|
||||
#else
|
||||
static double
|
||||
#endif
|
||||
ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
|
||||
ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
|
||||
two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
|
||||
Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
|
||||
Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
|
||||
Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
|
||||
Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
|
||||
Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
|
||||
Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
|
||||
Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
|
||||
|
||||
#ifdef __STDC__
|
||||
static const double zero = 0.0;
|
||||
#else
|
||||
static double zero = 0.0;
|
||||
#endif
|
||||
|
||||
#ifdef __STDC__
|
||||
double __ieee754_log(double x)
|
||||
#else
|
||||
double __ieee754_log(x)
|
||||
double x;
|
||||
#endif
|
||||
{
|
||||
double hfsq,f,s,z,R,w,t1,t2,dk;
|
||||
int32_t k,hx,i,j;
|
||||
u_int32_t lx;
|
||||
|
||||
EXTRACT_WORDS(hx,lx,x);
|
||||
|
||||
k=0;
|
||||
if (hx < 0x00100000) { /* x < 2**-1022 */
|
||||
if (((hx&0x7fffffff)|lx)==0)
|
||||
return -two54/zero; /* log(+-0)=-inf */
|
||||
if (hx<0) return (x-x)/zero; /* log(-#) = NaN */
|
||||
k -= 54; x *= two54; /* subnormal number, scale up x */
|
||||
GET_HIGH_WORD(hx,x);
|
||||
}
|
||||
if (hx >= 0x7ff00000) return x+x;
|
||||
k += (hx>>20)-1023;
|
||||
hx &= 0x000fffff;
|
||||
i = (hx+0x95f64)&0x100000;
|
||||
SET_HIGH_WORD(x,hx|(i^0x3ff00000)); /* normalize x or x/2 */
|
||||
k += (i>>20);
|
||||
f = x-1.0;
|
||||
if((0x000fffff&(2+hx))<3) { /* |f| < 2**-20 */
|
||||
if(f==zero) {if(k==0) return zero; else {dk=(double)k;
|
||||
return dk*ln2_hi+dk*ln2_lo;}
|
||||
}
|
||||
R = f*f*(0.5-0.33333333333333333*f);
|
||||
if(k==0) return f-R; else {dk=(double)k;
|
||||
return dk*ln2_hi-((R-dk*ln2_lo)-f);}
|
||||
}
|
||||
s = f/(2.0+f);
|
||||
dk = (double)k;
|
||||
z = s*s;
|
||||
i = hx-0x6147a;
|
||||
w = z*z;
|
||||
j = 0x6b851-hx;
|
||||
t1= w*(Lg2+w*(Lg4+w*Lg6));
|
||||
t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
|
||||
i |= j;
|
||||
R = t2+t1;
|
||||
if(i>0) {
|
||||
hfsq=0.5*f*f;
|
||||
if(k==0) return f-(hfsq-s*(hfsq+R)); else
|
||||
return dk*ln2_hi-((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f);
|
||||
} else {
|
||||
if(k==0) return f-s*(f-R); else
|
||||
return dk*ln2_hi-((s*(f-R)-dk*ln2_lo)-f);
|
||||
}
|
||||
}
|
308
platform/gp2x/code940/uClibc/e_pow.c
Normal file
308
platform/gp2x/code940/uClibc/e_pow.c
Normal file
|
@ -0,0 +1,308 @@
|
|||
/* @(#)e_pow.c 5.1 93/09/24 */
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#if defined(LIBM_SCCS) && !defined(lint)
|
||||
static char rcsid[] = "$NetBSD: e_pow.c,v 1.9 1995/05/12 04:57:32 jtc Exp $";
|
||||
#endif
|
||||
|
||||
/* __ieee754_pow(x,y) return x**y
|
||||
*
|
||||
* n
|
||||
* Method: Let x = 2 * (1+f)
|
||||
* 1. Compute and return log2(x) in two pieces:
|
||||
* log2(x) = w1 + w2,
|
||||
* where w1 has 53-24 = 29 bit trailing zeros.
|
||||
* 2. Perform y*log2(x) = n+y' by simulating muti-precision
|
||||
* arithmetic, where |y'|<=0.5.
|
||||
* 3. Return x**y = 2**n*exp(y'*log2)
|
||||
*
|
||||
* Special cases:
|
||||
* 1. (anything) ** 0 is 1
|
||||
* 2. (anything) ** 1 is itself
|
||||
* 3. (anything) ** NAN is NAN
|
||||
* 4. NAN ** (anything except 0) is NAN
|
||||
* 5. +-(|x| > 1) ** +INF is +INF
|
||||
* 6. +-(|x| > 1) ** -INF is +0
|
||||
* 7. +-(|x| < 1) ** +INF is +0
|
||||
* 8. +-(|x| < 1) ** -INF is +INF
|
||||
* 9. +-1 ** +-INF is NAN
|
||||
* 10. +0 ** (+anything except 0, NAN) is +0
|
||||
* 11. -0 ** (+anything except 0, NAN, odd integer) is +0
|
||||
* 12. +0 ** (-anything except 0, NAN) is +INF
|
||||
* 13. -0 ** (-anything except 0, NAN, odd integer) is +INF
|
||||
* 14. -0 ** (odd integer) = -( +0 ** (odd integer) )
|
||||
* 15. +INF ** (+anything except 0,NAN) is +INF
|
||||
* 16. +INF ** (-anything except 0,NAN) is +0
|
||||
* 17. -INF ** (anything) = -0 ** (-anything)
|
||||
* 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
|
||||
* 19. (-anything except 0 and inf) ** (non-integer) is NAN
|
||||
*
|
||||
* Accuracy:
|
||||
* pow(x,y) returns x**y nearly rounded. In particular
|
||||
* pow(integer,integer)
|
||||
* always returns the correct integer provided it is
|
||||
* representable.
|
||||
*
|
||||
* Constants :
|
||||
* The hexadecimal values are the intended ones for the following
|
||||
* constants. The decimal values may be used, provided that the
|
||||
* compiler will convert from decimal to binary accurately enough
|
||||
* to produce the hexadecimal values shown.
|
||||
*/
|
||||
|
||||
#include "math.h"
|
||||
#include "math_private.h"
|
||||
|
||||
#ifdef __STDC__
|
||||
static const double
|
||||
#else
|
||||
static double
|
||||
#endif
|
||||
bp[] = {1.0, 1.5,},
|
||||
dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
|
||||
dp_l[] = { 0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */
|
||||
zero = 0.0,
|
||||
one = 1.0,
|
||||
two = 2.0,
|
||||
two53 = 9007199254740992.0, /* 0x43400000, 0x00000000 */
|
||||
huge = 1.0e300,
|
||||
tiny = 1.0e-300,
|
||||
/* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */
|
||||
L1 = 5.99999999999994648725e-01, /* 0x3FE33333, 0x33333303 */
|
||||
L2 = 4.28571428578550184252e-01, /* 0x3FDB6DB6, 0xDB6FABFF */
|
||||
L3 = 3.33333329818377432918e-01, /* 0x3FD55555, 0x518F264D */
|
||||
L4 = 2.72728123808534006489e-01, /* 0x3FD17460, 0xA91D4101 */
|
||||
L5 = 2.30660745775561754067e-01, /* 0x3FCD864A, 0x93C9DB65 */
|
||||
L6 = 2.06975017800338417784e-01, /* 0x3FCA7E28, 0x4A454EEF */
|
||||
P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
|
||||
P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
|
||||
P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
|
||||
P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
|
||||
P5 = 4.13813679705723846039e-08, /* 0x3E663769, 0x72BEA4D0 */
|
||||
lg2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
|
||||
lg2_h = 6.93147182464599609375e-01, /* 0x3FE62E43, 0x00000000 */
|
||||
lg2_l = -1.90465429995776804525e-09, /* 0xBE205C61, 0x0CA86C39 */
|
||||
ovt = 8.0085662595372944372e-0017, /* -(1024-log2(ovfl+.5ulp)) */
|
||||
cp = 9.61796693925975554329e-01, /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */
|
||||
cp_h = 9.61796700954437255859e-01, /* 0x3FEEC709, 0xE0000000 =(float)cp */
|
||||
cp_l = -7.02846165095275826516e-09, /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h*/
|
||||
ivln2 = 1.44269504088896338700e+00, /* 0x3FF71547, 0x652B82FE =1/ln2 */
|
||||
ivln2_h = 1.44269502162933349609e+00, /* 0x3FF71547, 0x60000000 =24b 1/ln2*/
|
||||
ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
|
||||
|
||||
#ifdef __STDC__
|
||||
double __ieee754_pow(double x, double y)
|
||||
#else
|
||||
double __ieee754_pow(x,y)
|
||||
double x, y;
|
||||
#endif
|
||||
{
|
||||
double z,ax,z_h,z_l,p_h,p_l;
|
||||
double y1,t1,t2,r,s,t,u,v,w;
|
||||
int32_t i,j,k,yisint,n;
|
||||
int32_t hx,hy,ix,iy;
|
||||
u_int32_t lx,ly;
|
||||
|
||||
EXTRACT_WORDS(hx,lx,x);
|
||||
EXTRACT_WORDS(hy,ly,y);
|
||||
ix = hx&0x7fffffff; iy = hy&0x7fffffff;
|
||||
|
||||
/* y==zero: x**0 = 1 */
|
||||
if((iy|ly)==0) return one;
|
||||
|
||||
/* +-NaN return x+y */
|
||||
if(ix > 0x7ff00000 || ((ix==0x7ff00000)&&(lx!=0)) ||
|
||||
iy > 0x7ff00000 || ((iy==0x7ff00000)&&(ly!=0)))
|
||||
return x+y;
|
||||
|
||||
/* determine if y is an odd int when x < 0
|
||||
* yisint = 0 ... y is not an integer
|
||||
* yisint = 1 ... y is an odd int
|
||||
* yisint = 2 ... y is an even int
|
||||
*/
|
||||
yisint = 0;
|
||||
if(hx<0) {
|
||||
if(iy>=0x43400000) yisint = 2; /* even integer y */
|
||||
else if(iy>=0x3ff00000) {
|
||||
k = (iy>>20)-0x3ff; /* exponent */
|
||||
if(k>20) {
|
||||
j = ly>>(52-k);
|
||||
if((j<<(52-k))==ly) yisint = 2-(j&1);
|
||||
} else if(ly==0) {
|
||||
j = iy>>(20-k);
|
||||
if((j<<(20-k))==iy) yisint = 2-(j&1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* special value of y */
|
||||
if(ly==0) {
|
||||
if (iy==0x7ff00000) { /* y is +-inf */
|
||||
if(((ix-0x3ff00000)|lx)==0)
|
||||
return y - y; /* inf**+-1 is NaN */
|
||||
else if (ix >= 0x3ff00000)/* (|x|>1)**+-inf = inf,0 */
|
||||
return (hy>=0)? y: zero;
|
||||
else /* (|x|<1)**-,+inf = inf,0 */
|
||||
return (hy<0)?-y: zero;
|
||||
}
|
||||
if(iy==0x3ff00000) { /* y is +-1 */
|
||||
if(hy<0) return one/x; else return x;
|
||||
}
|
||||
if(hy==0x40000000) return x*x; /* y is 2 */
|
||||
if(hy==0x3fe00000) { /* y is 0.5 */
|
||||
if(hx>=0) /* x >= +0 */
|
||||
return __ieee754_sqrt(x);
|
||||
}
|
||||
}
|
||||
|
||||
ax = fabs(x);
|
||||
/* special value of x */
|
||||
if(lx==0) {
|
||||
if(ix==0x7ff00000||ix==0||ix==0x3ff00000){
|
||||
z = ax; /*x is +-0,+-inf,+-1*/
|
||||
if(hy<0) z = one/z; /* z = (1/|x|) */
|
||||
if(hx<0) {
|
||||
if(((ix-0x3ff00000)|yisint)==0) {
|
||||
z = (z-z)/(z-z); /* (-1)**non-int is NaN */
|
||||
} else if(yisint==1)
|
||||
z = -z; /* (x<0)**odd = -(|x|**odd) */
|
||||
}
|
||||
return z;
|
||||
}
|
||||
}
|
||||
|
||||
/* (x<0)**(non-int) is NaN */
|
||||
if(((((u_int32_t)hx>>31)-1)|yisint)==0) return (x-x)/(x-x);
|
||||
|
||||
/* |y| is huge */
|
||||
if(iy>0x41e00000) { /* if |y| > 2**31 */
|
||||
if(iy>0x43f00000){ /* if |y| > 2**64, must o/uflow */
|
||||
if(ix<=0x3fefffff) return (hy<0)? huge*huge:tiny*tiny;
|
||||
if(ix>=0x3ff00000) return (hy>0)? huge*huge:tiny*tiny;
|
||||
}
|
||||
/* over/underflow if x is not close to one */
|
||||
if(ix<0x3fefffff) return (hy<0)? huge*huge:tiny*tiny;
|
||||
if(ix>0x3ff00000) return (hy>0)? huge*huge:tiny*tiny;
|
||||
/* now |1-x| is tiny <= 2**-20, suffice to compute
|
||||
log(x) by x-x^2/2+x^3/3-x^4/4 */
|
||||
t = x-1; /* t has 20 trailing zeros */
|
||||
w = (t*t)*(0.5-t*(0.3333333333333333333333-t*0.25));
|
||||
u = ivln2_h*t; /* ivln2_h has 21 sig. bits */
|
||||
v = t*ivln2_l-w*ivln2;
|
||||
t1 = u+v;
|
||||
SET_LOW_WORD(t1,0);
|
||||
t2 = v-(t1-u);
|
||||
} else {
|
||||
double s2,s_h,s_l,t_h,t_l;
|
||||
n = 0;
|
||||
/* take care subnormal number */
|
||||
if(ix<0x00100000)
|
||||
{ax *= two53; n -= 53; GET_HIGH_WORD(ix,ax); }
|
||||
n += ((ix)>>20)-0x3ff;
|
||||
j = ix&0x000fffff;
|
||||
/* determine interval */
|
||||
ix = j|0x3ff00000; /* normalize ix */
|
||||
if(j<=0x3988E) k=0; /* |x|<sqrt(3/2) */
|
||||
else if(j<0xBB67A) k=1; /* |x|<sqrt(3) */
|
||||
else {k=0;n+=1;ix -= 0x00100000;}
|
||||
SET_HIGH_WORD(ax,ix);
|
||||
|
||||
/* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
|
||||
u = ax-bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
|
||||
v = one/(ax+bp[k]);
|
||||
s = u*v;
|
||||
s_h = s;
|
||||
SET_LOW_WORD(s_h,0);
|
||||
/* t_h=ax+bp[k] High */
|
||||
t_h = zero;
|
||||
SET_HIGH_WORD(t_h,((ix>>1)|0x20000000)+0x00080000+(k<<18));
|
||||
t_l = ax - (t_h-bp[k]);
|
||||
s_l = v*((u-s_h*t_h)-s_h*t_l);
|
||||
/* compute log(ax) */
|
||||
s2 = s*s;
|
||||
r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
|
||||
r += s_l*(s_h+s);
|
||||
s2 = s_h*s_h;
|
||||
t_h = 3.0+s2+r;
|
||||
SET_LOW_WORD(t_h,0);
|
||||
t_l = r-((t_h-3.0)-s2);
|
||||
/* u+v = s*(1+...) */
|
||||
u = s_h*t_h;
|
||||
v = s_l*t_h+t_l*s;
|
||||
/* 2/(3log2)*(s+...) */
|
||||
p_h = u+v;
|
||||
SET_LOW_WORD(p_h,0);
|
||||
p_l = v-(p_h-u);
|
||||
z_h = cp_h*p_h; /* cp_h+cp_l = 2/(3*log2) */
|
||||
z_l = cp_l*p_h+p_l*cp+dp_l[k];
|
||||
/* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
|
||||
t = (double)n;
|
||||
t1 = (((z_h+z_l)+dp_h[k])+t);
|
||||
SET_LOW_WORD(t1,0);
|
||||
t2 = z_l-(((t1-t)-dp_h[k])-z_h);
|
||||
}
|
||||
|
||||
s = one; /* s (sign of result -ve**odd) = -1 else = 1 */
|
||||
if(((((u_int32_t)hx>>31)-1)|(yisint-1))==0)
|
||||
s = -one;/* (-ve)**(odd int) */
|
||||
|
||||
/* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
|
||||
y1 = y;
|
||||
SET_LOW_WORD(y1,0);
|
||||
p_l = (y-y1)*t1+y*t2;
|
||||
p_h = y1*t1;
|
||||
z = p_l+p_h;
|
||||
EXTRACT_WORDS(j,i,z);
|
||||
if (j>=0x40900000) { /* z >= 1024 */
|
||||
if(((j-0x40900000)|i)!=0) /* if z > 1024 */
|
||||
return s*huge*huge; /* overflow */
|
||||
else {
|
||||
if(p_l+ovt>z-p_h) return s*huge*huge; /* overflow */
|
||||
}
|
||||
} else if((j&0x7fffffff)>=0x4090cc00 ) { /* z <= -1075 */
|
||||
if(((j-0xc090cc00)|i)!=0) /* z < -1075 */
|
||||
return s*tiny*tiny; /* underflow */
|
||||
else {
|
||||
if(p_l<=z-p_h) return s*tiny*tiny; /* underflow */
|
||||
}
|
||||
}
|
||||
/*
|
||||
* compute 2**(p_h+p_l)
|
||||
*/
|
||||
i = j&0x7fffffff;
|
||||
k = (i>>20)-0x3ff;
|
||||
n = 0;
|
||||
if(i>0x3fe00000) { /* if |z| > 0.5, set n = [z+0.5] */
|
||||
n = j+(0x00100000>>(k+1));
|
||||
k = ((n&0x7fffffff)>>20)-0x3ff; /* new k for n */
|
||||
t = zero;
|
||||
SET_HIGH_WORD(t,n&~(0x000fffff>>k));
|
||||
n = ((n&0x000fffff)|0x00100000)>>(20-k);
|
||||
if(j<0) n = -n;
|
||||
p_h -= t;
|
||||
}
|
||||
t = p_l+p_h;
|
||||
SET_LOW_WORD(t,0);
|
||||
u = t*lg2_h;
|
||||
v = (p_l-(t-p_h))*lg2+t*lg2_l;
|
||||
z = u+v;
|
||||
w = v-(z-u);
|
||||
t = z*z;
|
||||
t1 = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
|
||||
r = (z*t1)/(t1-two)-(w+z*w);
|
||||
z = one-(r-z);
|
||||
GET_HIGH_WORD(j,z);
|
||||
j += (n<<20);
|
||||
if((j>>20)<=0) z = scalbn(z,n); /* subnormal output */
|
||||
else SET_HIGH_WORD(z,j);
|
||||
return s*z;
|
||||
}
|
183
platform/gp2x/code940/uClibc/e_rem_pio2.c
Normal file
183
platform/gp2x/code940/uClibc/e_rem_pio2.c
Normal file
|
@ -0,0 +1,183 @@
|
|||
/* @(#)e_rem_pio2.c 5.1 93/09/24 */
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#if defined(LIBM_SCCS) && !defined(lint)
|
||||
static char rcsid[] = "$NetBSD: e_rem_pio2.c,v 1.8 1995/05/10 20:46:02 jtc Exp $";
|
||||
#endif
|
||||
|
||||
/* __ieee754_rem_pio2(x,y)
|
||||
*
|
||||
* return the remainder of x rem pi/2 in y[0]+y[1]
|
||||
* use __kernel_rem_pio2()
|
||||
*/
|
||||
|
||||
#include "math.h"
|
||||
#include "math_private.h"
|
||||
|
||||
/*
|
||||
* Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
|
||||
*/
|
||||
#ifdef __STDC__
|
||||
static const int32_t two_over_pi[] = {
|
||||
#else
|
||||
static int32_t two_over_pi[] = {
|
||||
#endif
|
||||
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
|
||||
0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
|
||||
0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
|
||||
0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
|
||||
0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
|
||||
0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
|
||||
0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
|
||||
0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
|
||||
0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
|
||||
0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
|
||||
0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
|
||||
};
|
||||
|
||||
#ifdef __STDC__
|
||||
static const int32_t npio2_hw[] = {
|
||||
#else
|
||||
static int32_t npio2_hw[] = {
|
||||
#endif
|
||||
0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
|
||||
0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
|
||||
0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
|
||||
0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
|
||||
0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
|
||||
0x404858EB, 0x404921FB,
|
||||
};
|
||||
|
||||
/*
|
||||
* invpio2: 53 bits of 2/pi
|
||||
* pio2_1: first 33 bit of pi/2
|
||||
* pio2_1t: pi/2 - pio2_1
|
||||
* pio2_2: second 33 bit of pi/2
|
||||
* pio2_2t: pi/2 - (pio2_1+pio2_2)
|
||||
* pio2_3: third 33 bit of pi/2
|
||||
* pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
|
||||
*/
|
||||
|
||||
#ifdef __STDC__
|
||||
static const double
|
||||
#else
|
||||
static double
|
||||
#endif
|
||||
zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
|
||||
half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
|
||||
two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
|
||||
invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
|
||||
pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
|
||||
pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
|
||||
pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
|
||||
pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
|
||||
pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
|
||||
pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
|
||||
|
||||
#ifdef __STDC__
|
||||
int32_t __ieee754_rem_pio2(double x, double *y)
|
||||
#else
|
||||
int32_t __ieee754_rem_pio2(x,y)
|
||||
double x,y[];
|
||||
#endif
|
||||
{
|
||||
double z=0.0,w,t,r,fn;
|
||||
double tx[3];
|
||||
int32_t e0,i,j,nx,n,ix,hx;
|
||||
u_int32_t low;
|
||||
|
||||
GET_HIGH_WORD(hx,x); /* high word of x */
|
||||
ix = hx&0x7fffffff;
|
||||
if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
|
||||
{y[0] = x; y[1] = 0; return 0;}
|
||||
if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
|
||||
if(hx>0) {
|
||||
z = x - pio2_1;
|
||||
if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
|
||||
y[0] = z - pio2_1t;
|
||||
y[1] = (z-y[0])-pio2_1t;
|
||||
} else { /* near pi/2, use 33+33+53 bit pi */
|
||||
z -= pio2_2;
|
||||
y[0] = z - pio2_2t;
|
||||
y[1] = (z-y[0])-pio2_2t;
|
||||
}
|
||||
return 1;
|
||||
} else { /* negative x */
|
||||
z = x + pio2_1;
|
||||
if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
|
||||
y[0] = z + pio2_1t;
|
||||
y[1] = (z-y[0])+pio2_1t;
|
||||
} else { /* near pi/2, use 33+33+53 bit pi */
|
||||
z += pio2_2;
|
||||
y[0] = z + pio2_2t;
|
||||
y[1] = (z-y[0])+pio2_2t;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
|
||||
t = fabs(x);
|
||||
n = (int32_t) (t*invpio2+half);
|
||||
fn = (double)n;
|
||||
r = t-fn*pio2_1;
|
||||
w = fn*pio2_1t; /* 1st round good to 85 bit */
|
||||
if(n<32&&ix!=npio2_hw[n-1]) {
|
||||
y[0] = r-w; /* quick check no cancellation */
|
||||
} else {
|
||||
u_int32_t high;
|
||||
j = ix>>20;
|
||||
y[0] = r-w;
|
||||
GET_HIGH_WORD(high,y[0]);
|
||||
i = j-((high>>20)&0x7ff);
|
||||
if(i>16) { /* 2nd iteration needed, good to 118 */
|
||||
t = r;
|
||||
w = fn*pio2_2;
|
||||
r = t-w;
|
||||
w = fn*pio2_2t-((t-r)-w);
|
||||
y[0] = r-w;
|
||||
GET_HIGH_WORD(high,y[0]);
|
||||
i = j-((high>>20)&0x7ff);
|
||||
if(i>49) { /* 3rd iteration need, 151 bits acc */
|
||||
t = r; /* will cover all possible cases */
|
||||
w = fn*pio2_3;
|
||||
r = t-w;
|
||||
w = fn*pio2_3t-((t-r)-w);
|
||||
y[0] = r-w;
|
||||
}
|
||||
}
|
||||
}
|
||||
y[1] = (r-y[0])-w;
|
||||
if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
|
||||
else return n;
|
||||
}
|
||||
/*
|
||||
* all other (large) arguments
|
||||
*/
|
||||
if(ix>=0x7ff00000) { /* x is inf or NaN */
|
||||
y[0]=y[1]=x-x; return 0;
|
||||
}
|
||||
/* set z = scalbn(|x|,ilogb(x)-23) */
|
||||
GET_LOW_WORD(low,x);
|
||||
SET_LOW_WORD(z,low);
|
||||
e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */
|
||||
SET_HIGH_WORD(z, ix - ((int32_t)(e0<<20)));
|
||||
for(i=0;i<2;i++) {
|
||||
tx[i] = (double)((int32_t)(z));
|
||||
z = (z-tx[i])*two24;
|
||||
}
|
||||
tx[2] = z;
|
||||
nx = 3;
|
||||
while(tx[nx-1]==zero) nx--; /* skip zero term */
|
||||
n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
|
||||
if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
|
||||
return n;
|
||||
}
|
453
platform/gp2x/code940/uClibc/e_sqrt.c
Normal file
453
platform/gp2x/code940/uClibc/e_sqrt.c
Normal file
|
@ -0,0 +1,453 @@
|
|||
/* @(#)e_sqrt.c 5.1 93/09/24 */
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#if defined(LIBM_SCCS) && !defined(lint)
|
||||
static char rcsid[] = "$NetBSD: e_sqrt.c,v 1.8 1995/05/10 20:46:17 jtc Exp $";
|
||||
#endif
|
||||
|
||||
/* __ieee754_sqrt(x)
|
||||
* Return correctly rounded sqrt.
|
||||
* ------------------------------------------
|
||||
* | Use the hardware sqrt if you have one |
|
||||
* ------------------------------------------
|
||||
* Method:
|
||||
* Bit by bit method using integer arithmetic. (Slow, but portable)
|
||||
* 1. Normalization
|
||||
* Scale x to y in [1,4) with even powers of 2:
|
||||
* find an integer k such that 1 <= (y=x*2^(2k)) < 4, then
|
||||
* sqrt(x) = 2^k * sqrt(y)
|
||||
* 2. Bit by bit computation
|
||||
* Let q = sqrt(y) truncated to i bit after binary point (q = 1),
|
||||
* i 0
|
||||
* i+1 2
|
||||
* s = 2*q , and y = 2 * ( y - q ). (1)
|
||||
* i i i i
|
||||
*
|
||||
* To compute q from q , one checks whether
|
||||
* i+1 i
|
||||
*
|
||||
* -(i+1) 2
|
||||
* (q + 2 ) <= y. (2)
|
||||
* i
|
||||
* -(i+1)
|
||||
* If (2) is false, then q = q ; otherwise q = q + 2 .
|
||||
* i+1 i i+1 i
|
||||
*
|
||||
* With some algebric manipulation, it is not difficult to see
|
||||
* that (2) is equivalent to
|
||||
* -(i+1)
|
||||
* s + 2 <= y (3)
|
||||
* i i
|
||||
*
|
||||
* The advantage of (3) is that s and y can be computed by
|
||||
* i i
|
||||
* the following recurrence formula:
|
||||
* if (3) is false
|
||||
*
|
||||
* s = s , y = y ; (4)
|
||||
* i+1 i i+1 i
|
||||
*
|
||||
* otherwise,
|
||||
* -i -(i+1)
|
||||
* s = s + 2 , y = y - s - 2 (5)
|
||||
* i+1 i i+1 i i
|
||||
*
|
||||
* One may easily use induction to prove (4) and (5).
|
||||
* Note. Since the left hand side of (3) contain only i+2 bits,
|
||||
* it does not necessary to do a full (53-bit) comparison
|
||||
* in (3).
|
||||
* 3. Final rounding
|
||||
* After generating the 53 bits result, we compute one more bit.
|
||||
* Together with the remainder, we can decide whether the
|
||||
* result is exact, bigger than 1/2ulp, or less than 1/2ulp
|
||||
* (it will never equal to 1/2ulp).
|
||||
* The rounding mode can be detected by checking whether
|
||||
* huge + tiny is equal to huge, and whether huge - tiny is
|
||||
* equal to huge for some floating point number "huge" and "tiny".
|
||||
*
|
||||
* Special cases:
|
||||
* sqrt(+-0) = +-0 ... exact
|
||||
* sqrt(inf) = inf
|
||||
* sqrt(-ve) = NaN ... with invalid signal
|
||||
* sqrt(NaN) = NaN ... with invalid signal for signaling NaN
|
||||
*
|
||||
* Other methods : see the appended file at the end of the program below.
|
||||
*---------------
|
||||
*/
|
||||
|
||||
#include "math.h"
|
||||
#include "math_private.h"
|
||||
|
||||
#ifdef __STDC__
|
||||
static const double one = 1.0, tiny=1.0e-300;
|
||||
#else
|
||||
static double one = 1.0, tiny=1.0e-300;
|
||||
#endif
|
||||
|
||||
#ifdef __STDC__
|
||||
double __ieee754_sqrt(double x)
|
||||
#else
|
||||
double __ieee754_sqrt(x)
|
||||
double x;
|
||||
#endif
|
||||
{
|
||||
double z;
|
||||
int32_t sign = (int)0x80000000;
|
||||
int32_t ix0,s0,q,m,t,i;
|
||||
u_int32_t r,t1,s1,ix1,q1;
|
||||
|
||||
EXTRACT_WORDS(ix0,ix1,x);
|
||||
|
||||
/* take care of Inf and NaN */
|
||||
if((ix0&0x7ff00000)==0x7ff00000) {
|
||||
return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
|
||||
sqrt(-inf)=sNaN */
|
||||
}
|
||||
/* take care of zero */
|
||||
if(ix0<=0) {
|
||||
if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
|
||||
else if(ix0<0)
|
||||
return (x-x)/(x-x); /* sqrt(-ve) = sNaN */
|
||||
}
|
||||
/* normalize x */
|
||||
m = (ix0>>20);
|
||||
if(m==0) { /* subnormal x */
|
||||
while(ix0==0) {
|
||||
m -= 21;
|
||||
ix0 |= (ix1>>11); ix1 <<= 21;
|
||||
}
|
||||
for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
|
||||
m -= i-1;
|
||||
ix0 |= (ix1>>(32-i));
|
||||
ix1 <<= i;
|
||||
}
|
||||
m -= 1023; /* unbias exponent */
|
||||
ix0 = (ix0&0x000fffff)|0x00100000;
|
||||
if(m&1){ /* odd m, double x to make it even */
|
||||
ix0 += ix0 + ((ix1&sign)>>31);
|
||||
ix1 += ix1;
|
||||
}
|
||||
m >>= 1; /* m = [m/2] */
|
||||
|
||||
/* generate sqrt(x) bit by bit */
|
||||
ix0 += ix0 + ((ix1&sign)>>31);
|
||||
ix1 += ix1;
|
||||
q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */
|
||||
r = 0x00200000; /* r = moving bit from right to left */
|
||||
|
||||
while(r!=0) {
|
||||
t = s0+r;
|
||||
if(t<=ix0) {
|
||||
s0 = t+r;
|
||||
ix0 -= t;
|
||||
q += r;
|
||||
}
|
||||
ix0 += ix0 + ((ix1&sign)>>31);
|
||||
ix1 += ix1;
|
||||
r>>=1;
|
||||
}
|
||||
|
||||
r = sign;
|
||||
while(r!=0) {
|
||||
t1 = s1+r;
|
||||
t = s0;
|
||||
if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
|
||||
s1 = t1+r;
|
||||
if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
|
||||
ix0 -= t;
|
||||
if (ix1 < t1) ix0 -= 1;
|
||||
ix1 -= t1;
|
||||
q1 += r;
|
||||
}
|
||||
ix0 += ix0 + ((ix1&sign)>>31);
|
||||
ix1 += ix1;
|
||||
r>>=1;
|
||||
}
|
||||
|
||||
/* use floating add to find out rounding direction */
|
||||
if((ix0|ix1)!=0) {
|
||||
z = one-tiny; /* trigger inexact flag */
|
||||
if (z>=one) {
|
||||
z = one+tiny;
|
||||
if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;}
|
||||
else if (z>one) {
|
||||
if (q1==(u_int32_t)0xfffffffe) q+=1;
|
||||
q1+=2;
|
||||
} else
|
||||
q1 += (q1&1);
|
||||
}
|
||||
}
|
||||
ix0 = (q>>1)+0x3fe00000;
|
||||
ix1 = q1>>1;
|
||||
if ((q&1)==1) ix1 |= sign;
|
||||
ix0 += (m <<20);
|
||||
INSERT_WORDS(z,ix0,ix1);
|
||||
return z;
|
||||
}
|
||||
|
||||
/*
|
||||
Other methods (use floating-point arithmetic)
|
||||
-------------
|
||||
(This is a copy of a drafted paper by Prof W. Kahan
|
||||
and K.C. Ng, written in May, 1986)
|
||||
|
||||
Two algorithms are given here to implement sqrt(x)
|
||||
(IEEE double precision arithmetic) in software.
|
||||
Both supply sqrt(x) correctly rounded. The first algorithm (in
|
||||
Section A) uses newton iterations and involves four divisions.
|
||||
The second one uses reciproot iterations to avoid division, but
|
||||
requires more multiplications. Both algorithms need the ability
|
||||
to chop results of arithmetic operations instead of round them,
|
||||
and the INEXACT flag to indicate when an arithmetic operation
|
||||
is executed exactly with no roundoff error, all part of the
|
||||
standard (IEEE 754-1985). The ability to perform shift, add,
|
||||
subtract and logical AND operations upon 32-bit words is needed
|
||||
too, though not part of the standard.
|
||||
|
||||
A. sqrt(x) by Newton Iteration
|
||||
|
||||
(1) Initial approximation
|
||||
|
||||
Let x0 and x1 be the leading and the trailing 32-bit words of
|
||||
a floating point number x (in IEEE double format) respectively
|
||||
|
||||
1 11 52 ...widths
|
||||
------------------------------------------------------
|
||||
x: |s| e | f |
|
||||
------------------------------------------------------
|
||||
msb lsb msb lsb ...order
|
||||
|
||||
|
||||
------------------------ ------------------------
|
||||
x0: |s| e | f1 | x1: | f2 |
|
||||
------------------------ ------------------------
|
||||
|
||||
By performing shifts and subtracts on x0 and x1 (both regarded
|
||||
as integers), we obtain an 8-bit approximation of sqrt(x) as
|
||||
follows.
|
||||
|
||||
k := (x0>>1) + 0x1ff80000;
|
||||
y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits
|
||||
Here k is a 32-bit integer and T1[] is an integer array containing
|
||||
correction terms. Now magically the floating value of y (y's
|
||||
leading 32-bit word is y0, the value of its trailing word is 0)
|
||||
approximates sqrt(x) to almost 8-bit.
|
||||
|
||||
Value of T1:
|
||||
static int T1[32]= {
|
||||
0, 1024, 3062, 5746, 9193, 13348, 18162, 23592,
|
||||
29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215,
|
||||
83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581,
|
||||
16499, 12183, 8588, 5674, 3403, 1742, 661, 130,};
|
||||
|
||||
(2) Iterative refinement
|
||||
|
||||
Apply Heron's rule three times to y, we have y approximates
|
||||
sqrt(x) to within 1 ulp (Unit in the Last Place):
|
||||
|
||||
y := (y+x/y)/2 ... almost 17 sig. bits
|
||||
y := (y+x/y)/2 ... almost 35 sig. bits
|
||||
y := y-(y-x/y)/2 ... within 1 ulp
|
||||
|
||||
|
||||
Remark 1.
|
||||
Another way to improve y to within 1 ulp is:
|
||||
|
||||
y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x)
|
||||
y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x)
|
||||
|
||||
2
|
||||
(x-y )*y
|
||||
y := y + 2* ---------- ...within 1 ulp
|
||||
2
|
||||
3y + x
|
||||
|
||||
|
||||
This formula has one division fewer than the one above; however,
|
||||
it requires more multiplications and additions. Also x must be
|
||||
scaled in advance to avoid spurious overflow in evaluating the
|
||||
expression 3y*y+x. Hence it is not recommended uless division
|
||||
is slow. If division is very slow, then one should use the
|
||||
reciproot algorithm given in section B.
|
||||
|
||||
(3) Final adjustment
|
||||
|
||||
By twiddling y's last bit it is possible to force y to be
|
||||
correctly rounded according to the prevailing rounding mode
|
||||
as follows. Let r and i be copies of the rounding mode and
|
||||
inexact flag before entering the square root program. Also we
|
||||
use the expression y+-ulp for the next representable floating
|
||||
numbers (up and down) of y. Note that y+-ulp = either fixed
|
||||
point y+-1, or multiply y by nextafter(1,+-inf) in chopped
|
||||
mode.
|
||||
|
||||
I := FALSE; ... reset INEXACT flag I
|
||||
R := RZ; ... set rounding mode to round-toward-zero
|
||||
z := x/y; ... chopped quotient, possibly inexact
|
||||
If(not I) then { ... if the quotient is exact
|
||||
if(z=y) {
|
||||
I := i; ... restore inexact flag
|
||||
R := r; ... restore rounded mode
|
||||
return sqrt(x):=y.
|
||||
} else {
|
||||
z := z - ulp; ... special rounding
|
||||
}
|
||||
}
|
||||
i := TRUE; ... sqrt(x) is inexact
|
||||
If (r=RN) then z=z+ulp ... rounded-to-nearest
|
||||
If (r=RP) then { ... round-toward-+inf
|
||||
y = y+ulp; z=z+ulp;
|
||||
}
|
||||
y := y+z; ... chopped sum
|
||||
y0:=y0-0x00100000; ... y := y/2 is correctly rounded.
|
||||
I := i; ... restore inexact flag
|
||||
R := r; ... restore rounded mode
|
||||
return sqrt(x):=y.
|
||||
|
||||
(4) Special cases
|
||||
|
||||
Square root of +inf, +-0, or NaN is itself;
|
||||
Square root of a negative number is NaN with invalid signal.
|
||||
|
||||
|
||||
B. sqrt(x) by Reciproot Iteration
|
||||
|
||||
(1) Initial approximation
|
||||
|
||||
Let x0 and x1 be the leading and the trailing 32-bit words of
|
||||
a floating point number x (in IEEE double format) respectively
|
||||
(see section A). By performing shifs and subtracts on x0 and y0,
|
||||
we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
|
||||
|
||||
k := 0x5fe80000 - (x0>>1);
|
||||
y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits
|
||||
|
||||
Here k is a 32-bit integer and T2[] is an integer array
|
||||
containing correction terms. Now magically the floating
|
||||
value of y (y's leading 32-bit word is y0, the value of
|
||||
its trailing word y1 is set to zero) approximates 1/sqrt(x)
|
||||
to almost 7.8-bit.
|
||||
|
||||
Value of T2:
|
||||
static int T2[64]= {
|
||||
0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
|
||||
0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
|
||||
0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
|
||||
0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
|
||||
0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
|
||||
0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
|
||||
0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
|
||||
0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,};
|
||||
|
||||
(2) Iterative refinement
|
||||
|
||||
Apply Reciproot iteration three times to y and multiply the
|
||||
result by x to get an approximation z that matches sqrt(x)
|
||||
to about 1 ulp. To be exact, we will have
|
||||
-1ulp < sqrt(x)-z<1.0625ulp.
|
||||
|
||||
... set rounding mode to Round-to-nearest
|
||||
y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x)
|
||||
y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
|
||||
... special arrangement for better accuracy
|
||||
z := x*y ... 29 bits to sqrt(x), with z*y<1
|
||||
z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x)
|
||||
|
||||
Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
|
||||
(a) the term z*y in the final iteration is always less than 1;
|
||||
(b) the error in the final result is biased upward so that
|
||||
-1 ulp < sqrt(x) - z < 1.0625 ulp
|
||||
instead of |sqrt(x)-z|<1.03125ulp.
|
||||
|
||||
(3) Final adjustment
|
||||
|
||||
By twiddling y's last bit it is possible to force y to be
|
||||
correctly rounded according to the prevailing rounding mode
|
||||
as follows. Let r and i be copies of the rounding mode and
|
||||
inexact flag before entering the square root program. Also we
|
||||
use the expression y+-ulp for the next representable floating
|
||||
numbers (up and down) of y. Note that y+-ulp = either fixed
|
||||
point y+-1, or multiply y by nextafter(1,+-inf) in chopped
|
||||
mode.
|
||||
|
||||
R := RZ; ... set rounding mode to round-toward-zero
|
||||
switch(r) {
|
||||
case RN: ... round-to-nearest
|
||||
if(x<= z*(z-ulp)...chopped) z = z - ulp; else
|
||||
if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
|
||||
break;
|
||||
case RZ:case RM: ... round-to-zero or round-to--inf
|
||||
R:=RP; ... reset rounding mod to round-to-+inf
|
||||
if(x<z*z ... rounded up) z = z - ulp; else
|
||||
if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
|
||||
break;
|
||||
case RP: ... round-to-+inf
|
||||
if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
|
||||
if(x>z*z ...chopped) z = z+ulp;
|
||||
break;
|
||||
}
|
||||
|
||||
Remark 3. The above comparisons can be done in fixed point. For
|
||||
example, to compare x and w=z*z chopped, it suffices to compare
|
||||
x1 and w1 (the trailing parts of x and w), regarding them as
|
||||
two's complement integers.
|
||||
|
||||
...Is z an exact square root?
|
||||
To determine whether z is an exact square root of x, let z1 be the
|
||||
trailing part of z, and also let x0 and x1 be the leading and
|
||||
trailing parts of x.
|
||||
|
||||
If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0
|
||||
I := 1; ... Raise Inexact flag: z is not exact
|
||||
else {
|
||||
j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2
|
||||
k := z1 >> 26; ... get z's 25-th and 26-th
|
||||
fraction bits
|
||||
I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
|
||||
}
|
||||
R:= r ... restore rounded mode
|
||||
return sqrt(x):=z.
|
||||
|
||||
If multiplication is cheaper then the foregoing red tape, the
|
||||
Inexact flag can be evaluated by
|
||||
|
||||
I := i;
|
||||
I := (z*z!=x) or I.
|
||||
|
||||
Note that z*z can overwrite I; this value must be sensed if it is
|
||||
True.
|
||||
|
||||
Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
|
||||
zero.
|
||||
|
||||
--------------------
|
||||
z1: | f2 |
|
||||
--------------------
|
||||
bit 31 bit 0
|
||||
|
||||
Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
|
||||
or even of logb(x) have the following relations:
|
||||
|
||||
-------------------------------------------------
|
||||
bit 27,26 of z1 bit 1,0 of x1 logb(x)
|
||||
-------------------------------------------------
|
||||
00 00 odd and even
|
||||
01 01 even
|
||||
10 10 odd
|
||||
10 00 even
|
||||
11 01 even
|
||||
-------------------------------------------------
|
||||
|
||||
(4) Special cases (see (4) of Section A).
|
||||
|
||||
*/
|
||||
|
96
platform/gp2x/code940/uClibc/k_cos.c
Normal file
96
platform/gp2x/code940/uClibc/k_cos.c
Normal file
|
@ -0,0 +1,96 @@
|
|||
/* @(#)k_cos.c 5.1 93/09/24 */
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#if defined(LIBM_SCCS) && !defined(lint)
|
||||
static char rcsid[] = "$NetBSD: k_cos.c,v 1.8 1995/05/10 20:46:22 jtc Exp $";
|
||||
#endif
|
||||
|
||||
/*
|
||||
* __kernel_cos( x, y )
|
||||
* kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
|
||||
* Input x is assumed to be bounded by ~pi/4 in magnitude.
|
||||
* Input y is the tail of x.
|
||||
*
|
||||
* Algorithm
|
||||
* 1. Since cos(-x) = cos(x), we need only to consider positive x.
|
||||
* 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
|
||||
* 3. cos(x) is approximated by a polynomial of degree 14 on
|
||||
* [0,pi/4]
|
||||
* 4 14
|
||||
* cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
|
||||
* where the remez error is
|
||||
*
|
||||
* | 2 4 6 8 10 12 14 | -58
|
||||
* |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
|
||||
* | |
|
||||
*
|
||||
* 4 6 8 10 12 14
|
||||
* 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
|
||||
* cos(x) = 1 - x*x/2 + r
|
||||
* since cos(x+y) ~ cos(x) - sin(x)*y
|
||||
* ~ cos(x) - x*y,
|
||||
* a correction term is necessary in cos(x) and hence
|
||||
* cos(x+y) = 1 - (x*x/2 - (r - x*y))
|
||||
* For better accuracy when x > 0.3, let qx = |x|/4 with
|
||||
* the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
|
||||
* Then
|
||||
* cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
|
||||
* Note that 1-qx and (x*x/2-qx) is EXACT here, and the
|
||||
* magnitude of the latter is at least a quarter of x*x/2,
|
||||
* thus, reducing the rounding error in the subtraction.
|
||||
*/
|
||||
|
||||
#include "math.h"
|
||||
#include "math_private.h"
|
||||
|
||||
#ifdef __STDC__
|
||||
static const double
|
||||
#else
|
||||
static double
|
||||
#endif
|
||||
one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
|
||||
C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
|
||||
C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
|
||||
C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
|
||||
C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
|
||||
C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
|
||||
C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
|
||||
|
||||
#ifdef __STDC__
|
||||
double __kernel_cos(double x, double y)
|
||||
#else
|
||||
double __kernel_cos(x, y)
|
||||
double x,y;
|
||||
#endif
|
||||
{
|
||||
double a,hz,z,r,qx;
|
||||
int32_t ix;
|
||||
GET_HIGH_WORD(ix,x);
|
||||
ix &= 0x7fffffff; /* ix = |x|'s high word*/
|
||||
if(ix<0x3e400000) { /* if x < 2**27 */
|
||||
if(((int)x)==0) return one; /* generate inexact */
|
||||
}
|
||||
z = x*x;
|
||||
r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
|
||||
if(ix < 0x3FD33333) /* if |x| < 0.3 */
|
||||
return one - (0.5*z - (z*r - x*y));
|
||||
else {
|
||||
if(ix > 0x3fe90000) { /* x > 0.78125 */
|
||||
qx = 0.28125;
|
||||
} else {
|
||||
INSERT_WORDS(qx,ix-0x00200000,0); /* x/4 */
|
||||
}
|
||||
hz = 0.5*z-qx;
|
||||
a = one-qx;
|
||||
return a - (hz - (z*r-x*y));
|
||||
}
|
||||
}
|
320
platform/gp2x/code940/uClibc/k_rem_pio2.c
Normal file
320
platform/gp2x/code940/uClibc/k_rem_pio2.c
Normal file
|
@ -0,0 +1,320 @@
|
|||
/* @(#)k_rem_pio2.c 5.1 93/09/24 */
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#if defined(LIBM_SCCS) && !defined(lint)
|
||||
static char rcsid[] = "$NetBSD: k_rem_pio2.c,v 1.7 1995/05/10 20:46:25 jtc Exp $";
|
||||
#endif
|
||||
|
||||
/*
|
||||
* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
|
||||
* double x[],y[]; int e0,nx,prec; int ipio2[];
|
||||
*
|
||||
* __kernel_rem_pio2 return the last three digits of N with
|
||||
* y = x - N*pi/2
|
||||
* so that |y| < pi/2.
|
||||
*
|
||||
* The method is to compute the integer (mod 8) and fraction parts of
|
||||
* (2/pi)*x without doing the full multiplication. In general we
|
||||
* skip the part of the product that are known to be a huge integer (
|
||||
* more accurately, = 0 mod 8 ). Thus the number of operations are
|
||||
* independent of the exponent of the input.
|
||||
*
|
||||
* (2/pi) is represented by an array of 24-bit integers in ipio2[].
|
||||
*
|
||||
* Input parameters:
|
||||
* x[] The input value (must be positive) is broken into nx
|
||||
* pieces of 24-bit integers in double precision format.
|
||||
* x[i] will be the i-th 24 bit of x. The scaled exponent
|
||||
* of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
|
||||
* match x's up to 24 bits.
|
||||
*
|
||||
* Example of breaking a double positive z into x[0]+x[1]+x[2]:
|
||||
* e0 = ilogb(z)-23
|
||||
* z = scalbn(z,-e0)
|
||||
* for i = 0,1,2
|
||||
* x[i] = floor(z)
|
||||
* z = (z-x[i])*2**24
|
||||
*
|
||||
*
|
||||
* y[] ouput result in an array of double precision numbers.
|
||||
* The dimension of y[] is:
|
||||
* 24-bit precision 1
|
||||
* 53-bit precision 2
|
||||
* 64-bit precision 2
|
||||
* 113-bit precision 3
|
||||
* The actual value is the sum of them. Thus for 113-bit
|
||||
* precison, one may have to do something like:
|
||||
*
|
||||
* long double t,w,r_head, r_tail;
|
||||
* t = (long double)y[2] + (long double)y[1];
|
||||
* w = (long double)y[0];
|
||||
* r_head = t+w;
|
||||
* r_tail = w - (r_head - t);
|
||||
*
|
||||
* e0 The exponent of x[0]
|
||||
*
|
||||
* nx dimension of x[]
|
||||
*
|
||||
* prec an integer indicating the precision:
|
||||
* 0 24 bits (single)
|
||||
* 1 53 bits (double)
|
||||
* 2 64 bits (extended)
|
||||
* 3 113 bits (quad)
|
||||
*
|
||||
* ipio2[]
|
||||
* integer array, contains the (24*i)-th to (24*i+23)-th
|
||||
* bit of 2/pi after binary point. The corresponding
|
||||
* floating value is
|
||||
*
|
||||
* ipio2[i] * 2^(-24(i+1)).
|
||||
*
|
||||
* External function:
|
||||
* double scalbn(), floor();
|
||||
*
|
||||
*
|
||||
* Here is the description of some local variables:
|
||||
*
|
||||
* jk jk+1 is the initial number of terms of ipio2[] needed
|
||||
* in the computation. The recommended value is 2,3,4,
|
||||
* 6 for single, double, extended,and quad.
|
||||
*
|
||||
* jz local integer variable indicating the number of
|
||||
* terms of ipio2[] used.
|
||||
*
|
||||
* jx nx - 1
|
||||
*
|
||||
* jv index for pointing to the suitable ipio2[] for the
|
||||
* computation. In general, we want
|
||||
* ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
|
||||
* is an integer. Thus
|
||||
* e0-3-24*jv >= 0 or (e0-3)/24 >= jv
|
||||
* Hence jv = max(0,(e0-3)/24).
|
||||
*
|
||||
* jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
|
||||
*
|
||||
* q[] double array with integral value, representing the
|
||||
* 24-bits chunk of the product of x and 2/pi.
|
||||
*
|
||||
* q0 the corresponding exponent of q[0]. Note that the
|
||||
* exponent for q[i] would be q0-24*i.
|
||||
*
|
||||
* PIo2[] double precision array, obtained by cutting pi/2
|
||||
* into 24 bits chunks.
|
||||
*
|
||||
* f[] ipio2[] in floating point
|
||||
*
|
||||
* iq[] integer array by breaking up q[] in 24-bits chunk.
|
||||
*
|
||||
* fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
|
||||
*
|
||||
* ih integer. If >0 it indicates q[] is >= 0.5, hence
|
||||
* it also indicates the *sign* of the result.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* Constants:
|
||||
* The hexadecimal values are the intended ones for the following
|
||||
* constants. The decimal values may be used, provided that the
|
||||
* compiler will convert from decimal to binary accurately enough
|
||||
* to produce the hexadecimal values shown.
|
||||
*/
|
||||
|
||||
#include "math.h"
|
||||
#include "math_private.h"
|
||||
|
||||
#ifdef __STDC__
|
||||
static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
|
||||
#else
|
||||
static int init_jk[] = {2,3,4,6};
|
||||
#endif
|
||||
|
||||
#ifdef __STDC__
|
||||
static const double PIo2[] = {
|
||||
#else
|
||||
static double PIo2[] = {
|
||||
#endif
|
||||
1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
|
||||
7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
|
||||
5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
|
||||
3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
|
||||
1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
|
||||
1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
|
||||
2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
|
||||
2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
|
||||
};
|
||||
|
||||
#ifdef __STDC__
|
||||
static const double
|
||||
#else
|
||||
static double
|
||||
#endif
|
||||
zero = 0.0,
|
||||
one = 1.0,
|
||||
two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
|
||||
twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
|
||||
|
||||
#ifdef __STDC__
|
||||
int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int32_t *ipio2)
|
||||
#else
|
||||
int __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
|
||||
double x[], y[]; int e0,nx,prec; int32_t ipio2[];
|
||||
#endif
|
||||
{
|
||||
int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
|
||||
double z,fw,f[20],fq[20],q[20];
|
||||
|
||||
/* initialize jk*/
|
||||
jk = init_jk[prec];
|
||||
jp = jk;
|
||||
|
||||
/* determine jx,jv,q0, note that 3>q0 */
|
||||
jx = nx-1;
|
||||
jv = (e0-3)/24; if(jv<0) jv=0;
|
||||
q0 = e0-24*(jv+1);
|
||||
|
||||
/* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
|
||||
j = jv-jx; m = jx+jk;
|
||||
for(i=0;i<=m;i++,j++) f[i] = (j<0)? zero : (double) ipio2[j];
|
||||
|
||||
/* compute q[0],q[1],...q[jk] */
|
||||
for (i=0;i<=jk;i++) {
|
||||
for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
|
||||
}
|
||||
|
||||
jz = jk;
|
||||
recompute:
|
||||
/* distill q[] into iq[] reversingly */
|
||||
for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
|
||||
fw = (double)((int32_t)(twon24* z));
|
||||
iq[i] = (int32_t)(z-two24*fw);
|
||||
z = q[j-1]+fw;
|
||||
}
|
||||
|
||||
/* compute n */
|
||||
z = scalbn(z,q0); /* actual value of z */
|
||||
z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
|
||||
n = (int32_t) z;
|
||||
z -= (double)n;
|
||||
ih = 0;
|
||||
if(q0>0) { /* need iq[jz-1] to determine n */
|
||||
i = (iq[jz-1]>>(24-q0)); n += i;
|
||||
iq[jz-1] -= i<<(24-q0);
|
||||
ih = iq[jz-1]>>(23-q0);
|
||||
}
|
||||
else if(q0==0) ih = iq[jz-1]>>23;
|
||||
else if(z>=0.5) ih=2;
|
||||
|
||||
if(ih>0) { /* q > 0.5 */
|
||||
n += 1; carry = 0;
|
||||
for(i=0;i<jz ;i++) { /* compute 1-q */
|
||||
j = iq[i];
|
||||
if(carry==0) {
|
||||
if(j!=0) {
|
||||
carry = 1; iq[i] = 0x1000000- j;
|
||||
}
|
||||
} else iq[i] = 0xffffff - j;
|
||||
}
|
||||
if(q0>0) { /* rare case: chance is 1 in 12 */
|
||||
switch(q0) {
|
||||
case 1:
|
||||
iq[jz-1] &= 0x7fffff; break;
|
||||
case 2:
|
||||
iq[jz-1] &= 0x3fffff; break;
|
||||
}
|
||||
}
|
||||
if(ih==2) {
|
||||
z = one - z;
|
||||
if(carry!=0) z -= scalbn(one,q0);
|
||||
}
|
||||
}
|
||||
|
||||
/* check if recomputation is needed */
|
||||
if(z==zero) {
|
||||
j = 0;
|
||||
for (i=jz-1;i>=jk;i--) j |= iq[i];
|
||||
if(j==0) { /* need recomputation */
|
||||
for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */
|
||||
|
||||
for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */
|
||||
f[jx+i] = (double) ipio2[jv+i];
|
||||
for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
|
||||
q[i] = fw;
|
||||
}
|
||||
jz += k;
|
||||
goto recompute;
|
||||
}
|
||||
}
|
||||
|
||||
/* chop off zero terms */
|
||||
if(z==0.0) {
|
||||
jz -= 1; q0 -= 24;
|
||||
while(iq[jz]==0) { jz--; q0-=24;}
|
||||
} else { /* break z into 24-bit if necessary */
|
||||
z = scalbn(z,-q0);
|
||||
if(z>=two24) {
|
||||
fw = (double)((int32_t)(twon24*z));
|
||||
iq[jz] = (int32_t)(z-two24*fw);
|
||||
jz += 1; q0 += 24;
|
||||
iq[jz] = (int32_t) fw;
|
||||
} else iq[jz] = (int32_t) z ;
|
||||
}
|
||||
|
||||
/* convert integer "bit" chunk to floating-point value */
|
||||
fw = scalbn(one,q0);
|
||||
for(i=jz;i>=0;i--) {
|
||||
q[i] = fw*(double)iq[i]; fw*=twon24;
|
||||
}
|
||||
|
||||
/* compute PIo2[0,...,jp]*q[jz,...,0] */
|
||||
for(i=jz;i>=0;i--) {
|
||||
for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
|
||||
fq[jz-i] = fw;
|
||||
}
|
||||
|
||||
/* compress fq[] into y[] */
|
||||
switch(prec) {
|
||||
case 0:
|
||||
fw = 0.0;
|
||||
for (i=jz;i>=0;i--) fw += fq[i];
|
||||
y[0] = (ih==0)? fw: -fw;
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
fw = 0.0;
|
||||
for (i=jz;i>=0;i--) fw += fq[i];
|
||||
y[0] = (ih==0)? fw: -fw;
|
||||
fw = fq[0]-fw;
|
||||
for (i=1;i<=jz;i++) fw += fq[i];
|
||||
y[1] = (ih==0)? fw: -fw;
|
||||
break;
|
||||
case 3: /* painful */
|
||||
for (i=jz;i>0;i--) {
|
||||
fw = fq[i-1]+fq[i];
|
||||
fq[i] += fq[i-1]-fw;
|
||||
fq[i-1] = fw;
|
||||
}
|
||||
for (i=jz;i>1;i--) {
|
||||
fw = fq[i-1]+fq[i];
|
||||
fq[i] += fq[i-1]-fw;
|
||||
fq[i-1] = fw;
|
||||
}
|
||||
for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
|
||||
if(ih==0) {
|
||||
y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
|
||||
} else {
|
||||
y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
|
||||
}
|
||||
}
|
||||
return n&7;
|
||||
}
|
79
platform/gp2x/code940/uClibc/k_sin.c
Normal file
79
platform/gp2x/code940/uClibc/k_sin.c
Normal file
|
@ -0,0 +1,79 @@
|
|||
/* @(#)k_sin.c 5.1 93/09/24 */
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#if defined(LIBM_SCCS) && !defined(lint)
|
||||
static char rcsid[] = "$NetBSD: k_sin.c,v 1.8 1995/05/10 20:46:31 jtc Exp $";
|
||||
#endif
|
||||
|
||||
/* __kernel_sin( x, y, iy)
|
||||
* kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
|
||||
* Input x is assumed to be bounded by ~pi/4 in magnitude.
|
||||
* Input y is the tail of x.
|
||||
* Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
|
||||
*
|
||||
* Algorithm
|
||||
* 1. Since sin(-x) = -sin(x), we need only to consider positive x.
|
||||
* 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
|
||||
* 3. sin(x) is approximated by a polynomial of degree 13 on
|
||||
* [0,pi/4]
|
||||
* 3 13
|
||||
* sin(x) ~ x + S1*x + ... + S6*x
|
||||
* where
|
||||
*
|
||||
* |sin(x) 2 4 6 8 10 12 | -58
|
||||
* |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
|
||||
* | x |
|
||||
*
|
||||
* 4. sin(x+y) = sin(x) + sin'(x')*y
|
||||
* ~ sin(x) + (1-x*x/2)*y
|
||||
* For better accuracy, let
|
||||
* 3 2 2 2 2
|
||||
* r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
|
||||
* then 3 2
|
||||
* sin(x) = x + (S1*x + (x *(r-y/2)+y))
|
||||
*/
|
||||
|
||||
#include "math.h"
|
||||
#include "math_private.h"
|
||||
|
||||
#ifdef __STDC__
|
||||
static const double
|
||||
#else
|
||||
static double
|
||||
#endif
|
||||
half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
|
||||
S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
|
||||
S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
|
||||
S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
|
||||
S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
|
||||
S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
|
||||
S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
|
||||
|
||||
#ifdef __STDC__
|
||||
double __kernel_sin(double x, double y, int iy)
|
||||
#else
|
||||
double __kernel_sin(x, y, iy)
|
||||
double x,y; int iy; /* iy=0 if y is zero */
|
||||
#endif
|
||||
{
|
||||
double z,r,v;
|
||||
int32_t ix;
|
||||
GET_HIGH_WORD(ix,x);
|
||||
ix &= 0x7fffffff; /* high word of x */
|
||||
if(ix<0x3e400000) /* |x| < 2**-27 */
|
||||
{if((int)x==0) return x;} /* generate inexact */
|
||||
z = x*x;
|
||||
v = z*x;
|
||||
r = S2+z*(S3+z*(S4+z*(S5+z*S6)));
|
||||
if(iy==0) return x+v*(S1+z*r);
|
||||
else return x-((z*(half*y-v*r)-y)-v*S1);
|
||||
}
|
15
platform/gp2x/code940/uClibc/math.h
Normal file
15
platform/gp2x/code940/uClibc/math.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
#include <sys/types.h>
|
||||
|
||||
int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int32_t *ipio2);
|
||||
double __kernel_sin(double x, double y, int iy);
|
||||
double __kernel_cos(double x, double y);
|
||||
|
||||
double __ieee754_pow(double x, double y);
|
||||
double __ieee754_sqrt(double x);
|
||||
double __ieee754_log(double x);
|
||||
int32_t __ieee754_rem_pio2(double x, double *y);
|
||||
|
||||
double fabs(double x);
|
||||
double scalbn(double x, int n);
|
||||
double copysign(double x, double y);
|
||||
double floor(double x);
|
129
platform/gp2x/code940/uClibc/math_private.h
Normal file
129
platform/gp2x/code940/uClibc/math_private.h
Normal file
|
@ -0,0 +1,129 @@
|
|||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
/*
|
||||
* from: @(#)fdlibm.h 5.1 93/09/24
|
||||
* $Id: math_private.h,v 1.3 2004/02/09 07:10:38 andersen Exp $
|
||||
*/
|
||||
|
||||
#ifndef _MATH_PRIVATE_H_
|
||||
#define _MATH_PRIVATE_H_
|
||||
|
||||
#include <endian.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
/* The original fdlibm code used statements like:
|
||||
n0 = ((*(int*)&one)>>29)^1; * index of high word *
|
||||
ix0 = *(n0+(int*)&x); * high word of x *
|
||||
ix1 = *((1-n0)+(int*)&x); * low word of x *
|
||||
to dig two 32 bit words out of the 64 bit IEEE floating point
|
||||
value. That is non-ANSI, and, moreover, the gcc instruction
|
||||
scheduler gets it wrong. We instead use the following macros.
|
||||
Unlike the original code, we determine the endianness at compile
|
||||
time, not at run time; I don't see much benefit to selecting
|
||||
endianness at run time. */
|
||||
|
||||
/* A union which permits us to convert between a double and two 32 bit
|
||||
ints. */
|
||||
|
||||
/*
|
||||
* Math on arm is special:
|
||||
* For FPA, float words are always big-endian.
|
||||
* For VFP, floats words follow the memory system mode.
|
||||
*/
|
||||
|
||||
#if (__BYTE_ORDER == __BIG_ENDIAN) || \
|
||||
(!defined(__VFP_FP__) && (defined(__arm__) || defined(__thumb__)))
|
||||
|
||||
typedef union
|
||||
{
|
||||
double value;
|
||||
struct
|
||||
{
|
||||
u_int32_t msw;
|
||||
u_int32_t lsw;
|
||||
} parts;
|
||||
} ieee_double_shape_type;
|
||||
|
||||
#else
|
||||
|
||||
typedef union
|
||||
{
|
||||
double value;
|
||||
struct
|
||||
{
|
||||
u_int32_t lsw;
|
||||
u_int32_t msw;
|
||||
} parts;
|
||||
} ieee_double_shape_type;
|
||||
|
||||
#endif
|
||||
|
||||
/* Get two 32 bit ints from a double. */
|
||||
|
||||
#define EXTRACT_WORDS(ix0,ix1,d) \
|
||||
do { \
|
||||
ieee_double_shape_type ew_u; \
|
||||
ew_u.value = (d); \
|
||||
(ix0) = ew_u.parts.msw; \
|
||||
(ix1) = ew_u.parts.lsw; \
|
||||
} while (0)
|
||||
|
||||
/* Get the more significant 32 bit int from a double. */
|
||||
|
||||
#define GET_HIGH_WORD(i,d) \
|
||||
do { \
|
||||
ieee_double_shape_type gh_u; \
|
||||
gh_u.value = (d); \
|
||||
(i) = gh_u.parts.msw; \
|
||||
} while (0)
|
||||
|
||||
/* Get the less significant 32 bit int from a double. */
|
||||
|
||||
#define GET_LOW_WORD(i,d) \
|
||||
do { \
|
||||
ieee_double_shape_type gl_u; \
|
||||
gl_u.value = (d); \
|
||||
(i) = gl_u.parts.lsw; \
|
||||
} while (0)
|
||||
|
||||
/* Set a double from two 32 bit ints. */
|
||||
|
||||
#define INSERT_WORDS(d,ix0,ix1) \
|
||||
do { \
|
||||
ieee_double_shape_type iw_u; \
|
||||
iw_u.parts.msw = (ix0); \
|
||||
iw_u.parts.lsw = (ix1); \
|
||||
(d) = iw_u.value; \
|
||||
} while (0)
|
||||
|
||||
/* Set the more significant 32 bits of a double from an int. */
|
||||
|
||||
#define SET_HIGH_WORD(d,v) \
|
||||
do { \
|
||||
ieee_double_shape_type sh_u; \
|
||||
sh_u.value = (d); \
|
||||
sh_u.parts.msw = (v); \
|
||||
(d) = sh_u.value; \
|
||||
} while (0)
|
||||
|
||||
/* Set the less significant 32 bits of a double from an int. */
|
||||
|
||||
#define SET_LOW_WORD(d,v) \
|
||||
do { \
|
||||
ieee_double_shape_type sl_u; \
|
||||
sl_u.value = (d); \
|
||||
sl_u.parts.lsw = (v); \
|
||||
(d) = sl_u.value; \
|
||||
} while (0)
|
||||
|
||||
|
||||
#endif /* _MATH_PRIVATE_H_ */
|
72
platform/gp2x/code940/uClibc/memset.s
Normal file
72
platform/gp2x/code940/uClibc/memset.s
Normal file
|
@ -0,0 +1,72 @@
|
|||
/* Copyright (C) 1998 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Philip Blundell <philb@gnu.org>
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
@ #include <sys/syscall.h>
|
||||
|
||||
.text
|
||||
.global memset
|
||||
.type memset,%function
|
||||
.align 4
|
||||
|
||||
memset:
|
||||
mov a4, a1
|
||||
cmp a3, $8 @ at least 8 bytes to do?
|
||||
blt 2f
|
||||
orr a2, a2, a2, lsl $8
|
||||
orr a2, a2, a2, lsl $16
|
||||
1:
|
||||
tst a4, $3 @ aligned yet?
|
||||
strneb a2, [a4], $1
|
||||
subne a3, a3, $1
|
||||
bne 1b
|
||||
mov ip, a2
|
||||
1:
|
||||
cmp a3, $8 @ 8 bytes still to do?
|
||||
blt 2f
|
||||
stmia a4!, {a2, ip}
|
||||
sub a3, a3, $8
|
||||
cmp a3, $8 @ 8 bytes still to do?
|
||||
blt 2f
|
||||
stmia a4!, {a2, ip}
|
||||
sub a3, a3, $8
|
||||
cmp a3, $8 @ 8 bytes still to do?
|
||||
blt 2f
|
||||
stmia a4!, {a2, ip}
|
||||
sub a3, a3, $8
|
||||
cmp a3, $8 @ 8 bytes still to do?
|
||||
stmgeia a4!, {a2, ip}
|
||||
subge a3, a3, $8
|
||||
bge 1b
|
||||
2:
|
||||
movs a3, a3 @ anything left?
|
||||
moveq pc, lr @ nope
|
||||
rsb a3, a3, $7
|
||||
add pc, pc, a3, lsl $2
|
||||
mov r0, r0
|
||||
strb a2, [a4], $1
|
||||
strb a2, [a4], $1
|
||||
strb a2, [a4], $1
|
||||
strb a2, [a4], $1
|
||||
strb a2, [a4], $1
|
||||
strb a2, [a4], $1
|
||||
strb a2, [a4], $1
|
||||
mov pc, lr
|
||||
|
||||
.size memset,.-memset;
|
||||
|
39
platform/gp2x/code940/uClibc/s_copysign.c
Normal file
39
platform/gp2x/code940/uClibc/s_copysign.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/* @(#)s_copysign.c 5.1 93/09/24 */
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#if defined(LIBM_SCCS) && !defined(lint)
|
||||
static char rcsid[] = "$NetBSD: s_copysign.c,v 1.8 1995/05/10 20:46:57 jtc Exp $";
|
||||
#endif
|
||||
|
||||
/*
|
||||
* copysign(double x, double y)
|
||||
* copysign(x,y) returns a value with the magnitude of x and
|
||||
* with the sign bit of y.
|
||||
*/
|
||||
|
||||
#include "math.h"
|
||||
#include "math_private.h"
|
||||
|
||||
#ifdef __STDC__
|
||||
double copysign(double x, double y)
|
||||
#else
|
||||
double copysign(x,y)
|
||||
double x,y;
|
||||
#endif
|
||||
{
|
||||
u_int32_t hx,hy;
|
||||
GET_HIGH_WORD(hx,x);
|
||||
GET_HIGH_WORD(hy,y);
|
||||
SET_HIGH_WORD(x,(hx&0x7fffffff)|(hy&0x80000000));
|
||||
return x;
|
||||
}
|
||||
|
35
platform/gp2x/code940/uClibc/s_fabs.c
Normal file
35
platform/gp2x/code940/uClibc/s_fabs.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/* @(#)s_fabs.c 5.1 93/09/24 */
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#if defined(LIBM_SCCS) && !defined(lint)
|
||||
static char rcsid[] = "$NetBSD: s_fabs.c,v 1.7 1995/05/10 20:47:13 jtc Exp $";
|
||||
#endif
|
||||
|
||||
/*
|
||||
* fabs(x) returns the absolute value of x.
|
||||
*/
|
||||
|
||||
#include "math.h"
|
||||
#include "math_private.h"
|
||||
|
||||
#ifdef __STDC__
|
||||
double fabs(double x)
|
||||
#else
|
||||
double fabs(x)
|
||||
double x;
|
||||
#endif
|
||||
{
|
||||
u_int32_t high;
|
||||
GET_HIGH_WORD(high,x);
|
||||
SET_HIGH_WORD(x,high&0x7fffffff);
|
||||
return x;
|
||||
}
|
81
platform/gp2x/code940/uClibc/s_floor.c
Normal file
81
platform/gp2x/code940/uClibc/s_floor.c
Normal file
|
@ -0,0 +1,81 @@
|
|||
/* @(#)s_floor.c 5.1 93/09/24 */
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#if defined(LIBM_SCCS) && !defined(lint)
|
||||
static char rcsid[] = "$NetBSD: s_floor.c,v 1.8 1995/05/10 20:47:20 jtc Exp $";
|
||||
#endif
|
||||
|
||||
/*
|
||||
* floor(x)
|
||||
* Return x rounded toward -inf to integral value
|
||||
* Method:
|
||||
* Bit twiddling.
|
||||
* Exception:
|
||||
* Inexact flag raised if x not equal to floor(x).
|
||||
*/
|
||||
|
||||
#include "math.h"
|
||||
#include "math_private.h"
|
||||
|
||||
#ifdef __STDC__
|
||||
static const double huge = 1.0e300;
|
||||
#else
|
||||
static double huge = 1.0e300;
|
||||
#endif
|
||||
|
||||
#ifdef __STDC__
|
||||
double floor(double x)
|
||||
#else
|
||||
double floor(x)
|
||||
double x;
|
||||
#endif
|
||||
{
|
||||
int32_t i0,i1,j0;
|
||||
u_int32_t i,j;
|
||||
EXTRACT_WORDS(i0,i1,x);
|
||||
j0 = ((i0>>20)&0x7ff)-0x3ff;
|
||||
if(j0<20) {
|
||||
if(j0<0) { /* raise inexact if x != 0 */
|
||||
if(huge+x>0.0) {/* return 0*sign(x) if |x|<1 */
|
||||
if(i0>=0) {i0=i1=0;}
|
||||
else if(((i0&0x7fffffff)|i1)!=0)
|
||||
{ i0=0xbff00000;i1=0;}
|
||||
}
|
||||
} else {
|
||||
i = (0x000fffff)>>j0;
|
||||
if(((i0&i)|i1)==0) return x; /* x is integral */
|
||||
if(huge+x>0.0) { /* raise inexact flag */
|
||||
if(i0<0) i0 += (0x00100000)>>j0;
|
||||
i0 &= (~i); i1=0;
|
||||
}
|
||||
}
|
||||
} else if (j0>51) {
|
||||
if(j0==0x400) return x+x; /* inf or NaN */
|
||||
else return x; /* x is integral */
|
||||
} else {
|
||||
i = ((u_int32_t)(0xffffffff))>>(j0-20);
|
||||
if((i1&i)==0) return x; /* x is integral */
|
||||
if(huge+x>0.0) { /* raise inexact flag */
|
||||
if(i0<0) {
|
||||
if(j0==20) i0+=1;
|
||||
else {
|
||||
j = i1+(1<<(52-j0));
|
||||
if(j<i1) i0 +=1 ; /* got a carry */
|
||||
i1=j;
|
||||
}
|
||||
}
|
||||
i1 &= (~i);
|
||||
}
|
||||
}
|
||||
INSERT_WORDS(x,i0,i1);
|
||||
return x;
|
||||
}
|
67
platform/gp2x/code940/uClibc/s_scalbn.c
Normal file
67
platform/gp2x/code940/uClibc/s_scalbn.c
Normal file
|
@ -0,0 +1,67 @@
|
|||
/* @(#)s_scalbn.c 5.1 93/09/24 */
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#if defined(LIBM_SCCS) && !defined(lint)
|
||||
static char rcsid[] = "$NetBSD: s_scalbn.c,v 1.8 1995/05/10 20:48:08 jtc Exp $";
|
||||
#endif
|
||||
|
||||
/*
|
||||
* scalbn (double x, int n)
|
||||
* scalbn(x,n) returns x* 2**n computed by exponent
|
||||
* manipulation rather than by actually performing an
|
||||
* exponentiation or a multiplication.
|
||||
*/
|
||||
|
||||
#include "math.h"
|
||||
#include "math_private.h"
|
||||
|
||||
#ifdef __STDC__
|
||||
static const double
|
||||
#else
|
||||
static double
|
||||
#endif
|
||||
two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
|
||||
twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
|
||||
huge = 1.0e+300,
|
||||
tiny = 1.0e-300;
|
||||
|
||||
#ifdef __STDC__
|
||||
double scalbn (double x, int n)
|
||||
#else
|
||||
double scalbn (x,n)
|
||||
double x; int n;
|
||||
#endif
|
||||
{
|
||||
int32_t k,hx,lx;
|
||||
EXTRACT_WORDS(hx,lx,x);
|
||||
k = (hx&0x7ff00000)>>20; /* extract exponent */
|
||||
if (k==0) { /* 0 or subnormal x */
|
||||
if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
|
||||
x *= two54;
|
||||
GET_HIGH_WORD(hx,x);
|
||||
k = ((hx&0x7ff00000)>>20) - 54;
|
||||
if (n< -50000) return tiny*x; /*underflow*/
|
||||
}
|
||||
if (k==0x7ff) return x+x; /* NaN or Inf */
|
||||
k = k+n;
|
||||
if (k > 0x7fe) return huge*copysign(huge,x); /* overflow */
|
||||
if (k > 0) /* normal result */
|
||||
{SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20)); return x;}
|
||||
if (k <= -54) {
|
||||
if (n > 50000) /* in case integer overflow in n+k */
|
||||
return huge*copysign(huge,x); /*overflow*/
|
||||
else return tiny*copysign(tiny,x); /*underflow*/
|
||||
}
|
||||
k += 54; /* subnormal result */
|
||||
SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20));
|
||||
return x*twom54;
|
||||
}
|
82
platform/gp2x/code940/uClibc/s_sin.c
Normal file
82
platform/gp2x/code940/uClibc/s_sin.c
Normal file
|
@ -0,0 +1,82 @@
|
|||
/* @(#)s_sin.c 5.1 93/09/24 */
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#if defined(LIBM_SCCS) && !defined(lint)
|
||||
static char rcsid[] = "$NetBSD: s_sin.c,v 1.7 1995/05/10 20:48:15 jtc Exp $";
|
||||
#endif
|
||||
|
||||
/* sin(x)
|
||||
* Return sine function of x.
|
||||
*
|
||||
* kernel function:
|
||||
* __kernel_sin ... sine function on [-pi/4,pi/4]
|
||||
* __kernel_cos ... cose function on [-pi/4,pi/4]
|
||||
* __ieee754_rem_pio2 ... argument reduction routine
|
||||
*
|
||||
* Method.
|
||||
* Let S,C and T denote the sin, cos and tan respectively on
|
||||
* [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
|
||||
* in [-pi/4 , +pi/4], and let n = k mod 4.
|
||||
* We have
|
||||
*
|
||||
* n sin(x) cos(x) tan(x)
|
||||
* ----------------------------------------------------------
|
||||
* 0 S C T
|
||||
* 1 C -S -1/T
|
||||
* 2 -S -C T
|
||||
* 3 -C S -1/T
|
||||
* ----------------------------------------------------------
|
||||
*
|
||||
* Special cases:
|
||||
* Let trig be any of sin, cos, or tan.
|
||||
* trig(+-INF) is NaN, with signals;
|
||||
* trig(NaN) is that NaN;
|
||||
*
|
||||
* Accuracy:
|
||||
* TRIG(x) returns trig(x) nearly rounded
|
||||
*/
|
||||
|
||||
#include "math.h"
|
||||
#include "math_private.h"
|
||||
|
||||
#ifdef __STDC__
|
||||
double sin(double x)
|
||||
#else
|
||||
double sin(x)
|
||||
double x;
|
||||
#endif
|
||||
{
|
||||
double y[2],z=0.0;
|
||||
int32_t n, ix;
|
||||
|
||||
/* High word of x. */
|
||||
GET_HIGH_WORD(ix,x);
|
||||
|
||||
/* |x| ~< pi/4 */
|
||||
ix &= 0x7fffffff;
|
||||
if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
|
||||
|
||||
/* sin(Inf or NaN) is NaN */
|
||||
else if (ix>=0x7ff00000) return x-x;
|
||||
|
||||
/* argument reduction needed */
|
||||
else {
|
||||
n = __ieee754_rem_pio2(x,y);
|
||||
switch(n&3) {
|
||||
case 0: return __kernel_sin(y[0],y[1],1);
|
||||
case 1: return __kernel_cos(y[0],y[1]);
|
||||
case 2: return -__kernel_sin(y[0],y[1],1);
|
||||
default:
|
||||
return -__kernel_cos(y[0],y[1]);
|
||||
}
|
||||
}
|
||||
}
|
12
platform/gp2x/code940/uClibc/wrappers.c
Normal file
12
platform/gp2x/code940/uClibc/wrappers.c
Normal file
|
@ -0,0 +1,12 @@
|
|||
#include "math.h"
|
||||
|
||||
double pow(double x, double y)
|
||||
{
|
||||
return __ieee754_pow(x, y);
|
||||
}
|
||||
|
||||
|
||||
double log(double x)
|
||||
{
|
||||
return __ieee754_log(x);
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue