mirror of
https://github.com/AetherDroid/android_kernel_samsung_on5xelte.git
synced 2025-09-08 01:08:03 -04:00
Fixed MTP to work with TWRP
This commit is contained in:
commit
f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions
17
arch/sh/lib64/Makefile
Normal file
17
arch/sh/lib64/Makefile
Normal file
|
@ -0,0 +1,17 @@
|
|||
#
|
||||
# Makefile for the SH-5 specific library files.
|
||||
#
|
||||
# Copyright (C) 2000, 2001 Paolo Alberelli
|
||||
# Copyright (C) 2003 - 2008 Paul Mundt
|
||||
#
|
||||
# This file is subject to the terms and conditions of the GNU General Public
|
||||
# License. See the file "COPYING" in the main directory of this archive
|
||||
# for more details.
|
||||
#
|
||||
|
||||
# C and hand-written assembly helpers that make up the SH-5 library.
# NOTE: panic.o should really be compiled as PIC.
lib-y := udelay.o panic.o
lib-y += memcpy.o memset.o
lib-y += copy_user_memcpy.o copy_page.o
lib-y += strcpy.o strlen.o

# Integer division helpers extracted from libgcc.
lib-y += udivsi3.o udivdi3.o sdivsi3.o
|
89
arch/sh/lib64/copy_page.S
Normal file
89
arch/sh/lib64/copy_page.S
Normal file
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
|
||||
|
||||
This file is subject to the terms and conditions of the GNU General Public
|
||||
License. See the file "COPYING" in the main directory of this archive
|
||||
for more details.
|
||||
|
||||
Tight version of memcpy for the case of just copying a page.
|
||||
Prefetch strategy empirically optimised against RTL simulations
|
||||
of SH5-101 cut2 eval chip with Cayman board DDR memory.
|
||||
|
||||
Parameters:
|
||||
r2 : destination effective address (start of page)
|
||||
r3 : source effective address (start of page)
|
||||
|
||||
Always copies 4096 bytes.
|
||||
|
||||
Points to review.
|
||||
* Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead.
|
||||
It seems like the prefetch needs to be at least 4 lines ahead to get
|
||||
the data into the cache in time, and the allocos contend with outstanding
|
||||
prefetches for the same cache set, so it's better to have the numbers
|
||||
different.
|
||||
*/
|
||||
|
||||
/*
 * copy_page - copy exactly 4096 bytes from the page at r3 to the page at r2.
 *
 *   In:      r2  = destination (start of page)
 *            r3  = source (start of page)
 *            r18 = return address
 *   Out:     r2 has been advanced by 4096.
 *   Scratch: r6-r8, r22, r23, r36-r39, r60-r62, tr0-tr3
 *
 * Each loop iteration moves 32 bytes (four quadwords).  The destination
 * line 0x40 bytes ahead is alloco'd, except near the end of the page.
 * The source prefetches are compiled out (#if 0, TAKum03020) and every
 * alloco is followed by a synco for the same reason.
 */
	.section .text..SHmedia32,"ax"
	.little

	.balign 8
	.global copy_page
copy_page:
	ptabs	r18, tr0		/* tr0 = return address */
	pta	1f, tr1			/* tr1 = top of the loop */
	pta	2f, tr2			/* tr2 = skip-prefetch target */
	pta	3f, tr3			/* tr3 = skip-alloco target */

#if 0
/* TAKum03020 */
	ld.q	r3, 0x00, r63
	ld.q	r3, 0x20, r63
	ld.q	r3, 0x40, r63
	ld.q	r3, 0x60, r63
#endif
	/* Allocate the first two destination lines up front. */
	alloco	r2, 0
	synco				/* TAKum03020 */
	alloco	r2, 32
	synco				/* TAKum03020 */

	/* Loop limits, all relative to the start of the destination page. */
	movi	(4096 - 128), r6
	add	r2, r6, r6		/* r6 = dst + 3968: prefetch cut-off */
	addi	r6, 64, r7		/* r7 = dst + 4032: alloco cut-off   */
	addi	r7, 64, r8		/* r8 = dst + 4096: end of page      */

	/* Source is addressed as dst + offset, so only r2 has to advance. */
	sub	r3, r2, r60		/* r60 = src - dst      */
	addi	r60, 8, r61		/* r61 = src - dst + 8  */
	addi	r61, 8, r62		/* r62 = src - dst + 16 */
	addi	r62, 8, r23		/* r23 = src - dst + 24 */
	addi	r60, 128, r22		/* r22 = src - dst + 128 (prefetch offset) */

	/*
	 * Minimal code size.  The extra branches inside the loop are cheap
	 * because they overlap with the time spent waiting for prefetches
	 * to complete.
	 */
1:
#if 0
/* TAKum03020 */
	bge/u	r2, r6, tr2		/* skip prefetch for last 4 lines */
	ldx.q	r2, r22, r63		/* prefetch 4 lines hence */
#endif
2:
	bge/u	r2, r7, tr3		/* skip alloco for last 2 lines */
	alloco	r2, 0x40		/* alloc destination line 2 lines ahead */
	synco				/* TAKum03020 */
3:
	/* Load four source quadwords, then store them to the destination. */
	ldx.q	r2, r60, r36
	ldx.q	r2, r61, r37
	ldx.q	r2, r62, r38
	ldx.q	r2, r23, r39
	st.q	r2, 0, r36
	st.q	r2, 8, r37
	st.q	r2, 16, r38
	st.q	r2, 24, r39
	addi	r2, 32, r2
	bgt/l	r8, r2, tr1		/* loop while r2 is below the page end */

	blink	tr0, r63		/* return */
|
217
arch/sh/lib64/copy_user_memcpy.S
Normal file
217
arch/sh/lib64/copy_user_memcpy.S
Normal file
|
@ -0,0 +1,217 @@
|
|||
!
|
||||
! Fast SH memcpy
|
||||
!
|
||||
! by Toshiyasu Morita (tm@netcom.com)
|
||||
! hacked by J"orn Rennecke (joern.rennecke@superh.com) ("o for o-umlaut)
|
||||
! SH5 code Copyright 2002 SuperH Ltd.
|
||||
!
|
||||
! Entry: ARG0: destination pointer
|
||||
! ARG1: source pointer
|
||||
! ARG2: byte count
|
||||
!
|
||||
! Exit: RESULT: destination pointer
|
||||
! any other registers in the range r0-r7: trashed
|
||||
!
|
||||
! Notes: Usually one wants to do small reads and write a longword, but
|
||||
! unfortunately it is difficult in some cases to concatenate bytes
|
||||
! into a longword on the SH, so this does a longword read and small
|
||||
! writes.
|
||||
!
|
||||
! This implementation makes two assumptions about how it is called:
|
||||
!
|
||||
! 1.: If the byte count is nonzero, the address of the last byte to be
|
||||
! copied is unsigned greater than the address of the first byte to
|
||||
! be copied. This could be easily swapped for a signed comparison,
|
||||
! but the algorithm used needs some comparison.
|
||||
!
|
||||
! 2.: When there are two or three bytes in the last word of an 11-or-more
|
||||
! bytes memory chunk to be copied, the rest of the word can be read
|
||||
! without side effects.
|
||||
! This could be easily changed by increasing the minimum size of
|
||||
! a fast memcpy and the amount subtracted from r7 before L_2l_loop be 2,
|
||||
! however, this would cost a few extra cycles on average.
|
||||
! For SHmedia, the assumption is that any quadword can be read in its
|
||||
! entirety if at least one byte is included in the copy.
|
||||
|
||||
/* Imported into Linux kernel by Richard Curnow. This is used to implement the
|
||||
__copy_user function in the general case, so it has to be a distinct
|
||||
function from intra-kernel memcpy to allow for exception fix-ups in the
|
||||
event that the user pointer is bad somewhere in the copy (e.g. due to
|
||||
running off the end of the vma).
|
||||
|
||||
Note, this algorithm will be slightly wasteful in the case where the source
|
||||
and destination pointers are equally aligned, because the stlo/sthi pairs
|
||||
could then be merged back into single stores. If there are a lot of cache
|
||||
misses, this is probably offset by the stall lengths on the preloads.
|
||||
|
||||
*/
|
||||
|
||||
/* NOTE : Prefetches removed and allocos guarded by synco to avoid TAKum03020
|
||||
* erratum. The first two prefetches are nop-ed out to avoid upsetting the
|
||||
* instruction counts used in the jump address calculation.
|
||||
* */
|
||||
|
||||
/*
 * copy_user_memcpy - memcpy variant bracketed by the global labels
 * copy_user_memcpy / copy_user_memcpy_end.
 *
 *   In:  r2 = destination, r3 = source, r4 = byte count, r18 = return address
 *
 * Counts below 25 are dispatched through a computed branch: nsb(count)
 * is scaled by 32 and subtracted from a constant based on L1, so each
 * size class must start exactly 32 bytes (8 instructions) after the
 * previous one, beginning at L1.  Do NOT add, remove or reorder
 * instructions between L0 and Large.  The nops marked TAKum03020 stand
 * in for removed prefetches precisely to keep those counts intact.
 */
	.section .text..SHmedia32,"ax"
	.little
	.balign 32
	.global copy_user_memcpy
	.global copy_user_memcpy_end
copy_user_memcpy:

/* Unaligned quad/long load and store helpers (low part, then high part). */
#define LDUAQ(P,O,D0,D1) ldlo.q P,O,D0; ldhi.q P,O+7,D1
#define STUAQ(P,O,D0,D1) stlo.q P,O,D0; sthi.q P,O+7,D1
#define LDUAL(P,O,D0,D1) ldlo.l P,O,D0; ldhi.l P,O+3,D1
#define STUAL(P,O,D0,D1) stlo.l P,O,D0; sthi.l P,O+3,D1

	nop				/* was: ld.b r3,0,r63 (TAKum03020) */
	pta/l	Large, tr0
	movi	25, r0
	bgeu/u	r4, r0, tr0		/* count >= 25: take the Large path */
	nsb	r4, r0			/* r0 = nsb(count) selects the size class */
	shlli	r0, 5, r0		/* 32 bytes per size class */
	movi	(L1-L0+63*32 + 1) & 0xffff, r1
	sub	r1, r0, r0
L0:	ptrel	r0, tr0			/* tr0 = entry for this size class */
	add	r2, r4, r5		/* r5 = one past the end of destination */
	ptabs	r18, tr1		/* tr1 = return address */
	add	r3, r4, r6		/* r6 = one past the end of source */
	blink	tr0, r63

/* Rearranged to make cut2 safe */
	.balign 8
L4_7:	/* 4..7 byte memcpy, continued */
	stlo.l	r2, 0, r0
	or	r6, r7, r6
	sthi.l	r5, -1, r6
	stlo.l	r5, -4, r6
	blink	tr1, r63

	.balign 8
L1:	/* 0 byte memcpy */
	nop
	blink	tr1, r63
	nop
	nop
	nop
	nop

L2_3:	/* 2 or 3 byte memcpy, continued */
	st.b	r5, -1, r6
	blink	tr1, r63

	/* 1 byte memcpy */
	ld.b	r3, 0, r0
	st.b	r2, 0, r0
	blink	tr1, r63

L8_15:	/* 8..15 byte memcpy, continued */
	stlo.q	r2, 0, r0
	or	r6, r7, r6
	sthi.q	r5, -1, r6
	stlo.q	r5, -8, r6
	blink	tr1, r63

	/* 2 or 3 byte memcpy */
	ld.b	r3, 0, r0
	nop				/* was: ld.b r2,0,r63 (TAKum03020) */
	ld.b	r3, 1, r1
	st.b	r2, 0, r0
	pta/l	L2_3, tr0
	ld.b	r6, -1, r6
	st.b	r2, 1, r1
	blink	tr0, r63

	/* 4 .. 7 byte memcpy */
	LDUAL (r3, 0, r0, r1)
	pta	L4_7, tr0
	ldlo.l	r6, -4, r7
	or	r0, r1, r0
	sthi.l	r2, 3, r0
	ldhi.l	r6, -1, r6
	blink	tr0, r63

	/* 8 .. 15 byte memcpy */
	LDUAQ (r3, 0, r0, r1)
	pta	L8_15, tr0
	ldlo.q	r6, -8, r7
	or	r0, r1, r0
	sthi.q	r2, 7, r0
	ldhi.q	r6, -1, r6
	blink	tr0, r63

	/* 16 .. 24 byte memcpy */
	LDUAQ (r3, 0, r0, r1)
	LDUAQ (r3, 8, r8, r9)
	or	r0, r1, r0
	sthi.q	r2, 7, r0
	or	r8, r9, r8
	sthi.q	r2, 15, r8
	ldlo.q	r6, -8, r7
	ldhi.q	r6, -1, r6
	stlo.q	r2, 8, r8
	stlo.q	r2, 0, r0
	or	r6, r7, r6
	sthi.q	r5, -1, r6
	stlo.q	r5, -8, r6
	blink	tr1, r63

Large:
	/* removed prefetch: ld.b r2, 0, r63 (TAKum03020) */
	pta/l	Loop_ua, tr1
	ori	r3, -8, r7
	sub	r2, r7, r22		/* r22 = dst - (src | -8) */
	sub	r3, r2, r6		/* r6 = src - dst; source is read at r22 + r6 */
	add	r2, r4, r5
	ldlo.q	r3, 0, r0
	addi	r5, -16, r5		/* r5 = dst + count - 16 */
	movi	64+8, r27		/* could subtract r7 from that. */
	stlo.q	r2, 0, r0
	sthi.q	r2, 7, r0
	ldx.q	r22, r6, r0
	bgtu/l	r27, r4, tr1		/* count < 72: quadword loop only */

	addi	r5, -48, r27		/* r27 = limit for the 32-byte loop */
	pta/l	Loop_line, tr0
	addi	r6, 64, r36
	addi	r6, -24, r19
	addi	r6, -16, r20
	addi	r6, -8, r21

Loop_line:
	/* removed prefetch: ldx.q r22, r36, r63 (TAKum03020) */
	alloco	r22, 32
	synco
	addi	r22, 32, r22
	ldx.q	r22, r19, r23
	sthi.q	r22, -25, r0
	ldx.q	r22, r20, r24
	ldx.q	r22, r21, r25
	stlo.q	r22, -32, r0
	ldx.q	r22, r6, r0
	sthi.q	r22, -17, r23
	sthi.q	r22, -9, r24
	sthi.q	r22, -1, r25
	stlo.q	r22, -24, r23
	stlo.q	r22, -16, r24
	stlo.q	r22, -8, r25
	bgeu	r27, r22, tr0

Loop_ua:
	addi	r22, 8, r22
	sthi.q	r22, -1, r0
	stlo.q	r22, -8, r0
	ldx.q	r22, r6, r0
	bgtu/l	r5, r22, tr1

	/* Tail: store the pending quadword, then the final (unaligned) 8 bytes. */
	add	r3, r4, r7
	ldlo.q	r7, -8, r1
	sthi.q	r22, 7, r0
	ldhi.q	r7, -1, r7
	ptabs	r18, tr1
	stlo.q	r22, 0, r0
	or	r1, r7, r1
	sthi.q	r5, 15, r1
	stlo.q	r5, 8, r1
	blink	tr1, r63
copy_user_memcpy_end:
	nop
|
201
arch/sh/lib64/memcpy.S
Normal file
201
arch/sh/lib64/memcpy.S
Normal file
|
@ -0,0 +1,201 @@
|
|||
/* Cloned and hacked for uClibc by Paul Mundt, December 2003 */
|
||||
/* Modified by SuperH, Inc. September 2003 */
|
||||
!
|
||||
! Fast SH memcpy
|
||||
!
|
||||
! by Toshiyasu Morita (tm@netcom.com)
|
||||
! hacked by J"orn Rennecke (joern.rennecke@superh.com) ("o for o-umlaut)
|
||||
! SH5 code Copyright 2002 SuperH Ltd.
|
||||
!
|
||||
! Entry: ARG0: destination pointer
|
||||
! ARG1: source pointer
|
||||
! ARG2: byte count
|
||||
!
|
||||
! Exit: RESULT: destination pointer
|
||||
! any other registers in the range r0-r7: trashed
|
||||
!
|
||||
! Notes: Usually one wants to do small reads and write a longword, but
|
||||
! unfortunately it is difficult in some cases to concatenate bytes
|
||||
! into a longword on the SH, so this does a longword read and small
|
||||
! writes.
|
||||
!
|
||||
! This implementation makes two assumptions about how it is called:
|
||||
!
|
||||
! 1.: If the byte count is nonzero, the address of the last byte to be
|
||||
! copied is unsigned greater than the address of the first byte to
|
||||
! be copied. This could be easily swapped for a signed comparison,
|
||||
! but the algorithm used needs some comparison.
|
||||
!
|
||||
! 2.: When there are two or three bytes in the last word of an 11-or-more
|
||||
! bytes memory chunk to be copied, the rest of the word can be read
|
||||
! without side effects.
|
||||
! This could be easily changed by increasing the minimum size of
|
||||
! a fast memcpy and the amount subtracted from r7 before L_2l_loop be 2,
|
||||
! however, this would cost a few extra cycles on average.
|
||||
! For SHmedia, the assumption is that any quadword can be read in its
|
||||
! entirety if at least one byte is included in the copy.
|
||||
!
|
||||
|
||||
/*
 * memcpy
 *
 *   In:  r2 = destination, r3 = source, r4 = byte count, r18 = return address
 *
 * Counts below 25 are dispatched through a computed branch: nsb(count)
 * is scaled by 32 and subtracted from a constant based on L1, so each
 * size class must start exactly 32 bytes (8 instructions) after the
 * previous one, beginning at L1.  Do NOT add, remove or reorder
 * instructions between L0 and Large.
 *
 * NOTE(review): copy_user_memcpy in this directory replaces the
 * "ld.b/ldx.q ..., r63" prefetches with nops or comments and follows
 * alloco with synco, citing the TAKum03020 erratum.  This routine still
 * issues the prefetches and has no synco - confirm whether that is
 * intended.
 */
	.section .text..SHmedia32,"ax"
	.globl	memcpy
	.type	memcpy, @function
	.align	5

memcpy:

/* Unaligned quad/long load and store helpers (low part, then high part). */
#define LDUAQ(P,O,D0,D1) ldlo.q P,O,D0; ldhi.q P,O+7,D1
#define STUAQ(P,O,D0,D1) stlo.q P,O,D0; sthi.q P,O+7,D1
#define LDUAL(P,O,D0,D1) ldlo.l P,O,D0; ldhi.l P,O+3,D1
#define STUAL(P,O,D0,D1) stlo.l P,O,D0; sthi.l P,O+3,D1

	ld.b	r3, 0, r63		/* load into r63: value is discarded */
	pta/l	Large, tr0
	movi	25, r0
	bgeu/u	r4, r0, tr0		/* count >= 25: take the Large path */
	nsb	r4, r0			/* r0 = nsb(count) selects the size class */
	shlli	r0, 5, r0		/* 32 bytes per size class */
	movi	(L1-L0+63*32 + 1) & 0xffff, r1
	sub	r1, r0, r0
L0:	ptrel	r0, tr0			/* tr0 = entry for this size class */
	add	r2, r4, r5		/* r5 = one past the end of destination */
	ptabs	r18, tr1		/* tr1 = return address */
	add	r3, r4, r6		/* r6 = one past the end of source */
	blink	tr0, r63

/* Rearranged to make cut2 safe */
	.balign 8
L4_7:	/* 4..7 byte memcpy, continued */
	stlo.l	r2, 0, r0
	or	r6, r7, r6
	sthi.l	r5, -1, r6
	stlo.l	r5, -4, r6
	blink	tr1, r63

	.balign 8
L1:	/* 0 byte memcpy */
	nop
	blink	tr1, r63
	nop
	nop
	nop
	nop

L2_3:	/* 2 or 3 byte memcpy, continued */
	st.b	r5, -1, r6
	blink	tr1, r63

	/* 1 byte memcpy */
	ld.b	r3, 0, r0
	st.b	r2, 0, r0
	blink	tr1, r63

L8_15:	/* 8..15 byte memcpy, continued */
	stlo.q	r2, 0, r0
	or	r6, r7, r6
	sthi.q	r5, -1, r6
	stlo.q	r5, -8, r6
	blink	tr1, r63

	/* 2 or 3 byte memcpy */
	ld.b	r3, 0, r0
	ld.b	r2, 0, r63
	ld.b	r3, 1, r1
	st.b	r2, 0, r0
	pta/l	L2_3, tr0
	ld.b	r6, -1, r6
	st.b	r2, 1, r1
	blink	tr0, r63

	/* 4 .. 7 byte memcpy */
	LDUAL (r3, 0, r0, r1)
	pta	L4_7, tr0
	ldlo.l	r6, -4, r7
	or	r0, r1, r0
	sthi.l	r2, 3, r0
	ldhi.l	r6, -1, r6
	blink	tr0, r63

	/* 8 .. 15 byte memcpy */
	LDUAQ (r3, 0, r0, r1)
	pta	L8_15, tr0
	ldlo.q	r6, -8, r7
	or	r0, r1, r0
	sthi.q	r2, 7, r0
	ldhi.q	r6, -1, r6
	blink	tr0, r63

	/* 16 .. 24 byte memcpy */
	LDUAQ (r3, 0, r0, r1)
	LDUAQ (r3, 8, r8, r9)
	or	r0, r1, r0
	sthi.q	r2, 7, r0
	or	r8, r9, r8
	sthi.q	r2, 15, r8
	ldlo.q	r6, -8, r7
	ldhi.q	r6, -1, r6
	stlo.q	r2, 8, r8
	stlo.q	r2, 0, r0
	or	r6, r7, r6
	sthi.q	r5, -1, r6
	stlo.q	r5, -8, r6
	blink	tr1, r63

Large:
	ld.b	r2, 0, r63
	pta/l	Loop_ua, tr1
	ori	r3, -8, r7
	sub	r2, r7, r22		/* r22 = dst - (src | -8) */
	sub	r3, r2, r6		/* r6 = src - dst; source is read at r22 + r6 */
	add	r2, r4, r5
	ldlo.q	r3, 0, r0
	addi	r5, -16, r5		/* r5 = dst + count - 16 */
	movi	64+8, r27		/* could subtract r7 from that. */
	stlo.q	r2, 0, r0
	sthi.q	r2, 7, r0
	ldx.q	r22, r6, r0
	bgtu/l	r27, r4, tr1		/* count < 72: quadword loop only */

	addi	r5, -48, r27		/* r27 = limit for the 32-byte loop */
	pta/l	Loop_line, tr0
	addi	r6, 64, r36
	addi	r6, -24, r19
	addi	r6, -16, r20
	addi	r6, -8, r21

Loop_line:
	ldx.q	r22, r36, r63		/* load into r63: value is discarded */
	alloco	r22, 32
	addi	r22, 32, r22
	ldx.q	r22, r19, r23
	sthi.q	r22, -25, r0
	ldx.q	r22, r20, r24
	ldx.q	r22, r21, r25
	stlo.q	r22, -32, r0
	ldx.q	r22, r6, r0
	sthi.q	r22, -17, r23
	sthi.q	r22, -9, r24
	sthi.q	r22, -1, r25
	stlo.q	r22, -24, r23
	stlo.q	r22, -16, r24
	stlo.q	r22, -8, r25
	bgeu	r27, r22, tr0

Loop_ua:
	addi	r22, 8, r22
	sthi.q	r22, -1, r0
	stlo.q	r22, -8, r0
	ldx.q	r22, r6, r0
	bgtu/l	r5, r22, tr1

	/* Tail: store the pending quadword, then the final (unaligned) 8 bytes. */
	add	r3, r4, r7
	ldlo.q	r7, -8, r1
	sthi.q	r22, 7, r0
	ldhi.q	r7, -1, r7
	ptabs	r18, tr1
	stlo.q	r22, 0, r0
	or	r1, r7, r1
	sthi.q	r5, 15, r1
	stlo.q	r5, 8, r1
	blink	tr1, r63

	.size	memcpy,.-memcpy
|
91
arch/sh/lib64/memset.S
Normal file
91
arch/sh/lib64/memset.S
Normal file
|
@ -0,0 +1,91 @@
|
|||
/* Cloned and hacked for uClibc by Paul Mundt, December 2003 */
|
||||
/* Modified by SuperH, Inc. September 2003 */
|
||||
!
|
||||
! Fast SH memset
|
||||
!
|
||||
! by Toshiyasu Morita (tm@netcom.com)
|
||||
!
|
||||
! SH5 code by J"orn Rennecke (joern.rennecke@superh.com)
|
||||
! Copyright 2002 SuperH Ltd.
|
||||
!
|
||||
|
||||
/*
 * memset
 *
 *   In:  r2 = destination, r3 = fill value (low byte is replicated),
 *        r4 = byte count, r18 = return address
 *   r2 is never written, so it still holds the destination on return.
 *
 * If (dest & 7) + count <= 8 the whole fill lies inside one aligned
 * quadword and is done with a masked merge.  Otherwise the head and tail
 * are written with stlo.q/sthi.q and the middle with aligned st.q stores.
 */

/* SHHI / SHLO: shift direction depends on the byte order. */
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define SHHI shlld
#define SHLO shlrd
#else
#define SHHI shlrd
#define SHLO shlld
#endif

	.section .text..SHmedia32,"ax"
	.globl	memset
	.type	memset, @function

	.align 5

memset:
	pta/l	fill_multiquad, tr0
	andi	r2, 7, r22		/* r22 = dest & 7 */
	ptabs	r18, tr2		/* tr2 = return address */
	mshflo.b r3, r3, r3
	add	r4, r22, r23		/* r23 = count + (dest & 7) */
	mperm.w	r3, r63, r3		/* Fill pattern now in every byte of r3 */

	movi	8, r9
	bgtu/u	r23, r9, tr0		/* more than one quadword touched */

	beqi/u	r4, 0, tr2		/* Return with size 0 - ensures no mem accesses */
	ldlo.q	r2, 0, r7		/* current contents from dest onwards */
	shlli	r4, 2, r4		/* r4 = 4 * count ... */
	movi	-1, r8
	SHHI	r8, r4, r8		/* ... applied twice: shift by 8 * count bits */
	SHHI	r8, r4, r8
	mcmv	r7, r8, r3		/* merge old bytes with the pattern under mask r8 */
	stlo.q	r2, 0, r3
	blink	tr2, r63

fill_multiquad:
	pta/l	fill_lastquad, tr0
	stlo.q	r2, 0, r3		/* head: from dest to the end of its quadword */
	shlri	r23, 3, r24		/* r24 = (count + (dest & 7)) / 8 */
	add	r2, r4, r5		/* r5 = dest + count */
	beqi/u	r24, 1, tr0
	pta/l	fill_loop, tr1
	sub	r2, r22, r25		/* r25 = dest rounded down to 8 bytes */
	andi	r5, -8, r20		/* calculate end address and */
	addi	r20, -7*8, r8		/* loop end address; This might overflow, so we
					   need to use a different test before we start
					   the loop */
	bge/u	r24, r9, tr1		/* r24 >= 8: use the loop */
	/* Fewer than 8 quadwords: store pairs inward from both ends. */
	st.q	r25, 8, r3
	st.q	r20, -8, r3
	shlri	r24, 1, r24
	beqi/u	r24, 1, tr0
	st.q	r25, 16, r3
	st.q	r20, -16, r3
	beqi/u	r24, 2, tr0
	st.q	r25, 24, r3
	st.q	r20, -24, r3
fill_lastquad:
	sthi.q	r5, -1, r3		/* tail: up to and including the last byte */
	blink	tr2, r63

fill_loop:
	/*
	 * "alloco r25, 32" is deliberately left commented out here: short-term
	 * fix to SHUK #3895.  Commenting it out is logically correct, but
	 * sub-optimal.  (Sean McGoogan - 4th April 2003.)
	 */
	st.q	r25, 8, r3
	st.q	r25, 16, r3
	st.q	r25, 24, r3
	st.q	r25, 32, r3
	addi	r25, 32, r25
	bgeu/l	r8, r25, tr1		/* continue while r8 >= r25 */

	/* Finish with the last five aligned quadwords and the tail bytes. */
	st.q	r20, -40, r3
	st.q	r20, -32, r3
	st.q	r20, -24, r3
	st.q	r20, -16, r3
	st.q	r20, -8, r3
	sthi.q	r5, -1, r3
	blink	tr2, r63

	.size	memset,.-memset
|
15
arch/sh/lib64/panic.c
Normal file
15
arch/sh/lib64/panic.c
Normal file
|
@ -0,0 +1,15 @@
|
|||
/*
|
||||
* Copyright (C) 2003 Richard Curnow, SuperH UK Limited
|
||||
*
|
||||
* This file is subject to the terms and conditions of the GNU General Public
|
||||
* License. See the file "COPYING" in the main directory of this archive
|
||||
* for more details.
|
||||
*/
|
||||
|
||||
/*
 * panic_handler - terminal handler that spins forever.
 *
 * The three arguments are accepted but never read.  The function does
 * not return.
 */
void panic_handler(unsigned long panicPC, unsigned long panicSSR,
		   unsigned long panicEXPEVT)
{
	while (1) {
		/* Never return from the panic handler */
	}
}
|
135
arch/sh/lib64/sdivsi3.S
Normal file
135
arch/sh/lib64/sdivsi3.S
Normal file
|
@ -0,0 +1,135 @@
|
|||
/*
 * __sdivsi3 / __sdivsi3_1 / __sdivsi3_2
 *
 *   inputs:    r4, r5
 *   clobbered: r1, r18, r19, r20, r21, r25, tr0
 *   result in r0
 *
 * __sdivsi3 and __sdivsi3_1 are the same entry and load the address of
 * __div_table into r20 first.  __sdivsi3_2 skips that step and reads r20
 * as the table base - presumably its callers have set r20 up already
 * (TODO confirm).  The instruction order, including the "bubble"
 * markers, is kept exactly as it was.
 */
	.global	__sdivsi3
	.global	__sdivsi3_1
	.global	__sdivsi3_2
	.section	.text..SHmedia32,"ax"
	.align	2

__sdivsi3:
__sdivsi3_1:
	ptb	__div_table, tr0
	gettr	tr0, r20		/* r20 = table base */

__sdivsi3_2:
	nsb	r5, r1
	shlld	r5, r1, r25		/* normalize; [-2 ..1, 1..2) in s2.62 */
	shari	r25, 58, r21		/* extract 5(6) bit index (s2.4 with hole -1..1) */
	/* bubble */
	ldx.ub	r20, r21, r19		/* u0.8 */
	shari	r25, 32, r25		/* normalize to s2.30 */
	shlli	r21, 1, r21
	muls.l	r25, r19, r19		/* s2.38 */
	ldx.w	r20, r21, r21		/* s2.14 */
	ptabs	r18, tr0		/* tr0 = return address; r18 is reused below */
	shari	r19, 24, r19		/* truncate to s2.14 */
	sub	r21, r19, r19		/* some 11 bit inverse in s1.14 */
	muls.l	r19, r19, r21		/* u0.28 */
	sub	r63, r1, r1
	addi	r1, 92, r1
	muls.l	r25, r21, r18		/* s2.58 */
	shlli	r19, 45, r19		/* multiply by two and convert to s2.58 */
	/* bubble */
	sub	r19, r18, r18
	shari	r18, 28, r18		/* some 22 bit inverse in s1.30 */
	muls.l	r18, r25, r0		/* s2.60 */
	muls.l	r18, r4, r25		/* s32.30 */
	/* bubble */
	shari	r0, 16, r19		/* s-16.44 */
	muls.l	r19, r18, r19		/* s-16.74 */
	shari	r25, 63, r0
	shari	r4, 14, r18		/* s19.-14 */
	shari	r19, 30, r19		/* s-16.44 */
	muls.l	r19, r18, r19		/* s15.30 */
	xor	r21, r0, r21		/* You could also use the constant 1 << 27. */
	add	r21, r25, r21
	sub	r21, r19, r21
	shard	r21, r1, r21
	sub	r21, r0, r0
	blink	tr0, r63

/* This table has been generated by divtab.c .
Defects for bias -330:
   Max defect: 6.081536e-07 at -1.000000e+00
   Min defect: 2.849516e-08 at 1.030651e+00
   Max 2nd step defect: 9.606539e-12 at -1.000000e+00
   Min 2nd step defect: 0.000000e+00 at 0.000000e+00
   Defect at 1: 1.238659e-07
   Defect at -2: 1.061708e-07 */

	.balign	2
	.type	__div_table,@object
	.size	__div_table,128
/* negative division constants */
	.word	-16638, -17135, -17737, -18433
	.word	-19103, -19751, -20583, -21383
	.word	-22343, -23353, -24407, -25582
	.word	-26863, -28382, -29965, -31800
/* negative division factors */
	.byte	66, 70, 75, 81, 87, 93, 101, 109
	.byte	119, 130, 142, 156, 172, 192, 214, 241
	.skip	16
	.global	__div_table
__div_table:
	.skip	16
/* positive division factors */
	.byte	241, 214, 192, 172, 156, 142, 130, 119
	.byte	109, 101, 93, 87, 81, 75, 70, 66
/* positive division constants */
	.word	31801, 29966, 28383, 26864
	.word	25583, 24408, 23354, 22344
	.word	21384, 20584, 19752, 19104
	.word	18434, 17738, 17136, 16639
|
97
arch/sh/lib64/strcpy.S
Normal file
97
arch/sh/lib64/strcpy.S
Normal file
|
@ -0,0 +1,97 @@
|
|||
/* Cloned and hacked for uClibc by Paul Mundt, December 2003 */
|
||||
/* Modified by SuperH, Inc. September 2003 */
|
||||
! Entry: arg0: destination
|
||||
! arg1: source
|
||||
! Exit: result: destination
|
||||
!
|
||||
! SH5 code Copyright 2002 SuperH Ltd.
|
||||
|
||||
/*
 * strcpy
 *
 *   In:  r2 = destination, r3 = source, r18 = return address
 *   r2 is never written, so it still holds the destination on return.
 *
 * The source is read a quadword at a time and mcmpeq.b against r63
 * (zero) flags NUL bytes.  Whole quadwords are stored with
 * stlo.q/sthi.q pairs.  The quadword that contains the terminator is
 * written byte by byte in the copy_tail code.
 */

/* SHHI / SHLO: shift direction depends on the byte order. */
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define SHHI shlld
#define SHLO shlrd
#else
#define SHHI shlrd
#define SHLO shlld
#endif

	.section .text..SHmedia32,"ax"
	.globl	strcpy
	.type	strcpy, @function
	.align 5

strcpy:

	pta/l	copy_tail, tr1
	ldlo.q	r3, 0, r4		/* first (possibly partial) source quadword */
	ptabs	r18, tr4		/* tr4 = return address */
	shlli	r3, 3, r7		/* r7 = 8 * source address, used as shift count */
	addi	r2, 8, r0
	mcmpeq.b r4, r63, r6		/* flag zero bytes in r4 */
	SHHI	r6, r7, r6
	bnei/u	r6, 0, tr1		/* terminator in the first quadword */
	pta/l	first_quad_no_merge, tr2
	ori	r3, -8, r23
	sub	r2, r23, r0
	sub	r3, r2, r21		/* r21 = src - dst     */
	addi	r21, 8, r20		/* r20 = src - dst + 8 */
	ldx.q	r0, r21, r5
	pta/l	quad_loop, tr0
	ori	r2, -8, r22
	mcmpeq.b r5, r63, r6
	bgt/u	r22, r23, tr2

	/*
	 * r22 < r23 : Need to do a load from the destination.
	 * r22 == r23 : Doesn't actually need to load from destination,
	 *              but still can be handled here.
	 */
	ldlo.q	r2, 0, r9
	movi	-1, r8
	SHLO	r8, r7, r8
	mcmv	r4, r8, r9		/* merge source bytes into r9 under mask r8 */
	stlo.q	r2, 0, r9
	beqi/l	r6, 0, tr0		/* no terminator yet: enter the loop */

	add	r5, r63, r4
	addi	r0, 8, r0
	blink	tr1, r63		/* finish in copy_tail */
first_quad_no_merge:
	/*
	 * r22 > r23: note that for r22 == r23 the sthi.q would clobber
	 *            bytes before the destination region.
	 */
	stlo.q	r2, 0, r4
	SHHI	r4, r7, r4
	sthi.q	r0, -1, r4
	beqi/l	r6, 0, tr0		/* no terminator yet: enter the loop */

	add	r5, r63, r4
	addi	r0, 8, r0
copy_tail:
#if __BYTE_ORDER != __LITTLE_ENDIAN
	pta/l	copy_tail_bytes, tr1
	byterev	r4, r4
#endif
copy_tail_bytes:
	/* Store one byte per pass; stop after the zero byte has been written. */
	st.b	r0, -8, r4
	andi	r4, 0xff, r5
	shlri	r4, 8, r4
	addi	r0, 1, r0
	bnei/l	r5, 0, tr1
	blink	tr4, r63		/* return */

	.balign 8
quad_loop:
	/* Two quadwords per pass, alternating between r5 and r4. */
	stlo.q	r0, 0, r5
	ldx.q	r0, r20, r4
	addi	r0, 16, r0
	sthi.q	r0, -9, r5
	mcmpeq.b r4, r63, r6
	bnei/u	r6, 0, tr1		/* terminator in r4: finish in copy_tail */
	ldx.q	r0, r21, r5
	stlo.q	r0, -8, r4
	sthi.q	r0, -1, r4
	mcmpeq.b r5, r63, r6
	beqi/l	r6, 0, tr0		/* no terminator in r5: next pass */

	add	r5, r63, r4
	addi	r0, 8, r0
	blink	tr1, r63		/* finish in copy_tail */

	.size	strcpy,.-strcpy
|
33
arch/sh/lib64/strlen.S
Normal file
33
arch/sh/lib64/strlen.S
Normal file
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Simplistic strlen() implementation for SHmedia.
|
||||
*
|
||||
* Copyright (C) 2003 Paul Mundt <lethal@linux-sh.org>
|
||||
*/
|
||||
|
||||
/*
 * strlen
 *
 *   In:  r2 = string, r18 = return address
 *   Out: r2 = number of bytes before the first zero byte
 *   Scratch: r0, r1, tr0, tr4
 */
	.section .text..SHmedia32,"ax"
	.globl	strlen
	.type	strlen,@function

	.balign 16
strlen:
	ptabs	r18, tr4		/* tr4 = return address */

	/*
	 * A NULL check would be a single instruction here
	 * (beqi r2, 0, tr4), but the wanted behaviour is to fault when
	 * r2 == NULL, so there is deliberately no sanity check.
	 */

	pta/l	next_byte, tr0
	movi	-1, r0			/* count starts at -1: the loop increments first */
next_byte:
	ld.b	r2, 0, r1
	addi	r0, 1, r0
	addi	r2, 1, r2
	bnei/l	r1, 0, tr0		/* keep going until a zero byte was read */

	or	r0, r63, r2		/* r2 = r0 (r63 is the zero register) */
	blink	tr4, r63

	.size	strlen,.-strlen
|
49
arch/sh/lib64/udelay.c
Normal file
49
arch/sh/lib64/udelay.c
Normal file
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* arch/sh/lib64/udelay.c
|
||||
*
|
||||
* Delay routines, using a pre-computed "loops_per_jiffy" value.
|
||||
*
|
||||
* Copyright (C) 2000, 2001 Paolo Alberelli
|
||||
* Copyright (C) 2003, 2004 Paul Mundt
|
||||
*
|
||||
* This file is subject to the terms and conditions of the GNU General Public
|
||||
* License. See the file "COPYING" in the main directory of this archive
|
||||
* for more details.
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <asm/param.h>
|
||||
|
||||
/*
|
||||
* Use only for very small delays (< 1 msec).
|
||||
*
|
||||
* The active part of our cycle counter is only 32-bits wide, and
|
||||
* we're treating the difference between two marks as signed. On
|
||||
* a 1GHz box, that's about 2 seconds.
|
||||
*/
|
||||
|
||||
/*
 * __delay - busy-wait by running a decrement-and-branch loop @loops times.
 *
 * The asm saves tr0 into a scratch register, points tr0 at the
 * decrement instruction, loops until the counter reaches zero, and then
 * restores tr0 from the scratch register.
 */
void __delay(unsigned long loops)
{
	long long dummy;

	/*
	 * The loop decrements before it tests, so a count of zero would
	 * wrap around rather than terminate.  There is nothing to wait
	 * for in that case.
	 */
	if (!loops)
		return;

	__asm__ __volatile__("gettr	tr0, %1\n\t"
			     "pta	$+4, tr0\n\t"
			     "addi	%0, -1, %0\n\t"
			     "bne	%0, r63, tr0\n\t"
			     "ptabs	%1, tr0\n\t":"=r"(loops),
			     "=r"(dummy)
			     :"0"(loops));
}
|
||||
|
||||
void __const_udelay(unsigned long xloops)
|
||||
{
|
||||
__delay(xloops * (HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy));
|
||||
}
|
||||
|
||||
/*
 * __udelay - delay for @usecs microseconds.
 *
 * Converts microseconds to the 2**32-scaled unit taken by
 * __const_udelay().
 */
void __udelay(unsigned long usecs)
{
	/* 2**32 / 1000000 */
	const unsigned long xloops_per_usec = 0x000010c6;

	__const_udelay(usecs * xloops_per_usec);
}
|
||||
|
||||
/*
 * __ndelay - delay for @nsecs nanoseconds.
 *
 * Converts nanoseconds to the unit taken by __const_udelay().
 * 2**32 / 1000000000 is about 4.29; the factor used here is 5.
 */
void __ndelay(unsigned long nsecs)
{
	const unsigned long xloops_per_nsec = 0x00000005;

	__const_udelay(nsecs * xloops_per_nsec);
}
|
120
arch/sh/lib64/udivdi3.S
Normal file
120
arch/sh/lib64/udivdi3.S
Normal file
|
@ -0,0 +1,120 @@
|
|||
/*
 * __udivdi3
 *
 *   In:  r2, r3 (r3 is the divisor - see the divide-by-zero remark
 *        below), r18 = return address
 *   Out: r2
 *   Scratch (written below): r0, r1, r4-r9, r19-r22, r25, tr0
 *
 * A reciprocal estimate of the normalized divisor is refined and then
 * used for successive divide steps.  The path splits on divisor size
 * (large_divisor).  The instruction order is kept exactly as it was.
 */
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	__udivdi3
__udivdi3:
	shlri	r3, 1, r4
	nsb	r4, r22
	shlld	r3, r22, r6		/* r6 = divisor shifted left by r22 */
	shlri	r6, 49, r5
	movi	0xffffffffffffbaf1, r21	/* .l shift count 17.  */
	sub	r21, r5, r1
	mmulfx.w r1, r1, r4
	mshflo.w r1, r63, r1
	sub	r63, r22, r20		/* r63 == 64 % 64 */
	mmulfx.w r5, r4, r4
	pta	large_divisor, tr0
	addi	r20, 32, r9
	msub.w	r1, r4, r1
	madd.w	r1, r1, r1
	mmulfx.w r1, r1, r4
	shlri	r6, 32, r7
	bgt/u	r9, r63, tr0		/* large_divisor */
	mmulfx.w r5, r4, r4
	shlri	r2, 32+14, r19
	addi	r22, -31, r0
	msub.w	r1, r4, r1

	mulu.l	r1, r7, r4
	addi	r1, -3, r5
	mulu.l	r5, r19, r5
	sub	r63, r4, r4		/* Negate to make sure r1 ends up <= 1/r2 */
	shlri	r4, 2, r4		/* chop off leading %0000000000000000 001.00000000000 - or, as
					   the case may be, %0000000000000000 000.11111111111, still */
	muls.l	r1, r4, r4		/* leaving at least one sign bit.  */
	mulu.l	r5, r3, r8
	mshalds.l r1, r21, r1
	shari	r4, 26, r4
	shlld	r8, r0, r8
	add	r1, r4, r1		/* 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) */
	sub	r2, r8, r2
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */

	shlri	r2, 22, r21
	mulu.l	r21, r1, r21
	shlld	r5, r0, r8
	addi	r20, 30-22, r0
	shlrd	r21, r0, r21
	mulu.l	r21, r3, r5
	add	r8, r21, r8
	mcmpgt.l r21, r63, r21		/* See Note 1 */
	addi	r20, 30, r0
	mshfhi.l r63, r21, r21
	sub	r2, r5, r2
	andc	r2, r21, r2

	/* small divisor: need a third divide step */
	mulu.l	r2, r1, r7
	ptabs	r18, tr0
	addi	r2, 1, r2
	shlrd	r7, r0, r7
	mulu.l	r7, r3, r5
	add	r8, r7, r8
	sub	r2, r3, r2
	cmpgt	r2, r5, r5
	add	r8, r5, r2
	/* could test r3 here to check for divide by zero.  */
	blink	tr0, r63

large_divisor:
	mmulfx.w r5, r4, r4
	shlrd	r2, r9, r25
	shlri	r25, 32, r8
	msub.w	r1, r4, r1

	mulu.l	r1, r7, r4
	addi	r1, -3, r5
	mulu.l	r5, r8, r5
	sub	r63, r4, r4		/* Negate to make sure r1 ends up <= 1/r2 */
	shlri	r4, 2, r4		/* chop off leading %0000000000000000 001.00000000000 - or, as
					   the case may be, %0000000000000000 000.11111111111, still */
	muls.l	r1, r4, r4		/* leaving at least one sign bit.  */
	shlri	r5, 14-1, r8
	mulu.l	r8, r7, r5
	mshalds.l r1, r21, r1
	shari	r4, 26, r4
	add	r1, r4, r1		/* 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) */
	sub	r25, r5, r25
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */

	shlri	r25, 22, r21
	mulu.l	r21, r1, r21
	pta	no_lo_adj, tr0
	addi	r22, 32, r0
	shlri	r21, 40, r21
	mulu.l	r21, r7, r5
	add	r8, r21, r8
	shlld	r2, r0, r2
	sub	r25, r5, r25
	bgtu/u	r7, r25, tr0		/* no_lo_adj */
	addi	r8, 1, r8
	sub	r25, r7, r25
no_lo_adj:
	mextr4	r2, r25, r2

	/* large_divisor: only needs a few adjustments.  */
	mulu.l	r8, r6, r5
	ptabs	r18, tr0
	/* bubble */
	cmpgtu	r5, r2, r5
	sub	r8, r5, r2
	blink	tr0, r63
|
||||
|
||||
/* Note 1: To shift the result of the second divide stage so that the result
|
||||
always fits into 32 bits, yet we still reduce the rest sufficiently
|
||||
would require a lot of instructions to do the shifts just right. Using
|
||||
the full 64 bit shift result to multiply with the divisor would require
|
||||
four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
|
||||
Fortunately, if the upper 32 bits of the shift result are nonzero, we
|
||||
know that the rest after taking this partial result into account will
|
||||
fit into 32 bits. So we just clear the upper 32 bits of the rest if the
|
||||
upper 32 bits of the partial result are nonzero. */
|
59
arch/sh/lib64/udivsi3.S
Normal file
59
arch/sh/lib64/udivsi3.S
Normal file
|
@ -0,0 +1,59 @@
|
|||
/*
 * __udivsi3
 *
 *   inputs:    r4, r5
 *   clobbered: r18, r19, r20, r21, r22, r25, tr0
 *   result in r0.
 *
 * The instruction order, including the "bubble" markers, is kept
 * exactly as it was.
 */
	.global	__udivsi3
	.section	.text..SHmedia32,"ax"
	.align	2

__udivsi3:
	addz.l	r5, r63, r22
	nsb	r22, r0
	shlld	r22, r0, r25
	shlri	r25, 48, r25
	movi	0xffffffffffffbb0c, r20	/* shift count equiv 76 */
	sub	r20, r25, r21
	mmulfx.w r21, r21, r19
	mshflo.w r21, r63, r21
	ptabs	r18, tr0		/* tr0 = return address; r18 is reused below */
	mmulfx.w r25, r19, r19
	sub	r20, r0, r0
	/* bubble */
	msub.w	r21, r19, r19

	/*
	 * It would be nice for scheduling to do this add to r21 before
	 * the msub.w, but we need a different value for r19 to keep
	 * errors under control.
	 */
	addi	r19, -2, r21
	mulu.l	r4, r21, r18
	mmulfx.w r19, r19, r19
	shlli	r21, 15, r21
	shlrd	r18, r0, r18
	mulu.l	r18, r22, r20
	mmacnfx.wl r25, r19, r21
	/* bubble */
	sub	r4, r20, r25

	mulu.l	r25, r21, r19
	addi	r0, 14, r0
	/* bubble */
	shlrd	r19, r0, r19
	mulu.l	r19, r22, r20
	add	r18, r19, r18
	/* bubble */
	sub.l	r25, r20, r25

	mulu.l	r25, r21, r19
	addz.l	r25, r63, r25
	sub	r25, r22, r25
	shlrd	r19, r0, r19
	mulu.l	r19, r22, r20
	addi	r25, 1, r25
	add	r18, r19, r18

	cmpgt	r25, r20, r25
	add.l	r18, r25, r0
	blink	tr0, r63
|
Loading…
Add table
Add a link
Reference in a new issue