Mirror of https://github.com/AetherDroid/android_kernel_samsung_on5xelte.git
Synced 2025-09-10 01:12:45 -04:00

Commit f6dfaef42e: Fixed MTP to work with TWRP
50820 changed files with 20846062 additions and 0 deletions
7	arch/cris/arch-v32/lib/Makefile	Normal file
@@ -0,0 +1,7 @@
#
# Makefile for Etrax-specific library files.
#

lib-y  = checksum.o checksumcopy.o string.o usercopy.o memset.o \
	 csumcpfruser.o spinlock.o delay.o strcmp.o
88	arch/cris/arch-v32/lib/checksum.S	Normal file
@@ -0,0 +1,88 @@
/*
 * A fast checksum routine using movem
 * Copyright (c) 1998-2007 Axis Communications AB
 *
 * csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */

	.globl	csum_partial
	.type	csum_partial,@function
csum_partial:

	;; r10 - src
	;; r11 - length
	;; r12 - checksum

	;; Optimized for large packets
	subq	10*4, $r11
	blt	_word_loop
	move.d	$r11, $acr

	subq	9*4,$sp
	clearf	c
	movem	$r8,[$sp]

	;; do a movem checksum

_mloop:	movem	[$r10+],$r9	; read 10 longwords
	;; Loop count without touching the c flag.
	addoq	-10*4, $acr, $acr
	;; perform dword checksumming on the 10 longwords

	addc	$r0,$r12
	addc	$r1,$r12
	addc	$r2,$r12
	addc	$r3,$r12
	addc	$r4,$r12
	addc	$r5,$r12
	addc	$r6,$r12
	addc	$r7,$r12
	addc	$r8,$r12
	addc	$r9,$r12

	;; test $acr without trashing carry.
	move.d	$acr, $acr
	bpl	_mloop
	;; r11 <= acr is not really needed in the mloop, just using the dslot
	;; to prepare for what is needed after mloop.
	move.d	$acr, $r11

	;; fold the last carry into r12
	addc	0, $r12
	movem	[$sp+],$r8	; restore regs

_word_loop:
	addq	10*4,$r11	; compensate for last loop underflowing length

	moveq	-1,$r9		; put 0xffff in r9, faster than move.d 0xffff,r9
	lsrq	16,$r9

	move.d	$r12,$r13
	lsrq	16,$r13		; r13 = checksum >> 16
	and.d	$r9,$r12	; checksum = checksum & 0xffff

_no_fold:
	subq	2,$r11
	blt	_no_words
	add.d	$r13,$r12	; checksum += r13

	;; checksum the rest of the words
_wloop:	subq	2,$r11
	bge	_wloop
	addu.w	[$r10+],$r12

_no_words:
	addq	2,$r11
	;; see if we have one odd byte more
	bne	_do_byte
	nop
	ret
	move.d	$r12,$r10

_do_byte:
	;; checksum the last byte
	addu.b	[$r10],$r12
	ret
	move.d	$r12,$r10

	.size	csum_partial, .-csum_partial
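For readers who don't speak CRIS assembly, a minimal C model of what csum_partial computes may help: a partial ones-complement sum over little-endian 16-bit words, with the odd trailing byte handled like _do_byte above. The helper name and the 64-bit accumulator are illustrative assumptions, not kernel code; since ones-complement folding is order-independent, this agrees with the assembly's result once folded to 16 bits.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical reference model of csum_partial (not kernel code). */
uint32_t csum_partial_ref(const unsigned char *buff, size_t len, uint32_t sum)
{
    uint64_t acc = sum;

    while (len >= 2) {                          /* the _wloop 16-bit adds */
        acc += (uint32_t)(buff[0] | (buff[1] << 8));  /* LE 16-bit word */
        buff += 2;
        len -= 2;
    }
    if (len)                                    /* odd trailing byte (_do_byte) */
        acc += buff[0];

    while (acc >> 32)                           /* fold carries back in, like addc 0 */
        acc = (acc & 0xffffffffu) + (acc >> 32);
    return (uint32_t)acc;
}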
94	arch/cris/arch-v32/lib/checksumcopy.S	Normal file
@@ -0,0 +1,94 @@
/*
 * A fast checksum+copy routine using movem
 * Copyright (c) 1998-2007 Axis Communications AB
 *
 * Authors: Bjorn Wesen
 *
 * csum_partial_copy_nocheck(const char *src, char *dst,
 *			     int len, unsigned int sum)
 */

	.globl	csum_partial_copy_nocheck
	.type	csum_partial_copy_nocheck,@function
csum_partial_copy_nocheck:

	;; r10 - src
	;; r11 - dst
	;; r12 - length
	;; r13 - checksum

	;; Optimized for large packets
	subq	10*4, $r12
	blt	_word_loop
	move.d	$r12, $acr

	subq	9*4,$sp
	clearf	c
	movem	$r8,[$sp]

	;; do a movem copy and checksum
1:	;; A failing userspace access (the read) will have this as PC.
_mloop:	movem	[$r10+],$r9	; read 10 longwords
	addoq	-10*4, $acr, $acr ; loop counter in latency cycle
	movem	$r9,[$r11+]	; write 10 longwords

	;; perform dword checksumming on the 10 longwords
	addc	$r0,$r13
	addc	$r1,$r13
	addc	$r2,$r13
	addc	$r3,$r13
	addc	$r4,$r13
	addc	$r5,$r13
	addc	$r6,$r13
	addc	$r7,$r13
	addc	$r8,$r13
	addc	$r9,$r13

	;; test $acr, without trashing carry.
	move.d	$acr, $acr
	bpl	_mloop
	;; r12 <= acr is needed after mloop and in the exception handlers.
	move.d	$acr, $r12

	;; fold the last carry into r13
	addc	0, $r13
	movem	[$sp+],$r8	; restore regs

_word_loop:
	addq	10*4,$r12	; compensate for last loop underflowing length

	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
	;; r9 can be used as temporary.
	move.d	$r13,$r9
	lsrq	16,$r9		; r9 = checksum >> 16
	and.d	0xffff,$r13	; checksum = checksum & 0xffff

	subq	2, $r12
	blt	_no_words
	add.d	$r9,$r13	; checksum += r9

	;; copy and checksum the rest of the words
2:	;; A failing userspace access for the read below will have this as PC.
_wloop:	move.w	[$r10+],$r9
	addu.w	$r9,$r13
	subq	2,$r12
	bge	_wloop
	move.w	$r9,[$r11+]

_no_words:
	addq	2,$r12
	bne	_do_byte
	nop
	ret
	move.d	$r13,$r10

_do_byte:
	;; copy and checksum the last byte
3:	;; A failing userspace access for the read below will have this as PC.
	move.b	[$r10],$r9
	addu.b	$r9,$r13
	move.b	$r9,[$r11]
	ret
	move.d	$r13,$r10

	.size	csum_partial_copy_nocheck, . - csum_partial_copy_nocheck
69	arch/cris/arch-v32/lib/csumcpfruser.S	Normal file
@@ -0,0 +1,69 @@
/*
 * Add-on to transform csum_partial_copy_nocheck in checksumcopy.S into
 * csum_partial_copy_from_user by adding exception records.
 *
 * Copyright (C) 2001, 2003 Axis Communications AB.
 *
 * Author: Hans-Peter Nilsson.
 */

#include <asm/errno.h>

/* Same function body, but a different name.  If we just added exception
   records to _csum_partial_copy_nocheck and made it generic, we wouldn't
   know a user fault from a kernel fault and we would have overhead in
   each kernel caller for the error-pointer argument.

   unsigned int csum_partial_copy_from_user
     (const char *src, char *dst, int len, unsigned int sum, int *errptr);

   Note that the errptr argument is only set if we encounter an error.
   It is conveniently located on the stack, so the normal function body
   does not have to handle it.  */

#define csum_partial_copy_nocheck csum_partial_copy_from_user

/* There are local labels numbered 1, 2 and 3 present to mark the
   different from-user accesses.  */
#include "checksumcopy.S"

	.section .fixup,"ax"

;; Here from the movem loop; restore stack.
4:
	movem	[$sp+],$r8
;; r12 is already decremented.  Add back chunk_size-2.
	addq	40-2,$r12

;; Here from the word loop; r12 is off by 2; add it back.
5:
	addq	2,$r12

;; Here from a failing single byte.
6:

;; Signal in *errptr that we had a failing access.
	move.d	[$sp],$acr
	moveq	-EFAULT,$r9
	subq	4,$sp
	move.d	$r9,[$acr]

;; Clear the rest of the destination area using memset.  Preserve the
;; checksum for the readable bytes.
	move.d	$r13,[$sp]
	subq	4,$sp
	move.d	$r11,$r10
	move	$srp,[$sp]
	jsr	memset
	clear.d	$r11

	move	[$sp+],$srp
	ret
	move.d	[$sp+],$r10

	.previous
	.section __ex_table,"a"
	.dword 1b,4b
	.dword 2b,5b
	.dword 3b,6b
	.previous
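The comment block above fixes the calling convention; a sketch of a caller may make the errptr contract concrete. All names here (checksum_from_user, kbuf, usrc) are illustrative assumptions, not kernel API: *errptr is written only when a user access faults, and the fixup code has already cleared the unread tail of the destination.

#include <errno.h>

/* Prototype from the comment above; the body lives in csumcpfruser.S. */
unsigned int csum_partial_copy_from_user(const char *src, char *dst,
                                         int len, unsigned int sum,
                                         int *errptr);

/* Hypothetical caller, for illustration only. */
static int checksum_from_user(char *kbuf, const char *usrc, int len,
                              unsigned int *csum)
{
    int err = 0;                 /* written only on a faulting access */

    *csum = csum_partial_copy_from_user(usrc, kbuf, len, 0, &err);
    return err;                  /* 0 on success, -EFAULT on a fault */
}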
28	arch/cris/arch-v32/lib/delay.c	Normal file
@@ -0,0 +1,28 @@
/*
 * Precise Delay Loops for ETRAX FS
 *
 * Copyright (C) 2006 Axis Communications AB.
 *
 */

#include <hwregs/reg_map.h>
#include <hwregs/reg_rdwr.h>
#include <hwregs/timer_defs.h>
#include <linux/types.h>
#include <linux/delay.h>
#include <linux/module.h>

/*
 * On ETRAX FS, we can check the free-running read-only 100 MHz timer,
 * getting 32-bit 10 ns precision, theoretically good for 42.94967295
 * seconds.  Unsigned arithmetic and a careful expression handle
 * wrapping.
 */

void cris_delay10ns(u32 n10ns)
{
	u32 t0 = REG_RD(timer, regi_timer0, r_time);
	while (REG_RD(timer, regi_timer0, r_time) - t0 < n10ns)
		;
}
EXPORT_SYMBOL(cris_delay10ns);
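The wrap-safety claim deserves a number: at 100 MHz the 32-bit counter covers 2^32 x 10 ns, about 42.95 s, before wrapping, and the unsigned subtraction in the loop condition yields the right delta even across a wrap. A small stand-alone C check of the same idiom (helper name is hypothetical):

#include <assert.h>
#include <stdint.h>

/* Same idiom as the loop condition in cris_delay10ns above: unsigned
   subtraction is modulo 2^32, so "now - t0" is the true tick count
   even when the counter wraps between the two reads. */
static int elapsed_at_least(uint32_t now, uint32_t t0, uint32_t ticks)
{
    return now - t0 >= ticks;
}

int main(void)
{
    uint32_t t0 = 0xfffffff0u;          /* just before the wrap */
    uint32_t now = t0 + 100u;           /* wraps around to 0x00000054 */
    assert(elapsed_at_least(now, t0, 100));
    assert(!elapsed_at_least(now, t0, 101));
    return 0;
}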
259	arch/cris/arch-v32/lib/memset.c	Normal file
@@ -0,0 +1,259 @@
/* A memset for CRIS.
   Copyright (C) 1999-2005 Axis Communications.
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   1. Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

   2. Neither the name of Axis Communications nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL AXIS
   COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
   IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.  */

/* FIXME: This file should really only be used for reference, as the
   result is somewhat depending on gcc generating what we expect rather
   than what we describe.  An assembly file should be used instead.  */

/* Note the multiple occurrence of the expression "12*4", including the
   asm.  It is hard to get it into the asm in a good way.  Thus better to
   expose the problem everywhere: no macro.  */

/* Assuming one cycle per dword written or read (ok, not really true; the
   world is not ideal), and one cycle per instruction, then 43+3*(n/48-1)
   <= 24+24*(n/48-1) so n >= 45.7; n >= 0.9; we win on the first full
   48-byte block to set.  */

#define MEMSET_BY_BLOCK_THRESHOLD (1 * 48)

/* No name ambiguities in this file.  */
__asm__ (".syntax no_register_prefix");

void *memset(void *pdst, int c, unsigned int plen)
{
  /* Now we want the parameters in special registers.  Make sure the
     compiler does something usable with this.  */

  register char *return_dst __asm__ ("r10") = pdst;
  register int n __asm__ ("r12") = plen;
  register int lc __asm__ ("r11") = c;

  /* Most apps use memset sanely.  Memsetting about 3..4 bytes or less
     gets penalized here compared to the generic implementation.  */

  /* This is fragile performancewise at best.  Check with newer GCC
     releases, if they compile cascaded "x |= x << 8" to sane code.  */
  __asm__("movu.b %0,r13 \n\
	   lslq 8,r13 \n\
	   move.b %0,r13 \n\
	   move.d r13,%0 \n\
	   lslq 16,r13 \n\
	   or.d r13,%0"
	  : "=r" (lc)		/* Outputs.  */
	  : "0" (lc)		/* Inputs.  */
	  : "r13");		/* Trash.  */

  {
    register char *dst __asm__ ("r13") = pdst;

    if (((unsigned long) pdst & 3) != 0
	/* Oops! n = 0 must be a valid call, regardless of alignment.  */
	&& n >= 3)
      {
	if ((unsigned long) dst & 1)
	  {
	    *dst = (char) lc;
	    n--;
	    dst++;
	  }

	if ((unsigned long) dst & 2)
	  {
	    *(short *) dst = lc;
	    n -= 2;
	    dst += 2;
	  }
      }

    /* Decide which setting method to use.  */
    if (n >= MEMSET_BY_BLOCK_THRESHOLD)
      {
	/* It is not optimal to tell the compiler about clobbering any
	   registers; that will move the saving/restoring of those registers
	   to the function prologue/epilogue, and make non-block sizes
	   suboptimal.  */
	__asm__ volatile
	  ("\
	   ;; GCC does promise correct register allocations, but let's \n\
	   ;; make sure it keeps its promises. \n\
	   .ifnc %0-%1-%4,$r13-$r12-$r11 \n\
	   .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\" \n\
	   .endif \n\
	   \n\
	   ;; Save the registers we'll clobber in the movem process \n\
	   ;; on the stack.  Don't mention them to gcc, it will only be \n\
	   ;; upset. \n\
	   subq	11*4,sp \n\
	   movem r10,[sp] \n\
	   \n\
	   move.d r11,r0 \n\
	   move.d r11,r1 \n\
	   move.d r11,r2 \n\
	   move.d r11,r3 \n\
	   move.d r11,r4 \n\
	   move.d r11,r5 \n\
	   move.d r11,r6 \n\
	   move.d r11,r7 \n\
	   move.d r11,r8 \n\
	   move.d r11,r9 \n\
	   move.d r11,r10 \n\
	   \n\
	   ;; Now we've got this: \n\
	   ;; r13 - dst \n\
	   ;; r12 - n \n\
	   \n\
	   ;; Update n for the first loop \n\
	   subq	12*4,r12 \n\
0: \n\
"
#ifdef __arch_common_v10_v32
	   /* Cater to branch offset difference between v32 and v10.  We
	      assume the branch below has an 8-bit offset.  */
"	   setf\n"
#endif
"	   subq	12*4,r12 \n\
	   bge	0b \n\
	   movem r11,[r13+] \n\
	   \n\
	   ;; Compensate for last loop underflowing n. \n\
	   addq	12*4,r12 \n\
	   \n\
	   ;; Restore registers from stack. \n\
	   movem [sp+],r10"

	   /* Outputs.  */
	   : "=r" (dst), "=r" (n)

	   /* Inputs.  */
	   : "0" (dst), "1" (n), "r" (lc));
      }

    /* An ad-hoc unroll, used for 4*12-1..16 bytes.  */
    while (n >= 16)
      {
	*(long *) dst = lc; dst += 4;
	*(long *) dst = lc; dst += 4;
	*(long *) dst = lc; dst += 4;
	*(long *) dst = lc; dst += 4;
	n -= 16;
      }

    switch (n)
      {
      case 0:
	break;

      case 1:
	*dst = (char) lc;
	break;

      case 2:
	*(short *) dst = (short) lc;
	break;

      case 3:
	*(short *) dst = (short) lc; dst += 2;
	*dst = (char) lc;
	break;

      case 4:
	*(long *) dst = lc;
	break;

      case 5:
	*(long *) dst = lc; dst += 4;
	*dst = (char) lc;
	break;

      case 6:
	*(long *) dst = lc; dst += 4;
	*(short *) dst = (short) lc;
	break;

      case 7:
	*(long *) dst = lc; dst += 4;
	*(short *) dst = (short) lc; dst += 2;
	*dst = (char) lc;
	break;

      case 8:
	*(long *) dst = lc; dst += 4;
	*(long *) dst = lc;
	break;

      case 9:
	*(long *) dst = lc; dst += 4;
	*(long *) dst = lc; dst += 4;
	*dst = (char) lc;
	break;

      case 10:
	*(long *) dst = lc; dst += 4;
	*(long *) dst = lc; dst += 4;
	*(short *) dst = (short) lc;
	break;

      case 11:
	*(long *) dst = lc; dst += 4;
	*(long *) dst = lc; dst += 4;
	*(short *) dst = (short) lc; dst += 2;
	*dst = (char) lc;
	break;

      case 12:
	*(long *) dst = lc; dst += 4;
	*(long *) dst = lc; dst += 4;
	*(long *) dst = lc;
	break;

      case 13:
	*(long *) dst = lc; dst += 4;
	*(long *) dst = lc; dst += 4;
	*(long *) dst = lc; dst += 4;
	*dst = (char) lc;
	break;

      case 14:
	*(long *) dst = lc; dst += 4;
	*(long *) dst = lc; dst += 4;
	*(long *) dst = lc; dst += 4;
	*(short *) dst = (short) lc;
	break;

      case 15:
	*(long *) dst = lc; dst += 4;
	*(long *) dst = lc; dst += 4;
	*(long *) dst = lc; dst += 4;
	*(short *) dst = (short) lc; dst += 2;
	*dst = (char) lc;
	break;
      }
  }

  return return_dst;
}
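The movu.b/lslq/move.b/lslq/or.d sequence in the first asm block is the CRIS spelling of the cascaded "x |= x << 8" trick the comment mentions. A plain C version shows the intent (helper name is hypothetical):

#include <assert.h>
#include <stdint.h>

/* Replicate the fill byte into all four bytes of a 32-bit word, as
   the inline asm above does with movu.b/lslq/or.d. */
static uint32_t spread_fill_byte(int c)
{
    uint32_t lc = (uint8_t)c;
    lc |= lc << 8;               /* 0x000000ab -> 0x0000abab */
    lc |= lc << 16;              /* 0x0000abab -> 0xabababab */
    return lc;
}

int main(void)
{
    assert(spread_fill_byte(0xab) == 0xababababu);
    return 0;
}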
40	arch/cris/arch-v32/lib/spinlock.S	Normal file
@@ -0,0 +1,40 @@
;; Core of the spinlock implementation
;;
;; Copyright (C) 2004 Axis Communications AB.
;;
;; Author: Mikael Starvik

	.global cris_spin_lock
	.type cris_spin_lock,@function
	.global cris_spin_trylock
	.type cris_spin_trylock,@function

	.text

cris_spin_lock:
	clearf	p
1:	test.b	[$r10]
	beq	1b
	clearf	p
	ax
	clear.b	[$r10]
	bcs	1b
	clearf	p
	ret
	nop

	.size cris_spin_lock, . - cris_spin_lock

cris_spin_trylock:
	clearf	p
1:	move.b	[$r10], $r11
	ax
	clear.b	[$r10]
	bcs	1b
	clearf	p
	ret
	movu.b	$r11,$r10

	.size cris_spin_trylock, . - cris_spin_trylock
arch/cris/arch-v32/lib/strcmp.S
Normal file
21
arch/cris/arch-v32/lib/strcmp.S
Normal file
|
@ -0,0 +1,21 @@
|
|||
; strcmp.S -- CRISv32 version.
|
||||
; Copyright (C) 2008 AXIS Communications AB
|
||||
; Written by Edgar E. Iglesias
|
||||
;
|
||||
; This source code is licensed under the GNU General Public License,
|
||||
; Version 2. See the file COPYING for more details.
|
||||
|
||||
.global strcmp
|
||||
.type strcmp,@function
|
||||
strcmp:
|
||||
1:
|
||||
move.b [$r10+], $r12
|
||||
seq $r13
|
||||
sub.b [$r11+], $r12
|
||||
or.b $r12, $r13
|
||||
beq 1b
|
||||
nop
|
||||
|
||||
ret
|
||||
movs.b $r12, $r10
|
||||
.size strcmp, . - strcmp
|
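The loop above folds the end-of-string test and the difference test into one branch: seq records whether the byte just loaded was NUL, sub.b computes the byte difference, and or.b makes the loop exit when either is nonzero. A C model, under the assumption of a hypothetical helper name:

/* C model of the strcmp loop above; the return value is the byte
   difference modulo 256, sign-extended as movs.b does. */
static int strcmp_model(const unsigned char *a, const unsigned char *b)
{
    int at_end, diff;

    do {
        unsigned char ca = *a++;
        at_end = (ca == 0);                   /* seq $r13 */
        diff = (signed char)(ca - *b++);      /* sub.b + movs.b */
    } while (diff == 0 && !at_end);           /* or.b / beq 1b */

    return diff;
}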
236	arch/cris/arch-v32/lib/string.c	Normal file
@@ -0,0 +1,236 @@
/* A memcpy for CRIS.
   Copyright (C) 1994-2005 Axis Communications.
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   1. Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

   2. Neither the name of Axis Communications nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL AXIS
   COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
   IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.  */

/* FIXME: This file should really only be used for reference, as the
   result is somewhat depending on gcc generating what we expect rather
   than what we describe.  An assembly file should be used instead.  */

#include <stddef.h>

/* Break even between movem and move16 is really at 38.7 * 2, but
   modulo 44, so up to the next multiple of 44, we use ordinary code.  */
#define MEMCPY_BY_BLOCK_THRESHOLD (44 * 2)

/* No name ambiguities in this file.  */
__asm__ (".syntax no_register_prefix");

void *
memcpy(void *pdst, const void *psrc, size_t pn)
{
  /* Now we want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     If gcc was alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register void *return_dst __asm__ ("r10") = pdst;
  register unsigned char *dst __asm__ ("r13") = pdst;
  register unsigned const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;

  /* When src is aligned but not dst, this makes a few extra needless
     cycles.  I believe it would take as many to check that the
     re-alignment was unnecessary.  */
  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes; so we
	 don't have to check further for overflows.  */
      && n >= 3)
    {
      if ((unsigned long) dst & 1)
	{
	  n--;
	  *dst = *src;
	  src++;
	  dst++;
	}

      if ((unsigned long) dst & 2)
	{
	  n -= 2;
	  *(short *) dst = *(short *) src;
	  src += 2;
	  dst += 2;
	}
    }

  /* Decide which copying method to use.  */
  if (n >= MEMCPY_BY_BLOCK_THRESHOLD)
    {
      /* It is not optimal to tell the compiler about clobbering any
	 registers; that will move the saving/restoring of those registers
	 to the function prologue/epilogue, and make non-movem sizes
	 suboptimal.  */
      __asm__ volatile
	("\
	 ;; GCC does promise correct register allocations, but let's \n\
	 ;; make sure it keeps its promises. \n\
	 .ifnc %0-%1-%2,$r13-$r11-$r12 \n\
	 .error \"GCC reg alloc bug: %0-%1-%2 != $r13-$r11-$r12\" \n\
	 .endif \n\
	 \n\
	 ;; Save the registers we'll use in the movem process \n\
	 ;; on the stack. \n\
	 subq	11*4,sp \n\
	 movem	r10,[sp] \n\
	 \n\
	 ;; Now we've got this: \n\
	 ;; r11 - src \n\
	 ;; r13 - dst \n\
	 ;; r12 - n \n\
	 \n\
	 ;; Update n for the first loop. \n\
	 subq	44,r12 \n\
0: \n\
"
#ifdef __arch_common_v10_v32
	 /* Cater to branch offset difference between v32 and v10.  We
	    assume the branch below has an 8-bit offset.  */
"	 setf\n"
#endif
"	 movem	[r11+],r10 \n\
	 subq	44,r12 \n\
	 bge	0b \n\
	 movem	r10,[r13+] \n\
	 \n\
	 ;; Compensate for last loop underflowing n. \n\
	 addq	44,r12 \n\
	 \n\
	 ;; Restore registers from stack. \n\
	 movem	[sp+],r10"

	 /* Outputs.  */
	 : "=r" (dst), "=r" (src), "=r" (n)

	 /* Inputs.  */
	 : "0" (dst), "1" (src), "2" (n));
    }

  while (n >= 16)
    {
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(long *) dst = *(long *) src; dst += 4; src += 4;

      n -= 16;
    }

  switch (n)
    {
    case 0:
      break;

    case 1:
      *dst = *src;
      break;

    case 2:
      *(short *) dst = *(short *) src;
      break;

    case 3:
      *(short *) dst = *(short *) src; dst += 2; src += 2;
      *dst = *src;
      break;

    case 4:
      *(long *) dst = *(long *) src;
      break;

    case 5:
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *dst = *src;
      break;

    case 6:
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(short *) dst = *(short *) src;
      break;

    case 7:
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(short *) dst = *(short *) src; dst += 2; src += 2;
      *dst = *src;
      break;

    case 8:
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(long *) dst = *(long *) src;
      break;

    case 9:
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *dst = *src;
      break;

    case 10:
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(short *) dst = *(short *) src;
      break;

    case 11:
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(short *) dst = *(short *) src; dst += 2; src += 2;
      *dst = *src;
      break;

    case 12:
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(long *) dst = *(long *) src;
      break;

    case 13:
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *dst = *src;
      break;

    case 14:
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(short *) dst = *(short *) src;
      break;

    case 15:
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(long *) dst = *(long *) src; dst += 4; src += 4;
      *(short *) dst = *(short *) src; dst += 2; src += 2;
      *dst = *src;
      break;
    }

  return return_dst;
}
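memcpy and memset above share the same prologue shape: peel one byte if dst is odd, then one halfword if it is only 2-aligned, so the block loop always runs on a 4-byte-aligned dst. A stand-alone C sketch of just that head (helper name is hypothetical):

#include <stddef.h>
#include <stdint.h>

/* Align dst to 4 bytes by copying at most 3 head bytes; returns the
   remaining count.  Mirrors the "& 1" / "& 2" steps in memcpy above. */
static size_t copy_align_head(unsigned char **dstp,
                              const unsigned char **srcp, size_t n)
{
    unsigned char *dst = *dstp;
    const unsigned char *src = *srcp;

    if (((uintptr_t)dst & 3) != 0 && n >= 3) {
        if ((uintptr_t)dst & 1) {            /* odd address: one byte */
            *dst++ = *src++;
            n--;
        }
        if ((uintptr_t)dst & 2) {            /* 2-aligned: one halfword */
            *dst++ = *src++;
            *dst++ = *src++;
            n -= 2;
        }
    }
    *dstp = dst;
    *srcp = src;
    return n;
}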
470	arch/cris/arch-v32/lib/usercopy.c	Normal file
@@ -0,0 +1,470 @@
/*
 * User address space access functions.
 * The non-inlined parts of asm-cris/uaccess.h are here.
 *
 * Copyright (C) 2000, 2003 Axis Communications AB.
 *
 * Written by Hans-Peter Nilsson.
 * Pieces used from memcpy, originally by Kenny Ranerup long time ago.
 */

#include <asm/uaccess.h>

/* Asm:s have been tweaked (within the domain of correctness) to give
   satisfactory results for "gcc version 3.2.1 Axis release R53/1.53-v32".

   Check regularly...

   Note that for CRISv32, the PC saved at a bus-fault is the address
   *at* the faulting instruction, with a special case for instructions
   in delay slots: then it's the address of the branch.  Note also that
   in contrast to v10, a postincrement in the instruction is *not*
   performed at a bus-fault; the register is seen having the original
   value in fault handlers.  */


/* Copy to userspace.  This is based on the memcpy used for
   kernel-to-kernel copying; see "string.c".  */

unsigned long
__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  /* When src is aligned but not dst, this makes a few extra needless
     cycles.  I believe it would take as many to check that the
     re-alignment was unnecessary.  */
  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes; so we
	 don't have to check further for overflows.  */
      && n >= 3)
    {
      if ((unsigned long) dst & 1)
	{
	  __asm_copy_to_user_1 (dst, src, retn);
	  n--;
	}

      if ((unsigned long) dst & 2)
	{
	  __asm_copy_to_user_2 (dst, src, retn);
	  n -= 2;
	}
    }

  /* Movem is dirt cheap.  The overhead is low enough to always use the
     minimum possible block size as the threshold.  */
  if (n >= 44)
    {
      /* For large copies we use 'movem'.  */

      /* It is not optimal to tell the compiler about clobbering any
	 registers; that will move the saving/restoring of those registers
	 to the function prologue/epilogue, and make non-movem sizes
	 suboptimal.  */
      __asm__ volatile ("\
	;; Check that the register asm declaration got right. \n\
	;; The GCC manual explicitly says TRT will happen. \n\
	.ifnc %0%1%2%3,$r13$r11$r12$r10 \n\
	.err \n\
	.endif \n\
	\n\
	;; Save the registers we'll use in the movem process \n\
	;; on the stack. \n\
	subq	11*4,$sp \n\
	movem	$r10,[$sp] \n\
	\n\
	;; Now we've got this: \n\
	;; r11 - src \n\
	;; r13 - dst \n\
	;; r12 - n \n\
	\n\
	;; Update n for the first loop \n\
	subq	44,$r12 \n\
0: \n\
	movem	[$r11+],$r10 \n\
	subq	44,$r12 \n\
1:	bge	0b \n\
	movem	$r10,[$r13+] \n\
3: \n\
	addq	44,$r12  ;; compensate for last loop underflowing n \n\
	\n\
	;; Restore registers from stack \n\
	movem	[$sp+],$r10 \n\
2: \n\
	.section .fixup,\"ax\" \n\
4: \n\
; When failing on any of the 1..44 bytes in a chunk, we adjust back the \n\
; source pointer and just drop through to the by-16 and by-4 loops to \n\
; get the correct number of failing bytes.  This necessarily means a \n\
; few extra exceptions, but invalid user pointers shouldn't happen in \n\
; time-critical code anyway. \n\
	jump	3b \n\
	subq	44,$r11 \n\
	\n\
	.previous \n\
	.section __ex_table,\"a\" \n\
	.dword 1b,4b \n\
	.previous"

     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));

    }

  while (n >= 16)
    {
      __asm_copy_to_user_16 (dst, src, retn);
      n -= 16;
    }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME: Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
    {
      __asm_copy_to_user_4 (dst, src, retn);
      n -= 4;
    }

  switch (n)
    {
    case 0:
      break;
    case 1:
      __asm_copy_to_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_to_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_to_user_3 (dst, src, retn);
      break;
    }

  return retn;
}

/* Copy from user to kernel, zeroing the bytes that were inaccessible in
   userland.  The return-value is the number of bytes that were
   inaccessible.  */

unsigned long
__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;

  /* The best reason to align src is that we then know that a read-fault
     was for aligned bytes; there's no 1..3 remaining good bytes to
     pickle.  */
  if (((unsigned long) src & 3) != 0)
    {
      if (((unsigned long) src & 1) && n != 0)
	{
	  __asm_copy_from_user_1 (dst, src, retn);
	  n--;
	}

      if (((unsigned long) src & 2) && n >= 2)
	{
	  __asm_copy_from_user_2 (dst, src, retn);
	  n -= 2;
	}

      /* We only need one check after the unalignment-adjustments, because
	 if both adjustments were done, either both or neither reference
	 had an exception.  */
      if (retn != 0)
	goto copy_exception_bytes;
    }

  /* Movem is dirt cheap.  The overhead is low enough to always use the
     minimum possible block size as the threshold.  */
  if (n >= 44)
    {
      /* It is not optimal to tell the compiler about clobbering any
	 registers; that will move the saving/restoring of those registers
	 to the function prologue/epilogue, and make non-movem sizes
	 suboptimal.  */
      __asm__ volatile ("\
	.ifnc %0%1%2%3,$r13$r11$r12$r10 \n\
	.err \n\
	.endif \n\
	\n\
	;; Save the registers we'll use in the movem process \n\
	;; on the stack. \n\
	subq	11*4,$sp \n\
	movem	$r10,[$sp] \n\
	\n\
	;; Now we've got this: \n\
	;; r11 - src \n\
	;; r13 - dst \n\
	;; r12 - n \n\
	\n\
	;; Update n for the first loop \n\
	subq	44,$r12 \n\
0: \n\
	movem	[$r11+],$r10 \n\
	\n\
	subq	44,$r12 \n\
	bge	0b \n\
	movem	$r10,[$r13+] \n\
	\n\
4: \n\
	addq	44,$r12  ;; compensate for last loop underflowing n \n\
	\n\
	;; Restore registers from stack \n\
	movem	[$sp+],$r10 \n\
	.section .fixup,\"ax\" \n\
	\n\
;; Do not jump back into the loop if we fail.  For some uses, we get a \n\
;; page fault somewhere on the line.  Without checking for page limits, \n\
;; we don't know where, but we need to copy accurately and keep an \n\
;; accurate count; not just clear the whole line.  To do that, we fall \n\
;; down in the code below, proceeding with smaller amounts.  It should \n\
;; be kept in mind that we have to cater to code like what at one time \n\
;; was in fs/super.c: \n\
;;  i = size - copy_from_user((void *)page, data, size); \n\
;; which would cause repeated faults while clearing the remainder of \n\
;; the SIZE bytes at PAGE after the first fault. \n\
;; A caveat here is that we must not fall through from a failing page \n\
;; to a valid page. \n\
	\n\
3: \n\
	jump	4b  ;; Fall through, pretending the fault didn't happen. \n\
	nop \n\
	\n\
	.previous \n\
	.section __ex_table,\"a\" \n\
	.dword 0b,3b \n\
	.previous"

     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
    }

  /* Either we directly start copying here, using dword copying in a loop,
     or we copy as much as possible with 'movem' and then the last block
     (<44 bytes) is copied here.  This will work since 'movem' will have
     updated src, dst and n.  (Except with failing src.)

     Since we want to keep src accurate, we can't use
     __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and
     retn, but not src (by design; its value is ignored elsewhere).  */

  while (n >= 4)
    {
      __asm_copy_from_user_4 (dst, src, retn);
      n -= 4;

      if (retn)
	goto copy_exception_bytes;
    }

  /* If we get here, there were no memory read faults.  */
  switch (n)
    {
      /* These copies are at least "naturally aligned" (so we don't have
	 to check each byte), due to the src alignment code before the
	 movem loop.  The *_3 case *will* get the correct count for retn.  */
    case 0:
      /* This case deliberately left in (if you have doubts check the
	 generated assembly code).  */
      break;
    case 1:
      __asm_copy_from_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_from_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_from_user_3 (dst, src, retn);
      break;
    }

  /* If we get here, retn correctly reflects the number of failing
     bytes.  */
  return retn;

 copy_exception_bytes:
  /* We already have "retn" bytes cleared, and need to clear the
     remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
     memset is preferred here, since this isn't speed-critical code and
     we'd rather have this a leaf-function than calling memset.  */
  {
    char *endp;
    for (endp = dst + n; dst < endp; dst++)
      *dst = 0;
  }

  return retn + n;
}

/* Zero userspace.  */

unsigned long
__do_clear_user (void __user *pto, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pto;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes.  */
      && n >= 3)
    {
      if ((unsigned long) dst & 1)
	{
	  __asm_clear_1 (dst, retn);
	  n--;
	}

      if ((unsigned long) dst & 2)
	{
	  __asm_clear_2 (dst, retn);
	  n -= 2;
	}
    }

  /* Decide which copying method to use.
     FIXME: This number is from the "ordinary" kernel memset.  */
  if (n >= 48)
    {
      /* For large clears we use 'movem' */

      /* It is not optimal to tell the compiler about clobbering any
	 call-saved registers; that will move the saving/restoring of
	 those registers to the function prologue/epilogue, and make
	 non-movem sizes suboptimal.

	 This method is not foolproof; it assumes that the "asm reg"
	 declarations at the beginning of the function really are used
	 here (beware: they may be moved to temporary registers).
	 This way, we do not have to save/move the registers around into
	 temporaries; we can safely use them straight away.

	 If you want to check that the allocation was right; then
	 check the equalities in the first comment.  It should say
	 something like "r13=r13, r11=r11, r12=r12".  */
      __asm__ volatile ("\
	.ifnc %0%1%2,$r13$r12$r10 \n\
	.err \n\
	.endif \n\
	\n\
	;; Save the registers we'll clobber in the movem process \n\
	;; on the stack.  Don't mention them to gcc, it will only be \n\
	;; upset. \n\
	subq	11*4,$sp \n\
	movem	$r10,[$sp] \n\
	\n\
	clear.d	$r0 \n\
	clear.d	$r1 \n\
	clear.d	$r2 \n\
	clear.d	$r3 \n\
	clear.d	$r4 \n\
	clear.d	$r5 \n\
	clear.d	$r6 \n\
	clear.d	$r7 \n\
	clear.d	$r8 \n\
	clear.d	$r9 \n\
	clear.d	$r10 \n\
	clear.d	$r11 \n\
	\n\
	;; Now we've got this: \n\
	;; r13 - dst \n\
	;; r12 - n \n\
	\n\
	;; Update n for the first loop \n\
	subq	12*4,$r12 \n\
0: \n\
	subq	12*4,$r12 \n\
1: \n\
	bge	0b \n\
	movem	$r11,[$r13+] \n\
	\n\
	addq	12*4,$r12  ;; compensate for last loop underflowing n \n\
	\n\
	;; Restore registers from stack \n\
	movem	[$sp+],$r10 \n\
2: \n\
	.section .fixup,\"ax\" \n\
3: \n\
	movem	[$sp],$r10 \n\
	addq	12*4,$r10 \n\
	addq	12*4,$r13 \n\
	movem	$r10,[$sp] \n\
	jump	0b \n\
	clear.d	$r10 \n\
	\n\
	.previous \n\
	.section __ex_table,\"a\" \n\
	.dword 1b,3b \n\
	.previous"

     /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (n), "2" (retn)
     /* Clobber */ : "r11");
    }

  while (n >= 16)
    {
      __asm_clear_16 (dst, retn);
      n -= 16;
    }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME: Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
    {
      __asm_clear_4 (dst, retn);
      n -= 4;
    }

  switch (n)
    {
    case 0:
      break;
    case 1:
      __asm_clear_1 (dst, retn);
      break;
    case 2:
      __asm_clear_2 (dst, retn);
      break;
    case 3:
      __asm_clear_3 (dst, retn);
      break;
    }

  return retn;
}
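To summarize the __copy_user_zeroing contract in plain C: copy what can be read, zero the rest of the destination, and return the count of bytes that couldn't be read. The sketch below models the fault boundary with an explicit "readable" parameter, which is purely an assumption for illustration; the real function discovers it via the __ex_table fixups above.

#include <string.h>

/* Model of the contract only, not of the mechanism. */
static unsigned long copy_user_zeroing_model(void *dst, const void *src,
                                             unsigned long n,
                                             unsigned long readable)
{
    unsigned long good = n < readable ? n : readable;

    memcpy(dst, src, good);                   /* the bytes that succeed */
    memset((char *)dst + good, 0, n - good);  /* zero the failed tail */
    return n - good;                          /* bytes not copied */
}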