mirror of
https://github.com/AetherDroid/android_kernel_samsung_on5xelte.git
synced 2025-10-29 23:28:52 +01:00
Fixed MTP to work with TWRP
This commit is contained in:
commit
f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions
6
arch/cris/arch-v10/lib/Makefile
Normal file
6
arch/cris/arch-v10/lib/Makefile
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
#
|
||||
# Makefile for Etrax-specific library files..
|
||||
#
|
||||
|
||||
lib-y = checksum.o checksumcopy.o string.o usercopy.o memset.o csumcpfruser.o
|
||||
|
||||
118
arch/cris/arch-v10/lib/checksum.S
Normal file
118
arch/cris/arch-v10/lib/checksum.S
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
/*
|
||||
* A fast checksum routine using movem
|
||||
* Copyright (c) 1998-2001 Axis Communications AB
|
||||
*
|
||||
* csum_partial(const unsigned char * buff, int len, unsigned int sum)
|
||||
*/
|
||||
|
||||
.globl csum_partial
|
||||
csum_partial:
|
||||
|
||||
;; r10 - src
|
||||
;; r11 - length
|
||||
;; r12 - checksum
|
||||
|
||||
;; check for breakeven length between movem and normal word looping versions
|
||||
;; we also do _NOT_ want to compute a checksum over more than the
|
||||
;; actual length when length < 40
|
||||
|
||||
cmpu.w 80,$r11
|
||||
blo _word_loop
|
||||
nop
|
||||
|
||||
;; need to save the registers we use below in the movem loop
|
||||
;; this overhead is why we have a check above for breakeven length
|
||||
;; only r0 - r8 have to be saved, the other ones are clobber-able
|
||||
;; according to the ABI
|
||||
|
||||
subq 9*4,$sp
|
||||
movem $r8,[$sp]
|
||||
|
||||
;; do a movem checksum
|
||||
|
||||
subq 10*4,$r11 ; update length for the first loop
|
||||
|
||||
_mloop: movem [$r10+],$r9 ; read 10 longwords
|
||||
|
||||
;; perform dword checksumming on the 10 longwords
|
||||
|
||||
add.d $r0,$r12
|
||||
ax
|
||||
add.d $r1,$r12
|
||||
ax
|
||||
add.d $r2,$r12
|
||||
ax
|
||||
add.d $r3,$r12
|
||||
ax
|
||||
add.d $r4,$r12
|
||||
ax
|
||||
add.d $r5,$r12
|
||||
ax
|
||||
add.d $r6,$r12
|
||||
ax
|
||||
add.d $r7,$r12
|
||||
ax
|
||||
add.d $r8,$r12
|
||||
ax
|
||||
add.d $r9,$r12
|
||||
|
||||
;; fold the carry into the checksum, to avoid having to loop the carry
|
||||
;; back into the top
|
||||
|
||||
ax
|
||||
addq 0,$r12
|
||||
|
||||
subq 10*4,$r11
|
||||
bge _mloop
|
||||
nop
|
||||
|
||||
addq 10*4,$r11 ; compensate for last loop underflowing length
|
||||
|
||||
movem [$sp+],$r8 ; restore regs
|
||||
|
||||
_word_loop:
|
||||
;; only fold if there is anything to fold.
|
||||
|
||||
cmpq 0,$r12
|
||||
beq _no_fold
|
||||
|
||||
;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below.
|
||||
;; r9 and r13 can be used as temporaries.
|
||||
|
||||
moveq -1,$r9 ; put 0xffff in r9, faster than move.d 0xffff,r9
|
||||
lsrq 16,$r9
|
||||
|
||||
move.d $r12,$r13
|
||||
lsrq 16,$r13 ; r13 = checksum >> 16
|
||||
and.d $r9,$r12 ; checksum = checksum & 0xffff
|
||||
add.d $r13,$r12 ; checksum += r13
|
||||
|
||||
_no_fold:
|
||||
cmpq 2,$r11
|
||||
blt _no_words
|
||||
nop
|
||||
|
||||
;; checksum the rest of the words
|
||||
|
||||
subq 2,$r11
|
||||
|
||||
_wloop: subq 2,$r11
|
||||
bge _wloop
|
||||
addu.w [$r10+],$r12
|
||||
|
||||
addq 2,$r11
|
||||
|
||||
_no_words:
|
||||
;; see if we have one odd byte more
|
||||
cmpq 1,$r11
|
||||
beq _do_byte
|
||||
nop
|
||||
ret
|
||||
move.d $r12, $r10
|
||||
|
||||
_do_byte:
|
||||
;; copy and checksum the last byte
|
||||
addu.b [$r10],$r12
|
||||
ret
|
||||
move.d $r12, $r10
|
||||
|
||||
126
arch/cris/arch-v10/lib/checksumcopy.S
Normal file
126
arch/cris/arch-v10/lib/checksumcopy.S
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
/*
|
||||
* A fast checksum+copy routine using movem
|
||||
* Copyright (c) 1998, 2001 Axis Communications AB
|
||||
*
|
||||
* Authors: Bjorn Wesen
|
||||
*
|
||||
* csum_partial_copy_nocheck(const char *src, char *dst,
|
||||
* int len, unsigned int sum)
|
||||
*/
|
||||
|
||||
.globl csum_partial_copy_nocheck
|
||||
csum_partial_copy_nocheck:
|
||||
|
||||
;; r10 - src
|
||||
;; r11 - dst
|
||||
;; r12 - length
|
||||
;; r13 - checksum
|
||||
|
||||
;; check for breakeven length between movem and normal word looping versions
|
||||
;; we also do _NOT_ want to compute a checksum over more than the
|
||||
;; actual length when length < 40
|
||||
|
||||
cmpu.w 80, $r12
|
||||
blo _word_loop
|
||||
nop
|
||||
|
||||
;; need to save the registers we use below in the movem loop
|
||||
;; this overhead is why we have a check above for breakeven length
|
||||
;; only r0 - r8 have to be saved, the other ones are clobber-able
|
||||
;; according to the ABI
|
||||
|
||||
subq 9*4, $sp
|
||||
movem $r8, [$sp]
|
||||
|
||||
;; do a movem copy and checksum
|
||||
|
||||
subq 10*4, $r12 ; update length for the first loop
|
||||
|
||||
_mloop: movem [$r10+],$r9 ; read 10 longwords
|
||||
1: ;; A failing userspace access will have this as PC.
|
||||
movem $r9,[$r11+] ; write 10 longwords
|
||||
|
||||
;; perform dword checksumming on the 10 longwords
|
||||
|
||||
add.d $r0,$r13
|
||||
ax
|
||||
add.d $r1,$r13
|
||||
ax
|
||||
add.d $r2,$r13
|
||||
ax
|
||||
add.d $r3,$r13
|
||||
ax
|
||||
add.d $r4,$r13
|
||||
ax
|
||||
add.d $r5,$r13
|
||||
ax
|
||||
add.d $r6,$r13
|
||||
ax
|
||||
add.d $r7,$r13
|
||||
ax
|
||||
add.d $r8,$r13
|
||||
ax
|
||||
add.d $r9,$r13
|
||||
|
||||
;; fold the carry into the checksum, to avoid having to loop the carry
|
||||
;; back into the top
|
||||
|
||||
ax
|
||||
addq 0,$r13
|
||||
|
||||
subq 10*4,$r12
|
||||
bge _mloop
|
||||
nop
|
||||
|
||||
addq 10*4,$r12 ; compensate for last loop underflowing length
|
||||
|
||||
movem [$sp+],$r8 ; restore regs
|
||||
|
||||
;; Tail handling: entered directly when the length is below the movem
;; threshold, or by falling through after the movem loop.
;; On entry: r12 = bytes still to process, r13 = running 32-bit checksum.
_word_loop:
|
||||
;; only fold if there is anything to fold.
|
||||
|
||||
cmpq 0,$r13
|
||||
;; NOTE(review): no nop follows this branch, so the move.d below presumably
;; sits in the branch delay slot and also runs when the fold is skipped;
;; that looks harmless because r9 is only a temporary here - confirm.
beq _no_fold
|
||||
|
||||
;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
|
||||
;; r9 can be used as temporary.
|
||||
|
||||
move.d $r13,$r9
|
||||
lsrq 16,$r9 ; r9 = checksum >> 16
|
||||
and.d 0xffff,$r13 ; checksum = checksum & 0xffff
|
||||
add.d $r9,$r13 ; checksum += r9
|
||||
|
||||
_no_fold:
|
||||
cmpq 2,$r12
|
||||
blt _no_words
|
||||
nop
|
||||
|
||||
;; copy and checksum the rest of the words
|
||||
|
||||
subq 2,$r12
|
||||
|
||||
_wloop: move.w [$r10+],$r9
|
||||
2: ;; A failing userspace access will have this as PC.
|
||||
addu.w $r9,$r13
|
||||
subq 2,$r12
|
||||
bge _wloop
|
||||
move.w $r9,[$r11+]
|
||||
|
||||
addq 2,$r12
|
||||
|
||||
_no_words:
|
||||
;; see if we have one odd byte more
|
||||
cmpq 1,$r12
|
||||
beq _do_byte
|
||||
nop
|
||||
ret
|
||||
move.d $r13, $r10
|
||||
|
||||
_do_byte:
|
||||
;; copy and checksum the last byte
|
||||
move.b [$r10],$r9
|
||||
3: ;; A failing userspace access will have this as PC.
|
||||
addu.b $r9,$r13
|
||||
move.b $r9,[$r11]
|
||||
ret
|
||||
move.d $r13, $r10
|
||||
64
arch/cris/arch-v10/lib/csumcpfruser.S
Normal file
64
arch/cris/arch-v10/lib/csumcpfruser.S
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* Add-on to transform csum_partial_copy_nocheck in checksumcopy.S into
|
||||
* csum_partial_copy_from_user by adding exception records.
|
||||
*
|
||||
* Copyright (C) 2001 Axis Communications AB.
|
||||
*
|
||||
* Author: Hans-Peter Nilsson.
|
||||
*/
|
||||
|
||||
#include <asm/errno.h>
|
||||
|
||||
/* Same function body, but a different name. If we just added exception
|
||||
records to _csum_partial_copy_nocheck and made it generic, we wouldn't
|
||||
know a user fault from a kernel fault and we would have overhead in
|
||||
each kernel caller for the error-pointer argument.
|
||||
|
||||
unsigned int csum_partial_copy_from_user
|
||||
(const char *src, char *dst, int len, unsigned int sum, int *errptr);
|
||||
|
||||
Note that the errptr argument is only set if we encounter an error.
|
||||
It is conveniently located on the stack, so the normal function body
|
||||
does not have to handle it. */
|
||||
|
||||
#define csum_partial_copy_nocheck csum_partial_copy_from_user
|
||||
|
||||
/* There are local labels numbered 1, 2 and 3 present to mark the
|
||||
different from-user accesses. */
|
||||
#include "checksumcopy.S"
|
||||
|
||||
;; Fault handlers for the three from-user reads marked 1, 2 and 3 in
;; checksumcopy.S. The handlers are entered at 4, 5 or 6 (see the
;; __ex_table entries at the end) and deliberately fall through into each
;; other, so that r12 ends up as the number of bytes that were not copied.
.section .fixup,"ax"
|
||||
|
||||
;; Here from the movem loop; restore stack.
|
||||
4:
|
||||
movem [$sp+],$r8
|
||||
;; r12 is already decremented. Add back chunk_size-2.
|
||||
;; (the remaining 2 are added by falling through into label 5 below)
addq 40-2,$r12
|
||||
|
||||
;; Here from the word loop; r12 is off by 2; add it back.
|
||||
5:
|
||||
addq 2,$r12
|
||||
|
||||
;; Here from a failing single byte.
|
||||
6:
|
||||
|
||||
;; Signal in *errptr that we had a failing access.
|
||||
;; errptr is the argument the header comment describes as located on the
;; stack; the double indirection stores through the pointer found at [$sp].
moveq -EFAULT,$r9
|
||||
move.d $r9,[[$sp]]
|
||||
|
||||
;; Clear the rest of the destination area using memset. Preserve the
|
||||
;; checksum for the readable bytes.
|
||||
;; Save the return address and the checksum across the call.
push $srp
|
||||
push $r13
|
||||
;; Set up memset(dst, 0, remaining): r10 = dst (was r11), r11 = 0,
;; r12 = remaining length as adjusted above.
move.d $r11,$r10
|
||||
clear.d $r11
|
||||
jsr memset
|
||||
;; Return value: the checksum pushed from r13 above.
pop $r10
|
||||
;; Return to the caller through the $srp value pushed above.
jump [$sp+]
|
||||
|
||||
.previous
|
||||
;; Exception table: (faulting access label, fixup label) pairs.
.section __ex_table,"a"
|
||||
.dword 1b,4b
|
||||
.dword 2b,5b
|
||||
.dword 3b,6b
|
||||
.previous
|
||||
42
arch/cris/arch-v10/lib/dmacopy.c
Normal file
42
arch/cris/arch-v10/lib/dmacopy.c
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* memcpy for large blocks, using memory-memory DMA channels 6 and 7 in Etrax
|
||||
*/
|
||||
|
||||
#include <asm/svinto.h>
|
||||
#include <asm/io.h>
|
||||
|
||||
#define D(x)
|
||||
|
||||
void *dma_memcpy(void *pdst,
|
||||
const void *psrc,
|
||||
unsigned int pn)
|
||||
{
|
||||
static etrax_dma_descr indma, outdma;
|
||||
|
||||
D(printk(KERN_DEBUG "dma_memcpy %d bytes... ", pn));
|
||||
|
||||
#if 0
|
||||
*R_GEN_CONFIG = genconfig_shadow =
|
||||
(genconfig_shadow & ~0x3c0000) |
|
||||
IO_STATE(R_GEN_CONFIG, dma6, intdma7) |
|
||||
IO_STATE(R_GEN_CONFIG, dma7, intdma6);
|
||||
#endif
|
||||
indma.sw_len = outdma.sw_len = pn;
|
||||
indma.ctrl = d_eol | d_eop;
|
||||
outdma.ctrl = d_eol;
|
||||
indma.buf = psrc;
|
||||
outdma.buf = pdst;
|
||||
|
||||
*R_DMA_CH6_FIRST = &indma;
|
||||
*R_DMA_CH7_FIRST = &outdma;
|
||||
*R_DMA_CH6_CMD = IO_STATE(R_DMA_CH6_CMD, cmd, start);
|
||||
*R_DMA_CH7_CMD = IO_STATE(R_DMA_CH7_CMD, cmd, start);
|
||||
|
||||
while (*R_DMA_CH7_CMD == 1)
|
||||
/* wait for completion */;
|
||||
|
||||
D(printk(KERN_DEBUG "done\n"));
|
||||
}
|
||||
|
||||
|
||||
|
||||
146
arch/cris/arch-v10/lib/dram_init.S
Normal file
146
arch/cris/arch-v10/lib/dram_init.S
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
/*
|
||||
* DRAM/SDRAM initialization - alter with care
|
||||
* This file is intended to be included from other assembler files
|
||||
*
|
||||
* Note: This file may not modify r9 because r9 is used to carry
|
||||
* information from the decompressor to the kernel
|
||||
*
|
||||
* Copyright (C) 2000-2012 Axis Communications AB
|
||||
*
|
||||
*/
|
||||
|
||||
/* Just to be certain the config file is included, we include it here
|
||||
* explicitly instead of depending on it being included in the file that
|
||||
* uses this code.
|
||||
*/
|
||||
|
||||
|
||||
;; WARNING! The registers r8 and r9 are used as parameters carrying
|
||||
;; information from the decompressor (if the kernel was compressed).
|
||||
;; They should not be used in the code below.
|
||||
|
||||
move.d CONFIG_ETRAX_DEF_R_WAITSTATES, $r0
|
||||
move.d $r0, [R_WAITSTATES]
|
||||
|
||||
move.d CONFIG_ETRAX_DEF_R_BUS_CONFIG, $r0
|
||||
move.d $r0, [R_BUS_CONFIG]
|
||||
|
||||
#ifndef CONFIG_ETRAX_SDRAM
|
||||
move.d CONFIG_ETRAX_DEF_R_DRAM_CONFIG, $r0
|
||||
move.d $r0, [R_DRAM_CONFIG]
|
||||
|
||||
move.d CONFIG_ETRAX_DEF_R_DRAM_TIMING, $r0
|
||||
move.d $r0, [R_DRAM_TIMING]
|
||||
#else
|
||||
;; Samsung SDRAMs seem to require to be initialized twice to work properly.
|
||||
moveq 2, $r6
|
||||
_sdram_init:
|
||||
|
||||
; Refer to ETRAX 100LX Designers Reference for a description of SDRAM initialization
|
||||
|
||||
; Bank configuration
|
||||
move.d CONFIG_ETRAX_DEF_R_SDRAM_CONFIG, $r0
|
||||
move.d $r0, [R_SDRAM_CONFIG]
|
||||
|
||||
; Calculate value of mrs_data
|
||||
; CAS latency = 2 && bus_width = 32 => 0x40
|
||||
; CAS latency = 3 && bus_width = 32 => 0x60
|
||||
; CAS latency = 2 && bus_width = 16 => 0x20
|
||||
; CAS latency = 3 && bus_width = 16 => 0x30
|
||||
|
||||
; Check if value is already supplied in kernel config
|
||||
move.d CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r2
|
||||
and.d 0x00ff0000, $r2
|
||||
bne _set_timing
|
||||
lsrq 16, $r2
|
||||
|
||||
move.d 0x40, $r2 ; Assume 32 bits and CAS latency = 2
|
||||
move.d CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r1
|
||||
move.d $r1, $r3
|
||||
and.d 0x03, $r1 ; Get CAS latency
|
||||
and.d 0x1000, $r3 ; 50 or 100 MHz?
|
||||
beq _speed_50
|
||||
nop
|
||||
_speed_100:
|
||||
cmp.d 0x00, $r1 ; CAS latency = 2?
|
||||
beq _bw_check
|
||||
nop
|
||||
or.d 0x20, $r2 ; CAS latency = 3
|
||||
ba _bw_check
|
||||
nop
|
||||
_speed_50:
|
||||
cmp.d 0x01, $r1 ; CAS latency = 2?
|
||||
beq _bw_check
|
||||
nop
|
||||
or.d 0x20, $r2 ; CAS latency = 3
|
||||
_bw_check:
|
||||
move.d CONFIG_ETRAX_DEF_R_SDRAM_CONFIG, $r1
|
||||
and.d 0x800000, $r1 ; DRAM width is bit 23
|
||||
bne _set_timing
|
||||
nop
|
||||
lsrq 1, $r2 ; 16 bits. Shift down value.
|
||||
|
||||
; Set timing parameters. Starts master clock
|
||||
_set_timing:
|
||||
move.d CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r1
|
||||
and.d 0x8000f9ff, $r1 ; Make sure mrs data and command is 0
|
||||
or.d 0x80000000, $r1 ; Make sure sdram enable bit is set
|
||||
move.d $r1, $r5
|
||||
or.d 0x0000c000, $r1 ; ref = disable
|
||||
lslq 16, $r2 ; mrs data starts at bit 16
|
||||
or.d $r2, $r1
|
||||
move.d $r1, [R_SDRAM_TIMING]
|
||||
|
||||
; Wait 200us
|
||||
move.d 10000, $r2
|
||||
1: bne 1b
|
||||
subq 1, $r2
|
||||
|
||||
; Issue initialization command sequence
|
||||
move.d _sdram_commands_start, $r2
|
||||
and.d 0x000fffff, $r2 ; Make sure commands are read from flash
|
||||
move.d _sdram_commands_end, $r3
|
||||
and.d 0x000fffff, $r3
|
||||
1: clear.d $r4
|
||||
move.b [$r2+], $r4
|
||||
lslq 9, $r4 ; Command starts at bit 9
|
||||
or.d $r1, $r4
|
||||
move.d $r4, [R_SDRAM_TIMING]
|
||||
nop ; Wait five nop cycles between each command
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
cmp.d $r2, $r3
|
||||
bne 1b
|
||||
nop
|
||||
move.d $r5, [R_SDRAM_TIMING]
|
||||
subq 1, $r6
|
||||
bne _sdram_init
|
||||
nop
|
||||
ba _sdram_commands_end
|
||||
nop
|
||||
|
||||
_sdram_commands_start:
|
||||
.byte 3 ; Precharge
|
||||
.byte 0 ; nop
|
||||
.byte 2 ; refresh
|
||||
.byte 0 ; nop
|
||||
.byte 2 ; refresh
|
||||
.byte 0 ; nop
|
||||
.byte 2 ; refresh
|
||||
.byte 0 ; nop
|
||||
.byte 2 ; refresh
|
||||
.byte 0 ; nop
|
||||
.byte 2 ; refresh
|
||||
.byte 0 ; nop
|
||||
.byte 2 ; refresh
|
||||
.byte 0 ; nop
|
||||
.byte 2 ; refresh
|
||||
.byte 0 ; nop
|
||||
.byte 2 ; refresh
|
||||
.byte 0 ; nop
|
||||
.byte 1 ; mrs
|
||||
.byte 0 ; nop
|
||||
_sdram_commands_end:
|
||||
#endif
|
||||
60
arch/cris/arch-v10/lib/hw_settings.S
Normal file
60
arch/cris/arch-v10/lib/hw_settings.S
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
* This table is used by some tools to extract hardware parameters.
|
||||
* The table should be included in the kernel and the decompressor.
|
||||
* Don't forget to update the tools if you change this table.
|
||||
*
|
||||
* Copyright (C) 2001 Axis Communications AB
|
||||
*
|
||||
* Authors: Mikael Starvik (starvik@axis.com)
|
||||
*/
|
||||
|
||||
#define PA_SET_VALUE ((CONFIG_ETRAX_DEF_R_PORT_PA_DIR << 8) | \
|
||||
(CONFIG_ETRAX_DEF_R_PORT_PA_DATA))
|
||||
#define PB_SET_VALUE ((CONFIG_ETRAX_DEF_R_PORT_PB_CONFIG << 16) | \
|
||||
(CONFIG_ETRAX_DEF_R_PORT_PB_DIR << 8) | \
|
||||
(CONFIG_ETRAX_DEF_R_PORT_PB_DATA))
|
||||
|
||||
.ascii "HW_PARAM_MAGIC" ; Magic number
|
||||
.dword 0xc0004000 ; Kernel start address
|
||||
|
||||
; Debug port
|
||||
#ifdef CONFIG_ETRAX_DEBUG_PORT0
|
||||
.dword 0
|
||||
#elif defined(CONFIG_ETRAX_DEBUG_PORT1)
|
||||
.dword 1
|
||||
#elif defined(CONFIG_ETRAX_DEBUG_PORT2)
|
||||
.dword 2
|
||||
#elif defined(CONFIG_ETRAX_DEBUG_PORT3)
|
||||
.dword 3
|
||||
#else
|
||||
.dword 4 ; No debug
|
||||
#endif
|
||||
|
||||
; SDRAM or EDO DRAM?
|
||||
#ifdef CONFIG_ETRAX_SDRAM
|
||||
.dword 1
|
||||
#else
|
||||
.dword 0
|
||||
#endif
|
||||
|
||||
; Register values
|
||||
.dword R_WAITSTATES
|
||||
.dword CONFIG_ETRAX_DEF_R_WAITSTATES
|
||||
.dword R_BUS_CONFIG
|
||||
.dword CONFIG_ETRAX_DEF_R_BUS_CONFIG
|
||||
#ifdef CONFIG_ETRAX_SDRAM
|
||||
.dword R_SDRAM_CONFIG
|
||||
.dword CONFIG_ETRAX_DEF_R_SDRAM_CONFIG
|
||||
.dword R_SDRAM_TIMING
|
||||
.dword CONFIG_ETRAX_DEF_R_SDRAM_TIMING
|
||||
#else
|
||||
.dword R_DRAM_CONFIG
|
||||
.dword CONFIG_ETRAX_DEF_R_DRAM_CONFIG
|
||||
.dword R_DRAM_TIMING
|
||||
.dword CONFIG_ETRAX_DEF_R_DRAM_TIMING
|
||||
#endif
|
||||
.dword R_PORT_PA_SET
|
||||
.dword PA_SET_VALUE
|
||||
.dword R_PORT_PB_SET
|
||||
.dword PB_SET_VALUE
|
||||
.dword 0 ; No more register values
|
||||
259
arch/cris/arch-v10/lib/memset.c
Normal file
259
arch/cris/arch-v10/lib/memset.c
Normal file
|
|
@ -0,0 +1,259 @@
|
|||
/* A memset for CRIS.
|
||||
Copyright (C) 1999-2005 Axis Communications.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Neither the name of Axis Communications nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS
|
||||
COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
||||
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
/* FIXME: This file should really only be used for reference, as the
|
||||
result is somewhat depending on gcc generating what we expect rather
|
||||
than what we describe. An assembly file should be used instead. */
|
||||
|
||||
/* Note the multiple occurrence of the expression "12*4", including the
|
||||
asm. It is hard to get it into the asm in a good way. Thus better to
|
||||
expose the problem everywhere: no macro. */
|
||||
|
||||
/* Assuming one cycle per dword written or read (ok, not really true; the
|
||||
world is not ideal), and one cycle per instruction, then 43+3*(n/48-1)
|
||||
<= 24+24*(n/48-1) so n >= 45.7; n >= 0.9; we win on the first full
|
||||
48-byte block to set. */
|
||||
|
||||
#define MEMSET_BY_BLOCK_THRESHOLD (1 * 48)
|
||||
|
||||
/* No name ambiguities in this file. */
|
||||
__asm__ (".syntax no_register_prefix");
|
||||
|
||||
void *memset(void *pdst, int c, unsigned int plen)
|
||||
{
|
||||
/* Now we want the parameters in special registers. Make sure the
|
||||
compiler does something usable with this. */
|
||||
|
||||
register char *return_dst __asm__ ("r10") = pdst;
|
||||
register int n __asm__ ("r12") = plen;
|
||||
register int lc __asm__ ("r11") = c;
|
||||
|
||||
/* Most apps use memset sanely. Memsetting about 3..4 bytes or less get
|
||||
penalized here compared to the generic implementation. */
|
||||
|
||||
/* This is fragile performancewise at best. Check with newer GCC
|
||||
releases, if they compile cascaded "x |= x << 8" to sane code. */
|
||||
__asm__("movu.b %0,r13 \n\
|
||||
lslq 8,r13 \n\
|
||||
move.b %0,r13 \n\
|
||||
move.d r13,%0 \n\
|
||||
lslq 16,r13 \n\
|
||||
or.d r13,%0"
|
||||
: "=r" (lc) /* Outputs. */
|
||||
: "0" (lc) /* Inputs. */
|
||||
: "r13"); /* Trash. */
|
||||
|
||||
{
|
||||
register char *dst __asm__ ("r13") = pdst;
|
||||
|
||||
if (((unsigned long) pdst & 3) != 0
|
||||
/* Oops! n = 0 must be a valid call, regardless of alignment. */
|
||||
&& n >= 3)
|
||||
{
|
||||
if ((unsigned long) dst & 1)
|
||||
{
|
||||
*dst = (char) lc;
|
||||
n--;
|
||||
dst++;
|
||||
}
|
||||
|
||||
if ((unsigned long) dst & 2)
|
||||
{
|
||||
*(short *) dst = lc;
|
||||
n -= 2;
|
||||
dst += 2;
|
||||
}
|
||||
}
|
||||
|
||||
/* Decide which setting method to use. */
|
||||
if (n >= MEMSET_BY_BLOCK_THRESHOLD)
|
||||
{
|
||||
/* It is not optimal to tell the compiler about clobbering any
|
||||
registers; that will move the saving/restoring of those registers
|
||||
to the function prologue/epilogue, and make non-block sizes
|
||||
suboptimal. */
|
||||
__asm__ volatile
|
||||
("\
|
||||
;; GCC does promise correct register allocations, but let's \n\
|
||||
;; make sure it keeps its promises. \n\
|
||||
.ifnc %0-%1-%4,$r13-$r12-$r11 \n\
|
||||
.error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\" \n\
|
||||
.endif \n\
|
||||
\n\
|
||||
;; Save the registers we'll clobber in the movem process \n\
|
||||
;; on the stack. Don't mention them to gcc, it will only be \n\
|
||||
;; upset. \n\
|
||||
subq 11*4,sp \n\
|
||||
movem r10,[sp] \n\
|
||||
\n\
|
||||
move.d r11,r0 \n\
|
||||
move.d r11,r1 \n\
|
||||
move.d r11,r2 \n\
|
||||
move.d r11,r3 \n\
|
||||
move.d r11,r4 \n\
|
||||
move.d r11,r5 \n\
|
||||
move.d r11,r6 \n\
|
||||
move.d r11,r7 \n\
|
||||
move.d r11,r8 \n\
|
||||
move.d r11,r9 \n\
|
||||
move.d r11,r10 \n\
|
||||
\n\
|
||||
;; Now we've got this: \n\
|
||||
;; r13 - dst \n\
|
||||
;; r12 - n \n\
|
||||
\n\
|
||||
;; Update n for the first loop \n\
|
||||
subq 12*4,r12 \n\
|
||||
0: \n\
|
||||
"
|
||||
#ifdef __arch_common_v10_v32
|
||||
/* Cater to branch offset difference between v32 and v10. We
|
||||
assume the branch below has an 8-bit offset. */
|
||||
" setf\n"
|
||||
#endif
|
||||
" subq 12*4,r12 \n\
|
||||
bge 0b \n\
|
||||
movem r11,[r13+] \n\
|
||||
\n\
|
||||
;; Compensate for last loop underflowing n. \n\
|
||||
addq 12*4,r12 \n\
|
||||
\n\
|
||||
;; Restore registers from stack. \n\
|
||||
movem [sp+],r10"
|
||||
|
||||
/* Outputs. */
|
||||
: "=r" (dst), "=r" (n)
|
||||
|
||||
/* Inputs. */
|
||||
: "0" (dst), "1" (n), "r" (lc));
|
||||
}
|
||||
|
||||
/* An ad-hoc unroll, used for 4*12-1..16 bytes. */
|
||||
while (n >= 16)
|
||||
{
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(long *) dst = lc; dst += 4;
|
||||
n -= 16;
|
||||
}
|
||||
|
||||
switch (n)
|
||||
{
|
||||
case 0:
|
||||
break;
|
||||
|
||||
case 1:
|
||||
*dst = (char) lc;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
*(short *) dst = (short) lc;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
*(short *) dst = (short) lc; dst += 2;
|
||||
*dst = (char) lc;
|
||||
break;
|
||||
|
||||
case 4:
|
||||
*(long *) dst = lc;
|
||||
break;
|
||||
|
||||
case 5:
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*dst = (char) lc;
|
||||
break;
|
||||
|
||||
case 6:
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(short *) dst = (short) lc;
|
||||
break;
|
||||
|
||||
case 7:
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(short *) dst = (short) lc; dst += 2;
|
||||
*dst = (char) lc;
|
||||
break;
|
||||
|
||||
case 8:
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(long *) dst = lc;
|
||||
break;
|
||||
|
||||
case 9:
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*dst = (char) lc;
|
||||
break;
|
||||
|
||||
case 10:
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(short *) dst = (short) lc;
|
||||
break;
|
||||
|
||||
case 11:
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(short *) dst = (short) lc; dst += 2;
|
||||
*dst = (char) lc;
|
||||
break;
|
||||
|
||||
case 12:
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(long *) dst = lc;
|
||||
break;
|
||||
|
||||
case 13:
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*dst = (char) lc;
|
||||
break;
|
||||
|
||||
case 14:
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(short *) dst = (short) lc;
|
||||
break;
|
||||
|
||||
case 15:
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(long *) dst = lc; dst += 4;
|
||||
*(short *) dst = (short) lc; dst += 2;
|
||||
*dst = (char) lc;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return return_dst;
|
||||
}
|
||||
86
arch/cris/arch-v10/lib/old_checksum.c
Normal file
86
arch/cris/arch-v10/lib/old_checksum.c
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
* INET An implementation of the TCP/IP protocol suite for the LINUX
|
||||
* operating system. INET is implemented using the BSD Socket
|
||||
* interface as the means of communication with the user level.
|
||||
*
|
||||
* IP/TCP/UDP checksumming routines
|
||||
*
|
||||
* Authors: Jorge Cwik, <jorge@laser.satlink.net>
|
||||
* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
|
||||
* Tom May, <ftom@netcom.com>
|
||||
* Lots of code moved from tcp.c and ip.c; see those files
|
||||
* for more names.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <net/checksum.h>
|
||||
#include <net/module.h>
|
||||
|
||||
#undef PROFILE_CHECKSUM
|
||||
|
||||
#ifdef PROFILE_CHECKSUM
|
||||
/* these are just for profiling the checksum code with an oscilloscope.. uh */
|
||||
#if 0
|
||||
#define BITOFF *((unsigned char *)0xb0000030) = 0xff
|
||||
#define BITON *((unsigned char *)0xb0000030) = 0x0
|
||||
#endif
|
||||
#include <asm/io.h>
|
||||
#define CBITON LED_ACTIVE_SET(1)
|
||||
#define CBITOFF LED_ACTIVE_SET(0)
|
||||
#define BITOFF
|
||||
#define BITON
|
||||
#else
|
||||
#define BITOFF
|
||||
#define BITON
|
||||
#define CBITOFF
|
||||
#define CBITON
|
||||
#endif
|
||||
|
||||
/*
|
||||
* computes a partial checksum, e.g. for TCP/UDP fragments
|
||||
*/
|
||||
|
||||
#include <asm/delay.h>
|
||||
|
||||
/*
 * csum_partial - add the contents of a buffer to a running 32-bit sum.
 *
 * p:     start of the data
 * len:   number of bytes to sum
 * __sum: running sum to continue from
 *
 * The data is read as 16-bit quantities: 16 bytes per pass while whole
 * 16-byte groups remain, then one halfword at a time, and finally one
 * trailing byte when len is odd.  The 32-bit accumulator is returned
 * unfolded.
 */
__wsum csum_partial(const void *p, int len, __wsum __sum)
{
	const void *end = p + len;		/* one past the last byte */
	const void *stop = end - (len % 16);	/* end of the whole 16-byte groups */
	/*
	 * Experiments with ethernet and slip connections show that the
	 * buffer is aligned on either a 2-byte or 4-byte boundary.
	 */
	const u16 *cur = p;
	u32 acc = (__force u32)__sum;
	int i;

#if 0
	if((int)cur & 0x3)
		printk("unaligned buff %p\n", cur);
	__delay(900); /* extra delay of 90 us to test performance hit */
#endif
	BITON;

	/* Bulk part: eight halfwords per pass. */
	while (cur < stop) {
		for (i = 0; i < 8; i++)
			acc += *cur++;
	}

	/* Remaining whole halfwords. */
	for (stop = end - (len % 2); cur < stop; cur++)
		acc += *cur;

	/* Odd length: add the final byte separately. */
	if (cur < end)
		acc += *(const u8 *)cur;

	BITOFF;
	return (__force __wsum)acc;
}
|
||||
|
||||
EXPORT_SYMBOL(csum_partial);
|
||||
236
arch/cris/arch-v10/lib/string.c
Normal file
236
arch/cris/arch-v10/lib/string.c
Normal file
|
|
@ -0,0 +1,236 @@
|
|||
/* A memcpy for CRIS.
|
||||
Copyright (C) 1994-2005 Axis Communications.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Neither the name of Axis Communications nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS
|
||||
COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
||||
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
/* FIXME: This file should really only be used for reference, as the
|
||||
result is somewhat depending on gcc generating what we expect rather
|
||||
than what we describe. An assembly file should be used instead. */
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
/* Break even between movem and move16 is really at 38.7 * 2, but
|
||||
modulo 44, so up to the next multiple of 44, we use ordinary code. */
|
||||
#define MEMCPY_BY_BLOCK_THRESHOLD (44 * 2)
|
||||
|
||||
/* No name ambiguities in this file. */
|
||||
__asm__ (".syntax no_register_prefix");
|
||||
|
||||
void *
|
||||
memcpy(void *pdst, const void *psrc, size_t pn)
|
||||
{
|
||||
/* Now we want the parameters put in special registers.
|
||||
Make sure the compiler is able to make something useful of this.
|
||||
As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
|
||||
|
||||
If gcc was all right, it really would need no temporaries, and no
|
||||
stack space to save stuff on. */
|
||||
|
||||
register void *return_dst __asm__ ("r10") = pdst;
|
||||
register unsigned char *dst __asm__ ("r13") = pdst;
|
||||
register unsigned const char *src __asm__ ("r11") = psrc;
|
||||
register int n __asm__ ("r12") = pn;
|
||||
|
||||
/* When src is aligned but not dst, this makes a few extra needless
|
||||
cycles. I believe it would take as many to check that the
|
||||
re-alignment was unnecessary. */
|
||||
if (((unsigned long) dst & 3) != 0
|
||||
/* Don't align if we wouldn't copy more than a few bytes; so we
|
||||
don't have to check further for overflows. */
|
||||
&& n >= 3)
|
||||
{
|
||||
if ((unsigned long) dst & 1)
|
||||
{
|
||||
n--;
|
||||
*dst = *src;
|
||||
src++;
|
||||
dst++;
|
||||
}
|
||||
|
||||
if ((unsigned long) dst & 2)
|
||||
{
|
||||
n -= 2;
|
||||
*(short *) dst = *(short *) src;
|
||||
src += 2;
|
||||
dst += 2;
|
||||
}
|
||||
}
|
||||
|
||||
/* Decide which copying method to use. */
|
||||
if (n >= MEMCPY_BY_BLOCK_THRESHOLD)
|
||||
{
|
||||
/* It is not optimal to tell the compiler about clobbering any
|
||||
registers; that will move the saving/restoring of those registers
|
||||
to the function prologue/epilogue, and make non-movem sizes
|
||||
suboptimal. */
|
||||
__asm__ volatile
|
||||
("\
|
||||
;; GCC does promise correct register allocations, but let's \n\
|
||||
;; make sure it keeps its promises. \n\
|
||||
.ifnc %0-%1-%2,$r13-$r11-$r12 \n\
|
||||
.error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\" \n\
|
||||
.endif \n\
|
||||
\n\
|
||||
;; Save the registers we'll use in the movem process \n\
|
||||
;; on the stack. \n\
|
||||
subq 11*4,sp \n\
|
||||
movem r10,[sp] \n\
|
||||
\n\
|
||||
;; Now we've got this: \n\
|
||||
;; r11 - src \n\
|
||||
;; r13 - dst \n\
|
||||
;; r12 - n \n\
|
||||
\n\
|
||||
;; Update n for the first loop. \n\
|
||||
subq 44,r12 \n\
|
||||
0: \n\
|
||||
"
|
||||
#ifdef __arch_common_v10_v32
|
||||
/* Cater to branch offset difference between v32 and v10. We
|
||||
assume the branch below has an 8-bit offset. */
|
||||
" setf\n"
|
||||
#endif
|
||||
" movem [r11+],r10 \n\
|
||||
subq 44,r12 \n\
|
||||
bge 0b \n\
|
||||
movem r10,[r13+] \n\
|
||||
\n\
|
||||
;; Compensate for last loop underflowing n. \n\
|
||||
addq 44,r12 \n\
|
||||
\n\
|
||||
;; Restore registers from stack. \n\
|
||||
movem [sp+],r10"
|
||||
|
||||
/* Outputs. */
|
||||
: "=r" (dst), "=r" (src), "=r" (n)
|
||||
|
||||
/* Inputs. */
|
||||
: "0" (dst), "1" (src), "2" (n));
|
||||
}
|
||||
|
||||
while (n >= 16)
|
||||
{
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
|
||||
n -= 16;
|
||||
}
|
||||
|
||||
switch (n)
|
||||
{
|
||||
case 0:
|
||||
break;
|
||||
|
||||
case 1:
|
||||
*dst = *src;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
*(short *) dst = *(short *) src;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
*(short *) dst = *(short *) src; dst += 2; src += 2;
|
||||
*dst = *src;
|
||||
break;
|
||||
|
||||
case 4:
|
||||
*(long *) dst = *(long *) src;
|
||||
break;
|
||||
|
||||
case 5:
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*dst = *src;
|
||||
break;
|
||||
|
||||
case 6:
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(short *) dst = *(short *) src;
|
||||
break;
|
||||
|
||||
case 7:
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(short *) dst = *(short *) src; dst += 2; src += 2;
|
||||
*dst = *src;
|
||||
break;
|
||||
|
||||
case 8:
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(long *) dst = *(long *) src;
|
||||
break;
|
||||
|
||||
case 9:
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*dst = *src;
|
||||
break;
|
||||
|
||||
case 10:
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(short *) dst = *(short *) src;
|
||||
break;
|
||||
|
||||
case 11:
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(short *) dst = *(short *) src; dst += 2; src += 2;
|
||||
*dst = *src;
|
||||
break;
|
||||
|
||||
case 12:
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(long *) dst = *(long *) src;
|
||||
break;
|
||||
|
||||
case 13:
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*dst = *src;
|
||||
break;
|
||||
|
||||
case 14:
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(short *) dst = *(short *) src;
|
||||
break;
|
||||
|
||||
case 15:
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(long *) dst = *(long *) src; dst += 4; src += 4;
|
||||
*(short *) dst = *(short *) src; dst += 2; src += 2;
|
||||
*dst = *src;
|
||||
break;
|
||||
}
|
||||
|
||||
return return_dst;
|
||||
}
|
||||
523
arch/cris/arch-v10/lib/usercopy.c
Normal file
523
arch/cris/arch-v10/lib/usercopy.c
Normal file
|
|
@ -0,0 +1,523 @@
|
|||
/*
|
||||
* User address space access functions.
|
||||
* The non-inlined parts of asm-cris/uaccess.h are here.
|
||||
*
|
||||
* Copyright (C) 2000, Axis Communications AB.
|
||||
*
|
||||
* Written by Hans-Peter Nilsson.
|
||||
* Pieces used from memcpy, originally by Kenny Ranerup long time ago.
|
||||
*/
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
/* Asm:s have been tweaked (within the domain of correctness) to give
|
||||
satisfactory results for "gcc version 2.96 20000427 (experimental)".
|
||||
|
||||
Check regularly...
|
||||
|
||||
Note that the PC saved at a bus-fault is the address *after* the
|
||||
faulting instruction, which means the branch-target for instructions in
|
||||
delay-slots for taken branches. Note also that the postincrement in
|
||||
the instruction is performed regardless of bus-fault; the register is
|
||||
seen updated in fault handlers.
|
||||
|
||||
Oh, and on the code formatting issue, to whomever feels like "fixing
|
||||
it" to Conformity: I'm too "lazy", but why don't you go ahead and "fix"
|
||||
string.c too. I just don't think too many people will hack this file
|
||||
for the code format to be an issue. */
|
||||
|
||||
|
||||
/* Copy to userspace. This is based on the memcpy used for
|
||||
kernel-to-kernel copying; see "string.c". */
|
||||
|
||||
unsigned long
|
||||
__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
|
||||
{
|
||||
/* We want the parameters put in special registers.
|
||||
Make sure the compiler is able to make something useful of this.
|
||||
As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
|
||||
|
||||
FIXME: Comment for old gcc version. Check.
|
||||
If gcc was alright, it really would need no temporaries, and no
|
||||
stack space to save stuff on. */
|
||||
|
||||
register char *dst __asm__ ("r13") = pdst;
|
||||
register const char *src __asm__ ("r11") = psrc;
|
||||
register int n __asm__ ("r12") = pn;
|
||||
register int retn __asm__ ("r10") = 0;
|
||||
|
||||
|
||||
/* When src is aligned but not dst, this makes a few extra needless
|
||||
cycles. I believe it would take as many to check that the
|
||||
re-alignment was unnecessary. */
|
||||
if (((unsigned long) dst & 3) != 0
|
||||
/* Don't align if we wouldn't copy more than a few bytes; so we
|
||||
don't have to check further for overflows. */
|
||||
&& n >= 3)
|
||||
{
|
||||
if ((unsigned long) dst & 1)
|
||||
{
|
||||
__asm_copy_to_user_1 (dst, src, retn);
|
||||
n--;
|
||||
}
|
||||
|
||||
if ((unsigned long) dst & 2)
|
||||
{
|
||||
__asm_copy_to_user_2 (dst, src, retn);
|
||||
n -= 2;
|
||||
}
|
||||
}
|
||||
|
||||
/* Decide which copying method to use. */
|
||||
if (n >= 44*2) /* Break even between movem and
|
||||
move16 is at 38.7*2, but modulo 44. */
|
||||
{
|
||||
/* For large copies we use 'movem'. */
|
||||
|
||||
/* It is not optimal to tell the compiler about clobbering any
|
||||
registers; that will move the saving/restoring of those registers
|
||||
to the function prologue/epilogue, and make non-movem sizes
|
||||
suboptimal.
|
||||
|
||||
This method is not foolproof; it assumes that the "asm reg"
|
||||
declarations at the beginning of the function really are used
|
||||
here (beware: they may be moved to temporary registers).
|
||||
This way, we do not have to save/move the registers around into
|
||||
temporaries; we can safely use them straight away.
|
||||
|
||||
If you want to check that the allocation was right; then
|
||||
check the equalities in the first comment. It should say
|
||||
"r13=r13, r11=r11, r12=r12". */
|
||||
__asm__ volatile ("\
|
||||
.ifnc %0%1%2%3,$r13$r11$r12$r10 \n\
|
||||
.err \n\
|
||||
.endif \n\
|
||||
\n\
|
||||
;; Save the registers we'll use in the movem process \n\
|
||||
;; on the stack. \n\
|
||||
subq 11*4,$sp \n\
|
||||
movem $r10,[$sp] \n\
|
||||
\n\
|
||||
;; Now we've got this: \n\
|
||||
;; r11 - src \n\
|
||||
;; r13 - dst \n\
|
||||
;; r12 - n \n\
|
||||
\n\
|
||||
;; Update n for the first loop \n\
|
||||
subq 44,$r12 \n\
|
||||
\n\
|
||||
; Since the noted PC of a faulting instruction in a delay-slot of a taken \n\
|
||||
; branch, is that of the branch target, we actually point at the from-movem \n\
|
||||
; for this case. There is no ambiguity here; if there was a fault in that \n\
|
||||
; instruction (meaning a kernel oops), the faulted PC would be the address \n\
|
||||
; after *that* movem. \n\
|
||||
\n\
|
||||
0: \n\
|
||||
movem [$r11+],$r10 \n\
|
||||
subq 44,$r12 \n\
|
||||
bge 0b \n\
|
||||
movem $r10,[$r13+] \n\
|
||||
1: \n\
|
||||
addq 44,$r12 ;; compensate for last loop underflowing n \n\
|
||||
\n\
|
||||
;; Restore registers from stack \n\
|
||||
movem [$sp+],$r10 \n\
|
||||
2: \n\
|
||||
.section .fixup,\"ax\" \n\
|
||||
\n\
|
||||
; To provide a correct count in r10 of bytes that failed to be copied, \n\
|
||||
; we jump back into the loop if the loop-branch was taken. There is no \n\
|
||||
; performance penalty for sany use; the program will segfault soon enough.\n\
|
||||
\n\
|
||||
3: \n\
|
||||
move.d [$sp],$r10 \n\
|
||||
addq 44,$r10 \n\
|
||||
move.d $r10,[$sp] \n\
|
||||
jump 0b \n\
|
||||
4: \n\
|
||||
movem [$sp+],$r10 \n\
|
||||
addq 44,$r10 \n\
|
||||
addq 44,$r12 \n\
|
||||
jump 2b \n\
|
||||
\n\
|
||||
.previous \n\
|
||||
.section __ex_table,\"a\" \n\
|
||||
.dword 0b,3b \n\
|
||||
.dword 1b,4b \n\
|
||||
.previous"
|
||||
|
||||
/* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
|
||||
/* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
|
||||
|
||||
}
|
||||
|
||||
/* Either we directly start copying, using dword copying in a loop, or
|
||||
we copy as much as possible with 'movem' and then the last block (<44
|
||||
bytes) is copied here. This will work since 'movem' will have
|
||||
updated SRC, DST and N. */
|
||||
|
||||
while (n >= 16)
|
||||
{
|
||||
__asm_copy_to_user_16 (dst, src, retn);
|
||||
n -= 16;
|
||||
}
|
||||
|
||||
/* Having a separate by-four loops cuts down on cache footprint.
|
||||
FIXME: Test with and without; increasing switch to be 0..15. */
|
||||
while (n >= 4)
|
||||
{
|
||||
__asm_copy_to_user_4 (dst, src, retn);
|
||||
n -= 4;
|
||||
}
|
||||
|
||||
switch (n)
|
||||
{
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
__asm_copy_to_user_1 (dst, src, retn);
|
||||
break;
|
||||
case 2:
|
||||
__asm_copy_to_user_2 (dst, src, retn);
|
||||
break;
|
||||
case 3:
|
||||
__asm_copy_to_user_3 (dst, src, retn);
|
||||
break;
|
||||
}
|
||||
|
||||
return retn;
|
||||
}
|
||||
|
||||
/* Copy from user to kernel, zeroing the bytes that were inaccessible in
|
||||
userland. The return-value is the number of bytes that were
|
||||
inaccessible. */
|
||||
|
||||
unsigned long
|
||||
__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
|
||||
{
|
||||
/* We want the parameters put in special registers.
|
||||
Make sure the compiler is able to make something useful of this.
|
||||
As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
|
||||
|
||||
FIXME: Comment for old gcc version. Check.
|
||||
If gcc was alright, it really would need no temporaries, and no
|
||||
stack space to save stuff on. */
|
||||
|
||||
register char *dst __asm__ ("r13") = pdst;
|
||||
register const char *src __asm__ ("r11") = psrc;
|
||||
register int n __asm__ ("r12") = pn;
|
||||
register int retn __asm__ ("r10") = 0;
|
||||
|
||||
/* The best reason to align src is that we then know that a read-fault
|
||||
was for aligned bytes; there's no 1..3 remaining good bytes to
|
||||
pickle. */
|
||||
if (((unsigned long) src & 3) != 0)
|
||||
{
|
||||
if (((unsigned long) src & 1) && n != 0)
|
||||
{
|
||||
__asm_copy_from_user_1 (dst, src, retn);
|
||||
n--;
|
||||
}
|
||||
|
||||
if (((unsigned long) src & 2) && n >= 2)
|
||||
{
|
||||
__asm_copy_from_user_2 (dst, src, retn);
|
||||
n -= 2;
|
||||
}
|
||||
|
||||
/* We only need one check after the unalignment-adjustments, because
|
||||
if both adjustments were done, either both or neither reference
|
||||
had an exception. */
|
||||
if (retn != 0)
|
||||
goto copy_exception_bytes;
|
||||
}
|
||||
|
||||
/* Decide which copying method to use. */
|
||||
if (n >= 44*2) /* Break even between movem and
|
||||
move16 is at 38.7*2, but modulo 44.
|
||||
FIXME: We use move4 now. */
|
||||
{
|
||||
/* For large copies we use 'movem' */
|
||||
|
||||
/* It is not optimal to tell the compiler about clobbering any
|
||||
registers; that will move the saving/restoring of those registers
|
||||
to the function prologue/epilogue, and make non-movem sizes
|
||||
suboptimal.
|
||||
|
||||
This method is not foolproof; it assumes that the "asm reg"
|
||||
declarations at the beginning of the function really are used
|
||||
here (beware: they may be moved to temporary registers).
|
||||
This way, we do not have to save/move the registers around into
|
||||
temporaries; we can safely use them straight away.
|
||||
|
||||
If you want to check that the allocation was right; then
|
||||
check the equalities in the first comment. It should say
|
||||
"r13=r13, r11=r11, r12=r12" */
|
||||
__asm__ volatile ("\n\
|
||||
.ifnc %0%1%2%3,$r13$r11$r12$r10 \n\
|
||||
.err \n\
|
||||
.endif \n\
|
||||
\n\
|
||||
;; Save the registers we'll use in the movem process \n\
|
||||
;; on the stack. \n\
|
||||
subq 11*4,$sp \n\
|
||||
movem $r10,[$sp] \n\
|
||||
\n\
|
||||
;; Now we've got this: \n\
|
||||
;; r11 - src \n\
|
||||
;; r13 - dst \n\
|
||||
;; r12 - n \n\
|
||||
\n\
|
||||
;; Update n for the first loop \n\
|
||||
subq 44,$r12 \n\
|
||||
0: \n\
|
||||
movem [$r11+],$r10 \n\
|
||||
1: \n\
|
||||
subq 44,$r12 \n\
|
||||
bge 0b \n\
|
||||
movem $r10,[$r13+] \n\
|
||||
\n\
|
||||
addq 44,$r12 ;; compensate for last loop underflowing n \n\
|
||||
\n\
|
||||
;; Restore registers from stack \n\
|
||||
movem [$sp+],$r10 \n\
|
||||
4: \n\
|
||||
.section .fixup,\"ax\" \n\
|
||||
\n\
|
||||
;; Do not jump back into the loop if we fail. For some uses, we get a \n\
|
||||
;; page fault somewhere on the line. Without checking for page limits, \n\
|
||||
;; we don't know where, but we need to copy accurately and keep an \n\
|
||||
;; accurate count; not just clear the whole line. To do that, we fall \n\
|
||||
;; down in the code below, proceeding with smaller amounts. It should \n\
|
||||
;; be kept in mind that we have to cater to code like what at one time \n\
|
||||
;; was in fs/super.c: \n\
|
||||
;; i = size - copy_from_user((void *)page, data, size); \n\
|
||||
;; which would cause repeated faults while clearing the remainder of \n\
|
||||
;; the SIZE bytes at PAGE after the first fault. \n\
|
||||
;; A caveat here is that we must not fall through from a failing page \n\
|
||||
;; to a valid page. \n\
|
||||
\n\
|
||||
3: \n\
|
||||
movem [$sp+],$r10 \n\
|
||||
addq 44,$r12 ;; Get back count before faulting point. \n\
|
||||
subq 44,$r11 ;; Get back pointer to faulting movem-line. \n\
|
||||
jump 4b ;; Fall through, pretending the fault didn't happen.\n\
|
||||
\n\
|
||||
.previous \n\
|
||||
.section __ex_table,\"a\" \n\
|
||||
.dword 1b,3b \n\
|
||||
.previous"
|
||||
|
||||
/* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
|
||||
/* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
|
||||
|
||||
}
|
||||
|
||||
/* Either we directly start copying here, using dword copying in a loop,
|
||||
or we copy as much as possible with 'movem' and then the last block
|
||||
(<44 bytes) is copied here. This will work since 'movem' will have
|
||||
updated src, dst and n. (Except with failing src.)
|
||||
|
||||
Since we want to keep src accurate, we can't use
|
||||
__asm_copy_from_user_N with N != (1, 2, 4); it updates dst and
|
||||
retn, but not src (by design; it's value is ignored elsewhere). */
|
||||
|
||||
while (n >= 4)
|
||||
{
|
||||
__asm_copy_from_user_4 (dst, src, retn);
|
||||
n -= 4;
|
||||
|
||||
if (retn)
|
||||
goto copy_exception_bytes;
|
||||
}
|
||||
|
||||
/* If we get here, there were no memory read faults. */
|
||||
switch (n)
|
||||
{
|
||||
/* These copies are at least "naturally aligned" (so we don't have
|
||||
to check each byte), due to the src alignment code before the
|
||||
movem loop. The *_3 case *will* get the correct count for retn. */
|
||||
case 0:
|
||||
/* This case deliberately left in (if you have doubts check the
|
||||
generated assembly code). */
|
||||
break;
|
||||
case 1:
|
||||
__asm_copy_from_user_1 (dst, src, retn);
|
||||
break;
|
||||
case 2:
|
||||
__asm_copy_from_user_2 (dst, src, retn);
|
||||
break;
|
||||
case 3:
|
||||
__asm_copy_from_user_3 (dst, src, retn);
|
||||
break;
|
||||
}
|
||||
|
||||
/* If we get here, retn correctly reflects the number of failing
|
||||
bytes. */
|
||||
return retn;
|
||||
|
||||
copy_exception_bytes:
|
||||
/* We already have "retn" bytes cleared, and need to clear the
|
||||
remaining "n" bytes. A non-optimized simple byte-for-byte in-line
|
||||
memset is preferred here, since this isn't speed-critical code and
|
||||
we'd rather have this a leaf-function than calling memset. */
|
||||
{
|
||||
char *endp;
|
||||
for (endp = dst + n; dst < endp; dst++)
|
||||
*dst = 0;
|
||||
}
|
||||
|
||||
return retn + n;
|
||||
}
|
||||
|
||||
/* Zero userspace. */
|
||||
|
||||
unsigned long
|
||||
__do_clear_user (void __user *pto, unsigned long pn)
|
||||
{
|
||||
/* We want the parameters put in special registers.
|
||||
Make sure the compiler is able to make something useful of this.
|
||||
As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
|
||||
|
||||
FIXME: Comment for old gcc version. Check.
|
||||
If gcc was alright, it really would need no temporaries, and no
|
||||
stack space to save stuff on. */
|
||||
|
||||
register char *dst __asm__ ("r13") = pto;
|
||||
register int n __asm__ ("r12") = pn;
|
||||
register int retn __asm__ ("r10") = 0;
|
||||
|
||||
|
||||
if (((unsigned long) dst & 3) != 0
|
||||
/* Don't align if we wouldn't copy more than a few bytes. */
|
||||
&& n >= 3)
|
||||
{
|
||||
if ((unsigned long) dst & 1)
|
||||
{
|
||||
__asm_clear_1 (dst, retn);
|
||||
n--;
|
||||
}
|
||||
|
||||
if ((unsigned long) dst & 2)
|
||||
{
|
||||
__asm_clear_2 (dst, retn);
|
||||
n -= 2;
|
||||
}
|
||||
}
|
||||
|
||||
/* Decide which copying method to use.
|
||||
FIXME: This number is from the "ordinary" kernel memset. */
|
||||
if (n >= (1*48))
|
||||
{
|
||||
/* For large clears we use 'movem' */
|
||||
|
||||
/* It is not optimal to tell the compiler about clobbering any
|
||||
call-saved registers; that will move the saving/restoring of
|
||||
those registers to the function prologue/epilogue, and make
|
||||
non-movem sizes suboptimal.
|
||||
|
||||
This method is not foolproof; it assumes that the "asm reg"
|
||||
declarations at the beginning of the function really are used
|
||||
here (beware: they may be moved to temporary registers).
|
||||
This way, we do not have to save/move the registers around into
|
||||
temporaries; we can safely use them straight away.
|
||||
|
||||
If you want to check that the allocation was right; then
|
||||
check the equalities in the first comment. It should say
|
||||
something like "r13=r13, r11=r11, r12=r12". */
|
||||
__asm__ volatile ("\n\
|
||||
.ifnc %0%1%2,$r13$r12$r10 \n\
|
||||
.err \n\
|
||||
.endif \n\
|
||||
\n\
|
||||
;; Save the registers we'll clobber in the movem process \n\
|
||||
;; on the stack. Don't mention them to gcc, it will only be \n\
|
||||
;; upset. \n\
|
||||
subq 11*4,$sp \n\
|
||||
movem $r10,[$sp] \n\
|
||||
\n\
|
||||
clear.d $r0 \n\
|
||||
clear.d $r1 \n\
|
||||
clear.d $r2 \n\
|
||||
clear.d $r3 \n\
|
||||
clear.d $r4 \n\
|
||||
clear.d $r5 \n\
|
||||
clear.d $r6 \n\
|
||||
clear.d $r7 \n\
|
||||
clear.d $r8 \n\
|
||||
clear.d $r9 \n\
|
||||
clear.d $r10 \n\
|
||||
clear.d $r11 \n\
|
||||
\n\
|
||||
;; Now we've got this: \n\
|
||||
;; r13 - dst \n\
|
||||
;; r12 - n \n\
|
||||
\n\
|
||||
;; Update n for the first loop \n\
|
||||
subq 12*4,$r12 \n\
|
||||
0: \n\
|
||||
subq 12*4,$r12 \n\
|
||||
bge 0b \n\
|
||||
movem $r11,[$r13+] \n\
|
||||
1: \n\
|
||||
addq 12*4,$r12 ;; compensate for last loop underflowing n\n\
|
||||
\n\
|
||||
;; Restore registers from stack \n\
|
||||
movem [$sp+],$r10 \n\
|
||||
2: \n\
|
||||
.section .fixup,\"ax\" \n\
|
||||
3: \n\
|
||||
move.d [$sp],$r10 \n\
|
||||
addq 12*4,$r10 \n\
|
||||
move.d $r10,[$sp] \n\
|
||||
clear.d $r10 \n\
|
||||
jump 0b \n\
|
||||
\n\
|
||||
4: \n\
|
||||
movem [$sp+],$r10 \n\
|
||||
addq 12*4,$r10 \n\
|
||||
addq 12*4,$r12 \n\
|
||||
jump 2b \n\
|
||||
\n\
|
||||
.previous \n\
|
||||
.section __ex_table,\"a\" \n\
|
||||
.dword 0b,3b \n\
|
||||
.dword 1b,4b \n\
|
||||
.previous"
|
||||
|
||||
/* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
|
||||
/* Inputs */ : "0" (dst), "1" (n), "2" (retn)
|
||||
/* Clobber */ : "r11");
|
||||
}
|
||||
|
||||
while (n >= 16)
|
||||
{
|
||||
__asm_clear_16 (dst, retn);
|
||||
n -= 16;
|
||||
}
|
||||
|
||||
/* Having a separate by-four loops cuts down on cache footprint.
|
||||
FIXME: Test with and without; increasing switch to be 0..15. */
|
||||
while (n >= 4)
|
||||
{
|
||||
__asm_clear_4 (dst, retn);
|
||||
n -= 4;
|
||||
}
|
||||
|
||||
switch (n)
|
||||
{
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
__asm_clear_1 (dst, retn);
|
||||
break;
|
||||
case 2:
|
||||
__asm_clear_2 (dst, retn);
|
||||
break;
|
||||
case 3:
|
||||
__asm_clear_3 (dst, retn);
|
||||
break;
|
||||
}
|
||||
|
||||
return retn;
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue