mirror of
https://github.com/AetherDroid/android_kernel_samsung_on5xelte.git
synced 2025-09-08 01:08:03 -04:00
Fixed MTP to work with TWRP
This commit is contained in:
commit
f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions
23
arch/microblaze/lib/Makefile
Normal file
23
arch/microblaze/lib/Makefile
Normal file
|
@ -0,0 +1,23 @@
|
|||
#
# Makefile
#

ifdef CONFIG_FUNCTION_TRACER
# Build the libgcc shift helpers without -pg when the function tracer
# is enabled.
CFLAGS_REMOVE_ashldi3.o = -pg
CFLAGS_REMOVE_ashrdi3.o = -pg
CFLAGS_REMOVE_lshrdi3.o = -pg
endif

# memset always comes from the generic C implementation.
lib-y := memset.o

# Copy routines: hand-optimised assembly when CONFIG_OPT_LIB_ASM is set,
# otherwise the generic C memcpy/memmove.
ifeq ($(CONFIG_OPT_LIB_ASM),y)
lib-y += fastcopy.o
else
lib-y += memcpy.o memmove.o
endif

lib-y += uaccess_old.o

# libgcc-style stuff needed in the kernel
obj-y += ashldi3.o ashrdi3.o cmpdi2.o divsi3.o lshrdi3.o modsi3.o
obj-y += muldi3.o mulsi3.o ucmpdi2.o udivsi3.o umodsi3.o
|
28
arch/microblaze/lib/ashldi3.c
Normal file
28
arch/microblaze/lib/ashldi3.c
Normal file
|
@ -0,0 +1,28 @@
|
|||
#include <linux/export.h>

#include "libgcc.h"

/*
 * __ashldi3 - 64-bit arithmetic shift left (libgcc helper).
 * @u: value to shift
 * @b: shift count; result is defined only for 0 <= b < 64
 *
 * Returns u << b.  The 64-bit operand is handled as a high/low pair of
 * 32-bit words (DWunion): when the count is a full word or more, the low
 * word shifts straight into the high word; otherwise bits that cross the
 * word boundary are carried from low into high.
 */
long long __ashldi3(long long u, word_type b)
{
	DWunion src, res;
	word_type remaining;

	if (!b)
		return u;

	src.ll = u;
	remaining = 32 - b;	/* low-word bits that survive the shift */

	if (remaining <= 0) {
		/* Shift of >= 32: low word empties into the high word. */
		res.s.low = 0;
		res.s.high = (unsigned int) src.s.low << -remaining;
	} else {
		/* Bits migrating from the low word into the high word. */
		const unsigned int carries = (unsigned int) src.s.low >> remaining;

		res.s.low = (unsigned int) src.s.low << b;
		res.s.high = ((unsigned int) src.s.high << b) | carries;
	}

	return res.ll;
}
EXPORT_SYMBOL(__ashldi3);
|
30
arch/microblaze/lib/ashrdi3.c
Normal file
30
arch/microblaze/lib/ashrdi3.c
Normal file
|
@ -0,0 +1,30 @@
|
|||
#include <linux/export.h>

#include "libgcc.h"

/*
 * __ashrdi3 - 64-bit arithmetic shift right (libgcc helper).
 * @u: value to shift
 * @b: shift count; result is defined only for 0 <= b < 64
 *
 * Returns u >> b with sign replication: the vacated high-order bits are
 * copies of the original sign bit.  The operand is processed as a
 * high/low pair of 32-bit words (DWunion).
 */
long long __ashrdi3(long long u, word_type b)
{
	DWunion src, res;
	word_type remaining;

	if (!b)
		return u;

	src.ll = u;
	remaining = 32 - b;	/* low-word bits that survive the shift */

	if (remaining <= 0) {
		/* Shift of >= 32: high word becomes all sign bits. */
		res.s.high = src.s.high >> 31;	/* 1..1 or 0..0 */
		res.s.low = src.s.high >> -remaining;
	} else {
		/* Bits migrating from the high word into the low word. */
		const unsigned int carries = (unsigned int) src.s.high << remaining;

		res.s.high = src.s.high >> b;	/* arithmetic (signed) shift */
		res.s.low = ((unsigned int) src.s.low >> b) | carries;
	}

	return res.ll;
}
EXPORT_SYMBOL(__ashrdi3);
|
26
arch/microblaze/lib/cmpdi2.c
Normal file
26
arch/microblaze/lib/cmpdi2.c
Normal file
|
@ -0,0 +1,26 @@
|
|||
#include <linux/export.h>

#include "libgcc.h"

/*
 * __cmpdi2 - signed 64-bit three-way compare (libgcc helper).
 * @a: left operand
 * @b: right operand
 *
 * Returns 0 if a < b, 1 if a == b, 2 if a > b (the libgcc convention).
 * The high words are compared signed; on a tie the low words are
 * compared as unsigned quantities.
 */
word_type __cmpdi2(long long a, long long b)
{
	const DWunion lhs = {
		.ll = a
	};
	const DWunion rhs = {
		.ll = b
	};

	/* High words decide first (signed compare)... */
	if (lhs.s.high < rhs.s.high)
		return 0;
	if (lhs.s.high > rhs.s.high)
		return 2;

	/* ...then the low words, which compare as unsigned. */
	if ((unsigned int) lhs.s.low < (unsigned int) rhs.s.low)
		return 0;
	if ((unsigned int) lhs.s.low > (unsigned int) rhs.s.low)
		return 2;

	return 1;
}
EXPORT_SYMBOL(__cmpdi2);
|
73
arch/microblaze/lib/divsi3.S
Normal file
73
arch/microblaze/lib/divsi3.S
Normal file
|
@ -0,0 +1,73 @@
|
|||
#include <linux/linkage.h>

/*
 * __divsi3 - signed divide for 32-bit integers (MicroBlaze).
 *
 * In:    r5 = dividend, r6 = divisor
 * Out:   r3 = quotient (0 on divide-by-zero or zero dividend)
 *
 * Register roles inside the routine (r28-r31 are saved/restored):
 *   r28 = sign of the result (xor of operand signs)
 *   r29 = loop counter
 *   r30 = running remainder (mod)
 *   r31 = trial remainder
 * Classic shift-and-subtract restoring division on |r5| / |r6|,
 * with the sign fixed up at the end.
 */
	.text
	.globl	__divsi3
	.type	__divsi3, @function
	.ent	__divsi3
__divsi3:
	.frame	r1, 0, r15

	addik	r1, r1, -16		/* carve a 16-byte frame */
	swi	r28, r1, 0		/* save callee-saved temporaries */
	swi	r29, r1, 4
	swi	r30, r1, 8
	swi	r31, r1, 12

	beqi	r6, div_by_zero		/* div_by_zero - division error */
	beqi	r5, result_is_zero	/* result is zero */
	bgeid	r5, r5_pos
	xor	r28, r5, r6		/* get the sign of the result (DELAY SLOT) */
	rsubi	r5, r5, 0		/* make r5 positive */
r5_pos:
	bgei	r6, r6_pos
	rsubi	r6, r6, 0		/* make r6 positive */
r6_pos:
	addik	r30, r0, 0		/* clear mod */
	addik	r3, r0, 0		/* clear div */
	addik	r29, r0, 32		/* initialize the loop count */

/* first part: skip leading zero bits of r5 to shorten the loop */
div0:
	blti	r5, div2		/* this traps r5 == 0x80000000 */
div1:
	add	r5, r5, r5		/* left shift logical r5 */
	bgtid	r5, div1
	addik	r29, r29, -1		/* one fewer iteration (DELAY SLOT) */
div2:
	/* left shift logical r5, moving the next bit into the carry */
	add	r5, r5, r5
	addc	r30, r30, r30		/* shift that bit into the mod register */
	rsub	r31, r6, r30		/* trial subtract: r31 = r30 - r6 */
	blti	r31, mod_too_small
	/* subtraction did not underflow: commit it and set a quotient bit */
	or	r30, r0, r31
	addik	r3, r3, 1
mod_too_small:
	addik	r29, r29, -1
	beqi	r29, loop_end
	add	r3, r3, r3		/* shift the quotient up for the next bit */
	bri	div2
loop_end:
	bgei	r28, return_here	/* result sign positive: done */
	brid	return_here
	rsubi	r3, r3, 0		/* negate the result (DELAY SLOT) */

div_by_zero:
result_is_zero:
	or	r3, r0, r0		/* set result to 0 */
return_here:
	/* restore the saved registers and pop the frame */
	lwi	r28, r1, 0
	lwi	r29, r1, 4
	lwi	r30, r1, 8
	lwi	r31, r1, 12
	rtsd	r15, 8
	addik	r1, r1, 16		/* (DELAY SLOT) */

.size __divsi3, . - __divsi3
.end __divsi3
|
670
arch/microblaze/lib/fastcopy.S
Normal file
670
arch/microblaze/lib/fastcopy.S
Normal file
|
@ -0,0 +1,670 @@
|
|||
/*
|
||||
* Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
|
||||
* Copyright (C) 2008-2009 PetaLogix
|
||||
* Copyright (C) 2008 Jim Law - Iris LP All rights reserved.
|
||||
*
|
||||
* This file is subject to the terms and conditions of the GNU General
|
||||
* Public License. See the file COPYING in the main directory of this
|
||||
* archive for more details.
|
||||
*
|
||||
* Written by Jim Law <jlaw@irispower.com>
|
||||
*
|
||||
* intended to replace:
|
||||
* memcpy in memcpy.c and
|
||||
* memmove in memmove.c
|
||||
* ... in arch/microblaze/lib
|
||||
*
|
||||
*
|
||||
* assly_fastcopy.S
|
||||
*
|
||||
* Attempt at quicker memcpy and memmove for MicroBlaze
|
||||
* Input : Operand1 in Reg r5 - destination address
|
||||
* Operand2 in Reg r6 - source address
|
||||
* Operand3 in Reg r7 - number of bytes to transfer
|
||||
* Output: Result in Reg r3 - starting destinaition address
|
||||
*
|
||||
*
|
||||
* Explanation:
|
||||
* Perform (possibly unaligned) copy of a block of memory
|
||||
* between mem locations with size of xfer spec'd in bytes
|
||||
*/
|
||||
|
||||
#ifdef __MICROBLAZEEL__
#error Microblaze LE not support ASM optimized lib func. Disable OPT_LIB_ASM.
#endif

#include <linux/linkage.h>

/*
 * memcpy - ascending copy, alignment-aware.
 * In:  r5 = dest, r6 = src, r7 = byte count
 * Out: r3 = original dest
 * Strategy: byte-copy up to a 4-byte-aligned dest, then 32-byte blocks
 * (word loads with barrel-shift merging when src is misaligned), then
 * whole words, then trailing bytes.
 */
	.text
	.globl	memcpy
	.type	memcpy, @function
	.ent	memcpy

memcpy:
fast_memcpy_ascending:
	/* move d to return register as value of function */
	addi	r3, r5, 0

	addi	r4, r0, 4		/* n = 4 */
	cmpu	r4, r4, r7		/* n = c - n (unsigned) */
	blti	r4, a_xfer_end		/* if n < 0, less than one word to transfer */

	/* transfer first 0~3 bytes to get aligned dest address */
	andi	r4, r5, 3		/* n = d & 3 */
	/* if zero, destination already aligned */
	beqi	r4, a_dalign_done
	/* n = 4 - n (yields 3, 2, 1 transfers for 1, 2, 3 addr offset) */
	rsubi	r4, r4, 4
	rsub	r7, r4, r7		/* c = c - n adjust c */

a_xfer_first_loop:
	/* if no bytes left to transfer, transfer the bulk */
	beqi	r4, a_dalign_done
	lbui	r11, r6, 0		/* h = *s */
	sbi	r11, r5, 0		/* *d = h */
	addi	r6, r6, 1		/* s++ */
	addi	r5, r5, 1		/* d++ */
	brid	a_xfer_first_loop	/* loop */
	addi	r4, r4, -1		/* n-- (IN DELAY SLOT) */

a_dalign_done:
	addi	r4, r0, 32		/* n = 32 */
	cmpu	r4, r4, r7		/* n = c - n (unsigned) */
	/* if n < 0, less than one block to transfer */
	blti	r4, a_block_done

a_block_xfer:
	andi	r4, r7, 0xffffffe0	/* n = c & ~31 */
	rsub	r7, r4, r7		/* c = c - n */

	andi	r9, r6, 3		/* t1 = s & 3 */
	/* if temp != 0, unaligned transfers needed */
	bnei	r9, a_block_unaligned

a_block_aligned:
	lwi	r9, r6, 0		/* t1 = *(s + 0) */
	lwi	r10, r6, 4		/* t2 = *(s + 4) */
	lwi	r11, r6, 8		/* t3 = *(s + 8) */
	lwi	r12, r6, 12		/* t4 = *(s + 12) */
	swi	r9, r5, 0		/* *(d + 0) = t1 */
	swi	r10, r5, 4		/* *(d + 4) = t2 */
	swi	r11, r5, 8		/* *(d + 8) = t3 */
	swi	r12, r5, 12		/* *(d + 12) = t4 */
	lwi	r9, r6, 16		/* t1 = *(s + 16) */
	lwi	r10, r6, 20		/* t2 = *(s + 20) */
	lwi	r11, r6, 24		/* t3 = *(s + 24) */
	lwi	r12, r6, 28		/* t4 = *(s + 28) */
	swi	r9, r5, 16		/* *(d + 16) = t1 */
	swi	r10, r5, 20		/* *(d + 20) = t2 */
	swi	r11, r5, 24		/* *(d + 24) = t3 */
	swi	r12, r5, 28		/* *(d + 28) = t4 */
	addi	r6, r6, 32		/* s = s + 32 */
	addi	r4, r4, -32		/* n = n - 32 */
	bneid	r4, a_block_aligned	/* while (n) loop */
	addi	r5, r5, 32		/* d = d + 32 (IN DELAY SLOT) */
	bri	a_block_done

a_block_unaligned:
	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
	add	r6, r6, r4		/* s = s + n */
	lwi	r11, r8, 0		/* h = *(as + 0) */

	addi	r9, r9, -1
	beqi	r9, a_block_u1		/* t1 was 1 => 1 byte offset */
	addi	r9, r9, -1
	beqi	r9, a_block_u2		/* t1 was 2 => 2 byte offset */

a_block_u3:
	bslli	r11, r11, 24		/* h = h << 24 */
a_bu3_loop:
	lwi	r12, r8, 4		/* v = *(as + 4) */
	bsrli	r9, r12, 8		/* t1 = v >> 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 0		/* *(d + 0) = t1 */
	bslli	r11, r12, 24		/* h = v << 24 */
	lwi	r12, r8, 8		/* v = *(as + 8) */
	bsrli	r9, r12, 8		/* t1 = v >> 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 4		/* *(d + 4) = t1 */
	bslli	r11, r12, 24		/* h = v << 24 */
	lwi	r12, r8, 12		/* v = *(as + 12) */
	bsrli	r9, r12, 8		/* t1 = v >> 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 8		/* *(d + 8) = t1 */
	bslli	r11, r12, 24		/* h = v << 24 */
	lwi	r12, r8, 16		/* v = *(as + 16) */
	bsrli	r9, r12, 8		/* t1 = v >> 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 12		/* *(d + 12) = t1 */
	bslli	r11, r12, 24		/* h = v << 24 */
	lwi	r12, r8, 20		/* v = *(as + 20) */
	bsrli	r9, r12, 8		/* t1 = v >> 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 16		/* *(d + 16) = t1 */
	bslli	r11, r12, 24		/* h = v << 24 */
	lwi	r12, r8, 24		/* v = *(as + 24) */
	bsrli	r9, r12, 8		/* t1 = v >> 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 20		/* *(d + 20) = t1 */
	bslli	r11, r12, 24		/* h = v << 24 */
	lwi	r12, r8, 28		/* v = *(as + 28) */
	bsrli	r9, r12, 8		/* t1 = v >> 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 24		/* *(d + 24) = t1 */
	bslli	r11, r12, 24		/* h = v << 24 */
	lwi	r12, r8, 32		/* v = *(as + 32) */
	bsrli	r9, r12, 8		/* t1 = v >> 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 28		/* *(d + 28) = t1 */
	bslli	r11, r12, 24		/* h = v << 24 */
	addi	r8, r8, 32		/* as = as + 32 */
	addi	r4, r4, -32		/* n = n - 32 */
	bneid	r4, a_bu3_loop		/* while (n) loop */
	addi	r5, r5, 32		/* d = d + 32 (IN DELAY SLOT) */
	bri	a_block_done

a_block_u1:
	bslli	r11, r11, 8		/* h = h << 8 */
a_bu1_loop:
	lwi	r12, r8, 4		/* v = *(as + 4) */
	bsrli	r9, r12, 24		/* t1 = v >> 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 0		/* *(d + 0) = t1 */
	bslli	r11, r12, 8		/* h = v << 8 */
	lwi	r12, r8, 8		/* v = *(as + 8) */
	bsrli	r9, r12, 24		/* t1 = v >> 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 4		/* *(d + 4) = t1 */
	bslli	r11, r12, 8		/* h = v << 8 */
	lwi	r12, r8, 12		/* v = *(as + 12) */
	bsrli	r9, r12, 24		/* t1 = v >> 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 8		/* *(d + 8) = t1 */
	bslli	r11, r12, 8		/* h = v << 8 */
	lwi	r12, r8, 16		/* v = *(as + 16) */
	bsrli	r9, r12, 24		/* t1 = v >> 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 12		/* *(d + 12) = t1 */
	bslli	r11, r12, 8		/* h = v << 8 */
	lwi	r12, r8, 20		/* v = *(as + 20) */
	bsrli	r9, r12, 24		/* t1 = v >> 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 16		/* *(d + 16) = t1 */
	bslli	r11, r12, 8		/* h = v << 8 */
	lwi	r12, r8, 24		/* v = *(as + 24) */
	bsrli	r9, r12, 24		/* t1 = v >> 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 20		/* *(d + 20) = t1 */
	bslli	r11, r12, 8		/* h = v << 8 */
	lwi	r12, r8, 28		/* v = *(as + 28) */
	bsrli	r9, r12, 24		/* t1 = v >> 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 24		/* *(d + 24) = t1 */
	bslli	r11, r12, 8		/* h = v << 8 */
	lwi	r12, r8, 32		/* v = *(as + 32) */
	bsrli	r9, r12, 24		/* t1 = v >> 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 28		/* *(d + 28) = t1 */
	bslli	r11, r12, 8		/* h = v << 8 */
	addi	r8, r8, 32		/* as = as + 32 */
	addi	r4, r4, -32		/* n = n - 32 */
	bneid	r4, a_bu1_loop		/* while (n) loop */
	addi	r5, r5, 32		/* d = d + 32 (IN DELAY SLOT) */
	bri	a_block_done

a_block_u2:
	bslli	r11, r11, 16		/* h = h << 16 */
a_bu2_loop:
	lwi	r12, r8, 4		/* v = *(as + 4) */
	bsrli	r9, r12, 16		/* t1 = v >> 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 0		/* *(d + 0) = t1 */
	bslli	r11, r12, 16		/* h = v << 16 */
	lwi	r12, r8, 8		/* v = *(as + 8) */
	bsrli	r9, r12, 16		/* t1 = v >> 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 4		/* *(d + 4) = t1 */
	bslli	r11, r12, 16		/* h = v << 16 */
	lwi	r12, r8, 12		/* v = *(as + 12) */
	bsrli	r9, r12, 16		/* t1 = v >> 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 8		/* *(d + 8) = t1 */
	bslli	r11, r12, 16		/* h = v << 16 */
	lwi	r12, r8, 16		/* v = *(as + 16) */
	bsrli	r9, r12, 16		/* t1 = v >> 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 12		/* *(d + 12) = t1 */
	bslli	r11, r12, 16		/* h = v << 16 */
	lwi	r12, r8, 20		/* v = *(as + 20) */
	bsrli	r9, r12, 16		/* t1 = v >> 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 16		/* *(d + 16) = t1 */
	bslli	r11, r12, 16		/* h = v << 16 */
	lwi	r12, r8, 24		/* v = *(as + 24) */
	bsrli	r9, r12, 16		/* t1 = v >> 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 20		/* *(d + 20) = t1 */
	bslli	r11, r12, 16		/* h = v << 16 */
	lwi	r12, r8, 28		/* v = *(as + 28) */
	bsrli	r9, r12, 16		/* t1 = v >> 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 24		/* *(d + 24) = t1 */
	bslli	r11, r12, 16		/* h = v << 16 */
	lwi	r12, r8, 32		/* v = *(as + 32) */
	bsrli	r9, r12, 16		/* t1 = v >> 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 28		/* *(d + 28) = t1 */
	bslli	r11, r12, 16		/* h = v << 16 */
	addi	r8, r8, 32		/* as = as + 32 */
	addi	r4, r4, -32		/* n = n - 32 */
	bneid	r4, a_bu2_loop		/* while (n) loop */
	addi	r5, r5, 32		/* d = d + 32 (IN DELAY SLOT) */
	/* falls through to a_block_done */

a_block_done:
	addi	r4, r0, 4		/* n = 4 */
	cmpu	r4, r4, r7		/* n = c - n (unsigned) */
	blti	r4, a_xfer_end		/* if n < 0, less than one word to transfer */

a_word_xfer:
	andi	r4, r7, 0xfffffffc	/* n = c & ~3 */
	addi	r10, r0, 0		/* offset = 0 */

	andi	r9, r6, 3		/* t1 = s & 3 */
	/* if temp != 0, unaligned transfers needed */
	bnei	r9, a_word_unaligned

a_word_aligned:
	lw	r9, r6, r10		/* t1 = *(s+offset) */
	sw	r9, r5, r10		/* *(d+offset) = t1 */
	addi	r4, r4,-4		/* n-- */
	bneid	r4, a_word_aligned	/* loop */
	addi	r10, r10, 4		/* offset++ (IN DELAY SLOT) */

	bri	a_word_done

a_word_unaligned:
	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
	lwi	r11, r8, 0		/* h = *(as + 0) */
	addi	r8, r8, 4		/* as = as + 4 */

	addi	r9, r9, -1
	beqi	r9, a_word_u1		/* t1 was 1 => 1 byte offset */
	addi	r9, r9, -1
	beqi	r9, a_word_u2		/* t1 was 2 => 2 byte offset */

a_word_u3:
	bslli	r11, r11, 24		/* h = h << 24 */
a_wu3_loop:
	lw	r12, r8, r10		/* v = *(as + offset) */
	bsrli	r9, r12, 8		/* t1 = v >> 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	sw	r9, r5, r10		/* *(d + offset) = t1 */
	bslli	r11, r12, 24		/* h = v << 24 */
	addi	r4, r4,-4		/* n = n - 4 */
	bneid	r4, a_wu3_loop		/* while (n) loop */
	addi	r10, r10, 4		/* offset = offset + 4 (IN DELAY SLOT) */

	bri	a_word_done

a_word_u1:
	bslli	r11, r11, 8		/* h = h << 8 */
a_wu1_loop:
	lw	r12, r8, r10		/* v = *(as + offset) */
	bsrli	r9, r12, 24		/* t1 = v >> 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	sw	r9, r5, r10		/* *(d + offset) = t1 */
	bslli	r11, r12, 8		/* h = v << 8 */
	addi	r4, r4,-4		/* n = n - 4 */
	bneid	r4, a_wu1_loop		/* while (n) loop */
	addi	r10, r10, 4		/* offset = offset + 4 (IN DELAY SLOT) */

	bri	a_word_done

a_word_u2:
	bslli	r11, r11, 16		/* h = h << 16 */
a_wu2_loop:
	lw	r12, r8, r10		/* v = *(as + offset) */
	bsrli	r9, r12, 16		/* t1 = v >> 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	sw	r9, r5, r10		/* *(d + offset) = t1 */
	bslli	r11, r12, 16		/* h = v << 16 */
	addi	r4, r4,-4		/* n = n - 4 */
	bneid	r4, a_wu2_loop		/* while (n) loop */
	addi	r10, r10, 4		/* offset = offset + 4 (IN DELAY SLOT) */
	/* falls through to a_word_done */

a_word_done:
	add	r5, r5, r10		/* d = d + offset */
	add	r6, r6, r10		/* s = s + offset */
	rsub	r7, r10, r7		/* c = c - offset */

a_xfer_end:
a_xfer_end_loop:
	beqi	r7, a_done		/* while (c) */
	lbui	r9, r6, 0		/* t1 = *s */
	addi	r6, r6, 1		/* s++ */
	sbi	r9, r5, 0		/* *d = t1 */
	addi	r7, r7, -1		/* c-- */
	brid	a_xfer_end_loop		/* loop */
	addi	r5, r5, 1		/* d++ (IN DELAY SLOT) */

a_done:
	rtsd	r15, 8
	nop

.size memcpy, . - memcpy
.end memcpy
|
||||
/*----------------------------------------------------------------------------*/
/*
 * memmove - overlap-safe copy.
 * In:  r5 = dest, r6 = src, r7 = byte count
 * Out: r3 = original dest
 * If dest <= src the ascending memcpy path above is reused; otherwise a
 * mirrored descending copy runs from the top of the buffers downwards.
 */
	.globl	memmove
	.type	memmove, @function
	.ent	memmove

memmove:
	cmpu	r4, r5, r6		/* n = s - d */
	bgei	r4, fast_memcpy_ascending

fast_memcpy_descending:
	/* move d to return register as value of function */
	addi	r3, r5, 0

	add	r5, r5, r7		/* d = d + c */
	add	r6, r6, r7		/* s = s + c */

	addi	r4, r0, 4		/* n = 4 */
	cmpu	r4, r4, r7		/* n = c - n (unsigned) */
	blti	r4, d_xfer_end		/* if n < 0, less than one word to transfer */

	/* transfer first 0~3 bytes to get aligned dest address */
	andi	r4, r5, 3		/* n = d & 3 */
	/* if zero, destination already aligned */
	beqi	r4, d_dalign_done
	rsub	r7, r4, r7		/* c = c - n adjust c */

d_xfer_first_loop:
	/* if no bytes left to transfer, transfer the bulk */
	beqi	r4, d_dalign_done
	addi	r6, r6, -1		/* s-- */
	addi	r5, r5, -1		/* d-- */
	lbui	r11, r6, 0		/* h = *s */
	sbi	r11, r5, 0		/* *d = h */
	brid	d_xfer_first_loop	/* loop */
	addi	r4, r4, -1		/* n-- (IN DELAY SLOT) */

d_dalign_done:
	addi	r4, r0, 32		/* n = 32 */
	cmpu	r4, r4, r7		/* n = c - n (unsigned) */
	/* if n < 0, less than one block to transfer */
	blti	r4, d_block_done

d_block_xfer:
	andi	r4, r7, 0xffffffe0	/* n = c & ~31 */
	rsub	r7, r4, r7		/* c = c - n */

	andi	r9, r6, 3		/* t1 = s & 3 */
	/* if temp != 0, unaligned transfers needed */
	bnei	r9, d_block_unaligned

d_block_aligned:
	addi	r6, r6, -32		/* s = s - 32 */
	addi	r5, r5, -32		/* d = d - 32 */
	lwi	r9, r6, 28		/* t1 = *(s + 28) */
	lwi	r10, r6, 24		/* t2 = *(s + 24) */
	lwi	r11, r6, 20		/* t3 = *(s + 20) */
	lwi	r12, r6, 16		/* t4 = *(s + 16) */
	swi	r9, r5, 28		/* *(d + 28) = t1 */
	swi	r10, r5, 24		/* *(d + 24) = t2 */
	swi	r11, r5, 20		/* *(d + 20) = t3 */
	swi	r12, r5, 16		/* *(d + 16) = t4 */
	lwi	r9, r6, 12		/* t1 = *(s + 12) */
	lwi	r10, r6, 8		/* t2 = *(s + 8) */
	lwi	r11, r6, 4		/* t3 = *(s + 4) */
	lwi	r12, r6, 0		/* t4 = *(s + 0) */
	swi	r9, r5, 12		/* *(d + 12) = t1 */
	swi	r10, r5, 8		/* *(d + 8) = t2 */
	swi	r11, r5, 4		/* *(d + 4) = t3 */
	addi	r4, r4, -32		/* n = n - 32 */
	bneid	r4, d_block_aligned	/* while (n) loop */
	swi	r12, r5, 0		/* *(d + 0) = t4 (IN DELAY SLOT) */
	bri	d_block_done

d_block_unaligned:
	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
	rsub	r6, r4, r6		/* s = s - n */
	lwi	r11, r8, 0		/* h = *(as + 0) */

	addi	r9, r9, -1
	beqi	r9, d_block_u1		/* t1 was 1 => 1 byte offset */
	addi	r9, r9, -1
	beqi	r9, d_block_u2		/* t1 was 2 => 2 byte offset */

d_block_u3:
	bsrli	r11, r11, 8		/* h = h >> 8 */
d_bu3_loop:
	addi	r8, r8, -32		/* as = as - 32 */
	addi	r5, r5, -32		/* d = d - 32 */
	lwi	r12, r8, 28		/* v = *(as + 28) */
	bslli	r9, r12, 24		/* t1 = v << 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 28		/* *(d + 28) = t1 */
	bsrli	r11, r12, 8		/* h = v >> 8 */
	lwi	r12, r8, 24		/* v = *(as + 24) */
	bslli	r9, r12, 24		/* t1 = v << 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 24		/* *(d + 24) = t1 */
	bsrli	r11, r12, 8		/* h = v >> 8 */
	lwi	r12, r8, 20		/* v = *(as + 20) */
	bslli	r9, r12, 24		/* t1 = v << 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 20		/* *(d + 20) = t1 */
	bsrli	r11, r12, 8		/* h = v >> 8 */
	lwi	r12, r8, 16		/* v = *(as + 16) */
	bslli	r9, r12, 24		/* t1 = v << 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 16		/* *(d + 16) = t1 */
	bsrli	r11, r12, 8		/* h = v >> 8 */
	lwi	r12, r8, 12		/* v = *(as + 12) */
	bslli	r9, r12, 24		/* t1 = v << 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 12		/* *(d + 12) = t1 */
	bsrli	r11, r12, 8		/* h = v >> 8 */
	lwi	r12, r8, 8		/* v = *(as + 8) */
	bslli	r9, r12, 24		/* t1 = v << 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 8		/* *(d + 8) = t1 */
	bsrli	r11, r12, 8		/* h = v >> 8 */
	lwi	r12, r8, 4		/* v = *(as + 4) */
	bslli	r9, r12, 24		/* t1 = v << 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 4		/* *(d + 4) = t1 */
	bsrli	r11, r12, 8		/* h = v >> 8 */
	lwi	r12, r8, 0		/* v = *(as + 0) */
	bslli	r9, r12, 24		/* t1 = v << 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 0		/* *(d + 0) = t1 */
	addi	r4, r4, -32		/* n = n - 32 */
	bneid	r4, d_bu3_loop		/* while (n) loop */
	bsrli	r11, r12, 8		/* h = v >> 8 (IN DELAY SLOT) */
	bri	d_block_done

d_block_u1:
	bsrli	r11, r11, 24		/* h = h >> 24 */
d_bu1_loop:
	addi	r8, r8, -32		/* as = as - 32 */
	addi	r5, r5, -32		/* d = d - 32 */
	lwi	r12, r8, 28		/* v = *(as + 28) */
	bslli	r9, r12, 8		/* t1 = v << 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 28		/* *(d + 28) = t1 */
	bsrli	r11, r12, 24		/* h = v >> 24 */
	lwi	r12, r8, 24		/* v = *(as + 24) */
	bslli	r9, r12, 8		/* t1 = v << 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 24		/* *(d + 24) = t1 */
	bsrli	r11, r12, 24		/* h = v >> 24 */
	lwi	r12, r8, 20		/* v = *(as + 20) */
	bslli	r9, r12, 8		/* t1 = v << 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 20		/* *(d + 20) = t1 */
	bsrli	r11, r12, 24		/* h = v >> 24 */
	lwi	r12, r8, 16		/* v = *(as + 16) */
	bslli	r9, r12, 8		/* t1 = v << 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 16		/* *(d + 16) = t1 */
	bsrli	r11, r12, 24		/* h = v >> 24 */
	lwi	r12, r8, 12		/* v = *(as + 12) */
	bslli	r9, r12, 8		/* t1 = v << 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 12		/* *(d + 12) = t1 */
	bsrli	r11, r12, 24		/* h = v >> 24 */
	lwi	r12, r8, 8		/* v = *(as + 8) */
	bslli	r9, r12, 8		/* t1 = v << 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 8		/* *(d + 8) = t1 */
	bsrli	r11, r12, 24		/* h = v >> 24 */
	lwi	r12, r8, 4		/* v = *(as + 4) */
	bslli	r9, r12, 8		/* t1 = v << 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 4		/* *(d + 4) = t1 */
	bsrli	r11, r12, 24		/* h = v >> 24 */
	lwi	r12, r8, 0		/* v = *(as + 0) */
	bslli	r9, r12, 8		/* t1 = v << 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 0		/* *(d + 0) = t1 */
	addi	r4, r4, -32		/* n = n - 32 */
	bneid	r4, d_bu1_loop		/* while (n) loop */
	bsrli	r11, r12, 24		/* h = v >> 24 (IN DELAY SLOT) */
	bri	d_block_done

d_block_u2:
	bsrli	r11, r11, 16		/* h = h >> 16 */
d_bu2_loop:
	addi	r8, r8, -32		/* as = as - 32 */
	addi	r5, r5, -32		/* d = d - 32 */
	lwi	r12, r8, 28		/* v = *(as + 28) */
	bslli	r9, r12, 16		/* t1 = v << 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 28		/* *(d + 28) = t1 */
	bsrli	r11, r12, 16		/* h = v >> 16 */
	lwi	r12, r8, 24		/* v = *(as + 24) */
	bslli	r9, r12, 16		/* t1 = v << 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 24		/* *(d + 24) = t1 */
	bsrli	r11, r12, 16		/* h = v >> 16 */
	lwi	r12, r8, 20		/* v = *(as + 20) */
	bslli	r9, r12, 16		/* t1 = v << 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 20		/* *(d + 20) = t1 */
	bsrli	r11, r12, 16		/* h = v >> 16 */
	lwi	r12, r8, 16		/* v = *(as + 16) */
	bslli	r9, r12, 16		/* t1 = v << 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 16		/* *(d + 16) = t1 */
	bsrli	r11, r12, 16		/* h = v >> 16 */
	lwi	r12, r8, 12		/* v = *(as + 12) */
	bslli	r9, r12, 16		/* t1 = v << 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 12		/* *(d + 12) = t1 */
	bsrli	r11, r12, 16		/* h = v >> 16 */
	lwi	r12, r8, 8		/* v = *(as + 8) */
	bslli	r9, r12, 16		/* t1 = v << 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 8		/* *(d + 8) = t1 */
	bsrli	r11, r12, 16		/* h = v >> 16 */
	lwi	r12, r8, 4		/* v = *(as + 4) */
	bslli	r9, r12, 16		/* t1 = v << 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 4		/* *(d + 4) = t1 */
	bsrli	r11, r12, 16		/* h = v >> 16 */
	lwi	r12, r8, 0		/* v = *(as + 0) */
	bslli	r9, r12, 16		/* t1 = v << 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	swi	r9, r5, 0		/* *(d + 0) = t1 */
	addi	r4, r4, -32		/* n = n - 32 */
	bneid	r4, d_bu2_loop		/* while (n) loop */
	bsrli	r11, r12, 16		/* h = v >> 16 (IN DELAY SLOT) */
	/* falls through to d_block_done */

d_block_done:
	addi	r4, r0, 4		/* n = 4 */
	cmpu	r4, r4, r7		/* n = c - n (unsigned) */
	blti	r4, d_xfer_end		/* if n < 0, less than one word to transfer */

d_word_xfer:
	andi	r4, r7, 0xfffffffc	/* n = c & ~3 */
	rsub	r5, r4, r5		/* d = d - n */
	rsub	r6, r4, r6		/* s = s - n */
	rsub	r7, r4, r7		/* c = c - n */

	andi	r9, r6, 3		/* t1 = s & 3 */
	/* if temp != 0, unaligned transfers needed */
	bnei	r9, d_word_unaligned

d_word_aligned:
	addi	r4, r4,-4		/* n-- */
	lw	r9, r6, r4		/* t1 = *(s+n) */
	bneid	r4, d_word_aligned	/* loop */
	sw	r9, r5, r4		/* *(d+n) = t1 (IN DELAY SLOT) */

	bri	d_word_done

d_word_unaligned:
	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
	lw	r11, r8, r4		/* h = *(as + n) */

	addi	r9, r9, -1
	beqi	r9, d_word_u1		/* t1 was 1 => 1 byte offset */
	addi	r9, r9, -1
	beqi	r9, d_word_u2		/* t1 was 2 => 2 byte offset */

d_word_u3:
	bsrli	r11, r11, 8		/* h = h >> 8 */
d_wu3_loop:
	addi	r4, r4,-4		/* n = n - 4 */
	lw	r12, r8, r4		/* v = *(as + n) */
	bslli	r9, r12, 24		/* t1 = v << 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	sw	r9, r5, r4		/* *(d + n) = t1 */
	bneid	r4, d_wu3_loop		/* while (n) loop */
	bsrli	r11, r12, 8		/* h = v >> 8 (IN DELAY SLOT) */

	bri	d_word_done

d_word_u1:
	bsrli	r11, r11, 24		/* h = h >> 24 */
d_wu1_loop:
	addi	r4, r4,-4		/* n = n - 4 */
	lw	r12, r8, r4		/* v = *(as + n) */
	bslli	r9, r12, 8		/* t1 = v << 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	sw	r9, r5, r4		/* *(d + n) = t1 */
	bneid	r4, d_wu1_loop		/* while (n) loop */
	bsrli	r11, r12, 24		/* h = v >> 24 (IN DELAY SLOT) */

	bri	d_word_done

d_word_u2:
	bsrli	r11, r11, 16		/* h = h >> 16 */
d_wu2_loop:
	addi	r4, r4,-4		/* n = n - 4 */
	lw	r12, r8, r4		/* v = *(as + n) */
	bslli	r9, r12, 16		/* t1 = v << 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	sw	r9, r5, r4		/* *(d + n) = t1 */
	bneid	r4, d_wu2_loop		/* while (n) loop */
	bsrli	r11, r12, 16		/* h = v >> 16 (IN DELAY SLOT) */
	/* falls through to d_word_done */

d_word_done:

d_xfer_end:
d_xfer_end_loop:
	beqi	r7, a_done		/* while (c) -- returns via memcpy's a_done */
	addi	r6, r6, -1		/* s-- */
	lbui	r9, r6, 0		/* t1 = *s */
	addi	r5, r5, -1		/* d-- */
	sbi	r9, r5, 0		/* *d = t1 */
	brid	d_xfer_end_loop		/* loop */
	addi	r7, r7, -1		/* c-- (IN DELAY SLOT) */

d_done:
	rtsd	r15, 8
	nop

.size memmove, . - memmove
.end memmove
|
32
arch/microblaze/lib/libgcc.h
Normal file
32
arch/microblaze/lib/libgcc.h
Normal file
|
@ -0,0 +1,32 @@
|
|||
#ifndef __ASM_LIBGCC_H
#define __ASM_LIBGCC_H

#include <asm/byteorder.h>

/* Machine word as used by gcc's libgcc helper routines. */
typedef int word_type __attribute__ ((mode (__word__)));

/*
 * A 64-bit value viewed as two 32-bit halves; the member order follows
 * the in-memory layout dictated by the target's endianness.
 */
#ifdef __BIG_ENDIAN
struct DWstruct {
	int high, low;
};
#elif defined(__LITTLE_ENDIAN)
struct DWstruct {
	int low, high;
};
#else
#error I feel sick.
#endif

typedef union {
	struct DWstruct s;
	long long ll;
} DWunion;

/* 64-bit arithmetic helpers normally supplied by libgcc. */
extern long long __ashldi3(long long u, word_type b);
extern long long __ashrdi3(long long u, word_type b);
extern word_type __cmpdi2(long long a, long long b);
extern long long __lshrdi3(long long u, word_type b);
extern long long __muldi3(long long u, long long v);
extern word_type __ucmpdi2(unsigned long long a, unsigned long long b);

#endif /* __ASM_LIBGCC_H */
|
28
arch/microblaze/lib/lshrdi3.c
Normal file
28
arch/microblaze/lib/lshrdi3.c
Normal file
|
@ -0,0 +1,28 @@
|
|||
#include <linux/export.h>

#include "libgcc.h"

/*
 * __lshrdi3 - 64-bit logical shift right (libgcc helper).
 * @u: value to shift
 * @b: shift count; result is defined only for 0 <= b < 64
 *
 * Returns u >> b with zero fill: the vacated high-order bits are 0.
 * The operand is processed as a high/low pair of 32-bit words (DWunion).
 */
long long __lshrdi3(long long u, word_type b)
{
	DWunion src, res;
	word_type remaining;

	if (!b)
		return u;

	src.ll = u;
	remaining = 32 - b;	/* high-word bits that survive the shift */

	if (remaining <= 0) {
		/* Shift of >= 32: high word empties into the low word. */
		res.s.high = 0;
		res.s.low = (unsigned int) src.s.high >> -remaining;
	} else {
		/* Bits migrating from the high word into the low word. */
		const unsigned int carries = (unsigned int) src.s.high << remaining;

		res.s.high = (unsigned int) src.s.high >> b;
		res.s.low = ((unsigned int) src.s.low >> b) | carries;
	}

	return res.ll;
}
EXPORT_SYMBOL(__lshrdi3);
|
189
arch/microblaze/lib/memcpy.c
Normal file
189
arch/microblaze/lib/memcpy.c
Normal file
|
@ -0,0 +1,189 @@
|
|||
/*
 * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
 * Copyright (C) 2008-2009 PetaLogix
 * Copyright (C) 2007 John Williams
 *
 * Reasonably optimised generic C-code for memcpy on Microblaze
 * This is generic C code to do efficient, alignment-aware memcpy.
 *
 * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
 * http://www.embedded.com/showArticle.jhtml?articleID=19205567
 *
 * Attempts were made, unsuccessfully, to contact the original
 * author of this code (Michael Morrow, Intel). Below is the original
 * copyright notice.
 *
 * This software has been developed by Intel Corporation.
 * Intel specifically disclaims all warranties, express or
 * implied, and all liability, including consequential and
 * other indirect damages, for the use of this program, including
 * liability for infringement of any proprietary rights,
 * and including the warranties of merchantability and fitness
 * for a particular purpose. Intel does not assume any
 * responsibility for and errors which may appear in this program
 * not any responsibility to update it.
 */

#include <linux/export.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/compiler.h>

#include <linux/string.h>

#ifdef __HAVE_ARCH_MEMCPY
#ifndef CONFIG_OPT_LIB_FUNCTION
/*
 * memcpy - copy c bytes from v_src to v_dst (regions must not overlap).
 * Plain byte-at-a-time variant used when the optimised library is disabled.
 * Returns v_dst.
 */
void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;

	/* Simple, byte oriented memcpy. */
	while (c--)
		*dst++ = *src++;

	return v_dst;
}
#else /* CONFIG_OPT_LIB_FUNCTION */
/*
 * memcpy - copy c bytes from v_src to v_dst (regions must not overlap).
 * Optimised variant: byte-copies until the destination is word aligned,
 * then selects one of four word-copy loops based on the source's residual
 * misalignment, carrying partial words through buf_hold.
 * Returns v_dst.
 */
void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;

	/* The following code tries to optimize the copy by using unsigned
	 * alignment. This will work fine if both source and destination are
	 * aligned on the same boundary. However, if they are aligned on
	 * different boundaries shifts will be necessary. This might result in
	 * bad performance on MicroBlaze systems without a barrel shifter.
	 */
	const uint32_t *i_src;
	uint32_t *i_dst;

	if (likely(c >= 4)) {
		unsigned value, buf_hold;

		/* Align the destination to a word boundary. */
		/* This is done in an endian independent manner. */
		/* Deliberate fall-through: each case copies one more byte. */
		switch ((unsigned long)dst & 3) {
		case 1:
			*dst++ = *src++;
			--c;
		case 2:
			*dst++ = *src++;
			--c;
		case 3:
			*dst++ = *src++;
			--c;
		}

		i_dst = (void *)dst;

		/* Choose a copy scheme based on the source */
		/* alignment relative to destination. */
		switch ((unsigned long)src & 3) {
		case 0x0:	/* Both byte offsets are aligned */
			i_src  = (const void *)src;

			for (; c >= 4; c -= 4)
				*i_dst++ = *i_src++;

			src  = (const void *)i_src;
			break;
		case 0x1:	/* Unaligned - Off by 1 */
			/* Word align the source */
			i_src = (const void *) ((unsigned)src & ~3);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *i_src++ << 8;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 24;
				buf_hold = value << 8;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*i_src++ & 0xFFFFFF00) >> 8;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFF) << 24);
				buf_hold = (value & 0xFFFFFF00) >> 8;
			}
#endif
			/* Realign the source */
			src = (const void *)i_src;
			src -= 3;
			break;
		case 0x2:	/* Unaligned - Off by 2 */
			/* Word align the source */
			i_src = (const void *) ((unsigned)src & ~3);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *i_src++ << 16;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 16;
				buf_hold = value << 16;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*i_src++ & 0xFFFF0000) >> 16;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFFFF) << 16);
				buf_hold = (value & 0xFFFF0000) >> 16;
			}
#endif
			/* Realign the source */
			src = (const void *)i_src;
			src -= 2;
			break;
		case 0x3:	/* Unaligned - Off by 3 */
			/* Word align the source */
			i_src = (const void *) ((unsigned)src & ~3);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *i_src++ << 24;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 8;
				buf_hold = value << 24;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*i_src++ & 0xFF000000) >> 24;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFFFFFF) << 8);
				buf_hold = (value & 0xFF000000) >> 24;
			}
#endif
			/* Realign the source */
			src = (const void *)i_src;
			src -= 1;
			break;
		}
		dst = (void *)i_dst;
	}

	/* Finish off any remaining bytes */
	/* simple fast copy, ... unless a cache boundary is crossed */
	/* Deliberate fall-through: copies the 0-3 leftover bytes. */
	switch (c) {
	case 3:
		*dst++ = *src++;
	case 2:
		*dst++ = *src++;
	case 1:
		*dst++ = *src++;
	}

	return v_dst;
}
#endif /* CONFIG_OPT_LIB_FUNCTION */
EXPORT_SYMBOL(memcpy);
#endif /* __HAVE_ARCH_MEMCPY */
|
215
arch/microblaze/lib/memmove.c
Normal file
215
arch/microblaze/lib/memmove.c
Normal file
|
@ -0,0 +1,215 @@
|
|||
/*
 * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
 * Copyright (C) 2008-2009 PetaLogix
 * Copyright (C) 2007 John Williams
 *
 * Reasonably optimised generic C-code for memcpy on Microblaze
 * This is generic C code to do efficient, alignment-aware memmove.
 *
 * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
 * http://www.embedded.com/showArticle.jhtml?articleID=19205567
 *
 * Attempts were made, unsuccessfully, to contact the original
 * author of this code (Michael Morrow, Intel). Below is the original
 * copyright notice.
 *
 * This software has been developed by Intel Corporation.
 * Intel specifically disclaims all warranties, express or
 * implied, and all liability, including consequential and
 * other indirect damages, for the use of this program, including
 * liability for infringement of any proprietary rights,
 * and including the warranties of merchantability and fitness
 * for a particular purpose. Intel does not assume any
 * responsibility for and errors which may appear in this program
 * not any responsibility to update it.
 */

#include <linux/export.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/compiler.h>
#include <linux/string.h>

#ifdef __HAVE_ARCH_MEMMOVE
#ifndef CONFIG_OPT_LIB_FUNCTION
/*
 * memmove - copy c bytes from v_src to v_dst; regions may overlap.
 * Byte-at-a-time variant. Forward overlap is delegated to memcpy;
 * otherwise the copy runs backwards from the end. Returns v_dst.
 */
void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;

	if (!c)
		return v_dst;

	/* Use memcpy when source is higher than dest */
	if (v_dst <= v_src)
		return memcpy(v_dst, v_src, c);

	/* copy backwards, from end to beginning */
	src += c;
	dst += c;

	/* Simple, byte oriented memmove. */
	while (c--)
		*--dst = *--src;

	return v_dst;
}
#else /* CONFIG_OPT_LIB_FUNCTION */
/*
 * memmove - copy c bytes from v_src to v_dst; regions may overlap.
 * Optimised variant: delegates non-overlapping-forward cases to memcpy,
 * otherwise performs a descending word-at-a-time copy, aligning the
 * destination first and shifting partial words through buf_hold when
 * source and destination disagree in alignment. Returns v_dst.
 */
void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;
	const uint32_t *i_src;
	uint32_t *i_dst;

	if (!c)
		return v_dst;

	/* Use memcpy when source is higher than dest */
	if (v_dst <= v_src)
		return memcpy(v_dst, v_src, c);

	/* The following code tries to optimize the copy by using unsigned
	 * alignment. This will work fine if both source and destination are
	 * aligned on the same boundary. However, if they are aligned on
	 * different boundaries shifts will be necessary. This might result in
	 * bad performance on MicroBlaze systems without a barrel shifter.
	 */
	/* FIXME this part needs more test */
	/* Do a descending copy - this is a bit trickier! */
	dst += c;
	src += c;

	if (c >= 4) {
		unsigned value, buf_hold;

		/* Align the destination to a word boundary. */
		/* This is done in an endian independent manner. */
		/* Deliberate fall-through: each case copies one more byte. */

		switch ((unsigned long)dst & 3) {
		case 3:
			*--dst = *--src;
			--c;
		case 2:
			*--dst = *--src;
			--c;
		case 1:
			*--dst = *--src;
			--c;
		}

		i_dst = (void *)dst;
		/* Choose a copy scheme based on the source */
		/* alignment relative to dstination. */
		switch ((unsigned long)src & 3) {
		case 0x0:	/* Both byte offsets are aligned */

			i_src  = (const void *)src;

			for (; c >= 4; c -= 4)
				*--i_dst = *--i_src;

			src  = (const void *)i_src;
			break;
		case 0x1:	/* Unaligned - Off by 1 */
			/* Word align the source */
			i_src = (const void *) (((unsigned)src + 4) & ~3);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *--i_src >> 24;

			/*
			 * NOTE(review): this big-endian descending compose
			 * looks operand-swapped (expected value << 8 |
			 * buf_hold); the little-endian branch below composes
			 * the mirrored way. Covered by the FIXME above --
			 * verify before relying on BE + OPT_LIB_FUNCTION.
			 */
			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold << 8 | value;
				buf_hold = value >> 24;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*--i_src & 0xFF) << 24;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
						((value & 0xFFFFFF00) >> 8);
				buf_hold = (value  & 0xFF) << 24;
			}
#endif
			/* Realign the source */
			src = (const void *)i_src;
			src += 1;
			break;
		case 0x2:	/* Unaligned - Off by 2 */
			/* Word align the source */
			i_src = (const void *) (((unsigned)src + 4) & ~3);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *--i_src >> 16;

			/* NOTE(review): same operand-order concern as 0x1. */
			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold << 16 | value;
				buf_hold = value >> 16;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*--i_src & 0xFFFF) << 16;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
						((value & 0xFFFF0000) >> 16);
				buf_hold = (value & 0xFFFF) << 16;
			}
#endif
			/* Realign the source */
			src = (const void *)i_src;
			src += 2;
			break;
		case 0x3:	/* Unaligned - Off by 3 */
			/* Word align the source */
			i_src = (const void *) (((unsigned)src + 4) & ~3);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *--i_src >> 8;

			/* NOTE(review): same operand-order concern as 0x1. */
			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold << 24 | value;
				buf_hold = value >> 8;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*--i_src & 0xFFFFFF) << 8;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
						((value & 0xFF000000) >> 24);
				buf_hold = (value & 0xFFFFFF) << 8;
			}
#endif
			/* Realign the source */
			src = (const void *)i_src;
			src += 3;
			break;
		}
		dst = (void *)i_dst;
	}

	/* simple fast copy, ... unless a cache boundary is crossed */
	/* Finish off any remaining bytes */
	/* Deliberate fall-through: copies the leftover bytes backwards. */
	switch (c) {
	case 4:
		*--dst = *--src;
	case 3:
		*--dst = *--src;
	case 2:
		*--dst = *--src;
	case 1:
		*--dst = *--src;
	}
	return v_dst;
}
#endif /* CONFIG_OPT_LIB_FUNCTION */
EXPORT_SYMBOL(memmove);
#endif /* __HAVE_ARCH_MEMMOVE */
|
97
arch/microblaze/lib/memset.c
Normal file
97
arch/microblaze/lib/memset.c
Normal file
|
@ -0,0 +1,97 @@
|
|||
/*
 * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
 * Copyright (C) 2008-2009 PetaLogix
 * Copyright (C) 2007 John Williams
 *
 * Reasonably optimised generic C-code for memset on Microblaze
 * This is generic C code to do efficient, alignment-aware memcpy.
 *
 * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
 * http://www.embedded.com/showArticle.jhtml?articleID=19205567
 *
 * Attempts were made, unsuccessfully, to contact the original
 * author of this code (Michael Morrow, Intel). Below is the original
 * copyright notice.
 *
 * This software has been developed by Intel Corporation.
 * Intel specifically disclaims all warranties, express or
 * implied, and all liability, including consequential and
 * other indirect damages, for the use of this program, including
 * liability for infringement of any proprietary rights,
 * and including the warranties of merchantability and fitness
 * for a particular purpose. Intel does not assume any
 * responsibility for and errors which may appear in this program
 * not any responsibility to update it.
 */

#include <linux/export.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/compiler.h>
#include <linux/string.h>

#ifdef __HAVE_ARCH_MEMSET
#ifndef CONFIG_OPT_LIB_FUNCTION
/*
 * memset - fill n bytes at v_src with the byte value c.
 * Plain byte-at-a-time variant. Returns v_src.
 */
void *memset(void *v_src, int c, __kernel_size_t n)
{
	char *src = v_src;

	/* Truncate c to 8 bits */
	c = (c & 0xFF);

	/* Simple, byte oriented memset or the rest of count. */
	while (n--)
		*src++ = c;

	return v_src;
}
#else /* CONFIG_OPT_LIB_FUNCTION */
/*
 * memset - fill n bytes at v_src with the byte value c.
 * Optimised variant: builds a word of four repeated bytes, byte-fills up
 * to the first word boundary, stores whole words, then finishes the tail
 * byte by byte. Returns v_src.
 */
void *memset(void *v_src, int c, __kernel_size_t n)
{
	char *src = v_src;
	uint32_t *i_src;
	uint32_t w32 = 0;

	/* Truncate c to 8 bits */
	c = (c & 0xFF);

	if (unlikely(c)) {
		/* Make a repeating word out of it */
		w32 = c;
		w32 |= w32 << 8;
		w32 |= w32 << 16;
	}

	if (likely(n >= 4)) {
		/* Align the destination to a word boundary */
		/* This is done in an endian independent manner */
		/* Deliberate fall-through: each case fills one more byte. */
		switch ((unsigned) src & 3) {
		case 1:
			*src++ = c;
			--n;
		case 2:
			*src++ = c;
			--n;
		case 3:
			*src++ = c;
			--n;
		}

		i_src  = (void *)src;

		/* Do as many full-word copies as we can */
		for (; n >= 4; n -= 4)
			*i_src++ = w32;

		src  = (void *)i_src;
	}

	/* Simple, byte oriented memset or the rest of count. */
	while (n--)
		*src++ = c;

	return v_src;
}
#endif /* CONFIG_OPT_LIB_FUNCTION */
EXPORT_SYMBOL(memset);
#endif /* __HAVE_ARCH_MEMSET */
|
73
arch/microblaze/lib/modsi3.S
Normal file
73
arch/microblaze/lib/modsi3.S
Normal file
|
@ -0,0 +1,73 @@
|
|||
#include <linux/linkage.h>

/*
 * modulo operation for 32 bit integers.
 * Input : op1 in Reg r5
 *         op2 in Reg r6
 * Output: op1 mod op2 in Reg r3
 *
 * Signed remainder by 32-step shift-and-subtract long division; the sign
 * of the result follows the dividend (r5) only, C-style. r28 holds that
 * sign, r29 the loop counter, r30 the quotient bits, r31 a scratch
 * difference. Callee-saved registers are spilled to a 16-byte frame.
 */

	.text
	.globl	__modsi3
	.type __modsi3, @function
	.ent __modsi3

__modsi3:
	.frame	r1, 0, r15

	addik	r1, r1, -16
	swi	r28, r1, 0
	swi	r29, r1, 4
	swi	r30, r1, 8
	swi	r31, r1, 12

	beqi	r6, div_by_zero /* div_by_zero division error */
	beqi	r5, result_is_zero /* result is zero */
	bgeid	r5, r5_pos
	/* get the sign of the result [ depends only on the first arg] */
	add	r28, r5, r0
	rsubi	r5, r5, 0 /* make r5 positive */
r5_pos:
	bgei	r6, r6_pos
	rsubi	r6, r6, 0 /* make r6 positive */
r6_pos:
	addik	r3, r0, 0 /* clear mod */
	addik	r30, r0, 0 /* clear div */
	addik	r29, r0, 32 /* initialize the loop count */
/* first part try to find the first '1' in the r5 */
div1:
	add	r5, r5, r5 /* left shift logical r5 */
	bgeid	r5, div1
	addik	r29, r29, -1
div2:
	/* left shift logical r5 get the '1' into the carry */
	add	r5, r5, r5
	addc	r3, r3, r3 /* move that bit into the mod register */
	rsub	r31, r6, r3 /* try to subtract (r30 a r6) */
	blti	r31, mod_too_small
	/* move the r31 to mod since the result was positive */
	or	r3, r0, r31
	addik	r30, r30, 1
mod_too_small:
	addik	r29, r29, -1
	beqi	r29, loop_end
	add	r30, r30, r30 /* shift in the '1' into div */
	bri	div2 /* div2 */
loop_end:
	bgei	r28, return_here
	brid	return_here
	rsubi	r3, r3, 0 /* negate the result (in branch delay slot) */
div_by_zero:
result_is_zero:
	or	r3, r0, r0 /* set result to 0 [both mod as well as div are 0] */
return_here:
/* restore values of csrs and that of r3 and the divisor and the dividend */
	lwi	r28, r1, 0
	lwi	r29, r1, 4
	lwi	r30, r1, 8
	lwi	r31, r1, 12
	rtsd	r15, 8
	addik	r1, r1, 16

.size __modsi3, . - __modsi3
.end __modsi3
|
57
arch/microblaze/lib/muldi3.c
Normal file
57
arch/microblaze/lib/muldi3.c
Normal file
|
@ -0,0 +1,57 @@
|
|||
#include <linux/export.h>

#include "libgcc.h"

/* Width of the machine word the 64-bit multiply is decomposed over. */
#define W_TYPE_SIZE 32

#define __ll_B ((unsigned long) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((unsigned long) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((unsigned long) (t) >> (W_TYPE_SIZE / 2))

/* If we still don't have umul_ppmm, define it using plain C. */
/*
 * umul_ppmm(w1, w0, u, v): 32x32 -> 64 unsigned multiply via four 16x16
 * partial products; high word in w1, low word in w0.
 */
#if !defined(umul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
	do { \
		unsigned long __x0, __x1, __x2, __x3; \
		unsigned short __ul, __vl, __uh, __vh; \
		\
		__ul = __ll_lowpart(u); \
		__uh = __ll_highpart(u); \
		__vl = __ll_lowpart(v); \
		__vh = __ll_highpart(v); \
		\
		__x0 = (unsigned long) __ul * __vl; \
		__x1 = (unsigned long) __ul * __vh; \
		__x2 = (unsigned long) __uh * __vl; \
		__x3 = (unsigned long) __uh * __vh; \
		\
		__x1 += __ll_highpart(__x0); /* this can't give carry */\
		__x1 += __x2; /* but this indeed can */ \
		if (__x1 < __x2) /* did we get it? */ \
			__x3 += __ll_B; /* yes, add it in the proper pos */ \
		\
		(w1) = __x3 + __ll_highpart(__x1); \
		(w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\
	} while (0)
#endif

/* __umulsidi3(u, v): 32x32 -> 64 unsigned multiply returning a long long. */
#if !defined(__umulsidi3)
#define __umulsidi3(u, v) ({ \
	DWunion __w; \
	umul_ppmm(__w.s.high, __w.s.low, u, v); \
	__w.ll; \
	})
#endif

/*
 * __muldi3 - 64x64 -> 64 multiply from 32-bit pieces: the low x low
 * product supplies the full 64-bit base, then the two cross products
 * (which only affect the high word, mod 2^64) are added into it.
 * Signed and unsigned multiply agree mod 2^64, so one routine serves both.
 */
long long __muldi3(long long u, long long v)
{
	const DWunion uu = {.ll = u};
	const DWunion vv = {.ll = v};
	DWunion w = {.ll = __umulsidi3(uu.s.low, vv.s.low)};

	w.s.high += ((unsigned long) uu.s.low * (unsigned long) vv.s.high
		+ (unsigned long) uu.s.high * (unsigned long) vv.s.low);

	return w.ll;
}
EXPORT_SYMBOL(__muldi3);
|
46
arch/microblaze/lib/mulsi3.S
Normal file
46
arch/microblaze/lib/mulsi3.S
Normal file
|
@ -0,0 +1,46 @@
|
|||
#include <linux/linkage.h>

/*
 * Multiply operation for 32 bit integers.
 *	Input :	Operand1 in Reg r5
 *		Operand2 in Reg r6
 *	Output: Result [op1 * op2] in Reg r3
 *
 * Classic shift-and-add: both operands are made positive (result sign is
 * kept in r4 = r5 ^ r6), then r6 is consumed bit by bit from the LSB,
 * adding the correspondingly shifted r5 into r3 for each set bit.
 */
	.text
	.globl	__mulsi3
	.type __mulsi3, @function
	.ent __mulsi3

__mulsi3:
	.frame	r1, 0, r15
	add	r3, r0, r0
	beqi	r5, result_is_zero /* multiply by zero */
	beqi	r6, result_is_zero /* multiply by zero */
	bgeid	r5, r5_pos
	xor	r4, r5, r6 /* get the sign of the result */
	rsubi	r5, r5, 0 /* make r5 positive */
r5_pos:
	bgei	r6, r6_pos
	rsubi	r6, r6, 0 /* make r6 positive */
r6_pos:
	bri	l1
l2:
	add	r5, r5, r5 /* shift multiplicand up for the next bit */
l1:
	srl	r6, r6 /* shift multiplier's LSB into carry */
	addc	r7, r0, r0 /* r7 = that bit (0 or 1) */
	beqi	r7, l2 /* bit clear: no add, keep shifting */
	bneid	r6, l2
	add	r3, r3, r5 /* bit set: accumulate (also delay slot) */
	blti	r4, negateresult
	rtsd	r15, 8
	nop
negateresult:
	rtsd	r15, 8
	rsub	r3, r3, r0 /* negate result (in delay slot) */
result_is_zero:
	rtsd	r15, 8
	addi	r3, r0, 0

.size __mulsi3, . - __mulsi3
.end __mulsi3
|
266
arch/microblaze/lib/uaccess_old.S
Normal file
266
arch/microblaze/lib/uaccess_old.S
Normal file
|
@ -0,0 +1,266 @@
|
|||
/*
 * Copyright (C) 2009 Michal Simek <monstr@monstr.eu>
 * Copyright (C) 2009 PetaLogix
 * Copyright (C) 2007 LynuxWorks, Inc.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 */

#include <linux/errno.h>
#include <linux/linkage.h>
#include <asm/page.h>

/*
 * int __strncpy_user(char *to, char *from, int len);
 *
 * Returns:
 *	-EFAULT for an exception
 *	len if we hit the buffer limit
 *	bytes copied
 *
 * Faulting loads/stores are recovered through the __ex_table fixup below.
 */

	.text
.globl __strncpy_user;
.type  __strncpy_user, @function
.align 4;
__strncpy_user:

	/*
	 * r5 - to
	 * r6 - from
	 * r7 - len
	 * r3 - temp count
	 * r4 - temp val
	 */
	beqid	r7,3f
	addik	r3,r7,0		/* temp_count = len */
1:
	lbu	r4,r6,r0
	beqid	r4,2f
	sb	r4,r5,r0

	addik	r5,r5,1
	addik	r6,r6,1		/* delay slot */

	addik	r3,r3,-1
	bnei	r3,1b		/* break on len */
2:
	rsubk	r3,r3,r7	/* temp_count = len - temp_count */
3:
	rtsd	r15,8
	nop
	.size   __strncpy_user, . - __strncpy_user

	.section	.fixup, "ax"
	.align	2
4:
	brid	3b
	addik	r3,r0, -EFAULT	/* fault: return -EFAULT (delay slot) */

	.section	__ex_table, "a"
	.word	1b,4b

/*
 * int __strnlen_user(char __user *str, int maxlen);
 *
 * Returns:
 *	0 on error
 *	maxlen + 1 if no NUL byte found within maxlen bytes
 *	size of the string (including NUL byte)
 */

	.text
.globl __strnlen_user;
.type  __strnlen_user, @function
.align 4;
__strnlen_user:
	beqid	r6,3f
	addik	r3,r6,0
1:
	lbu	r4,r5,r0
	beqid	r4,2f		/* break on NUL */
	addik	r3,r3,-1	/* delay slot */

	bneid	r3,1b
	addik	r5,r5,1		/* delay slot */

	addik	r3,r3,-1	/* for break on len */
2:
	rsubk	r3,r3,r6
3:
	rtsd	r15,8
	nop
	.size	__strnlen_user, . - __strnlen_user

	.section	.fixup,"ax"
4:
	brid	3b
	addk	r3,r0,r0	/* fault: return 0 (delay slot) */

	.section	__ex_table,"a"
	.word	1b,4b

/* Loop unrolling for __copy_tofrom_user */
/* Copies 32 bytes (8 words) at 'offset'; every access gets a fixup entry. */
#define COPY(offset)	\
1:	lwi	r4 , r6, 0x0000 + offset;	\
2:	lwi	r19, r6, 0x0004 + offset;	\
3:	lwi	r20, r6, 0x0008 + offset;	\
4:	lwi	r21, r6, 0x000C + offset;	\
5:	lwi	r22, r6, 0x0010 + offset;	\
6:	lwi	r23, r6, 0x0014 + offset;	\
7:	lwi	r24, r6, 0x0018 + offset;	\
8:	lwi	r25, r6, 0x001C + offset;	\
9:	swi	r4 , r5, 0x0000 + offset;	\
10:	swi	r19, r5, 0x0004 + offset;	\
11:	swi	r20, r5, 0x0008 + offset;	\
12:	swi	r21, r5, 0x000C + offset;	\
13:	swi	r22, r5, 0x0010 + offset;	\
14:	swi	r23, r5, 0x0014 + offset;	\
15:	swi	r24, r5, 0x0018 + offset;	\
16:	swi	r25, r5, 0x001C + offset;	\
	.section __ex_table,"a";	\
	.word	1b, 33f;		\
	.word	2b, 33f;		\
	.word	3b, 33f;		\
	.word	4b, 33f;		\
	.word	5b, 33f;		\
	.word	6b, 33f;		\
	.word	7b, 33f;		\
	.word	8b, 33f;		\
	.word	9b, 33f;		\
	.word	10b, 33f;		\
	.word	11b, 33f;		\
	.word	12b, 33f;		\
	.word	13b, 33f;		\
	.word	14b, 33f;		\
	.word	15b, 33f;		\
	.word	16b, 33f;		\
	.text

/* Copies 128 bytes via four COPY blocks. */
#define COPY_80(offset)	\
	COPY(0x00 + offset);\
	COPY(0x20 + offset);\
	COPY(0x40 + offset);\
	COPY(0x60 + offset);

/*
 * int __copy_tofrom_user(char *to, char *from, int len)
 * Return:
 *	0 on success
 *	number of not copied bytes on error
 *
 * Three strategies: byte copy when anything is unaligned, word copy for
 * aligned sizes, and a fully unrolled 512-byte loop for PAGE_SIZE copies.
 */
	.text
.globl __copy_tofrom_user;
.type  __copy_tofrom_user, @function
.align 4;
__copy_tofrom_user:
	/*
	 * r5 - to
	 * r6 - from
	 * r7, r3 - count
	 * r4 - tempval
	 */
	beqid	r7, 0f /* zero size is not likely */
	or	r3, r5, r6 /* find if is any to/from unaligned */
	or	r3, r3, r7 /* find if count is unaligned */
	andi	r3, r3, 0x3 /* mask last 3 bits */
	bneid	r3, bu1 /* if r3 is not zero then byte copying */
	or	r3, r0, r0

	rsubi	r3, r7, PAGE_SIZE /* detect PAGE_SIZE */
	beqid	r3, page;
	or	r3, r0, r0

w1:	lw	r4, r6, r3 /* at least one 4 byte copy */
w2:	sw	r4, r5, r3
	addik	r7, r7, -4
	bneid	r7, w1
	addik	r3, r3, 4
	addik	r3, r7, 0
	rtsd	r15, 8
	nop

	.section	__ex_table,"a"
	.word	w1, 0f;
	.word	w2, 0f;
	.text

.align 4 /* Alignment is important to keep icache happy */
page:	/* Create room on stack and save registers for storing values */
	addik	r1, r1, -40
	swi	r5, r1, 0
	swi	r6, r1, 4
	swi	r7, r1, 8
	swi	r19, r1, 12
	swi	r20, r1, 16
	swi	r21, r1, 20
	swi	r22, r1, 24
	swi	r23, r1, 28
	swi	r24, r1, 32
	swi	r25, r1, 36
loop:	/* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */
	/* Loop unrolling to get performance boost */
	COPY_80(0x000);
	COPY_80(0x080);
	COPY_80(0x100);
	COPY_80(0x180);
	/* copy loop */
	addik	r6, r6, 0x200
	addik	r7, r7, -0x200
	bneid	r7, loop
	addik	r5, r5, 0x200

	/* Restore register content */
	lwi	r5, r1, 0
	lwi	r6, r1, 4
	lwi	r7, r1, 8
	lwi	r19, r1, 12
	lwi	r20, r1, 16
	lwi	r21, r1, 20
	lwi	r22, r1, 24
	lwi	r23, r1, 28
	lwi	r24, r1, 32
	lwi	r25, r1, 36
	addik	r1, r1, 40
	/* return back */
	addik	r3, r0, 0
	rtsd	r15, 8
	nop

/* Fault case - return temp count */
33:
	addik	r3, r7, 0
	/* Restore register content */
	lwi	r5, r1, 0
	lwi	r6, r1, 4
	lwi	r7, r1, 8
	lwi	r19, r1, 12
	lwi	r20, r1, 16
	lwi	r21, r1, 20
	lwi	r22, r1, 24
	lwi	r23, r1, 28
	lwi	r24, r1, 32
	lwi	r25, r1, 36
	addik	r1, r1, 40
	/* return back */
	rtsd	r15, 8
	nop

.align 4 /* Alignment is important to keep icache happy */
bu1:	lbu	r4,r6,r3
bu2:	sb	r4,r5,r3
	addik	r7,r7,-1
	bneid	r7,bu1
	addik	r3,r3,1 /* delay slot */
0:
	addik	r3,r7,0
	rtsd	r15,8
	nop
	.size __copy_tofrom_user, . - __copy_tofrom_user

	.section	__ex_table,"a"
	.word	bu1, 0b;
	.word	bu2, 0b;
	.text
|
20
arch/microblaze/lib/ucmpdi2.c
Normal file
20
arch/microblaze/lib/ucmpdi2.c
Normal file
|
@ -0,0 +1,20 @@
|
|||
#include <linux/export.h>

#include "libgcc.h"

/*
 * __ucmpdi2 - three-way unsigned 64-bit compare.
 * Returns 0 if a < b, 1 if a == b, 2 if a > b.
 * Works one 32-bit word at a time: the high words decide unless equal.
 */
word_type __ucmpdi2(unsigned long long a, unsigned long long b)
{
	const DWunion x = {.ll = a};
	const DWunion y = {.ll = b};
	const unsigned int xh = (unsigned int) x.s.high;
	const unsigned int yh = (unsigned int) y.s.high;
	const unsigned int xl = (unsigned int) x.s.low;
	const unsigned int yl = (unsigned int) y.s.low;

	/* High words differ: they alone determine the ordering. */
	if (xh != yh)
		return (xh < yh) ? 0 : 2;

	/* High words equal: fall back to the low words. */
	if (xl != yl)
		return (xl < yl) ? 0 : 2;

	return 1;
}
EXPORT_SYMBOL(__ucmpdi2);
|
84
arch/microblaze/lib/udivsi3.S
Normal file
84
arch/microblaze/lib/udivsi3.S
Normal file
|
@ -0,0 +1,84 @@
|
|||
#include <linux/linkage.h>

/*
 * Unsigned divide operation.
 *	Input :	Divisor in Reg r5
 *		Dividend in Reg r6
 *	Output: Result in Reg r3
 *
 * 32-step shift-and-subtract long division. r29 is the loop counter,
 * r30 accumulates the running remainder, r31 is a scratch difference.
 * Special cases (zero operands, equal operands, dividend smaller than
 * divisor, MSB-set operands) are handled up front with 32-bit-unsigned
 * comparisons built from signed branches.
 */

	.text
	.globl	__udivsi3
	.type __udivsi3, @function
	.ent __udivsi3

__udivsi3:

	.frame	r1, 0, r15

	addik	r1, r1, -12
	swi	r29, r1, 0
	swi	r30, r1, 4
	swi	r31, r1, 8

	beqi	r6, div_by_zero /* div_by_zero - division error */
	beqid	r5, result_is_zero /* result is zero */
	addik	r30, r0, 0 /* clear mod */
	addik	r29, r0, 32 /* initialize the loop count */

/* check if r6 and r5 are equal - if yes, return 1 */
	rsub	r18, r5, r6
	beqid	r18, return_here
	addik	r3, r0, 1

/* check if (uns)r6 is greater than (uns)r5. in that case, just return 0 */
	xor	r18, r5, r6
	bgeid	r18, 16
	add	r3, r0, r0 /* we would anyways clear r3 */
	blti	r6, return_here /* r6[bit 31 = 1] hence is greater */
	bri	checkr6
	rsub	r18, r6, r5 /* microblazecmp */
	blti	r18, return_here

/* if r6 [bit 31] is set, then return result as 1 */
checkr6:
	bgti	r6, div0
	brid	return_here
	addik	r3, r0, 1 /* delay slot */

/* first part try to find the first '1' in the r5 */
div0:
	blti	r5, div2 /* MSB already set - skip normalisation */
div1:
	add	r5, r5, r5 /* left shift logical r5 */
	bgtid	r5, div1
	addik	r29, r29, -1
div2:
	/* left shift logical r5 get the '1' into the carry */
	add	r5, r5, r5
	addc	r30, r30, r30 /* move that bit into the mod register */
	rsub	r31, r6, r30 /* try to subtract (r30 a r6) */
	blti	r31, mod_too_small
	/* move the r31 to mod since the result was positive */
	or	r30, r0, r31
	addik	r3, r3, 1
mod_too_small:
	addik	r29, r29, -1
	beqi	r29, loop_end
	add	r3, r3, r3 /* shift in the '1' into div */
	bri	div2 /* div2 */
loop_end:
	bri	return_here
div_by_zero:
result_is_zero:
	or	r3, r0, r0 /* set result to 0 */
return_here:
/* restore values of csrs and that of r3 and the divisor and the dividend */
	lwi	r29, r1, 0
	lwi	r30, r1, 4
	lwi	r31, r1, 8
	rtsd	r15, 8
	addik	r1, r1, 12

.size __udivsi3, . - __udivsi3
.end __udivsi3
|
86
arch/microblaze/lib/umodsi3.S
Normal file
86
arch/microblaze/lib/umodsi3.S
Normal file
|
@ -0,0 +1,86 @@
|
|||
#include <linux/linkage.h>

/*
 * Unsigned modulo operation for 32 bit integers (MicroBlaze, GAS syntax).
 *
 * Computes op1 % op2 by restoring shift-subtract division; the quotient
 * bits are accumulated but discarded, only the remainder is returned.
 *
 * Input : op1 (dividend) in Reg r5
 *         op2 (divisor)  in Reg r6
 * Output: op1 mod op2 in Reg r3
 *
 * Register roles inside the loop:
 *   r3  - running remainder ("mod" register); also the return value
 *   r30 - discarded quotient accumulator (callee-saved, so preserved)
 *   r29 - loop counter (bits left to process)
 *   r31 - scratch for the trial subtraction
 *   r18 - assembler temporary / scratch for comparisons
 *
 * NOTE: every branch ending in 'd' (beqid, bgeid, bgtid, brid, rtsd) has a
 * delay slot - the instruction immediately after it executes unconditionally
 * before the branch takes effect.
 */

.text
.globl __umodsi3
.type __umodsi3, @function
.ent __umodsi3

__umodsi3:
	.frame	r1, 0, r15

	/* Prologue: make room for and save callee-saved r29-r31. */
	addik	r1, r1, -12
	swi	r29, r1, 0
	swi	r30, r1, 4
	swi	r31, r1, 8

	beqi	r6, div_by_zero /* divisor == 0: division error, return 0 */
	beqid	r5, result_is_zero /* dividend == 0: result is zero */
	addik	r3, r0, 0 /* delay slot: clear remainder (executes either way) */
	addik	r30, r0, 0 /* clear quotient accumulator */
	addik	r29, r0, 32 /* initialize the loop count (32 bits) */

	/* check if r6 and r5 are equal - if yes, return 0 (r3 already 0) */
	rsub	r18, r5, r6
	beqi	r18, return_here

	/* check if (uns)r6 is greater than (uns)r5. in that case, just return r5 */
	xor	r18, r5, r6 /* bit 31 of r18 set iff sign bits differ */
	bgeid	r18, 16 /* same sign bits: skip ahead 16 bytes to the rsub compare */
	addik	r3, r5, 0 /* delay slot: speculatively set result = r5 */
	/* sign bits differ: the operand with bit 31 set is the unsigned-larger one */
	blti	r6, return_here /* r6[31] == 1 => r6 > r5, result stays r5 */
	bri	$lcheckr6 /* else r5[31] == 1 => r5 > r6, needs full handling */
	rsub	r18, r5, r6 /* microblazecmp: same sign, compare magnitudes */
	bgti	r18, return_here /* r6 > r5 unsigned => result stays r5 */

	/* r5 has bit 31 set (r5 > r6 unsigned). If r6 also had bit 31 set we
	 * would not get here, so strip bit 31 handling: if r6 > 0 run the full
	 * division, otherwise (r6[31] set too) the quotient is exactly 1 and
	 * the remainder is r5 - r6. */
$lcheckr6:
	bgtid	r6, div0 /* r6 positive: do the real division */
	addik	r3, r0, 0 /* delay slot: reset remainder for the division path */
	/* both operands have bit 31 set: remainder = r5 - r6 */
	addik	r18, r0, 0x7fffffff
	and	r5, r5, r18 /* clear bit 31 of both so rsub can't overflow */
	and	r6, r6, r18
	brid	return_here
	rsub	r3, r6, r5 /* delay slot: r3 = r5 - r6 */

	/* first part: try to find the first '1' in the r5 */
div0:
	blti	r5, div2 /* top bit already set: start dividing */
div1:
	add	r5, r5, r5 /* left shift logical r5 */
	bgeid	r5, div1 /* keep shifting until bit 31 is set */
	addik	r29, r29, -1 /* delay slot: one fewer bit to process */

	/* Main restoring-division loop: shift one dividend bit at a time into
	 * the remainder and subtract the divisor when it fits. */
div2:
	/* left shift logical r5 get the '1' into the carry */
	add	r5, r5, r5
	addc	r3, r3, r3 /* move that bit into the remainder register */
	rsub	r31, r6, r3 /* trial subtraction: r31 = r3 - r6 */
	blti	r31, mod_too_small /* negative => divisor didn't fit, restore */
	/* move the r31 to mod since the result was positive */
	or	r3, r0, r31 /* commit the subtraction */
	addik	r30, r30, 1 /* record a '1' quotient bit (discarded later) */
mod_too_small:
	addik	r29, r29, -1
	beqi	r29, loop_end /* all bits processed */
	add	r30, r30, r30 /* shift the quotient accumulator left */
	bri	div2 /* next bit */
loop_end:
	bri	return_here
div_by_zero:
result_is_zero:
	or	r3, r0, r0 /* set result to 0 */
return_here:
	/* Epilogue: restore callee-saved r29-r31 (r3 carries the result). */
	lwi	r29, r1, 0
	lwi	r30, r1, 4
	lwi	r31, r1, 8
	rtsd	r15, 8 /* return; delay slot below still executes */
	addik	r1, r1, 12 /* delay slot: pop the stack frame */

.size __umodsi3, . - __umodsi3
.end __umodsi3
Loading…
Add table
Add a link
Reference in a new issue