Fixed MTP to work with TWRP

awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

View file

@@ -0,0 +1,23 @@
#
# Makefile
#
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_ashldi3.o = -pg
CFLAGS_REMOVE_ashrdi3.o = -pg
CFLAGS_REMOVE_lshrdi3.o = -pg
endif
lib-y := memset.o
ifeq ($(CONFIG_OPT_LIB_ASM),y)
lib-y += fastcopy.o
else
lib-y += memcpy.o memmove.o
endif
lib-y += uaccess_old.o
# libgcc-style stuff needed in the kernel
obj-y += ashldi3.o ashrdi3.o cmpdi2.o divsi3.o lshrdi3.o modsi3.o
obj-y += muldi3.o mulsi3.o ucmpdi2.o udivsi3.o umodsi3.o

View file

@@ -0,0 +1,28 @@
#include <linux/export.h>
#include "libgcc.h"
long long __ashldi3(long long u, word_type b)
{
DWunion uu, w;
word_type bm;
if (b == 0)
return u;
uu.ll = u;
bm = 32 - b;
if (bm <= 0) {
w.s.low = 0;
w.s.high = (unsigned int) uu.s.low << -bm;
} else {
const unsigned int carries = (unsigned int) uu.s.low >> bm;
w.s.low = (unsigned int) uu.s.low << b;
w.s.high = ((unsigned int) uu.s.high << b) | carries;
}
return w.ll;
}
EXPORT_SYMBOL(__ashldi3);
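
The helper above builds a 64-bit left shift out of two 32-bit word operations. A minimal user-space sketch of the same two-word logic, checked against the compiler's native 64-bit shift (hypothetical ashl64 helper, not part of this commit; unsigned arithmetic is used to avoid signed-overflow concerns):

#include <assert.h>
#include <stdio.h>

/* Same two-word idea as __ashldi3 above, on unsigned values. */
static unsigned long long ashl64(unsigned long long u, int b)
{
	unsigned int lo = (unsigned int)u;
	unsigned int hi = (unsigned int)(u >> 32);

	if (b == 0)
		return u;
	if (b >= 32)		/* corresponds to bm = 32 - b <= 0 above */
		return (unsigned long long)(lo << (b - 32)) << 32;
	return ((unsigned long long)((hi << b) | (lo >> (32 - b))) << 32)
	       | (lo << b);
}

int main(void)
{
	unsigned long long u = 0x0123456789abcdefULL;
	int b;

	for (b = 0; b < 64; b++)
		assert(ashl64(u, b) == u << b);
	printf("ok\n");
	return 0;
}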

View file

@@ -0,0 +1,30 @@
#include <linux/export.h>
#include "libgcc.h"
long long __ashrdi3(long long u, word_type b)
{
DWunion uu, w;
word_type bm;
if (b == 0)
return u;
uu.ll = u;
bm = 32 - b;
if (bm <= 0) {
/* w.s.high = 1..1 or 0..0 */
w.s.high =
uu.s.high >> 31;
w.s.low = uu.s.high >> -bm;
} else {
const unsigned int carries = (unsigned int) uu.s.high << bm;
w.s.high = uu.s.high >> b;
w.s.low = ((unsigned int) uu.s.low >> b) | carries;
}
return w.ll;
}
EXPORT_SYMBOL(__ashrdi3);

View file

@@ -0,0 +1,26 @@
#include <linux/export.h>
#include "libgcc.h"
word_type __cmpdi2(long long a, long long b)
{
const DWunion au = {
.ll = a
};
const DWunion bu = {
.ll = b
};
if (au.s.high < bu.s.high)
return 0;
else if (au.s.high > bu.s.high)
return 2;
if ((unsigned int) au.s.low < (unsigned int) bu.s.low)
return 0;
else if ((unsigned int) au.s.low > (unsigned int) bu.s.low)
return 2;
return 1;
}
EXPORT_SYMBOL(__cmpdi2);
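
__cmpdi2 reports less/equal/greater as 0/1/2 rather than the usual negative/zero/positive. A small hedged sketch of how a caller could map that convention back (hypothetical cmp64 helper, not part of this commit):

#include <stdio.h>

static int cmp64(long long a, long long b)
{
	int r = (a < b) ? 0 : (a > b) ? 2 : 1;	/* same 0/1/2 convention as __cmpdi2 */

	return r - 1;				/* -1, 0, +1 */
}

int main(void)
{
	printf("%d %d %d\n", cmp64(-5, 7), cmp64(7, 7), cmp64(9, 2));
	return 0;	/* prints: -1 0 1 */
}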

View file

@@ -0,0 +1,73 @@
#include <linux/linkage.h>
/*
* Divide operation for 32 bit integers.
* Input : Dividend in Reg r5
* Divisor in Reg r6
* Output: Result in Reg r3
*/
.text
.globl __divsi3
.type __divsi3, @function
.ent __divsi3
__divsi3:
.frame r1, 0, r15
addik r1, r1, -16
swi r28, r1, 0
swi r29, r1, 4
swi r30, r1, 8
swi r31, r1, 12
beqi r6, div_by_zero /* div_by_zero - division error */
beqi r5, result_is_zero /* result is zero */
bgeid r5, r5_pos
xor r28, r5, r6 /* get the sign of the result */
rsubi r5, r5, 0 /* make r5 positive */
r5_pos:
bgei r6, r6_pos
rsubi r6, r6, 0 /* make r6 positive */
r6_pos:
addik r30, r0, 0 /* clear mod */
addik r3, r0, 0 /* clear div */
addik r29, r0, 32 /* initialize the loop count */
/* first part try to find the first '1' in the r5 */
div0:
blti r5, div2 /* this traps r5 == 0x80000000 */
div1:
add r5, r5, r5 /* left shift logical r5 */
bgtid r5, div1
addik r29, r29, -1
div2:
/* left shift logical r5 get the '1' into the carry */
add r5, r5, r5
addc r30, r30, r30 /* move that bit into the mod register */
rsub r31, r6, r30 /* try to subtract (r30 a r6) */
blti r31, mod_too_small
/* move the r31 to mod since the result was positive */
or r30, r0, r31
addik r3, r3, 1
mod_too_small:
addik r29, r29, -1
beqi r29, loop_end
add r3, r3, r3 /* shift in the '1' into div */
bri div2 /* div2 */
loop_end:
bgei r28, return_here
brid return_here
rsubi r3, r3, 0 /* negate the result */
div_by_zero:
result_is_zero:
or r3, r0, r0 /* set result to 0 */
return_here:
/* restore values of csrs and that of r3 and the divisor and the dividend */
lwi r28, r1, 0
lwi r29, r1, 4
lwi r30, r1, 8
lwi r31, r1, 12
rtsd r15, 8
addik r1, r1, 16
.size __divsi3, . - __divsi3
.end __divsi3
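
The routine above is a restoring shift-subtract divider: it records the result sign in r28, makes both operands positive, then shifts dividend bits into a running remainder and subtracts the divisor whenever it fits. A hedged C model of the same loop (hypothetical divsi3_model, not part of this commit; it runs all 32 iterations instead of skipping leading zero bits the way the assembly does):

#include <assert.h>
#include <stdio.h>

static int divsi3_model(int a, int b)
{
	unsigned int ua = (a < 0) ? -(unsigned int)a : (unsigned int)a;
	unsigned int ub = (b < 0) ? -(unsigned int)b : (unsigned int)b;
	unsigned int rem = 0, quo = 0;
	int neg = (a ^ b) < 0;		/* sign of the result, like r28 */
	int i;

	if (b == 0 || a == 0)
		return 0;		/* matches div_by_zero / result_is_zero */
	for (i = 31; i >= 0; i--) {
		rem = (rem << 1) | ((ua >> i) & 1);	/* shift a dividend bit into the remainder */
		quo <<= 1;
		if (rem >= ub) {			/* restoring subtract */
			rem -= ub;
			quo |= 1;
		}
	}
	return neg ? -(int)quo : (int)quo;
}

int main(void)
{
	assert(divsi3_model(100, 7) == 100 / 7);
	assert(divsi3_model(-100, 7) == -100 / 7);
	assert(divsi3_model(100, -7) == 100 / -7);
	printf("ok\n");
	return 0;
}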

View file

@@ -0,0 +1,670 @@
/*
* Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
* Copyright (C) 2008-2009 PetaLogix
* Copyright (C) 2008 Jim Law - Iris LP All rights reserved.
*
* This file is subject to the terms and conditions of the GNU General
* Public License. See the file COPYING in the main directory of this
* archive for more details.
*
* Written by Jim Law <jlaw@irispower.com>
*
* intended to replace:
* memcpy in memcpy.c and
* memmove in memmove.c
* ... in arch/microblaze/lib
*
*
* assly_fastcopy.S
*
* Attempt at quicker memcpy and memmove for MicroBlaze
* Input : Operand1 in Reg r5 - destination address
* Operand2 in Reg r6 - source address
* Operand3 in Reg r7 - number of bytes to transfer
* Output: Result in Reg r3 - starting destination address
*
*
* Explanation:
* Perform (possibly unaligned) copy of a block of memory
* between mem locations with size of xfer spec'd in bytes
*/
#ifdef __MICROBLAZEEL__
#error MicroBlaze LE does not support the ASM optimized lib functions. Disable OPT_LIB_ASM.
#endif
#include <linux/linkage.h>
.text
.globl memcpy
.type memcpy, @function
.ent memcpy
memcpy:
fast_memcpy_ascending:
/* move d to return register as value of function */
addi r3, r5, 0
addi r4, r0, 4 /* n = 4 */
cmpu r4, r4, r7 /* n = c - n (unsigned) */
blti r4, a_xfer_end /* if n < 0, less than one word to transfer */
/* transfer first 0~3 bytes to get aligned dest address */
andi r4, r5, 3 /* n = d & 3 */
/* if zero, destination already aligned */
beqi r4, a_dalign_done
/* n = 4 - n (yields 3, 2, 1 transfers for 1, 2, 3 addr offset) */
rsubi r4, r4, 4
rsub r7, r4, r7 /* c = c - n adjust c */
a_xfer_first_loop:
/* if no bytes left to transfer, transfer the bulk */
beqi r4, a_dalign_done
lbui r11, r6, 0 /* h = *s */
sbi r11, r5, 0 /* *d = h */
addi r6, r6, 1 /* s++ */
addi r5, r5, 1 /* d++ */
brid a_xfer_first_loop /* loop */
addi r4, r4, -1 /* n-- (IN DELAY SLOT) */
a_dalign_done:
addi r4, r0, 32 /* n = 32 */
cmpu r4, r4, r7 /* n = c - n (unsigned) */
/* if n < 0, less than one block to transfer */
blti r4, a_block_done
a_block_xfer:
andi r4, r7, 0xffffffe0 /* n = c & ~31 */
rsub r7, r4, r7 /* c = c - n */
andi r9, r6, 3 /* t1 = s & 3 */
/* if temp != 0, unaligned transfers needed */
bnei r9, a_block_unaligned
a_block_aligned:
lwi r9, r6, 0 /* t1 = *(s + 0) */
lwi r10, r6, 4 /* t2 = *(s + 4) */
lwi r11, r6, 8 /* t3 = *(s + 8) */
lwi r12, r6, 12 /* t4 = *(s + 12) */
swi r9, r5, 0 /* *(d + 0) = t1 */
swi r10, r5, 4 /* *(d + 4) = t2 */
swi r11, r5, 8 /* *(d + 8) = t3 */
swi r12, r5, 12 /* *(d + 12) = t4 */
lwi r9, r6, 16 /* t1 = *(s + 16) */
lwi r10, r6, 20 /* t2 = *(s + 20) */
lwi r11, r6, 24 /* t3 = *(s + 24) */
lwi r12, r6, 28 /* t4 = *(s + 28) */
swi r9, r5, 16 /* *(d + 16) = t1 */
swi r10, r5, 20 /* *(d + 20) = t2 */
swi r11, r5, 24 /* *(d + 24) = t3 */
swi r12, r5, 28 /* *(d + 28) = t4 */
addi r6, r6, 32 /* s = s + 32 */
addi r4, r4, -32 /* n = n - 32 */
bneid r4, a_block_aligned /* while (n) loop */
addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */
bri a_block_done
a_block_unaligned:
andi r8, r6, 0xfffffffc /* as = s & ~3 */
add r6, r6, r4 /* s = s + n */
lwi r11, r8, 0 /* h = *(as + 0) */
addi r9, r9, -1
beqi r9, a_block_u1 /* t1 was 1 => 1 byte offset */
addi r9, r9, -1
beqi r9, a_block_u2 /* t1 was 2 => 2 byte offset */
a_block_u3:
bslli r11, r11, 24 /* h = h << 24 */
a_bu3_loop:
lwi r12, r8, 4 /* v = *(as + 4) */
bsrli r9, r12, 8 /* t1 = v >> 8 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 0 /* *(d + 0) = t1 */
bslli r11, r12, 24 /* h = v << 24 */
lwi r12, r8, 8 /* v = *(as + 8) */
bsrli r9, r12, 8 /* t1 = v >> 8 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 4 /* *(d + 4) = t1 */
bslli r11, r12, 24 /* h = v << 24 */
lwi r12, r8, 12 /* v = *(as + 12) */
bsrli r9, r12, 8 /* t1 = v >> 8 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 8 /* *(d + 8) = t1 */
bslli r11, r12, 24 /* h = v << 24 */
lwi r12, r8, 16 /* v = *(as + 16) */
bsrli r9, r12, 8 /* t1 = v >> 8 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 12 /* *(d + 12) = t1 */
bslli r11, r12, 24 /* h = v << 24 */
lwi r12, r8, 20 /* v = *(as + 20) */
bsrli r9, r12, 8 /* t1 = v >> 8 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 16 /* *(d + 16) = t1 */
bslli r11, r12, 24 /* h = v << 24 */
lwi r12, r8, 24 /* v = *(as + 24) */
bsrli r9, r12, 8 /* t1 = v >> 8 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 20 /* *(d + 20) = t1 */
bslli r11, r12, 24 /* h = v << 24 */
lwi r12, r8, 28 /* v = *(as + 28) */
bsrli r9, r12, 8 /* t1 = v >> 8 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 24 /* *(d + 24) = t1 */
bslli r11, r12, 24 /* h = v << 24 */
lwi r12, r8, 32 /* v = *(as + 32) */
bsrli r9, r12, 8 /* t1 = v >> 8 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 28 /* *(d + 28) = t1 */
bslli r11, r12, 24 /* h = v << 24 */
addi r8, r8, 32 /* as = as + 32 */
addi r4, r4, -32 /* n = n - 32 */
bneid r4, a_bu3_loop /* while (n) loop */
addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */
bri a_block_done
a_block_u1:
bslli r11, r11, 8 /* h = h << 8 */
a_bu1_loop:
lwi r12, r8, 4 /* v = *(as + 4) */
bsrli r9, r12, 24 /* t1 = v >> 24 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 0 /* *(d + 0) = t1 */
bslli r11, r12, 8 /* h = v << 8 */
lwi r12, r8, 8 /* v = *(as + 8) */
bsrli r9, r12, 24 /* t1 = v >> 24 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 4 /* *(d + 4) = t1 */
bslli r11, r12, 8 /* h = v << 8 */
lwi r12, r8, 12 /* v = *(as + 12) */
bsrli r9, r12, 24 /* t1 = v >> 24 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 8 /* *(d + 8) = t1 */
bslli r11, r12, 8 /* h = v << 8 */
lwi r12, r8, 16 /* v = *(as + 16) */
bsrli r9, r12, 24 /* t1 = v >> 24 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 12 /* *(d + 12) = t1 */
bslli r11, r12, 8 /* h = v << 8 */
lwi r12, r8, 20 /* v = *(as + 20) */
bsrli r9, r12, 24 /* t1 = v >> 24 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 16 /* *(d + 16) = t1 */
bslli r11, r12, 8 /* h = v << 8 */
lwi r12, r8, 24 /* v = *(as + 24) */
bsrli r9, r12, 24 /* t1 = v >> 24 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 20 /* *(d + 20) = t1 */
bslli r11, r12, 8 /* h = v << 8 */
lwi r12, r8, 28 /* v = *(as + 28) */
bsrli r9, r12, 24 /* t1 = v >> 24 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 24 /* *(d + 24) = t1 */
bslli r11, r12, 8 /* h = v << 8 */
lwi r12, r8, 32 /* v = *(as + 32) */
bsrli r9, r12, 24 /* t1 = v >> 24 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 28 /* *(d + 28) = t1 */
bslli r11, r12, 8 /* h = v << 8 */
addi r8, r8, 32 /* as = as + 32 */
addi r4, r4, -32 /* n = n - 32 */
bneid r4, a_bu1_loop /* while (n) loop */
addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */
bri a_block_done
a_block_u2:
bslli r11, r11, 16 /* h = h << 16 */
a_bu2_loop:
lwi r12, r8, 4 /* v = *(as + 4) */
bsrli r9, r12, 16 /* t1 = v >> 16 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 0 /* *(d + 0) = t1 */
bslli r11, r12, 16 /* h = v << 16 */
lwi r12, r8, 8 /* v = *(as + 8) */
bsrli r9, r12, 16 /* t1 = v >> 16 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 4 /* *(d + 4) = t1 */
bslli r11, r12, 16 /* h = v << 16 */
lwi r12, r8, 12 /* v = *(as + 12) */
bsrli r9, r12, 16 /* t1 = v >> 16 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 8 /* *(d + 8) = t1 */
bslli r11, r12, 16 /* h = v << 16 */
lwi r12, r8, 16 /* v = *(as + 16) */
bsrli r9, r12, 16 /* t1 = v >> 16 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 12 /* *(d + 12) = t1 */
bslli r11, r12, 16 /* h = v << 16 */
lwi r12, r8, 20 /* v = *(as + 20) */
bsrli r9, r12, 16 /* t1 = v >> 16 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 16 /* *(d + 16) = t1 */
bslli r11, r12, 16 /* h = v << 16 */
lwi r12, r8, 24 /* v = *(as + 24) */
bsrli r9, r12, 16 /* t1 = v >> 16 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 20 /* *(d + 20) = t1 */
bslli r11, r12, 16 /* h = v << 16 */
lwi r12, r8, 28 /* v = *(as + 28) */
bsrli r9, r12, 16 /* t1 = v >> 16 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 24 /* *(d + 24) = t1 */
bslli r11, r12, 16 /* h = v << 16 */
lwi r12, r8, 32 /* v = *(as + 32) */
bsrli r9, r12, 16 /* t1 = v >> 16 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 28 /* *(d + 28) = t1 */
bslli r11, r12, 16 /* h = v << 16 */
addi r8, r8, 32 /* as = as + 32 */
addi r4, r4, -32 /* n = n - 32 */
bneid r4, a_bu2_loop /* while (n) loop */
addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */
a_block_done:
addi r4, r0, 4 /* n = 4 */
cmpu r4, r4, r7 /* n = c - n (unsigned) */
blti r4, a_xfer_end /* if n < 0, less than one word to transfer */
a_word_xfer:
andi r4, r7, 0xfffffffc /* n = c & ~3 */
addi r10, r0, 0 /* offset = 0 */
andi r9, r6, 3 /* t1 = s & 3 */
/* if temp != 0, unaligned transfers needed */
bnei r9, a_word_unaligned
a_word_aligned:
lw r9, r6, r10 /* t1 = *(s+offset) */
sw r9, r5, r10 /* *(d+offset) = t1 */
addi r4, r4,-4 /* n-- */
bneid r4, a_word_aligned /* loop */
addi r10, r10, 4 /* offset++ (IN DELAY SLOT) */
bri a_word_done
a_word_unaligned:
andi r8, r6, 0xfffffffc /* as = s & ~3 */
lwi r11, r8, 0 /* h = *(as + 0) */
addi r8, r8, 4 /* as = as + 4 */
addi r9, r9, -1
beqi r9, a_word_u1 /* t1 was 1 => 1 byte offset */
addi r9, r9, -1
beqi r9, a_word_u2 /* t1 was 2 => 2 byte offset */
a_word_u3:
bslli r11, r11, 24 /* h = h << 24 */
a_wu3_loop:
lw r12, r8, r10 /* v = *(as + offset) */
bsrli r9, r12, 8 /* t1 = v >> 8 */
or r9, r11, r9 /* t1 = h | t1 */
sw r9, r5, r10 /* *(d + offset) = t1 */
bslli r11, r12, 24 /* h = v << 24 */
addi r4, r4,-4 /* n = n - 4 */
bneid r4, a_wu3_loop /* while (n) loop */
addi r10, r10, 4 /* offset = offset + 4 (IN DELAY SLOT) */
bri a_word_done
a_word_u1:
bslli r11, r11, 8 /* h = h << 8 */
a_wu1_loop:
lw r12, r8, r10 /* v = *(as + offset) */
bsrli r9, r12, 24 /* t1 = v >> 24 */
or r9, r11, r9 /* t1 = h | t1 */
sw r9, r5, r10 /* *(d + offset) = t1 */
bslli r11, r12, 8 /* h = v << 8 */
addi r4, r4,-4 /* n = n - 4 */
bneid r4, a_wu1_loop /* while (n) loop */
addi r10, r10, 4 /* offset = offset + 4 (IN DELAY SLOT) */
bri a_word_done
a_word_u2:
bslli r11, r11, 16 /* h = h << 16 */
a_wu2_loop:
lw r12, r8, r10 /* v = *(as + offset) */
bsrli r9, r12, 16 /* t1 = v >> 16 */
or r9, r11, r9 /* t1 = h | t1 */
sw r9, r5, r10 /* *(d + offset) = t1 */
bslli r11, r12, 16 /* h = v << 16 */
addi r4, r4,-4 /* n = n - 4 */
bneid r4, a_wu2_loop /* while (n) loop */
addi r10, r10, 4 /* offset = offset + 4 (IN DELAY SLOT) */
a_word_done:
add r5, r5, r10 /* d = d + offset */
add r6, r6, r10 /* s = s + offset */
rsub r7, r10, r7 /* c = c - offset */
a_xfer_end:
a_xfer_end_loop:
beqi r7, a_done /* while (c) */
lbui r9, r6, 0 /* t1 = *s */
addi r6, r6, 1 /* s++ */
sbi r9, r5, 0 /* *d = t1 */
addi r7, r7, -1 /* c-- */
brid a_xfer_end_loop /* loop */
addi r5, r5, 1 /* d++ (IN DELAY SLOT) */
a_done:
rtsd r15, 8
nop
.size memcpy, . - memcpy
.end memcpy
/*----------------------------------------------------------------------------*/
.globl memmove
.type memmove, @function
.ent memmove
memmove:
cmpu r4, r5, r6 /* n = s - d */
bgei r4,fast_memcpy_ascending
fast_memcpy_descending:
/* move d to return register as value of function */
addi r3, r5, 0
add r5, r5, r7 /* d = d + c */
add r6, r6, r7 /* s = s + c */
addi r4, r0, 4 /* n = 4 */
cmpu r4, r4, r7 /* n = c - n (unsigned) */
blti r4,d_xfer_end /* if n < 0, less than one word to transfer */
/* transfer first 0~3 bytes to get aligned dest address */
andi r4, r5, 3 /* n = d & 3 */
/* if zero, destination already aligned */
beqi r4,d_dalign_done
rsub r7, r4, r7 /* c = c - n adjust c */
d_xfer_first_loop:
/* if no bytes left to transfer, transfer the bulk */
beqi r4,d_dalign_done
addi r6, r6, -1 /* s-- */
addi r5, r5, -1 /* d-- */
lbui r11, r6, 0 /* h = *s */
sbi r11, r5, 0 /* *d = h */
brid d_xfer_first_loop /* loop */
addi r4, r4, -1 /* n-- (IN DELAY SLOT) */
d_dalign_done:
addi r4, r0, 32 /* n = 32 */
cmpu r4, r4, r7 /* n = c - n (unsigned) */
/* if n < 0, less than one block to transfer */
blti r4, d_block_done
d_block_xfer:
andi r4, r7, 0xffffffe0 /* n = c & ~31 */
rsub r7, r4, r7 /* c = c - n */
andi r9, r6, 3 /* t1 = s & 3 */
/* if temp != 0, unaligned transfers needed */
bnei r9, d_block_unaligned
d_block_aligned:
addi r6, r6, -32 /* s = s - 32 */
addi r5, r5, -32 /* d = d - 32 */
lwi r9, r6, 28 /* t1 = *(s + 28) */
lwi r10, r6, 24 /* t2 = *(s + 24) */
lwi r11, r6, 20 /* t3 = *(s + 20) */
lwi r12, r6, 16 /* t4 = *(s + 16) */
swi r9, r5, 28 /* *(d + 28) = t1 */
swi r10, r5, 24 /* *(d + 24) = t2 */
swi r11, r5, 20 /* *(d + 20) = t3 */
swi r12, r5, 16 /* *(d + 16) = t4 */
lwi r9, r6, 12 /* t1 = *(s + 12) */
lwi r10, r6, 8 /* t2 = *(s + 8) */
lwi r11, r6, 4 /* t3 = *(s + 4) */
lwi r12, r6, 0 /* t4 = *(s + 0) */
swi r9, r5, 12 /* *(d + 12) = t1 */
swi r10, r5, 8 /* *(d + 8) = t2 */
swi r11, r5, 4 /* *(d + 4) = t3 */
addi r4, r4, -32 /* n = n - 32 */
bneid r4, d_block_aligned /* while (n) loop */
swi r12, r5, 0 /* *(d + 0) = t4 (IN DELAY SLOT) */
bri d_block_done
d_block_unaligned:
andi r8, r6, 0xfffffffc /* as = s & ~3 */
rsub r6, r4, r6 /* s = s - n */
lwi r11, r8, 0 /* h = *(as + 0) */
addi r9, r9, -1
beqi r9,d_block_u1 /* t1 was 1 => 1 byte offset */
addi r9, r9, -1
beqi r9,d_block_u2 /* t1 was 2 => 2 byte offset */
d_block_u3:
bsrli r11, r11, 8 /* h = h >> 8 */
d_bu3_loop:
addi r8, r8, -32 /* as = as - 32 */
addi r5, r5, -32 /* d = d - 32 */
lwi r12, r8, 28 /* v = *(as + 28) */
bslli r9, r12, 24 /* t1 = v << 24 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 28 /* *(d + 28) = t1 */
bsrli r11, r12, 8 /* h = v >> 8 */
lwi r12, r8, 24 /* v = *(as + 24) */
bslli r9, r12, 24 /* t1 = v << 24 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 24 /* *(d + 24) = t1 */
bsrli r11, r12, 8 /* h = v >> 8 */
lwi r12, r8, 20 /* v = *(as + 20) */
bslli r9, r12, 24 /* t1 = v << 24 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 20 /* *(d + 20) = t1 */
bsrli r11, r12, 8 /* h = v >> 8 */
lwi r12, r8, 16 /* v = *(as + 16) */
bslli r9, r12, 24 /* t1 = v << 24 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 16 /* *(d + 16) = t1 */
bsrli r11, r12, 8 /* h = v >> 8 */
lwi r12, r8, 12 /* v = *(as + 12) */
bslli r9, r12, 24 /* t1 = v << 24 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 12 /* *(d + 12) = t1 */
bsrli r11, r12, 8 /* h = v >> 8 */
lwi r12, r8, 8 /* v = *(as + 8) */
bslli r9, r12, 24 /* t1 = v << 24 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 8 /* *(d + 8) = t1 */
bsrli r11, r12, 8 /* h = v >> 8 */
lwi r12, r8, 4 /* v = *(as + 4) */
bslli r9, r12, 24 /* t1 = v << 24 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 4 /* *(d + 4) = t1 */
bsrli r11, r12, 8 /* h = v >> 8 */
lwi r12, r8, 0 /* v = *(as + 0) */
bslli r9, r12, 24 /* t1 = v << 24 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 0 /* *(d + 0) = t1 */
addi r4, r4, -32 /* n = n - 32 */
bneid r4, d_bu3_loop /* while (n) loop */
bsrli r11, r12, 8 /* h = v >> 8 (IN DELAY SLOT) */
bri d_block_done
d_block_u1:
bsrli r11, r11, 24 /* h = h >> 24 */
d_bu1_loop:
addi r8, r8, -32 /* as = as - 32 */
addi r5, r5, -32 /* d = d - 32 */
lwi r12, r8, 28 /* v = *(as + 28) */
bslli r9, r12, 8 /* t1 = v << 8 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 28 /* *(d + 28) = t1 */
bsrli r11, r12, 24 /* h = v >> 24 */
lwi r12, r8, 24 /* v = *(as + 24) */
bslli r9, r12, 8 /* t1 = v << 8 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 24 /* *(d + 24) = t1 */
bsrli r11, r12, 24 /* h = v >> 24 */
lwi r12, r8, 20 /* v = *(as + 20) */
bslli r9, r12, 8 /* t1 = v << 8 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 20 /* *(d + 20) = t1 */
bsrli r11, r12, 24 /* h = v >> 24 */
lwi r12, r8, 16 /* v = *(as + 16) */
bslli r9, r12, 8 /* t1 = v << 8 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 16 /* *(d + 16) = t1 */
bsrli r11, r12, 24 /* h = v >> 24 */
lwi r12, r8, 12 /* v = *(as + 12) */
bslli r9, r12, 8 /* t1 = v << 8 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 12 /* *(d + 12) = t1 */
bsrli r11, r12, 24 /* h = v >> 24 */
lwi r12, r8, 8 /* v = *(as + 8) */
bslli r9, r12, 8 /* t1 = v << 8 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 8 /* *(d + 8) = t1 */
bsrli r11, r12, 24 /* h = v >> 24 */
lwi r12, r8, 4 /* v = *(as + 4) */
bslli r9, r12, 8 /* t1 = v << 8 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 4 /* *(d + 4) = t1 */
bsrli r11, r12, 24 /* h = v >> 24 */
lwi r12, r8, 0 /* v = *(as + 0) */
bslli r9, r12, 8 /* t1 = v << 8 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 0 /* *(d + 0) = t1 */
addi r4, r4, -32 /* n = n - 32 */
bneid r4, d_bu1_loop /* while (n) loop */
bsrli r11, r12, 24 /* h = v >> 24 (IN DELAY SLOT) */
bri d_block_done
d_block_u2:
bsrli r11, r11, 16 /* h = h >> 16 */
d_bu2_loop:
addi r8, r8, -32 /* as = as - 32 */
addi r5, r5, -32 /* d = d - 32 */
lwi r12, r8, 28 /* v = *(as + 28) */
bslli r9, r12, 16 /* t1 = v << 16 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 28 /* *(d + 28) = t1 */
bsrli r11, r12, 16 /* h = v >> 16 */
lwi r12, r8, 24 /* v = *(as + 24) */
bslli r9, r12, 16 /* t1 = v << 16 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 24 /* *(d + 24) = t1 */
bsrli r11, r12, 16 /* h = v >> 16 */
lwi r12, r8, 20 /* v = *(as + 20) */
bslli r9, r12, 16 /* t1 = v << 16 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 20 /* *(d + 20) = t1 */
bsrli r11, r12, 16 /* h = v >> 16 */
lwi r12, r8, 16 /* v = *(as + 16) */
bslli r9, r12, 16 /* t1 = v << 16 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 16 /* *(d + 16) = t1 */
bsrli r11, r12, 16 /* h = v >> 16 */
lwi r12, r8, 12 /* v = *(as + 12) */
bslli r9, r12, 16 /* t1 = v << 16 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 12 /* *(d + 12) = t1 */
bsrli r11, r12, 16 /* h = v >> 16 */
lwi r12, r8, 8 /* v = *(as + 8) */
bslli r9, r12, 16 /* t1 = v << 16 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 8 /* *(d + 8) = t1 */
bsrli r11, r12, 16 /* h = v >> 16 */
lwi r12, r8, 4 /* v = *(as + 4) */
bslli r9, r12, 16 /* t1 = v << 16 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 4 /* *(d + 4) = t1 */
bsrli r11, r12, 16 /* h = v >> 16 */
lwi r12, r8, 0 /* v = *(as + 0) */
bslli r9, r12, 16 /* t1 = v << 16 */
or r9, r11, r9 /* t1 = h | t1 */
swi r9, r5, 0 /* *(d + 0) = t1 */
addi r4, r4, -32 /* n = n - 32 */
bneid r4, d_bu2_loop /* while (n) loop */
bsrli r11, r12, 16 /* h = v >> 16 (IN DELAY SLOT) */
d_block_done:
addi r4, r0, 4 /* n = 4 */
cmpu r4, r4, r7 /* n = c - n (unsigned) */
blti r4,d_xfer_end /* if n < 0, less than one word to transfer */
d_word_xfer:
andi r4, r7, 0xfffffffc /* n = c & ~3 */
rsub r5, r4, r5 /* d = d - n */
rsub r6, r4, r6 /* s = s - n */
rsub r7, r4, r7 /* c = c - n */
andi r9, r6, 3 /* t1 = s & 3 */
/* if temp != 0, unaligned transfers needed */
bnei r9, d_word_unaligned
d_word_aligned:
addi r4, r4,-4 /* n-- */
lw r9, r6, r4 /* t1 = *(s+n) */
bneid r4, d_word_aligned /* loop */
sw r9, r5, r4 /* *(d+n) = t1 (IN DELAY SLOT) */
bri d_word_done
d_word_unaligned:
andi r8, r6, 0xfffffffc /* as = s & ~3 */
lw r11, r8, r4 /* h = *(as + n) */
addi r9, r9, -1
beqi r9,d_word_u1 /* t1 was 1 => 1 byte offset */
addi r9, r9, -1
beqi r9,d_word_u2 /* t1 was 2 => 2 byte offset */
d_word_u3:
bsrli r11, r11, 8 /* h = h >> 8 */
d_wu3_loop:
addi r4, r4,-4 /* n = n - 4 */
lw r12, r8, r4 /* v = *(as + n) */
bslli r9, r12, 24 /* t1 = v << 24 */
or r9, r11, r9 /* t1 = h | t1 */
sw r9, r5, r4 /* *(d + n) = t1 */
bneid r4, d_wu3_loop /* while (n) loop */
bsrli r11, r12, 8 /* h = v >> 8 (IN DELAY SLOT) */
bri d_word_done
d_word_u1:
bsrli r11, r11, 24 /* h = h >> 24 */
d_wu1_loop:
addi r4, r4,-4 /* n = n - 4 */
lw r12, r8, r4 /* v = *(as + n) */
bslli r9, r12, 8 /* t1 = v << 8 */
or r9, r11, r9 /* t1 = h | t1 */
sw r9, r5, r4 /* *(d + n) = t1 */
bneid r4, d_wu1_loop /* while (n) loop */
bsrli r11, r12, 24 /* h = v >> 24 (IN DELAY SLOT) */
bri d_word_done
d_word_u2:
bsrli r11, r11, 16 /* h = h >> 16 */
d_wu2_loop:
addi r4, r4,-4 /* n = n - 4 */
lw r12, r8, r4 /* v = *(as + n) */
bslli r9, r12, 16 /* t1 = v << 16 */
or r9, r11, r9 /* t1 = h | t1 */
sw r9, r5, r4 /* *(d + n) = t1 */
bneid r4, d_wu2_loop /* while (n) loop */
bsrli r11, r12, 16 /* h = v >> 16 (IN DELAY SLOT) */
d_word_done:
d_xfer_end:
d_xfer_end_loop:
beqi r7, a_done /* while (c) */
addi r6, r6, -1 /* s-- */
lbui r9, r6, 0 /* t1 = *s */
addi r5, r5, -1 /* d-- */
sbi r9, r5, 0 /* *d = t1 */
brid d_xfer_end_loop /* loop */
addi r7, r7, -1 /* c-- (IN DELAY SLOT) */
d_done:
rtsd r15, 8
nop
.size memmove, . - memmove
.end memmove

View file

@@ -0,0 +1,32 @@
#ifndef __ASM_LIBGCC_H
#define __ASM_LIBGCC_H
#include <asm/byteorder.h>
typedef int word_type __attribute__ ((mode (__word__)));
#ifdef __BIG_ENDIAN
struct DWstruct {
int high, low;
};
#elif defined(__LITTLE_ENDIAN)
struct DWstruct {
int low, high;
};
#else
#error I feel sick.
#endif
typedef union {
struct DWstruct s;
long long ll;
} DWunion;
extern long long __ashldi3(long long u, word_type b);
extern long long __ashrdi3(long long u, word_type b);
extern word_type __cmpdi2(long long a, long long b);
extern long long __lshrdi3(long long u, word_type b);
extern long long __muldi3(long long u, long long v);
extern word_type __ucmpdi2(unsigned long long a, unsigned long long b);
#endif /* __ASM_LIBGCC_H */
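
DWunion simply overlays two 32-bit halves on a long long so the helpers above can work one word at a time, with the member order chosen by endianness. A hedged stand-alone illustration, not part of this commit, assuming a little-endian host (so the __LITTLE_ENDIAN member order applies):

#include <stdio.h>

struct DWstruct { int low, high; };	/* little-endian member order */
typedef union { struct DWstruct s; long long ll; } DWunion;

int main(void)
{
	DWunion u = { .ll = 0x0123456789abcdefLL };

	printf("high=0x%08x low=0x%08x\n", u.s.high, u.s.low);
	/* expected on a little-endian host: high=0x01234567 low=0x89abcdef */
	return 0;
}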

View file

@@ -0,0 +1,28 @@
#include <linux/export.h>
#include "libgcc.h"
long long __lshrdi3(long long u, word_type b)
{
DWunion uu, w;
word_type bm;
if (b == 0)
return u;
uu.ll = u;
bm = 32 - b;
if (bm <= 0) {
w.s.high = 0;
w.s.low = (unsigned int) uu.s.high >> -bm;
} else {
const unsigned int carries = (unsigned int) uu.s.high << bm;
w.s.high = (unsigned int) uu.s.high >> b;
w.s.low = ((unsigned int) uu.s.low >> b) | carries;
}
return w.ll;
}
EXPORT_SYMBOL(__lshrdi3);

View file

@@ -0,0 +1,189 @@
/*
* Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
* Copyright (C) 2008-2009 PetaLogix
* Copyright (C) 2007 John Williams
*
* Reasonably optimised generic C-code for memcpy on Microblaze
* This is generic C code to do efficient, alignment-aware memcpy.
*
* It is based on demo code originally Copyright 2001 by Intel Corp, taken from
* http://www.embedded.com/showArticle.jhtml?articleID=19205567
*
* Attempts were made, unsuccessfully, to contact the original
* author of this code (Michael Morrow, Intel). Below is the original
* copyright notice.
*
* This software has been developed by Intel Corporation.
* Intel specifically disclaims all warranties, express or
* implied, and all liability, including consequential and
* other indirect damages, for the use of this program, including
* liability for infringement of any proprietary rights,
* and including the warranties of merchantability and fitness
* for a particular purpose. Intel does not assume any
* responsibility for and errors which may appear in this program
* not any responsibility to update it.
*/
#include <linux/export.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/compiler.h>
#include <linux/string.h>
#ifdef __HAVE_ARCH_MEMCPY
#ifndef CONFIG_OPT_LIB_FUNCTION
void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
{
const char *src = v_src;
char *dst = v_dst;
/* Simple, byte oriented memcpy. */
while (c--)
*dst++ = *src++;
return v_dst;
}
#else /* CONFIG_OPT_LIB_FUNCTION */
void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
{
const char *src = v_src;
char *dst = v_dst;
/* The following code tries to optimize the copy by using unsigned
* alignment. This will work fine if both source and destination are
* aligned on the same boundary. However, if they are aligned on
* different boundaries shifts will be necessary. This might result in
* bad performance on MicroBlaze systems without a barrel shifter.
*/
const uint32_t *i_src;
uint32_t *i_dst;
if (likely(c >= 4)) {
unsigned value, buf_hold;
/* Align the destination to a word boundary. */
/* This is done in an endian independent manner. */
switch ((unsigned long)dst & 3) {
case 1:
*dst++ = *src++;
--c;
case 2:
*dst++ = *src++;
--c;
case 3:
*dst++ = *src++;
--c;
}
i_dst = (void *)dst;
/* Choose a copy scheme based on the source */
/* alignment relative to destination. */
switch ((unsigned long)src & 3) {
case 0x0: /* Both byte offsets are aligned */
i_src = (const void *)src;
for (; c >= 4; c -= 4)
*i_dst++ = *i_src++;
src = (const void *)i_src;
break;
case 0x1: /* Unaligned - Off by 1 */
/* Word align the source */
i_src = (const void *) ((unsigned)src & ~3);
#ifndef __MICROBLAZEEL__
/* Load the holding buffer */
buf_hold = *i_src++ << 8;
for (; c >= 4; c -= 4) {
value = *i_src++;
*i_dst++ = buf_hold | value >> 24;
buf_hold = value << 8;
}
#else
/* Load the holding buffer */
buf_hold = (*i_src++ & 0xFFFFFF00) >> 8;
for (; c >= 4; c -= 4) {
value = *i_src++;
*i_dst++ = buf_hold | ((value & 0xFF) << 24);
buf_hold = (value & 0xFFFFFF00) >> 8;
}
#endif
/* Realign the source */
src = (const void *)i_src;
src -= 3;
break;
case 0x2: /* Unaligned - Off by 2 */
/* Word align the source */
i_src = (const void *) ((unsigned)src & ~3);
#ifndef __MICROBLAZEEL__
/* Load the holding buffer */
buf_hold = *i_src++ << 16;
for (; c >= 4; c -= 4) {
value = *i_src++;
*i_dst++ = buf_hold | value >> 16;
buf_hold = value << 16;
}
#else
/* Load the holding buffer */
buf_hold = (*i_src++ & 0xFFFF0000) >> 16;
for (; c >= 4; c -= 4) {
value = *i_src++;
*i_dst++ = buf_hold | ((value & 0xFFFF) << 16);
buf_hold = (value & 0xFFFF0000) >> 16;
}
#endif
/* Realign the source */
src = (const void *)i_src;
src -= 2;
break;
case 0x3: /* Unaligned - Off by 3 */
/* Word align the source */
i_src = (const void *) ((unsigned)src & ~3);
#ifndef __MICROBLAZEEL__
/* Load the holding buffer */
buf_hold = *i_src++ << 24;
for (; c >= 4; c -= 4) {
value = *i_src++;
*i_dst++ = buf_hold | value >> 8;
buf_hold = value << 24;
}
#else
/* Load the holding buffer */
buf_hold = (*i_src++ & 0xFF000000) >> 24;
for (; c >= 4; c -= 4) {
value = *i_src++;
*i_dst++ = buf_hold | ((value & 0xFFFFFF) << 8);
buf_hold = (value & 0xFF000000) >> 24;
}
#endif
/* Realign the source */
src = (const void *)i_src;
src -= 1;
break;
}
dst = (void *)i_dst;
}
/* Finish off any remaining bytes */
/* simple fast copy, ... unless a cache boundary is crossed */
switch (c) {
case 3:
*dst++ = *src++;
case 2:
*dst++ = *src++;
case 1:
*dst++ = *src++;
}
return v_dst;
}
#endif /* CONFIG_OPT_LIB_FUNCTION */
EXPORT_SYMBOL(memcpy);
#endif /* __HAVE_ARCH_MEMCPY */
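
In the unaligned cases above, each aligned source word is split across two destination words through the buf_hold register. A hedged arithmetic sketch of the big-endian off-by-1 combine, using two made-up word values (not part of this commit):

#include <stdio.h>

int main(void)
{
	unsigned int w0 = 0x00112233, w1 = 0x44556677;	/* two aligned source words */
	unsigned int buf_hold = w0 << 8;		/* 0x11223300: low three bytes of w0 */
	unsigned int out = buf_hold | (w1 >> 24);	/* 0x11223344: stream bytes 1..4 */

	printf("0x%08x\n", out);
	return 0;
}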

View file

@@ -0,0 +1,215 @@
/*
* Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
* Copyright (C) 2008-2009 PetaLogix
* Copyright (C) 2007 John Williams
*
* Reasonably optimised generic C-code for memmove on Microblaze
* This is generic C code to do efficient, alignment-aware memmove.
*
* It is based on demo code originally Copyright 2001 by Intel Corp, taken from
* http://www.embedded.com/showArticle.jhtml?articleID=19205567
*
* Attempts were made, unsuccessfully, to contact the original
* author of this code (Michael Morrow, Intel). Below is the original
* copyright notice.
*
* This software has been developed by Intel Corporation.
* Intel specifically disclaims all warranties, express or
* implied, and all liability, including consequential and
* other indirect damages, for the use of this program, including
* liability for infringement of any proprietary rights,
* and including the warranties of merchantability and fitness
* for a particular purpose. Intel does not assume any
* responsibility for and errors which may appear in this program
* not any responsibility to update it.
*/
#include <linux/export.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/compiler.h>
#include <linux/string.h>
#ifdef __HAVE_ARCH_MEMMOVE
#ifndef CONFIG_OPT_LIB_FUNCTION
void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
{
const char *src = v_src;
char *dst = v_dst;
if (!c)
return v_dst;
/* Use memcpy when source is higher than dest */
if (v_dst <= v_src)
return memcpy(v_dst, v_src, c);
/* copy backwards, from end to beginning */
src += c;
dst += c;
/* Simple, byte oriented memmove. */
while (c--)
*--dst = *--src;
return v_dst;
}
#else /* CONFIG_OPT_LIB_FUNCTION */
void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
{
const char *src = v_src;
char *dst = v_dst;
const uint32_t *i_src;
uint32_t *i_dst;
if (!c)
return v_dst;
/* Use memcpy when source is higher than dest */
if (v_dst <= v_src)
return memcpy(v_dst, v_src, c);
/* The following code tries to optimize the copy by using unsigned
* alignment. This will work fine if both source and destination are
* aligned on the same boundary. However, if they are aligned on
* different boundaries shifts will be necessary. This might result in
* bad performance on MicroBlaze systems without a barrel shifter.
*/
/* FIXME this part needs more test */
/* Do a descending copy - this is a bit trickier! */
dst += c;
src += c;
if (c >= 4) {
unsigned value, buf_hold;
/* Align the destination to a word boundary. */
/* This is done in an endian independent manner. */
switch ((unsigned long)dst & 3) {
case 3:
*--dst = *--src;
--c;
case 2:
*--dst = *--src;
--c;
case 1:
*--dst = *--src;
--c;
}
i_dst = (void *)dst;
/* Choose a copy scheme based on the source */
/* alignment relative to destination. */
switch ((unsigned long)src & 3) {
case 0x0: /* Both byte offsets are aligned */
i_src = (const void *)src;
for (; c >= 4; c -= 4)
*--i_dst = *--i_src;
src = (const void *)i_src;
break;
case 0x1: /* Unaligned - Off by 1 */
/* Word align the source */
i_src = (const void *) (((unsigned)src + 4) & ~3);
#ifndef __MICROBLAZEEL__
/* Load the holding buffer */
buf_hold = *--i_src >> 24;
for (; c >= 4; c -= 4) {
value = *--i_src;
*--i_dst = buf_hold << 8 | value;
buf_hold = value >> 24;
}
#else
/* Load the holding buffer */
buf_hold = (*--i_src & 0xFF) << 24;
for (; c >= 4; c -= 4) {
value = *--i_src;
*--i_dst = buf_hold |
((value & 0xFFFFFF00) >> 8);
buf_hold = (value & 0xFF) << 24;
}
#endif
/* Realign the source */
src = (const void *)i_src;
src += 1;
break;
case 0x2: /* Unaligned - Off by 2 */
/* Word align the source */
i_src = (const void *) (((unsigned)src + 4) & ~3);
#ifndef __MICROBLAZEEL__
/* Load the holding buffer */
buf_hold = *--i_src >> 16;
for (; c >= 4; c -= 4) {
value = *--i_src;
*--i_dst = buf_hold << 16 | value;
buf_hold = value >> 16;
}
#else
/* Load the holding buffer */
buf_hold = (*--i_src & 0xFFFF) << 16;
for (; c >= 4; c -= 4) {
value = *--i_src;
*--i_dst = buf_hold |
((value & 0xFFFF0000) >> 16);
buf_hold = (value & 0xFFFF) << 16;
}
#endif
/* Realign the source */
src = (const void *)i_src;
src += 2;
break;
case 0x3: /* Unaligned - Off by 3 */
/* Word align the source */
i_src = (const void *) (((unsigned)src + 4) & ~3);
#ifndef __MICROBLAZEEL__
/* Load the holding buffer */
buf_hold = *--i_src >> 8;
for (; c >= 4; c -= 4) {
value = *--i_src;
*--i_dst = buf_hold << 24 | value;
buf_hold = value >> 8;
}
#else
/* Load the holding buffer */
buf_hold = (*--i_src & 0xFFFFFF) << 8;
for (; c >= 4; c -= 4) {
value = *--i_src;
*--i_dst = buf_hold |
((value & 0xFF000000) >> 24);
buf_hold = (value & 0xFFFFFF) << 8;
}
#endif
/* Realign the source */
src = (const void *)i_src;
src += 3;
break;
}
dst = (void *)i_dst;
}
/* simple fast copy, ... unless a cache boundary is crossed */
/* Finish off any remaining bytes */
switch (c) {
case 4:
*--dst = *--src;
case 3:
*--dst = *--src;
case 2:
*--dst = *--src;
case 1:
*--dst = *--src;
}
return v_dst;
}
#endif /* CONFIG_OPT_LIB_FUNCTION */
EXPORT_SYMBOL(memmove);
#endif /* __HAVE_ARCH_MEMMOVE */

View file

@@ -0,0 +1,97 @@
/*
* Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
* Copyright (C) 2008-2009 PetaLogix
* Copyright (C) 2007 John Williams
*
* Reasonably optimised generic C-code for memset on Microblaze
* This is generic C code to do efficient, alignment-aware memset.
*
* It is based on demo code originally Copyright 2001 by Intel Corp, taken from
* http://www.embedded.com/showArticle.jhtml?articleID=19205567
*
* Attempts were made, unsuccessfully, to contact the original
* author of this code (Michael Morrow, Intel). Below is the original
* copyright notice.
*
* This software has been developed by Intel Corporation.
* Intel specifically disclaims all warranties, express or
* implied, and all liability, including consequential and
* other indirect damages, for the use of this program, including
* liability for infringement of any proprietary rights,
* and including the warranties of merchantability and fitness
* for a particular purpose. Intel does not assume any
* responsibility for and errors which may appear in this program
* not any responsibility to update it.
*/
#include <linux/export.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/compiler.h>
#include <linux/string.h>
#ifdef __HAVE_ARCH_MEMSET
#ifndef CONFIG_OPT_LIB_FUNCTION
void *memset(void *v_src, int c, __kernel_size_t n)
{
char *src = v_src;
/* Truncate c to 8 bits */
c = (c & 0xFF);
/* Simple, byte oriented memset or the rest of count. */
while (n--)
*src++ = c;
return v_src;
}
#else /* CONFIG_OPT_LIB_FUNCTION */
void *memset(void *v_src, int c, __kernel_size_t n)
{
char *src = v_src;
uint32_t *i_src;
uint32_t w32 = 0;
/* Truncate c to 8 bits */
c = (c & 0xFF);
if (unlikely(c)) {
/* Make a repeating word out of it */
w32 = c;
w32 |= w32 << 8;
w32 |= w32 << 16;
}
if (likely(n >= 4)) {
/* Align the destination to a word boundary */
/* This is done in an endian independent manner */
switch ((unsigned) src & 3) {
case 1:
*src++ = c;
--n;
case 2:
*src++ = c;
--n;
case 3:
*src++ = c;
--n;
}
i_src = (void *)src;
/* Do as many full-word copies as we can */
for (; n >= 4; n -= 4)
*i_src++ = w32;
src = (void *)i_src;
}
/* Simple, byte oriented memset or the rest of count. */
while (n--)
*src++ = c;
return v_src;
}
#endif /* CONFIG_OPT_LIB_FUNCTION */
EXPORT_SYMBOL(memset);
#endif /* __HAVE_ARCH_MEMSET */
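
The optimized path above first smears the fill byte across a 32-bit word (w32) so the aligned middle of the buffer can be written four bytes per store. A hedged sketch of just that replication step (not part of this commit):

#include <stdio.h>

int main(void)
{
	int c = 0xAB;
	unsigned int w32 = c & 0xFF;

	w32 |= w32 << 8;	/* 0x0000ABAB */
	w32 |= w32 << 16;	/* 0xABABABAB */
	printf("0x%08x\n", w32);
	return 0;
}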

View file

@@ -0,0 +1,73 @@
#include <linux/linkage.h>
/*
* modulo operation for 32 bit integers.
* Input : op1 in Reg r5
* op2 in Reg r6
* Output: op1 mod op2 in Reg r3
*/
.text
.globl __modsi3
.type __modsi3, @function
.ent __modsi3
__modsi3:
.frame r1, 0, r15
addik r1, r1, -16
swi r28, r1, 0
swi r29, r1, 4
swi r30, r1, 8
swi r31, r1, 12
beqi r6, div_by_zero /* div_by_zero division error */
beqi r5, result_is_zero /* result is zero */
bgeid r5, r5_pos
/* get the sign of the result [depends only on the first arg] */
add r28, r5, r0
rsubi r5, r5, 0 /* make r5 positive */
r5_pos:
bgei r6, r6_pos
rsubi r6, r6, 0 /* make r6 positive */
r6_pos:
addik r3, r0, 0 /* clear mod */
addik r30, r0, 0 /* clear div */
addik r29, r0, 32 /* initialize the loop count */
/* first part try to find the first '1' in the r5 */
div1:
add r5, r5, r5 /* left shift logical r5 */
bgeid r5, div1
addik r29, r29, -1
div2:
/* left shift logical r5 get the '1' into the carry */
add r5, r5, r5
addc r3, r3, r3 /* move that bit into the mod register */
rsub r31, r6, r3 /* try to subtract (r30 a r6) */
blti r31, mod_too_small
/* move the r31 to mod since the result was positive */
or r3, r0, r31
addik r30, r30, 1
mod_too_small:
addik r29, r29, -1
beqi r29, loop_end
add r30, r30, r30 /* shift in the '1' into div */
bri div2 /* div2 */
loop_end:
bgei r28, return_here
brid return_here
rsubi r3, r3, 0 /* negate the result */
div_by_zero:
result_is_zero:
or r3, r0, r0 /* set result to 0 [both mod as well as div are 0] */
return_here:
/* restore values of csrs and that of r3 and the divisor and the dividend */
lwi r28, r1, 0
lwi r29, r1, 4
lwi r30, r1, 8
lwi r31, r1, 12
rtsd r15, 8
addik r1, r1, 16
.size __modsi3, . - __modsi3
.end __modsi3

View file

@@ -0,0 +1,57 @@
#include <linux/export.h>
#include "libgcc.h"
#define W_TYPE_SIZE 32
#define __ll_B ((unsigned long) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((unsigned long) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((unsigned long) (t) >> (W_TYPE_SIZE / 2))
/* If we still don't have umul_ppmm, define it using plain C. */
#if !defined(umul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
do { \
unsigned long __x0, __x1, __x2, __x3; \
unsigned short __ul, __vl, __uh, __vh; \
\
__ul = __ll_lowpart(u); \
__uh = __ll_highpart(u); \
__vl = __ll_lowpart(v); \
__vh = __ll_highpart(v); \
\
__x0 = (unsigned long) __ul * __vl; \
__x1 = (unsigned long) __ul * __vh; \
__x2 = (unsigned long) __uh * __vl; \
__x3 = (unsigned long) __uh * __vh; \
\
__x1 += __ll_highpart(__x0); /* this can't give carry */\
__x1 += __x2; /* but this indeed can */ \
if (__x1 < __x2) /* did we get it? */ \
__x3 += __ll_B; /* yes, add it in the proper pos */ \
\
(w1) = __x3 + __ll_highpart(__x1); \
(w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\
} while (0)
#endif
#if !defined(__umulsidi3)
#define __umulsidi3(u, v) ({ \
DWunion __w; \
umul_ppmm(__w.s.high, __w.s.low, u, v); \
__w.ll; \
})
#endif
long long __muldi3(long long u, long long v)
{
const DWunion uu = {.ll = u};
const DWunion vv = {.ll = v};
DWunion w = {.ll = __umulsidi3(uu.s.low, vv.s.low)};
w.s.high += ((unsigned long) uu.s.low * (unsigned long) vv.s.high
+ (unsigned long) uu.s.high * (unsigned long) vv.s.low);
return w.ll;
}
EXPORT_SYMBOL(__muldi3);
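
umul_ppmm forms a 32x32->64 product from four 16-bit half-word partial products, and __muldi3 then adds the cross terms into the high word. A hedged user-space check of the half-word decomposition against a native 64-bit multiply (hypothetical mul32x32 helper, not part of this commit):

#include <assert.h>
#include <stdio.h>

static unsigned long long mul32x32(unsigned int u, unsigned int v)
{
	unsigned int ul = u & 0xFFFF, uh = u >> 16;
	unsigned int vl = v & 0xFFFF, vh = v >> 16;
	unsigned int x0 = ul * vl, x1 = ul * vh, x2 = uh * vl, x3 = uh * vh;

	x1 += x0 >> 16;		/* cannot carry */
	x1 += x2;		/* but this can */
	if (x1 < x2)
		x3 += 0x10000;	/* propagate the carry into the high word */
	return ((unsigned long long)(x3 + (x1 >> 16)) << 32)
	       | (((unsigned long long)(x1 & 0xFFFF) << 16) + (x0 & 0xFFFF));
}

int main(void)
{
	assert(mul32x32(0x89abcdef, 0x12345678) ==
	       0x89abcdefULL * 0x12345678ULL);
	printf("ok\n");
	return 0;
}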

View file

@@ -0,0 +1,46 @@
#include <linux/linkage.h>
/*
* Multiply operation for 32 bit integers.
* Input : Operand1 in Reg r5
* Operand2 in Reg r6
* Output: Result [op1 * op2] in Reg r3
*/
.text
.globl __mulsi3
.type __mulsi3, @function
.ent __mulsi3
__mulsi3:
.frame r1, 0, r15
add r3, r0, r0
beqi r5, result_is_zero /* multiply by zero */
beqi r6, result_is_zero /* multiply by zero */
bgeid r5, r5_pos
xor r4, r5, r6 /* get the sign of the result */
rsubi r5, r5, 0 /* make r5 positive */
r5_pos:
bgei r6, r6_pos
rsubi r6, r6, 0 /* make r6 positive */
r6_pos:
bri l1
l2:
add r5, r5, r5
l1:
srl r6, r6
addc r7, r0, r0
beqi r7, l2
bneid r6, l2
add r3, r3, r5
blti r4, negateresult
rtsd r15, 8
nop
negateresult:
rtsd r15, 8
rsub r3, r3, r0
result_is_zero:
rtsd r15, 8
addi r3, r0, 0
.size __mulsi3, . - __mulsi3
.end __mulsi3
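
The routine above multiplies by scanning the multiplier bit by bit: each srl of r6 drops its low bit into the carry, and when that bit is set the (doubling) multiplicand in r5 is added to the accumulator; the sign of the product comes from the XOR kept in r4. A hedged C model of the same loop (hypothetical mulsi3_model, not part of this commit):

#include <assert.h>
#include <stdio.h>

static int mulsi3_model(int a, int b)
{
	unsigned int ua = (a < 0) ? -(unsigned int)a : (unsigned int)a;
	unsigned int ub = (b < 0) ? -(unsigned int)b : (unsigned int)b;
	unsigned int acc = 0;
	int neg = (a ^ b) < 0;

	if (a == 0 || b == 0)
		return 0;
	while (ub) {
		if (ub & 1)	/* carry out of "srl r6" in the assembly */
			acc += ua;
		ua <<= 1;	/* add r5, r5, r5 */
		ub >>= 1;	/* srl r6, r6 */
	}
	return neg ? -(int)acc : (int)acc;
}

int main(void)
{
	assert(mulsi3_model(123, -456) == 123 * -456);
	assert(mulsi3_model(-7, -9) == 63);
	printf("ok\n");
	return 0;
}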

View file

@@ -0,0 +1,266 @@
/*
* Copyright (C) 2009 Michal Simek <monstr@monstr.eu>
* Copyright (C) 2009 PetaLogix
* Copyright (C) 2007 LynuxWorks, Inc.
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/errno.h>
#include <linux/linkage.h>
#include <asm/page.h>
/*
* int __strncpy_user(char *to, char *from, int len);
*
* Returns:
* -EFAULT for an exception
* len if we hit the buffer limit
* bytes copied
*/
.text
.globl __strncpy_user;
.type __strncpy_user, @function
.align 4;
__strncpy_user:
/*
* r5 - to
* r6 - from
* r7 - len
* r3 - temp count
* r4 - temp val
*/
beqid r7,3f
addik r3,r7,0 /* temp_count = len */
1:
lbu r4,r6,r0
beqid r4,2f
sb r4,r5,r0
addik r5,r5,1
addik r6,r6,1 /* delay slot */
addik r3,r3,-1
bnei r3,1b /* break on len */
2:
rsubk r3,r3,r7 /* temp_count = len - temp_count */
3:
rtsd r15,8
nop
.size __strncpy_user, . - __strncpy_user
.section .fixup, "ax"
.align 2
4:
brid 3b
addik r3,r0, -EFAULT
.section __ex_table, "a"
.word 1b,4b
/*
* int __strnlen_user(char __user *str, int maxlen);
*
* Returns:
* 0 on error
* maxlen + 1 if no NUL byte found within maxlen bytes
* size of the string (including NUL byte)
*/
.text
.globl __strnlen_user;
.type __strnlen_user, @function
.align 4;
__strnlen_user:
beqid r6,3f
addik r3,r6,0
1:
lbu r4,r5,r0
beqid r4,2f /* break on NUL */
addik r3,r3,-1 /* delay slot */
bneid r3,1b
addik r5,r5,1 /* delay slot */
addik r3,r3,-1 /* for break on len */
2:
rsubk r3,r3,r6
3:
rtsd r15,8
nop
.size __strnlen_user, . - __strnlen_user
.section .fixup,"ax"
4:
brid 3b
addk r3,r0,r0
.section __ex_table,"a"
.word 1b,4b
/* Loop unrolling for __copy_tofrom_user */
#define COPY(offset) \
1: lwi r4 , r6, 0x0000 + offset; \
2: lwi r19, r6, 0x0004 + offset; \
3: lwi r20, r6, 0x0008 + offset; \
4: lwi r21, r6, 0x000C + offset; \
5: lwi r22, r6, 0x0010 + offset; \
6: lwi r23, r6, 0x0014 + offset; \
7: lwi r24, r6, 0x0018 + offset; \
8: lwi r25, r6, 0x001C + offset; \
9: swi r4 , r5, 0x0000 + offset; \
10: swi r19, r5, 0x0004 + offset; \
11: swi r20, r5, 0x0008 + offset; \
12: swi r21, r5, 0x000C + offset; \
13: swi r22, r5, 0x0010 + offset; \
14: swi r23, r5, 0x0014 + offset; \
15: swi r24, r5, 0x0018 + offset; \
16: swi r25, r5, 0x001C + offset; \
.section __ex_table,"a"; \
.word 1b, 33f; \
.word 2b, 33f; \
.word 3b, 33f; \
.word 4b, 33f; \
.word 5b, 33f; \
.word 6b, 33f; \
.word 7b, 33f; \
.word 8b, 33f; \
.word 9b, 33f; \
.word 10b, 33f; \
.word 11b, 33f; \
.word 12b, 33f; \
.word 13b, 33f; \
.word 14b, 33f; \
.word 15b, 33f; \
.word 16b, 33f; \
.text
#define COPY_80(offset) \
COPY(0x00 + offset);\
COPY(0x20 + offset);\
COPY(0x40 + offset);\
COPY(0x60 + offset);
/*
* int __copy_tofrom_user(char *to, char *from, int len)
* Return:
* 0 on success
* number of not copied bytes on error
*/
.text
.globl __copy_tofrom_user;
.type __copy_tofrom_user, @function
.align 4;
__copy_tofrom_user:
/*
* r5 - to
* r6 - from
* r7, r3 - count
* r4 - tempval
*/
beqid r7, 0f /* zero size is not likely */
or r3, r5, r6 /* find if is any to/from unaligned */
or r3, r3, r7 /* find if count is unaligned */
andi r3, r3, 0x3 /* mask the low 2 bits */
bneid r3, bu1 /* if r3 is not zero then byte copying */
or r3, r0, r0
rsubi r3, r7, PAGE_SIZE /* detect PAGE_SIZE */
beqid r3, page;
or r3, r0, r0
w1: lw r4, r6, r3 /* at least one 4 byte copy */
w2: sw r4, r5, r3
addik r7, r7, -4
bneid r7, w1
addik r3, r3, 4
addik r3, r7, 0
rtsd r15, 8
nop
.section __ex_table,"a"
.word w1, 0f;
.word w2, 0f;
.text
.align 4 /* Alignment is important to keep icache happy */
page: /* Create room on stack and save registers for storing values */
addik r1, r1, -40
swi r5, r1, 0
swi r6, r1, 4
swi r7, r1, 8
swi r19, r1, 12
swi r20, r1, 16
swi r21, r1, 20
swi r22, r1, 24
swi r23, r1, 28
swi r24, r1, 32
swi r25, r1, 36
loop: /* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */
/* Loop unrolling to get performance boost */
COPY_80(0x000);
COPY_80(0x080);
COPY_80(0x100);
COPY_80(0x180);
/* copy loop */
addik r6, r6, 0x200
addik r7, r7, -0x200
bneid r7, loop
addik r5, r5, 0x200
/* Restore register content */
lwi r5, r1, 0
lwi r6, r1, 4
lwi r7, r1, 8
lwi r19, r1, 12
lwi r20, r1, 16
lwi r21, r1, 20
lwi r22, r1, 24
lwi r23, r1, 28
lwi r24, r1, 32
lwi r25, r1, 36
addik r1, r1, 40
/* return back */
addik r3, r0, 0
rtsd r15, 8
nop
/* Fault case - return temp count */
33:
addik r3, r7, 0
/* Restore register content */
lwi r5, r1, 0
lwi r6, r1, 4
lwi r7, r1, 8
lwi r19, r1, 12
lwi r20, r1, 16
lwi r21, r1, 20
lwi r22, r1, 24
lwi r23, r1, 28
lwi r24, r1, 32
lwi r25, r1, 36
addik r1, r1, 40
/* return back */
rtsd r15, 8
nop
.align 4 /* Alignment is important to keep icache happy */
bu1: lbu r4,r6,r3
bu2: sb r4,r5,r3
addik r7,r7,-1
bneid r7,bu1
addik r3,r3,1 /* delay slot */
0:
addik r3,r7,0
rtsd r15,8
nop
.size __copy_tofrom_user, . - __copy_tofrom_user
.section __ex_table,"a"
.word bu1, 0b;
.word bu2, 0b;
.text

View file

@@ -0,0 +1,20 @@
#include <linux/export.h>
#include "libgcc.h"
word_type __ucmpdi2(unsigned long long a, unsigned long long b)
{
const DWunion au = {.ll = a};
const DWunion bu = {.ll = b};
if ((unsigned int) au.s.high < (unsigned int) bu.s.high)
return 0;
else if ((unsigned int) au.s.high > (unsigned int) bu.s.high)
return 2;
if ((unsigned int) au.s.low < (unsigned int) bu.s.low)
return 0;
else if ((unsigned int) au.s.low > (unsigned int) bu.s.low)
return 2;
return 1;
}
EXPORT_SYMBOL(__ucmpdi2);

View file

@@ -0,0 +1,84 @@
#include <linux/linkage.h>
/*
* Unsigned divide operation.
* Input : Divisor in Reg r5
* Dividend in Reg r6
* Output: Result in Reg r3
*/
.text
.globl __udivsi3
.type __udivsi3, @function
.ent __udivsi3
__udivsi3:
.frame r1, 0, r15
addik r1, r1, -12
swi r29, r1, 0
swi r30, r1, 4
swi r31, r1, 8
beqi r6, div_by_zero /* div_by_zero - division error */
beqid r5, result_is_zero /* result is zero */
addik r30, r0, 0 /* clear mod */
addik r29, r0, 32 /* initialize the loop count */
/* check if r6 and r5 are equal - if yes, return 1 */
rsub r18, r5, r6
beqid r18, return_here
addik r3, r0, 1
/* check if (uns)r6 is greater than (uns)r5. in that case, just return 0 */
xor r18, r5, r6
bgeid r18, 16
add r3, r0, r0 /* we would anyways clear r3 */
blti r6, return_here /* r6[bit 31 = 1] hence is greater */
bri checkr6
rsub r18, r6, r5 /* microblazecmp */
blti r18, return_here
/* if r6 [bit 31] is set, then return result as 1 */
checkr6:
bgti r6, div0
brid return_here
addik r3, r0, 1
/* first part try to find the first '1' in the r5 */
div0:
blti r5, div2
div1:
add r5, r5, r5 /* left shift logical r5 */
bgtid r5, div1
addik r29, r29, -1
div2:
/* left shift logical r5 get the '1' into the carry */
add r5, r5, r5
addc r30, r30, r30 /* move that bit into the mod register */
rsub r31, r6, r30 /* try to subtract (r30 a r6) */
blti r31, mod_too_small
/* move the r31 to mod since the result was positive */
or r30, r0, r31
addik r3, r3, 1
mod_too_small:
addik r29, r29, -1
beqi r29, loop_end
add r3, r3, r3 /* shift in the '1' into div */
bri div2 /* div2 */
loop_end:
bri return_here
div_by_zero:
result_is_zero:
or r3, r0, r0 /* set result to 0 */
return_here:
/* restore values of csrs and that of r3 and the divisor and the dividend */
lwi r29, r1, 0
lwi r30, r1, 4
lwi r31, r1, 8
rtsd r15, 8
addik r1, r1, 12
.size __udivsi3, . - __udivsi3
.end __udivsi3

View file

@@ -0,0 +1,86 @@
#include <linux/linkage.h>
/*
* Unsigned modulo operation for 32 bit integers.
* Input : op1 in Reg r5
* op2 in Reg r6
* Output: op1 mod op2 in Reg r3
*/
.text
.globl __umodsi3
.type __umodsi3, @function
.ent __umodsi3
__umodsi3:
.frame r1, 0, r15
addik r1, r1, -12
swi r29, r1, 0
swi r30, r1, 4
swi r31, r1, 8
beqi r6, div_by_zero /* div_by_zero - division error */
beqid r5, result_is_zero /* result is zero */
addik r3, r0, 0 /* clear div */
addik r30, r0, 0 /* clear mod */
addik r29, r0, 32 /* initialize the loop count */
/* check if r6 and r5 are equal - if yes, return 0 */
rsub r18, r5, r6
beqi r18, return_here
/* check if (uns)r6 is greater than (uns)r5. in that case, just return r5 */
xor r18, r5, r6
bgeid r18, 16
addik r3, r5, 0
blti r6, return_here
bri $lcheckr6
rsub r18, r5, r6 /* microblazecmp */
bgti r18, return_here
/* if r6 [bit 31] is set, then return result as r5-r6 */
$lcheckr6:
bgtid r6, div0
addik r3, r0, 0
addik r18, r0, 0x7fffffff
and r5, r5, r18
and r6, r6, r18
brid return_here
rsub r3, r6, r5
/* first part: try to find the first '1' in the r5 */
div0:
blti r5, div2
div1:
add r5, r5, r5 /* left shift logical r5 */
bgeid r5, div1
addik r29, r29, -1
div2:
/* left shift logical r5 get the '1' into the carry */
add r5, r5, r5
addc r3, r3, r3 /* move that bit into the mod register */
rsub r31, r6, r3 /* try to subtract (r3 a r6) */
blti r31, mod_too_small
/* move the r31 to mod since the result was positive */
or r3, r0, r31
addik r30, r30, 1
mod_too_small:
addik r29, r29, -1
beqi r29, loop_end
add r30, r30, r30 /* shift in the '1' into div */
bri div2 /* div2 */
loop_end:
bri return_here
div_by_zero:
result_is_zero:
or r3, r0, r0 /* set result to 0 */
return_here:
/* restore values of csrs and that of r3 and the divisor and the dividend */
lwi r29, r1, 0
lwi r30, r1, 4
lwi r31, r1, 8
rtsd r15, 8
addik r1, r1, 12
.size __umodsi3, . - __umodsi3
.end __umodsi3