Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

View file

@ -0,0 +1,11 @@
#
# arch/blackfin/lib/Makefile
#
# Objects built from this directory, grouped one family per line:
# 64-bit shift helpers, integer multiply/divide/modulo helpers,
# mem* routines, str* routines, 32x32 high-part multiplies, and the
# ins*/outs* port I/O loops.
lib-y := \
ashldi3.o ashrdi3.o lshrdi3.o \
muldi3.o divsi3.o udivsi3.o modsi3.o umodsi3.o \
memcpy.o memset.o memcmp.o memchr.o memmove.o \
strcmp.o strcpy.o strncmp.o strncpy.o \
umulsi3_highpart.o smulsi3_highpart.o \
ins.o outs.o

View file

@ -0,0 +1,35 @@
/*
* Copyright 2004-2009 Analog Devices Inc.
*
* Licensed under the GPL-2 or later.
*/
#include "gcclib.h"
#ifdef CONFIG_ARITHMETIC_OPS_L1
DItype __ashldi3(DItype u, word_type b)__attribute__((l1_text));
#endif
/*
 * __ashldi3 - shift the 64-bit value u left by b bits.
 *
 * The operand is handled as two 32-bit halves.  A shift of zero returns
 * u untouched.  When b is at least the width of one half, the low half
 * of the result is zero and the high half is built from the old low
 * half; otherwise the bits that leave the top of the low half are
 * carried into the bottom of the high half.
 */
DItype __ashldi3(DItype u, word_type b)
{
	DIunion src;
	DIunion res;
	word_type spare;

	if (b == 0)
		return u;

	src.ll = u;
	/* spare = (bits in one half) - b; non-positive means b >= one half */
	spare = (sizeof(SItype) * BITS_PER_UNIT) - b;

	if (spare > 0) {
		/* bits shifted out of the low half land in the high half */
		USItype carries = (USItype) src.s.low >> spare;

		res.s.low = (USItype) src.s.low << b;
		res.s.high = ((USItype) src.s.high << b) | carries;
	} else {
		res.s.low = 0;
		res.s.high = (USItype) src.s.low << -spare;
	}

	return res.ll;
}

View file

@ -0,0 +1,36 @@
/*
* Copyright 2004-2009 Analog Devices Inc.
*
* Licensed under the GPL-2 or later.
*/
#include "gcclib.h"
#ifdef CONFIG_ARITHMETIC_OPS_L1
DItype __ashrdi3(DItype u, word_type b)__attribute__((l1_text));
#endif
/*
 * __ashrdi3 - arithmetic (sign-propagating) right shift of the 64-bit
 * value u by b bits.
 *
 * The operand is handled as two 32-bit halves.  A shift of zero returns
 * u untouched.  When b is at least the width of one half, the high half
 * of the result is filled with copies of the sign bit and the low half
 * is built from the old high half; otherwise the bits that leave the
 * bottom of the high half are carried into the top of the low half.
 */
DItype __ashrdi3(DItype u, word_type b)
{
	DIunion src;
	DIunion res;
	word_type spare;

	if (b == 0)
		return u;

	src.ll = u;
	/* spare = (bits in one half) - b; non-positive means b >= one half */
	spare = (sizeof(SItype) * BITS_PER_UNIT) - b;

	if (spare > 0) {
		/* bits shifted out of the high half land in the low half */
		USItype carries = (USItype) src.s.high << spare;

		res.s.high = src.s.high >> b;
		res.s.low = ((USItype) src.s.low >> b) | carries;
	} else {
		/* high half becomes all ones or all zeroes (the sign) */
		res.s.high = src.s.high >> (sizeof(SItype) * BITS_PER_UNIT - 1);
		res.s.low = src.s.high >> -spare;
	}

	return res.ll;
}

199
arch/blackfin/lib/divsi3.S Normal file
View file

@ -0,0 +1,199 @@
/*
* Copyright 2004-2009 Analog Devices Inc.
*
* Licensed under the Clear BSD license or the GPL-2 (or later)
*
* 16 / 32 bit signed division.
* Special cases :
* 1) If(numerator == 0)
* return 0
* 2) If(denominator ==0)
* return positive max = 0x7fffffff
* 3) If(numerator == denominator)
* return 1
* 4) If(denominator ==1)
* return numerator
* 5) If(denominator == -1)
* return -numerator
*
* Operand : R0 - Numerator (i)
* R1 - Denominator (i)
* R0 - Quotient (o)
* Registers Used : R2-R7,P0-P2
*
*/
.global ___divsi3;
.type ___divsi3, STT_FUNC;
#ifdef CONFIG_ARITHMETIC_OPS_L1
.section .l1.text
#else
.text
#endif
.align 2;
/*
 * ___divsi3: signed divide, R0 = R0 / R1.
 *
 * Both operands are made positive first; R3 carries the bookkeeping
 * needed to fix the result up afterwards (see the comment on the
 * "abs r1" line).  Three strategies follow:
 *   - DIVS/DIVQ primitives when the divisor fits in 15 bits and
 *     (divisor << 16) is greater than the dividend,
 *   - a table of identities / power-of-two shift in .Lidents,
 *   - a 31-iteration shift-and-add/subtract loop otherwise.
 */
___divsi3 :
R3 = R0 ^ R1; /* bit 31 = sign of the quotient */
R0 = ABS R0;
CC = V; /* capture the overflow flag left by the ABS */
r3 = rot r3 by -1; /* rotate right through CC */
r1 = abs r1; /* now both positive, r3.30 means "negate result",
** r3.31 means overflow, add one to result
*/
cc = r0 < r1;
if cc jump .Lret_zero;
/* Divisor needs more than 15 bits: cannot use the primitives. */
r2 = r1 >> 15;
cc = r2;
if cc jump .Lidents;
/* (divisor << 16) <= dividend: cannot use the primitives either. */
r2 = r1 << 16;
cc = r2 <= r0;
if cc jump .Lidents;
/* One DIVS followed by sixteen DIVQ steps; quotient ends up in R0.L. */
DIVS(R0, R1);
DIVQ(R0, R1);
DIVQ(R0, R1);
DIVQ(R0, R1);
DIVQ(R0, R1);
DIVQ(R0, R1);
DIVQ(R0, R1);
DIVQ(R0, R1);
DIVQ(R0, R1);
DIVQ(R0, R1);
DIVQ(R0, R1);
DIVQ(R0, R1);
DIVQ(R0, R1);
DIVQ(R0, R1);
DIVQ(R0, R1);
DIVQ(R0, R1);
DIVQ(R0, R1);
R0 = R0.L (Z);
r1 = r3 >> 31; /* add overflow issue back in */
r0 = r0 + r1;
r1 = -r0;
cc = bittst(r3, 30);
if cc r0 = r1; /* negate when the operand signs differed */
RTS;
/* Can't use the primitives. Test common identities.
** If the identity is true, return the value in R2.
*/
.Lidents:
CC = R1 == 0; /* check for divide by zero */
IF CC JUMP .Lident_return;
CC = R0 == 0; /* check for division of zero */
IF CC JUMP .Lzero_return;
CC = R0 == R1; /* check for identical operands */
IF CC JUMP .Lident_return;
CC = R1 == 1; /* check for divide by 1 */
IF CC JUMP .Lident_return;
/* Exactly one bit set in the divisor means it is a power of two. */
R2.L = ONES R1;
R2 = R2.L (Z);
CC = R2 == 1;
IF CC JUMP .Lpower_of_two;
/* Identities haven't helped either.
** Perform the full division process.
*/
P1 = 31; /* Set loop counter */
[--SP] = (R7:5); /* Push registers R5-R7 */
R2 = -R1;
[--SP] = R2; /* keep -divisor on the stack for the loop */
R2 = R0 << 1; /* R2 lsw of dividend */
R6 = R0 ^ R1; /* Get sign */
R5 = R6 >> 31; /* Shift sign to LSB */
R0 = 0 ; /* Clear msw partial remainder */
R2 = R2 | R5; /* Shift quotient bit */
R6 = R0 ^ R1; /* Get new quotient bit */
LSETUP(.Llst,.Llend) LC0 = P1; /* Setup loop */
.Llst: R7 = R2 >> 31; /* record copy of carry from R2 */
R2 = R2 << 1; /* Shift 64 bit dividend up by 1 bit */
R0 = R0 << 1 || R5 = [SP]; /* R5 = -divisor, reloaded each pass */
R0 = R0 | R7; /* and add carry */
CC = R6 < 0; /* Check quotient(AQ) */
/* we might be subtracting divisor (AQ==0) */
IF CC R5 = R1; /* or we might be adding divisor (AQ==1)*/
R0 = R0 + R5; /* do add or subtract, as indicated by AQ */
R6 = R0 ^ R1; /* Generate next quotient bit */
R5 = R6 >> 31;
/* Assume AQ==1, shift in zero */
BITTGL(R5,0); /* tweak AQ to be what we want to shift in */
.Llend: R2 = R2 + R5; /* and then set shifted-in value to
** tweaked AQ.
*/
r1 = r3 >> 31; /* overflow correction, as on the fast path */
r2 = r2 + r1;
cc = bittst(r3,30);
r0 = -r2;
if !cc r0 = r2; /* keep the positive value unless signs differed */
SP += 4; /* drop the saved -divisor */
(R7:5)= [SP++]; /* Pop registers R5-R7 */
RTS;
.Lident_return:
CC = R1 == 0; /* check for divide by zero => 0x7fffffff */
R2 = -1 (X);
R2 >>= 1;
IF CC JUMP .Ltrue_ident_return;
CC = R0 == R1; /* check for identical operands => 1 */
R2 = 1 (Z);
IF CC JUMP .Ltrue_ident_return;
R2 = R0; /* assume divide by 1 => numerator */
/*FALLTHRU*/
.Ltrue_ident_return:
R0 = R2; /* Return an identity value */
R2 = -R2;
CC = bittst(R3,30);
IF CC R0 = R2; /* negate when the operand signs differed */
.Lzero_return:
RTS; /* ...including zero */
.Lpower_of_two:
/* Y has a single bit set, which means it's a power of two.
** That means we can perform the division just by shifting
** X to the right the appropriate number of bits
*/
/* signbits returns the number of sign bits, minus one.
** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
** to shift right n-signbits spaces. It also means 0x80000000
** is a special case, because that *also* gives a signbits of 0
*/
R2 = R0 >> 31;
CC = R1 < 0;
IF CC JUMP .Ltrue_ident_return;
R1.l = SIGNBITS R1;
R1 = R1.L (Z);
R1 += -30; /* negative count, so the LSHIFT below shifts right */
R0 = LSHIFT R0 by R1.L;
r1 = r3 >> 31;
r0 = r0 + r1;
R2 = -R0; // negate result if necessary
CC = bittst(R3,30);
IF CC R0 = R2;
RTS;
.Lret_zero:
R0 = 0;
RTS;
.size ___divsi3, .-___divsi3

View file

@ -0,0 +1,24 @@
/*
* Copyright 2004-2009 Analog Devices Inc.
*
* Licensed under the GPL-2 or later.
*/
/* Bits per byte; turns sizeof() results into bit counts. */
#define BITS_PER_UNIT 8
/* Width of SItype in bits. */
#define SI_TYPE_SIZE (sizeof (SItype) * BITS_PER_UNIT)
/*
 * Integer types selected by GCC machine mode instead of by C type name.
 * QI, SI and DI are GCC's one-, four- and eight-byte integer modes;
 * __word__ is the target's natural word mode.
 */
typedef unsigned int UQItype __attribute__ ((mode(QI)));
typedef int SItype __attribute__ ((mode(SI)));
typedef unsigned int USItype __attribute__ ((mode(SI)));
typedef int DItype __attribute__ ((mode(DI)));
typedef int word_type __attribute__ ((mode(__word__)));
typedef unsigned int UDItype __attribute__ ((mode(DI)));
/* The two SI halves of a DI value, low half first in memory. */
struct DIstruct {
SItype low, high;
};
/* Overlay used to reach the halves of a DI value (write .ll, read .s). */
typedef union {
struct DIstruct s;
DItype ll;
} DIunion;

118
arch/blackfin/lib/ins.S Normal file
View file

@ -0,0 +1,118 @@
/*
* arch/blackfin/lib/ins.S - ins{bwl} using hardware loops
*
* Copyright 2004-2008 Analog Devices Inc.
* Copyright (C) 2005 Bas Vermeulen, BuyWays BV <bas@buyways.nl>
* Licensed under the GPL-2 or later.
*/
#include <linux/linkage.h>
#include <asm/blackfin.h>
.align 2
/*
 * DO_CLI: turn interrupts off before a port read.
 *  - CONFIG_IPIPE: save RETS and P0-P5 on the stack, call
 *    ___ipipe_disable_root_irqs_hw with 12 bytes of stack reserved for
 *    it, then restore P0-P5.  RETS stays on the stack until DO_STI.
 *  - otherwise: "cli R3", followed (via CLI_INNER_NOP) by three nops.
 */
#ifdef CONFIG_IPIPE
# define DO_CLI \
[--sp] = rets; \
[--sp] = (P5:0); \
sp += -12; \
call ___ipipe_disable_root_irqs_hw; \
sp += 12; \
(P5:0) = [sp++];
# define CLI_INNER_NOP
#else
# define DO_CLI cli R3;
# define CLI_INNER_NOP nop; nop; nop;
#endif
/*
 * DO_STI: turn interrupts back on.  Its last instruction carries the
 * local label "2:", which COMMON_INS uses as the hardware-loop end.
 *  - CONFIG_IPIPE: call ___ipipe_enable_root_irqs_hw, then pop the RETS
 *    value that DO_CLI pushed.
 *  - otherwise: "sti R3", pairing with the "cli R3" in DO_CLI.
 */
#ifdef CONFIG_IPIPE
# define DO_STI \
sp += -12; \
call ___ipipe_enable_root_irqs_hw; \
sp += 12; \
2: rets = [sp++];
#else
# define DO_STI 2: sti R3;
#endif
/*
 * Placement of the CLI/STI pair relative to the transfer loop:
 *  - CONFIG_BFIN_INS_LOWOVERHEAD: once around the whole loop
 *    (CLI_OUTER/STI_OUTER); the INNER macros only supply the loop
 *    labels 1: and 2: (plus two nops when ANOMALY_05000416 is set).
 *  - otherwise: around every iteration (CLI_INNER/STI_INNER); the OUTER
 *    macros expand to nothing.
 */
#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
# define CLI_OUTER DO_CLI;
# define STI_OUTER DO_STI;
# define CLI_INNER 1:
# if ANOMALY_05000416
# define STI_INNER nop; 2: nop;
# else
# define STI_INNER 2:
# endif
#else
# define CLI_OUTER
# define STI_OUTER
# define CLI_INNER 1: DO_CLI; CLI_INNER_NOP;
# define STI_INNER DO_STI;
#endif
/*
* Reads on the Blackfin are speculative. In Blackfin terms, this means they
* can be interrupted at any time (even after they have been issued on to the
* external bus), and re-issued after the interrupt occurs.
*
* If a FIFO is sitting on the end of the read, it will see two reads,
* when the core only sees one. The FIFO receives the read which is cancelled,
* and not delivered to the core.
*
* To solve this, interrupts are turned off before reads occur to I/O space.
* There are 3 versions of all these functions
* - turns interrupts off every read (higher overhead, but lower latency)
* - turns interrupts off every loop (low overhead, but longer latency)
* - DMA version, which do not suffer from this issue. DMA versions have
* different name (prefixed by dma_ ), and are located in
* ../kernel/bfin_dma.c
* Using the dma related functions are recommended for transferring large
* buffers in/out of FIFOs.
*/
/*
 * COMMON_INS(func, ops) - emit the function _ins<func>.
 *
 * Register arguments: R0 = port address, R1 = destination buffer,
 * R2 = number of transfers.  "ops" is the read-from-[P0] /
 * store-to-[P1++] sequence executed once per transfer; it runs inside a
 * hardware loop bounded by the 1: and 2: labels that CLI_INNER and
 * STI_INNER (or DO_STI) provide.
 *
 * NOTE(review): there is no check for R2 == 0 before the LSETUP, unlike
 * the outs* routines - confirm callers never pass a zero count.
 */
#define COMMON_INS(func, ops) \
ENTRY(_ins##func) \
P0 = R0; /* P0 = port */ \
CLI_OUTER; /* 3 instructions before first read access */ \
P1 = R1; /* P1 = address */ \
P2 = R2; /* P2 = count */ \
SSYNC; \
\
LSETUP(1f, 2f) LC0 = P2; \
CLI_INNER; \
ops; \
STI_INNER; \
\
STI_OUTER; \
RTS; \
ENDPROC(_ins##func)
/* _insl: 32-bit port read, stored as one 32-bit word. */
COMMON_INS(l, \
R0 = [P0]; \
[P1++] = R0; \
)
/* _insw: 16-bit port read, stored as one 16-bit word. */
COMMON_INS(w, \
R0 = W[P0]; \
W[P1++] = R0; \
)
/* _insw_8: 16-bit port read, stored as two bytes, low byte first. */
COMMON_INS(w_8, \
R0 = W[P0]; \
B[P1++] = R0; \
R0 = R0 >> 8; \
B[P1++] = R0; \
)
/* _insb: 8-bit port read, stored as one byte. */
COMMON_INS(b, \
R0 = B[P0]; \
B[P1++] = R0; \
)
/* _insl_16: 32-bit port read, stored as two 16-bit halves, low first. */
COMMON_INS(l_16, \
R0 = [P0]; \
W[P1++] = R0; \
R0 = R0 >> 16; \
W[P1++] = R0; \
)

View file

@ -0,0 +1,35 @@
/*
* Copyright 2004-2009 Analog Devices Inc.
*
* Licensed under the GPL-2 or later.
*/
#include "gcclib.h"
#ifdef CONFIG_ARITHMETIC_OPS_L1
DItype __lshrdi3(DItype u, word_type b)__attribute__((l1_text));
#endif
/*
 * __lshrdi3 - logical (zero-filling) right shift of the 64-bit value u
 * by b bits.
 *
 * The operand is handled as two 32-bit halves.  A shift of zero returns
 * u untouched.  When b is at least the width of one half, the high half
 * of the result is zero and the low half is built from the old high
 * half; otherwise the bits that leave the bottom of the high half are
 * carried into the top of the low half.
 */
DItype __lshrdi3(DItype u, word_type b)
{
	DIunion src;
	DIunion res;
	word_type spare;

	if (b == 0)
		return u;

	src.ll = u;
	/* spare = (bits in one half) - b; non-positive means b >= one half */
	spare = (sizeof(SItype) * BITS_PER_UNIT) - b;

	if (spare > 0) {
		/* bits shifted out of the high half land in the low half */
		USItype carries = (USItype) src.s.high << spare;

		res.s.high = (USItype) src.s.high >> b;
		res.s.low = ((USItype) src.s.low >> b) | carries;
	} else {
		res.s.high = 0;
		res.s.low = (USItype) src.s.high >> -spare;
	}

	return res.ll;
}

View file

@ -0,0 +1,47 @@
/*
* Copyright 2005-2009 Analog Devices Inc.
*
* Licensed under the Clear BSD license or the GPL-2 (or later)
*/
#include <linux/linkage.h>
/* void *memchr(const void *s, int c, size_t n);
* R0 = address (s)
* R1 = sought byte (c)
* R2 = count (n)
*
* Returns pointer to located character.
*/
/*
 * _memchr: scan up to R2 bytes starting at R0 for the byte value in the
 * low 8 bits of R1.  Returns the address of the first match in R0, or 0
 * when the count is zero or no byte matches.
 */
.text
.align 2
ENTRY(_memchr)
P0 = R0; /* P0 = address */
P2 = R2; /* P2 = count */
R1 = R1.B(Z); /* compare against the low byte of c only */
CC = R2 == 0;
IF CC JUMP .Lfailed; /* nothing to search */
.Lbytes:
LSETUP (.Lbyte_loop_s, .Lbyte_loop_e) LC0=P2;
.Lbyte_loop_s:
R3 = B[P0++](Z);
CC = R3 == R1;
IF CC JUMP .Lfound;
.Lbyte_loop_e:
NOP;
/* Loop ran to completion without a match. */
.Lfailed:
R0=0;
RTS;
.Lfound:
R0 = P0;
R0 += -1; /* P0 was post-incremented past the matching byte */
RTS;
ENDPROC(_memchr)

View file

@ -0,0 +1,92 @@
/*
* Copyright 2004-2009 Analog Devices Inc.
*
* Licensed under the Clear BSD license or the GPL-2 (or later)
*/
#include <linux/linkage.h>
/* int memcmp(const void *s1, const void *s2, size_t n);
* R0 = First Address (s1)
* R1 = Second Address (s2)
* R2 = count (n)
*
* Favours word aligned data.
*/
/*
 * _memcmp: compare R2 bytes at R0 (s1) and R1 (s2).
 * Returns 0 in R0 when equal, otherwise (*s1 - *s2) for the first
 * differing byte pair, both bytes zero-extended.
 *
 * P3 is used as the s2 byte pointer; its incoming value is parked in I1
 * on entry and put back before every RTS.
 */
.text
.align 2
ENTRY(_memcmp)
I1 = P3; /* save caller's P3 */
P0 = R0; /* P0 = s1 address */
P3 = R1; /* P3 = s2 Address */
P2 = R2 ; /* P2 = count */
CC = R2 <= 7(IU);
IF CC JUMP .Ltoo_small; /* short compares go straight to the byte loop */
I0 = R1; /* s2 */
R1 = R1 | R0; /* OR addresses together */
R1 <<= 30; /* check bottom two bits */
CC = AZ; /* AZ set if zero. */
IF !CC JUMP .Lbytes ; /* Jump if addrs not aligned. */
P1 = P2 >> 2; /* count = n/4 */
R3 = 3;
R2 = R2 & R3; /* remainder */
P2 = R2; /* set remainder */
LSETUP (.Lquad_loop_s, .Lquad_loop_e) LC0=P1;
.Lquad_loop_s:
#if ANOMALY_05000202
R0 = [P0++];
R1 = [I0++];
#else
MNOP || R0 = [P0++] || R1 = [I0++];
#endif
CC = R0 == R1;
IF !CC JUMP .Lquad_different;
.Lquad_loop_e:
NOP;
P3 = I0; /* s2 */
.Ltoo_small:
CC = P2 == 0; /* Check zero count*/
IF CC JUMP .Lfinished; /* very unlikely*/
.Lbytes:
LSETUP (.Lbyte_loop_s, .Lbyte_loop_e) LC0=P2;
.Lbyte_loop_s:
R1 = B[P3++](Z); /* *s2 */
R0 = B[P0++](Z); /* *s1 */
CC = R0 == R1;
IF !CC JUMP .Ldifferent;
.Lbyte_loop_e:
NOP;
/* Also reached by falling out of the loop with every byte equal; the
 * last pair read is then identical, so the subtraction yields 0.
 */
.Ldifferent:
R0 = R0 - R1;
P3 = I1; /* restore caller's P3 */
RTS;
.Lquad_different:
/* We've read two quads which don't match.
* Can't just compare them, because we're
* a little-endian machine, so the MSBs of
* the regs occur at later addresses in the
* string.
* Arrange to re-read those two quads again,
* byte-by-byte.
*/
P0 += -4; /* back up to the start of the */
P3 = I0; /* quads, and increase the*/
P2 += 4; /* remainder count*/
P3 += -4;
JUMP .Lbytes;
.Lfinished:
R0 = 0;
P3 = I1; /* restore caller's P3 */
RTS;
ENDPROC(_memcmp)

124
arch/blackfin/lib/memcpy.S Normal file
View file

@ -0,0 +1,124 @@
/*
* internal version of memcpy(), issued by the compiler to copy blocks of
* data around. This is really memmove() - it has to be able to deal with
* possible overlaps, because that ambiguity is when the compiler gives up
* and calls a function. We have our own, internal version so that we get
* something we trust, even if the user has redefined the normal symbol.
*
* Copyright 2004-2009 Analog Devices Inc.
*
* Licensed under the Clear BSD license or the GPL-2 (or later)
*/
#include <linux/linkage.h>
/* void *memcpy(void *dest, const void *src, size_t n);
* R0 = To Address (dest) (leave unchanged to form result)
* R1 = From Address (src)
* R2 = count
*
* Note: Favours word alignment
*/
/*
 * _memcpy: copy R2 bytes from R1 to R0; R0 is never written, so it is
 * returned unchanged.  A count <= 0 (signed) copies nothing.
 *
 * Paths:
 *  - src < dst < src+len: byte copy running backwards (.Lhas_overlap)
 *  - either address not word-aligned: forward byte copy
 *  - both aligned, at most two whole words: forward byte copy
 *  - both aligned, three or more whole words: pipelined word copy, then
 *    a byte copy of the 0-3 leftover bytes
 */
#ifdef CONFIG_MEMCPY_L1
.section .l1.text
#else
.text
#endif
.align 2
ENTRY(_memcpy)
CC = R2 <= 0; /* length not positive? */
IF CC JUMP .L_P1L2147483647; /* Nothing to do */
P0 = R0 ; /* dst*/
P1 = R1 ; /* src*/
P2 = R2 ; /* length */
/* check for overlapping data */
CC = R1 < R0; /* src < dst */
IF !CC JUMP .Lno_overlap;
R3 = R1 + R2;
CC = R0 < R3; /* and dst < src+len */
IF CC JUMP .Lhas_overlap;
.Lno_overlap:
/* Check for aligned data.*/
R3 = R1 | R0;
R1 = 0x3;
R3 = R3 & R1;
CC = R3; /* low bits set on either address? */
IF CC JUMP .Lnot_aligned;
/* Both addresses are word-aligned, so we can copy
at least part of the data using word copies.*/
P2 = P2 >> 2; /* P2 = number of whole words */
CC = P2 <= 2;
IF !CC JUMP .Lmore_than_seven;
/* at most two whole words (fewer than twelve bytes): byte copy */
P2 = R2;
LSETUP(.Lthree_start, .Lthree_end) LC0=P2;
.Lthree_start:
R3 = B[P1++] (X);
.Lthree_end:
B[P0++] = R3;
RTS;
.Lmore_than_seven:
/* There are at least three whole words (twelve bytes) to copy. */
P2 += -1; /* because we unroll one iteration */
LSETUP(.Lword_loops, .Lword_loope) LC0=P2;
I1 = P1;
R3 = [I1++]; /* prime the pipeline with the first word */
#if ANOMALY_05000202
.Lword_loops:
[P0++] = R3;
.Lword_loope:
R3 = [I1++];
#else
.Lword_loops:
.Lword_loope:
MNOP || [P0++] = R3 || R3 = [I1++];
#endif
[P0++] = R3; /* store the last word loaded */
/* Any remaining bytes to copy? */
R3 = 0x3;
R3 = R2 & R3;
CC = R3 == 0;
P1 = I1; /* in case there's something left, */
IF !CC JUMP .Lbytes_left;
RTS;
.Lbytes_left: P2 = R3;
.Lnot_aligned:
/* From here, we're copying byte-by-byte. */
LSETUP (.Lbyte_start, .Lbyte_end) LC0=P2;
.Lbyte_start:
R1 = B[P1++] (X);
.Lbyte_end:
B[P0++] = R1;
.L_P1L2147483647:
RTS;
.Lhas_overlap:
/* Need to reverse the copying, because the
* dst would clobber the src.
* Don't bother to work out alignment for
* the reverse case.
*/
P0 = P0 + P2;
P0 += -1; /* P0 = last byte of dst */
P1 = P1 + P2;
P1 += -1; /* P1 = last byte of src */
LSETUP(.Lover_start, .Lover_end) LC0=P2;
.Lover_start:
R1 = B[P1--] (X);
.Lover_end:
B[P0--] = R1;
RTS;
ENDPROC(_memcpy)

View file

@ -0,0 +1,93 @@
/*
* Copyright 2005-2009 Analog Devices Inc.
*
* Licensed under the Clear BSD license or the GPL-2 (or later)
*/
#include <linux/linkage.h>
.align 2
/*
* C Library function MEMMOVE
* R0 = To Address (leave unchanged to form result)
* R1 = From Address
* R2 = count
* Data may overlap
*/
/*
 * _memmove: copy R2 bytes from R1 to R0, handling overlap.  R0 is never
 * written, so it is returned unchanged.
 *
 * P3 is used as the source pointer; its incoming value is parked in I1
 * on entry and put back before every RTS.
 */
ENTRY(_memmove)
I1 = P3; /* save caller's P3 */
P0 = R0; /* P0 = To address */
P3 = R1; /* P3 = From Address */
P2 = R2; /* P2 = count */
CC = P2 == 0; /* Check zero count*/
IF CC JUMP .Lfinished; /* very unlikely */
CC = R1 < R0 (IU); /* From < To */
IF !CC JUMP .Lno_overlap;
R3 = R1 + R2;
CC = R0 <= R3 (IU); /* (From+len) >= To */
IF CC JUMP .Loverlap;
.Lno_overlap:
/* Eleven bytes or fewer: not worth the word-copy setup. */
R3 = 11;
CC = R2 <= R3;
IF CC JUMP .Lbytes;
R3 = R1 | R0; /* OR addresses together */
R3 <<= 30; /* check bottom two bits */
CC = AZ; /* AZ set if zero.*/
IF !CC JUMP .Lbytes; /* Jump if addrs not aligned.*/
I0 = P3;
P1 = P2 >> 2; /* count = n/4 */
P1 += -1; /* one word is loaded ahead of the loop */
R3 = 3;
R2 = R2 & R3; /* remainder */
P2 = R2; /* set remainder */
R1 = [I0++]; /* prime the pipeline with the first word */
LSETUP (.Lquad_loops, .Lquad_loope) LC0=P1;
#if ANOMALY_05000202
.Lquad_loops:
[P0++] = R1;
.Lquad_loope:
R1 = [I0++];
#else
.Lquad_loops:
.Lquad_loope:
MNOP || [P0++] = R1 || R1 = [I0++];
#endif
[P0++] = R1; /* store the last word loaded */
CC = P2 == 0; /* any remaining bytes? */
P3 = I0; /* Amend P3 to updated ptr. */
IF !CC JUMP .Lbytes;
P3 = I1; /* restore caller's P3 */
RTS;
.Lbytes: LSETUP (.Lbyte2_s, .Lbyte2_e) LC0=P2;
.Lbyte2_s: R1 = B[P3++](Z);
.Lbyte2_e: B[P0++] = R1;
.Lfinished: P3 = I1; /* restore caller's P3 */
RTS;
/* Backwards byte copy: start from the last byte of each buffer. */
.Loverlap:
P2 += -1;
P0 = P0 + P2;
P3 = P3 + P2;
R1 = B[P3--] (Z); /* first byte is loaded ahead of the loop */
CC = P2 == 0;
IF CC JUMP .Lno_loop; /* single-byte move */
#if ANOMALY_05000245
NOP;
NOP;
#endif
LSETUP (.Lol_s, .Lol_e) LC0 = P2;
.Lol_s: B[P0--] = R1;
.Lol_e: R1 = B[P3--] (Z);
.Lno_loop: B[P0] = R1; /* store the last byte loaded */
P3 = I1; /* restore caller's P3 */
RTS;
ENDPROC(_memmove)

View file

@ -0,0 +1,87 @@
/*
* Copyright 2004-2009 Analog Devices Inc.
*
* Licensed under the Clear BSD license or the GPL-2 (or later)
*/
#include <linux/linkage.h>
.align 2
#ifdef CONFIG_MEMSET_L1
.section .l1.text
#else
.text
#endif
/*
* C Library function MEMSET
* R0 = address (leave unchanged to form result)
* R1 = filler byte
* R2 = count
* Favours word aligned data.
* The strncpy assumes that I0 and I1 are not used in this function
*/
/*
 * _memset: store the low byte of R1 into R2 bytes starting at R0.
 * R0 holds the start address again on return.
 *
 * Counts of 7 or fewer use the byte loop directly.  Larger fills first
 * byte-store up to a word boundary (.Lforce_align), then store whole
 * words, then byte-store whatever is left before the end address.
 */
ENTRY(_memset)
P0 = R0 ; /* P0 = address */
P2 = R2 ; /* P2 = count */
R3 = R0 + R2; /* end */
CC = R2 <= 7(IU);
IF CC JUMP .Ltoo_small;
R1 = R1.B (Z); /* R1 = fill char */
R2 = 3;
R2 = R0 & R2; /* addr bottom two bits */
CC = R2 == 0; /* CC set if already word-aligned */
IF !CC JUMP .Lforce_align ; /* Jump if addr not aligned. */
.Laligned:
P1 = P2 >> 2; /* count = n/4 */
R2 = R1 << 8; /* create quad filler */
R2.L = R2.L + R1.L(NS);
R2.H = R2.L + R1.H(NS); /* R1.H is zero after the (Z) extension */
P2 = R3; /* P2 = end address */
LSETUP (.Lquad_loop , .Lquad_loop) LC0=P1;
.Lquad_loop:
[P0++] = R2;
CC = P0 == P2;
IF !CC JUMP .Lbytes_left;
RTS;
.Lbytes_left:
R2 = R3; /* end point */
R3 = P0; /* current position */
R2 = R2 - R3; /* bytes left */
P2 = R2;
.Ltoo_small:
CC = P2 == 0; /* Check zero count */
IF CC JUMP .Lfinished; /* Unusual */
.Lbytes:
LSETUP (.Lbyte_loop , .Lbyte_loop) LC0=P2;
.Lbyte_loop:
B[P0++] = R1;
.Lfinished:
RTS;
/* R2 = address & 3 (1, 2 or 3); store 4 - R2 bytes to reach alignment. */
.Lforce_align:
CC = BITTST (R0, 0); /* odd byte */
R0 = 4;
R0 = R0 - R2;
P1 = R0; /* P1 = bytes needed to reach alignment */
R0 = P0; /* Recover return address */
IF !CC JUMP .Lskip1;
B[P0++] = R1; /* odd start address: one byte */
.Lskip1:
CC = R2 <= 2; /* 2 bytes */
P2 -= P1; /* reduce count */
IF !CC JUMP .Laligned; /* offset 3: the single byte above was enough */
B[P0++] = R1;
B[P0++] = R1;
JUMP .Laligned;
ENDPROC(_memset)

View file

@ -0,0 +1,57 @@
/*
* This program computes 32 bit signed remainder. It calls div32 function
* for quotient estimation.
* Registers in: R0, R1 = Numerator/ Denominator
* Registers out: R0 = Remainder
*
* Copyright 2004-2009 Analog Devices Inc.
*
* Licensed under the Clear BSD license or the GPL-2 (or later)
*/
.global ___modsi3;
.type ___modsi3, STT_FUNC;
.extern ___divsi3;
.type ___divsi3, STT_FUNC;
#ifdef CONFIG_ARITHMETIC_OPS_L1
.section .l1.text
#else
.text
#endif
/*
 * ___modsi3: signed remainder, R0 = R0 % R1.
 * Trivial cases return 0 directly (including a zero denominator);
 * everything else is computed as R0 - (R0 / R1) * R1 with the quotient
 * obtained from ___divsi3.
 */
___modsi3:
CC=R0==0;
IF CC JUMP .LRETURN_R0; /* Return 0, if numerator == 0 */
CC=R1==0;
IF CC JUMP .LRETURN_ZERO; /* Return 0, if denominator == 0 */
CC=R0==R1;
IF CC JUMP .LRETURN_ZERO; /* Return 0, if numerator == denominator */
CC = R1 == 1;
IF CC JUMP .LRETURN_ZERO; /* Return 0, if denominator == 1 */
CC = R1 == -1;
IF CC JUMP .LRETURN_ZERO; /* Return 0, if denominator == -1 */
/* Valid input. Use __divsi3() to compute the quotient, and then
* derive the remainder from that. */
[--SP] = (R7:6); /* Push R7 and R6 */
[--SP] = RETS; /* and return address */
R7 = R0; /* Copy of R0 */
R6 = R1; /* Save for later */
SP += -12; /* Should always provide this space */
CALL ___divsi3; /* Compute signed quotient using ___divsi3()*/
SP += 12;
R0 *= R6; /* Quotient * divisor */
R0 = R7 - R0; /* Dividend - (quotient * divisor) */
RETS = [SP++]; /* Get back return address */
(R7:6) = [SP++]; /* Pop registers R7 and R6 */
RTS; /* Return remainder in R0 */
.LRETURN_ZERO:
R0 = 0;
.LRETURN_R0:
RTS;
.size ___modsi3, .-___modsi3

View file

@ -0,0 +1,74 @@
/*
* Copyright 2008 Analog Devices Inc.
*
* Licensed under the Clear BSD license or the GPL-2 (or later)
*/
.align 2
.global ___muldi3;
.type ___muldi3, STT_FUNC;
#ifdef CONFIG_ARITHMETIC_OPS_L1
.section .l1.text
#else
.text
#endif
/*
R1:R0 * R3:R2
= R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l
[X] = (R1.h * R3.h) * 2^96
[X] + (R1.h * R3.l + R1.l * R3.h) * 2^80
[X] + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64
[T1] + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48
[T2] + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32
[T3] + (R0.l * R2.h + R2.l * R0.h) * 2^16
[T4] + (R0.l * R2.l)
We can discard the first three lines marked "X" since we produce
only a 64 bit result. So, we need ten 16-bit multiplies.
Individual mul-acc results:
[E1] = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h
[E2] = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h
[E3] = R0.l * R2.h + R2.l * R0.h
[E4] = R0.l * R2.l
We also need to add high parts from lower-level results to higher ones:
E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4
One interesting property is that all parts of the result that depend
on the sign of the multiplication are discarded. Those would be the
multiplications involving R1.h and R3.h, but only the top 16 bit of
the 32 bit result depend on the sign, and since R1.h and R3.h only
occur in E1, the top half of these results is cut off.
So, we can just use FU mode for all of the 16-bit multiplies, and
ignore questions of when to use mixed mode. */
/*
 * ___muldi3: 64 x 64 -> 64 bit multiply, R1:R0 = R1:R0 * R3:R2.
 * R3 is fetched from [SP + 12] by the first instruction.  R4 is used as
 * a temporary and is saved to / restored from [SP].
 * The E1..E4 tags below refer to the partial sums defined in the
 * derivation comment that precedes this function.
 */
___muldi3:
/* [SP] technically is part of the caller's frame, but we can
use it as scratch space. */
A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12]; /* E1 */
A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4; /* E1 */
A0 += A1; /* E1 */
R4 = A0.w; /* R4 = E1 */
A0 = R0.l * R3.l (FU); /* E2 */
A0 += R2.l * R1.l (FU); /* E2 */
A1 = R2.L * R0.L (FU); /* E4 */
R3 = A1.w; /* R3 = E4 */
A1 = A1 >> 16; /* E3c */
A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU); /* E2, E3c */
A1 += R0.L * R2.H (FU); /* E3c */
R0 = A1.w; /* R0 = E3c */
A1 = A1 >> 16; /* E2c */
A0 += A1; /* E2c */
R1 = A0.w; /* R1 = E2c */
/* low(result) = low(E3c):low(E4) */
R0 = PACK (R0.l, R3.l);
/* high(result) = E2c + (E1 << 16) */
R1.h = R1.h + R4.l (NS) || R4 = [SP]; /* also restores R4 */
RTS;
.size ___muldi3, .-___muldi3

68
arch/blackfin/lib/outs.S Normal file
View file

@ -0,0 +1,68 @@
/*
* Implementation of outs{bwl} for BlackFin processors using zero overhead loops.
*
* Copyright 2005-2009 Analog Devices Inc.
* 2005 BuyWays BV
* Bas Vermeulen <bas@buyways.nl>
*
* Licensed under the GPL-2.
*/
#include <linux/linkage.h>
.align 2
/*
 * _outsl: write R2 32-bit words from the buffer at R1 to the port at R0.
 * The port address is not advanced.  A zero count returns immediately.
 */
ENTRY(_outsl)
CC = R2 == 0;
IF CC JUMP 1f;
P0 = R0; /* P0 = port */
P1 = R1; /* P1 = address */
P2 = R2; /* P2 = count */
LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2;
.Llong_loop_s: R0 = [P1++];
.Llong_loop_e: [P0] = R0;
1: RTS;
ENDPROC(_outsl)
/*
 * _outsw: write R2 16-bit words from the buffer at R1 to the port at R0.
 * The port address is not advanced.  A zero count returns immediately.
 */
ENTRY(_outsw)
CC = R2 == 0;
IF CC JUMP 1f;
P0 = R0; /* P0 = port */
P1 = R1; /* P1 = address */
P2 = R2; /* P2 = count */
LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2;
.Lword_loop_s: R0 = W[P1++];
.Lword_loop_e: W[P0] = R0;
1: RTS;
ENDPROC(_outsw)
/*
 * _outsb: write R2 bytes from the buffer at R1 to the port at R0.
 * The port address is not advanced.  A zero count returns immediately.
 */
ENTRY(_outsb)
CC = R2 == 0;
IF CC JUMP 1f;
P0 = R0; /* P0 = port */
P1 = R1; /* P1 = address */
P2 = R2; /* P2 = count */
LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2;
.Lbyte_loop_s: R0 = B[P1++];
.Lbyte_loop_e: B[P0] = R0;
1: RTS;
ENDPROC(_outsb)
/*
 * _outsw_8: write R2 16-bit words to the port at R0, reading the buffer
 * at R1 one byte at a time (low byte first, then high byte) and
 * combining each pair before the 16-bit store.
 * The port address is not advanced.  A zero count returns immediately.
 */
ENTRY(_outsw_8)
CC = R2 == 0;
IF CC JUMP 1f;
P0 = R0; /* P0 = port */
P1 = R1; /* P1 = address */
P2 = R2; /* P2 = count */
LSETUP( .Lword8_loop_s, .Lword8_loop_e) LC0 = P2;
.Lword8_loop_s: R1 = B[P1++]; /* low byte */
R0 = B[P1++]; /* high byte */
R0 = R0 << 8;
R0 = R0 + R1;
.Lword8_loop_e: W[P0] = R0;
1: RTS;
ENDPROC(_outsw_8)

View file

@ -0,0 +1,38 @@
/*
* Copyright 2007 Analog Devices Inc.
*
* Licensed under the Clear BSD license or the GPL-2 (or later)
*/
.align 2
.global ___smulsi3_highpart;
.type ___smulsi3_highpart, STT_FUNC;
#ifdef CONFIG_ARITHMETIC_OPS_L1
.section .l1.text
#else
.text
#endif
/*
 * ___smulsi3_highpart: R0 = upper 32 bits of the signed 64-bit product
 * R0 * R1 (per the routine's name; the code sums 16-bit partial
 * products).
 *
 * R2 = low * low, R3 and R1 = the two cross products, R0 = high * high.
 * The upper half of R2 and the lower halves of both cross products are
 * added in 16-bit steps purely to collect the two carries; the cross
 * products are then shifted down arithmetically by 16 and everything is
 * summed into R0.
 */
___smulsi3_highpart:
R2 = R1.L * R0.L (FU);
R3 = R1.H * R0.L (IS,M);
R0 = R0.H * R1.H, R1 = R0.H * R1.L (IS,M);
R1.L = R2.H + R1.L;
cc = ac0;
R2 = cc; /* R2 = carry out of the first 16-bit add */
R1.L = R1.L + R3.L;
cc = ac0; /* second carry stays in CC until it is read below */
R1 >>>= 16;
R3 >>>= 16;
R1 = R1 + R3;
R1 = R1 + R2;
R2 = cc; /* R2 = carry out of the second 16-bit add */
R1 = R1 + R2;
R0 = R0 + R1;
RTS;
.size ___smulsi3_highpart, .-___smulsi3_highpart

View file

@ -0,0 +1,43 @@
/*
* Copyright 2005-2010 Analog Devices Inc.
*
* Licensed under the Clear BSD license or the GPL-2 (or later)
*/
#include <linux/linkage.h>
/* int strcmp(const char *s1, const char *s2);
* R0 = address (s1)
* R1 = address (s2)
*
* Returns an integer less than, equal to, or greater than zero if s1
* is found, respectively, to be less than, to match, or be greater
* than s2.
*/
/*
 * _strcmp: compare the NUL-terminated strings at R0 and R1 byte by byte.
 * Returns 0 in R0 when both strings end together, otherwise the
 * difference (*s1 - *s2) of the first mismatching bytes, zero-extended.
 */
#ifdef CONFIG_STRCMP_L1
.section .l1.text
#else
.text
#endif
.align 2
ENTRY(_strcmp)
P0 = R0 ; /* s1 */
P1 = R1 ; /* s2 */
1:
R0 = B[P0++] (Z); /* get *s1 */
R1 = B[P1++] (Z); /* get *s2 */
CC = R0 == R1; /* compare a byte */
if ! cc jump 2f; /* not equal, break out */
CC = R0; /* at end of s1? */
if cc jump 1b (bp); /* no, keep going */
jump.s 3f; /* strings are equal; R0 is already 0 here */
2:
R0 = R0 - R1; /* *s1 - *s2 */
3:
RTS;
ENDPROC(_strcmp)

View file

@ -0,0 +1,35 @@
/*
* Copyright 2005-2010 Analog Devices Inc.
*
* Licensed under the Clear BSD license or the GPL-2 (or later)
*/
#include <linux/linkage.h>
/* void *strcpy(char *dest, const char *src);
* R0 = address (dest)
* R1 = address (src)
*
* Returns a pointer to the destination string dest
*/
/*
 * _strcpy: copy bytes from R1 to R0 up to and including the terminating
 * NUL.  R0 is never written, so the destination pointer is returned
 * unchanged.
 */
#ifdef CONFIG_STRCPY_L1
.section .l1.text
#else
.text
#endif
.align 2
ENTRY(_strcpy)
P0 = R0 ; /* dst*/
P1 = R1 ; /* src*/
1:
R1 = B [P1++] (Z);
B [P0++] = R1; /* store first, so the NUL itself is copied too */
CC = R1;
if cc jump 1b (bp); /* loop until the byte just copied was NUL */
RTS;
ENDPROC(_strcpy)

View file

@ -0,0 +1,52 @@
/*
* Copyright 2005-2010 Analog Devices Inc.
*
* Licensed under the Clear BSD license or the GPL-2 (or later)
*/
#include <linux/linkage.h>
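/* int strncmp(const char *s1, const char *s2, size_t n);
* R0 = address (s1)
* R1 = address (s2)
* R2 = size (n)
* Returns an integer less than, equal to, or greater than zero if the
* first n bytes of s1 are found, respectively, to be less than, to
* match, or be greater than those of s2.
*/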
/*
 * _strncmp: compare at most R2 bytes of the strings at R0 and R1.
 * Returns 0 in R0 when n is zero, when n bytes matched, or when both
 * strings end together; otherwise the difference (*s1 - *s2) of the
 * first mismatching bytes, zero-extended.
 */
#ifdef CONFIG_STRNCMP_L1
.section .l1.text
#else
.text
#endif
.align 2
ENTRY(_strncmp)
CC = R2 == 0;
if CC JUMP 5f; /* n == 0: equal by definition */
P0 = R0 ; /* s1 */
P1 = R1 ; /* s2 */
1:
R0 = B[P0++] (Z); /* get *s1 */
R1 = B[P1++] (Z); /* get *s2 */
CC = R0 == R1; /* compare a byte */
if ! cc jump 3f; /* not equal, break out */
CC = R0; /* at end of s1? */
if ! cc jump 4f; /* yes, all done; R0 is already 0 here */
R2 += -1; /* no, adjust count */
CC = R2 == 0;
if ! cc jump 1b (bp); /* more to do, keep going */
2:
R0 = 0; /* strings are equal */
jump.s 4f;
3:
R0 = R0 - R1; /* *s1 - *s2 */
4:
RTS;
5:
R0 = 0;
RTS;
ENDPROC(_strncmp)

View file

@ -0,0 +1,85 @@
/*
* Copyright 2005-2010 Analog Devices Inc.
*
* Licensed under the Clear BSD license or the GPL-2 (or later)
*/
#include <linux/linkage.h>
#include <asm/context.S>
/* void *strncpy(char *dest, const char *src, size_t n);
* R0 = address (dest)
* R1 = address (src)
* R2 = size
* Returns a pointer (R0) to the destination string dest
* we do this by not changing R0
*/
/*
 * _strncpy: copy at most R2 bytes from R1 to R0, stopping after a NUL
 * has been copied and then zero-filling the rest of the destination.
 * R0 holds the destination pointer again on return.
 *
 * Flow:
 *  - n == 0: return at once.
 *  - hardware loop copies bytes; it is left early (to 3:) right after a
 *    NUL byte has been stored.  If it runs to completion no NUL was
 *    seen and nothing is padded.
 *  - 3: the remaining loop count is read back from LC0; zero means
 *    there is nothing to pad.
 *  - fewer than 0x20 bytes to pad: inline byte loop at 4:/5:.
 *  - otherwise: _memset does the padding.  R0 is parked in I1 and RETS
 *    in I0 around the call.
 *
 * NOTE(review): the inline path loops LC0 times while the _memset path
 * passes LC0 - 1 as the count.  Confirm which is right against the
 * hardware-loop behaviour when the loop-end instruction is a taken
 * branch.
 */
#ifdef CONFIG_STRNCPY_L1
.section .l1.text
#else
.text
#endif
.align 2
ENTRY(_strncpy)
CC = R2 == 0;
if CC JUMP 6f;
P2 = R2 ; /* size */
P0 = R0 ; /* dst*/
P1 = R1 ; /* src*/
LSETUP (1f, 2f) LC0 = P2;
1:
R1 = B [P1++] (Z);
B [P0++] = R1;
CC = R1 == 0;
2:
if CC jump 3f; /* just copied the NUL: leave the loop */
RTS;
/* if src is shorter than n, we need to null pad bytes in dest
* but, we can get here when the last byte is zero, and we don't
* want to copy an extra byte at the end, so we need to check
*/
3:
R2 = LC0;
CC = R2; /* statement terminator added; every other statement has one */
if ! CC jump 6f;
/* if the required null padded portion is small, do it here, rather than
* handling the overhead of memset (which is OK when things are big).
*/
R3 = 0x20;
CC = R2 < R3;
IF CC jump 4f;
R2 += -1;
/* Set things up for memset
* R0 = address
* R1 = filler byte (this case it's zero, set above)
* R2 = count (set above)
*/
I1 = R0; /* park the return value (dest) */
R0 = RETS;
I0 = R0; /* park our return address */
R0 = P0; /* memset starts where the copy stopped */
pseudo_long_call _memset, p0;
R0 = I0;
RETS = R0; /* restore our return address */
R0 = I1; /* restore dest as the return value */
RTS;
4:
LSETUP(5f, 5f) LC0; /* reuse the count still held in LC0 */
5:
B [P0++] = R1; /* R1 is zero here: the last byte loaded was the NUL */
6:
RTS;
ENDPROC(_strncpy)

277
arch/blackfin/lib/udivsi3.S Normal file
View file

@ -0,0 +1,277 @@
/*
* Copyright 2004-2009 Analog Devices Inc.
*
* Licensed under the Clear BSD license or the GPL-2 (or later)
*/
#include <linux/linkage.h>
#define CARRY AC0
#ifdef CONFIG_ARITHMETIC_OPS_L1
.section .l1.text
#else
.text
#endif
ENTRY(___udivsi3)
CC = R0 < R1 (IU); /* If X < Y, always return 0 */
IF CC JUMP .Lreturn_ident;
R2 = R1 << 16;
CC = R2 <= R0 (IU);
IF CC JUMP .Lidents;
R2 = R0 >> 31; /* if X is a 31-bit number */
R3 = R1 >> 15; /* and Y is a 15-bit number */
R2 = R2 | R3; /* then it's okay to use the DIVQ builtins (fallthrough to fast)*/
CC = R2;
IF CC JUMP .Ly_16bit;
/* METHOD 1: FAST DIVQ
We know we have a 31-bit dividend, and 15-bit divisor so we can use the
simple divq approach (first setting AQ to 0 - implying unsigned division,
then 16 DIVQ's).
*/
AQ = CC; /* Clear AQ (CC==0) */
/* ISR States: When dividing two integers (32.0/16.0) using divide primitives,
we need to shift the dividend one bit to the left.
We have already checked that we have a 31-bit number so we are safe to do
that.
*/
R0 <<= 1;
DIVQ(R0, R1); // 1
DIVQ(R0, R1); // 2
DIVQ(R0, R1); // 3
DIVQ(R0, R1); // 4
DIVQ(R0, R1); // 5
DIVQ(R0, R1); // 6
DIVQ(R0, R1); // 7
DIVQ(R0, R1); // 8
DIVQ(R0, R1); // 9
DIVQ(R0, R1); // 10
DIVQ(R0, R1); // 11
DIVQ(R0, R1); // 12
DIVQ(R0, R1); // 13
DIVQ(R0, R1); // 14
DIVQ(R0, R1); // 15
DIVQ(R0, R1); // 16
R0 = R0.L (Z);
RTS;
.Ly_16bit:
/* We know that the upper 17 bits of Y might have bits set,
** or that the sign bit of X might have a bit. If Y is a
** 16-bit number, but not bigger, then we can use the builtins
** with a post-divide correction.
** R3 currently holds Y>>15, which means R3's LSB is the
** bit we're interested in.
*/
/* According to the ISR, to use the Divide primitives for
** unsigned integer divide, the useable range is 31 bits
*/
CC = ! BITTST(R0, 31);
/* IF condition is true we can scale our inputs and use the divide primitives,
** with some post-adjustment
*/
R3 += -1; /* if so, Y is 0x00008nnn */
CC &= AZ;
/* If condition is true we can scale our inputs and use the divide primitives,
** with some post-adjustment
*/
R3 = R1 >> 1; /* Pre-scaled divisor for primitive case */
R2 = R0 >> 16;
R2 = R3 - R2; /* shifted divisor < upper 16 bits of dividend */
CC &= CARRY;
IF CC JUMP .Lshift_and_correct;
/* Fall through to the identities */
/* METHOD 2: identities and manual calculation
We are not able to use the divide primites, but may still catch some special
cases.
*/
.Lidents:
/* Test for common identities. Value to be returned is placed in R2. */
CC = R0 == 0; /* 0/Y => 0 */
IF CC JUMP .Lreturn_r0;
CC = R0 == R1; /* X==Y => 1 */
IF CC JUMP .Lreturn_ident;
CC = R1 == 1; /* X/1 => X */
IF CC JUMP .Lreturn_ident;
R2.L = ONES R1;
R2 = R2.L (Z);
CC = R2 == 1;
IF CC JUMP .Lpower_of_two;
[--SP] = (R7:5); /* Push registers R5-R7 */
/* Idents don't match. Go for the full operation. */
R6 = 2; /* assume we'll shift two */
R3 = 1;
P2 = R1;
/* If either R0 or R1 have sign set, */
/* divide them by two, and note it's */
/* been done. */
CC = R1 < 0;
R2 = R1 >> 1;
IF CC R1 = R2; /* Possibly-shifted R1 */
IF !CC R6 = R3; /* R1 doesn't, so at most 1 shifted */
P0 = 0;
R3 = -R1;
[--SP] = R3;
R2 = R0 >> 1;
R2 = R0 >> 1;
CC = R0 < 0;
IF CC P0 = R6; /* Number of values divided */
IF !CC R2 = R0; /* Shifted R0 */
/* P0 is 0, 1 (NR/=2) or 2 (NR/=2, DR/=2) */
/* r2 holds Copy dividend */
R3 = 0; /* Clear partial remainder */
R7 = 0; /* Initialise quotient bit */
P1 = 32; /* Set loop counter */
LSETUP(.Lulst, .Lulend) LC0 = P1; /* Set loop counter */
.Lulst: R6 = R2 >> 31; /* R6 = sign bit of R2, for carry */
R2 = R2 << 1; /* Shift 64 bit dividend up by 1 bit */
R3 = R3 << 1 || R5 = [SP];
R3 = R3 | R6; /* Include any carry */
CC = R7 < 0; /* Check quotient(AQ) */
/* If AQ==0, we'll sub divisor */
IF CC R5 = R1; /* and if AQ==1, we'll add it. */
R3 = R3 + R5; /* Add/sub divsor to partial remainder */
R7 = R3 ^ R1; /* Generate next quotient bit */
R5 = R7 >> 31; /* Get AQ */
BITTGL(R5, 0); /* Invert it, to get what we'll shift */
.Lulend: R2 = R2 + R5; /* and "shift" it in. */
CC = P0 == 0; /* Check how many inputs we shifted */
IF CC JUMP .Lno_mult; /* if none... */
R6 = R2 << 1;
CC = P0 == 1;
IF CC R2 = R6; /* if 1, Q = Q*2 */
IF !CC R1 = P2; /* if 2, restore stored divisor */
R3 = R2; /* Copy of R2 */
R3 *= R1; /* Q * divisor */
R5 = R0 - R3; /* Z = (dividend - Q * divisor) */
CC = R1 <= R5 (IU); /* Check if divisor <= Z? */
R6 = CC; /* if yes, R6 = 1 */
R2 = R2 + R6; /* if yes, add one to quotient(Q) */
.Lno_mult:
SP += 4;
(R7:5) = [SP++]; /* Pop registers R5-R7 */
R0 = R2; /* Store quotient */
RTS;
.Lreturn_ident:
CC = R0 < R1 (IU); /* If X < Y, always return 0 */
R2 = 0;
IF CC JUMP .Ltrue_return_ident;
R2 = -1 (X); /* X/0 => 0xFFFFFFFF */
CC = R1 == 0;
IF CC JUMP .Ltrue_return_ident;
R2 = -R2; /* R2 now 1 */
CC = R0 == R1; /* X==Y => 1 */
IF CC JUMP .Ltrue_return_ident;
R2 = R0; /* X/1 => X */
/*FALLTHRU*/
.Ltrue_return_ident:
R0 = R2;
.Lreturn_r0:
RTS;
.Lpower_of_two:
/* Y has a single bit set, which means it's a power of two.
** That means we can perform the division just by shifting
** X to the right the appropriate number of bits
*/
/* signbits returns the number of sign bits, minus one.
** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
** to shift right n-signbits spaces. It also means 0x80000000
** is a special case, because that *also* gives a signbits of 0
*/
R2 = R0 >> 31;
CC = R1 < 0;
IF CC JUMP .Ltrue_return_ident;
R1.l = SIGNBITS R1;
R1 = R1.L (Z);
R1 += -30;
R0 = LSHIFT R0 by R1.L;
RTS;
/* METHOD 3: PRESCALE AND USE THE DIVIDE PRIMITIVES WITH SOME POST-CORRECTION
Two scaling operations are required to use the divide primitives with a
divisor > 0x7FFFF.
Firstly (as in method 1) we need to shift the dividend 1 to the left for
integer division.
Secondly we need to shift both the divisor and dividend 1 to the right so
both are in range for the primitives.
The left/right shift of the dividend does nothing so we can skip it.
*/
.Lshift_and_correct:
R2 = R0;
// R3 is already R1 >> 1
CC=!CC;
AQ = CC; /* Clear AQ, got here with CC = 0 */
DIVQ(R2, R3); // 1
DIVQ(R2, R3); // 2
DIVQ(R2, R3); // 3
DIVQ(R2, R3); // 4
DIVQ(R2, R3); // 5
DIVQ(R2, R3); // 6
DIVQ(R2, R3); // 7
DIVQ(R2, R3); // 8
DIVQ(R2, R3); // 9
DIVQ(R2, R3); // 10
DIVQ(R2, R3); // 11
DIVQ(R2, R3); // 12
DIVQ(R2, R3); // 13
DIVQ(R2, R3); // 14
DIVQ(R2, R3); // 15
DIVQ(R2, R3); // 16
/* According to the Instruction Set Reference:
To divide by a divisor > 0x7FFF,
1. prescale and perform divide to obtain quotient (Q) (done above),
2. multiply quotient by unscaled divisor (result M)
3. subtract the product from the dividend to get an error (E = X - M)
4. if E < divisor (Y) subtract 1, if E > divisor (Y) add 1, else return quotient (Q)
*/
R3 = R2.L (Z); /* Q = X' / Y' */
R2 = R3; /* Preserve Q */
R2 *= R1; /* M = Q * Y */
R2 = R0 - R2; /* E = X - M */
R0 = R3; /* Copy Q into result reg */
/* Correction: If result of the multiply is negative, we overflowed
and need to correct the result by subtracting 1 from the result.*/
R3 = 0xFFFF (Z);
R2 = R2 >> 16; /* E >> 16 */
CC = R2 == R3;
R3 = 1 ;
R1 = R0 - R3;
IF CC R0 = R1;
RTS;
ENDPROC(___udivsi3)

View file

@ -0,0 +1,49 @@
/*
* libgcc1 routines for Blackfin 5xx
*
* Copyright 2004-2009 Analog Devices Inc.
*
* Licensed under the Clear BSD license or the GPL-2 (or later)
*/
#ifdef CONFIG_ARITHMETIC_OPS_L1
.section .l1.text
#else
.text
#endif
.extern ___udivsi3;
.type ___udivsi3, STT_FUNC;
.globl ___umodsi3
.type ___umodsi3, STT_FUNC;
/*
 * ___umodsi3 - unsigned 32-bit remainder.
 *
 * In:   R0 = dividend (X), R1 = divisor (Y)
 * Out:  R0 = X - (X / Y) * Y, with the quotient supplied by ___udivsi3
 *
 * Shortcuts taken before any division is attempted:
 *   X == 0, Y == 0, X == Y, Y == 1   ->  0
 *   X <  Y (unsigned compare)        ->  X
 *
 * R6, R7 and RETS are pushed before the call and popped again before
 * returning.  CC is overwritten by the comparisons; whatever else
 * ___udivsi3 modifies is modified here as well.
 */
___umodsi3:
	CC = R0 == 0;
	IF CC JUMP .Lmod_done;		/* 0 % Y: R0 already holds the result */
	CC = R1 == 0;
	IF CC JUMP .Lmod_zero;		/* zero divisor: this routine answers 0 */
	CC = R0 == R1;
	IF CC JUMP .Lmod_zero;		/* X % X */
	CC = R1 == 1;
	IF CC JUMP .Lmod_zero;		/* X % 1 */
	CC = R0 < R1 (IU);
	IF CC JUMP .Lmod_done;		/* X < Y: the dividend is the remainder */

	[--SP] = (R7:6);		/* R6/R7 carry Y and X across the call */
	[--SP] = RETS;			/* keep our own return address */
	R6 = R1;			/* R6 = Y */
	R7 = R0;			/* R7 = X */
	SP += -12;			/* space a caller must always provide */
	CALL ___udivsi3;		/* R0 = X / Y */
	SP += 12;
	R0 *= R6;			/* R0 = (X / Y) * Y */
	R0 = R7 - R0;			/* R0 = X - (X / Y) * Y */
	RETS = [SP++];			/* restore return address ... */
	(R7:6) = [SP++];		/* ... and the saved registers */
	RTS;

.Lmod_zero:
	R0 = 0;
.Lmod_done:
	RTS;
.size ___umodsi3, .-___umodsi3

View file

@ -0,0 +1,31 @@
/*
* Copyright 2007 Analog Devices Inc.
*
* Licensed under the Clear BSD license or the GPL-2 (or later)
*/
.align 2
.global ___umulsi3_highpart;
.type ___umulsi3_highpart, STT_FUNC;
#ifdef CONFIG_ARITHMETIC_OPS_L1
.section .l1.text
#else
.text
#endif
/*
 * ___umulsi3_highpart - upper word of an unsigned 32 x 32 multiply.
 *
 * Reads its two operands from R0 and R1 and leaves the result in R0.
 * R1, R2, R3 and CC are overwritten along the way; the stack is not used.
 *
 * Writing a = R0 and b = R1 on entry, and .H/.L for their 16-bit halves,
 * the code below evaluates
 *
 *     b.H*a.H + ((((b.L*a.L) >> 16) + b.L*a.H + b.H*a.L) >> 16)
 *
 * where the carry out of the middle sum is kept as bit 16 of the
 * shifted term.
 *
 * NOTE(review): this reading assumes (FU) yields the plain unsigned
 * 32-bit product of two 16-bit halves -- confirm against the Blackfin
 * instruction set reference.
 */
___umulsi3_highpart:
/* R2 = b.H*a.H (top partial product), R3 = b.L*a.H (first cross product) */
R2 = R1.H * R0.H, R3 = R1.L * R0.H (FU);
/*
 * R0 = b.L*a.L (bottom partial product), R1 = b.H*a.L (second cross
 * product).  NOTE(review): relies on both multiplies of this single
 * instruction reading R0/R1 before either destination is written.
 */
R0 = R1.L * R0.L, R1 = R1.H * R0.L (FU);
/* Only the upper half of the bottom product reaches the high word. */
R0 >>= 16;
/* Unsigned multiplication has the nice property that we can
ignore carry on this first addition: the shifted bottom product is at
most 0xFFFE and a 16x16 product is at most 0xFFFE0001, so their sum
still fits in 32 bits. */
R0 = R0 + R3;
/* Adding the second cross product can overflow; capture that carry. */
R0 = R0 + R1;
cc = ac0;
R1 = cc;
/* R1 = (carry << 16) | (upper half of the middle sum) */
R1 = PACK(R1.l,R0.h);
/* Add the top partial product to obtain the final high word. */
R0 = R1 + R2;
RTS;
.size ___umulsi3_highpart, .-___umulsi3_highpart