Mirror of https://github.com/AetherDroid/android_kernel_samsung_on5xelte.git (synced 2025-09-08 17:18:05 -04:00)
Commit f6dfaef42e: Fixed MTP to work with TWRP
50820 changed files with 20846062 additions and 0 deletions
arch/powerpc/lib/Makefile (new file, 42 lines)
@@ -0,0 +1,42 @@
#
# Makefile for ppc-specific library files..
#

subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror

ccflags-$(CONFIG_PPC64)	:= $(NO_MINIMAL_TOC)

CFLAGS_REMOVE_code-patching.o = -pg
CFLAGS_REMOVE_feature-fixups.o = -pg

obj-y := string.o alloc.o \
	 crtsavres.o ppc_ksyms.o
obj-$(CONFIG_PPC32)	+= div64.o copy_32.o
obj-$(CONFIG_HAS_IOMEM)	+= devres.o

obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
			   usercopy_64.o mem_64.o string.o \
			   hweight_64.o \
			   copyuser_power7.o string_64.o copypage_power7.o
ifeq ($(CONFIG_GENERIC_CSUM),)
obj-y			+= checksum_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_PPC64)	+= checksum_wrappers_64.o
endif

obj-$(CONFIG_PPC64)		+= memcpy_power7.o memcpy_64.o

obj-$(CONFIG_PPC_EMULATE_SSTEP)	+= sstep.o ldstfp.o

ifeq ($(CONFIG_PPC64),y)
obj-$(CONFIG_SMP)	+= locks.o
obj-$(CONFIG_ALTIVEC)	+= vmx-helper.o
endif

obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o

obj-y			+= code-patching.o
obj-y			+= feature-fixups.o
obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o

obj-$(CONFIG_ALTIVEC)	+= xor_vmx.o
CFLAGS_xor_vmx.o += -maltivec -mabi=altivec
arch/powerpc/lib/alloc.c (new file, 21 lines)
@@ -0,0 +1,21 @@
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/bootmem.h>
#include <linux/string.h>
#include <asm/setup.h>


void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask)
{
	void *p;

	if (mem_init_done)
		p = kzalloc(size, mask);
	else {
		p = alloc_bootmem(size);
		if (p)
			memset(p, 0, size);
	}
	return p;
}
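zalloc_maybe_bootmem() lets early boot code ask for zeroed memory without knowing whether the slab allocator is up yet. A sketch of a hypothetical in-tree caller (the struct and function names are illustrative, and the helper's declaration is assumed to come in via <asm/setup.h>):

#include <linux/types.h>
#include <linux/init.h>
#include <linux/gfp.h>
#include <asm/setup.h>

struct my_pdata {		/* hypothetical platform data */
	u32 base;
	u32 irq;
};

static struct my_pdata * __init my_pdata_alloc(void)
{
	/* Safe both before and after mem_init(): the helper picks
	 * alloc_bootmem() or kzalloc() itself and always zeroes. */
	return zalloc_maybe_bootmem(sizeof(struct my_pdata), GFP_KERNEL);
}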
arch/powerpc/lib/checksum_32.S (new file, 225 lines)
@@ -0,0 +1,225 @@
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

	.text

/*
 * ip_fast_csum(buf, len) -- Optimized for IP header
 * len is in words and is always >= 5.
 */
_GLOBAL(ip_fast_csum)
	lwz	r0,0(r3)
	lwzu	r5,4(r3)
	addic.	r4,r4,-2
	addc	r0,r0,r5
	mtctr	r4
	blelr-
1:	lwzu	r4,4(r3)
	adde	r0,r0,r4
	bdnz	1b
	addze	r0,r0		/* add in final carry */
	rlwinm	r3,r0,16,0,31	/* fold two halves together */
	add	r3,r0,r3
	not	r3,r3
	srwi	r3,r3,16
	blr

/*
 * Compute checksum of TCP or UDP pseudo-header:
 *   csum_tcpudp_magic(saddr, daddr, len, proto, sum)
 */
_GLOBAL(csum_tcpudp_magic)
	rlwimi	r5,r6,16,0,15	/* put proto in upper half of len */
	addc	r0,r3,r4	/* add 4 32-bit words together */
	adde	r0,r0,r5
	adde	r0,r0,r7
	addze	r0,r0		/* add in final carry */
	rlwinm	r3,r0,16,0,31	/* fold two halves together */
	add	r3,r0,r3
	not	r3,r3
	srwi	r3,r3,16
	blr

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * csum_partial(buff, len, sum)
 */
_GLOBAL(csum_partial)
	addic	r0,r5,0
	subi	r3,r3,4
	srwi.	r6,r4,2
	beq	3f		/* if we're doing < 4 bytes */
	andi.	r5,r3,2		/* Align buffer to longword boundary */
	beq+	1f
	lhz	r5,4(r3)	/* do 2 bytes to get aligned */
	addi	r3,r3,2
	subi	r4,r4,2
	addc	r0,r0,r5
	srwi.	r6,r4,2		/* # words to do */
	beq	3f
1:	mtctr	r6
2:	lwzu	r5,4(r3)	/* the bdnz has zero overhead, so it should */
	adde	r0,r0,r5	/* be unnecessary to unroll this loop */
	bdnz	2b
	andi.	r4,r4,3
3:	cmpwi	0,r4,2
	blt+	4f
	lhz	r5,4(r3)
	addi	r3,r3,2
	subi	r4,r4,2
	adde	r0,r0,r5
4:	cmpwi	0,r4,1
	bne+	5f
	lbz	r5,4(r3)
	slwi	r5,r5,8		/* Upper byte of word */
	adde	r0,r0,r5
5:	addze	r3,r0		/* add in final carry */
	blr

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in "sum" (32-bit), while copying the block to dst.
 * If an access exception occurs on src or dst, it stores -EFAULT
 * to *src_err or *dst_err respectively, and (for an error on
 * src) zeroes the rest of dst.
 *
 * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
 */
_GLOBAL(csum_partial_copy_generic)
	addic	r0,r6,0
	subi	r3,r3,4
	subi	r4,r4,4
	srwi.	r6,r5,2
	beq	3f		/* if we're doing < 4 bytes */
	andi.	r9,r4,2		/* Align dst to longword boundary */
	beq+	1f
81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
	addi	r3,r3,2
	subi	r5,r5,2
91:	sth	r6,4(r4)
	addi	r4,r4,2
	addc	r0,r0,r6
	srwi.	r6,r5,2		/* # words to do */
	beq	3f
1:	srwi.	r6,r5,4		/* # groups of 4 words to do */
	beq	10f
	mtctr	r6
71:	lwz	r6,4(r3)
72:	lwz	r9,8(r3)
73:	lwz	r10,12(r3)
74:	lwzu	r11,16(r3)
	adde	r0,r0,r6
75:	stw	r6,4(r4)
	adde	r0,r0,r9
76:	stw	r9,8(r4)
	adde	r0,r0,r10
77:	stw	r10,12(r4)
	adde	r0,r0,r11
78:	stwu	r11,16(r4)
	bdnz	71b
10:	rlwinm.	r6,r5,30,30,31	/* # words left to do */
	beq	13f
	mtctr	r6
82:	lwzu	r9,4(r3)
92:	stwu	r9,4(r4)
	adde	r0,r0,r9
	bdnz	82b
13:	andi.	r5,r5,3
3:	cmpwi	0,r5,2
	blt+	4f
83:	lhz	r6,4(r3)
	addi	r3,r3,2
	subi	r5,r5,2
93:	sth	r6,4(r4)
	addi	r4,r4,2
	adde	r0,r0,r6
4:	cmpwi	0,r5,1
	bne+	5f
84:	lbz	r6,4(r3)
94:	stb	r6,4(r4)
	slwi	r6,r6,8		/* Upper byte of word */
	adde	r0,r0,r6
5:	addze	r3,r0		/* add in final carry */
	blr

/* These shouldn't go in the fixup section, since that would
   cause the ex_table addresses to get out of order. */

src_error_4:
	mfctr	r6		/* update # bytes remaining from ctr */
	rlwimi	r5,r6,4,0,27
	b	79f
src_error_1:
	li	r6,0
	subi	r5,r5,2
95:	sth	r6,4(r4)
	addi	r4,r4,2
79:	srwi.	r6,r5,2
	beq	3f
	mtctr	r6
src_error_2:
	li	r6,0
96:	stwu	r6,4(r4)
	bdnz	96b
3:	andi.	r5,r5,3
	beq	src_error
src_error_3:
	li	r6,0
	mtctr	r5
	addi	r4,r4,3
97:	stbu	r6,1(r4)
	bdnz	97b
src_error:
	cmpwi	0,r7,0
	beq	1f
	li	r6,-EFAULT
	stw	r6,0(r7)
1:	addze	r3,r0
	blr

dst_error:
	cmpwi	0,r8,0
	beq	1f
	li	r6,-EFAULT
	stw	r6,0(r8)
1:	addze	r3,r0
	blr

	.section __ex_table,"a"
	.long	81b,src_error_1
	.long	91b,dst_error
	.long	71b,src_error_4
	.long	72b,src_error_4
	.long	73b,src_error_4
	.long	74b,src_error_4
	.long	75b,dst_error
	.long	76b,dst_error
	.long	77b,dst_error
	.long	78b,dst_error
	.long	82b,src_error_2
	.long	92b,dst_error
	.long	83b,src_error_3
	.long	93b,dst_error
	.long	84b,src_error_3
	.long	94b,dst_error
	.long	95b,dst_error
	.long	96b,dst_error
	.long	97b,dst_error
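The helpers above share one trick: accumulate with carry (addc/adde), fold the accumulator in half repeatedly, then complement. The same computation in portable C, as a stand-alone sketch (not part of this commit; function and variable names are illustrative):

#include <stdint.h>
#include <stdio.h>

/* C analogue of csum_tcpudp_magic: sum saddr, daddr, the (proto<<16)|len
 * word and the running sum with end-around carry, then fold to 16 bits. */
static uint16_t tcpudp_magic(uint32_t saddr, uint32_t daddr,
			     uint16_t len, uint8_t proto, uint32_t sum)
{
	uint64_t s = (uint64_t)saddr + daddr + sum
		   + (((uint32_t)proto << 16) | len);

	s = (s & 0xffffffff) + (s >> 32);	/* 64 -> 32 with carry */
	s = (s & 0xffffffff) + (s >> 32);
	s = (s & 0xffff) + (s >> 16);		/* 32 -> 16, as rlwinm/add */
	s = (s & 0xffff) + (s >> 16);
	return (uint16_t)~s;			/* as not/srwi */
}

int main(void)
{
	printf("0x%04x\n", tcpudp_magic(0xc0a80001, 0xc0a80002, 40, 6, 0));
	return 0;
}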
arch/powerpc/lib/checksum_64.S (new file, 480 lines)
@@ -0,0 +1,480 @@
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

/*
 * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header
 * len is in words and is always >= 5.
 *
 * In practice len == 5, but this is not guaranteed.  So this code does not
 * attempt to use doubleword instructions.
 */
_GLOBAL(ip_fast_csum)
	lwz	r0,0(r3)
	lwzu	r5,4(r3)
	addic.	r4,r4,-2
	addc	r0,r0,r5
	mtctr	r4
	blelr-
1:	lwzu	r4,4(r3)
	adde	r0,r0,r4
	bdnz	1b
	addze	r0,r0		/* add in final carry */
	rldicl	r4,r0,32,0	/* fold two 32-bit halves together */
	add	r0,r0,r4
	srdi	r0,r0,32
	rlwinm	r3,r0,16,0,31	/* fold two halves together */
	add	r3,r0,r3
	not	r3,r3
	srwi	r3,r3,16
	blr

/*
 * Compute checksum of TCP or UDP pseudo-header:
 *   csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum)
 * No real gain trying to do this specially for 64 bit, but
 * the 32 bit addition may spill into the upper bits of
 * the doubleword so we still must fold it down from 64.
 */
_GLOBAL(csum_tcpudp_magic)
	rlwimi	r5,r6,16,0,15	/* put proto in upper half of len */
	addc	r0,r3,r4	/* add 4 32-bit words together */
	adde	r0,r0,r5
	adde	r0,r0,r7
	rldicl	r4,r0,32,0	/* fold 64 bit value */
	add	r0,r4,r0
	srdi	r0,r0,32
	rlwinm	r3,r0,16,0,31	/* fold two halves together */
	add	r3,r0,r3
	not	r3,r3
	srwi	r3,r3,16
	blr

/*
 * Computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit).
 *
 * csum_partial(r3=buff, r4=len, r5=sum)
 */
_GLOBAL(csum_partial)
	addic	r0,r5,0			/* clear carry */

	srdi.	r6,r4,3			/* less than 8 bytes? */
	beq	.Lcsum_tail_word

	/*
	 * If only halfword aligned, align to a double word. Since odd
	 * aligned addresses should be rare and they would require more
	 * work to calculate the correct checksum, we ignore that case
	 * and take the potential slowdown of unaligned loads.
	 */
	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
	beq	.Lcsum_aligned

	li	r7,4
	sub	r6,r7,r6
	mtctr	r6

1:
	lhz	r6,0(r3)		/* align to doubleword */
	subi	r4,r4,2
	addi	r3,r3,2
	adde	r0,r0,r6
	bdnz	1b

.Lcsum_aligned:
	/*
	 * We unroll the loop such that each iteration is 64 bytes with an
	 * entry and exit limb of 64 bytes, meaning a minimum size of
	 * 128 bytes.
	 */
	srdi.	r6,r4,7
	beq	.Lcsum_tail_doublewords		/* len < 128 */

	srdi	r6,r4,6
	subi	r6,r6,1
	mtctr	r6

	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	ld	r6,0(r3)
	ld	r9,8(r3)

	ld	r10,16(r3)
	ld	r11,24(r3)

	/*
	 * On POWER6 and POWER7 back to back addes take 2 cycles because of
	 * the XER dependency. This means the fastest this loop can go is
	 * 16 cycles per iteration. The scheduling of the loop below has
	 * been shown to hit this on both POWER6 and POWER7.
	 */
	.align 5
2:
	adde	r0,r0,r6
	ld	r12,32(r3)
	ld	r14,40(r3)

	adde	r0,r0,r9
	ld	r15,48(r3)
	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10

	adde	r0,r0,r11

	adde	r0,r0,r12

	adde	r0,r0,r14

	adde	r0,r0,r15
	ld	r6,0(r3)
	ld	r9,8(r3)

	adde	r0,r0,r16
	ld	r10,16(r3)
	ld	r11,24(r3)
	bdnz	2b


	adde	r0,r0,r6
	ld	r12,32(r3)
	ld	r14,40(r3)

	adde	r0,r0,r9
	ld	r15,48(r3)
	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
	adde	r0,r0,r11
	adde	r0,r0,r12
	adde	r0,r0,r14
	adde	r0,r0,r15
	adde	r0,r0,r16

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE

	andi.	r4,r4,63

.Lcsum_tail_doublewords:		/* Up to 127 bytes to go */
	srdi.	r6,r4,3
	beq	.Lcsum_tail_word

	mtctr	r6
3:
	ld	r6,0(r3)
	addi	r3,r3,8
	adde	r0,r0,r6
	bdnz	3b

	andi.	r4,r4,7

.Lcsum_tail_word:			/* Up to 7 bytes to go */
	srdi.	r6,r4,2
	beq	.Lcsum_tail_halfword

	lwz	r6,0(r3)
	addi	r3,r3,4
	adde	r0,r0,r6
	subi	r4,r4,4

.Lcsum_tail_halfword:			/* Up to 3 bytes to go */
	srdi.	r6,r4,1
	beq	.Lcsum_tail_byte

	lhz	r6,0(r3)
	addi	r3,r3,2
	adde	r0,r0,r6
	subi	r4,r4,2

.Lcsum_tail_byte:			/* Up to 1 byte to go */
	andi.	r6,r4,1
	beq	.Lcsum_finish

	lbz	r6,0(r3)
	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
	adde	r0,r0,r9

.Lcsum_finish:
	addze	r0,r0			/* add in final carry */
	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
	add	r3,r4,r0
	srdi	r3,r3,32
	blr


	.macro srcnr
100:
	.section __ex_table,"a"
	.align 3
	.llong 100b,.Lsrc_error_nr
	.previous
	.endm

	.macro source
150:
	.section __ex_table,"a"
	.align 3
	.llong 150b,.Lsrc_error
	.previous
	.endm

	.macro dstnr
200:
	.section __ex_table,"a"
	.align 3
	.llong 200b,.Ldest_error_nr
	.previous
	.endm

	.macro dest
250:
	.section __ex_table,"a"
	.align 3
	.llong 250b,.Ldest_error
	.previous
	.endm

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in "sum" (32-bit), while copying the block to dst.
 * If an access exception occurs on src or dst, it stores -EFAULT
 * to *src_err or *dst_err respectively. The caller must take any action
 * required in this case (zeroing memory, recalculating partial checksum etc).
 *
 * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
 */
_GLOBAL(csum_partial_copy_generic)
	addic	r0,r6,0			/* clear carry */

	srdi.	r6,r5,3			/* less than 8 bytes? */
	beq	.Lcopy_tail_word

	/*
	 * If only halfword aligned, align to a double word. Since odd
	 * aligned addresses should be rare and they would require more
	 * work to calculate the correct checksum, we ignore that case
	 * and take the potential slowdown of unaligned loads.
	 *
	 * If the source and destination are relatively unaligned we only
	 * align the source. This keeps things simple.
	 */
	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
	beq	.Lcopy_aligned

	li	r9,4
	sub	r6,r9,r6
	mtctr	r6

1:
srcnr;	lhz	r6,0(r3)		/* align to doubleword */
	subi	r5,r5,2
	addi	r3,r3,2
	adde	r0,r0,r6
dstnr;	sth	r6,0(r4)
	addi	r4,r4,2
	bdnz	1b

.Lcopy_aligned:
	/*
	 * We unroll the loop such that each iteration is 64 bytes with an
	 * entry and exit limb of 64 bytes, meaning a minimum size of
	 * 128 bytes.
	 */
	srdi.	r6,r5,7
	beq	.Lcopy_tail_doublewords		/* len < 128 */

	srdi	r6,r5,6
	subi	r6,r6,1
	mtctr	r6

	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

source;	ld	r6,0(r3)
source;	ld	r9,8(r3)

source;	ld	r10,16(r3)
source;	ld	r11,24(r3)

	/*
	 * On POWER6 and POWER7 back to back addes take 2 cycles because of
	 * the XER dependency. This means the fastest this loop can go is
	 * 16 cycles per iteration. The scheduling of the loop below has
	 * been shown to hit this on both POWER6 and POWER7.
	 */
	.align 5
2:
	adde	r0,r0,r6
source;	ld	r12,32(r3)
source;	ld	r14,40(r3)

	adde	r0,r0,r9
source;	ld	r15,48(r3)
source;	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
dest;	std	r6,0(r4)
dest;	std	r9,8(r4)

	adde	r0,r0,r11
dest;	std	r10,16(r4)
dest;	std	r11,24(r4)

	adde	r0,r0,r12
dest;	std	r12,32(r4)
dest;	std	r14,40(r4)

	adde	r0,r0,r14
dest;	std	r15,48(r4)
dest;	std	r16,56(r4)
	addi	r4,r4,64

	adde	r0,r0,r15
source;	ld	r6,0(r3)
source;	ld	r9,8(r3)

	adde	r0,r0,r16
source;	ld	r10,16(r3)
source;	ld	r11,24(r3)
	bdnz	2b


	adde	r0,r0,r6
source;	ld	r12,32(r3)
source;	ld	r14,40(r3)

	adde	r0,r0,r9
source;	ld	r15,48(r3)
source;	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
dest;	std	r6,0(r4)
dest;	std	r9,8(r4)

	adde	r0,r0,r11
dest;	std	r10,16(r4)
dest;	std	r11,24(r4)

	adde	r0,r0,r12
dest;	std	r12,32(r4)
dest;	std	r14,40(r4)

	adde	r0,r0,r14
dest;	std	r15,48(r4)
dest;	std	r16,56(r4)
	addi	r4,r4,64

	adde	r0,r0,r15
	adde	r0,r0,r16

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE

	andi.	r5,r5,63

.Lcopy_tail_doublewords:		/* Up to 127 bytes to go */
	srdi.	r6,r5,3
	beq	.Lcopy_tail_word

	mtctr	r6
3:
srcnr;	ld	r6,0(r3)
	addi	r3,r3,8
	adde	r0,r0,r6
dstnr;	std	r6,0(r4)
	addi	r4,r4,8
	bdnz	3b

	andi.	r5,r5,7

.Lcopy_tail_word:			/* Up to 7 bytes to go */
	srdi.	r6,r5,2
	beq	.Lcopy_tail_halfword

srcnr;	lwz	r6,0(r3)
	addi	r3,r3,4
	adde	r0,r0,r6
dstnr;	stw	r6,0(r4)
	addi	r4,r4,4
	subi	r5,r5,4

.Lcopy_tail_halfword:			/* Up to 3 bytes to go */
	srdi.	r6,r5,1
	beq	.Lcopy_tail_byte

srcnr;	lhz	r6,0(r3)
	addi	r3,r3,2
	adde	r0,r0,r6
dstnr;	sth	r6,0(r4)
	addi	r4,r4,2
	subi	r5,r5,2

.Lcopy_tail_byte:			/* Up to 1 byte to go */
	andi.	r6,r5,1
	beq	.Lcopy_finish

srcnr;	lbz	r6,0(r3)
	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
	adde	r0,r0,r9
dstnr;	stb	r6,0(r4)

.Lcopy_finish:
	addze	r0,r0			/* add in final carry */
	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
	add	r3,r4,r0
	srdi	r3,r3,32
	blr

.Lsrc_error:
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE
.Lsrc_error_nr:
	cmpdi	0,r7,0
	beqlr
	li	r6,-EFAULT
	stw	r6,0(r7)
	blr

.Ldest_error:
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE
.Ldest_error_nr:
	cmpdi	0,r8,0
	beqlr
	li	r6,-EFAULT
	stw	r6,0(r8)
	blr
arch/powerpc/lib/checksum_wrappers_64.c (new file, 102 lines)
@@ -0,0 +1,102 @@
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2010
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <linux/export.h>
#include <linux/compiler.h>
#include <linux/types.h>
#include <asm/checksum.h>
#include <asm/uaccess.h>

__wsum csum_and_copy_from_user(const void __user *src, void *dst,
			       int len, __wsum sum, int *err_ptr)
{
	unsigned int csum;

	might_sleep();

	*err_ptr = 0;

	if (!len) {
		csum = 0;
		goto out;
	}

	if (unlikely((len < 0) || !access_ok(VERIFY_READ, src, len))) {
		*err_ptr = -EFAULT;
		csum = (__force unsigned int)sum;
		goto out;
	}

	csum = csum_partial_copy_generic((void __force *)src, dst,
					 len, sum, err_ptr, NULL);

	if (unlikely(*err_ptr)) {
		int missing = __copy_from_user(dst, src, len);

		if (missing) {
			memset(dst + len - missing, 0, missing);
			*err_ptr = -EFAULT;
		} else {
			*err_ptr = 0;
		}

		csum = csum_partial(dst, len, sum);
	}

out:
	return (__force __wsum)csum;
}
EXPORT_SYMBOL(csum_and_copy_from_user);

__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
			     __wsum sum, int *err_ptr)
{
	unsigned int csum;

	might_sleep();

	*err_ptr = 0;

	if (!len) {
		csum = 0;
		goto out;
	}

	if (unlikely((len < 0) || !access_ok(VERIFY_WRITE, dst, len))) {
		*err_ptr = -EFAULT;
		csum = -1; /* invalid checksum */
		goto out;
	}

	csum = csum_partial_copy_generic(src, (void __force *)dst,
					 len, sum, NULL, err_ptr);

	if (unlikely(*err_ptr)) {
		csum = csum_partial(src, len, sum);

		if (copy_to_user(dst, src, len)) {
			*err_ptr = -EFAULT;
			csum = -1; /* invalid checksum */
		}
	}

out:
	return (__force __wsum)csum;
}
EXPORT_SYMBOL(csum_and_copy_to_user);
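Both wrappers follow the same calling convention: pass a pointer for the error code, get the running checksum back. A minimal kernel-context sketch of a caller (the function is hypothetical, shown only to illustrate the contract documented above):

#include <linux/uaccess.h>
#include <asm/checksum.h>

/* Hypothetical caller: copy len bytes in from user space while summing. */
static __wsum copy_and_sum(const void __user *usrc, void *kdst, int len)
{
	int err = 0;
	__wsum csum = csum_and_copy_from_user(usrc, kdst, len, 0, &err);

	if (err)		/* -EFAULT: the missing tail of kdst was zeroed */
		return 0;	/* caller decides how to report the fault */
	return csum;		/* 32-bit partial sum; fold with csum_fold() */
}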
arch/powerpc/lib/code-patching.c (new file, 470 lines)
@@ -0,0 +1,470 @@
/*
 *  Copyright 2008 Michael Ellerman, IBM Corporation.
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <asm/page.h>
#include <asm/code-patching.h>
#include <asm/uaccess.h>


int patch_instruction(unsigned int *addr, unsigned int instr)
{
	int err;

	__put_user_size(instr, addr, 4, err);
	if (err)
		return err;
	asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr));
	return 0;
}

int patch_branch(unsigned int *addr, unsigned long target, int flags)
{
	return patch_instruction(addr, create_branch(addr, target, flags));
}

unsigned int create_branch(const unsigned int *addr,
			   unsigned long target, int flags)
{
	unsigned int instruction;
	long offset;

	offset = target;
	if (! (flags & BRANCH_ABSOLUTE))
		offset = offset - (unsigned long)addr;

	/* Check we can represent the target in the instruction format */
	if (offset < -0x2000000 || offset > 0x1fffffc || offset & 0x3)
		return 0;

	/* Mask out the flags and target, so they don't step on each other. */
	instruction = 0x48000000 | (flags & 0x3) | (offset & 0x03FFFFFC);

	return instruction;
}

unsigned int create_cond_branch(const unsigned int *addr,
				unsigned long target, int flags)
{
	unsigned int instruction;
	long offset;

	offset = target;
	if (! (flags & BRANCH_ABSOLUTE))
		offset = offset - (unsigned long)addr;

	/* Check we can represent the target in the instruction format */
	if (offset < -0x8000 || offset > 0x7FFF || offset & 0x3)
		return 0;

	/* Mask out the flags and target, so they don't step on each other. */
	instruction = 0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC);

	return instruction;
}

static unsigned int branch_opcode(unsigned int instr)
{
	return (instr >> 26) & 0x3F;
}

static int instr_is_branch_iform(unsigned int instr)
{
	return branch_opcode(instr) == 18;
}

static int instr_is_branch_bform(unsigned int instr)
{
	return branch_opcode(instr) == 16;
}

int instr_is_relative_branch(unsigned int instr)
{
	if (instr & BRANCH_ABSOLUTE)
		return 0;

	return instr_is_branch_iform(instr) || instr_is_branch_bform(instr);
}

static unsigned long branch_iform_target(const unsigned int *instr)
{
	signed long imm;

	imm = *instr & 0x3FFFFFC;

	/* If the top bit of the immediate value is set this is negative */
	if (imm & 0x2000000)
		imm -= 0x4000000;

	if ((*instr & BRANCH_ABSOLUTE) == 0)
		imm += (unsigned long)instr;

	return (unsigned long)imm;
}

static unsigned long branch_bform_target(const unsigned int *instr)
{
	signed long imm;

	imm = *instr & 0xFFFC;

	/* If the top bit of the immediate value is set this is negative */
	if (imm & 0x8000)
		imm -= 0x10000;

	if ((*instr & BRANCH_ABSOLUTE) == 0)
		imm += (unsigned long)instr;

	return (unsigned long)imm;
}

unsigned long branch_target(const unsigned int *instr)
{
	if (instr_is_branch_iform(*instr))
		return branch_iform_target(instr);
	else if (instr_is_branch_bform(*instr))
		return branch_bform_target(instr);

	return 0;
}

int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr)
{
	if (instr_is_branch_iform(*instr) || instr_is_branch_bform(*instr))
		return branch_target(instr) == addr;

	return 0;
}

unsigned int translate_branch(const unsigned int *dest, const unsigned int *src)
{
	unsigned long target;

	target = branch_target(src);

	if (instr_is_branch_iform(*src))
		return create_branch(dest, target, *src);
	else if (instr_is_branch_bform(*src))
		return create_cond_branch(dest, target, *src);

	return 0;
}

#ifdef CONFIG_PPC_BOOK3E_64
void __patch_exception(int exc, unsigned long addr)
{
	extern unsigned int interrupt_base_book3e;
	unsigned int *ibase = &interrupt_base_book3e;

	/* Our exceptions vectors start with a NOP and -then- a branch
	 * to deal with single stepping from userspace which stops on
	 * the second instruction. Thus we need to patch the second
	 * instruction of the exception, not the first one
	 */

	patch_branch(ibase + (exc / 4) + 1, addr, 0);
}
#endif

#ifdef CONFIG_CODE_PATCHING_SELFTEST

static void __init test_trampoline(void)
{
	asm ("nop;\n");
}

#define check(x)	\
	if (!(x)) printk("code-patching: test failed at line %d\n", __LINE__);

static void __init test_branch_iform(void)
{
	unsigned int instr;
	unsigned long addr;

	addr = (unsigned long)&instr;

	/* The simplest case, branch to self, no flags */
	check(instr_is_branch_iform(0x48000000));
	/* All bits of target set, and flags */
	check(instr_is_branch_iform(0x4bffffff));
	/* High bit of opcode set, which is wrong */
	check(!instr_is_branch_iform(0xcbffffff));
	/* Middle bits of opcode set, which is wrong */
	check(!instr_is_branch_iform(0x7bffffff));

	/* Simplest case, branch to self with link */
	check(instr_is_branch_iform(0x48000001));
	/* All bits of targets set */
	check(instr_is_branch_iform(0x4bfffffd));
	/* Some bits of targets set */
	check(instr_is_branch_iform(0x4bff00fd));
	/* Must be a valid branch to start with */
	check(!instr_is_branch_iform(0x7bfffffd));

	/* Absolute branch to 0x100 */
	instr = 0x48000103;
	check(instr_is_branch_to_addr(&instr, 0x100));
	/* Absolute branch to 0x420fc */
	instr = 0x480420ff;
	check(instr_is_branch_to_addr(&instr, 0x420fc));
	/* Maximum positive relative branch, + 20MB - 4B */
	instr = 0x49fffffc;
	check(instr_is_branch_to_addr(&instr, addr + 0x1FFFFFC));
	/* Smallest negative relative branch, - 4B */
	instr = 0x4bfffffc;
	check(instr_is_branch_to_addr(&instr, addr - 4));
	/* Largest negative relative branch, - 32 MB */
	instr = 0x4a000000;
	check(instr_is_branch_to_addr(&instr, addr - 0x2000000));

	/* Branch to self, with link */
	instr = create_branch(&instr, addr, BRANCH_SET_LINK);
	check(instr_is_branch_to_addr(&instr, addr));

	/* Branch to self - 0x100, with link */
	instr = create_branch(&instr, addr - 0x100, BRANCH_SET_LINK);
	check(instr_is_branch_to_addr(&instr, addr - 0x100));

	/* Branch to self + 0x100, no link */
	instr = create_branch(&instr, addr + 0x100, 0);
	check(instr_is_branch_to_addr(&instr, addr + 0x100));

	/* Maximum relative negative offset, - 32 MB */
	instr = create_branch(&instr, addr - 0x2000000, BRANCH_SET_LINK);
	check(instr_is_branch_to_addr(&instr, addr - 0x2000000));

	/* Out of range relative negative offset, - 32 MB + 4*/
	instr = create_branch(&instr, addr - 0x2000004, BRANCH_SET_LINK);
	check(instr == 0);

	/* Out of range relative positive offset, + 32 MB */
	instr = create_branch(&instr, addr + 0x2000000, BRANCH_SET_LINK);
	check(instr == 0);

	/* Unaligned target */
	instr = create_branch(&instr, addr + 3, BRANCH_SET_LINK);
	check(instr == 0);

	/* Check flags are masked correctly */
	instr = create_branch(&instr, addr, 0xFFFFFFFC);
	check(instr_is_branch_to_addr(&instr, addr));
	check(instr == 0x48000000);
}

static void __init test_create_function_call(void)
{
	unsigned int *iptr;
	unsigned long dest;

	/* Check we can create a function call */
	iptr = (unsigned int *)ppc_function_entry(test_trampoline);
	dest = ppc_function_entry(test_create_function_call);
	patch_instruction(iptr, create_branch(iptr, dest, BRANCH_SET_LINK));
	check(instr_is_branch_to_addr(iptr, dest));
}

static void __init test_branch_bform(void)
{
	unsigned long addr;
	unsigned int *iptr, instr, flags;

	iptr = &instr;
	addr = (unsigned long)iptr;

	/* The simplest case, branch to self, no flags */
	check(instr_is_branch_bform(0x40000000));
	/* All bits of target set, and flags */
	check(instr_is_branch_bform(0x43ffffff));
	/* High bit of opcode set, which is wrong */
	check(!instr_is_branch_bform(0xc3ffffff));
	/* Middle bits of opcode set, which is wrong */
	check(!instr_is_branch_bform(0x7bffffff));

	/* Absolute conditional branch to 0x100 */
	instr = 0x43ff0103;
	check(instr_is_branch_to_addr(&instr, 0x100));
	/* Absolute conditional branch to 0x20fc */
	instr = 0x43ff20ff;
	check(instr_is_branch_to_addr(&instr, 0x20fc));
	/* Maximum positive relative conditional branch, + 32 KB - 4B */
	instr = 0x43ff7ffc;
	check(instr_is_branch_to_addr(&instr, addr + 0x7FFC));
	/* Smallest negative relative conditional branch, - 4B */
	instr = 0x43fffffc;
	check(instr_is_branch_to_addr(&instr, addr - 4));
	/* Largest negative relative conditional branch, - 32 KB */
	instr = 0x43ff8000;
	check(instr_is_branch_to_addr(&instr, addr - 0x8000));

	/* All condition code bits set & link */
	flags = 0x3ff000 | BRANCH_SET_LINK;

	/* Branch to self */
	instr = create_cond_branch(iptr, addr, flags);
	check(instr_is_branch_to_addr(&instr, addr));

	/* Branch to self - 0x100 */
	instr = create_cond_branch(iptr, addr - 0x100, flags);
	check(instr_is_branch_to_addr(&instr, addr - 0x100));

	/* Branch to self + 0x100 */
	instr = create_cond_branch(iptr, addr + 0x100, flags);
	check(instr_is_branch_to_addr(&instr, addr + 0x100));

	/* Maximum relative negative offset, - 32 KB */
	instr = create_cond_branch(iptr, addr - 0x8000, flags);
	check(instr_is_branch_to_addr(&instr, addr - 0x8000));

	/* Out of range relative negative offset, - 32 KB + 4*/
	instr = create_cond_branch(iptr, addr - 0x8004, flags);
	check(instr == 0);

	/* Out of range relative positive offset, + 32 KB */
	instr = create_cond_branch(iptr, addr + 0x8000, flags);
	check(instr == 0);

	/* Unaligned target */
	instr = create_cond_branch(iptr, addr + 3, flags);
	check(instr == 0);

	/* Check flags are masked correctly */
	instr = create_cond_branch(iptr, addr, 0xFFFFFFFC);
	check(instr_is_branch_to_addr(&instr, addr));
	check(instr == 0x43FF0000);
}

static void __init test_translate_branch(void)
{
	unsigned long addr;
	unsigned int *p, *q;
	void *buf;

	buf = vmalloc(PAGE_ALIGN(0x2000000 + 1));
	check(buf);
	if (!buf)
		return;

	/* Simple case, branch to self moved a little */
	p = buf;
	addr = (unsigned long)p;
	patch_branch(p, addr, 0);
	check(instr_is_branch_to_addr(p, addr));
	q = p + 1;
	patch_instruction(q, translate_branch(q, p));
	check(instr_is_branch_to_addr(q, addr));

	/* Maximum negative case, move b . to addr + 32 MB */
	p = buf;
	addr = (unsigned long)p;
	patch_branch(p, addr, 0);
	q = buf + 0x2000000;
	patch_instruction(q, translate_branch(q, p));
	check(instr_is_branch_to_addr(p, addr));
	check(instr_is_branch_to_addr(q, addr));
	check(*q == 0x4a000000);

	/* Maximum positive case, move x to x - 32 MB + 4 */
	p = buf + 0x2000000;
	addr = (unsigned long)p;
	patch_branch(p, addr, 0);
	q = buf + 4;
	patch_instruction(q, translate_branch(q, p));
	check(instr_is_branch_to_addr(p, addr));
	check(instr_is_branch_to_addr(q, addr));
	check(*q == 0x49fffffc);

	/* Jump to x + 16 MB moved to x + 20 MB */
	p = buf;
	addr = 0x1000000 + (unsigned long)buf;
	patch_branch(p, addr, BRANCH_SET_LINK);
	q = buf + 0x1400000;
	patch_instruction(q, translate_branch(q, p));
	check(instr_is_branch_to_addr(p, addr));
	check(instr_is_branch_to_addr(q, addr));

	/* Jump to x + 16 MB moved to x - 16 MB + 4 */
	p = buf + 0x1000000;
	addr = 0x2000000 + (unsigned long)buf;
	patch_branch(p, addr, 0);
	q = buf + 4;
	patch_instruction(q, translate_branch(q, p));
	check(instr_is_branch_to_addr(p, addr));
	check(instr_is_branch_to_addr(q, addr));


	/* Conditional branch tests */

	/* Simple case, branch to self moved a little */
	p = buf;
	addr = (unsigned long)p;
	patch_instruction(p, create_cond_branch(p, addr, 0));
	check(instr_is_branch_to_addr(p, addr));
	q = p + 1;
	patch_instruction(q, translate_branch(q, p));
	check(instr_is_branch_to_addr(q, addr));

	/* Maximum negative case, move b . to addr + 32 KB */
	p = buf;
	addr = (unsigned long)p;
	patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC));
	q = buf + 0x8000;
	patch_instruction(q, translate_branch(q, p));
	check(instr_is_branch_to_addr(p, addr));
	check(instr_is_branch_to_addr(q, addr));
	check(*q == 0x43ff8000);

	/* Maximum positive case, move x to x - 32 KB + 4 */
	p = buf + 0x8000;
	addr = (unsigned long)p;
	patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC));
	q = buf + 4;
	patch_instruction(q, translate_branch(q, p));
	check(instr_is_branch_to_addr(p, addr));
	check(instr_is_branch_to_addr(q, addr));
	check(*q == 0x43ff7ffc);

	/* Jump to x + 12 KB moved to x + 20 KB */
	p = buf;
	addr = 0x3000 + (unsigned long)buf;
	patch_instruction(p, create_cond_branch(p, addr, BRANCH_SET_LINK));
	q = buf + 0x5000;
	patch_instruction(q, translate_branch(q, p));
	check(instr_is_branch_to_addr(p, addr));
	check(instr_is_branch_to_addr(q, addr));

	/* Jump to x + 8 KB moved to x - 8 KB + 4 */
	p = buf + 0x2000;
	addr = 0x4000 + (unsigned long)buf;
	patch_instruction(p, create_cond_branch(p, addr, 0));
	q = buf + 4;
	patch_instruction(q, translate_branch(q, p));
	check(instr_is_branch_to_addr(p, addr));
	check(instr_is_branch_to_addr(q, addr));

	/* Free the buffer we were using */
	vfree(buf);
}

static int __init test_code_patching(void)
{
	printk(KERN_DEBUG "Running code patching self-tests ...\n");

	test_branch_iform();
	test_branch_bform();
	test_create_function_call();
	test_translate_branch();

	return 0;
}
late_initcall(test_code_patching);

#endif /* CONFIG_CODE_PATCHING_SELFTEST */
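create_branch() packs a signed, word-aligned 26-bit offset into the low bits of opcode 18 (0x48000000). The same encoding in stand-alone C, a sketch whose output can be checked against the selftests above:

#include <stdint.h>
#include <stdio.h>

/* Encode a relative "b target" at addr the way create_branch() does
 * (flags = 0); returns 0 when the target is unreachable, as above. */
static uint32_t encode_b(uint64_t addr, uint64_t target)
{
	int64_t off = (int64_t)(target - addr);

	if (off < -0x2000000 || off > 0x1fffffc || (off & 0x3))
		return 0;
	return 0x48000000u | ((uint32_t)off & 0x03fffffcu);
}

int main(void)
{
	/* branch back 4 bytes: 0x4bfffffc, matching the iform selftest */
	printf("0x%08x\n", encode_b(0x1000, 0x0ffc));
	return 0;
}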
arch/powerpc/lib/copy_32.S (new file, 518 lines)
@@ -0,0 +1,518 @@
/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
	.section __ex_table,"a";		\
	.align	2;				\
	.long	8 ## n ## 0b,9 ## n ## 0b;	\
	.long	8 ## n ## 1b,9 ## n ## 0b;	\
	.long	8 ## n ## 2b,9 ## n ## 0b;	\
	.long	8 ## n ## 3b,9 ## n ## 0b;	\
	.long	8 ## n ## 4b,9 ## n ## 1b;	\
	.long	8 ## n ## 5b,9 ## n ## 1b;	\
	.long	8 ## n ## 6b,9 ## n ## 1b;	\
	.long	8 ## n ## 7b,9 ## n ## 1b;	\
	.text

	.text
	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
	.stabs	"copy_32.S",N_SO,0,0,0f
0:

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 */
_GLOBAL(cacheable_memzero)
	mr	r5,r4
	li	r4,0
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f
	stwu	r4,4(r6)
	beqlr
	andi.	r0,r6,3
	add	r5,r0,r5
	subf	r6,r0,r6
	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
10:	dcbz	r7,r6
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4
2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

_GLOBAL(memset)
	rlwimi	r4,r4,8,16,23
	rlwimi	r4,r4,16,0,15
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f
	stwu	r4,4(r6)
	beqlr
	andi.	r0,r6,3
	add	r5,r0,r5
	subf	r6,r0,r6
	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 */
_GLOBAL(cacheable_memcpy)
	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	memcpy			/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
	dcbz	r11,r6
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	blr

_GLOBAL(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

_GLOBAL(memcpy)
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f			/* if less than 8 bytes to do */
	andi.	r0,r6,3			/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r0 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	.section __ex_table,"a"
	.align	2
	.long	70b,100f
	.long	71b,101f
	.long	72b,102f
	.long	73b,103f
	.text

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	.section __ex_table,"a"
	.align	2
	.long	54b,105f
	.text
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr

/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f			/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
132:	mfctr	r3
	srwi.	r0,r3,2
	li	r9,0
	mtctr	r0
	beq	113f
112:	stwu	r9,4(r6)
	bdnz	112b
113:	andi.	r0,r3,3
	mtctr	r0
	beq	120f
114:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	114b
120:	blr

	.section __ex_table,"a"
	.align	2
	.long	30b,108b
	.long	31b,109b
	.long	40b,110b
	.long	41b,111b
	.long	130b,132b
	.long	131b,120b
	.long	112b,120b
	.long	114b,120b
	.text
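All of __copy_tofrom_user's fault handlers funnel into the single accounting rule stated in the comment above: bytes not copied = r5 + (CTR << r3), where r3 holds log2 of the interrupted loop's unit size (0 for the byte loops, 2 for the word loops, LG_CACHELINE_BYTES for the cacheline loop). The same arithmetic as a C sketch (names mirror the registers, purely illustrative):

/* Bytes left uncopied at a fault: the tail count plus the remaining
 * loop iterations scaled by the interrupted loop's unit size. */
static unsigned long bytes_left(unsigned long r5 /* tail bytes */,
				unsigned long ctr /* iterations left */,
				unsigned int r3 /* log2(unit size) */)
{
	return r5 + (ctr << r3);
}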
arch/powerpc/lib/copypage_64.S (new file, 112 lines)
@@ -0,0 +1,112 @@
/*
 * Copyright (C) 2008 Mark Nelson, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/page.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>

	.section	".toc","aw"
PPC64_CACHES:
	.tc		ppc64_caches[TC],ppc64_caches
	.section	".text"

_GLOBAL_TOC(copy_page)
BEGIN_FTR_SECTION
	lis	r5,PAGE_SIZE@h
FTR_SECTION_ELSE
	b	copypage_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
	ori	r5,r5,PAGE_SIZE@l
BEGIN_FTR_SECTION
	ld	r10,PPC64_CACHES@toc(r2)
	lwz	r11,DCACHEL1LOGLINESIZE(r10)	/* log2 of cache line size */
	lwz	r12,DCACHEL1LINESIZE(r10)	/* get cache line size */
	li	r9,0
	srd	r8,r5,r11

	mtctr	r8
.Lsetup:
	dcbt	r9,r4
	dcbz	r9,r3
	add	r9,r9,r12
	bdnz	.Lsetup
END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
	addi	r3,r3,-8
	srdi	r8,r5,7		/* page is copied in 128 byte strides */
	addi	r8,r8,-1	/* one stride copied outside loop */

	mtctr	r8

	ld	r5,0(r4)
	ld	r6,8(r4)
	ld	r7,16(r4)
	ldu	r8,24(r4)
1:	std	r5,8(r3)
	std	r6,16(r3)
	ld	r9,8(r4)
	ld	r10,16(r4)
	std	r7,24(r3)
	std	r8,32(r3)
	ld	r11,24(r4)
	ld	r12,32(r4)
	std	r9,40(r3)
	std	r10,48(r3)
	ld	r5,40(r4)
	ld	r6,48(r4)
	std	r11,56(r3)
	std	r12,64(r3)
	ld	r7,56(r4)
	ld	r8,64(r4)
	std	r5,72(r3)
	std	r6,80(r3)
	ld	r9,72(r4)
	ld	r10,80(r4)
	std	r7,88(r3)
	std	r8,96(r3)
	ld	r11,88(r4)
	ld	r12,96(r4)
	std	r9,104(r3)
	std	r10,112(r3)
	ld	r5,104(r4)
	ld	r6,112(r4)
	std	r11,120(r3)
	stdu	r12,128(r3)
	ld	r7,120(r4)
	ldu	r8,128(r4)
	bdnz	1b

	std	r5,8(r3)
	std	r6,16(r3)
	ld	r9,8(r4)
	ld	r10,16(r4)
	std	r7,24(r3)
	std	r8,32(r3)
	ld	r11,24(r4)
	ld	r12,32(r4)
	std	r9,40(r3)
	std	r10,48(r3)
	ld	r5,40(r4)
	ld	r6,48(r4)
	std	r11,56(r3)
	std	r12,64(r3)
	ld	r7,56(r4)
	ld	r8,64(r4)
	std	r5,72(r3)
	std	r6,80(r3)
	ld	r9,72(r4)
	ld	r10,80(r4)
	std	r7,88(r3)
	std	r8,96(r3)
	ld	r11,88(r4)
	ld	r12,96(r4)
	std	r9,104(r3)
	std	r10,112(r3)
	std	r11,120(r3)
	std	r12,128(r3)
	blr
168
arch/powerpc/lib/copypage_power7.S
Normal file
168
arch/powerpc/lib/copypage_power7.S
Normal file
|
@ -0,0 +1,168 @@
|
|||
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/page.h>
#include <asm/ppc_asm.h>

_GLOBAL(copypage_power7)
	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side. Since source and destination are page
	 * aligned we don't need to clear the bottom 7 bits of either
	 * address.
	 */
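	/*
	 * Explanatory gloss (not in the original source): the third
	 * operand of the dcbt/dcbtst forms below is the TH field of the
	 * enhanced data-stream touch encoding -- TH=0b01000 names the
	 * start address of a stream, TH=0b01010 carries the encoded
	 * length/depth control word, and the final dcbt sets the GO bit
	 * that starts all configured streams.
	 */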
	ori	r9,r3,1		/* stream=1 => to */

#ifdef CONFIG_PPC_64K_PAGES
	lis	r7,0x0E01	/* depth=7
				 * units/cachelines=512 */
#else
	lis	r7,0x0E00	/* depth=7 */
	ori	r7,r7,0x1000	/* units/cachelines=32 */
#endif
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32

	.machine push
	.machine "power4"
	/* setup read stream 0  */
	dcbt	r0,r4,0b01000	/* addr from */
	dcbt	r0,r7,0b01010	/* length and depth from */
	/* setup write stream 1 */
	dcbtst	r0,r9,0b01000	/* addr to */
	dcbtst	r0,r10,0b01010	/* length and depth to */
	eieio
	dcbt	r0,r8,0b01010	/* all streams GO */
	.machine pop

#ifdef CONFIG_ALTIVEC
	mflr	r0
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	enter_vmx_copy
	cmpwi	r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	mtlr	r0

	li	r0,(PAGE_SIZE/128)
	mtctr	r0

	beq	.Lnonvmx_copy

	addi	r1,r1,STACKFRAMESIZE

	li	r6,16
	li	r7,32
	li	r8,48
	li	r9,64
	li	r10,80
	li	r11,96
	li	r12,112

	.align	5
1:	lvx	vr7,r0,r4
	lvx	vr6,r4,r6
	lvx	vr5,r4,r7
	lvx	vr4,r4,r8
	lvx	vr3,r4,r9
	lvx	vr2,r4,r10
	lvx	vr1,r4,r11
	lvx	vr0,r4,r12
	addi	r4,r4,128
	stvx	vr7,r0,r3
	stvx	vr6,r3,r6
	stvx	vr5,r3,r7
	stvx	vr4,r3,r8
	stvx	vr3,r3,r9
	stvx	vr2,r3,r10
	stvx	vr1,r3,r11
	stvx	vr0,r3,r12
	addi	r3,r3,128
	bdnz	1b

	b	exit_vmx_copy	/* tail call optimise */

#else
	li	r0,(PAGE_SIZE/128)
	mtctr	r0

	stdu	r1,-STACKFRAMESIZE(r1)
#endif

.Lnonvmx_copy:
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)

1:	ld	r0,0(r4)
	ld	r5,8(r4)
	ld	r6,16(r4)
	ld	r7,24(r4)
	ld	r8,32(r4)
	ld	r9,40(r4)
	ld	r10,48(r4)
	ld	r11,56(r4)
	ld	r12,64(r4)
	ld	r14,72(r4)
	ld	r15,80(r4)
	ld	r16,88(r4)
	ld	r17,96(r4)
	ld	r18,104(r4)
	ld	r19,112(r4)
	ld	r20,120(r4)
	addi	r4,r4,128
	std	r0,0(r3)
	std	r5,8(r3)
	std	r6,16(r3)
	std	r7,24(r3)
	std	r8,32(r3)
	std	r9,40(r3)
	std	r10,48(r3)
	std	r11,56(r3)
	std	r12,64(r3)
	std	r14,72(r3)
	std	r15,80(r3)
	std	r16,88(r3)
	std	r17,96(r3)
	std	r18,104(r3)
	std	r19,112(r3)
	std	r20,120(r3)
	addi	r3,r3,128
	bdnz	1b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	addi	r1,r1,STACKFRAMESIZE
	blr
673
arch/powerpc/lib/copyuser_64.S
Normal file
@ -0,0 +1,673 @@
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif

	.align	7
_GLOBAL_TOC(__copy_tofrom_user)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF(0x01,r5)
	blt	cr1,.Lshort_copy
	/* Below we want to nop out the bne if we're on a CPU that has the
	 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
	 * cleared.
	 * At the time of writing the only CPU that has this combination of bits
	 * set is Power6.
	 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0
20:	ld	r7,0(r4)
220:	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10
	beq	22f
	addi	r3,r3,16
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:	ld	r7,16(r4)
221:	ld	r6,24(r4)
	addi	r4,r4,32
70:	std	r9,0(r3)
270:	std	r8,8(r3)
22:	ld	r9,0(r4)
222:	ld	r8,8(r4)
71:	std	r7,16(r3)
271:	std	r6,24(r3)
	addi	r3,r3,32
	bdnz	21b
72:	std	r9,0(r3)
272:	std	r8,8(r3)
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
.Ldo_tail:
	addi	r3,r3,16
	bf	cr7*4+0,246f
244:	ld	r9,0(r4)
	addi	r4,r4,8
245:	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
23:	lwz	r9,0(r4)
	addi	r4,r4,4
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
44:	lhz	r9,0(r4)
	addi	r4,r4,2
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
45:	lbz	r9,0(r4)
75:	stb	r9,0(r3)
3:	li	r3,0
	blr

.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sLd	r6,r9,r10
26:	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	sHd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sLd	r9,r9,r10
	ble	cr1,7f
34:	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
94:	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
95:	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
96:	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr

.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)	/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 */

136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
320:
122:
322:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
132:
	addi	r3,r3,8
121:
321:
344:
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:	mfctr	r5
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
1:	andi.	r9,r4,7
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3
	andi.	r5,r5,7
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
182:
183:
	add	r3,r3,r7
	b	1f
371:
180:
	addi	r3,r3,8
171:
177:
	addi	r3,r3,8
370:
372:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
170:
172:
345:
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
	ld	r6,-24(r1)
	ld	r5,-8(r1)
	add	r6,r6,r5
	subf	r3,r3,r6	/* #bytes not copied */
190:
191:
192:
	blr			/* #bytes not copied in r3 */

	.section __ex_table,"a"
	.align	3
	.llong	20b,120b
	.llong	220b,320b
	.llong	21b,121b
	.llong	221b,321b
	.llong	70b,170b
	.llong	270b,370b
	.llong	22b,122b
	.llong	222b,322b
	.llong	71b,171b
	.llong	271b,371b
	.llong	72b,172b
	.llong	272b,372b
	.llong	244b,344b
	.llong	245b,345b
	.llong	23b,123b
	.llong	73b,173b
	.llong	44b,144b
	.llong	74b,174b
	.llong	45b,145b
	.llong	75b,175b
	.llong	24b,124b
	.llong	25b,125b
	.llong	26b,126b
	.llong	27b,127b
	.llong	28b,128b
	.llong	29b,129b
	.llong	30b,130b
	.llong	31b,131b
	.llong	32b,132b
	.llong	76b,176b
	.llong	33b,133b
	.llong	77b,177b
	.llong	78b,178b
	.llong	79b,179b
	.llong	80b,180b
	.llong	34b,134b
	.llong	94b,194b
	.llong	95b,195b
	.llong	96b,196b
	.llong	35b,135b
	.llong	81b,181b
	.llong	36b,136b
	.llong	82b,182b
	.llong	37b,137b
	.llong	83b,183b
	.llong	38b,138b
	.llong	39b,139b
	.llong	84b,184b
	.llong	85b,185b
	.llong	40b,140b
	.llong	86b,186b
	.llong	41b,141b
	.llong	87b,187b
	.llong	42b,142b
	.llong	88b,188b
	.llong	43b,143b
	.llong	89b,189b
	.llong	90b,190b
	.llong	91b,191b
	.llong	92b,192b

	.text

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned

	.section __ex_table,"a"
	.align	3
	.llong	20b,100b
	.llong	21b,100b
	.llong	22b,100b
	.llong	23b,100b
	.llong	24b,100b
	.llong	25b,100b
	.llong	26b,100b
	.llong	27b,100b
	.llong	28b,100b
	.llong	29b,100b
	.llong	30b,100b
	.llong	31b,100b
	.llong	32b,100b
	.llong	33b,100b
	.llong	34b,100b
	.llong	35b,100b
	.llong	36b,100b
	.llong	37b,100b
	.llong	38b,100b
	.llong	39b,100b
	.llong	40b,100b
	.llong	41b,100b
	.llong	42b,100b
	.llong	43b,100b
	.llong	44b,100b
	.llong	45b,100b
	.llong	46b,100b
	.llong	47b,100b
	.llong	48b,100b
	.llong	49b,100b
	.llong	50b,100b
	.llong	51b,100b
	.llong	52b,100b
	.llong	53b,100b
	.llong	54b,100b
	.llong	55b,100b
	.llong	56b,100b
	.llong	57b,100b
	.llong	58b,100b
	.llong	59b,100b
	.llong	60b,100b
	.llong	61b,100b
	.llong	62b,100b
	.llong	63b,100b
	.llong	64b,100b
	.llong	65b,100b
	.llong	66b,100b
	.llong	67b,100b
	.llong	68b,100b
	.llong	69b,100b
	.llong	70b,100b
	.llong	71b,100b
	.llong	72b,100b
	.llong	73b,100b
	.llong	74b,100b
	.llong	75b,100b
	.llong	76b,100b
	.llong	77b,100b
	.llong	78b,100b
	.llong	79b,100b
	.llong	80b,100b
	.llong	81b,100b
	.llong	82b,100b
	.llong	83b,100b
	.llong	84b,100b
	.llong	85b,100b
	.llong	86b,100b
	.llong	87b,100b
	.llong	88b,100b
	.llong	89b,100b
	.llong	90b,100b
	.llong	91b,100b
721
arch/powerpc/lib/copyuser_power7.S
Normal file
@ -0,0 +1,721 @@
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2011
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
#endif

.macro err1
100:
	.section __ex_table,"a"
	.align	3
	.llong	100b,.Ldo_err1
	.previous
.endm

.macro err2
200:
	.section __ex_table,"a"
	.align	3
	.llong	200b,.Ldo_err2
	.previous
.endm
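/*
 * Each errN macro plants a local label on the access that follows it
 * and records a (faulting address, handler) pair in __ex_table, so
 * e.g. a fault in "err1; ld r0,0(r4)" unwinds through .Ldo_err1.
 * The handlers below restore any saved state and branch back into
 * __copy_tofrom_user_base, which works out the bytes not copied.
 */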

#ifdef CONFIG_ALTIVEC
.macro err3
300:
	.section __ex_table,"a"
	.align	3
	.llong	300b,.Ldo_err3
	.previous
.endm

.macro err4
400:
	.section __ex_table,"a"
	.align	3
	.llong	400b,.Ldo_err4
	.previous
.endm


.Ldo_err4:
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Ldo_err3:
	bl	exit_vmx_usercopy
	ld	r0,STACKFRAMESIZE+16(r1)
	mtlr	r0
	b	.Lexit
#endif /* CONFIG_ALTIVEC */

.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Lexit:
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
	b	__copy_tofrom_user_base


_GLOBAL(__copy_tofrom_user_power7)
#ifdef CONFIG_ALTIVEC
	cmpldi	r5,16
	cmpldi	cr1,r5,4096

	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

	blt	.Lshort_copy
	bgt	cr1,.Lvmx_copy
#else
	cmpldi	r5,16

	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

	blt	.Lshort_copy
#endif

.Lnonvmx_copy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)

	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6
	cmpldi	r5,128
	blt	5f

	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	srdi	r6,r5,7
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r7,16(r4)
err2;	ld	r8,24(r4)
err2;	ld	r9,32(r4)
err2;	ld	r10,40(r4)
err2;	ld	r11,48(r4)
err2;	ld	r12,56(r4)
err2;	ld	r14,64(r4)
err2;	ld	r15,72(r4)
err2;	ld	r16,80(r4)
err2;	ld	r17,88(r4)
err2;	ld	r18,96(r4)
err2;	ld	r19,104(r4)
err2;	ld	r20,112(r4)
err2;	ld	r21,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r7,16(r3)
err2;	std	r8,24(r3)
err2;	std	r9,32(r3)
err2;	std	r10,40(r3)
err2;	std	r11,48(r3)
err2;	std	r12,56(r3)
err2;	std	r14,64(r3)
err2;	std	r15,72(r3)
err2;	std	r16,80(r3)
err2;	std	r17,88(r3)
err2;	std	r18,96(r3)
err2;	std	r19,104(r3)
err2;	std	r20,112(r3)
err2;	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6

6:	bf	cr7*4+1,7f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
err1;	ld	r9,32(r4)
err1;	ld	r10,40(r4)
err1;	ld	r11,48(r4)
err1;	ld	r12,56(r4)
	addi	r4,r4,64
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
err1;	std	r9,32(r3)
err1;	std	r10,40(r3)
err1;	std	r11,48(r3)
err1;	std	r12,56(r3)
	addi	r3,r3,64

	/* Up to 63B to go */
7:	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
	addi	r3,r3,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0
	blr

.Lunwind_stack_nonvmx_copy:
	addi	r1,r1,STACKFRAMESIZE
	b	.Lnonvmx_copy

#ifdef CONFIG_ALTIVEC
.Lvmx_copy:
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	enter_vmx_usercopy
	cmpwi	cr1,r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	ld	r5,STK_REG(R29)(r1)
	mtlr	r0

	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side.
	 */
	clrrdi	r6,r4,7
	clrrdi	r9,r3,7
	ori	r9,r9,1		/* stream=1 */

	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
	cmpldi	r7,0x3FF
	ble	1f
	li	r7,0x3FF
1:	lis	r0,0x0E00	/* depth=7 */
	sldi	r7,r7,7
	or	r7,r7,r0
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32

	.machine push
	.machine "power4"
	/* setup read stream 0 */
	dcbt	r0,r6,0b01000	/* addr from */
	dcbt	r0,r7,0b01010	/* length and depth from */
	/* setup write stream 1 */
	dcbtst	r0,r9,0b01000	/* addr to */
	dcbtst	r0,r10,0b01010	/* length and depth to */
	eieio
	dcbt	r0,r8,0b01010	/* all streams GO */
	.machine pop

	beq	cr1,.Lunwind_stack_nonvmx_copy

	/*
	 * If source and destination are not relatively aligned we use a
	 * slower permute loop.
	 */
	xor	r6,r4,r3
	rldicl.	r6,r6,0,(64-4)
	bne	.Lvmx_unaligned_copy

	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	bf	cr7*4+3,5f
err3;	lvx	vr1,r0,r4
	addi	r4,r4,16
err3;	stvx	vr1,r0,r3
	addi	r3,r3,16

5:	bf	cr7*4+2,6f
err3;	lvx	vr1,r0,r4
err3;	lvx	vr0,r4,r9
	addi	r4,r4,32
err3;	stvx	vr1,r0,r3
err3;	stvx	vr0,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	vr3,r0,r4
err3;	lvx	vr2,r4,r9
err3;	lvx	vr1,r4,r10
err3;	lvx	vr0,r4,r11
	addi	r4,r4,64
err3;	stvx	vr3,r0,r3
err3;	stvx	vr2,r3,r9
err3;	stvx	vr1,r3,r10
err3;	stvx	vr0,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	vr7,r0,r4
err4;	lvx	vr6,r4,r9
err4;	lvx	vr5,r4,r10
err4;	lvx	vr4,r4,r11
err4;	lvx	vr3,r4,r12
err4;	lvx	vr2,r4,r14
err4;	lvx	vr1,r4,r15
err4;	lvx	vr0,r4,r16
	addi	r4,r4,128
err4;	stvx	vr7,r0,r3
err4;	stvx	vr6,r3,r9
err4;	stvx	vr5,r3,r10
err4;	stvx	vr4,r3,r11
err4;	stvx	vr3,r3,r12
err4;	stvx	vr2,r3,r14
err4;	stvx	vr1,r3,r15
err4;	stvx	vr0,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	vr3,r0,r4
err3;	lvx	vr2,r4,r9
err3;	lvx	vr1,r4,r10
err3;	lvx	vr0,r4,r11
	addi	r4,r4,64
err3;	stvx	vr3,r0,r3
err3;	stvx	vr2,r3,r9
err3;	stvx	vr1,r3,r10
err3;	stvx	vr0,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	vr1,r0,r4
err3;	lvx	vr0,r4,r9
	addi	r4,r4,32
err3;	stvx	vr1,r0,r3
err3;	stvx	vr0,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	vr1,r0,r4
	addi	r4,r4,16
err3;	stvx	vr1,r0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	exit_vmx_usercopy	/* tail call optimise */

.Lvmx_unaligned_copy:
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r7,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	LVS(vr16,0,r4)		/* Setup permute control vector */
err3;	lvx	vr0,0,r4
	addi	r4,r4,16

	bf	cr7*4+3,5f
err3;	lvx	vr1,r0,r4
	VPERM(vr8,vr0,vr1,vr16)
	addi	r4,r4,16
err3;	stvx	vr8,r0,r3
	addi	r3,r3,16
	vor	vr0,vr1,vr1

5:	bf	cr7*4+2,6f
err3;	lvx	vr1,r0,r4
	VPERM(vr8,vr0,vr1,vr16)
err3;	lvx	vr0,r4,r9
	VPERM(vr9,vr1,vr0,vr16)
	addi	r4,r4,32
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	vr3,r0,r4
	VPERM(vr8,vr0,vr3,vr16)
err3;	lvx	vr2,r4,r9
	VPERM(vr9,vr3,vr2,vr16)
err3;	lvx	vr1,r4,r10
	VPERM(vr10,vr2,vr1,vr16)
err3;	lvx	vr0,r4,r11
	VPERM(vr11,vr1,vr0,vr16)
	addi	r4,r4,64
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
err3;	stvx	vr10,r3,r10
err3;	stvx	vr11,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	vr7,r0,r4
	VPERM(vr8,vr0,vr7,vr16)
err4;	lvx	vr6,r4,r9
	VPERM(vr9,vr7,vr6,vr16)
err4;	lvx	vr5,r4,r10
	VPERM(vr10,vr6,vr5,vr16)
err4;	lvx	vr4,r4,r11
	VPERM(vr11,vr5,vr4,vr16)
err4;	lvx	vr3,r4,r12
	VPERM(vr12,vr4,vr3,vr16)
err4;	lvx	vr2,r4,r14
	VPERM(vr13,vr3,vr2,vr16)
err4;	lvx	vr1,r4,r15
	VPERM(vr14,vr2,vr1,vr16)
err4;	lvx	vr0,r4,r16
	VPERM(vr15,vr1,vr0,vr16)
	addi	r4,r4,128
err4;	stvx	vr8,r0,r3
err4;	stvx	vr9,r3,r9
err4;	stvx	vr10,r3,r10
err4;	stvx	vr11,r3,r11
err4;	stvx	vr12,r3,r12
err4;	stvx	vr13,r3,r14
err4;	stvx	vr14,r3,r15
err4;	stvx	vr15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	vr3,r0,r4
	VPERM(vr8,vr0,vr3,vr16)
err3;	lvx	vr2,r4,r9
	VPERM(vr9,vr3,vr2,vr16)
err3;	lvx	vr1,r4,r10
	VPERM(vr10,vr2,vr1,vr16)
err3;	lvx	vr0,r4,r11
	VPERM(vr11,vr1,vr0,vr16)
	addi	r4,r4,64
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
err3;	stvx	vr10,r3,r10
err3;	stvx	vr11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	vr1,r0,r4
	VPERM(vr8,vr0,vr1,vr16)
err3;	lvx	vr0,r4,r9
	VPERM(vr9,vr1,vr0,vr16)
	addi	r4,r4,32
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	vr1,r0,r4
	VPERM(vr8,vr0,vr1,vr16)
	addi	r4,r4,16
err3;	stvx	vr8,r0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r6,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	exit_vmx_usercopy	/* tail call optimise */
#endif /* CONFIG_ALTIVEC */
547
arch/powerpc/lib/crtsavres.S
Normal file
@ -0,0 +1,547 @@
/*
 * Special support for eabi and SVR4
 *
 * Copyright (C) 1995, 1996, 1998, 2000, 2001 Free Software Foundation, Inc.
 * Copyright 2008 Freescale Semiconductor, Inc.
 * Written By Michael Meissner
 *
 * Based on gcc/config/rs6000/crtsavres.asm from gcc
 * 64 bit additions from reading the PPC elf64abi document.
 *
 * This file is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * In addition to the permissions in the GNU General Public License, the
 * Free Software Foundation gives you unlimited permission to link the
 * compiled version of this file with other programs, and to distribute
 * those programs without any restriction coming from the use of this
 * file.  (The General Public License restrictions do apply in other
 * respects; for example, they cover modification of the file, and
 * distribution when not linked into another program.)
 *
 * This file is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 * As a special exception, if you link this library with files
 * compiled with GCC to produce an executable, this does not cause
 * the resulting executable to be covered by the GNU General Public License.
 * This exception does not however invalidate any other reasons why
 * the executable file might be covered by the GNU General Public License.
 */

#include <asm/ppc_asm.h>

	.file	"crtsavres.S"

#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE

#ifndef CONFIG_PPC64

	.section ".text"

/* Routines for saving integer registers, called by the compiler. */
/* Called with r11 pointing to the stack header word of the caller of the */
/* function, just beyond the end of the integer save area. */
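/* These entry points match the out-of-line helper names gcc emits */
/* under -Os (hence the CONFIG_CC_OPTIMIZE_FOR_SIZE guard above): a */
/* function that must save r29..r31, say, calls _savegpr_29, and */
/* execution falls through the r29, r30 and r31 stores to the blr. */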

_GLOBAL(_savegpr_14)
_GLOBAL(_save32gpr_14)
	stw	14,-72(11)	/* save gp registers */
_GLOBAL(_savegpr_15)
_GLOBAL(_save32gpr_15)
	stw	15,-68(11)
_GLOBAL(_savegpr_16)
_GLOBAL(_save32gpr_16)
	stw	16,-64(11)
_GLOBAL(_savegpr_17)
_GLOBAL(_save32gpr_17)
	stw	17,-60(11)
_GLOBAL(_savegpr_18)
_GLOBAL(_save32gpr_18)
	stw	18,-56(11)
_GLOBAL(_savegpr_19)
_GLOBAL(_save32gpr_19)
	stw	19,-52(11)
_GLOBAL(_savegpr_20)
_GLOBAL(_save32gpr_20)
	stw	20,-48(11)
_GLOBAL(_savegpr_21)
_GLOBAL(_save32gpr_21)
	stw	21,-44(11)
_GLOBAL(_savegpr_22)
_GLOBAL(_save32gpr_22)
	stw	22,-40(11)
_GLOBAL(_savegpr_23)
_GLOBAL(_save32gpr_23)
	stw	23,-36(11)
_GLOBAL(_savegpr_24)
_GLOBAL(_save32gpr_24)
	stw	24,-32(11)
_GLOBAL(_savegpr_25)
_GLOBAL(_save32gpr_25)
	stw	25,-28(11)
_GLOBAL(_savegpr_26)
_GLOBAL(_save32gpr_26)
	stw	26,-24(11)
_GLOBAL(_savegpr_27)
_GLOBAL(_save32gpr_27)
	stw	27,-20(11)
_GLOBAL(_savegpr_28)
_GLOBAL(_save32gpr_28)
	stw	28,-16(11)
_GLOBAL(_savegpr_29)
_GLOBAL(_save32gpr_29)
	stw	29,-12(11)
_GLOBAL(_savegpr_30)
_GLOBAL(_save32gpr_30)
	stw	30,-8(11)
_GLOBAL(_savegpr_31)
_GLOBAL(_save32gpr_31)
	stw	31,-4(11)
	blr

/* Routines for restoring integer registers, called by the compiler. */
/* Called with r11 pointing to the stack header word of the caller of the */
/* function, just beyond the end of the integer restore area. */

_GLOBAL(_restgpr_14)
_GLOBAL(_rest32gpr_14)
	lwz	14,-72(11)	/* restore gp registers */
_GLOBAL(_restgpr_15)
_GLOBAL(_rest32gpr_15)
	lwz	15,-68(11)
_GLOBAL(_restgpr_16)
_GLOBAL(_rest32gpr_16)
	lwz	16,-64(11)
_GLOBAL(_restgpr_17)
_GLOBAL(_rest32gpr_17)
	lwz	17,-60(11)
_GLOBAL(_restgpr_18)
_GLOBAL(_rest32gpr_18)
	lwz	18,-56(11)
_GLOBAL(_restgpr_19)
_GLOBAL(_rest32gpr_19)
	lwz	19,-52(11)
_GLOBAL(_restgpr_20)
_GLOBAL(_rest32gpr_20)
	lwz	20,-48(11)
_GLOBAL(_restgpr_21)
_GLOBAL(_rest32gpr_21)
	lwz	21,-44(11)
_GLOBAL(_restgpr_22)
_GLOBAL(_rest32gpr_22)
	lwz	22,-40(11)
_GLOBAL(_restgpr_23)
_GLOBAL(_rest32gpr_23)
	lwz	23,-36(11)
_GLOBAL(_restgpr_24)
_GLOBAL(_rest32gpr_24)
	lwz	24,-32(11)
_GLOBAL(_restgpr_25)
_GLOBAL(_rest32gpr_25)
	lwz	25,-28(11)
_GLOBAL(_restgpr_26)
_GLOBAL(_rest32gpr_26)
	lwz	26,-24(11)
_GLOBAL(_restgpr_27)
_GLOBAL(_rest32gpr_27)
	lwz	27,-20(11)
_GLOBAL(_restgpr_28)
_GLOBAL(_rest32gpr_28)
	lwz	28,-16(11)
_GLOBAL(_restgpr_29)
_GLOBAL(_rest32gpr_29)
	lwz	29,-12(11)
_GLOBAL(_restgpr_30)
_GLOBAL(_rest32gpr_30)
	lwz	30,-8(11)
_GLOBAL(_restgpr_31)
_GLOBAL(_rest32gpr_31)
	lwz	31,-4(11)
	blr

/* Routines for restoring integer registers, called by the compiler. */
/* Called with r11 pointing to the stack header word of the caller of the */
/* function, just beyond the end of the integer restore area. */

_GLOBAL(_restgpr_14_x)
_GLOBAL(_rest32gpr_14_x)
	lwz	14,-72(11)	/* restore gp registers */
_GLOBAL(_restgpr_15_x)
_GLOBAL(_rest32gpr_15_x)
	lwz	15,-68(11)
_GLOBAL(_restgpr_16_x)
_GLOBAL(_rest32gpr_16_x)
	lwz	16,-64(11)
_GLOBAL(_restgpr_17_x)
_GLOBAL(_rest32gpr_17_x)
	lwz	17,-60(11)
_GLOBAL(_restgpr_18_x)
_GLOBAL(_rest32gpr_18_x)
	lwz	18,-56(11)
_GLOBAL(_restgpr_19_x)
_GLOBAL(_rest32gpr_19_x)
	lwz	19,-52(11)
_GLOBAL(_restgpr_20_x)
_GLOBAL(_rest32gpr_20_x)
	lwz	20,-48(11)
_GLOBAL(_restgpr_21_x)
_GLOBAL(_rest32gpr_21_x)
	lwz	21,-44(11)
_GLOBAL(_restgpr_22_x)
_GLOBAL(_rest32gpr_22_x)
	lwz	22,-40(11)
_GLOBAL(_restgpr_23_x)
_GLOBAL(_rest32gpr_23_x)
	lwz	23,-36(11)
_GLOBAL(_restgpr_24_x)
_GLOBAL(_rest32gpr_24_x)
	lwz	24,-32(11)
_GLOBAL(_restgpr_25_x)
_GLOBAL(_rest32gpr_25_x)
	lwz	25,-28(11)
_GLOBAL(_restgpr_26_x)
_GLOBAL(_rest32gpr_26_x)
	lwz	26,-24(11)
_GLOBAL(_restgpr_27_x)
_GLOBAL(_rest32gpr_27_x)
	lwz	27,-20(11)
_GLOBAL(_restgpr_28_x)
_GLOBAL(_rest32gpr_28_x)
	lwz	28,-16(11)
_GLOBAL(_restgpr_29_x)
_GLOBAL(_rest32gpr_29_x)
	lwz	29,-12(11)
_GLOBAL(_restgpr_30_x)
_GLOBAL(_rest32gpr_30_x)
	lwz	30,-8(11)
_GLOBAL(_restgpr_31_x)
_GLOBAL(_rest32gpr_31_x)
	lwz	0,4(11)
	lwz	31,-4(11)
	mtlr	0
	mr	1,11
	blr

#ifdef CONFIG_ALTIVEC
/* Called with r0 pointing just beyond the end of the vector save area. */

_GLOBAL(_savevr_20)
	li	r11,-192
	stvx	vr20,r11,r0
_GLOBAL(_savevr_21)
	li	r11,-176
	stvx	vr21,r11,r0
_GLOBAL(_savevr_22)
	li	r11,-160
	stvx	vr22,r11,r0
_GLOBAL(_savevr_23)
	li	r11,-144
	stvx	vr23,r11,r0
_GLOBAL(_savevr_24)
	li	r11,-128
	stvx	vr24,r11,r0
_GLOBAL(_savevr_25)
	li	r11,-112
	stvx	vr25,r11,r0
_GLOBAL(_savevr_26)
	li	r11,-96
	stvx	vr26,r11,r0
_GLOBAL(_savevr_27)
	li	r11,-80
	stvx	vr27,r11,r0
_GLOBAL(_savevr_28)
	li	r11,-64
	stvx	vr28,r11,r0
_GLOBAL(_savevr_29)
	li	r11,-48
	stvx	vr29,r11,r0
_GLOBAL(_savevr_30)
	li	r11,-32
	stvx	vr30,r11,r0
_GLOBAL(_savevr_31)
	li	r11,-16
	stvx	vr31,r11,r0
	blr

_GLOBAL(_restvr_20)
	li	r11,-192
	lvx	vr20,r11,r0
_GLOBAL(_restvr_21)
	li	r11,-176
	lvx	vr21,r11,r0
_GLOBAL(_restvr_22)
	li	r11,-160
	lvx	vr22,r11,r0
_GLOBAL(_restvr_23)
	li	r11,-144
	lvx	vr23,r11,r0
_GLOBAL(_restvr_24)
	li	r11,-128
	lvx	vr24,r11,r0
_GLOBAL(_restvr_25)
	li	r11,-112
	lvx	vr25,r11,r0
_GLOBAL(_restvr_26)
	li	r11,-96
	lvx	vr26,r11,r0
_GLOBAL(_restvr_27)
	li	r11,-80
	lvx	vr27,r11,r0
_GLOBAL(_restvr_28)
	li	r11,-64
	lvx	vr28,r11,r0
_GLOBAL(_restvr_29)
	li	r11,-48
	lvx	vr29,r11,r0
_GLOBAL(_restvr_30)
	li	r11,-32
	lvx	vr30,r11,r0
_GLOBAL(_restvr_31)
	li	r11,-16
	lvx	vr31,r11,r0
	blr

#endif /* CONFIG_ALTIVEC */

#else /* CONFIG_PPC64 */

	.section ".text.save.restore","ax",@progbits

	.globl	_savegpr0_14
_savegpr0_14:
	std	r14,-144(r1)
	.globl	_savegpr0_15
_savegpr0_15:
	std	r15,-136(r1)
	.globl	_savegpr0_16
_savegpr0_16:
	std	r16,-128(r1)
	.globl	_savegpr0_17
_savegpr0_17:
	std	r17,-120(r1)
	.globl	_savegpr0_18
_savegpr0_18:
	std	r18,-112(r1)
	.globl	_savegpr0_19
_savegpr0_19:
	std	r19,-104(r1)
	.globl	_savegpr0_20
_savegpr0_20:
	std	r20,-96(r1)
	.globl	_savegpr0_21
_savegpr0_21:
	std	r21,-88(r1)
	.globl	_savegpr0_22
_savegpr0_22:
	std	r22,-80(r1)
	.globl	_savegpr0_23
_savegpr0_23:
	std	r23,-72(r1)
	.globl	_savegpr0_24
_savegpr0_24:
	std	r24,-64(r1)
	.globl	_savegpr0_25
_savegpr0_25:
	std	r25,-56(r1)
	.globl	_savegpr0_26
_savegpr0_26:
	std	r26,-48(r1)
	.globl	_savegpr0_27
_savegpr0_27:
	std	r27,-40(r1)
	.globl	_savegpr0_28
_savegpr0_28:
	std	r28,-32(r1)
	.globl	_savegpr0_29
_savegpr0_29:
	std	r29,-24(r1)
	.globl	_savegpr0_30
_savegpr0_30:
	std	r30,-16(r1)
	.globl	_savegpr0_31
_savegpr0_31:
	std	r31,-8(r1)
	std	r0,16(r1)
	blr

	.globl	_restgpr0_14
_restgpr0_14:
	ld	r14,-144(r1)
	.globl	_restgpr0_15
_restgpr0_15:
	ld	r15,-136(r1)
	.globl	_restgpr0_16
_restgpr0_16:
	ld	r16,-128(r1)
	.globl	_restgpr0_17
_restgpr0_17:
	ld	r17,-120(r1)
	.globl	_restgpr0_18
_restgpr0_18:
	ld	r18,-112(r1)
	.globl	_restgpr0_19
_restgpr0_19:
	ld	r19,-104(r1)
	.globl	_restgpr0_20
_restgpr0_20:
	ld	r20,-96(r1)
	.globl	_restgpr0_21
_restgpr0_21:
	ld	r21,-88(r1)
	.globl	_restgpr0_22
_restgpr0_22:
	ld	r22,-80(r1)
	.globl	_restgpr0_23
_restgpr0_23:
	ld	r23,-72(r1)
	.globl	_restgpr0_24
_restgpr0_24:
	ld	r24,-64(r1)
	.globl	_restgpr0_25
_restgpr0_25:
	ld	r25,-56(r1)
	.globl	_restgpr0_26
_restgpr0_26:
	ld	r26,-48(r1)
	.globl	_restgpr0_27
_restgpr0_27:
	ld	r27,-40(r1)
	.globl	_restgpr0_28
_restgpr0_28:
	ld	r28,-32(r1)
	.globl	_restgpr0_29
_restgpr0_29:
	ld	r0,16(r1)
	ld	r29,-24(r1)
	mtlr	r0
	ld	r30,-16(r1)
	ld	r31,-8(r1)
	blr

	.globl	_restgpr0_30
_restgpr0_30:
	ld	r30,-16(r1)
	.globl	_restgpr0_31
_restgpr0_31:
	ld	r0,16(r1)
	ld	r31,-8(r1)
	mtlr	r0
	blr

#ifdef CONFIG_ALTIVEC
/* Called with r0 pointing just beyond the end of the vector save area. */

	.globl	_savevr_20
_savevr_20:
	li	r12,-192
	stvx	vr20,r12,r0
	.globl	_savevr_21
_savevr_21:
	li	r12,-176
	stvx	vr21,r12,r0
	.globl	_savevr_22
_savevr_22:
	li	r12,-160
	stvx	vr22,r12,r0
	.globl	_savevr_23
_savevr_23:
	li	r12,-144
	stvx	vr23,r12,r0
	.globl	_savevr_24
_savevr_24:
	li	r12,-128
	stvx	vr24,r12,r0
	.globl	_savevr_25
_savevr_25:
	li	r12,-112
	stvx	vr25,r12,r0
	.globl	_savevr_26
_savevr_26:
	li	r12,-96
	stvx	vr26,r12,r0
	.globl	_savevr_27
_savevr_27:
	li	r12,-80
	stvx	vr27,r12,r0
	.globl	_savevr_28
_savevr_28:
	li	r12,-64
	stvx	vr28,r12,r0
	.globl	_savevr_29
_savevr_29:
	li	r12,-48
	stvx	vr29,r12,r0
	.globl	_savevr_30
_savevr_30:
	li	r12,-32
	stvx	vr30,r12,r0
	.globl	_savevr_31
_savevr_31:
	li	r12,-16
	stvx	vr31,r12,r0
	blr

	.globl	_restvr_20
_restvr_20:
	li	r12,-192
	lvx	vr20,r12,r0
	.globl	_restvr_21
_restvr_21:
	li	r12,-176
	lvx	vr21,r12,r0
	.globl	_restvr_22
_restvr_22:
	li	r12,-160
	lvx	vr22,r12,r0
	.globl	_restvr_23
_restvr_23:
	li	r12,-144
	lvx	vr23,r12,r0
	.globl	_restvr_24
_restvr_24:
	li	r12,-128
	lvx	vr24,r12,r0
	.globl	_restvr_25
_restvr_25:
	li	r12,-112
	lvx	vr25,r12,r0
	.globl	_restvr_26
_restvr_26:
	li	r12,-96
	lvx	vr26,r12,r0
	.globl	_restvr_27
_restvr_27:
	li	r12,-80
	lvx	vr27,r12,r0
	.globl	_restvr_28
_restvr_28:
	li	r12,-64
	lvx	vr28,r12,r0
	.globl	_restvr_29
_restvr_29:
	li	r12,-48
	lvx	vr29,r12,r0
	.globl	_restvr_30
_restvr_30:
	li	r12,-32
	lvx	vr30,r12,r0
	.globl	_restvr_31
_restvr_31:
	li	r12,-16
	lvx	vr31,r12,r0
	blr

#endif /* CONFIG_ALTIVEC */

#endif /* CONFIG_PPC64 */

#endif
43
arch/powerpc/lib/devres.c
Normal file
@ -0,0 +1,43 @@
/*
 * Copyright (C) 2008 Freescale Semiconductor, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/device.h>	/* devres_*(), devm_ioremap_release() */
#include <linux/gfp.h>
#include <linux/io.h>		/* ioremap_prot() */
#include <linux/export.h>	/* EXPORT_SYMBOL() */

/**
 * devm_ioremap_prot - Managed ioremap_prot()
 * @dev: Generic device to remap IO address for
 * @offset: BUS offset to map
 * @size: Size of map
 * @flags: Page flags
 *
 * Managed ioremap_prot().  Map is automatically unmapped on driver
 * detach.
 */
void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
				size_t size, unsigned long flags)
{
	void __iomem **ptr, *addr;

	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
	if (!ptr)
		return NULL;

	addr = ioremap_prot(offset, size, flags);
	if (addr) {
		*ptr = addr;
		devres_add(dev, ptr);
	} else
		devres_free(ptr);

	return addr;
}
EXPORT_SYMBOL(devm_ioremap_prot);
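
/*
 * Typical use, as a sketch (the platform-device lookup shown here is
 * illustrative and not part of this file):
 *
 *	res  = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 *	base = devm_ioremap_prot(&pdev->dev, res->start,
 *				 resource_size(res), _PAGE_NO_CACHE);
 *	if (!base)
 *		return -ENOMEM;
 *
 * The devres core unmaps the region automatically when the driver
 * detaches, so no explicit iounmap() is needed.
 */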
59
arch/powerpc/lib/div64.S
Normal file
@ -0,0 +1,59 @@
/*
 * Divide a 64-bit unsigned number by a 32-bit unsigned number.
 * This routine assumes that the top 32 bits of the dividend are
 * non-zero to start with.
 * On entry, r3 points to the dividend, which gets overwritten with
 * the 64-bit quotient, and r4 contains the divisor.
 * On exit, r3 contains the remainder.
 *
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
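/*
 * For reference, the C contract this routine implements (restating the
 * description above; the prototype lives in the generic div64 header):
 *
 *	uint32_t __div64_32(uint64_t *dividend, uint32_t divisor)
 *	{
 *		uint32_t remainder = *dividend % divisor;
 *		*dividend = *dividend / divisor;
 *		return remainder;
 *	}
 */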
#include <asm/ppc_asm.h>
#include <asm/processor.h>

_GLOBAL(__div64_32)
	lwz	r5,0(r3)	# get the dividend into r5/r6
	lwz	r6,4(r3)
	cmplw	r5,r4
	li	r7,0
	li	r8,0
	blt	1f
	divwu	r7,r5,r4	# if dividend.hi >= divisor,
	mullw	r0,r7,r4	# quotient.hi = dividend.hi / divisor
	subf.	r5,r0,r5	# dividend.hi %= divisor
	beq	3f
1:	mr	r11,r5		# here dividend.hi != 0
	andis.	r0,r5,0xc000
	bne	2f
	cntlzw	r0,r5		# we are shifting the dividend right
	li	r10,-1		# to make it < 2^32, and shifting
	srw	r10,r10,r0	# the divisor right the same amount,
	addc	r9,r4,r10	# rounding up (so the estimate cannot
	andc	r11,r6,r10	# ever be too large, only too small)
	andc	r9,r9,r10
	addze	r9,r9
	or	r11,r5,r11
	rotlw	r9,r9,r0
	rotlw	r11,r11,r0
	divwu	r11,r11,r9	# then we divide the shifted quantities
2:	mullw	r10,r11,r4	# to get an estimate of the quotient,
	mulhwu	r9,r11,r4	# multiply the estimate by the divisor,
	subfc	r6,r10,r6	# take the product from the divisor,
	add	r8,r8,r11	# and add the estimate to the accumulated
	subfe.	r5,r9,r5	# quotient
	bne	1b
3:	cmplw	r6,r4
	blt	4f
	divwu	r0,r6,r4	# perform the remaining 32-bit division
	mullw	r10,r0,r4	# and get the remainder
	add	r8,r8,r0
	subf	r6,r10,r6
4:	stw	r7,0(r3)	# return the quotient in *r3
	stw	r8,4(r3)
	mr	r3,r6		# return the remainder in r3
	blr
761
arch/powerpc/lib/feature-fixups-test.S
Normal file
@ -0,0 +1,761 @@
/*
 * Copyright 2008 Michael Ellerman, IBM Corporation.
 *
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <asm/feature-fixups.h>
#include <asm/ppc_asm.h>
#include <asm/synch.h>

	.text

#define globl(x)	\
	.globl x;	\
x:

globl(ftr_fixup_test1)
	or	1,1,1
	or	2,2,2	/* fixup will nop out this instruction */
	or	3,3,3

globl(end_ftr_fixup_test1)

globl(ftr_fixup_test1_orig)
	or	1,1,1
	or	2,2,2
	or	3,3,3

globl(ftr_fixup_test1_expected)
	or	1,1,1
	nop
	or	3,3,3

globl(ftr_fixup_test2)
	or	1,1,1
	or	2,2,2	/* fixup will replace this with ftr_fixup_test2_alt */
	or	3,3,3

globl(end_ftr_fixup_test2)

globl(ftr_fixup_test2_orig)
	or	1,1,1
	or	2,2,2
	or	3,3,3

globl(ftr_fixup_test2_alt)
	or	31,31,31

globl(ftr_fixup_test2_expected)
	or	1,1,1
	or	31,31,31
	or	3,3,3

globl(ftr_fixup_test3)
	or	1,1,1
	or	2,2,2	/* fixup will fail to replace this */
	or	3,3,3

globl(end_ftr_fixup_test3)

globl(ftr_fixup_test3_orig)
	or	1,1,1
	or	2,2,2
	or	3,3,3

globl(ftr_fixup_test3_alt)
	or	31,31,31
	or	31,31,31

globl(ftr_fixup_test4)
	or	1,1,1
	or	2,2,2
	or	2,2,2
	or	2,2,2
	or	2,2,2
	or	3,3,3

globl(end_ftr_fixup_test4)

globl(ftr_fixup_test4_expected)
	or	1,1,1
	or	31,31,31
	or	31,31,31
	nop
	nop
	or	3,3,3

globl(ftr_fixup_test4_orig)
	or	1,1,1
	or	2,2,2
	or	2,2,2
	or	2,2,2
	or	2,2,2
	or	3,3,3

globl(ftr_fixup_test4_alt)
	or	31,31,31
	or	31,31,31


globl(ftr_fixup_test5)
	or	1,1,1
BEGIN_FTR_SECTION
	or	2,2,2
	or	2,2,2
	or	2,2,2
	or	2,2,2
	or	2,2,2
	or	2,2,2
	or	2,2,2
FTR_SECTION_ELSE
2:	b	3f
3:	or	5,5,5
	beq	3b
	b	1f
	or	6,6,6
	b	2b
1:	bdnz	3b
ALT_FTR_SECTION_END(0, 1)
	or	1,1,1

globl(end_ftr_fixup_test5)

globl(ftr_fixup_test5_expected)
	or	1,1,1
2:	b	3f
3:	or	5,5,5
	beq	3b
	b	1f
	or	6,6,6
	b	2b
1:	bdnz	3b
	or	1,1,1

globl(ftr_fixup_test6)
1:	or	1,1,1
BEGIN_FTR_SECTION
	or	5,5,5
2:	PPC_LCMPI	r3,0
	beq	4f
	blt	2b
	b	1b
	b	4f
FTR_SECTION_ELSE
2:	or	2,2,2
	PPC_LCMPI	r3,1
	beq	3f
	blt	2b
	b	3f
	b	1b
ALT_FTR_SECTION_END(0, 1)
3:	or	1,1,1
	or	2,2,2
4:	or	3,3,3

globl(end_ftr_fixup_test6)

globl(ftr_fixup_test6_expected)
1:	or	1,1,1
2:	or	2,2,2
	PPC_LCMPI	r3,1
	beq	3f
	blt	2b
	b	3f
	b	1b
2:	or	1,1,1
	or	2,2,2
3:	or	3,3,3


#if 0
/* Test that if we have a larger else case the assembler spots it and
 * reports an error. #if 0'ed so as not to break the build normally.
 */
ftr_fixup_test7:
	or	1,1,1
BEGIN_FTR_SECTION
	or	2,2,2
	or	2,2,2
	or	2,2,2
FTR_SECTION_ELSE
	or	3,3,3
	or	3,3,3
	or	3,3,3
	or	3,3,3
ALT_FTR_SECTION_END(0, 1)
	or	1,1,1
#endif

#define MAKE_MACRO_TEST(TYPE)	\
globl(ftr_fixup_test_ ##TYPE##_macros)	\
	or	1,1,1;	\
	/* Basic test, this section should all be nop'ed */	\
BEGIN_##TYPE##_SECTION	\
	or	2,2,2;	\
	or	2,2,2;	\
	or	2,2,2;	\
END_##TYPE##_SECTION(0, 1)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Basic test, this section should NOT be nop'ed */	\
BEGIN_##TYPE##_SECTION	\
	or	2,2,2;	\
	or	2,2,2;	\
	or	2,2,2;	\
END_##TYPE##_SECTION(0, 0)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Nesting test, inner section should be nop'ed */	\
BEGIN_##TYPE##_SECTION	\
	or	2,2,2;	\
	or	2,2,2;	\
BEGIN_##TYPE##_SECTION_NESTED(80)	\
	or	3,3,3;	\
	or	3,3,3;	\
END_##TYPE##_SECTION_NESTED(0, 1, 80)	\
	or	2,2,2;	\
	or	2,2,2;	\
END_##TYPE##_SECTION(0, 0)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Nesting test, whole section should be nop'ed */	\
BEGIN_##TYPE##_SECTION	\
	or	2,2,2;	\
	or	2,2,2;	\
BEGIN_##TYPE##_SECTION_NESTED(80)	\
	or	3,3,3;	\
	or	3,3,3;	\
END_##TYPE##_SECTION_NESTED(0, 0, 80)	\
	or	2,2,2;	\
	or	2,2,2;	\
END_##TYPE##_SECTION(0, 1)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Nesting test, none should be nop'ed */	\
BEGIN_##TYPE##_SECTION	\
	or	2,2,2;	\
	or	2,2,2;	\
BEGIN_##TYPE##_SECTION_NESTED(80)	\
	or	3,3,3;	\
	or	3,3,3;	\
END_##TYPE##_SECTION_NESTED(0, 0, 80)	\
	or	2,2,2;	\
	or	2,2,2;	\
END_##TYPE##_SECTION(0, 0)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Basic alt section test, default case should be taken */	\
BEGIN_##TYPE##_SECTION	\
	or	3,3,3;	\
	or	3,3,3;	\
	or	3,3,3;	\
##TYPE##_SECTION_ELSE	\
	or	5,5,5;	\
	or	5,5,5;	\
ALT_##TYPE##_SECTION_END(0, 0)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Basic alt section test, else case should be taken */	\
BEGIN_##TYPE##_SECTION	\
	or	3,3,3;	\
	or	3,3,3;	\
	or	3,3,3;	\
##TYPE##_SECTION_ELSE	\
	or	31,31,31;	\
	or	31,31,31;	\
	or	31,31,31;	\
ALT_##TYPE##_SECTION_END(0, 1)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Alt with smaller else case, should be padded with nops */	\
BEGIN_##TYPE##_SECTION	\
	or	3,3,3;	\
	or	3,3,3;	\
	or	3,3,3;	\
##TYPE##_SECTION_ELSE	\
	or	31,31,31;	\
ALT_##TYPE##_SECTION_END(0, 1)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Alt section with nested section in default case */	\
	/* Default case should be taken, with nop'ed inner section */	\
BEGIN_##TYPE##_SECTION	\
	or	3,3,3;	\
BEGIN_##TYPE##_SECTION_NESTED(95)	\
	or	3,3,3;	\
	or	3,3,3;	\
END_##TYPE##_SECTION_NESTED(0, 1, 95)	\
	or	3,3,3;	\
##TYPE##_SECTION_ELSE	\
	or	2,2,2;	\
	or	2,2,2;	\
ALT_##TYPE##_SECTION_END(0, 0)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Alt section with nested section in else, default taken */	\
BEGIN_##TYPE##_SECTION	\
	or	3,3,3;	\
	or	3,3,3;	\
	or	3,3,3;	\
##TYPE##_SECTION_ELSE	\
	or	5,5,5;	\
BEGIN_##TYPE##_SECTION_NESTED(95)	\
	or	3,3,3;	\
END_##TYPE##_SECTION_NESTED(0, 1, 95)	\
	or	5,5,5;	\
ALT_##TYPE##_SECTION_END(0, 0)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Alt section with nested section in else, else taken & nop */	\
BEGIN_##TYPE##_SECTION	\
	or	3,3,3;	\
	or	3,3,3;	\
	or	3,3,3;	\
##TYPE##_SECTION_ELSE	\
	or	5,5,5;	\
BEGIN_##TYPE##_SECTION_NESTED(95)	\
	or	3,3,3;	\
END_##TYPE##_SECTION_NESTED(0, 1, 95)	\
	or	5,5,5;	\
ALT_##TYPE##_SECTION_END(0, 1)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Feature section with nested alt section, default taken */	\
BEGIN_##TYPE##_SECTION	\
	or	2,2,2;	\
BEGIN_##TYPE##_SECTION_NESTED(95)	\
	or	1,1,1;	\
##TYPE##_SECTION_ELSE_NESTED(95)	\
	or	5,5,5;	\
ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95)	\
	or	2,2,2;	\
END_##TYPE##_SECTION(0, 0)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Feature section with nested alt section, else taken */	\
BEGIN_##TYPE##_SECTION	\
	or	2,2,2;	\
BEGIN_##TYPE##_SECTION_NESTED(95)	\
	or	1,1,1;	\
##TYPE##_SECTION_ELSE_NESTED(95)	\
	or	5,5,5;	\
ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95)	\
	or	2,2,2;	\
END_##TYPE##_SECTION(0, 0)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Feature section with nested alt section, all nop'ed */	\
BEGIN_##TYPE##_SECTION	\
	or	2,2,2;	\
BEGIN_##TYPE##_SECTION_NESTED(95)	\
	or	1,1,1;	\
##TYPE##_SECTION_ELSE_NESTED(95)	\
	or	5,5,5;	\
ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95)	\
	or	2,2,2;	\
END_##TYPE##_SECTION(0, 1)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Nested alt sections, default with inner default taken */	\
BEGIN_##TYPE##_SECTION	\
	or	2,2,2;	\
BEGIN_##TYPE##_SECTION_NESTED(95)	\
	or	1,1,1;	\
##TYPE##_SECTION_ELSE_NESTED(95)	\
	or	5,5,5;	\
ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95)	\
	or	2,2,2;	\
##TYPE##_SECTION_ELSE	\
	or	31,31,31;	\
BEGIN_##TYPE##_SECTION_NESTED(94)	\
	or	5,5,5;	\
##TYPE##_SECTION_ELSE_NESTED(94)	\
	or	1,1,1;	\
ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94)	\
	or	31,31,31;	\
ALT_##TYPE##_SECTION_END(0, 0)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Nested alt sections, default with inner else taken */	\
BEGIN_##TYPE##_SECTION	\
	or	2,2,2;	\
BEGIN_##TYPE##_SECTION_NESTED(95)	\
	or	1,1,1;	\
##TYPE##_SECTION_ELSE_NESTED(95)	\
	or	5,5,5;	\
ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95)	\
	or	2,2,2;	\
##TYPE##_SECTION_ELSE	\
	or	31,31,31;	\
BEGIN_##TYPE##_SECTION_NESTED(94)	\
	or	5,5,5;	\
##TYPE##_SECTION_ELSE_NESTED(94)	\
	or	1,1,1;	\
ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94)	\
	or	31,31,31;	\
ALT_##TYPE##_SECTION_END(0, 0)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Nested alt sections, else with inner default taken */	\
BEGIN_##TYPE##_SECTION	\
	or	2,2,2;	\
BEGIN_##TYPE##_SECTION_NESTED(95)	\
	or	1,1,1;	\
##TYPE##_SECTION_ELSE_NESTED(95)	\
	or	5,5,5;	\
ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95)	\
	or	2,2,2;	\
##TYPE##_SECTION_ELSE	\
	or	31,31,31;	\
BEGIN_##TYPE##_SECTION_NESTED(94)	\
	or	5,5,5;	\
##TYPE##_SECTION_ELSE_NESTED(94)	\
	or	1,1,1;	\
ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94)	\
	or	31,31,31;	\
ALT_##TYPE##_SECTION_END(0, 1)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Nested alt sections, else with inner else taken */	\
BEGIN_##TYPE##_SECTION	\
	or	2,2,2;	\
BEGIN_##TYPE##_SECTION_NESTED(95)	\
	or	1,1,1;	\
##TYPE##_SECTION_ELSE_NESTED(95)	\
	or	5,5,5;	\
ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95)	\
	or	2,2,2;	\
##TYPE##_SECTION_ELSE	\
	or	31,31,31;	\
BEGIN_##TYPE##_SECTION_NESTED(94)	\
	or	5,5,5;	\
##TYPE##_SECTION_ELSE_NESTED(94)	\
	or	1,1,1;	\
ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94)	\
	or	31,31,31;	\
ALT_##TYPE##_SECTION_END(0, 1)	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Nested alt sections, else can have large else case */	\
BEGIN_##TYPE##_SECTION	\
	or	2,2,2;	\
	or	2,2,2;	\
	or	2,2,2;	\
	or	2,2,2;	\
##TYPE##_SECTION_ELSE	\
BEGIN_##TYPE##_SECTION_NESTED(94)	\
	or	5,5,5;	\
	or	5,5,5;	\
	or	5,5,5;	\
	or	5,5,5;	\
##TYPE##_SECTION_ELSE_NESTED(94)	\
	or	1,1,1;	\
	or	1,1,1;	\
	or	1,1,1;	\
	or	1,1,1;	\
ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94)	\
ALT_##TYPE##_SECTION_END(0, 1)	\
	or	1,1,1;	\
	or	1,1,1;

#define MAKE_MACRO_TEST_EXPECTED(TYPE)	\
globl(ftr_fixup_test_ ##TYPE##_macros_expected)	\
	or	1,1,1;	\
	/* Basic test, this section should all be nop'ed */	\
	/* BEGIN_##TYPE##_SECTION */	\
	nop;	\
	nop;	\
	nop;	\
	/* END_##TYPE##_SECTION(0, 1) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Basic test, this section should NOT be nop'ed */	\
	/* BEGIN_##TYPE##_SECTION */	\
	or	2,2,2;	\
	or	2,2,2;	\
	or	2,2,2;	\
	/* END_##TYPE##_SECTION(0, 0) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Nesting test, inner section should be nop'ed */	\
	/* BEGIN_##TYPE##_SECTION */	\
	or	2,2,2;	\
	or	2,2,2;	\
	/* BEGIN_##TYPE##_SECTION_NESTED(80) */	\
	nop;	\
	nop;	\
	/* END_##TYPE##_SECTION_NESTED(0, 1, 80) */	\
	or	2,2,2;	\
	or	2,2,2;	\
	/* END_##TYPE##_SECTION(0, 0) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Nesting test, whole section should be nop'ed */	\
	/* NB. inner section is not nop'ed, but then entire outer is */	\
	/* BEGIN_##TYPE##_SECTION */	\
	nop;	\
	nop;	\
	/* BEGIN_##TYPE##_SECTION_NESTED(80) */	\
	nop;	\
	nop;	\
	/* END_##TYPE##_SECTION_NESTED(0, 0, 80) */	\
	nop;	\
	nop;	\
	/* END_##TYPE##_SECTION(0, 1) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Nesting test, none should be nop'ed */	\
	/* BEGIN_##TYPE##_SECTION */	\
	or	2,2,2;	\
	or	2,2,2;	\
	/* BEGIN_##TYPE##_SECTION_NESTED(80) */	\
	or	3,3,3;	\
	or	3,3,3;	\
	/* END_##TYPE##_SECTION_NESTED(0, 0, 80) */	\
	or	2,2,2;	\
	or	2,2,2;	\
	/* END_##TYPE##_SECTION(0, 0) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Basic alt section test, default case should be taken */	\
	/* BEGIN_##TYPE##_SECTION */	\
	or	3,3,3;	\
	or	3,3,3;	\
	or	3,3,3;	\
	/* ##TYPE##_SECTION_ELSE */	\
	/* or	5,5,5; */	\
	/* or	5,5,5; */	\
	/* ALT_##TYPE##_SECTION_END(0, 0) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Basic alt section test, else case should be taken */	\
	/* BEGIN_##TYPE##_SECTION */	\
	/* or	3,3,3; */	\
	/* or	3,3,3; */	\
	/* or	3,3,3; */	\
	/* ##TYPE##_SECTION_ELSE */	\
	or	31,31,31;	\
	or	31,31,31;	\
	or	31,31,31;	\
	/* ALT_##TYPE##_SECTION_END(0, 1) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Alt with smaller else case, should be padded with nops */	\
	/* BEGIN_##TYPE##_SECTION */	\
	/* or	3,3,3; */	\
	/* or	3,3,3; */	\
	/* or	3,3,3; */	\
	/* ##TYPE##_SECTION_ELSE */	\
	or	31,31,31;	\
	nop;	\
	nop;	\
	/* ALT_##TYPE##_SECTION_END(0, 1) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Alt section with nested section in default case */	\
	/* Default case should be taken, with nop'ed inner section */	\
	/* BEGIN_##TYPE##_SECTION */	\
	or	3,3,3;	\
	/* BEGIN_##TYPE##_SECTION_NESTED(95) */	\
	nop;	\
	nop;	\
	/* END_##TYPE##_SECTION_NESTED(0, 1, 95) */	\
	or	3,3,3;	\
	/* ##TYPE##_SECTION_ELSE */	\
	/* or	2,2,2; */	\
	/* or	2,2,2; */	\
	/* ALT_##TYPE##_SECTION_END(0, 0) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Alt section with nested section in else, default taken */	\
	/* BEGIN_##TYPE##_SECTION */	\
	or	3,3,3;	\
	or	3,3,3;	\
	or	3,3,3;	\
	/* ##TYPE##_SECTION_ELSE */	\
	/* or	5,5,5; */	\
	/* BEGIN_##TYPE##_SECTION_NESTED(95) */	\
	/* or	3,3,3; */	\
	/* END_##TYPE##_SECTION_NESTED(0, 1, 95) */	\
	/* or	5,5,5; */	\
	/* ALT_##TYPE##_SECTION_END(0, 0) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Alt section with nested section in else, else taken & nop */	\
	/* BEGIN_##TYPE##_SECTION */	\
	/* or	3,3,3; */	\
	/* or	3,3,3; */	\
	/* or	3,3,3; */	\
	/* ##TYPE##_SECTION_ELSE */	\
	or	5,5,5;	\
	/* BEGIN_##TYPE##_SECTION_NESTED(95) */	\
	nop;	\
	/* END_##TYPE##_SECTION_NESTED(0, 1, 95) */	\
	or	5,5,5;	\
	/* ALT_##TYPE##_SECTION_END(0, 1) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Feature section with nested alt section, default taken */	\
	/* BEGIN_##TYPE##_SECTION */	\
	or	2,2,2;	\
	/* BEGIN_##TYPE##_SECTION_NESTED(95) */	\
	or	1,1,1;	\
	/* ##TYPE##_SECTION_ELSE_NESTED(95) */	\
	/* or	5,5,5; */	\
	/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) */	\
	or	2,2,2;	\
	/* END_##TYPE##_SECTION(0, 0) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Feature section with nested alt section, else taken */	\
	/* BEGIN_##TYPE##_SECTION */	\
	or	2,2,2;	\
	/* BEGIN_##TYPE##_SECTION_NESTED(95) */	\
	/* or	1,1,1; */	\
	/* ##TYPE##_SECTION_ELSE_NESTED(95) */	\
	or	5,5,5;	\
	/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */	\
	or	2,2,2;	\
	/* END_##TYPE##_SECTION(0, 0) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Feature section with nested alt section, all nop'ed */	\
	/* BEGIN_##TYPE##_SECTION */	\
	nop;	\
	/* BEGIN_##TYPE##_SECTION_NESTED(95) */	\
	nop;	\
	/* ##TYPE##_SECTION_ELSE_NESTED(95) */	\
	/* or	5,5,5; */	\
	/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) */	\
	nop;	\
	/* END_##TYPE##_SECTION(0, 1) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Nested alt sections, default with inner default taken */	\
	/* BEGIN_##TYPE##_SECTION */	\
	or	2,2,2;	\
	/* BEGIN_##TYPE##_SECTION_NESTED(95) */	\
	or	1,1,1;	\
	/* ##TYPE##_SECTION_ELSE_NESTED(95) */	\
	/* or	5,5,5; */	\
	/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) */	\
	or	2,2,2;	\
	/* ##TYPE##_SECTION_ELSE */	\
	/* or	31,31,31; */	\
	/* BEGIN_##TYPE##_SECTION_NESTED(94) */	\
	/* or	5,5,5; */	\
	/* ##TYPE##_SECTION_ELSE_NESTED(94) */	\
	/* or	1,1,1; */	\
	/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) */	\
	/* or	31,31,31; */	\
	/* ALT_##TYPE##_SECTION_END(0, 0) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Nested alt sections, default with inner else taken */	\
	/* BEGIN_##TYPE##_SECTION */	\
	or	2,2,2;	\
	/* BEGIN_##TYPE##_SECTION_NESTED(95) */	\
	/* or	1,1,1; */	\
	/* ##TYPE##_SECTION_ELSE_NESTED(95) */	\
	or	5,5,5;	\
	/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */	\
	or	2,2,2;	\
	/* ##TYPE##_SECTION_ELSE */	\
	/* or	31,31,31; */	\
	/* BEGIN_##TYPE##_SECTION_NESTED(94) */	\
	/* or	5,5,5; */	\
	/* ##TYPE##_SECTION_ELSE_NESTED(94) */	\
	/* or	1,1,1; */	\
	/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) */	\
	/* or	31,31,31; */	\
	/* ALT_##TYPE##_SECTION_END(0, 0) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Nested alt sections, else with inner default taken */	\
	/* BEGIN_##TYPE##_SECTION */	\
	/* or	2,2,2; */	\
	/* BEGIN_##TYPE##_SECTION_NESTED(95) */	\
	/* or	1,1,1; */	\
	/* ##TYPE##_SECTION_ELSE_NESTED(95) */	\
	/* or	5,5,5; */	\
	/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */	\
	/* or	2,2,2; */	\
	/* ##TYPE##_SECTION_ELSE */	\
	or	31,31,31;	\
	/* BEGIN_##TYPE##_SECTION_NESTED(94) */	\
	or	5,5,5;	\
	/* ##TYPE##_SECTION_ELSE_NESTED(94) */	\
	/* or	1,1,1; */	\
	/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) */	\
	or	31,31,31;	\
	/* ALT_##TYPE##_SECTION_END(0, 1) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Nested alt sections, else with inner else taken */	\
	/* BEGIN_##TYPE##_SECTION */	\
	/* or	2,2,2; */	\
	/* BEGIN_##TYPE##_SECTION_NESTED(95) */	\
	/* or	1,1,1; */	\
	/* ##TYPE##_SECTION_ELSE_NESTED(95) */	\
	/* or	5,5,5; */	\
	/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */	\
	/* or	2,2,2; */	\
	/* ##TYPE##_SECTION_ELSE */	\
	or	31,31,31;	\
	/* BEGIN_##TYPE##_SECTION_NESTED(94) */	\
	/* or	5,5,5; */	\
	/* ##TYPE##_SECTION_ELSE_NESTED(94) */	\
	or	1,1,1;	\
	/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94) */	\
	or	31,31,31;	\
	/* ALT_##TYPE##_SECTION_END(0, 1) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* Nested alt sections, else can have large else case */	\
	/* BEGIN_##TYPE##_SECTION */	\
	/* or	2,2,2; */	\
	/* or	2,2,2; */	\
	/* or	2,2,2; */	\
	/* or	2,2,2; */	\
	/* ##TYPE##_SECTION_ELSE */	\
	/* BEGIN_##TYPE##_SECTION_NESTED(94) */	\
	/* or	5,5,5; */	\
	/* or	5,5,5; */	\
	/* or	5,5,5; */	\
	/* or	5,5,5; */	\
	/* ##TYPE##_SECTION_ELSE_NESTED(94) */	\
	or	1,1,1;	\
	or	1,1,1;	\
	or	1,1,1;	\
	or	1,1,1;	\
	/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94) */	\
	/* ALT_##TYPE##_SECTION_END(0, 1) */	\
	or	1,1,1;	\
	or	1,1,1;

MAKE_MACRO_TEST(FTR);
MAKE_MACRO_TEST_EXPECTED(FTR);

#ifdef CONFIG_PPC64
MAKE_MACRO_TEST(FW_FTR);
MAKE_MACRO_TEST_EXPECTED(FW_FTR);
#endif

globl(lwsync_fixup_test)
1:	or	1,1,1
	LWSYNC
globl(end_lwsync_fixup_test)

globl(lwsync_fixup_test_expected_LWSYNC)
1:	or	1,1,1
	lwsync

globl(lwsync_fixup_test_expected_SYNC)
1:	or	1,1,1
	sync
376
arch/powerpc/lib/feature-fixups.c
Normal file
@@ -0,0 +1,376 @@
/*
 * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
 *
 * Modifications for ppc64:
 *  Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
 *
 * Copyright 2008 Michael Ellerman, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/init.h>
#include <asm/cputable.h>
#include <asm/code-patching.h>
#include <asm/page.h>
#include <asm/sections.h>


struct fixup_entry {
	unsigned long	mask;
	unsigned long	value;
	long		start_off;
	long		end_off;
	long		alt_start_off;
	long		alt_end_off;
};

static unsigned int *calc_addr(struct fixup_entry *fcur, long offset)
{
	/*
	 * We store the offset to the code as a negative offset from
	 * the start of the alt_entry, to support the VDSO. This
	 * routine converts that back into an actual address.
	 */
	return (unsigned int *)((unsigned long)fcur + offset);
}

static int patch_alt_instruction(unsigned int *src, unsigned int *dest,
				 unsigned int *alt_start, unsigned int *alt_end)
{
	unsigned int instr;

	instr = *src;

	if (instr_is_relative_branch(*src)) {
		unsigned int *target = (unsigned int *)branch_target(src);

		/* Branch within the section doesn't need translating */
		if (target < alt_start || target >= alt_end) {
			instr = translate_branch(dest, src);
			if (!instr)
				return 1;
		}
	}

	patch_instruction(dest, instr);

	return 0;
}

static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
{
	unsigned int *start, *end, *alt_start, *alt_end, *src, *dest;

	start = calc_addr(fcur, fcur->start_off);
	end = calc_addr(fcur, fcur->end_off);
	alt_start = calc_addr(fcur, fcur->alt_start_off);
	alt_end = calc_addr(fcur, fcur->alt_end_off);

	if ((alt_end - alt_start) > (end - start))
		return 1;

	if ((value & fcur->mask) == fcur->value)
		return 0;

	src = alt_start;
	dest = start;

	for (; src < alt_end; src++, dest++) {
		if (patch_alt_instruction(src, dest, alt_start, alt_end))
			return 1;
	}

	for (; dest < end; dest++)
		patch_instruction(dest, PPC_INST_NOP);

	return 0;
}

void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
{
	struct fixup_entry *fcur, *fend;

	fcur = fixup_start;
	fend = fixup_end;

	for (; fcur < fend; fcur++) {
		if (patch_feature_section(value, fcur)) {
			WARN_ON(1);
			printk("Unable to patch feature section at %p - %p" \
				" with %p - %p\n",
				calc_addr(fcur, fcur->start_off),
				calc_addr(fcur, fcur->end_off),
				calc_addr(fcur, fcur->alt_start_off),
				calc_addr(fcur, fcur->alt_end_off));
		}
	}
}

void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
{
	long *start, *end;
	unsigned int *dest;

	if (!(value & CPU_FTR_LWSYNC))
		return;

	start = fixup_start;
	end = fixup_end;

	for (; start < end; start++) {
		dest = (void *)start + *start;
		patch_instruction(dest, PPC_INST_LWSYNC);
	}
}

void do_final_fixups(void)
{
#if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE)
	int *src, *dest;
	unsigned long length;

	if (PHYSICAL_START == 0)
		return;

	src = (int *)(KERNELBASE + PHYSICAL_START);
	dest = (int *)KERNELBASE;
	length = (__end_interrupts - _stext) / sizeof(int);

	while (length--) {
		patch_instruction(dest, *src);
		src++;
		dest++;
	}
#endif
}

#ifdef CONFIG_FTR_FIXUP_SELFTEST

#define check(x)	\
	if (!(x)) printk("feature-fixups: test failed at line %d\n", __LINE__);

/* This must be after the text it fixes up, vmlinux.lds.S enforces that atm */
static struct fixup_entry fixup;

static long calc_offset(struct fixup_entry *entry, unsigned int *p)
{
	return (unsigned long)p - (unsigned long)entry;
}

static void test_basic_patching(void)
{
	extern unsigned int ftr_fixup_test1;
	extern unsigned int end_ftr_fixup_test1;
	extern unsigned int ftr_fixup_test1_orig;
	extern unsigned int ftr_fixup_test1_expected;
	int size = &end_ftr_fixup_test1 - &ftr_fixup_test1;

	fixup.value = fixup.mask = 8;
	fixup.start_off = calc_offset(&fixup, &ftr_fixup_test1 + 1);
	fixup.end_off = calc_offset(&fixup, &ftr_fixup_test1 + 2);
	fixup.alt_start_off = fixup.alt_end_off = 0;

	/* Sanity check */
	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);

	/* Check we don't patch if the value matches */
	patch_feature_section(8, &fixup);
	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);

	/* Check we do patch if the value doesn't match */
	patch_feature_section(0, &fixup);
	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);

	/* Check we do patch if the mask doesn't match */
	memcpy(&ftr_fixup_test1, &ftr_fixup_test1_orig, size);
	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
	patch_feature_section(~8, &fixup);
	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);
}

static void test_alternative_patching(void)
{
	extern unsigned int ftr_fixup_test2;
	extern unsigned int end_ftr_fixup_test2;
	extern unsigned int ftr_fixup_test2_orig;
	extern unsigned int ftr_fixup_test2_alt;
	extern unsigned int ftr_fixup_test2_expected;
	int size = &end_ftr_fixup_test2 - &ftr_fixup_test2;

	fixup.value = fixup.mask = 0xF;
	fixup.start_off = calc_offset(&fixup, &ftr_fixup_test2 + 1);
	fixup.end_off = calc_offset(&fixup, &ftr_fixup_test2 + 2);
	fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test2_alt);
	fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test2_alt + 1);

	/* Sanity check */
	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);

	/* Check we don't patch if the value matches */
	patch_feature_section(0xF, &fixup);
	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);

	/* Check we do patch if the value doesn't match */
	patch_feature_section(0, &fixup);
	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);

	/* Check we do patch if the mask doesn't match */
	memcpy(&ftr_fixup_test2, &ftr_fixup_test2_orig, size);
	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
	patch_feature_section(~0xF, &fixup);
	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);
}

static void test_alternative_case_too_big(void)
{
	extern unsigned int ftr_fixup_test3;
	extern unsigned int end_ftr_fixup_test3;
	extern unsigned int ftr_fixup_test3_orig;
	extern unsigned int ftr_fixup_test3_alt;
	int size = &end_ftr_fixup_test3 - &ftr_fixup_test3;

	fixup.value = fixup.mask = 0xC;
	fixup.start_off = calc_offset(&fixup, &ftr_fixup_test3 + 1);
	fixup.end_off = calc_offset(&fixup, &ftr_fixup_test3 + 2);
	fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test3_alt);
	fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test3_alt + 2);

	/* Sanity check */
	check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);

	/* Expect nothing to be patched, and the error returned to us */
	check(patch_feature_section(0xF, &fixup) == 1);
	check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
	check(patch_feature_section(0, &fixup) == 1);
	check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
	check(patch_feature_section(~0xF, &fixup) == 1);
	check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
}

static void test_alternative_case_too_small(void)
{
	extern unsigned int ftr_fixup_test4;
	extern unsigned int end_ftr_fixup_test4;
	extern unsigned int ftr_fixup_test4_orig;
	extern unsigned int ftr_fixup_test4_alt;
	extern unsigned int ftr_fixup_test4_expected;
	int size = &end_ftr_fixup_test4 - &ftr_fixup_test4;
	unsigned long flag;

	/* Check a high-bit flag */
	flag = 1UL << ((sizeof(unsigned long) - 1) * 8);
	fixup.value = fixup.mask = flag;
	fixup.start_off = calc_offset(&fixup, &ftr_fixup_test4 + 1);
	fixup.end_off = calc_offset(&fixup, &ftr_fixup_test4 + 5);
	fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test4_alt);
	fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test4_alt + 2);

	/* Sanity check */
	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);

	/* Check we don't patch if the value matches */
	patch_feature_section(flag, &fixup);
	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);

	/* Check we do patch if the value doesn't match */
	patch_feature_section(0, &fixup);
	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);

	/* Check we do patch if the mask doesn't match */
	memcpy(&ftr_fixup_test4, &ftr_fixup_test4_orig, size);
	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
	patch_feature_section(~flag, &fixup);
	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);
}

static void test_alternative_case_with_branch(void)
{
	extern unsigned int ftr_fixup_test5;
	extern unsigned int end_ftr_fixup_test5;
	extern unsigned int ftr_fixup_test5_expected;
	int size = &end_ftr_fixup_test5 - &ftr_fixup_test5;

	check(memcmp(&ftr_fixup_test5, &ftr_fixup_test5_expected, size) == 0);
}

static void test_alternative_case_with_external_branch(void)
{
	extern unsigned int ftr_fixup_test6;
	extern unsigned int end_ftr_fixup_test6;
	extern unsigned int ftr_fixup_test6_expected;
	int size = &end_ftr_fixup_test6 - &ftr_fixup_test6;

	check(memcmp(&ftr_fixup_test6, &ftr_fixup_test6_expected, size) == 0);
}

static void test_cpu_macros(void)
{
	extern u8 ftr_fixup_test_FTR_macros;
	extern u8 ftr_fixup_test_FTR_macros_expected;
	unsigned long size = &ftr_fixup_test_FTR_macros_expected -
			     &ftr_fixup_test_FTR_macros;

	/* The fixups have already been done for us during boot */
	check(memcmp(&ftr_fixup_test_FTR_macros,
		     &ftr_fixup_test_FTR_macros_expected, size) == 0);
}

static void test_fw_macros(void)
{
#ifdef CONFIG_PPC64
	extern u8 ftr_fixup_test_FW_FTR_macros;
	extern u8 ftr_fixup_test_FW_FTR_macros_expected;
	unsigned long size = &ftr_fixup_test_FW_FTR_macros_expected -
			     &ftr_fixup_test_FW_FTR_macros;

	/* The fixups have already been done for us during boot */
	check(memcmp(&ftr_fixup_test_FW_FTR_macros,
		     &ftr_fixup_test_FW_FTR_macros_expected, size) == 0);
#endif
}

static void test_lwsync_macros(void)
{
	extern u8 lwsync_fixup_test;
	extern u8 end_lwsync_fixup_test;
	extern u8 lwsync_fixup_test_expected_LWSYNC;
	extern u8 lwsync_fixup_test_expected_SYNC;
	unsigned long size = &end_lwsync_fixup_test -
			     &lwsync_fixup_test;

	/* The fixups have already been done for us during boot */
	if (cur_cpu_spec->cpu_features & CPU_FTR_LWSYNC) {
		check(memcmp(&lwsync_fixup_test,
			     &lwsync_fixup_test_expected_LWSYNC, size) == 0);
	} else {
		check(memcmp(&lwsync_fixup_test,
			     &lwsync_fixup_test_expected_SYNC, size) == 0);
	}
}

static int __init test_feature_fixups(void)
{
	printk(KERN_DEBUG "Running feature fixup self-tests ...\n");

	test_basic_patching();
	test_alternative_patching();
	test_alternative_case_too_big();
	test_alternative_case_too_small();
	test_alternative_case_with_branch();
	test_alternative_case_with_external_branch();
	test_cpu_macros();
	test_fw_macros();
	test_lwsync_macros();

	return 0;
}
late_initcall(test_feature_fixups);

#endif /* CONFIG_FTR_FIXUP_SELFTEST */
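To make the fixup decision concrete: a section is left alone when the CPU feature bits selected by ->mask equal ->value, and patched otherwise. The following standalone C model is an editor's illustration only (struct fixup_entry_model and needs_patching are invented names, not kernel API) and can be compiled in userspace to play with the mask/value logic used by patch_feature_section().

#include <stdio.h>

/* Userspace model of the "(value & fcur->mask) == fcur->value" test. */
struct fixup_entry_model {
	unsigned long mask;
	unsigned long value;
};

static int needs_patching(unsigned long cpu_features,
			  const struct fixup_entry_model *f)
{
	/* 0: leave the default code; 1: copy in the alternative + nops */
	return (cpu_features & f->mask) != f->value;
}

int main(void)
{
	struct fixup_entry_model f = { .mask = 0x8, .value = 0x8 };

	printf("features=0x8 -> patch? %d\n", needs_patching(0x8, &f)); /* 0 */
	printf("features=0x0 -> patch? %d\n", needs_patching(0x0, &f)); /* 1 */
	return 0;
}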
110
arch/powerpc/lib/hweight_64.S
Normal file
@@ -0,0 +1,110 @@
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2010
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

/* Note: This code relies on -mminimal-toc */

_GLOBAL(__arch_hweight8)
BEGIN_FTR_SECTION
	b	__sw_hweight8
	nop
	nop
FTR_SECTION_ELSE
	PPC_POPCNTB(R3,R3)
	clrldi	r3,r3,64-8
	blr
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)

_GLOBAL(__arch_hweight16)
BEGIN_FTR_SECTION
	b	__sw_hweight16
	nop
	nop
	nop
	nop
FTR_SECTION_ELSE
  BEGIN_FTR_SECTION_NESTED(50)
	PPC_POPCNTB(R3,R3)
	srdi	r4,r3,8
	add	r3,r4,r3
	clrldi	r3,r3,64-8
	blr
  FTR_SECTION_ELSE_NESTED(50)
	clrlwi	r3,r3,16
	PPC_POPCNTW(R3,R3)
	clrldi	r3,r3,64-8
	blr
  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 50)
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)

_GLOBAL(__arch_hweight32)
BEGIN_FTR_SECTION
	b	__sw_hweight32
	nop
	nop
	nop
	nop
	nop
	nop
FTR_SECTION_ELSE
  BEGIN_FTR_SECTION_NESTED(51)
	PPC_POPCNTB(R3,R3)
	srdi	r4,r3,16
	add	r3,r4,r3
	srdi	r4,r3,8
	add	r3,r4,r3
	clrldi	r3,r3,64-8
	blr
  FTR_SECTION_ELSE_NESTED(51)
	PPC_POPCNTW(R3,R3)
	clrldi	r3,r3,64-8
	blr
  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 51)
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)

_GLOBAL(__arch_hweight64)
BEGIN_FTR_SECTION
	b	__sw_hweight64
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
FTR_SECTION_ELSE
  BEGIN_FTR_SECTION_NESTED(52)
	PPC_POPCNTB(R3,R3)
	srdi	r4,r3,32
	add	r3,r4,r3
	srdi	r4,r3,16
	add	r3,r4,r3
	srdi	r4,r3,8
	add	r3,r4,r3
	clrldi	r3,r3,64-8
	blr
  FTR_SECTION_ELSE_NESTED(52)
	PPC_POPCNTD(R3,R3)
	clrldi	r3,r3,64-8
	blr
  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 52)
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
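The popcntb path above works by leaving a per-byte population count in each byte of the register, then folding halves together with srdi/add and masking the low byte. As an editor's sketch (not kernel code; fold_byte_counts and its parameter are invented names, with per_byte_counts standing in for the popcntb result), the same fold in C is:

#include <stdint.h>

/* Fold per-byte population counts down to a single total, mirroring
 * the srdi/add cascade in __arch_hweight64. The total is at most 64,
 * so it always fits in the low byte that clrldi keeps. */
static unsigned int fold_byte_counts(uint64_t per_byte_counts)
{
	uint64_t x = per_byte_counts;

	x += x >> 32;	/* srdi r4,r3,32 ; add */
	x += x >> 16;	/* srdi r4,r3,16 ; add */
	x += x >> 8;	/* srdi r4,r3,8  ; add */
	return (unsigned int)(x & 0xff);	/* clrldi r3,r3,64-8 */
}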
379
arch/powerpc/lib/ldstfp.S
Normal file
@@ -0,0 +1,379 @@
/*
 * Floating-point, VMX/Altivec and VSX loads and stores
 * for use in instruction emulation.
 *
 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/ppc-opcode.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <linux/errno.h>

#ifdef CONFIG_PPC_FPU

#define STKFRM	(PPC_MIN_STKFRM + 16)

	.macro	extab	instr,handler
	.section __ex_table,"a"
	PPC_LONG \instr,\handler
	.previous
	.endm

	.macro	inst32	op
reg = 0
	.rept	32
20:	\op	reg,0,r4
	b	3f
	extab	20b,99f
reg = reg + 1
	.endr
	.endm

/* Get the contents of frN into fr0; N is in r3. */
_GLOBAL(get_fpr)
	mflr	r0
	rlwinm	r3,r3,3,0xf8
	bcl	20,31,1f
	blr			/* fr0 is already in fr0 */
	nop
reg = 1
	.rept	31
	fmr	fr0,reg
	blr
reg = reg + 1
	.endr
1:	mflr	r5
	add	r5,r3,r5
	mtctr	r5
	mtlr	r0
	bctr

/* Put the contents of fr0 into frN; N is in r3. */
_GLOBAL(put_fpr)
	mflr	r0
	rlwinm	r3,r3,3,0xf8
	bcl	20,31,1f
	blr			/* fr0 is already in fr0 */
	nop
reg = 1
	.rept	31
	fmr	reg,fr0
	blr
reg = reg + 1
	.endr
1:	mflr	r5
	add	r5,r3,r5
	mtctr	r5
	mtlr	r0
	bctr

/* Load FP reg N from float at *p.  N is in r3, p in r4. */
_GLOBAL(do_lfs)
	PPC_STLU r1,-STKFRM(r1)
	mflr	r0
	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
	mfmsr	r6
	ori	r7,r6,MSR_FP
	cmpwi	cr7,r3,0
	MTMSRD(r7)
	isync
	beq	cr7,1f
	stfd	fr0,STKFRM-16(r1)
1:	li	r9,-EFAULT
2:	lfs	fr0,0(r4)
	li	r9,0
3:	bl	put_fpr
	beq	cr7,4f
	lfd	fr0,STKFRM-16(r1)
4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
	mtlr	r0
	MTMSRD(r6)
	isync
	mr	r3,r9
	addi	r1,r1,STKFRM
	blr
	extab	2b,3b

/* Load FP reg N from double at *p.  N is in r3, p in r4. */
_GLOBAL(do_lfd)
	PPC_STLU r1,-STKFRM(r1)
	mflr	r0
	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
	mfmsr	r6
	ori	r7,r6,MSR_FP
	cmpwi	cr7,r3,0
	MTMSRD(r7)
	isync
	beq	cr7,1f
	stfd	fr0,STKFRM-16(r1)
1:	li	r9,-EFAULT
2:	lfd	fr0,0(r4)
	li	r9,0
3:	beq	cr7,4f
	bl	put_fpr
	lfd	fr0,STKFRM-16(r1)
4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
	mtlr	r0
	MTMSRD(r6)
	isync
	mr	r3,r9
	addi	r1,r1,STKFRM
	blr
	extab	2b,3b

/* Store FP reg N to float at *p.  N is in r3, p in r4. */
_GLOBAL(do_stfs)
	PPC_STLU r1,-STKFRM(r1)
	mflr	r0
	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
	mfmsr	r6
	ori	r7,r6,MSR_FP
	cmpwi	cr7,r3,0
	MTMSRD(r7)
	isync
	beq	cr7,1f
	stfd	fr0,STKFRM-16(r1)
	bl	get_fpr
1:	li	r9,-EFAULT
2:	stfs	fr0,0(r4)
	li	r9,0
3:	beq	cr7,4f
	lfd	fr0,STKFRM-16(r1)
4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
	mtlr	r0
	MTMSRD(r6)
	isync
	mr	r3,r9
	addi	r1,r1,STKFRM
	blr
	extab	2b,3b

/* Store FP reg N to double at *p.  N is in r3, p in r4. */
_GLOBAL(do_stfd)
	PPC_STLU r1,-STKFRM(r1)
	mflr	r0
	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
	mfmsr	r6
	ori	r7,r6,MSR_FP
	cmpwi	cr7,r3,0
	MTMSRD(r7)
	isync
	beq	cr7,1f
	stfd	fr0,STKFRM-16(r1)
	bl	get_fpr
1:	li	r9,-EFAULT
2:	stfd	fr0,0(r4)
	li	r9,0
3:	beq	cr7,4f
	lfd	fr0,STKFRM-16(r1)
4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
	mtlr	r0
	MTMSRD(r6)
	isync
	mr	r3,r9
	addi	r1,r1,STKFRM
	blr
	extab	2b,3b

#ifdef CONFIG_ALTIVEC
/* Get the contents of vrN into vr0; N is in r3. */
_GLOBAL(get_vr)
	mflr	r0
	rlwinm	r3,r3,3,0xf8
	bcl	20,31,1f
	blr			/* vr0 is already in vr0 */
	nop
reg = 1
	.rept	31
	vor	vr0,reg,reg	/* assembler doesn't know vmr? */
	blr
reg = reg + 1
	.endr
1:	mflr	r5
	add	r5,r3,r5
	mtctr	r5
	mtlr	r0
	bctr

/* Put the contents of vr0 into vrN; N is in r3. */
_GLOBAL(put_vr)
	mflr	r0
	rlwinm	r3,r3,3,0xf8
	bcl	20,31,1f
	blr			/* vr0 is already in vr0 */
	nop
reg = 1
	.rept	31
	vor	reg,vr0,vr0
	blr
reg = reg + 1
	.endr
1:	mflr	r5
	add	r5,r3,r5
	mtctr	r5
	mtlr	r0
	bctr

/* Load vector reg N from *p.  N is in r3, p in r4. */
_GLOBAL(do_lvx)
	PPC_STLU r1,-STKFRM(r1)
	mflr	r0
	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
	mfmsr	r6
	oris	r7,r6,MSR_VEC@h
	cmpwi	cr7,r3,0
	li	r8,STKFRM-16
	MTMSRD(r7)
	isync
	beq	cr7,1f
	stvx	vr0,r1,r8
1:	li	r9,-EFAULT
2:	lvx	vr0,0,r4
	li	r9,0
3:	beq	cr7,4f
	bl	put_vr
	lvx	vr0,r1,r8
4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
	mtlr	r0
	MTMSRD(r6)
	isync
	mr	r3,r9
	addi	r1,r1,STKFRM
	blr
	extab	2b,3b

/* Store vector reg N to *p.  N is in r3, p in r4. */
_GLOBAL(do_stvx)
	PPC_STLU r1,-STKFRM(r1)
	mflr	r0
	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
	mfmsr	r6
	oris	r7,r6,MSR_VEC@h
	cmpwi	cr7,r3,0
	li	r8,STKFRM-16
	MTMSRD(r7)
	isync
	beq	cr7,1f
	stvx	vr0,r1,r8
	bl	get_vr
1:	li	r9,-EFAULT
2:	stvx	vr0,0,r4
	li	r9,0
3:	beq	cr7,4f
	lvx	vr0,r1,r8
4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
	mtlr	r0
	MTMSRD(r6)
	isync
	mr	r3,r9
	addi	r1,r1,STKFRM
	blr
	extab	2b,3b
#endif /* CONFIG_ALTIVEC */

#ifdef CONFIG_VSX
/* Get the contents of vsrN into vsr0; N is in r3. */
_GLOBAL(get_vsr)
	mflr	r0
	rlwinm	r3,r3,3,0x1f8
	bcl	20,31,1f
	blr			/* vsr0 is already in vsr0 */
	nop
reg = 1
	.rept	63
	XXLOR(0,reg,reg)
	blr
reg = reg + 1
	.endr
1:	mflr	r5
	add	r5,r3,r5
	mtctr	r5
	mtlr	r0
	bctr

/* Put the contents of vsr0 into vsrN; N is in r3. */
_GLOBAL(put_vsr)
	mflr	r0
	rlwinm	r3,r3,3,0x1f8
	bcl	20,31,1f
	blr			/* vsr0 is already in vsr0 */
	nop
reg = 1
	.rept	63
	XXLOR(reg,0,0)
	blr
reg = reg + 1
	.endr
1:	mflr	r5
	add	r5,r3,r5
	mtctr	r5
	mtlr	r0
	bctr

/* Load VSX reg N from vector doubleword *p.  N is in r3, p in r4. */
_GLOBAL(do_lxvd2x)
	PPC_STLU r1,-STKFRM(r1)
	mflr	r0
	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
	mfmsr	r6
	oris	r7,r6,MSR_VSX@h
	cmpwi	cr7,r3,0
	li	r8,STKFRM-16
	MTMSRD(r7)
	isync
	beq	cr7,1f
	STXVD2X(0,R1,R8)
1:	li	r9,-EFAULT
2:	LXVD2X(0,R0,R4)
	li	r9,0
3:	beq	cr7,4f
	bl	put_vsr
	LXVD2X(0,R1,R8)
4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
	mtlr	r0
	MTMSRD(r6)
	isync
	mr	r3,r9
	addi	r1,r1,STKFRM
	blr
	extab	2b,3b

/* Store VSX reg N to vector doubleword *p.  N is in r3, p in r4. */
_GLOBAL(do_stxvd2x)
	PPC_STLU r1,-STKFRM(r1)
	mflr	r0
	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
	mfmsr	r6
	oris	r7,r6,MSR_VSX@h
	cmpwi	cr7,r3,0
	li	r8,STKFRM-16
	MTMSRD(r7)
	isync
	beq	cr7,1f
	STXVD2X(0,R1,R8)
	bl	get_vsr
1:	li	r9,-EFAULT
2:	STXVD2X(0,R0,R4)
	li	r9,0
3:	beq	cr7,4f
	LXVD2X(0,R1,R8)
4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
	mtlr	r0
	MTMSRD(r6)
	isync
	mr	r3,r9
	addi	r1,r1,STKFRM
	blr
	extab	2b,3b

#endif /* CONFIG_VSX */

#endif /* CONFIG_PPC_FPU */
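The get_fpr/put_fpr routines above use a computed-branch trick: they take their own address (bcl 20,31,1f; mflr), add N*8, and jump into a table of 31 fixed-size two-instruction stubs (fmr; blr). As a loose C analogue only, and assuming GNU C for the range initializer, the same idea is an indexed table of handlers; all names here (fpr_move_fn, move_fpr_stub, dispatch_get_fpr) are invented for the illustration and are not kernel API.

typedef void (*fpr_move_fn)(void);

static void move_fpr_stub(void)
{
	/* stands in for one "fmr fr0,frN; blr" slot */
}

/* One entry per register slot, like the .rept-generated stubs. */
static fpr_move_fn fpr_table[32] = {
	[0 ... 31] = move_fpr_stub,	/* GNU C range initializer */
};

static void dispatch_get_fpr(int n)
{
	fpr_table[n & 31]();		/* computed branch, like mtctr/bctr */
}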
85
arch/powerpc/lib/locks.c
Normal file
@@ -0,0 +1,85 @@
/*
 * Spin and read/write lock operations.
 *
 * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
 * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
 *   Rework to support virtual processors
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/stringify.h>
#include <linux/smp.h>

/* waiting for a spinlock... */
#if defined(CONFIG_PPC_SPLPAR)
#include <asm/hvcall.h>
#include <asm/smp.h>

void __spin_yield(arch_spinlock_t *lock)
{
	unsigned int lock_value, holder_cpu, yield_count;

	lock_value = lock->slock;
	if (lock_value == 0)
		return;
	holder_cpu = lock_value & 0xffff;
	BUG_ON(holder_cpu >= NR_CPUS);
	yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
	if ((yield_count & 1) == 0)
		return;		/* virtual cpu is currently running */
	rmb();
	if (lock->slock != lock_value)
		return;		/* something has changed */
	plpar_hcall_norets(H_CONFER,
		get_hard_smp_processor_id(holder_cpu), yield_count);
}

/*
 * Waiting for a read lock or a write lock on a rwlock...
 * This turns out to be the same for read and write locks, since
 * we only know the holder if it is write-locked.
 */
void __rw_yield(arch_rwlock_t *rw)
{
	int lock_value;
	unsigned int holder_cpu, yield_count;

	lock_value = rw->lock;
	if (lock_value >= 0)
		return;		/* no write lock at present */
	holder_cpu = lock_value & 0xffff;
	BUG_ON(holder_cpu >= NR_CPUS);
	yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
	if ((yield_count & 1) == 0)
		return;		/* virtual cpu is currently running */
	rmb();
	if (rw->lock != lock_value)
		return;		/* something has changed */
	plpar_hcall_norets(H_CONFER,
		get_hard_smp_processor_id(holder_cpu), yield_count);
}
#endif

void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
	smp_mb();

	while (lock->slock) {
		HMT_low();
		if (SHARED_PROCESSOR)
			__spin_yield(lock);
	}
	HMT_medium();

	smp_mb();
}

EXPORT_SYMBOL(arch_spin_unlock_wait);
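The key idea in __spin_yield()/__rw_yield() above is the yield_count parity test: on shared-processor LPARs the hypervisor bumps a vCPU's dispatch counter on each transition, so an odd value means the lock holder's vCPU is currently preempted and conferring our cycles to it (H_CONFER) can help. A minimal sketch of just that test, with holder_is_preempted an invented name and the parameter standing in for be32_to_cpu(lppaca_of(holder_cpu).yield_count):

/* Odd yield_count => the holder vCPU is preempted; only then is it
 * worth handing our remaining timeslice to it via H_CONFER. */
static inline int holder_is_preempted(unsigned int yield_count)
{
	return (yield_count & 1) != 0;
}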
119
arch/powerpc/lib/mem_64.S
Normal file
@@ -0,0 +1,119 @@
/*
 * String handling functions for PowerPC.
 *
 * Copyright (C) 1996 Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

_GLOBAL(memset)
	neg	r0,r3
	rlwimi	r4,r4,8,16,23
	andi.	r0,r0,7			/* # bytes to be 8-byte aligned */
	rlwimi	r4,r4,16,0,15
	cmplw	cr1,r5,r0		/* do we get that far? */
	rldimi	r4,r4,32,0
	PPC_MTOCRF(1,r0)
	mr	r6,r3
	blt	cr1,8f
	beq+	3f			/* if already 8-byte aligned */
	subf	r5,r0,r5
	bf	31,1f
	stb	r4,0(r6)
	addi	r6,r6,1
1:	bf	30,2f
	sth	r4,0(r6)
	addi	r6,r6,2
2:	bf	29,3f
	stw	r4,0(r6)
	addi	r6,r6,4
3:	srdi.	r0,r5,6
	clrldi	r5,r5,58
	mtctr	r0
	beq	5f
4:	std	r4,0(r6)
	std	r4,8(r6)
	std	r4,16(r6)
	std	r4,24(r6)
	std	r4,32(r6)
	std	r4,40(r6)
	std	r4,48(r6)
	std	r4,56(r6)
	addi	r6,r6,64
	bdnz	4b
5:	srwi.	r0,r5,3
	clrlwi	r5,r5,29
	PPC_MTOCRF(1,r0)
	beq	8f
	bf	29,6f
	std	r4,0(r6)
	std	r4,8(r6)
	std	r4,16(r6)
	std	r4,24(r6)
	addi	r6,r6,32
6:	bf	30,7f
	std	r4,0(r6)
	std	r4,8(r6)
	addi	r6,r6,16
7:	bf	31,8f
	std	r4,0(r6)
	addi	r6,r6,8
8:	cmpwi	r5,0
	PPC_MTOCRF(1,r5)
	beqlr+
	bf	29,9f
	stw	r4,0(r6)
	addi	r6,r6,4
9:	bf	30,10f
	sth	r4,0(r6)
	addi	r6,r6,2
10:	bflr	31
	stb	r4,0(r6)
	blr

_GLOBAL_TOC(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	b	memcpy

_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b
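The memset above follows a classic head/body/tail shape: replicate the fill byte across the register (the rlwimi/rldimi sequence), store single bytes until the destination is 8-byte aligned, stream doubleword stores through the body, then mop up the tail. A minimal C sketch of the same structure, assuming nothing about the real routine beyond that shape (memset_sketch is an invented name, and the kernel version additionally unrolls the body by 64 bytes):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void *memset_sketch(void *s, int c, size_t n)
{
	unsigned char *p = s;
	uint64_t pat = (unsigned char)c;

	pat |= pat << 8;	/* rlwimi/rldimi-style byte replication */
	pat |= pat << 16;
	pat |= pat << 32;

	while (n && ((uintptr_t)p & 7)) {	/* head: reach 8B alignment */
		*p++ = (unsigned char)c;
		n--;
	}
	for (; n >= 8; n -= 8, p += 8)		/* body: doubleword stores */
		memcpy(p, &pat, 8);
	while (n--)				/* tail */
		*p++ = (unsigned char)c;
	return s;
}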
221
arch/powerpc/lib/memcpy_64.S
Normal file
@@ -0,0 +1,221 @@
/*
|
||||
* Copyright (C) 2002 Paul Mackerras, IBM Corp.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
#include <asm/processor.h>
|
||||
#include <asm/ppc_asm.h>
|
||||
|
||||
.align 7
|
||||
_GLOBAL_TOC(memcpy)
|
||||
BEGIN_FTR_SECTION
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
cmpdi cr7,r5,0
|
||||
#else
|
||||
std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* save destination pointer for return value */
|
||||
#endif
|
||||
FTR_SECTION_ELSE
|
||||
#ifndef SELFTEST
|
||||
b memcpy_power7
|
||||
#endif
|
||||
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
/* dumb little-endian memcpy that will get replaced at runtime */
|
||||
addi r9,r3,-1
|
||||
addi r4,r4,-1
|
||||
beqlr cr7
|
||||
mtctr r5
|
||||
1: lbzu r10,1(r4)
|
||||
stbu r10,1(r9)
|
||||
bdnz 1b
|
||||
blr
|
||||
#else
|
||||
PPC_MTOCRF(0x01,r5)
|
||||
cmpldi cr1,r5,16
|
||||
neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
|
||||
andi. r6,r6,7
|
||||
dcbt 0,r4
|
||||
blt cr1,.Lshort_copy
|
||||
/* Below we want to nop out the bne if we're on a CPU that has the
|
||||
CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
|
||||
cleared.
|
||||
At the time of writing the only CPU that has this combination of bits
|
||||
set is Power6. */
|
||||
BEGIN_FTR_SECTION
|
||||
nop
|
||||
FTR_SECTION_ELSE
|
||||
bne .Ldst_unaligned
|
||||
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
|
||||
CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
addi r3,r3,-16
BEGIN_FTR_SECTION
andi. r0,r4,7
bne .Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
srdi r7,r5,4
ld r9,0(r4)
addi r4,r4,-8
mtctr r7
andi. r5,r5,7
bf cr7*4+0,2f
addi r3,r3,8
addi r4,r4,8
mr r8,r9
blt cr1,3f
1: ld r9,8(r4)
std r8,8(r3)
2: ldu r8,16(r4)
stdu r9,16(r3)
bdnz 1b
3: std r8,8(r3)
beq 3f
addi r3,r3,16
.Ldo_tail:
bf cr7*4+1,1f
lwz r9,8(r4)
addi r4,r4,4
stw r9,0(r3)
addi r3,r3,4
1: bf cr7*4+2,2f
lhz r9,8(r4)
addi r4,r4,2
sth r9,0(r3)
addi r3,r3,2
2: bf cr7*4+3,3f
lbz r9,8(r4)
stb r9,0(r3)
3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
blr

.Lsrc_unaligned:
srdi r6,r5,3
addi r5,r5,-16
subf r4,r0,r4
srdi r7,r5,4
sldi r10,r0,3
cmpdi cr6,r6,3
andi. r5,r5,7
mtctr r7
subfic r11,r10,64
add r5,r5,r0

bt cr7*4+0,0f

ld r9,0(r4) # 3+2n loads, 2+2n stores
ld r0,8(r4)
sld r6,r9,r10
ldu r9,16(r4)
srd r7,r0,r11
sld r8,r0,r10
or r7,r7,r6
blt cr6,4f
ld r0,8(r4)
# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
b 2f

0: ld r0,0(r4) # 4+2n loads, 3+2n stores
ldu r9,8(r4)
sld r8,r0,r10
addi r3,r3,-8
blt cr6,5f
ld r0,8(r4)
srd r12,r9,r11
sld r6,r9,r10
ldu r9,16(r4)
or r12,r8,r12
srd r7,r0,r11
sld r8,r0,r10
addi r3,r3,16
beq cr6,3f

# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
1: or r7,r7,r6
ld r0,8(r4)
std r12,8(r3)
2: srd r12,r9,r11
sld r6,r9,r10
ldu r9,16(r4)
or r12,r8,r12
stdu r7,16(r3)
srd r7,r0,r11
sld r8,r0,r10
bdnz 1b

3: std r12,8(r3)
or r7,r7,r6
4: std r7,16(r3)
5: srd r12,r9,r11
or r12,r8,r12
std r12,24(r3)
beq 4f
cmpwi cr1,r5,8
addi r3,r3,32
sld r9,r9,r10
ble cr1,6f
ld r0,8(r4)
srd r7,r0,r11
or r9,r7,r9
6:
bf cr7*4+1,1f
rotldi r9,r9,32
stw r9,0(r3)
addi r3,r3,4
1: bf cr7*4+2,2f
rotldi r9,r9,16
sth r9,0(r3)
addi r3,r3,2
2: bf cr7*4+3,3f
rotldi r9,r9,8
stb r9,0(r3)
3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
blr

.Ldst_unaligned:
PPC_MTOCRF(0x01,r6) # put #bytes to 8B bdry into cr7
subf r5,r6,r5
li r7,0
cmpldi cr1,r5,16
bf cr7*4+3,1f
lbz r0,0(r4)
stb r0,0(r3)
addi r7,r7,1
1: bf cr7*4+2,2f
lhzx r0,r7,r4
sthx r0,r7,r3
addi r7,r7,2
2: bf cr7*4+1,3f
lwzx r0,r7,r4
stwx r0,r7,r3
3: PPC_MTOCRF(0x01,r5)
add r4,r6,r4
add r3,r6,r3
b .Ldst_aligned

.Lshort_copy:
bf cr7*4+0,1f
lwz r0,0(r4)
lwz r9,4(r4)
addi r4,r4,8
stw r0,0(r3)
stw r9,4(r3)
addi r3,r3,8
1: bf cr7*4+1,2f
lwz r0,0(r4)
addi r4,r4,4
stw r0,0(r3)
addi r3,r3,4
2: bf cr7*4+2,3f
lhz r0,0(r4)
addi r4,r4,2
sth r0,0(r3)
addi r3,r3,2
3: bf cr7*4+3,4f
lbz r0,0(r4)
stb r0,0(r3)
4: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
blr
#endif
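A note on the tail handling above: mtocrf 0x01,rN moves the low four bits of the remaining length into cr7, and each bf cr7*4+n test then selects one power-of-two sized move, largest first. A minimal C sketch of that dispatch (the helper is hypothetical, not part of this file) reads:

#include <string.h>

/* Sketch only: what the cr7-driven tail above computes. len < 16;
 * each set bit selects one power-of-two sized move, largest first. */
static void tail_copy_sketch(unsigned char *dst, const unsigned char *src,
                             unsigned long len)
{
        if (len & 8) { memcpy(dst, src, 8); dst += 8; src += 8; }
        if (len & 4) { memcpy(dst, src, 4); dst += 4; src += 4; }
        if (len & 2) { memcpy(dst, src, 2); dst += 2; src += 2; }
        if (len & 1) { *dst = *src; }
}

The fixed-size memcpy() calls compile down to single load/store pairs, which is exactly what the assembly does with lwz/lhz/lbz and their store counterparts.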
656
arch/powerpc/lib/memcpy_power7.S
Normal file
@@ -0,0 +1,656 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2012
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
#include <asm/ppc_asm.h>

_GLOBAL(memcpy_power7)

#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB) lvsl VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB) lvsr VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC
#endif

#ifdef CONFIG_ALTIVEC
cmpldi r5,16
cmpldi cr1,r5,4096

std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)

blt .Lshort_copy
bgt cr1,.Lvmx_copy
#else
cmpldi r5,16

std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)

blt .Lshort_copy
#endif

.Lnonvmx_copy:
/* Get the source 8B aligned */
neg r6,r4
mtocrf 0x01,r6
clrldi r6,r6,(64-3)

bf cr7*4+3,1f
lbz r0,0(r4)
addi r4,r4,1
stb r0,0(r3)
addi r3,r3,1

1: bf cr7*4+2,2f
lhz r0,0(r4)
addi r4,r4,2
sth r0,0(r3)
addi r3,r3,2

2: bf cr7*4+1,3f
lwz r0,0(r4)
addi r4,r4,4
stw r0,0(r3)
addi r3,r3,4

3: sub r5,r5,r6
cmpldi r5,128
blt 5f

mflr r0
stdu r1,-STACKFRAMESIZE(r1)
std r14,STK_REG(R14)(r1)
std r15,STK_REG(R15)(r1)
std r16,STK_REG(R16)(r1)
std r17,STK_REG(R17)(r1)
std r18,STK_REG(R18)(r1)
std r19,STK_REG(R19)(r1)
std r20,STK_REG(R20)(r1)
std r21,STK_REG(R21)(r1)
std r22,STK_REG(R22)(r1)
std r0,STACKFRAMESIZE+16(r1)

srdi r6,r5,7
mtctr r6

/* Now do cacheline (128B) sized loads and stores. */
.align 5
4:
ld r0,0(r4)
ld r6,8(r4)
ld r7,16(r4)
ld r8,24(r4)
ld r9,32(r4)
ld r10,40(r4)
ld r11,48(r4)
ld r12,56(r4)
ld r14,64(r4)
ld r15,72(r4)
ld r16,80(r4)
ld r17,88(r4)
ld r18,96(r4)
ld r19,104(r4)
ld r20,112(r4)
ld r21,120(r4)
addi r4,r4,128
std r0,0(r3)
std r6,8(r3)
std r7,16(r3)
std r8,24(r3)
std r9,32(r3)
std r10,40(r3)
std r11,48(r3)
std r12,56(r3)
std r14,64(r3)
std r15,72(r3)
std r16,80(r3)
std r17,88(r3)
std r18,96(r3)
std r19,104(r3)
std r20,112(r3)
std r21,120(r3)
addi r3,r3,128
bdnz 4b

clrldi r5,r5,(64-7)

ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)
ld r17,STK_REG(R17)(r1)
ld r18,STK_REG(R18)(r1)
ld r19,STK_REG(R19)(r1)
ld r20,STK_REG(R20)(r1)
ld r21,STK_REG(R21)(r1)
ld r22,STK_REG(R22)(r1)
addi r1,r1,STACKFRAMESIZE

/* Up to 127B to go */
5: srdi r6,r5,4
mtocrf 0x01,r6

6: bf cr7*4+1,7f
ld r0,0(r4)
ld r6,8(r4)
ld r7,16(r4)
ld r8,24(r4)
ld r9,32(r4)
ld r10,40(r4)
ld r11,48(r4)
ld r12,56(r4)
addi r4,r4,64
std r0,0(r3)
std r6,8(r3)
std r7,16(r3)
std r8,24(r3)
std r9,32(r3)
std r10,40(r3)
std r11,48(r3)
std r12,56(r3)
addi r3,r3,64

/* Up to 63B to go */
7: bf cr7*4+2,8f
ld r0,0(r4)
ld r6,8(r4)
ld r7,16(r4)
ld r8,24(r4)
addi r4,r4,32
std r0,0(r3)
std r6,8(r3)
std r7,16(r3)
std r8,24(r3)
addi r3,r3,32

/* Up to 31B to go */
8: bf cr7*4+3,9f
ld r0,0(r4)
ld r6,8(r4)
addi r4,r4,16
std r0,0(r3)
std r6,8(r3)
addi r3,r3,16

9: clrldi r5,r5,(64-4)

/* Up to 15B to go */
.Lshort_copy:
mtocrf 0x01,r5
bf cr7*4+0,12f
lwz r0,0(r4) /* Less chance of a reject with word ops */
lwz r6,4(r4)
addi r4,r4,8
stw r0,0(r3)
stw r6,4(r3)
addi r3,r3,8

12: bf cr7*4+1,13f
lwz r0,0(r4)
addi r4,r4,4
stw r0,0(r3)
addi r3,r3,4

13: bf cr7*4+2,14f
lhz r0,0(r4)
addi r4,r4,2
sth r0,0(r3)
addi r3,r3,2

14: bf cr7*4+3,15f
lbz r0,0(r4)
stb r0,0(r3)

15: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
blr

.Lunwind_stack_nonvmx_copy:
addi r1,r1,STACKFRAMESIZE
b .Lnonvmx_copy

#ifdef CONFIG_ALTIVEC
.Lvmx_copy:
mflr r0
std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
bl enter_vmx_copy
cmpwi cr1,r3,0
ld r0,STACKFRAMESIZE+16(r1)
ld r3,STK_REG(R31)(r1)
ld r4,STK_REG(R30)(r1)
ld r5,STK_REG(R29)(r1)
mtlr r0

/*
* We prefetch both the source and destination using enhanced touch
* instructions. We use a stream ID of 0 for the load side and
* 1 for the store side.
*/
clrrdi r6,r4,7
clrrdi r9,r3,7
ori r9,r9,1 /* stream=1 */

srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */
cmpldi r7,0x3FF
ble 1f
li r7,0x3FF
1: lis r0,0x0E00 /* depth=7 */
sldi r7,r7,7
or r7,r7,r0
ori r10,r7,1 /* stream=1 */

lis r8,0x8000 /* GO=1 */
clrldi r8,r8,32

.machine push
.machine "power4"
dcbt r0,r6,0b01000
dcbt r0,r7,0b01010
dcbtst r0,r9,0b01000
dcbtst r0,r10,0b01010
eieio
dcbt r0,r8,0b01010 /* GO */
.machine pop

beq cr1,.Lunwind_stack_nonvmx_copy

/*
* If source and destination are not relatively aligned we use a
* slower permute loop.
*/
xor r6,r4,r3
rldicl. r6,r6,0,(64-4)
bne .Lvmx_unaligned_copy

/* Get the destination 16B aligned */
neg r6,r3
mtocrf 0x01,r6
clrldi r6,r6,(64-4)

bf cr7*4+3,1f
lbz r0,0(r4)
addi r4,r4,1
stb r0,0(r3)
addi r3,r3,1

1: bf cr7*4+2,2f
lhz r0,0(r4)
addi r4,r4,2
sth r0,0(r3)
addi r3,r3,2

2: bf cr7*4+1,3f
lwz r0,0(r4)
addi r4,r4,4
stw r0,0(r3)
addi r3,r3,4

3: bf cr7*4+0,4f
ld r0,0(r4)
addi r4,r4,8
std r0,0(r3)
addi r3,r3,8

4: sub r5,r5,r6

/* Get the destination 128B aligned */
neg r6,r3
srdi r7,r6,4
mtocrf 0x01,r7
clrldi r6,r6,(64-7)

li r9,16
li r10,32
li r11,48

bf cr7*4+3,5f
lvx vr1,r0,r4
addi r4,r4,16
stvx vr1,r0,r3
addi r3,r3,16

5: bf cr7*4+2,6f
lvx vr1,r0,r4
lvx vr0,r4,r9
addi r4,r4,32
stvx vr1,r0,r3
stvx vr0,r3,r9
addi r3,r3,32

6: bf cr7*4+1,7f
lvx vr3,r0,r4
lvx vr2,r4,r9
lvx vr1,r4,r10
lvx vr0,r4,r11
addi r4,r4,64
stvx vr3,r0,r3
stvx vr2,r3,r9
stvx vr1,r3,r10
stvx vr0,r3,r11
addi r3,r3,64

7: sub r5,r5,r6
srdi r6,r5,7

std r14,STK_REG(R14)(r1)
std r15,STK_REG(R15)(r1)
std r16,STK_REG(R16)(r1)

li r12,64
li r14,80
li r15,96
li r16,112

mtctr r6

/*
* Now do cacheline sized loads and stores. By this stage the
* cacheline stores are also cacheline aligned.
*/
.align 5
8:
lvx vr7,r0,r4
lvx vr6,r4,r9
lvx vr5,r4,r10
lvx vr4,r4,r11
lvx vr3,r4,r12
lvx vr2,r4,r14
lvx vr1,r4,r15
lvx vr0,r4,r16
addi r4,r4,128
stvx vr7,r0,r3
stvx vr6,r3,r9
stvx vr5,r3,r10
stvx vr4,r3,r11
stvx vr3,r3,r12
stvx vr2,r3,r14
stvx vr1,r3,r15
stvx vr0,r3,r16
addi r3,r3,128
bdnz 8b

ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)

/* Up to 127B to go */
clrldi r5,r5,(64-7)
srdi r6,r5,4
mtocrf 0x01,r6

bf cr7*4+1,9f
lvx vr3,r0,r4
lvx vr2,r4,r9
lvx vr1,r4,r10
lvx vr0,r4,r11
addi r4,r4,64
stvx vr3,r0,r3
stvx vr2,r3,r9
stvx vr1,r3,r10
stvx vr0,r3,r11
addi r3,r3,64

9: bf cr7*4+2,10f
lvx vr1,r0,r4
lvx vr0,r4,r9
addi r4,r4,32
stvx vr1,r0,r3
stvx vr0,r3,r9
addi r3,r3,32

10: bf cr7*4+3,11f
lvx vr1,r0,r4
addi r4,r4,16
stvx vr1,r0,r3
addi r3,r3,16

/* Up to 15B to go */
11: clrldi r5,r5,(64-4)
mtocrf 0x01,r5
bf cr7*4+0,12f
ld r0,0(r4)
addi r4,r4,8
std r0,0(r3)
addi r3,r3,8

12: bf cr7*4+1,13f
lwz r0,0(r4)
addi r4,r4,4
stw r0,0(r3)
addi r3,r3,4

13: bf cr7*4+2,14f
lhz r0,0(r4)
addi r4,r4,2
sth r0,0(r3)
addi r3,r3,2

14: bf cr7*4+3,15f
lbz r0,0(r4)
stb r0,0(r3)

15: addi r1,r1,STACKFRAMESIZE
ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
b exit_vmx_copy /* tail call optimise */

.Lvmx_unaligned_copy:
/* Get the destination 16B aligned */
neg r6,r3
mtocrf 0x01,r6
clrldi r6,r6,(64-4)

bf cr7*4+3,1f
lbz r0,0(r4)
addi r4,r4,1
stb r0,0(r3)
addi r3,r3,1

1: bf cr7*4+2,2f
lhz r0,0(r4)
addi r4,r4,2
sth r0,0(r3)
addi r3,r3,2

2: bf cr7*4+1,3f
lwz r0,0(r4)
addi r4,r4,4
stw r0,0(r3)
addi r3,r3,4

3: bf cr7*4+0,4f
lwz r0,0(r4) /* Less chance of a reject with word ops */
lwz r7,4(r4)
addi r4,r4,8
stw r0,0(r3)
stw r7,4(r3)
addi r3,r3,8

4: sub r5,r5,r6

/* Get the destination 128B aligned */
neg r6,r3
srdi r7,r6,4
mtocrf 0x01,r7
clrldi r6,r6,(64-7)

li r9,16
li r10,32
li r11,48

LVS(vr16,0,r4) /* Setup permute control vector */
lvx vr0,0,r4
addi r4,r4,16

bf cr7*4+3,5f
lvx vr1,r0,r4
VPERM(vr8,vr0,vr1,vr16)
addi r4,r4,16
stvx vr8,r0,r3
addi r3,r3,16
vor vr0,vr1,vr1

5: bf cr7*4+2,6f
lvx vr1,r0,r4
VPERM(vr8,vr0,vr1,vr16)
lvx vr0,r4,r9
VPERM(vr9,vr1,vr0,vr16)
addi r4,r4,32
stvx vr8,r0,r3
stvx vr9,r3,r9
addi r3,r3,32

6: bf cr7*4+1,7f
lvx vr3,r0,r4
VPERM(vr8,vr0,vr3,vr16)
lvx vr2,r4,r9
VPERM(vr9,vr3,vr2,vr16)
lvx vr1,r4,r10
VPERM(vr10,vr2,vr1,vr16)
lvx vr0,r4,r11
VPERM(vr11,vr1,vr0,vr16)
addi r4,r4,64
stvx vr8,r0,r3
stvx vr9,r3,r9
stvx vr10,r3,r10
stvx vr11,r3,r11
addi r3,r3,64

7: sub r5,r5,r6
srdi r6,r5,7

std r14,STK_REG(R14)(r1)
std r15,STK_REG(R15)(r1)
std r16,STK_REG(R16)(r1)

li r12,64
li r14,80
li r15,96
li r16,112

mtctr r6

/*
* Now do cacheline sized loads and stores. By this stage the
* cacheline stores are also cacheline aligned.
*/
.align 5
8:
lvx vr7,r0,r4
VPERM(vr8,vr0,vr7,vr16)
lvx vr6,r4,r9
VPERM(vr9,vr7,vr6,vr16)
lvx vr5,r4,r10
VPERM(vr10,vr6,vr5,vr16)
lvx vr4,r4,r11
VPERM(vr11,vr5,vr4,vr16)
lvx vr3,r4,r12
VPERM(vr12,vr4,vr3,vr16)
lvx vr2,r4,r14
VPERM(vr13,vr3,vr2,vr16)
lvx vr1,r4,r15
VPERM(vr14,vr2,vr1,vr16)
lvx vr0,r4,r16
VPERM(vr15,vr1,vr0,vr16)
addi r4,r4,128
stvx vr8,r0,r3
stvx vr9,r3,r9
stvx vr10,r3,r10
stvx vr11,r3,r11
stvx vr12,r3,r12
stvx vr13,r3,r14
stvx vr14,r3,r15
stvx vr15,r3,r16
addi r3,r3,128
bdnz 8b

ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)

/* Up to 127B to go */
clrldi r5,r5,(64-7)
srdi r6,r5,4
mtocrf 0x01,r6

bf cr7*4+1,9f
lvx vr3,r0,r4
VPERM(vr8,vr0,vr3,vr16)
lvx vr2,r4,r9
VPERM(vr9,vr3,vr2,vr16)
lvx vr1,r4,r10
VPERM(vr10,vr2,vr1,vr16)
lvx vr0,r4,r11
VPERM(vr11,vr1,vr0,vr16)
addi r4,r4,64
stvx vr8,r0,r3
stvx vr9,r3,r9
stvx vr10,r3,r10
stvx vr11,r3,r11
addi r3,r3,64

9: bf cr7*4+2,10f
lvx vr1,r0,r4
VPERM(vr8,vr0,vr1,vr16)
lvx vr0,r4,r9
VPERM(vr9,vr1,vr0,vr16)
addi r4,r4,32
stvx vr8,r0,r3
stvx vr9,r3,r9
addi r3,r3,32

10: bf cr7*4+3,11f
lvx vr1,r0,r4
VPERM(vr8,vr0,vr1,vr16)
addi r4,r4,16
stvx vr8,r0,r3
addi r3,r3,16

/* Up to 15B to go */
11: clrldi r5,r5,(64-4)
addi r4,r4,-16 /* Unwind the +16 load offset */
mtocrf 0x01,r5
bf cr7*4+0,12f
lwz r0,0(r4) /* Less chance of a reject with word ops */
lwz r6,4(r4)
addi r4,r4,8
stw r0,0(r3)
stw r6,4(r3)
addi r3,r3,8

12: bf cr7*4+1,13f
lwz r0,0(r4)
addi r4,r4,4
stw r0,0(r3)
addi r3,r3,4

13: bf cr7*4+2,14f
lhz r0,0(r4)
addi r4,r4,2
sth r0,0(r3)
addi r3,r3,2

14: bf cr7*4+3,15f
lbz r0,0(r4)
stb r0,0(r3)

15: addi r1,r1,STACKFRAMESIZE
ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
b exit_vmx_copy /* tail call optimise */
#endif /* CONFIG_ALTIVEC */
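The entry sequence of memcpy_power7 above chooses among three paths: a byte/word short copy for lengths under 16, a scalar 8-byte loop (.Lnonvmx_copy), and a VMX path for copies over 4096 bytes, which falls back to the scalar loop when enter_vmx_copy() refuses (interrupt context). A compilable C sketch of that dispatch, with stub helpers standing in for the assembly labels (all names here are illustrative, not kernel API):

#include <string.h>

/* Stubs standing in for the assembly paths; sketch only. */
static void *short_copy(void *d, const void *s, unsigned long n) { return memcpy(d, s, n); }
static void *nonvmx_copy(void *d, const void *s, unsigned long n) { return memcpy(d, s, n); }
static void *vmx_copy(void *d, const void *s, unsigned long n) { return memcpy(d, s, n); }
static int enter_vmx_copy_stub(void) { return 1; } /* 0 when in interrupt context */

void *memcpy_power7_sketch(void *dst, const void *src, unsigned long len)
{
        if (len < 16)
                return short_copy(dst, src, len);   /* .Lshort_copy */
        if (len > 4096 && enter_vmx_copy_stub())
                return vmx_copy(dst, src, len);     /* .Lvmx_copy; tail-calls exit_vmx_copy() */
        return nonvmx_copy(dst, src, len);          /* .Lnonvmx_copy */
}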
39
arch/powerpc/lib/ppc_ksyms.c
Normal file
@@ -0,0 +1,39 @@
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <net/checksum.h>

EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memcmp);
EXPORT_SYMBOL(memchr);
#ifdef CONFIG_PPC32
EXPORT_SYMBOL(cacheable_memcpy);
EXPORT_SYMBOL(cacheable_memzero);
#endif

EXPORT_SYMBOL(strcpy);
EXPORT_SYMBOL(strncpy);
EXPORT_SYMBOL(strcat);
EXPORT_SYMBOL(strlen);
EXPORT_SYMBOL(strcmp);
EXPORT_SYMBOL(strncmp);

#ifndef CONFIG_GENERIC_CSUM
EXPORT_SYMBOL(csum_partial);
EXPORT_SYMBOL(csum_partial_copy_generic);
EXPORT_SYMBOL(ip_fast_csum);
EXPORT_SYMBOL(csum_tcpudp_magic);
#endif

EXPORT_SYMBOL(__copy_tofrom_user);
EXPORT_SYMBOL(__clear_user);
EXPORT_SYMBOL(copy_page);

#ifdef CONFIG_PPC64
EXPORT_SYMBOL(__arch_hweight8);
EXPORT_SYMBOL(__arch_hweight16);
EXPORT_SYMBOL(__arch_hweight32);
EXPORT_SYMBOL(__arch_hweight64);
#endif
747
arch/powerpc/lib/rheap.c
Normal file
@@ -0,0 +1,747 @@
/*
* A Remote Heap. Remote means that we don't touch the memory that the
* heap points to. Normal heap implementations use the memory they manage
* to place their list. We cannot do that because the memory we manage may
* have special properties, for example it is uncacheable or of a different
* endianness.
*
* Author: Pantelis Antoniou <panto@intracom.gr>
*
* 2004 (c) INTRACOM S.A. Greece. This file is licensed under
* the terms of the GNU General Public License version 2. This program
* is licensed "as is" without any warranty of any kind, whether express
* or implied.
*/
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/slab.h>

#include <asm/rheap.h>

/*
* Fixup a list_head, needed when copying lists. If the pointers fall
* between s and e, apply the delta. This assumes that
* sizeof(struct list_head *) == sizeof(unsigned long *).
*/
static inline void fixup(unsigned long s, unsigned long e, int d,
struct list_head *l)
{
unsigned long *pp;

pp = (unsigned long *)&l->next;
if (*pp >= s && *pp < e)
*pp += d;

pp = (unsigned long *)&l->prev;
if (*pp >= s && *pp < e)
*pp += d;
}

/* Grow the allocated blocks */
static int grow(rh_info_t * info, int max_blocks)
{
rh_block_t *block, *blk;
int i, new_blocks;
int delta;
unsigned long blks, blke;

if (max_blocks <= info->max_blocks)
return -EINVAL;

new_blocks = max_blocks - info->max_blocks;

block = kmalloc(sizeof(rh_block_t) * max_blocks, GFP_ATOMIC);
if (block == NULL)
return -ENOMEM;

if (info->max_blocks > 0) {

/* copy old block area */
memcpy(block, info->block,
sizeof(rh_block_t) * info->max_blocks);

delta = (char *)block - (char *)info->block;

/* and fixup list pointers */
blks = (unsigned long)info->block;
blke = (unsigned long)(info->block + info->max_blocks);

for (i = 0, blk = block; i < info->max_blocks; i++, blk++)
fixup(blks, blke, delta, &blk->list);

fixup(blks, blke, delta, &info->empty_list);
fixup(blks, blke, delta, &info->free_list);
fixup(blks, blke, delta, &info->taken_list);

/* free the old allocated memory */
if ((info->flags & RHIF_STATIC_BLOCK) == 0)
kfree(info->block);
}

info->block = block;
info->empty_slots += new_blocks;
info->max_blocks = max_blocks;
info->flags &= ~RHIF_STATIC_BLOCK;

/* add all new blocks to the free list */
blk = block + info->max_blocks - new_blocks;
for (i = 0; i < new_blocks; i++, blk++)
list_add(&blk->list, &info->empty_list);

return 0;
}

/*
* Assure at least the required number of empty slots. If this function
* causes a grow in the block area then all pointers kept to the block
* area are invalid!
*/
static int assure_empty(rh_info_t * info, int slots)
{
int max_blocks;

/* This function is not meant to be used to grow uncontrollably */
if (slots >= 4)
return -EINVAL;

/* Enough space */
if (info->empty_slots >= slots)
return 0;

/* Next 16 sized block */
max_blocks = ((info->max_blocks + slots) + 15) & ~15;

return grow(info, max_blocks);
}

static rh_block_t *get_slot(rh_info_t * info)
{
rh_block_t *blk;

/* If no more free slots, and failure to extend. */
/* XXX: You should have called assure_empty before */
if (info->empty_slots == 0) {
printk(KERN_ERR "rh: out of slots; crash is imminent.\n");
return NULL;
}

/* Get empty slot to use */
blk = list_entry(info->empty_list.next, rh_block_t, list);
list_del_init(&blk->list);
info->empty_slots--;

/* Initialize */
blk->start = 0;
blk->size = 0;
blk->owner = NULL;

return blk;
}

static inline void release_slot(rh_info_t * info, rh_block_t * blk)
{
list_add(&blk->list, &info->empty_list);
info->empty_slots++;
}

static void attach_free_block(rh_info_t * info, rh_block_t * blkn)
{
rh_block_t *blk;
rh_block_t *before;
rh_block_t *after;
rh_block_t *next;
int size;
unsigned long s, e, bs, be;
struct list_head *l;

/* We assume that they are aligned properly */
size = blkn->size;
s = blkn->start;
e = s + size;

/* Find the blocks immediately before and after the given one
* (if any) */
before = NULL;
after = NULL;
next = NULL;

list_for_each(l, &info->free_list) {
blk = list_entry(l, rh_block_t, list);

bs = blk->start;
be = bs + blk->size;

if (next == NULL && s >= bs)
next = blk;

if (be == s)
before = blk;

if (e == bs)
after = blk;

/* If both are not null, break now */
if (before != NULL && after != NULL)
break;
}

/* Now check if they are really adjacent */
if (before && s != (before->start + before->size))
before = NULL;

if (after && e != after->start)
after = NULL;

/* No coalescing; list insert and return */
if (before == NULL && after == NULL) {

if (next != NULL)
list_add(&blkn->list, &next->list);
else
list_add(&blkn->list, &info->free_list);

return;
}

/* We don't need it anymore */
release_slot(info, blkn);

/* Grow the before block */
if (before != NULL && after == NULL) {
before->size += size;
return;
}

/* Grow the after block backwards */
if (before == NULL && after != NULL) {
after->start -= size;
after->size += size;
return;
}

/* Grow the before block, and release the after block */
before->size += size + after->size;
list_del(&after->list);
release_slot(info, after);
}

static void attach_taken_block(rh_info_t * info, rh_block_t * blkn)
{
rh_block_t *blk;
struct list_head *l;

/* Find the block immediately before the given one (if any) */
list_for_each(l, &info->taken_list) {
blk = list_entry(l, rh_block_t, list);
if (blk->start > blkn->start) {
list_add_tail(&blkn->list, &blk->list);
return;
}
}

list_add_tail(&blkn->list, &info->taken_list);
}

/*
* Create a remote heap dynamically. Note that no memory for the blocks
* is allocated up front; it will be allocated upon the first allocation.
*/
rh_info_t *rh_create(unsigned int alignment)
{
rh_info_t *info;

/* Alignment must be a power of two */
if ((alignment & (alignment - 1)) != 0)
return ERR_PTR(-EINVAL);

info = kmalloc(sizeof(*info), GFP_ATOMIC);
if (info == NULL)
return ERR_PTR(-ENOMEM);

info->alignment = alignment;

/* Initially everything as empty */
info->block = NULL;
info->max_blocks = 0;
info->empty_slots = 0;
info->flags = 0;

INIT_LIST_HEAD(&info->empty_list);
INIT_LIST_HEAD(&info->free_list);
INIT_LIST_HEAD(&info->taken_list);

return info;
}
EXPORT_SYMBOL_GPL(rh_create);

/*
* Destroy a dynamically created remote heap. Deallocate only if the areas
* are not static.
*/
void rh_destroy(rh_info_t * info)
{
if ((info->flags & RHIF_STATIC_BLOCK) == 0 && info->block != NULL)
kfree(info->block);

if ((info->flags & RHIF_STATIC_INFO) == 0)
kfree(info);
}
EXPORT_SYMBOL_GPL(rh_destroy);

/*
* Initialize in place a remote heap info block. This is needed to support
* operation very early in the startup of the kernel, when it is not yet safe
* to call kmalloc.
*/
void rh_init(rh_info_t * info, unsigned int alignment, int max_blocks,
rh_block_t * block)
{
int i;
rh_block_t *blk;

/* Alignment must be a power of two */
if ((alignment & (alignment - 1)) != 0)
return;

info->alignment = alignment;

/* Initially everything as empty */
info->block = block;
info->max_blocks = max_blocks;
info->empty_slots = max_blocks;
info->flags = RHIF_STATIC_INFO | RHIF_STATIC_BLOCK;

INIT_LIST_HEAD(&info->empty_list);
INIT_LIST_HEAD(&info->free_list);
INIT_LIST_HEAD(&info->taken_list);

/* Add all new blocks to the free list */
for (i = 0, blk = block; i < max_blocks; i++, blk++)
list_add(&blk->list, &info->empty_list);
}
EXPORT_SYMBOL_GPL(rh_init);

/* Attach a free memory region; coalesces regions if adjacent */
int rh_attach_region(rh_info_t * info, unsigned long start, int size)
{
rh_block_t *blk;
unsigned long s, e, m;
int r;

/* The region must be aligned */
s = start;
e = s + size;
m = info->alignment - 1;

/* Round start up */
s = (s + m) & ~m;

/* Round end down */
e = e & ~m;

if (IS_ERR_VALUE(e) || (e < s))
return -ERANGE;

/* Take final values */
start = s;
size = e - s;

/* Grow the blocks, if needed */
r = assure_empty(info, 1);
if (r < 0)
return r;

blk = get_slot(info);
blk->start = start;
blk->size = size;
blk->owner = NULL;

attach_free_block(info, blk);

return 0;
}
EXPORT_SYMBOL_GPL(rh_attach_region);

/* Detach the given address range; splits a free block if needed. */
unsigned long rh_detach_region(rh_info_t * info, unsigned long start, int size)
{
struct list_head *l;
rh_block_t *blk, *newblk;
unsigned long s, e, m, bs, be;

/* Validate size */
if (size <= 0)
return (unsigned long) -EINVAL;

/* The region must be aligned */
s = start;
e = s + size;
m = info->alignment - 1;

/* Round start up */
s = (s + m) & ~m;

/* Round end down */
e = e & ~m;

if (assure_empty(info, 1) < 0)
return (unsigned long) -ENOMEM;

blk = NULL;
list_for_each(l, &info->free_list) {
blk = list_entry(l, rh_block_t, list);
/* The range must lie entirely inside one free block */
bs = blk->start;
be = blk->start + blk->size;
if (s >= bs && e <= be)
break;
blk = NULL;
}

if (blk == NULL)
return (unsigned long) -ENOMEM;

/* Perfect fit */
if (bs == s && be == e) {
/* Delete from free list, release slot */
list_del(&blk->list);
release_slot(info, blk);
return s;
}

/* blk still in free list, with updated start and/or size */
if (bs == s || be == e) {
if (bs == s)
blk->start += size;
blk->size -= size;

} else {
/* The front free fragment */
blk->size = s - bs;

/* the back free fragment */
newblk = get_slot(info);
newblk->start = e;
newblk->size = be - e;

list_add(&newblk->list, &blk->list);
}

return s;
}
EXPORT_SYMBOL_GPL(rh_detach_region);

/* Allocate a block of memory at the specified alignment. The value returned
* is an offset into the buffer initialized by rh_init(), or a negative number
* if there is an error.
*/
unsigned long rh_alloc_align(rh_info_t * info, int size, int alignment, const char *owner)
{
struct list_head *l;
rh_block_t *blk;
rh_block_t *newblk;
unsigned long start, sp_size;

/* Validate size, and alignment must be power of two */
if (size <= 0 || (alignment & (alignment - 1)) != 0)
return (unsigned long) -EINVAL;

/* Align to configured alignment */
size = (size + (info->alignment - 1)) & ~(info->alignment - 1);

if (assure_empty(info, 2) < 0)
return (unsigned long) -ENOMEM;

blk = NULL;
list_for_each(l, &info->free_list) {
blk = list_entry(l, rh_block_t, list);
if (size <= blk->size) {
start = (blk->start + alignment - 1) & ~(alignment - 1);
if (start + size <= blk->start + blk->size)
break;
}
blk = NULL;
}

if (blk == NULL)
return (unsigned long) -ENOMEM;

/* Just fits */
if (blk->size == size) {
/* Move from free list to taken list */
list_del(&blk->list);
newblk = blk;
} else {
/* Fragment caused, split if needed */
/* Create block for fragment in the beginning */
sp_size = start - blk->start;
if (sp_size) {
rh_block_t *spblk;

spblk = get_slot(info);
spblk->start = blk->start;
spblk->size = sp_size;
/* add before the blk */
list_add(&spblk->list, blk->list.prev);
}
newblk = get_slot(info);
newblk->start = start;
newblk->size = size;

/* blk still in free list, with updated start and size
* for fragment in the end */
blk->start = start + size;
blk->size -= sp_size + size;
/* No fragment in the end, remove blk */
if (blk->size == 0) {
list_del(&blk->list);
release_slot(info, blk);
}
}

newblk->owner = owner;
attach_taken_block(info, newblk);

return start;
}
EXPORT_SYMBOL_GPL(rh_alloc_align);

/* Allocate a block of memory at the default alignment. The value returned is
* an offset into the buffer initialized by rh_init(), or a negative number if
* there is an error.
*/
unsigned long rh_alloc(rh_info_t * info, int size, const char *owner)
{
return rh_alloc_align(info, size, info->alignment, owner);
}
EXPORT_SYMBOL_GPL(rh_alloc);

/* Allocate a block of memory at the given offset, rounded up to the default
* alignment. The value returned is an offset into the buffer initialized by
* rh_init(), or a negative number if there is an error.
*/
unsigned long rh_alloc_fixed(rh_info_t * info, unsigned long start, int size, const char *owner)
{
struct list_head *l;
rh_block_t *blk, *newblk1, *newblk2;
unsigned long s, e, m, bs = 0, be = 0;

/* Validate size */
if (size <= 0)
return (unsigned long) -EINVAL;

/* The region must be aligned */
s = start;
e = s + size;
m = info->alignment - 1;

/* Round start up */
s = (s + m) & ~m;

/* Round end down */
e = e & ~m;

if (assure_empty(info, 2) < 0)
return (unsigned long) -ENOMEM;

blk = NULL;
list_for_each(l, &info->free_list) {
blk = list_entry(l, rh_block_t, list);
/* The range must lie entirely inside one free block */
bs = blk->start;
be = blk->start + blk->size;
if (s >= bs && e <= be)
break;
blk = NULL;
}

if (blk == NULL)
return (unsigned long) -ENOMEM;

/* Perfect fit */
if (bs == s && be == e) {
/* Move from free list to taken list */
list_del(&blk->list);
blk->owner = owner;

start = blk->start;
attach_taken_block(info, blk);

return start;
}

/* blk still in free list, with updated start and/or size */
if (bs == s || be == e) {
if (bs == s)
blk->start += size;
blk->size -= size;

} else {
/* The front free fragment */
blk->size = s - bs;

/* The back free fragment */
newblk2 = get_slot(info);
newblk2->start = e;
newblk2->size = be - e;

list_add(&newblk2->list, &blk->list);
}

newblk1 = get_slot(info);
newblk1->start = s;
newblk1->size = e - s;
newblk1->owner = owner;

start = newblk1->start;
attach_taken_block(info, newblk1);

return start;
}
EXPORT_SYMBOL_GPL(rh_alloc_fixed);

/* Deallocate the memory previously allocated by one of the rh_alloc functions.
* The return value is the size of the deallocated block, or a negative number
* if there is an error.
*/
int rh_free(rh_info_t * info, unsigned long start)
{
rh_block_t *blk, *blk2;
struct list_head *l;
int size;

/* Linear search for block */
blk = NULL;
list_for_each(l, &info->taken_list) {
blk2 = list_entry(l, rh_block_t, list);
if (start < blk2->start)
break;
blk = blk2;
}

if (blk == NULL || start > (blk->start + blk->size))
return -EINVAL;

/* Remove from taken list */
list_del(&blk->list);

/* Get size of freed block */
size = blk->size;
attach_free_block(info, blk);

return size;
}
EXPORT_SYMBOL_GPL(rh_free);

int rh_get_stats(rh_info_t * info, int what, int max_stats, rh_stats_t * stats)
{
rh_block_t *blk;
struct list_head *l;
struct list_head *h;
int nr;

switch (what) {

case RHGS_FREE:
h = &info->free_list;
break;

case RHGS_TAKEN:
h = &info->taken_list;
break;

default:
return -EINVAL;
}

/* Linear search for block */
nr = 0;
list_for_each(l, h) {
blk = list_entry(l, rh_block_t, list);
if (stats != NULL && nr < max_stats) {
stats->start = blk->start;
stats->size = blk->size;
stats->owner = blk->owner;
stats++;
}
nr++;
}

return nr;
}
EXPORT_SYMBOL_GPL(rh_get_stats);

int rh_set_owner(rh_info_t * info, unsigned long start, const char *owner)
{
rh_block_t *blk, *blk2;
struct list_head *l;
int size;

/* Linear search for block */
blk = NULL;
list_for_each(l, &info->taken_list) {
blk2 = list_entry(l, rh_block_t, list);
if (start < blk2->start)
break;
blk = blk2;
}

if (blk == NULL || start > (blk->start + blk->size))
return -EINVAL;

blk->owner = owner;
size = blk->size;

return size;
}
EXPORT_SYMBOL_GPL(rh_set_owner);

void rh_dump(rh_info_t * info)
{
static rh_stats_t st[32]; /* XXX maximum 32 blocks */
int maxnr;
int i, nr;

maxnr = ARRAY_SIZE(st);

printk(KERN_INFO
"info @0x%p (%d slots empty / %d max)\n",
info, info->empty_slots, info->max_blocks);

printk(KERN_INFO " Free:\n");
nr = rh_get_stats(info, RHGS_FREE, maxnr, st);
if (nr > maxnr)
nr = maxnr;
for (i = 0; i < nr; i++)
printk(KERN_INFO
" 0x%lx-0x%lx (%u)\n",
st[i].start, st[i].start + st[i].size,
st[i].size);
printk(KERN_INFO "\n");

printk(KERN_INFO " Taken:\n");
nr = rh_get_stats(info, RHGS_TAKEN, maxnr, st);
if (nr > maxnr)
nr = maxnr;
for (i = 0; i < nr; i++)
printk(KERN_INFO
" 0x%lx-0x%lx (%u) %s\n",
st[i].start, st[i].start + st[i].size,
st[i].size, st[i].owner != NULL ? st[i].owner : "");
printk(KERN_INFO "\n");
}
EXPORT_SYMBOL_GPL(rh_dump);

void rh_dump_blk(rh_info_t * info, rh_block_t * blk)
{
printk(KERN_INFO
"blk @0x%p: 0x%lx-0x%lx (%u)\n",
blk, blk->start, blk->start + blk->size, blk->size);
}
EXPORT_SYMBOL_GPL(rh_dump_blk);
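As the header comment explains, the heap bookkeeping lives outside the managed region, so the managed memory is only ever handed out as offsets, never as pointers the heap dereferences. A usage sketch against the API above (the region size, alignment and owner string are illustrative only):

#include <linux/err.h>
#include <asm/rheap.h>

/* Sketch: carving allocations out of a 64KB region of device memory. */
static void rheap_usage_sketch(void)
{
        rh_info_t *rh;
        unsigned long off;

        rh = rh_create(32);                 /* 32-byte allocation granularity */
        if (IS_ERR(rh))
                return;

        rh_attach_region(rh, 0, 0x10000);   /* hand the whole region to the heap */

        off = rh_alloc(rh, 512, "rx ring"); /* an offset into the region, not a pointer */
        if (!IS_ERR_VALUE(off))
                rh_free(rh, off);

        rh_destroy(rh);
}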
2013
arch/powerpc/lib/sstep.c
Normal file
File diff suppressed because it is too large
164
arch/powerpc/lib/string.S
Normal file
@@ -0,0 +1,164 @@
/*
* String handling functions for PowerPC.
*
* Copyright (C) 1996 Paul Mackerras.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

.section __ex_table,"a"
PPC_LONG_ALIGN
.text

_GLOBAL(strcpy)
addi r5,r3,-1
addi r4,r4,-1
1: lbzu r0,1(r4)
cmpwi 0,r0,0
stbu r0,1(r5)
bne 1b
blr

/* This clears out any unused part of the destination buffer,
just as the libc version does. -- paulus */
_GLOBAL(strncpy)
PPC_LCMPI 0,r5,0
beqlr
mtctr r5
addi r6,r3,-1
addi r4,r4,-1
1: lbzu r0,1(r4)
cmpwi 0,r0,0
stbu r0,1(r6)
bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
bnelr /* if we didn't hit a null char, we're done */
mfctr r5
PPC_LCMPI 0,r5,0 /* any space left in destination buffer? */
beqlr /* we know r0 == 0 here */
2: stbu r0,1(r6) /* clear it out if so */
bdnz 2b
blr

_GLOBAL(strcat)
addi r5,r3,-1
addi r4,r4,-1
1: lbzu r0,1(r5)
cmpwi 0,r0,0
bne 1b
addi r5,r5,-1
1: lbzu r0,1(r4)
cmpwi 0,r0,0
stbu r0,1(r5)
bne 1b
blr

_GLOBAL(strcmp)
addi r5,r3,-1
addi r4,r4,-1
1: lbzu r3,1(r5)
cmpwi 1,r3,0
lbzu r0,1(r4)
subf. r3,r0,r3
beqlr 1
beq 1b
blr

_GLOBAL(strncmp)
PPC_LCMPI 0,r5,0
beq- 2f
mtctr r5
addi r5,r3,-1
addi r4,r4,-1
1: lbzu r3,1(r5)
cmpwi 1,r3,0
lbzu r0,1(r4)
subf. r3,r0,r3
beqlr 1
bdnzt eq,1b
blr
2: li r3,0
blr

_GLOBAL(strlen)
addi r4,r3,-1
1: lbzu r0,1(r4)
cmpwi 0,r0,0
bne 1b
subf r3,r3,r4
blr

_GLOBAL(memcmp)
PPC_LCMPI 0,r5,0
beq- 2f
mtctr r5
addi r6,r3,-1
addi r4,r4,-1
1: lbzu r3,1(r6)
lbzu r0,1(r4)
subf. r3,r0,r3
bdnzt 2,1b
blr
2: li r3,0
blr

_GLOBAL(memchr)
PPC_LCMPI 0,r5,0
beq- 2f
mtctr r5
addi r3,r3,-1
1: lbzu r0,1(r3)
cmpw 0,r0,r4
bdnzf 2,1b
beqlr
2: li r3,0
blr

#ifdef CONFIG_PPC32
_GLOBAL(__clear_user)
addi r6,r3,-4
li r3,0
li r5,0
cmplwi 0,r4,4
blt 7f
/* clear a single word */
11: stwu r5,4(r6)
beqlr
/* clear word sized chunks */
andi. r0,r6,3
add r4,r0,r4
subf r6,r0,r6
srwi r0,r4,2
andi. r4,r4,3
mtctr r0
bdz 7f
1: stwu r5,4(r6)
bdnz 1b
/* clear byte sized chunks */
7: cmpwi 0,r4,0
beqlr
mtctr r4
addi r6,r6,3
8: stbu r5,1(r6)
bdnz 8b
blr
90: mr r3,r4
blr
91: mfctr r3
slwi r3,r3,2
add r3,r3,r4
blr
92: mfctr r3
blr

.section __ex_table,"a"
PPC_LONG 11b,90b
PPC_LONG 1b,91b
PPC_LONG 8b,92b
.text
#endif
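For reference, the zero-filling behaviour the strncpy comment above describes corresponds to the following C semantics (a sketch, not the kernel's implementation):

#include <stddef.h>

/* Reference semantics for the strncpy above: copy up to n bytes,
 * then zero-fill any remainder of the destination. */
static char *strncpy_sketch(char *dst, const char *src, size_t n)
{
        size_t i = 0;

        while (i < n && src[i]) {
                dst[i] = src[i];
                i++;
        }
        while (i < n)
                dst[i++] = '\0';
        return dst;
}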
202
arch/powerpc/lib/string_64.S
Normal file
@@ -0,0 +1,202 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2012
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/

#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>

.section ".toc","aw"
PPC64_CACHES:
.tc ppc64_caches[TC],ppc64_caches
.section ".text"

/**
* __clear_user: - Zero a block of memory in user space, with less checking.
* @to: Destination address, in user space.
* @n: Number of bytes to zero.
*
* Zero a block of memory in user space. Caller must check
* the specified block with access_ok() before calling this function.
*
* Returns number of bytes that could not be cleared.
* On success, this will be zero.
*/

.macro err1
100:
.section __ex_table,"a"
.align 3
.llong 100b,.Ldo_err1
.previous
.endm

.macro err2
200:
.section __ex_table,"a"
.align 3
.llong 200b,.Ldo_err2
.previous
.endm

.macro err3
300:
.section __ex_table,"a"
.align 3
.llong 300b,.Ldo_err3
.previous
.endm

.Ldo_err1:
mr r3,r8

.Ldo_err2:
mtctr r4
1:
err3; stb r0,0(r3)
addi r3,r3,1
addi r4,r4,-1
bdnz 1b

.Ldo_err3:
mr r3,r4
blr

_GLOBAL_TOC(__clear_user)
cmpdi r4,32
neg r6,r3
li r0,0
blt .Lshort_clear
mr r8,r3
mtocrf 0x01,r6
clrldi r6,r6,(64-3)

/* Get the destination 8 byte aligned */
bf cr7*4+3,1f
err1; stb r0,0(r3)
addi r3,r3,1

1: bf cr7*4+2,2f
err1; sth r0,0(r3)
addi r3,r3,2

2: bf cr7*4+1,3f
err1; stw r0,0(r3)
addi r3,r3,4

3: sub r4,r4,r6

cmpdi r4,32
cmpdi cr1,r4,512
blt .Lshort_clear
bgt cr1,.Llong_clear

.Lmedium_clear:
srdi r6,r4,5
mtctr r6

/* Do 32 byte chunks */
4:
err2; std r0,0(r3)
err2; std r0,8(r3)
err2; std r0,16(r3)
err2; std r0,24(r3)
addi r3,r3,32
addi r4,r4,-32
bdnz 4b

.Lshort_clear:
/* up to 31 bytes to go */
cmpdi r4,16
blt 6f
err2; std r0,0(r3)
err2; std r0,8(r3)
addi r3,r3,16
addi r4,r4,-16

/* Up to 15 bytes to go */
6: mr r8,r3
clrldi r4,r4,(64-4)
mtocrf 0x01,r4
bf cr7*4+0,7f
err1; std r0,0(r3)
addi r3,r3,8

7: bf cr7*4+1,8f
err1; stw r0,0(r3)
addi r3,r3,4

8: bf cr7*4+2,9f
err1; sth r0,0(r3)
addi r3,r3,2

9: bf cr7*4+3,10f
err1; stb r0,0(r3)

10: li r3,0
blr

.Llong_clear:
ld r5,PPC64_CACHES@toc(r2)

bf cr7*4+0,11f
err2; std r0,0(r3)
addi r3,r3,8
addi r4,r4,-8

/* Destination is 16 byte aligned, need to get it cacheline aligned */
11: lwz r7,DCACHEL1LOGLINESIZE(r5)
lwz r9,DCACHEL1LINESIZE(r5)

/*
* With worst case alignment the long clear loop takes a minimum
* of 1 byte less than 2 cachelines.
*/
sldi r10,r9,2
cmpd r4,r10
blt .Lmedium_clear

neg r6,r3
addi r10,r9,-1
and. r5,r6,r10
beq 13f

srdi r6,r5,4
mtctr r6
mr r8,r3
12:
err1; std r0,0(r3)
err1; std r0,8(r3)
addi r3,r3,16
bdnz 12b

sub r4,r4,r5

13: srd r6,r4,r7
mtctr r6
mr r8,r3
14:
err1; dcbz r0,r3
add r3,r3,r9
bdnz 14b

and r4,r4,r10

cmpdi r4,32
blt .Lshort_clear
b .Lmedium_clear
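The kernel-doc block above spells out the calling contract: the caller validates the range, and the routine reports how many bytes it could not zero. A hedged C sketch of a caller honouring that contract (the wrapper name is hypothetical, mirroring what the generic clear_user() does):

#include <linux/uaccess.h>

/* Hypothetical wrapper: check with access_ok() first, then let
 * __clear_user() report the number of bytes NOT cleared. */
static unsigned long clear_user_sketch(void __user *to, unsigned long n)
{
        if (access_ok(VERIFY_WRITE, to, n))
                n = __clear_user(to, n);
        return n;
}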
41
arch/powerpc/lib/usercopy_64.c
Normal file
@@ -0,0 +1,41 @@
/*
* Functions which are too large to be inlined.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <asm/uaccess.h>

unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
{
if (likely(access_ok(VERIFY_READ, from, n)))
n = __copy_from_user(to, from, n);
else
memset(to, 0, n);
return n;
}

unsigned long copy_to_user(void __user *to, const void *from, unsigned long n)
{
if (likely(access_ok(VERIFY_WRITE, to, n)))
n = __copy_to_user(to, from, n);
return n;
}

unsigned long copy_in_user(void __user *to, const void __user *from,
unsigned long n)
{
might_sleep();
if (likely(access_ok(VERIFY_READ, from, n) &&
access_ok(VERIFY_WRITE, to, n)))
n = __copy_tofrom_user(to, from, n);
return n;
}

EXPORT_SYMBOL(copy_from_user);
EXPORT_SYMBOL(copy_to_user);
EXPORT_SYMBOL(copy_in_user);
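Note the asymmetry above: copy_from_user() zeroes the destination when access_ok() fails, so a rejected read never leaves uninitialized kernel memory behind, while copy_to_user() simply reports the bytes it could not write. A typical caller, sketched with an illustrative struct (not from this tree):

#include <linux/errno.h>
#include <linux/uaccess.h>

/* Illustrative request struct and handler; sketch only. */
struct demo_req {
        unsigned int len;
};

static long demo_ioctl_sketch(void __user *argp)
{
        struct demo_req req;

        if (copy_from_user(&req, argp, sizeof(req)))
                return -EFAULT; /* non-zero return = bytes left uncopied */
        req.len = 0;
        if (copy_to_user(argp, &req, sizeof(req)))
                return -EFAULT;
        return 0;
}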
74
arch/powerpc/lib/vmx-helper.c
Normal file
@@ -0,0 +1,74 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2011
*
* Authors: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
* Anton Blanchard <anton@au.ibm.com>
*/
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <asm/switch_to.h>

int enter_vmx_usercopy(void)
{
if (in_interrupt())
return 0;

/* This acts as preempt_disable() as well, which
* enable_kernel_altivec() requires. We need to disable page
* faults as they can call schedule and thus make us lose the
* VMX context. So on page faults, we just fail, which causes
* a fallback to the normal non-vmx copy.
*/
pagefault_disable();

enable_kernel_altivec();

return 1;
}

/*
* This function must return 0 because we tail call optimise when calling
* from __copy_tofrom_user_power7 which returns 0 on success.
*/
int exit_vmx_usercopy(void)
{
pagefault_enable();
return 0;
}

int enter_vmx_copy(void)
{
if (in_interrupt())
return 0;

preempt_disable();

enable_kernel_altivec();

return 1;
}

/*
* All calls to this function will be optimised into tail calls. We are
* passed a pointer to the destination which we return as required by a
* memcpy implementation.
*/
void *exit_vmx_copy(void *dest)
{
preempt_enable();
return dest;
}
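The enter/exit pairs above always bracket the vectorised fast path, and exit_vmx_copy() is written so the copy routine can tail-call it. A sketch of the calling pattern (the wrapper and the fallback are hypothetical; only enter_vmx_copy()/exit_vmx_copy() come from the file above):

#include <string.h>

int enter_vmx_copy(void);
void *exit_vmx_copy(void *dest);

/* Hypothetical scalar fallback standing in for the non-VMX path. */
static void *fallback_copy(void *d, const void *s, unsigned long n)
{
        return memcpy(d, s, n);
}

static void *vmx_bracketed_copy_sketch(void *dst, const void *src,
                                       unsigned long len)
{
        if (!enter_vmx_copy())
                return fallback_copy(dst, src, len); /* in interrupt context */

        /* ... an Altivec load/store loop would run here ... */

        return exit_vmx_copy(dst); /* re-enables preemption, returns dst */
}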
177
arch/powerpc/lib/xor_vmx.c
Normal file
@@ -0,0 +1,177 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2012
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
#include <altivec.h>

#include <linux/preempt.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <asm/switch_to.h>

typedef vector signed char unative_t;

#define DEFINE(V) \
unative_t *V = (unative_t *)V##_in; \
unative_t V##_0, V##_1, V##_2, V##_3

#define LOAD(V) \
do { \
V##_0 = V[0]; \
V##_1 = V[1]; \
V##_2 = V[2]; \
V##_3 = V[3]; \
} while (0)

#define STORE(V) \
do { \
V[0] = V##_0; \
V[1] = V##_1; \
V[2] = V##_2; \
V[3] = V##_3; \
} while (0)

#define XOR(V1, V2) \
do { \
V1##_0 = vec_xor(V1##_0, V2##_0); \
V1##_1 = vec_xor(V1##_1, V2##_1); \
V1##_2 = vec_xor(V1##_2, V2##_2); \
V1##_3 = vec_xor(V1##_3, V2##_3); \
} while (0)

void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
unsigned long *v2_in)
{
DEFINE(v1);
DEFINE(v2);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;

preempt_disable();
enable_kernel_altivec();

do {
LOAD(v1);
LOAD(v2);
XOR(v1, v2);
STORE(v1);

v1 += 4;
v2 += 4;
} while (--lines > 0);

preempt_enable();
}
EXPORT_SYMBOL(xor_altivec_2);

void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
unsigned long *v2_in, unsigned long *v3_in)
{
DEFINE(v1);
DEFINE(v2);
DEFINE(v3);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;

preempt_disable();
enable_kernel_altivec();

do {
LOAD(v1);
LOAD(v2);
LOAD(v3);
XOR(v1, v2);
XOR(v1, v3);
STORE(v1);

v1 += 4;
v2 += 4;
v3 += 4;
} while (--lines > 0);

preempt_enable();
}
EXPORT_SYMBOL(xor_altivec_3);

void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
unsigned long *v2_in, unsigned long *v3_in,
unsigned long *v4_in)
{
DEFINE(v1);
DEFINE(v2);
DEFINE(v3);
DEFINE(v4);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;

preempt_disable();
enable_kernel_altivec();

do {
LOAD(v1);
LOAD(v2);
LOAD(v3);
LOAD(v4);
XOR(v1, v2);
XOR(v3, v4);
XOR(v1, v3);
STORE(v1);

v1 += 4;
v2 += 4;
v3 += 4;
v4 += 4;
} while (--lines > 0);

preempt_enable();
}
EXPORT_SYMBOL(xor_altivec_4);

void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
unsigned long *v2_in, unsigned long *v3_in,
unsigned long *v4_in, unsigned long *v5_in)
{
DEFINE(v1);
DEFINE(v2);
DEFINE(v3);
DEFINE(v4);
DEFINE(v5);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;

preempt_disable();
enable_kernel_altivec();

do {
LOAD(v1);
LOAD(v2);
LOAD(v3);
LOAD(v4);
LOAD(v5);
XOR(v1, v2);
XOR(v3, v4);
XOR(v1, v5);
XOR(v1, v3);
STORE(v1);

v1 += 4;
v2 += 4;
v3 += 4;
v4 += 4;
v5 += 4;
} while (--lines > 0);

preempt_enable();
}
EXPORT_SYMBOL(xor_altivec_5);
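These four routines are the do_2 through do_5 operations of an xor_block_template; a sketch of how arch code of this era wires them into the RAID xor framework (field names follow the kernel's xor template API, mirroring what arch/powerpc/include/asm/xor.h does):

#include <linux/raid/xor.h>

void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
                   unsigned long *v2_in);
void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
                   unsigned long *v2_in, unsigned long *v3_in);
void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
                   unsigned long *v2_in, unsigned long *v3_in,
                   unsigned long *v4_in);
void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
                   unsigned long *v2_in, unsigned long *v3_in,
                   unsigned long *v4_in, unsigned long *v5_in);

/* Sketch of the registration; the xor framework benchmarks each
 * registered template at boot and picks the fastest. */
static struct xor_block_template xor_block_altivec_sketch = {
        .name = "altivec",
        .do_2 = xor_altivec_2,
        .do_3 = xor_altivec_3,
        .do_4 = xor_altivec_4,
        .do_5 = xor_altivec_5,
};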