Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

42
arch/powerpc/lib/Makefile Normal file
View file

@ -0,0 +1,42 @@
#
# Makefile for ppc-specific library files.
#
# Build this directory (and its subdirectories) with -Werror when
# CONFIG_PPC_WERROR is enabled.
subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
# 64-bit builds add $(NO_MINIMAL_TOC) to the compiler flags.
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
# Drop -pg when compiling the code-patching and feature-fixup objects.
CFLAGS_REMOVE_code-patching.o = -pg
CFLAGS_REMOVE_feature-fixups.o = -pg
# Objects built for every configuration.
obj-y := string.o alloc.o \
crtsavres.o ppc_ksyms.o
# 32-bit only objects.
obj-$(CONFIG_PPC32) += div64.o copy_32.o
obj-$(CONFIG_HAS_IOMEM) += devres.o
# 64-bit only objects.
# NOTE(review): string.o is already listed in obj-y above; the repeat here
# looks redundant -- confirm before removing.
obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \
usercopy_64.o mem_64.o string.o \
hweight_64.o \
copyuser_power7.o string_64.o copypage_power7.o
# The checksum objects in this directory are only built when
# CONFIG_GENERIC_CSUM is not set.
ifeq ($(CONFIG_GENERIC_CSUM),)
obj-y += checksum_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_PPC64) += checksum_wrappers_64.o
endif
obj-$(CONFIG_PPC64) += memcpy_power7.o memcpy_64.o
obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o
# locks.o and vmx-helper.o are only considered on 64-bit builds.
ifeq ($(CONFIG_PPC64),y)
obj-$(CONFIG_SMP) += locks.o
obj-$(CONFIG_ALTIVEC) += vmx-helper.o
endif
obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
obj-y += code-patching.o
obj-y += feature-fixups.o
obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
# xor_vmx.o is compiled with the AltiVec code generation and ABI flags.
obj-$(CONFIG_ALTIVEC) += xor_vmx.o
CFLAGS_xor_vmx.o += -maltivec -mabi=altivec

21
arch/powerpc/lib/alloc.c Normal file
View file

@ -0,0 +1,21 @@
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/bootmem.h>
#include <linux/string.h>
#include <asm/setup.h>
/*
 * Allocate a zero-filled buffer of @size bytes.
 *
 * Once mem_init_done is set the request is passed to kzalloc() with the
 * caller's @mask. Before that the buffer comes from alloc_bootmem() and is
 * cleared here; @mask is not used on that path.
 *
 * Returns the buffer, or whatever NULL result the underlying allocator
 * produced.
 */
void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask)
{
	void *buf;

	if (mem_init_done)
		return kzalloc(size, mask);

	buf = alloc_bootmem(size);
	if (!buf)
		return buf;

	memset(buf, 0, size);
	return buf;
}

View file

@ -0,0 +1,225 @@
/*
* This file contains assembly-language implementations
* of IP-style 1's complement checksum routines.
*
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
*/
#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
.text
/*
 * ip_fast_csum(buf, len) -- Optimized for IP header
 * len is in words and is always >= 5.
 *
 * In:  r3 = buf, r4 = len (in 32-bit words)
 * Out: r3 = complemented, folded 16-bit sum in the low halfword
 */
_GLOBAL(ip_fast_csum)
lwz r0,0(r3) /* r0 = first word */
lwzu r5,4(r3) /* r5 = second word; r3 advances by 4 */
addic. r4,r4,-2 /* r4 = words still to add; sets CR0 for blelr below */
addc r0,r0,r5 /* start the carry chain with the first two words */
mtctr r4
blelr- /* return early if len - 2 <= 0 (not expected: len >= 5) */
1: lwzu r4,4(r3)
adde r0,r0,r4 /* accumulate, feeding the previous carry back in */
bdnz 1b
addze r0,r0 /* add in final carry */
rlwinm r3,r0,16,0,31 /* fold two halves together */
add r3,r0,r3
not r3,r3 /* one's complement of the folded sum */
srwi r3,r3,16 /* result lives in the upper halfword; move it down */
blr
/*
 * Compute checksum of TCP or UDP pseudo-header:
 * csum_tcpudp_magic(saddr, daddr, len, proto, sum)
 *
 * In:  r3 = saddr, r4 = daddr, r5 = len, r6 = proto, r7 = sum
 * Out: r3 = complemented, folded 16-bit sum in the low halfword
 */
_GLOBAL(csum_tcpudp_magic)
rlwimi r5,r6,16,0,15 /* put proto in upper half of len */
addc r0,r3,r4 /* add 4 32-bit words together */
adde r0,r0,r5
adde r0,r0,r7
addze r0,r0 /* add in final carry */
rlwinm r3,r0,16,0,31 /* fold two halves together */
add r3,r0,r3
not r3,r3 /* one's complement of the folded sum */
srwi r3,r3,16 /* move the result from the upper to the lower halfword */
blr
/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * csum_partial(buff, len, sum)
 *
 * In:  r3 = buff, r4 = len (bytes), r5 = sum
 * Out: r3 = 32-bit sum including the final carry (not folded to 16 bits)
 */
_GLOBAL(csum_partial)
addic r0,r5,0 /* r0 = sum; adding 0 also clears the carry */
subi r3,r3,4 /* bias the pointer so every access uses a +4 displacement */
srwi. r6,r4,2 /* r6 = number of whole words */
beq 3f /* if we're doing < 4 bytes */
andi. r5,r3,2 /* Align buffer to longword boundary */
beq+ 1f
lhz r5,4(r3) /* do 2 bytes to get aligned */
addi r3,r3,2
subi r4,r4,2
addc r0,r0,r5
srwi. r6,r4,2 /* # words to do */
beq 3f
1: mtctr r6
2: lwzu r5,4(r3) /* the bdnz has zero overhead, so it should */
adde r0,r0,r5 /* be unnecessary to unroll this loop */
bdnz 2b
andi. r4,r4,3 /* r4 = bytes left after the word loop */
3: cmpwi 0,r4,2
blt+ 4f
lhz r5,4(r3) /* trailing halfword */
addi r3,r3,2
subi r4,r4,2
adde r0,r0,r5
4: cmpwi 0,r4,1
bne+ 5f
lbz r5,4(r3) /* trailing odd byte */
slwi r5,r5,8 /* Upper byte of word */
adde r0,r0,r5
5: addze r3,r0 /* add in final carry */
blr
/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in "sum" (32-bit), while copying the block to dst.
 * If an access exception occurs on src or dst, it stores -EFAULT
 * to *src_err or *dst_err respectively, and (for an error on
 * src) zeroes the rest of dst.
 *
 * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
 *
 * In:  r3 = src, r4 = dst, r5 = len (bytes), r6 = sum,
 *      r7 = src_err, r8 = dst_err (either pointer may be NULL)
 * Out: r3 = 32-bit sum including the final carry
 *
 * r0 holds the running sum. The numbered labels on the loads and
 * stores are the fault sites listed in the __ex_table section at the
 * end of the routine.
 */
_GLOBAL(csum_partial_copy_generic)
addic r0,r6,0 /* r0 = sum; adding 0 also clears the carry */
subi r3,r3,4 /* bias src and dst so every access uses a +4 displacement */
subi r4,r4,4
srwi. r6,r5,2
beq 3f /* if we're doing < 4 bytes */
andi. r9,r4,2 /* Align dst to longword boundary */
beq+ 1f
81: lhz r6,4(r3) /* do 2 bytes to get aligned */
addi r3,r3,2
subi r5,r5,2
91: sth r6,4(r4)
addi r4,r4,2
addc r0,r0,r6
srwi. r6,r5,2 /* # words to do */
beq 3f
1: srwi. r6,r5,4 /* # groups of 4 words to do */
beq 10f
mtctr r6
/* Main loop: copy and sum 16 bytes per iteration. */
71: lwz r6,4(r3)
72: lwz r9,8(r3)
73: lwz r10,12(r3)
74: lwzu r11,16(r3)
adde r0,r0,r6
75: stw r6,4(r4)
adde r0,r0,r9
76: stw r9,8(r4)
adde r0,r0,r10
77: stw r10,12(r4)
adde r0,r0,r11
78: stwu r11,16(r4)
bdnz 71b
10: rlwinm. r6,r5,30,30,31 /* # words left to do */
beq 13f
mtctr r6
82: lwzu r9,4(r3)
92: stwu r9,4(r4)
adde r0,r0,r9
bdnz 82b
13: andi. r5,r5,3 /* r5 = bytes left after the word loops */
3: cmpwi 0,r5,2
blt+ 4f
83: lhz r6,4(r3) /* trailing halfword */
addi r3,r3,2
subi r5,r5,2
93: sth r6,4(r4)
addi r4,r4,2
adde r0,r0,r6
4: cmpwi 0,r5,1
bne+ 5f
84: lbz r6,4(r3) /* trailing odd byte */
94: stb r6,4(r4)
slwi r6,r6,8 /* Upper byte of word */
adde r0,r0,r6
5: addze r3,r0 /* add in final carry */
blr
/* These shouldn't go in the fixup section, since that would
cause the ex_table addresses to get out of order. */
/* Fault on a load in the 16-byte loop: rebuild the byte count from ctr. */
src_error_4:
mfctr r6 /* update # bytes remaining from ctr */
rlwimi r5,r6,4,0,27 /* r5 = ctr * 16 + (r5 & 15) */
b 79f
/* Fault on the aligning halfword load: zero that halfword in dst. */
src_error_1:
li r6,0
subi r5,r5,2
95: sth r6,4(r4)
addi r4,r4,2
79: srwi. r6,r5,2 /* # words still to zero */
beq 3f
mtctr r6
/* Zero ctr words of dst; also the entry point for a fault at 82. */
src_error_2:
li r6,0
96: stwu r6,4(r4)
bdnz 96b
3: andi. r5,r5,3
beq src_error
/* Zero the last r5 bytes of dst; also the entry point for faults at 83/84. */
src_error_3:
li r6,0
mtctr r5
addi r4,r4,3 /* so that stbu with displacement 1 hits the next dst byte */
97: stbu r6,1(r4)
bdnz 97b
/* Report the source fault through *src_err unless the pointer is NULL. */
src_error:
cmpwi 0,r7,0
beq 1f
li r6,-EFAULT
stw r6,0(r7)
1: addze r3,r0
blr
/* Report a destination fault through *dst_err unless the pointer is NULL. */
dst_error:
cmpwi 0,r8,0
beq 1f
li r6,-EFAULT
stw r6,0(r8)
1: addze r3,r0
blr
/* Exception table: (faulting instruction, handler) pairs. */
.section __ex_table,"a"
.long 81b,src_error_1
.long 91b,dst_error
.long 71b,src_error_4
.long 72b,src_error_4
.long 73b,src_error_4
.long 74b,src_error_4
.long 75b,dst_error
.long 76b,dst_error
.long 77b,dst_error
.long 78b,dst_error
.long 82b,src_error_2
.long 92b,dst_error
.long 83b,src_error_3
.long 93b,dst_error
.long 84b,src_error_3
.long 94b,dst_error
.long 95b,dst_error
.long 96b,dst_error
.long 97b,dst_error

View file

@ -0,0 +1,480 @@
/*
* This file contains assembly-language implementations
* of IP-style 1's complement checksum routines.
*
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
*/
#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
/*
 * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header
 * len is in words and is always >= 5.
 *
 * In practice len == 5, but this is not guaranteed. So this code does not
 * attempt to use doubleword instructions.
 *
 * Returns in r3 the complemented, folded 16-bit sum (low halfword).
 */
_GLOBAL(ip_fast_csum)
lwz r0,0(r3) /* r0 = first word */
lwzu r5,4(r3) /* r5 = second word; r3 advances by 4 */
addic. r4,r4,-2 /* r4 = words still to add; sets CR0 for blelr below */
addc r0,r0,r5 /* start the carry chain with the first two words */
mtctr r4
blelr- /* return early if len - 2 <= 0 (not expected: len >= 5) */
1: lwzu r4,4(r3)
adde r0,r0,r4
bdnz 1b
addze r0,r0 /* add in final carry */
rldicl r4,r0,32,0 /* fold two 32-bit halves together */
add r0,r0,r4
srdi r0,r0,32 /* 32-bit sum is now in the low word of r0 */
rlwinm r3,r0,16,0,31 /* fold two halves together */
add r3,r0,r3
not r3,r3 /* one's complement of the folded sum */
srwi r3,r3,16 /* move the result from the upper to the lower halfword */
blr
/*
 * Compute checksum of TCP or UDP pseudo-header:
 * csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum)
 * No real gain trying to do this specially for 64 bit, but
 * the 32 bit addition may spill into the upper bits of
 * the doubleword so we still must fold it down from 64.
 *
 * Returns in r3 the complemented, folded 16-bit sum (low halfword).
 */
_GLOBAL(csum_tcpudp_magic)
rlwimi r5,r6,16,0,15 /* put proto in upper half of len */
addc r0,r3,r4 /* add 4 32-bit words together */
adde r0,r0,r5
adde r0,r0,r7
rldicl r4,r0,32,0 /* fold 64 bit value */
add r0,r4,r0
srdi r0,r0,32 /* 32-bit sum is now in the low word of r0 */
rlwinm r3,r0,16,0,31 /* fold two halves together */
add r3,r0,r3
not r3,r3 /* one's complement of the folded sum */
srwi r3,r3,16 /* move the result from the upper to the lower halfword */
blr
/*
 * Computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit).
 *
 * csum_partial(r3=buff, r4=len, r5=sum)
 *
 * Returns in r3 the sum folded from 64 to 32 bits. r0 holds the running
 * 64-bit sum; r14-r16 are saved and restored around the unrolled loop.
 */
_GLOBAL(csum_partial)
addic r0,r5,0 /* clear carry */
srdi. r6,r4,3 /* less than 8 bytes? */
beq .Lcsum_tail_word
/*
 * If only halfword aligned, align to a double word. Since odd
 * aligned addresses should be rare and they would require more
 * work to calculate the correct checksum, we ignore that case
 * and take the potential slowdown of unaligned loads.
 */
rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */
beq .Lcsum_aligned
li r7,4
sub r6,r7,r6 /* halfwords needed to reach the next doubleword boundary */
mtctr r6
1:
lhz r6,0(r3) /* align to doubleword */
subi r4,r4,2
addi r3,r3,2
adde r0,r0,r6
bdnz 1b
.Lcsum_aligned:
/*
 * We unroll the loop such that each iteration is 64 bytes with an
 * entry and exit limb of 64 bytes, meaning a minimum size of
 * 128 bytes.
 */
srdi. r6,r4,7
beq .Lcsum_tail_doublewords /* len < 128 */
srdi r6,r4,6
subi r6,r6,1 /* the exit limb after the loop handles the last 64 bytes */
mtctr r6
/* Open a stack frame to preserve r14-r16, used as extra load targets. */
stdu r1,-STACKFRAMESIZE(r1)
std r14,STK_REG(R14)(r1)
std r15,STK_REG(R15)(r1)
std r16,STK_REG(R16)(r1)
ld r6,0(r3)
ld r9,8(r3)
ld r10,16(r3)
ld r11,24(r3)
/*
 * On POWER6 and POWER7 back to back addes take 2 cycles because of
 * the XER dependency. This means the fastest this loop can go is
 * 16 cycles per iteration. The scheduling of the loop below has
 * been shown to hit this on both POWER6 and POWER7.
 */
.align 5
2:
adde r0,r0,r6
ld r12,32(r3)
ld r14,40(r3)
adde r0,r0,r9
ld r15,48(r3)
ld r16,56(r3)
addi r3,r3,64
adde r0,r0,r10
adde r0,r0,r11
adde r0,r0,r12
adde r0,r0,r14
adde r0,r0,r15
ld r6,0(r3)
ld r9,8(r3)
adde r0,r0,r16
ld r10,16(r3)
ld r11,24(r3)
bdnz 2b
/* Exit limb: sum the final 64-byte group without preloading another. */
adde r0,r0,r6
ld r12,32(r3)
ld r14,40(r3)
adde r0,r0,r9
ld r15,48(r3)
ld r16,56(r3)
addi r3,r3,64
adde r0,r0,r10
adde r0,r0,r11
adde r0,r0,r12
adde r0,r0,r14
adde r0,r0,r15
adde r0,r0,r16
ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)
addi r1,r1,STACKFRAMESIZE
andi. r4,r4,63 /* bytes left after the 64-byte groups */
.Lcsum_tail_doublewords: /* Up to 127 bytes to go */
srdi. r6,r4,3
beq .Lcsum_tail_word
mtctr r6
3:
ld r6,0(r3)
addi r3,r3,8
adde r0,r0,r6
bdnz 3b
andi. r4,r4,7
.Lcsum_tail_word: /* Up to 7 bytes to go */
srdi. r6,r4,2
beq .Lcsum_tail_halfword
lwz r6,0(r3)
addi r3,r3,4
adde r0,r0,r6
subi r4,r4,4
.Lcsum_tail_halfword: /* Up to 3 bytes to go */
srdi. r6,r4,1
beq .Lcsum_tail_byte
lhz r6,0(r3)
addi r3,r3,2
adde r0,r0,r6
subi r4,r4,2
.Lcsum_tail_byte: /* Up to 1 byte to go */
andi. r6,r4,1
beq .Lcsum_finish
lbz r6,0(r3)
sldi r9,r6,8 /* Pad the byte out to 16 bits */
adde r0,r0,r9
.Lcsum_finish:
addze r0,r0 /* add in final carry */
rldicl r4,r0,32,0 /* fold two 32 bit halves together */
add r3,r4,r0
srdi r3,r3,32 /* folded 32-bit result is in the upper word; move it down */
blr
/*
 * Exception-table helpers for csum_partial_copy_generic. Each macro drops a
 * local label at the current location (i.e. on the instruction written after
 * the macro on the same line) and records a (label, handler) pair in
 * __ex_table, then switches back to the previous section.
 *
 * "source"/"dest" route to .Lsrc_error/.Ldest_error, which restore r14-r16
 * and pop the stack frame first; the "nr" variants route straight to
 * .Lsrc_error_nr/.Ldest_error_nr, which skip that restore.
 */
.macro srcnr
100:
.section __ex_table,"a"
.align 3
.llong 100b,.Lsrc_error_nr
.previous
.endm
.macro source
150:
.section __ex_table,"a"
.align 3
.llong 150b,.Lsrc_error
.previous
.endm
.macro dstnr
200:
.section __ex_table,"a"
.align 3
.llong 200b,.Ldest_error_nr
.previous
.endm
.macro dest
250:
.section __ex_table,"a"
.align 3
.llong 250b,.Ldest_error
.previous
.endm
/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in "sum" (32-bit), while copying the block to dst.
 * If an access exception occurs on src or dst, it stores -EFAULT
 * to *src_err or *dst_err respectively. The caller must take any action
 * required in this case (zeroing memory, recalculating partial checksum etc).
 *
 * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
 *
 * On the normal path r3 returns the sum folded from 64 to 32 bits. The
 * error exits return without computing a result in r3. Loads and stores
 * are tagged with the srcnr/source/dstnr/dest macros; the non-"nr" forms
 * are used only while the r14-r16 stack frame is open.
 */
_GLOBAL(csum_partial_copy_generic)
addic r0,r6,0 /* clear carry */
srdi. r6,r5,3 /* less than 8 bytes? */
beq .Lcopy_tail_word
/*
 * If only halfword aligned, align to a double word. Since odd
 * aligned addresses should be rare and they would require more
 * work to calculate the correct checksum, we ignore that case
 * and take the potential slowdown of unaligned loads.
 *
 * If the source and destination are relatively unaligned we only
 * align the source. This keeps things simple.
 */
rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */
beq .Lcopy_aligned
li r9,4
sub r6,r9,r6 /* halfwords needed to reach the next doubleword boundary */
mtctr r6
1:
srcnr; lhz r6,0(r3) /* align to doubleword */
subi r5,r5,2
addi r3,r3,2
adde r0,r0,r6
dstnr; sth r6,0(r4)
addi r4,r4,2
bdnz 1b
.Lcopy_aligned:
/*
 * We unroll the loop such that each iteration is 64 bytes with an
 * entry and exit limb of 64 bytes, meaning a minimum size of
 * 128 bytes.
 */
srdi. r6,r5,7
beq .Lcopy_tail_doublewords /* len < 128 */
srdi r6,r5,6
subi r6,r6,1 /* the exit limb after the loop handles the last 64 bytes */
mtctr r6
/* Open a stack frame to preserve r14-r16, used as extra load targets. */
stdu r1,-STACKFRAMESIZE(r1)
std r14,STK_REG(R14)(r1)
std r15,STK_REG(R15)(r1)
std r16,STK_REG(R16)(r1)
source; ld r6,0(r3)
source; ld r9,8(r3)
source; ld r10,16(r3)
source; ld r11,24(r3)
/*
 * On POWER6 and POWER7 back to back addes take 2 cycles because of
 * the XER dependency. This means the fastest this loop can go is
 * 16 cycles per iteration. The scheduling of the loop below has
 * been shown to hit this on both POWER6 and POWER7.
 */
.align 5
2:
adde r0,r0,r6
source; ld r12,32(r3)
source; ld r14,40(r3)
adde r0,r0,r9
source; ld r15,48(r3)
source; ld r16,56(r3)
addi r3,r3,64
adde r0,r0,r10
dest; std r6,0(r4)
dest; std r9,8(r4)
adde r0,r0,r11
dest; std r10,16(r4)
dest; std r11,24(r4)
adde r0,r0,r12
dest; std r12,32(r4)
dest; std r14,40(r4)
adde r0,r0,r14
dest; std r15,48(r4)
dest; std r16,56(r4)
addi r4,r4,64
adde r0,r0,r15
source; ld r6,0(r3)
source; ld r9,8(r3)
adde r0,r0,r16
source; ld r10,16(r3)
source; ld r11,24(r3)
bdnz 2b
/* Exit limb: copy and sum the final 64-byte group without preloading. */
adde r0,r0,r6
source; ld r12,32(r3)
source; ld r14,40(r3)
adde r0,r0,r9
source; ld r15,48(r3)
source; ld r16,56(r3)
addi r3,r3,64
adde r0,r0,r10
dest; std r6,0(r4)
dest; std r9,8(r4)
adde r0,r0,r11
dest; std r10,16(r4)
dest; std r11,24(r4)
adde r0,r0,r12
dest; std r12,32(r4)
dest; std r14,40(r4)
adde r0,r0,r14
dest; std r15,48(r4)
dest; std r16,56(r4)
addi r4,r4,64
adde r0,r0,r15
adde r0,r0,r16
ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)
addi r1,r1,STACKFRAMESIZE
andi. r5,r5,63 /* bytes left after the 64-byte groups */
.Lcopy_tail_doublewords: /* Up to 127 bytes to go */
srdi. r6,r5,3
beq .Lcopy_tail_word
mtctr r6
3:
srcnr; ld r6,0(r3)
addi r3,r3,8
adde r0,r0,r6
dstnr; std r6,0(r4)
addi r4,r4,8
bdnz 3b
andi. r5,r5,7
.Lcopy_tail_word: /* Up to 7 bytes to go */
srdi. r6,r5,2
beq .Lcopy_tail_halfword
srcnr; lwz r6,0(r3)
addi r3,r3,4
adde r0,r0,r6
dstnr; stw r6,0(r4)
addi r4,r4,4
subi r5,r5,4
.Lcopy_tail_halfword: /* Up to 3 bytes to go */
srdi. r6,r5,1
beq .Lcopy_tail_byte
srcnr; lhz r6,0(r3)
addi r3,r3,2
adde r0,r0,r6
dstnr; sth r6,0(r4)
addi r4,r4,2
subi r5,r5,2
.Lcopy_tail_byte: /* Up to 1 byte to go */
andi. r6,r5,1
beq .Lcopy_finish
srcnr; lbz r6,0(r3)
sldi r9,r6,8 /* Pad the byte out to 16 bits */
adde r0,r0,r9
dstnr; stb r6,0(r4)
.Lcopy_finish:
addze r0,r0 /* add in final carry */
rldicl r4,r0,32,0 /* fold two 32 bit halves together */
add r3,r4,r0
srdi r3,r3,32 /* folded 32-bit result is in the upper word; move it down */
blr
/* Source fault inside the unrolled loop: undo the stack frame first. */
.Lsrc_error:
ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)
addi r1,r1,STACKFRAMESIZE
/* Source fault: store -EFAULT through src_err (r7) unless it is NULL. */
.Lsrc_error_nr:
cmpdi 0,r7,0
beqlr
li r6,-EFAULT
stw r6,0(r7)
blr
/* Destination fault inside the unrolled loop: undo the stack frame first. */
.Ldest_error:
ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)
addi r1,r1,STACKFRAMESIZE
/* Destination fault: store -EFAULT through dst_err (r8) unless it is NULL. */
.Ldest_error_nr:
cmpdi 0,r8,0
beqlr
li r6,-EFAULT
stw r6,0(r8)
blr

View file

@ -0,0 +1,102 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2010
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
#include <linux/export.h>
#include <linux/compiler.h>
#include <linux/types.h>
#include <asm/checksum.h>
#include <asm/uaccess.h>
/*
 * Copy @len bytes from user space (@src) to @dst and checksum them.
 *
 * @sum is the starting checksum value. *@err_ptr is set to 0 on success
 * and to -EFAULT on failure.
 *
 * Return value:
 *  - 0 when @len is 0 (the incoming @sum is not folded in);
 *  - @sum unchanged when @len is negative or the range fails access_ok();
 *  - otherwise the checksum of what ended up in @dst. If the fast path
 *    faults, the copy is redone with __copy_from_user(), any bytes that
 *    could not be read are zeroed in @dst, and the checksum is recomputed
 *    over @dst.
 */
__wsum csum_and_copy_from_user(const void __user *src, void *dst,
			       int len, __wsum sum, int *err_ptr)
{
	unsigned int result;

	might_sleep();

	*err_ptr = 0;

	if (!len) {
		result = 0;
	} else if (unlikely((len < 0) || !access_ok(VERIFY_READ, src, len))) {
		*err_ptr = -EFAULT;
		result = (__force unsigned int)sum;
	} else {
		result = csum_partial_copy_generic((void __force *)src, dst,
						   len, sum, err_ptr, NULL);

		if (unlikely(*err_ptr)) {
			/* Slow path: redo the copy and see how much is missing. */
			int uncopied = __copy_from_user(dst, src, len);

			if (uncopied)
				memset(dst + len - uncopied, 0, uncopied);
			*err_ptr = uncopied ? -EFAULT : 0;

			result = csum_partial(dst, len, sum);
		}
	}

	return (__force __wsum)result;
}
EXPORT_SYMBOL(csum_and_copy_from_user);
/*
 * Checksum @len bytes at @src and copy them to user space (@dst).
 *
 * @sum is the starting checksum value. *@err_ptr is set to 0 on success
 * and to -EFAULT on failure.
 *
 * Return value:
 *  - 0 when @len is 0 (the incoming @sum is not folded in);
 *  - -1 (an invalid checksum) when @len is negative, the range fails
 *    access_ok(), or the data could not be copied;
 *  - otherwise the checksum of the source block.
 *
 * If the fast path faults, the checksum is recomputed from @src and the
 * copy is retried with copy_to_user(). When that retry succeeds,
 * *@err_ptr is cleared again so the caller is not told about a failure
 * that did not happen; csum_and_copy_from_user() treats its retry the
 * same way.
 */
__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
			     __wsum sum, int *err_ptr)
{
	unsigned int csum;

	might_sleep();

	*err_ptr = 0;

	if (!len) {
		csum = 0;
		goto out;
	}

	if (unlikely((len < 0) || !access_ok(VERIFY_WRITE, dst, len))) {
		*err_ptr = -EFAULT;
		csum = -1; /* invalid checksum */
		goto out;
	}

	csum = csum_partial_copy_generic(src, (void __force *)dst,
					 len, sum, NULL, err_ptr);

	if (unlikely(*err_ptr)) {
		csum = csum_partial(src, len, sum);

		if (copy_to_user(dst, src, len)) {
			*err_ptr = -EFAULT;
			csum = -1; /* invalid checksum */
		} else {
			/* The retry delivered everything: drop the stale error. */
			*err_ptr = 0;
		}
	}

out:
	return (__force __wsum)csum;
}
EXPORT_SYMBOL(csum_and_copy_to_user);

View file

@ -0,0 +1,470 @@
/*
* Copyright 2008 Michael Ellerman, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <asm/page.h>
#include <asm/code-patching.h>
#include <asm/uaccess.h>
/*
 * Write the 32-bit instruction @instr at @addr and make it visible to
 * instruction fetch.
 *
 * The store goes through __put_user_size() so a faulting address is
 * reported instead of crashing. On success the data cache block is
 * written back and the matching instruction cache block invalidated.
 *
 * Returns 0 on success or the error reported by __put_user_size().
 */
int patch_instruction(unsigned int *addr, unsigned int instr)
{
	int fault;

	__put_user_size(instr, addr, 4, fault);
	if (!fault)
		asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr));

	return fault;
}
/*
 * Build a branch from @addr to @target with create_branch() and write it
 * at @addr. @flags is passed through to create_branch().
 *
 * Returns the result of patch_instruction().
 */
int patch_branch(unsigned int *addr, unsigned long target, int flags)
{
	const unsigned int branch = create_branch(addr, target, flags);

	return patch_instruction(addr, branch);
}
/*
 * Encode an unconditional (I-form, opcode 0x48000000) branch located at
 * @addr that goes to @target.
 *
 * Only the two low bits of @flags are placed in the instruction. When
 * BRANCH_ABSOLUTE is not set in @flags the displacement is taken relative
 * to @addr.
 *
 * Returns the instruction word, or 0 if the displacement is not a
 * multiple of 4 or lies outside -0x2000000 .. 0x1fffffc.
 */
unsigned int create_branch(const unsigned int *addr,
			   unsigned long target, int flags)
{
	long disp = target;

	if (!(flags & BRANCH_ABSOLUTE))
		disp -= (unsigned long)addr;

	/* Reject anything the 26-bit displacement field cannot hold. */
	if (disp < -0x2000000 || disp > 0x1fffffc || (disp & 0x3))
		return 0;

	/* Mask both fields so neither can spill into the other. */
	return 0x48000000 | (flags & 0x3) | (disp & 0x03FFFFFC);
}
/*
 * Encode a conditional (B-form, opcode 0x40000000) branch located at
 * @addr that goes to @target.
 *
 * @flags contributes the bits selected by the mask 0x3FF0003: the
 * condition fields in bits 16-25 plus the two low bits. When
 * BRANCH_ABSOLUTE is not set in @flags the displacement is taken relative
 * to @addr.
 *
 * Returns the instruction word, or 0 if the displacement is not a
 * multiple of 4 or lies outside -0x8000 .. 0x7FFF.
 */
unsigned int create_cond_branch(const unsigned int *addr,
				unsigned long target, int flags)
{
	long disp = target;

	if (!(flags & BRANCH_ABSOLUTE))
		disp -= (unsigned long)addr;

	/* Reject anything the 16-bit displacement field cannot hold. */
	if (disp < -0x8000 || disp > 0x7FFF || (disp & 0x3))
		return 0;

	/* Mask both fields so neither can spill into the other. */
	return 0x40000000 | (flags & 0x3FF0003) | (disp & 0xFFFC);
}
/* Extract the 6-bit primary opcode held in the top bits of @instr. */
static unsigned int branch_opcode(unsigned int instr)
{
	const unsigned int top_bits = instr >> 26;

	return top_bits & 0x3F;
}
/* Return non-zero if @instr has primary opcode 18 (I-form branch). */
static int instr_is_branch_iform(unsigned int instr)
{
	const unsigned int opcode = branch_opcode(instr);

	return opcode == 18;
}
/* Return non-zero if @instr has primary opcode 16 (B-form branch). */
static int instr_is_branch_bform(unsigned int instr)
{
	const unsigned int opcode = branch_opcode(instr);

	return opcode == 16;
}
/*
 * Return 1 if @instr is an I-form or B-form branch without the
 * BRANCH_ABSOLUTE bit set, 0 otherwise.
 */
int instr_is_relative_branch(unsigned int instr)
{
	const int relative = !(instr & BRANCH_ABSOLUTE);

	return relative &&
	       (instr_is_branch_iform(instr) || instr_is_branch_bform(instr));
}
/*
 * Decode the destination of the I-form branch stored at @instr.
 *
 * The displacement field (mask 0x3FFFFFC) is sign-extended from bit 25.
 * Unless the instruction has BRANCH_ABSOLUTE set, the address of the
 * instruction itself is added.
 */
static unsigned long branch_iform_target(const unsigned int *instr)
{
	const unsigned int insn = *instr;
	signed long disp = insn & 0x3FFFFFC;

	/* Top bit of the field set means a negative displacement. */
	if (disp & 0x2000000)
		disp -= 0x4000000;

	if (!(insn & BRANCH_ABSOLUTE))
		disp += (unsigned long)instr;

	return (unsigned long)disp;
}
/*
 * Decode the destination of the B-form branch stored at @instr.
 *
 * The displacement field (mask 0xFFFC) is sign-extended from bit 15.
 * Unless the instruction has BRANCH_ABSOLUTE set, the address of the
 * instruction itself is added.
 */
static unsigned long branch_bform_target(const unsigned int *instr)
{
	const unsigned int insn = *instr;
	signed long disp = insn & 0xFFFC;

	/* Top bit of the field set means a negative displacement. */
	if (disp & 0x8000)
		disp -= 0x10000;

	if (!(insn & BRANCH_ABSOLUTE))
		disp += (unsigned long)instr;

	return (unsigned long)disp;
}
/*
 * Return the destination address of the branch stored at @instr, or 0 if
 * the word there is neither an I-form nor a B-form branch.
 */
unsigned long branch_target(const unsigned int *instr)
{
	const unsigned int insn = *instr;

	if (instr_is_branch_iform(insn))
		return branch_iform_target(instr);

	if (instr_is_branch_bform(insn))
		return branch_bform_target(instr);

	return 0;
}
/*
 * Return 1 if the word at @instr is an I-form or B-form branch whose
 * decoded destination equals @addr, 0 otherwise.
 */
int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr)
{
	const unsigned int insn = *instr;

	if (!instr_is_branch_iform(insn) && !instr_is_branch_bform(insn))
		return 0;

	return branch_target(instr) == addr;
}
/*
 * Re-encode the branch stored at @src so that, placed at @dest, it still
 * reaches the same destination.
 *
 * The source instruction word is handed to the create_*() helper as its
 * flags argument; those helpers mask out the bits they use.
 *
 * Returns the new instruction word, or 0 if @src does not hold a branch
 * or the create_*() helper rejects the new displacement.
 */
unsigned int translate_branch(const unsigned int *dest, const unsigned int *src)
{
	const unsigned long target = branch_target(src);
	const unsigned int insn = *src;

	if (instr_is_branch_iform(insn))
		return create_branch(dest, target, insn);

	if (instr_is_branch_bform(insn))
		return create_cond_branch(dest, target, insn);

	return 0;
}
#ifdef CONFIG_PPC_BOOK3E_64
/*
 * Point the exception vector at byte offset @exc from
 * interrupt_base_book3e to the handler at @addr.
 */
void __patch_exception(int exc, unsigned long addr)
{
	extern unsigned int interrupt_base_book3e;
	unsigned int *vector = &interrupt_base_book3e + (exc / 4);

	/*
	 * Each vector begins with a NOP followed by the branch, so that
	 * single stepping from userspace (which stops on the second
	 * instruction) works. The branch to patch is therefore the second
	 * instruction of the vector, not the first.
	 */
	patch_branch(vector + 1, addr, 0);
}
#endif
#ifdef CONFIG_CODE_PATCHING_SELFTEST
/*
 * Scratch function for the self-tests: test_create_function_call()
 * overwrites the instruction at this function's entry point with a branch.
 */
static void __init test_trampoline(void)
{
asm ("nop;\n");
}
/*
 * Self-test assertion: log the source line when condition @x is false.
 *
 * The body is wrapped in do { } while (0) so that "check(x);" expands to
 * exactly one statement. The bare "if (...) printk(...);" form would
 * capture a following "else" and leave a stray empty statement after
 * every use.
 */
#define check(x)							\
	do {								\
		if (!(x))						\
			printk("code-patching: test failed at line %d\n", __LINE__); \
	} while (0)
/*
 * Self-test for I-form (unconditional) branch handling: opcode
 * recognition, decoding of hand-built instructions, and encoding via
 * create_branch(). Relative branches are measured from the address of the
 * local variable "instr", which stands in for the instruction location.
 */
static void __init test_branch_iform(void)
{
unsigned int instr;
unsigned long addr;
addr = (unsigned long)&instr;
/* The simplest case, branch to self, no flags */
check(instr_is_branch_iform(0x48000000));
/* All bits of target set, and flags */
check(instr_is_branch_iform(0x4bffffff));
/* High bit of opcode set, which is wrong */
check(!instr_is_branch_iform(0xcbffffff));
/* Middle bits of opcode set, which is wrong */
check(!instr_is_branch_iform(0x7bffffff));
/* Simplest case, branch to self with link */
check(instr_is_branch_iform(0x48000001));
/* All bits of targets set */
check(instr_is_branch_iform(0x4bfffffd));
/* Some bits of targets set */
check(instr_is_branch_iform(0x4bff00fd));
/* Must be a valid branch to start with */
check(!instr_is_branch_iform(0x7bfffffd));
/* Absolute branch to 0x100 */
instr = 0x48000103;
check(instr_is_branch_to_addr(&instr, 0x100));
/* Absolute branch to 0x420fc */
instr = 0x480420ff;
check(instr_is_branch_to_addr(&instr, 0x420fc));
/* Maximum positive relative branch, + 32 MB - 4B */
instr = 0x49fffffc;
check(instr_is_branch_to_addr(&instr, addr + 0x1FFFFFC));
/* Smallest negative relative branch, - 4B */
instr = 0x4bfffffc;
check(instr_is_branch_to_addr(&instr, addr - 4));
/* Largest negative relative branch, - 32 MB */
instr = 0x4a000000;
check(instr_is_branch_to_addr(&instr, addr - 0x2000000));
/* Branch to self, with link */
instr = create_branch(&instr, addr, BRANCH_SET_LINK);
check(instr_is_branch_to_addr(&instr, addr));
/* Branch to self - 0x100, with link */
instr = create_branch(&instr, addr - 0x100, BRANCH_SET_LINK);
check(instr_is_branch_to_addr(&instr, addr - 0x100));
/* Branch to self + 0x100, no link */
instr = create_branch(&instr, addr + 0x100, 0);
check(instr_is_branch_to_addr(&instr, addr + 0x100));
/* Maximum relative negative offset, - 32 MB */
instr = create_branch(&instr, addr - 0x2000000, BRANCH_SET_LINK);
check(instr_is_branch_to_addr(&instr, addr - 0x2000000));
/* Out of range relative negative offset, - 32 MB - 4 */
instr = create_branch(&instr, addr - 0x2000004, BRANCH_SET_LINK);
check(instr == 0);
/* Out of range relative positive offset, + 32 MB */
instr = create_branch(&instr, addr + 0x2000000, BRANCH_SET_LINK);
check(instr == 0);
/* Unaligned target */
instr = create_branch(&instr, addr + 3, BRANCH_SET_LINK);
check(instr == 0);
/* Check flags are masked correctly */
instr = create_branch(&instr, addr, 0xFFFFFFFC);
check(instr_is_branch_to_addr(&instr, addr));
check(instr == 0x48000000);
}
/*
 * Self-test: overwrite the first instruction of test_trampoline() with a
 * branch-and-link to this function, then confirm the patched word decodes
 * back to that destination.
 */
static void __init test_create_function_call(void)
{
	unsigned int *tramp;
	unsigned long self;
	unsigned int call;

	/* Check we can create a function call */
	tramp = (unsigned int *)ppc_function_entry(test_trampoline);
	self = ppc_function_entry(test_create_function_call);

	call = create_branch(tramp, self, BRANCH_SET_LINK);
	patch_instruction(tramp, call);

	check(instr_is_branch_to_addr(tramp, self));
}
/*
 * Self-test for B-form (conditional) branch handling: opcode recognition,
 * decoding of hand-built instructions, and encoding via
 * create_cond_branch(). Relative branches are measured from the address
 * of the local variable "instr", which stands in for the instruction
 * location.
 */
static void __init test_branch_bform(void)
{
unsigned long addr;
unsigned int *iptr, instr, flags;
iptr = &instr;
addr = (unsigned long)iptr;
/* The simplest case, branch to self, no flags */
check(instr_is_branch_bform(0x40000000));
/* All bits of target set, and flags */
check(instr_is_branch_bform(0x43ffffff));
/* High bit of opcode set, which is wrong */
check(!instr_is_branch_bform(0xc3ffffff));
/* Middle bits of opcode set, which is wrong */
check(!instr_is_branch_bform(0x7bffffff));
/* Absolute conditional branch to 0x100 */
instr = 0x43ff0103;
check(instr_is_branch_to_addr(&instr, 0x100));
/* Absolute conditional branch to 0x20fc */
instr = 0x43ff20ff;
check(instr_is_branch_to_addr(&instr, 0x20fc));
/* Maximum positive relative conditional branch, + 32 KB - 4B */
instr = 0x43ff7ffc;
check(instr_is_branch_to_addr(&instr, addr + 0x7FFC));
/* Smallest negative relative conditional branch, - 4B */
instr = 0x43fffffc;
check(instr_is_branch_to_addr(&instr, addr - 4));
/* Largest negative relative conditional branch, - 32 KB */
instr = 0x43ff8000;
check(instr_is_branch_to_addr(&instr, addr - 0x8000));
/* All condition code bits set & link */
/*
 * NOTE(review): 0x3ff000 overlaps the displacement field; only bits 16-21
 * survive the 0x3FF0003 mask in create_cond_branch(). 0x3ff0000 may have
 * been intended -- confirm.
 */
flags = 0x3ff000 | BRANCH_SET_LINK;
/* Branch to self */
instr = create_cond_branch(iptr, addr, flags);
check(instr_is_branch_to_addr(&instr, addr));
/* Branch to self - 0x100 */
instr = create_cond_branch(iptr, addr - 0x100, flags);
check(instr_is_branch_to_addr(&instr, addr - 0x100));
/* Branch to self + 0x100 */
instr = create_cond_branch(iptr, addr + 0x100, flags);
check(instr_is_branch_to_addr(&instr, addr + 0x100));
/* Maximum relative negative offset, - 32 KB */
instr = create_cond_branch(iptr, addr - 0x8000, flags);
check(instr_is_branch_to_addr(&instr, addr - 0x8000));
/* Out of range relative negative offset, - 32 KB - 4 */
instr = create_cond_branch(iptr, addr - 0x8004, flags);
check(instr == 0);
/* Out of range relative positive offset, + 32 KB */
instr = create_cond_branch(iptr, addr + 0x8000, flags);
check(instr == 0);
/* Unaligned target */
instr = create_cond_branch(iptr, addr + 3, flags);
check(instr == 0);
/* Check flags are masked correctly */
instr = create_cond_branch(iptr, addr, 0xFFFFFFFC);
check(instr_is_branch_to_addr(&instr, addr));
check(instr == 0x43FF0000);
}
/*
 * Self-test for translate_branch(): write a branch at p, translate it to
 * a second location q inside the same buffer, and check that both still
 * decode to the same destination. A vmalloc() buffer of just over 32 MB
 * gives room for the extreme I-form displacements; the second half of the
 * function repeats the exercise for conditional branches at KB scale.
 * The test returns early if the buffer cannot be allocated.
 */
static void __init test_translate_branch(void)
{
unsigned long addr;
unsigned int *p, *q;
void *buf;
buf = vmalloc(PAGE_ALIGN(0x2000000 + 1));
check(buf);
if (!buf)
return;
/* Simple case, branch to self moved a little */
p = buf;
addr = (unsigned long)p;
patch_branch(p, addr, 0);
check(instr_is_branch_to_addr(p, addr));
q = p + 1;
patch_instruction(q, translate_branch(q, p));
check(instr_is_branch_to_addr(q, addr));
/* Maximum negative case, move b . to addr + 32 MB */
p = buf;
addr = (unsigned long)p;
patch_branch(p, addr, 0);
q = buf + 0x2000000;
patch_instruction(q, translate_branch(q, p));
check(instr_is_branch_to_addr(p, addr));
check(instr_is_branch_to_addr(q, addr));
check(*q == 0x4a000000);
/* Maximum positive case, move x to x - 32 MB + 4 */
p = buf + 0x2000000;
addr = (unsigned long)p;
patch_branch(p, addr, 0);
q = buf + 4;
patch_instruction(q, translate_branch(q, p));
check(instr_is_branch_to_addr(p, addr));
check(instr_is_branch_to_addr(q, addr));
check(*q == 0x49fffffc);
/* Jump to x + 16 MB moved to x + 20 MB */
p = buf;
addr = 0x1000000 + (unsigned long)buf;
patch_branch(p, addr, BRANCH_SET_LINK);
q = buf + 0x1400000;
patch_instruction(q, translate_branch(q, p));
check(instr_is_branch_to_addr(p, addr));
check(instr_is_branch_to_addr(q, addr));
/* Jump to x + 16 MB moved to x - 16 MB + 4 */
p = buf + 0x1000000;
addr = 0x2000000 + (unsigned long)buf;
patch_branch(p, addr, 0);
q = buf + 4;
patch_instruction(q, translate_branch(q, p));
check(instr_is_branch_to_addr(p, addr));
check(instr_is_branch_to_addr(q, addr));
/* Conditional branch tests */
/* Simple case, branch to self moved a little */
p = buf;
addr = (unsigned long)p;
patch_instruction(p, create_cond_branch(p, addr, 0));
check(instr_is_branch_to_addr(p, addr));
q = p + 1;
patch_instruction(q, translate_branch(q, p));
check(instr_is_branch_to_addr(q, addr));
/* Maximum negative case, move b . to addr + 32 KB */
p = buf;
addr = (unsigned long)p;
patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC));
q = buf + 0x8000;
patch_instruction(q, translate_branch(q, p));
check(instr_is_branch_to_addr(p, addr));
check(instr_is_branch_to_addr(q, addr));
check(*q == 0x43ff8000);
/* Maximum positive case, move x to x - 32 KB + 4 */
p = buf + 0x8000;
addr = (unsigned long)p;
patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC));
q = buf + 4;
patch_instruction(q, translate_branch(q, p));
check(instr_is_branch_to_addr(p, addr));
check(instr_is_branch_to_addr(q, addr));
check(*q == 0x43ff7ffc);
/* Jump to x + 12 KB moved to x + 20 KB */
p = buf;
addr = 0x3000 + (unsigned long)buf;
patch_instruction(p, create_cond_branch(p, addr, BRANCH_SET_LINK));
q = buf + 0x5000;
patch_instruction(q, translate_branch(q, p));
check(instr_is_branch_to_addr(p, addr));
check(instr_is_branch_to_addr(q, addr));
/* Jump to x + 8 KB moved to x - 8 KB + 4 */
p = buf + 0x2000;
addr = 0x4000 + (unsigned long)buf;
patch_instruction(p, create_cond_branch(p, addr, 0));
q = buf + 4;
patch_instruction(q, translate_branch(q, p));
check(instr_is_branch_to_addr(p, addr));
check(instr_is_branch_to_addr(q, addr));
/* Free the buffer we were using */
vfree(buf);
}
/*
 * Entry point for the code-patching self-tests, registered as a late
 * initcall. Runs each test group in turn; failures are reported by the
 * individual check() calls, and this function always returns 0.
 */
static int __init test_code_patching(void)
{
printk(KERN_DEBUG "Running code patching self-tests ...\n");
test_branch_iform();
test_branch_bform();
test_create_function_call();
test_translate_branch();
return 0;
}
late_initcall(test_code_patching);
#endif /* CONFIG_CODE_PATCHING_SELFTEST */

518
arch/powerpc/lib/copy_32.S Normal file
View file

@ -0,0 +1,518 @@
/*
* Memory copy functions for 32-bit PowerPC.
*
* Copyright (C) 1996-2005 Paul Mackerras.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
/*
 * Copy 16 bytes from 4(r4)..16(r4) to 4(r6)..16(r6) using r7-r10 as
 * temporaries. The update forms on the last load and store advance r4
 * and r6 by 16.
 */
#define COPY_16_BYTES \
lwz r7,4(r4); \
lwz r8,8(r4); \
lwz r9,12(r4); \
lwzu r10,16(r4); \
stw r7,4(r6); \
stw r8,8(r6); \
stw r9,12(r6); \
stwu r10,16(r6)
/*
 * Same 16-byte copy as COPY_16_BYTES, but every load and store carries a
 * numeric label 8<n>0 .. 8<n>7 so that COPY_16_BYTES_EXCODE(n) can list it
 * in the exception table.
 */
#define COPY_16_BYTES_WITHEX(n) \
8 ## n ## 0: \
lwz r7,4(r4); \
8 ## n ## 1: \
lwz r8,8(r4); \
8 ## n ## 2: \
lwz r9,12(r4); \
8 ## n ## 3: \
lwzu r10,16(r4); \
8 ## n ## 4: \
stw r7,4(r6); \
8 ## n ## 5: \
stw r8,8(r6); \
8 ## n ## 6: \
stw r9,12(r6); \
8 ## n ## 7: \
stwu r10,16(r6)
/*
 * Fixup code and exception-table entries for COPY_16_BYTES_WITHEX(n).
 * 9<n>0 is the fixup for the four loads (8<n>0..8<n>3) and branches to 104f;
 * 9<n>1 is the fixup for the four stores (8<n>4..8<n>7) and branches to 105f.
 * Both first lower r5 by 16*n, i.e. by the size of the n chunks of the
 * current cache line that precede the faulting one.
 * The macro ends by switching back to the .text section.
 */
#define COPY_16_BYTES_EXCODE(n) \
9 ## n ## 0: \
addi r5,r5,-(16 * n); \
b 104f; \
9 ## n ## 1: \
addi r5,r5,-(16 * n); \
b 105f; \
.section __ex_table,"a"; \
.align 2; \
.long 8 ## n ## 0b,9 ## n ## 0b; \
.long 8 ## n ## 1b,9 ## n ## 0b; \
.long 8 ## n ## 2b,9 ## n ## 0b; \
.long 8 ## n ## 3b,9 ## n ## 0b; \
.long 8 ## n ## 4b,9 ## n ## 1b; \
.long 8 ## n ## 5b,9 ## n ## 1b; \
.long 8 ## n ## 6b,9 ## n ## 1b; \
.long 8 ## n ## 7b,9 ## n ## 1b; \
.text
.text
.stabs "arch/powerpc/lib/",N_SO,0,0,0f
.stabs "copy_32.S",N_SO,0,0,0f
0:
/*
 * Assembler symbols for the L1 cache geometry: line size in bytes, its
 * log2, and the mask selecting the offset within a line.
 */
CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)
/*
* Use dcbz on the complete cache lines in the destination
* to set them to zero. This requires that the destination
* area is cacheable. -- paulus
*/
/*
 * In: r3 = destination, r4 = byte count.  r3 is never written here.
 * Internally r4 becomes the zero value to store, r5 the running count and
 * r6 a store cursor that trails the next word by 4 bytes, so that the
 * pre-incrementing "stwu r4,4(r6)" can be used throughout.
 * After the alignment step r5 is counted from r6 rather than from r6 + 4
 * (it is 4 larger than the bytes still to write), which is why the final
 * word loop is entered through a bdz.
 */
_GLOBAL(cacheable_memzero)
mr r5,r4
li r4,0
addi r6,r3,-4
cmplwi 0,r5,4
blt 7f /* fewer than 4 bytes: byte loop only */
stwu r4,4(r6) /* first word, at the (possibly unaligned) start */
beqlr /* count was exactly 4; cr0 is still from the cmplwi */
andi. r0,r6,3 /* r0 = misalignment of the cursor */
add r5,r0,r5 /* add it to the count ... */
subf r6,r0,r6 /* ... and pull the cursor back to a word boundary */
clrlwi r7,r6,32-LG_CACHELINE_BYTES /* r7 = offset of r6 within its cache line */
add r8,r7,r5
srwi r9,r8,LG_CACHELINE_BYTES
addic. r9,r9,-1 /* total number of complete cachelines */
ble 2f /* none: skip straight to the word loop */
xori r0,r7,CACHELINE_MASK & ~3
srwi. r0,r0,2 /* r0 = words to store before the next line boundary */
beq 3f
mtctr r0
4: stwu r4,4(r6)
bdnz 4b
/* Zero the complete cache lines with dcbz; r7 = 4 because the line starts at r6 + 4. */
3: mtctr r9
li r7,4
10: dcbz r7,r6
addi r6,r6,CACHELINE_BYTES
bdnz 10b
/* Recompute the tail count in the same "counted from r6" form. */
clrlwi r5,r8,32-LG_CACHELINE_BYTES
addi r5,r5,4
/* Word loop: bdz decrements first, so r5/4 - 1 words are stored. */
2: srwi r0,r5,2
mtctr r0
bdz 6f
1: stwu r4,4(r6)
bdnz 1b
/* Byte loop for the last r5 & 3 bytes (or for a total count below 4). */
6: andi. r5,r5,3
7: cmpwi 0,r5,0
beqlr
mtctr r5
addi r6,r6,3 /* so that stbu 1(r6) hits the next byte */
8: stbu r4,1(r6)
bdnz 8b
blr
/*
 * memset: r3 = destination, r4 = fill value (low byte used), r5 = byte count.
 * r3 is never written, so it still holds the destination on return.
 * r6 is a store cursor that trails the next word by 4 bytes so that the
 * pre-incrementing stwu can be used.
 */
_GLOBAL(memset)
/* Replicate the low byte of r4 into all four bytes of the word. */
rlwimi r4,r4,8,16,23
rlwimi r4,r4,16,0,15
addi r6,r3,-4
cmplwi 0,r5,4
blt 7f /* fewer than 4 bytes: byte loop only */
stwu r4,4(r6) /* first word, at the (possibly unaligned) start */
beqlr /* count was exactly 4; cr0 is still from the cmplwi */
andi. r0,r6,3 /* r0 = misalignment of the cursor */
add r5,r0,r5 /* add it to the count ... */
subf r6,r0,r6 /* ... and pull the cursor back to a word boundary */
/* r5 is now counted from r6, hence the bdz: r5/4 - 1 words are stored. */
srwi r0,r5,2
mtctr r0
bdz 6f
1: stwu r4,4(r6)
bdnz 1b
/* Byte loop for the last r5 & 3 bytes (or for a total count below 4). */
6: andi. r5,r5,3
7: cmpwi 0,r5,0
beqlr
mtctr r5
addi r6,r6,3 /* so that stbu 1(r6) hits the next byte */
8: stbu r4,1(r6)
bdnz 8b
blr
/*
* This version uses dcbz on the complete cache lines in the
* destination area to reduce memory traffic. This requires that
* the destination area is cacheable.
* We only use this version if the source and dest don't overlap.
* -- paulus.
*/
/*
 * cacheable_memcpy: r3 = destination, r4 = source, r5 = byte count.
 * r3 is never written.  r4 and r6 are cursors that trail the next word of
 * source/destination by 4 bytes (for the lwzu/stwu update forms).
 * Phases: bytes then words up to the first destination cache-line boundary,
 * whole cache lines (dcbz + COPY_16_BYTES), leftover words, leftover bytes.
 */
_GLOBAL(cacheable_memcpy)
add r7,r3,r5 /* test if the src & dst overlap */
add r8,r4,r5
cmplw 0,r4,r7 /* cr0.lt: src < dst + len */
cmplw 1,r3,r8 /* cr1.lt: dst < src + len */
crand 0,0,4 /* cr0.lt &= cr1.lt */
blt memcpy /* if regions overlap */
addi r4,r4,-4
addi r6,r3,-4
neg r0,r3
andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
beq 58f
cmplw 0,r5,r0 /* is this more than total to do? */
blt 63f /* if not much to do */
andi. r8,r0,3 /* get it word-aligned first */
subf r5,r0,r5
mtctr r8
beq+ 61f
70: lbz r9,4(r4) /* do some bytes */
stb r9,4(r6)
addi r4,r4,1
addi r6,r6,1
bdnz 70b
61: srwi. r0,r0,2
mtctr r0
beq 58f
72: lwzu r9,4(r4) /* do some words */
stwu r9,4(r6)
bdnz 72b
58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
clrlwi r5,r5,32-LG_CACHELINE_BYTES /* r5 = bytes left after those lines */
li r11,4 /* dcbz offset: the line starts at r6 + 4 */
mtctr r0
beq 63f
/* One iteration per destination cache line: zero it, then fill it 16 bytes at a time. */
53:
dcbz r11,r6
COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
COPY_16_BYTES
COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
COPY_16_BYTES
COPY_16_BYTES
COPY_16_BYTES
COPY_16_BYTES
#endif
#endif
#endif
bdnz 53b
/* Leftover whole words. */
63: srwi. r0,r5,2
mtctr r0
beq 64f
30: lwzu r0,4(r4)
stwu r0,4(r6)
bdnz 30b
/* Leftover bytes (r5 & 3). */
64: andi. r0,r5,3
mtctr r0
beq+ 65f
40: lbz r0,4(r4)
stb r0,4(r6)
addi r4,r4,1
addi r6,r6,1
bdnz 40b
65: blr
/*
 * memmove: r3 = destination, r4 = source, r5 = byte count.
 * If the destination is above the source (unsigned compare) the copy is
 * done from the end by backwards_memcpy; otherwise it falls through into
 * the forward memcpy.
 */
_GLOBAL(memmove)
cmplw 0,r3,r4
bgt backwards_memcpy
/* fall through */
/*
 * memcpy: same arguments.  r3 is never written.  r4 and r6 are cursors
 * that trail the next word of source/destination by 4 bytes.
 */
_GLOBAL(memcpy)
srwi. r7,r5,3 /* r7 = number of 8-byte units; cr0 tested by the beq below */
addi r6,r3,-4
addi r4,r4,-4
beq 2f /* if less than 8 bytes to do */
andi. r0,r6,3 /* get dest word aligned */
mtctr r7
bne 5f
/* Main loop: two words (8 bytes) per iteration. */
1: lwz r7,4(r4)
lwzu r8,8(r4)
stw r7,4(r6)
stwu r8,8(r6)
bdnz 1b
andi. r5,r5,7 /* r5 = bytes left over */
/* At most 7 bytes left: one word if there are at least 4 ... */
2: cmplwi 0,r5,4
blt 3f
lwzu r0,4(r4)
addi r5,r5,-4
stwu r0,4(r6)
/* ... then the remaining 0-3 bytes. */
3: cmpwi 0,r5,0
beqlr
mtctr r5
addi r4,r4,3 /* so that lbzu/stbu 1(rX) hit the next byte */
addi r6,r6,3
4: lbzu r0,1(r4)
stbu r0,1(r6)
bdnz 4b
blr
/* Destination not word aligned: copy 4 - (dest & 3) bytes first. */
5: subfic r0,r0,4
mtctr r0
6: lbz r7,4(r4)
addi r4,r4,1
stb r7,4(r6)
addi r6,r6,1
bdnz 6b
subf r5,r0,r5
rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3 */
beq 2b
mtctr r7
b 1b
/*
 * backwards_memcpy: r3 = destination, r4 = source, r5 = byte count.
 * Copies from the highest address downwards.  r3 is never written.
 * r4 and r6 start one past the end of source/destination and are moved
 * down by the negative-displacement update forms.
 */
_GLOBAL(backwards_memcpy)
rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3 */
add r6,r3,r5
add r4,r4,r5
beq 2f /* fewer than 8 bytes to do */
andi. r0,r6,3 /* r0 = bytes by which the end of dest is misaligned */
mtctr r7
bne 5f
/* Main loop: two words (8 bytes) per iteration, moving downwards. */
1: lwz r7,-4(r4)
lwzu r8,-8(r4)
stw r7,-4(r6)
stwu r8,-8(r6)
bdnz 1b
andi. r5,r5,7 /* r5 = bytes left over */
/* At most 7 bytes left: one word if there are at least 4 ... */
2: cmplwi 0,r5,4
blt 3f
lwzu r0,-4(r4)
subi r5,r5,4
stwu r0,-4(r6)
/* ... then the remaining 0-3 bytes. */
3: cmpwi 0,r5,0
beqlr
mtctr r5
4: lbzu r0,-1(r4)
stbu r0,-1(r6)
bdnz 4b
blr
/* End of destination not word aligned: copy r0 bytes to align it. */
5: mtctr r0
6: lbzu r7,-1(r4)
stbu r7,-1(r6)
bdnz 6b
subf r5,r0,r5
rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3 */
beq 2b
mtctr r7
b 1b
/*
 * __copy_tofrom_user (32-bit): r3 = destination, r4 = source, r5 = count.
 * Returns r3 = 0 when everything was copied; after a fault r3 is the
 * "bytes not copied" value computed at 99:/106: below.
 *
 * r4 and r6 are cursors that trail the next word of source/destination by
 * 4 bytes.  Every load, store and dcbz on the buffers carries a numeric
 * label with an __ex_table entry naming its fixup.
 *
 * Labels 111, 112 and 114 are defined twice in this routine (prefetch
 * setup and fixup code).  The table at the very end uses the "b" suffix,
 * so it binds to the later definitions in the fixup code.
 */
_GLOBAL(__copy_tofrom_user)
addi r4,r4,-4
addi r6,r3,-4
neg r0,r3
andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
beq 58f
cmplw 0,r5,r0 /* is this more than total to do? */
blt 63f /* if not much to do */
andi. r8,r0,3 /* get it word-aligned first */
mtctr r8
beq+ 61f
70: lbz r9,4(r4) /* do some bytes */
71: stb r9,4(r6)
addi r4,r4,1
addi r6,r6,1
bdnz 70b
/* r5 is only reduced here, after the byte loop; fixup 90: relies on that. */
61: subf r5,r0,r5
srwi. r0,r0,2
mtctr r0
beq 58f
72: lwzu r9,4(r4) /* do some words */
73: stwu r9,4(r6)
bdnz 72b
.section __ex_table,"a"
.align 2
.long 70b,100f
.long 71b,101f
.long 72b,102f
.long 73b,103f
.text
58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
clrlwi r5,r5,32-LG_CACHELINE_BYTES /* r5 = bytes left after those lines */
li r11,4 /* dcbz offset: the line starts at r6 + 4 */
beq 63f
/* Here we decide how far ahead to prefetch the source */
/* r3 is reused as the dcbt offset from r4; r7 = lines to prefetch ahead. */
li r3,4
cmpwi r0,1
li r7,0
ble 114f
li r7,1
#if MAX_COPY_PREFETCH > 1
/* Heuristically, for large transfers we prefetch
MAX_COPY_PREFETCH cachelines ahead. For small transfers
we prefetch 1 cacheline ahead. */
cmpwi r0,MAX_COPY_PREFETCH
ble 112f
li r7,MAX_COPY_PREFETCH
112: mtctr r7
111: dcbt r3,r4
addi r3,r3,CACHELINE_BYTES
bdnz 111b
#else
dcbt r3,r4
addi r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */
/*
 * First pass copies r0 - r7 lines; r0 is then set to r7, the number of
 * lines kept back for a second pass through this code (see the bne 114b).
 */
114: subf r8,r7,r0
mr r0,r7
mtctr r8
53: dcbt r3,r4
54: dcbz r11,r6
.section __ex_table,"a"
.align 2
.long 54b,105f
.text
/* the main body of the cacheline loop */
COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
COPY_16_BYTES_WITHEX(2)
COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
COPY_16_BYTES_WITHEX(4)
COPY_16_BYTES_WITHEX(5)
COPY_16_BYTES_WITHEX(6)
COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
bdnz 53b
/* If lines were kept back (r0 != 0), go round once more with r7 = 0. */
cmpwi r0,0
li r3,4
li r7,0
bne 114b
/* Leftover whole words. */
63: srwi. r0,r5,2
mtctr r0
beq 64f
30: lwzu r0,4(r4)
31: stwu r0,4(r6)
bdnz 30b
/* Leftover bytes (r5 & 3). */
64: andi. r0,r5,3
mtctr r0
beq+ 65f
40: lbz r0,4(r4)
41: stb r0,4(r6)
addi r4,r4,1
addi r6,r6,1
bdnz 40b
65: li r3,0 /* success: nothing left uncopied */
blr
/*
 * Fixup code.  Each entry sets r9 (0 = read fault, 1 = write fault), then
 * r5 and the shift r3 so that "r5 + (ctr << r3)" is the uncopied count.
 */
/* read fault, initial single-byte copy */
100: li r9,0
b 90f
/* write fault, initial single-byte copy */
101: li r9,1
/* r8 = size of the initial byte run; ctr = how many of them are left */
90: subf r5,r8,r5
li r3,0
b 99f
/* read fault, initial word copy */
102: li r9,0
b 91f
/* write fault, initial word copy */
103: li r9,1
91: li r3,2
b 99f
/*
* this stuff handles faults in the cacheline loop and branches to either
* 104f (if in read part) or 105f (if in write part), after updating r5
*/
COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
COPY_16_BYTES_EXCODE(2)
COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
COPY_16_BYTES_EXCODE(4)
COPY_16_BYTES_EXCODE(5)
COPY_16_BYTES_EXCODE(6)
COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif
/* read fault in cacheline loop */
104: li r9,0
b 92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105: li r9,1
/* lines outstanding = lines kept back (r0) + lines left in this pass (ctr) */
92: li r3,LG_CACHELINE_BYTES
mfctr r8
add r0,r0,r8
b 106f
/* read fault in final word loop */
108: li r9,0
b 93f
/* write fault in final word loop */
109: li r9,1
93: andi. r5,r5,3
li r3,2
b 99f
/* read fault in final byte loop */
110: li r9,0
b 94f
/* write fault in final byte loop */
111: li r9,1
94: li r5,0
li r3,0
/*
* At this stage the number of bytes not copied is
* r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
*/
99: mfctr r0
106: slw r3,r0,r3
add. r3,r3,r5
beq 120f /* shouldn't happen */
cmpwi 0,r9,0
bne 120f /* write fault: just return the count in r3 */
/* for a read fault, first try to continue the copy one byte at a time */
mtctr r3
130: lbz r0,4(r4)
131: stb r0,4(r6)
addi r4,r4,1
addi r6,r6,1
bdnz 130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
/* (r3 is reloaded from ctr, so it is also the value returned) */
132: mfctr r3
srwi. r0,r3,2
li r9,0
mtctr r0
beq 113f
112: stwu r9,4(r6)
bdnz 112b
113: andi. r0,r3,3
mtctr r0
beq 120f
114: stb r9,4(r6)
addi r6,r6,1
bdnz 114b
120: blr
/* Table for the tail loops and for the retry/clear code above. */
.section __ex_table,"a"
.align 2
.long 30b,108b
.long 31b,109b
.long 40b,110b
.long 41b,111b
.long 130b,132b
.long 131b,120b
.long 112b,120b
.long 114b,120b
.text

View file

@ -0,0 +1,112 @@
/*
* Copyright (C) 2008 Mark Nelson, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <asm/page.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
/* TOC entry through which copy_page reaches the ppc64_caches structure. */
.section ".toc","aw"
PPC64_CACHES:
.tc ppc64_caches[TC],ppc64_caches
.section ".text"
/*
 * copy_page: r3 = destination page, r4 = source page (every store goes
 * through r3, every load through r4).  r5 is built up as PAGE_SIZE.
 *
 * NOTE(review): going by the macro names, the first feature section keeps
 * the "lis" when CPU_FTR_VMX_COPY is clear and is replaced by the branch to
 * copypage_power7 when it is set - confirm against asm/feature-fixups.h.
 */
_GLOBAL_TOC(copy_page)
BEGIN_FTR_SECTION
lis r5,PAGE_SIZE@h
FTR_SECTION_ELSE
b copypage_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
ori r5,r5,PAGE_SIZE@l
/*
 * Only with CPU_FTR_CP_USE_DCBTZ: walk the page one data-cache line at a
 * time, touching the source line (dcbt) and zeroing the destination line
 * (dcbz) before the copy proper.
 */
BEGIN_FTR_SECTION
ld r10,PPC64_CACHES@toc(r2)
lwz r11,DCACHEL1LOGLINESIZE(r10) /* log2 of cache line size */
lwz r12,DCACHEL1LINESIZE(r10) /* get cache line size */
li r9,0
srd r8,r5,r11 /* r8 = number of cache lines in the page */
mtctr r8
.Lsetup:
dcbt r9,r4
dcbz r9,r3
add r9,r9,r12
bdnz .Lsetup
END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
/*
 * The copy is software pipelined: the loads for the next stores are issued
 * while the previous ones are being stored.  r3 is biased by -8 so store
 * offsets run 8..128; the first four doublewords are loaded ahead of the
 * loop and the last 128-byte stride is finished after it.
 */
addi r3,r3,-8
srdi r8,r5,7 /* page is copied in 128 byte strides */
addi r8,r8,-1 /* one stride copied outside loop */
mtctr r8
ld r5,0(r4)
ld r6,8(r4)
ld r7,16(r4)
ldu r8,24(r4)
1: std r5,8(r3)
std r6,16(r3)
ld r9,8(r4)
ld r10,16(r4)
std r7,24(r3)
std r8,32(r3)
ld r11,24(r4)
ld r12,32(r4)
std r9,40(r3)
std r10,48(r3)
ld r5,40(r4)
ld r6,48(r4)
std r11,56(r3)
std r12,64(r3)
ld r7,56(r4)
ld r8,64(r4)
std r5,72(r3)
std r6,80(r3)
ld r9,72(r4)
ld r10,80(r4)
std r7,88(r3)
std r8,96(r3)
ld r11,88(r4)
ld r12,96(r4)
std r9,104(r3)
std r10,112(r3)
ld r5,104(r4)
ld r6,112(r4)
std r11,120(r3)
stdu r12,128(r3)
ld r7,120(r4)
ldu r8,128(r4)
bdnz 1b
/* Final stride: same store pattern, but no loads beyond the last 96(r4). */
std r5,8(r3)
std r6,16(r3)
ld r9,8(r4)
ld r10,16(r4)
std r7,24(r3)
std r8,32(r3)
ld r11,24(r4)
ld r12,32(r4)
std r9,40(r3)
std r10,48(r3)
ld r5,40(r4)
ld r6,48(r4)
std r11,56(r3)
std r12,64(r3)
ld r7,56(r4)
ld r8,64(r4)
std r5,72(r3)
std r6,80(r3)
ld r9,72(r4)
ld r10,80(r4)
std r7,88(r3)
std r8,96(r3)
ld r11,88(r4)
ld r12,96(r4)
std r9,104(r3)
std r10,112(r3)
std r11,120(r3)
std r12,128(r3)
blr

View file

@ -0,0 +1,168 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2012
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
#include <asm/page.h>
#include <asm/ppc_asm.h>
/*
 * copypage_power7: r3 = destination page, r4 = source page.
 * Starts two prefetch streams, then copies PAGE_SIZE bytes in 128-byte
 * blocks: with VMX registers when CONFIG_ALTIVEC is set and
 * enter_vmx_copy returns non-zero, otherwise with 16 GPRs.
 */
_GLOBAL(copypage_power7)
/*
* We prefetch both the source and destination using enhanced touch
* instructions. We use a stream ID of 0 for the load side and
* 1 for the store side. Since source and destination are page
* aligned we don't need to clear the bottom 7 bits of either
* address.
*/
ori r9,r3,1 /* stream=1 => to */
#ifdef CONFIG_PPC_64K_PAGES
lis r7,0x0E01 /* depth=7
* units/cachelines=512 */
#else
lis r7,0x0E00 /* depth=7 */
ori r7,r7,0x1000 /* units/cachelines=32 */
#endif
ori r10,r7,1 /* stream=1 */
lis r8,0x8000 /* GO=1 */
clrldi r8,r8,32 /* drop the upper 32 bits that lis sign-extended */
.machine push
.machine "power4"
/* setup read stream 0 */
dcbt r0,r4,0b01000 /* addr from */
dcbt r0,r7,0b01010 /* length and depth from */
/* setup write stream 1 */
dcbtst r0,r9,0b01000 /* addr to */
dcbtst r0,r10,0b01010 /* length and depth to */
eieio
dcbt r0,r8,0b01010 /* all streams GO */
.machine pop
#ifdef CONFIG_ALTIVEC
/*
 * r3/r4 are stored below the stack pointer at the offsets that become
 * the R31/R30 slots of the frame created by the stdu, so they can be
 * reloaded after the call.  LR goes into the caller's frame at 16(r1).
 */
mflr r0
std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
bl enter_vmx_copy
cmpwi r3,0 /* result is tested by the beq below */
ld r0,STACKFRAMESIZE+16(r1)
ld r3,STK_REG(R31)(r1)
ld r4,STK_REG(R30)(r1)
mtlr r0
li r0,(PAGE_SIZE/128)
mtctr r0
/* enter_vmx_copy returned 0: use the GPR loop; the frame stays allocated for it. */
beq .Lnonvmx_copy
addi r1,r1,STACKFRAMESIZE
li r6,16
li r7,32
li r8,48
li r9,64
li r10,80
li r11,96
li r12,112
.align 5
/*
 * 128 bytes per iteration through eight vector registers.  In lvx/stvx
 * an RA operand of r0 means a zero base, so "r0,r4" addresses r4 + 0.
 */
1: lvx vr7,r0,r4
lvx vr6,r4,r6
lvx vr5,r4,r7
lvx vr4,r4,r8
lvx vr3,r4,r9
lvx vr2,r4,r10
lvx vr1,r4,r11
lvx vr0,r4,r12
addi r4,r4,128
stvx vr7,r0,r3
stvx vr6,r3,r6
stvx vr5,r3,r7
stvx vr4,r3,r8
stvx vr3,r3,r9
stvx vr2,r3,r10
stvx vr1,r3,r11
stvx vr0,r3,r12
addi r3,r3,128
bdnz 1b
b exit_vmx_copy /* tail call optimise */
#else
li r0,(PAGE_SIZE/128)
mtctr r0
stdu r1,-STACKFRAMESIZE(r1)
#endif
/*
 * GPR copy.  Entered with a stack frame allocated and ctr = PAGE_SIZE/128.
 * r14-r20 are saved in the frame because the loop uses them as data
 * registers; they are restored and the frame released before returning.
 */
.Lnonvmx_copy:
std r14,STK_REG(R14)(r1)
std r15,STK_REG(R15)(r1)
std r16,STK_REG(R16)(r1)
std r17,STK_REG(R17)(r1)
std r18,STK_REG(R18)(r1)
std r19,STK_REG(R19)(r1)
std r20,STK_REG(R20)(r1)
/* 128 bytes per iteration: sixteen loads, then sixteen stores. */
1: ld r0,0(r4)
ld r5,8(r4)
ld r6,16(r4)
ld r7,24(r4)
ld r8,32(r4)
ld r9,40(r4)
ld r10,48(r4)
ld r11,56(r4)
ld r12,64(r4)
ld r14,72(r4)
ld r15,80(r4)
ld r16,88(r4)
ld r17,96(r4)
ld r18,104(r4)
ld r19,112(r4)
ld r20,120(r4)
addi r4,r4,128
std r0,0(r3)
std r5,8(r3)
std r6,16(r3)
std r7,24(r3)
std r8,32(r3)
std r9,40(r3)
std r10,48(r3)
std r11,56(r3)
std r12,64(r3)
std r14,72(r3)
std r15,80(r3)
std r16,88(r3)
std r17,96(r3)
std r18,104(r3)
std r19,112(r3)
std r20,120(r3)
addi r3,r3,128
bdnz 1b
ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)
ld r17,STK_REG(R17)(r1)
ld r18,STK_REG(R18)(r1)
ld r19,STK_REG(R19)(r1)
ld r20,STK_REG(R20)(r1)
addi r1,r1,STACKFRAMESIZE
blr

View file

@ -0,0 +1,673 @@
/*
* Copyright (C) 2002 Paul Mackerras, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
#include <asm/ppc_asm.h>
/*
 * Endian-neutral doubleword shifts for the unaligned-source path, which
 * merges adjacent aligned doublewords.  Which hardware shift moves data
 * towards the lower address depends on the byte order, so it is chosen here.
 */
#ifdef __BIG_ENDIAN__
#define sLd sld /* Shift towards low-numbered address. */
#define sHd srd /* Shift towards high-numbered address. */
#else
#define sLd srd /* Shift towards low-numbered address. */
#define sHd sld /* Shift towards high-numbered address. */
#endif
.align 7
/*
 * __copy_tofrom_user (64-bit): r3 = destination, r4 = source, r5 = count.
 * Returns r3 = 0 on success; the exception handlers further down return
 * the number of bytes not copied instead.
 * The three arguments are saved below the stack pointer at -24/-16/-8(r1)
 * so those handlers can work out how far the copy got.
 * Numeric labels on loads and stores exist for the exception table.
 *
 * NOTE(review): going by the macro names, the first feature section is a
 * nop when CPU_FTR_VMX_COPY is clear and a branch to the POWER7 routine
 * when it is set - confirm against asm/feature-fixups.h.
 */
_GLOBAL_TOC(__copy_tofrom_user)
BEGIN_FTR_SECTION
nop
FTR_SECTION_ELSE
b __copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
_GLOBAL(__copy_tofrom_user_base)
/* first check for a whole page copy on a page boundary */
cmpldi cr1,r5,16 /* cr1: count vs 16, used by the blt cr1 tests below */
cmpdi cr6,r5,4096
or r0,r3,r4
neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */
andi. r0,r0,4095 /* cr0.eq: both pointers are 4K aligned */
std r3,-24(r1)
crand cr0*4+2,cr0*4+2,cr6*4+2 /* ... and the count is exactly 4096 */
std r4,-16(r1)
std r5,-8(r1)
dcbt 0,r4
beq .Lcopy_page_4K
andi. r6,r6,7 /* r6 = bytes needed to 8-byte align the destination */
PPC_MTOCRF(0x01,r5) /* low 4 bits of the count into cr7 for the bf tests */
blt cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
* CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
* cleared.
* At the time of writing the only CPU that has this combination of bits
* set is Power6.
*/
BEGIN_FTR_SECTION
nop
FTR_SECTION_ELSE
bne .Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
CPU_FTR_UNALIGNED_LD_STD)
/*
 * Destination is 8-byte aligned (or the CPU does not care).  r3 is biased
 * by -16 for the loop below; .Ldo_tail removes the bias again.
 */
.Ldst_aligned:
addi r3,r3,-16
BEGIN_FTR_SECTION
andi. r0,r4,7 /* r0 = source misalignment, consumed by .Lsrc_unaligned */
bne .Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
blt cr1,.Ldo_tail /* if < 16 bytes to copy */
srdi r0,r5,5 /* r0 = number of 32-byte loop iterations */
cmpdi cr1,r0,0
20: ld r7,0(r4)
220: ld r6,8(r4)
addi r4,r4,16
mtctr r0
andi. r0,r5,0x10 /* is there an odd 16-byte block? */
beq 22f
addi r3,r3,16
addi r4,r4,-16
mr r9,r7
mr r8,r6
beq cr1,72f /* no 32-byte iterations: just store this block */
/* 32 bytes per iteration; loads for one half overlap stores of the other. */
21: ld r7,16(r4)
221: ld r6,24(r4)
addi r4,r4,32
70: std r9,0(r3)
270: std r8,8(r3)
22: ld r9,0(r4)
222: ld r8,8(r4)
71: std r7,16(r3)
271: std r6,24(r3)
addi r3,r3,32
bdnz 21b
72: std r9,0(r3)
272: std r8,8(r3)
andi. r5,r5,0xf
beq+ 3f
addi r4,r4,16
/*
 * Copy the last 0-15 bytes, steered by the cr7 bits loaded from the
 * count: bit 0 selects 8 bytes, bit 1 four, bit 2 two, bit 3 one.
 */
.Ldo_tail:
addi r3,r3,16
bf cr7*4+0,246f
244: ld r9,0(r4)
addi r4,r4,8
245: std r9,0(r3)
addi r3,r3,8
246: bf cr7*4+1,1f
23: lwz r9,0(r4)
addi r4,r4,4
73: stw r9,0(r3)
addi r3,r3,4
1: bf cr7*4+2,2f
44: lhz r9,0(r4)
addi r4,r4,2
74: sth r9,0(r3)
addi r3,r3,2
2: bf cr7*4+3,3f
45: lbz r9,0(r4)
75: stb r9,0(r3)
3: li r3,0 /* success */
blr
/*
 * Source is not 8-byte aligned.
 * On entry r0 holds the source misalignment in bytes (from the
 * "andi. r0,r4,7" just before the branch here) and r3 carries the -16
 * bias applied at .Ldst_aligned.
 * r4 is rounded down to a doubleword boundary and each destination
 * doubleword is assembled from two adjacent aligned source doublewords:
 * r10 = 8 * misalignment is the sLd shift, r11 = 64 - r10 the sHd shift.
 */
.Lsrc_unaligned:
srdi r6,r5,3 /* r6 = number of whole doublewords to copy */
addi r5,r5,-16
subf r4,r0,r4
srdi r7,r5,4 /* loop count; the loop produces two doublewords per pass */
sldi r10,r0,3
cmpldi cr6,r6,3
andi. r5,r5,7 /* cr0: any trailing bytes? tested by the bne 6f below */
mtctr r7
subfic r11,r10,64
add r5,r5,r0 /* trailing bytes + misalignment, examined at 6: */
bt cr7*4+0,28f
24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */
25: ld r0,8(r4)
sLd r6,r9,r10
26: ldu r9,16(r4)
sHd r7,r0,r11
sLd r8,r0,r10
or r7,r7,r6
blt cr6,79f
27: ld r0,8(r4)
b 2f
28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */
29: ldu r9,8(r4)
sLd r8,r0,r10
addi r3,r3,-8
blt cr6,5f
30: ld r0,8(r4)
sHd r12,r9,r11
sLd r6,r9,r10
31: ldu r9,16(r4)
or r12,r8,r12
sHd r7,r0,r11
sLd r8,r0,r10
addi r3,r3,16
beq cr6,78f
/* Main loop: two merged doublewords stored per iteration. */
1: or r7,r7,r6
32: ld r0,8(r4)
76: std r12,8(r3)
2: sHd r12,r9,r11
sLd r6,r9,r10
33: ldu r9,16(r4)
or r12,r8,r12
77: stdu r7,16(r3)
sHd r7,r0,r11
sLd r8,r0,r10
bdnz 1b
78: std r12,8(r3)
or r7,r7,r6
79: std r7,16(r3)
5: sHd r12,r9,r11
or r12,r8,r12
80: std r12,24(r3)
bne 6f /* trailing bytes remain (cr0 from the andi. above) */
li r3,0
blr
/*
 * Trailing 1-7 bytes.  r9 holds what is left of the last source
 * doubleword; if the bytes needed extend beyond it (r5 > 8), one more
 * doubleword is loaded and merged in.
 */
6: cmpwi cr1,r5,8
addi r3,r3,32
sLd r9,r9,r10
ble cr1,7f
34: ld r0,8(r4)
sHd r7,r0,r11
or r9,r7,r9
7:
/* Store 4, 2 and 1 bytes as selected by cr7, rotating r9 to expose the next piece. */
bf cr7*4+1,1f
#ifdef __BIG_ENDIAN__
rotldi r9,r9,32
#endif
94: stw r9,0(r3)
#ifdef __LITTLE_ENDIAN__
rotrdi r9,r9,32
#endif
addi r3,r3,4
1: bf cr7*4+2,2f
#ifdef __BIG_ENDIAN__
rotldi r9,r9,16
#endif
95: sth r9,0(r3)
#ifdef __LITTLE_ENDIAN__
rotrdi r9,r9,16
#endif
addi r3,r3,2
2: bf cr7*4+3,3f
#ifdef __BIG_ENDIAN__
rotldi r9,r9,8
#endif
96: stb r9,0(r3)
#ifdef __LITTLE_ENDIAN__
rotrdi r9,r9,8
#endif
3: li r3,0 /* success */
blr
/*
 * Destination is not 8-byte aligned: copy r6 (1-7) bytes as a byte,
 * halfword and/or word, selected by the bits of r6 placed in cr7, then
 * continue at .Ldst_aligned.  r7 is the running offset from r3/r4; the
 * pointers themselves are only advanced (by r6) at the end, which is why
 * the fault fixups for the indexed accesses add r7 to r3.
 */
.Ldst_unaligned:
PPC_MTOCRF(0x01,r6) /* put #bytes to 8B bdry into cr7 */
subf r5,r6,r5
li r7,0
cmpldi cr1,r5,16 /* refresh cr1 for the "< 16 bytes" test at .Ldst_aligned */
bf cr7*4+3,1f
35: lbz r0,0(r4)
81: stb r0,0(r3)
addi r7,r7,1
1: bf cr7*4+2,2f
36: lhzx r0,r7,r4
82: sthx r0,r7,r3
addi r7,r7,2
2: bf cr7*4+1,3f
37: lwzx r0,r7,r4
83: stwx r0,r7,r3
3: PPC_MTOCRF(0x01,r5) /* cr7 = low 4 bits of the remaining count again */
add r4,r6,r4
add r3,r6,r3
b .Ldst_aligned
/*
 * Fewer than 16 bytes in total.  cr7 holds the low 4 bits of the count:
 * bit 0 selects 8 bytes (copied as two words), bit 1 four, bit 2 two and
 * bit 3 one.  Returns r3 = 0.
 */
.Lshort_copy:
bf cr7*4+0,1f
38: lwz r0,0(r4)
39: lwz r9,4(r4)
addi r4,r4,8
84: stw r0,0(r3)
85: stw r9,4(r3)
addi r3,r3,8
1: bf cr7*4+1,2f
40: lwz r0,0(r4)
addi r4,r4,4
86: stw r0,0(r3)
addi r3,r3,4
2: bf cr7*4+2,3f
41: lhz r0,0(r4)
addi r4,r4,2
87: sth r0,0(r3)
addi r3,r3,2
3: bf cr7*4+3,4f
42: lbz r0,0(r4)
88: stb r0,0(r3)
4: li r3,0 /* success */
blr
/*
* exception handlers follow
* we have to return the number of bytes not copied
* for an exception on a load, we set the rest of the destination to 0
*/
/*
 * The labels below are fixup targets for faulting loads.  They form a
 * fall-through chain: entering at an earlier group adds more to r3, so
 * that r3 ends up at the first destination byte not yet written
 * (136/137: +r7; 130/131: +24; 120..133: +16; 132: +8; the rest: +0).
 */
136:
137:
add r3,r3,r7
b 1f
130:
131:
addi r3,r3,8
120:
320:
122:
322:
124:
125:
126:
127:
128:
129:
133:
addi r3,r3,8
132:
addi r3,r3,8
121:
321:
344:
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:
/*
* here we have had a fault on a load and r3 points to the first
* unmodified byte of the destination
*/
/* Rebuild source pointer and remaining count from the saved arguments. */
1: ld r6,-24(r1)
ld r4,-16(r1)
ld r5,-8(r1)
subf r6,r6,r3 /* r6 = bytes already copied */
add r4,r4,r6
subf r5,r6,r5 /* #bytes left to go */
/*
* first see if we can copy any more bytes before hitting another exception
*/
mtctr r5
43: lbz r0,0(r4)
addi r4,r4,1
89: stb r0,0(r3)
addi r3,r3,1
bdnz 43b
li r3,0 /* huh? all copied successfully this time? */
blr
/*
* here we have trapped again, need to clear ctr bytes starting at r3
*/
143: mfctr r5
li r0,0
mr r4,r3
mr r3,r5 /* return the number of bytes not copied */
/* Zero byte-wise until r4 is 8-byte aligned ... */
1: andi. r9,r4,7
beq 3f
90: stb r0,0(r4)
addic. r5,r5,-1
addi r4,r4,1
bne 1b
blr
/* ... then whole doublewords, then the remaining r5 & 7 bytes. */
3: cmpldi cr1,r5,8
srdi r9,r5,3
andi. r5,r5,7
blt cr1,93f
mtctr r9
91: std r0,0(r4)
addi r4,r4,8
bdnz 91b
93: beqlr
mtctr r5
92: stb r0,0(r4)
addi r4,r4,1
bdnz 92b
blr
/*
* exception handlers for stores: we just need to work
* out how many bytes weren't copied
*/
/*
 * Same fall-through scheme as for the load fixups: each entry point adds
 * enough to r3 to make it the address of the first byte not stored
 * (182/183: +r7; 371/180: +24; 171/177: +16; 370/372/176/178: +8;
 * 185: +4; the rest: +0).
 */
182:
183:
add r3,r3,r7
b 1f
371:
180:
addi r3,r3,8
171:
177:
addi r3,r3,8
370:
372:
176:
178:
addi r3,r3,4
185:
addi r3,r3,4
170:
172:
345:
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
/* r3 = (original destination + original count) - current r3 */
ld r6,-24(r1)
ld r5,-8(r1)
add r6,r6,r5
subf r3,r3,r6 /* #bytes not copied */
/* 190-192: faults while zeroing the destination; r3 is already set. */
190:
191:
192:
blr /* #bytes not copied in r3 */
/*
 * Exception table for __copy_tofrom_user_base: each entry pairs the
 * address of one load or store above with the fixup label to resume at.
 * Loads resume in the load-fault chain, stores in the store-fault chain;
 * 43b (the byte-retry load) resumes at 143 and the zeroing stores
 * 90b-92b at 190-192.
 * The "b" suffix binds to the nearest preceding definition, and
 * .Lcopy_page_4K below defines labels 20-91 again, so this table has to
 * stay ahead of that routine.
 */
.section __ex_table,"a"
.align 3
.llong 20b,120b
.llong 220b,320b
.llong 21b,121b
.llong 221b,321b
.llong 70b,170b
.llong 270b,370b
.llong 22b,122b
.llong 222b,322b
.llong 71b,171b
.llong 271b,371b
.llong 72b,172b
.llong 272b,372b
.llong 244b,344b
.llong 245b,345b
.llong 23b,123b
.llong 73b,173b
.llong 44b,144b
.llong 74b,174b
.llong 45b,145b
.llong 75b,175b
.llong 24b,124b
.llong 25b,125b
.llong 26b,126b
.llong 27b,127b
.llong 28b,128b
.llong 29b,129b
.llong 30b,130b
.llong 31b,131b
.llong 32b,132b
.llong 76b,176b
.llong 33b,133b
.llong 77b,177b
.llong 78b,178b
.llong 79b,179b
.llong 80b,180b
.llong 34b,134b
.llong 94b,194b
.llong 95b,195b
.llong 96b,196b
.llong 35b,135b
.llong 81b,181b
.llong 36b,136b
.llong 82b,182b
.llong 37b,137b
.llong 83b,183b
.llong 38b,138b
.llong 39b,139b
.llong 84b,184b
.llong 85b,185b
.llong 40b,140b
.llong 86b,186b
.llong 41b,141b
.llong 87b,187b
.llong 42b,142b
.llong 88b,188b
.llong 43b,143b
.llong 89b,189b
.llong 90b,190b
.llong 91b,191b
.llong 92b,192b
.text
/*
* Routine to copy a whole page of data, optimized for POWER4.
* On POWER4 it is more than 50% faster than the simple loop
* above (following the .Ldst_aligned label).
*/
/*
 * Reached from __copy_tofrom_user_base when both pointers are 4K aligned
 * and the count is exactly 4096.  r3 = destination, r4 = source.
 * r20-r31 are saved below the stack pointer (no frame is created) and
 * restored on both the normal and the fault exit.
 *
 * r5 counts the remaining 32-byte units (less one).  Each pass of the
 * outer loop at 0: handles 24 of them by working on six streams spaced
 * 128 bytes apart (offsets 0, 128, ..., 640).  The cmpwi sets cr0 for the
 * "bge 0b" at the end of the pass; nothing in between alters cr0.  The
 * final 256 bytes are copied by the simpler loop at 3: and the code at 4:.
 */
.Lcopy_page_4K:
std r31,-32(1)
std r30,-40(1)
std r29,-48(1)
std r28,-56(1)
std r27,-64(1)
std r26,-72(1)
std r25,-80(1)
std r24,-88(1)
std r23,-96(1)
std r22,-104(1)
std r21,-112(1)
std r20,-120(1)
li r5,4096/32 - 1
addi r3,r3,-8 /* bias: store offsets below start at 8 */
li r0,5 /* inner loop count for each outer pass */
0: addi r5,r5,-24
mtctr r0
20: ld r22,640(4)
21: ld r21,512(4)
22: ld r20,384(4)
23: ld r11,256(4)
24: ld r9,128(4)
25: ld r7,0(4)
26: ld r25,648(4)
27: ld r24,520(4)
28: ld r23,392(4)
29: ld r10,264(4)
30: ld r8,136(4)
31: ldu r6,8(4)
cmpwi r5,24
1:
32: std r22,648(3)
33: std r21,520(3)
34: std r20,392(3)
35: std r11,264(3)
36: std r9,136(3)
37: std r7,8(3)
38: ld r28,648(4)
39: ld r27,520(4)
40: ld r26,392(4)
41: ld r31,264(4)
42: ld r30,136(4)
43: ld r29,8(4)
44: std r25,656(3)
45: std r24,528(3)
46: std r23,400(3)
47: std r10,272(3)
48: std r8,144(3)
49: std r6,16(3)
50: ld r22,656(4)
51: ld r21,528(4)
52: ld r20,400(4)
53: ld r11,272(4)
54: ld r9,144(4)
55: ld r7,16(4)
56: std r28,664(3)
57: std r27,536(3)
58: std r26,408(3)
59: std r31,280(3)
60: std r30,152(3)
61: stdu r29,24(3)
62: ld r25,664(4)
63: ld r24,536(4)
64: ld r23,408(4)
65: ld r10,280(4)
66: ld r8,152(4)
67: ldu r6,24(4)
bdnz 1b
68: std r22,648(3)
69: std r21,520(3)
70: std r20,392(3)
71: std r11,264(3)
72: std r9,136(3)
73: std r7,8(3)
/* Step both pointers on to the next group of six streams. */
74: addi r4,r4,640
75: addi r3,r3,648
bge 0b
/* Remaining 256 bytes: 32 bytes per iteration, r5 iterations, then a final 32 bytes at 4:. */
mtctr r5
76: ld r7,0(4)
77: ld r8,8(4)
78: ldu r9,16(4)
3:
79: ld r10,8(4)
80: std r7,8(3)
81: ld r7,16(4)
82: std r8,16(3)
83: ld r8,24(4)
84: std r9,24(3)
85: ldu r9,32(4)
86: stdu r10,32(3)
bdnz 3b
4:
87: ld r10,8(4)
88: std r7,8(3)
89: std r8,16(3)
90: std r9,24(3)
91: std r10,32(3)
/* Normal exit: restore r20-r31 and return 0. */
9: ld r20,-120(1)
ld r21,-112(1)
ld r22,-104(1)
ld r23,-96(1)
ld r24,-88(1)
ld r25,-80(1)
ld r26,-72(1)
ld r27,-64(1)
ld r28,-56(1)
ld r29,-48(1)
ld r30,-40(1)
ld r31,-32(1)
li r3,0
blr
/*
* on an exception, reset to the beginning and jump back into the
* standard __copy_tofrom_user
*/
/*
 * r3/r4 are reloaded from the -24/-16(r1) slots written on entry to
 * __copy_tofrom_user_base and r5 is set back to 4096; the copy is then
 * redone from the start by the generic .Ldst_aligned code.
 */
100: ld r20,-120(1)
ld r21,-112(1)
ld r22,-104(1)
ld r23,-96(1)
ld r24,-88(1)
ld r25,-80(1)
ld r26,-72(1)
ld r27,-64(1)
ld r28,-56(1)
ld r29,-48(1)
ld r30,-40(1)
ld r31,-32(1)
ld r3,-24(r1)
ld r4,-16(r1)
li r5,4096
b .Ldst_aligned
/*
 * Exception table for .Lcopy_page_4K: every labelled instruction there
 * (20-91) is given the single fixup at 100.  Labels 74 and 75 sit on addi
 * instructions, which make no memory access.
 */
.section __ex_table,"a"
.align 3
.llong 20b,100b
.llong 21b,100b
.llong 22b,100b
.llong 23b,100b
.llong 24b,100b
.llong 25b,100b
.llong 26b,100b
.llong 27b,100b
.llong 28b,100b
.llong 29b,100b
.llong 30b,100b
.llong 31b,100b
.llong 32b,100b
.llong 33b,100b
.llong 34b,100b
.llong 35b,100b
.llong 36b,100b
.llong 37b,100b
.llong 38b,100b
.llong 39b,100b
.llong 40b,100b
.llong 41b,100b
.llong 42b,100b
.llong 43b,100b
.llong 44b,100b
.llong 45b,100b
.llong 46b,100b
.llong 47b,100b
.llong 48b,100b
.llong 49b,100b
.llong 50b,100b
.llong 51b,100b
.llong 52b,100b
.llong 53b,100b
.llong 54b,100b
.llong 55b,100b
.llong 56b,100b
.llong 57b,100b
.llong 58b,100b
.llong 59b,100b
.llong 60b,100b
.llong 61b,100b
.llong 62b,100b
.llong 63b,100b
.llong 64b,100b
.llong 65b,100b
.llong 66b,100b
.llong 67b,100b
.llong 68b,100b
.llong 69b,100b
.llong 70b,100b
.llong 71b,100b
.llong 72b,100b
.llong 73b,100b
.llong 74b,100b
.llong 75b,100b
.llong 76b,100b
.llong 77b,100b
.llong 78b,100b
.llong 79b,100b
.llong 80b,100b
.llong 81b,100b
.llong 82b,100b
.llong 83b,100b
.llong 84b,100b
.llong 85b,100b
.llong 86b,100b
.llong 87b,100b
.llong 88b,100b
.llong 89b,100b
.llong 90b,100b
.llong 91b,100b

View file

@ -0,0 +1,721 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2011
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
#include <asm/ppc_asm.h>
/*
 * Endian-dependent wrappers for the vector permute-control load and the
 * permute itself: big-endian uses lvsl and passes the vperm operands
 * through unchanged; little-endian uses lvsr and swaps the two source
 * vectors (VRA/VRB) of vperm.
 */
#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB) lvsl VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB) lvsr VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC
#endif
/*
 * err1 / err2: written in front of an instruction that may fault, as in
 * "err1; lbz r0,0(r4)".  Each places a local label (100 or 200) at that
 * instruction and adds an __ex_table entry sending a fault there to
 * .Ldo_err1 or .Ldo_err2, then returns to the previous section.
 */
.macro err1
100:
.section __ex_table,"a"
.align 3
.llong 100b,.Ldo_err1
.previous
.endm
.macro err2
200:
.section __ex_table,"a"
.align 3
.llong 200b,.Ldo_err2
.previous
.endm
#ifdef CONFIG_ALTIVEC
/*
 * err3 / err4: same scheme as err1/err2 (local label 300 or 400 plus an
 * __ex_table entry), with fixups .Ldo_err3 and .Ldo_err4.
 */
.macro err3
300:
.section __ex_table,"a"
.align 3
.llong 300b,.Ldo_err3
.previous
.endm
.macro err4
400:
.section __ex_table,"a"
.align 3
.llong 400b,.Ldo_err4
.previous
.endm
/* Fixup for err4 sites: reload r14-r16 from the frame, then continue as for err3. */
.Ldo_err4:
ld r16,STK_REG(R16)(r1)
ld r15,STK_REG(R15)(r1)
ld r14,STK_REG(R14)(r1)
/* Fixup for err3 sites: call exit_vmx_usercopy, restore LR from the caller's frame, then release the frame at .Lexit. */
.Ldo_err3:
bl exit_vmx_usercopy
ld r0,STACKFRAMESIZE+16(r1)
mtlr r0
b .Lexit
#endif /* CONFIG_ALTIVEC */
/*
 * Fixup for err2 sites (faults inside the 128-byte GPR loop, which runs
 * with a stack frame): reload r14-r22 from the frame and fall into .Lexit.
 */
.Ldo_err2:
ld r22,STK_REG(R22)(r1)
ld r21,STK_REG(R21)(r1)
ld r20,STK_REG(R20)(r1)
ld r19,STK_REG(R19)(r1)
ld r18,STK_REG(R18)(r1)
ld r17,STK_REG(R17)(r1)
ld r16,STK_REG(R16)(r1)
ld r15,STK_REG(R15)(r1)
ld r14,STK_REG(R14)(r1)
/* Release the stack frame. */
.Lexit:
addi r1,r1,STACKFRAMESIZE
/*
 * Common tail of every fixup (and the direct target of err1 sites, which
 * run without a frame): reload the original r3/r4/r5 that
 * __copy_tofrom_user_power7 saved below the stack pointer and restart
 * the whole copy in __copy_tofrom_user_base.
 */
.Ldo_err1:
ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
ld r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
ld r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
b __copy_tofrom_user_base
_GLOBAL(__copy_tofrom_user_power7)
#ifdef CONFIG_ALTIVEC
cmpldi r5,16
cmpldi cr1,r5,4096
std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
blt .Lshort_copy
bgt cr1,.Lvmx_copy
#else
cmpldi r5,16
std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
blt .Lshort_copy
#endif
.Lnonvmx_copy:
/* Get the source 8B aligned */
neg r6,r4
mtocrf 0x01,r6
clrldi r6,r6,(64-3)
bf cr7*4+3,1f
err1; lbz r0,0(r4)
addi r4,r4,1
err1; stb r0,0(r3)
addi r3,r3,1
1: bf cr7*4+2,2f
err1; lhz r0,0(r4)
addi r4,r4,2
err1; sth r0,0(r3)
addi r3,r3,2
2: bf cr7*4+1,3f
err1; lwz r0,0(r4)
addi r4,r4,4
err1; stw r0,0(r3)
addi r3,r3,4
3: sub r5,r5,r6
cmpldi r5,128
blt 5f
mflr r0
stdu r1,-STACKFRAMESIZE(r1)
std r14,STK_REG(R14)(r1)
std r15,STK_REG(R15)(r1)
std r16,STK_REG(R16)(r1)
std r17,STK_REG(R17)(r1)
std r18,STK_REG(R18)(r1)
std r19,STK_REG(R19)(r1)
std r20,STK_REG(R20)(r1)
std r21,STK_REG(R21)(r1)
std r22,STK_REG(R22)(r1)
std r0,STACKFRAMESIZE+16(r1)
srdi r6,r5,7
mtctr r6
/* Now do cacheline (128B) sized loads and stores. */
.align 5
4:
err2; ld r0,0(r4)
err2; ld r6,8(r4)
err2; ld r7,16(r4)
err2; ld r8,24(r4)
err2; ld r9,32(r4)
err2; ld r10,40(r4)
err2; ld r11,48(r4)
err2; ld r12,56(r4)
err2; ld r14,64(r4)
err2; ld r15,72(r4)
err2; ld r16,80(r4)
err2; ld r17,88(r4)
err2; ld r18,96(r4)
err2; ld r19,104(r4)
err2; ld r20,112(r4)
err2; ld r21,120(r4)
addi r4,r4,128
err2; std r0,0(r3)
err2; std r6,8(r3)
err2; std r7,16(r3)
err2; std r8,24(r3)
err2; std r9,32(r3)
err2; std r10,40(r3)
err2; std r11,48(r3)
err2; std r12,56(r3)
err2; std r14,64(r3)
err2; std r15,72(r3)
err2; std r16,80(r3)
err2; std r17,88(r3)
err2; std r18,96(r3)
err2; std r19,104(r3)
err2; std r20,112(r3)
err2; std r21,120(r3)
addi r3,r3,128
bdnz 4b
clrldi r5,r5,(64-7)
ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)
ld r17,STK_REG(R17)(r1)
ld r18,STK_REG(R18)(r1)
ld r19,STK_REG(R19)(r1)
ld r20,STK_REG(R20)(r1)
ld r21,STK_REG(R21)(r1)
ld r22,STK_REG(R22)(r1)
addi r1,r1,STACKFRAMESIZE
/* Up to 127B to go */
5: srdi r6,r5,4
mtocrf 0x01,r6
6: bf cr7*4+1,7f
err1; ld r0,0(r4)
err1; ld r6,8(r4)
err1; ld r7,16(r4)
err1; ld r8,24(r4)
err1; ld r9,32(r4)
err1; ld r10,40(r4)
err1; ld r11,48(r4)
err1; ld r12,56(r4)
addi r4,r4,64
err1; std r0,0(r3)
err1; std r6,8(r3)
err1; std r7,16(r3)
err1; std r8,24(r3)
err1; std r9,32(r3)
err1; std r10,40(r3)
err1; std r11,48(r3)
err1; std r12,56(r3)
addi r3,r3,64
/* Up to 63B to go */
7: bf cr7*4+2,8f
err1; ld r0,0(r4)
err1; ld r6,8(r4)
err1; ld r7,16(r4)
err1; ld r8,24(r4)
addi r4,r4,32
err1; std r0,0(r3)
err1; std r6,8(r3)
err1; std r7,16(r3)
err1; std r8,24(r3)
addi r3,r3,32
/* Up to 31B to go */
8: bf cr7*4+3,9f
err1; ld r0,0(r4)
err1; ld r6,8(r4)
addi r4,r4,16
err1; std r0,0(r3)
err1; std r6,8(r3)
addi r3,r3,16
9: clrldi r5,r5,(64-4)
/* Up to 15B to go */
.Lshort_copy:
mtocrf 0x01,r5
bf cr7*4+0,12f
err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
err1; lwz r6,4(r4)
addi r4,r4,8
err1; stw r0,0(r3)
err1; stw r6,4(r3)
addi r3,r3,8
12: bf cr7*4+1,13f
err1; lwz r0,0(r4)
addi r4,r4,4
err1; stw r0,0(r3)
addi r3,r3,4
13: bf cr7*4+2,14f
err1; lhz r0,0(r4)
addi r4,r4,2
err1; sth r0,0(r3)
addi r3,r3,2
14: bf cr7*4+3,15f
err1; lbz r0,0(r4)
err1; stb r0,0(r3)
15: li r3,0
blr
.Lunwind_stack_nonvmx_copy:
addi r1,r1,STACKFRAMESIZE
b .Lnonvmx_copy
#ifdef CONFIG_ALTIVEC
/*
 * VMX copy entry: save LR, allocate a stack frame and call
 * enter_vmx_usercopy.  Its return value is latched into cr1 right away
 * because r3 is reloaded immediately afterwards; cr1 is only acted on
 * after the prefetch streams have been set up (beq at the end of this
 * sequence).
 */
.Lvmx_copy:
mflr r0
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
bl enter_vmx_usercopy
cmpwi cr1,r3,0
ld r0,STACKFRAMESIZE+16(r1)
/*
 * Reload r3/r4/r5 from the frame.
 * NOTE(review): the stores that filled these slots are outside this view;
 * presumably they hold the original dest/src/len arguments -- confirm.
 */
ld r3,STK_REG(R31)(r1)
ld r4,STK_REG(R30)(r1)
ld r5,STK_REG(R29)(r1)
mtlr r0
/*
* We prefetch both the source and destination using enhanced touch
* instructions. We use a stream ID of 0 for the load side and
* 1 for the store side.
*/
/* r6 / r9 = r4 / r3 with the low 7 bits cleared (128B boundary) */
clrrdi r6,r4,7
clrrdi r9,r3,7
ori r9,r9,1 /* stream=1 */
srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */
cmpldi r7,0x3FF
ble 1f
li r7,0x3FF
1: lis r0,0x0E00 /* depth=7 */
sldi r7,r7,7
or r7,r7,r0
ori r10,r7,1 /* stream=1 */
lis r8,0x8000 /* GO=1 */
/* clear the upper 32 bits of r8, leaving only the bit set by lis */
clrldi r8,r8,32
.machine push
.machine "power4"
/* setup read stream 0 */
dcbt r0,r6,0b01000 /* addr from */
dcbt r0,r7,0b01010 /* length and depth from */
/* setup write stream 1 */
dcbtst r0,r9,0b01000 /* addr to */
dcbtst r0,r10,0b01010 /* length and depth to */
eieio
dcbt r0,r8,0b01010 /* all streams GO */
.machine pop
/* enter_vmx_usercopy returned 0: unwind and use the non-VMX copy */
beq cr1,.Lunwind_stack_nonvmx_copy
/*
* If source and destination are not relatively aligned we use a
* slower permute loop.
*/
xor r6,r4,r3
rldicl. r6,r6,0,(64-4)
bne .Lvmx_unaligned_copy
/* Get the destination 16B aligned */
neg r6,r3
mtocrf 0x01,r6
clrldi r6,r6,(64-4)
bf cr7*4+3,1f
err3; lbz r0,0(r4)
addi r4,r4,1
err3; stb r0,0(r3)
addi r3,r3,1
1: bf cr7*4+2,2f
err3; lhz r0,0(r4)
addi r4,r4,2
err3; sth r0,0(r3)
addi r3,r3,2
2: bf cr7*4+1,3f
err3; lwz r0,0(r4)
addi r4,r4,4
err3; stw r0,0(r3)
addi r3,r3,4
3: bf cr7*4+0,4f
err3; ld r0,0(r4)
addi r4,r4,8
err3; std r0,0(r3)
addi r3,r3,8
4: sub r5,r5,r6
/* Get the destination 128B aligned */
neg r6,r3
srdi r7,r6,4
mtocrf 0x01,r7
clrldi r6,r6,(64-7)
li r9,16
li r10,32
li r11,48
bf cr7*4+3,5f
err3; lvx vr1,r0,r4
addi r4,r4,16
err3; stvx vr1,r0,r3
addi r3,r3,16
5: bf cr7*4+2,6f
err3; lvx vr1,r0,r4
err3; lvx vr0,r4,r9
addi r4,r4,32
err3; stvx vr1,r0,r3
err3; stvx vr0,r3,r9
addi r3,r3,32
6: bf cr7*4+1,7f
err3; lvx vr3,r0,r4
err3; lvx vr2,r4,r9
err3; lvx vr1,r4,r10
err3; lvx vr0,r4,r11
addi r4,r4,64
err3; stvx vr3,r0,r3
err3; stvx vr2,r3,r9
err3; stvx vr1,r3,r10
err3; stvx vr0,r3,r11
addi r3,r3,64
7: sub r5,r5,r6
srdi r6,r5,7
std r14,STK_REG(R14)(r1)
std r15,STK_REG(R15)(r1)
std r16,STK_REG(R16)(r1)
li r12,64
li r14,80
li r15,96
li r16,112
mtctr r6
/*
* Now do cacheline sized loads and stores. By this stage the
* cacheline stores are also cacheline aligned.
*/
.align 5
8:
err4; lvx vr7,r0,r4
err4; lvx vr6,r4,r9
err4; lvx vr5,r4,r10
err4; lvx vr4,r4,r11
err4; lvx vr3,r4,r12
err4; lvx vr2,r4,r14
err4; lvx vr1,r4,r15
err4; lvx vr0,r4,r16
addi r4,r4,128
err4; stvx vr7,r0,r3
err4; stvx vr6,r3,r9
err4; stvx vr5,r3,r10
err4; stvx vr4,r3,r11
err4; stvx vr3,r3,r12
err4; stvx vr2,r3,r14
err4; stvx vr1,r3,r15
err4; stvx vr0,r3,r16
addi r3,r3,128
bdnz 8b
ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)
/* Up to 127B to go */
clrldi r5,r5,(64-7)
srdi r6,r5,4
mtocrf 0x01,r6
bf cr7*4+1,9f
err3; lvx vr3,r0,r4
err3; lvx vr2,r4,r9
err3; lvx vr1,r4,r10
err3; lvx vr0,r4,r11
addi r4,r4,64
err3; stvx vr3,r0,r3
err3; stvx vr2,r3,r9
err3; stvx vr1,r3,r10
err3; stvx vr0,r3,r11
addi r3,r3,64
9: bf cr7*4+2,10f
err3; lvx vr1,r0,r4
err3; lvx vr0,r4,r9
addi r4,r4,32
err3; stvx vr1,r0,r3
err3; stvx vr0,r3,r9
addi r3,r3,32
10: bf cr7*4+3,11f
err3; lvx vr1,r0,r4
addi r4,r4,16
err3; stvx vr1,r0,r3
addi r3,r3,16
/* Up to 15B to go */
11: clrldi r5,r5,(64-4)
mtocrf 0x01,r5
bf cr7*4+0,12f
err3; ld r0,0(r4)
addi r4,r4,8
err3; std r0,0(r3)
addi r3,r3,8
12: bf cr7*4+1,13f
err3; lwz r0,0(r4)
addi r4,r4,4
err3; stw r0,0(r3)
addi r3,r3,4
13: bf cr7*4+2,14f
err3; lhz r0,0(r4)
addi r4,r4,2
err3; sth r0,0(r3)
addi r3,r3,2
14: bf cr7*4+3,15f
err3; lbz r0,0(r4)
err3; stb r0,0(r3)
15: addi r1,r1,STACKFRAMESIZE
b exit_vmx_usercopy /* tail call optimise */
.Lvmx_unaligned_copy:
/* Get the destination 16B aligned */
neg r6,r3
mtocrf 0x01,r6
clrldi r6,r6,(64-4)
bf cr7*4+3,1f
err3; lbz r0,0(r4)
addi r4,r4,1
err3; stb r0,0(r3)
addi r3,r3,1
1: bf cr7*4+2,2f
err3; lhz r0,0(r4)
addi r4,r4,2
err3; sth r0,0(r3)
addi r3,r3,2
2: bf cr7*4+1,3f
err3; lwz r0,0(r4)
addi r4,r4,4
err3; stw r0,0(r3)
addi r3,r3,4
3: bf cr7*4+0,4f
err3; lwz r0,0(r4) /* Less chance of a reject with word ops */
err3; lwz r7,4(r4)
addi r4,r4,8
err3; stw r0,0(r3)
err3; stw r7,4(r3)
addi r3,r3,8
4: sub r5,r5,r6
/* Get the destination 128B aligned */
neg r6,r3
srdi r7,r6,4
mtocrf 0x01,r7
clrldi r6,r6,(64-7)
li r9,16
li r10,32
li r11,48
LVS(vr16,0,r4) /* Setup permute control vector */
err3; lvx vr0,0,r4
addi r4,r4,16
bf cr7*4+3,5f
err3; lvx vr1,r0,r4
VPERM(vr8,vr0,vr1,vr16)
addi r4,r4,16
err3; stvx vr8,r0,r3
addi r3,r3,16
vor vr0,vr1,vr1
5: bf cr7*4+2,6f
err3; lvx vr1,r0,r4
VPERM(vr8,vr0,vr1,vr16)
err3; lvx vr0,r4,r9
VPERM(vr9,vr1,vr0,vr16)
addi r4,r4,32
err3; stvx vr8,r0,r3
err3; stvx vr9,r3,r9
addi r3,r3,32
6: bf cr7*4+1,7f
err3; lvx vr3,r0,r4
VPERM(vr8,vr0,vr3,vr16)
err3; lvx vr2,r4,r9
VPERM(vr9,vr3,vr2,vr16)
err3; lvx vr1,r4,r10
VPERM(vr10,vr2,vr1,vr16)
err3; lvx vr0,r4,r11
VPERM(vr11,vr1,vr0,vr16)
addi r4,r4,64
err3; stvx vr8,r0,r3
err3; stvx vr9,r3,r9
err3; stvx vr10,r3,r10
err3; stvx vr11,r3,r11
addi r3,r3,64
7: sub r5,r5,r6
srdi r6,r5,7
std r14,STK_REG(R14)(r1)
std r15,STK_REG(R15)(r1)
std r16,STK_REG(R16)(r1)
li r12,64
li r14,80
li r15,96
li r16,112
mtctr r6
/*
* Now do cacheline sized loads and stores. By this stage the
* cacheline stores are also cacheline aligned.
*/
.align 5
8:
err4; lvx vr7,r0,r4
VPERM(vr8,vr0,vr7,vr16)
err4; lvx vr6,r4,r9
VPERM(vr9,vr7,vr6,vr16)
err4; lvx vr5,r4,r10
VPERM(vr10,vr6,vr5,vr16)
err4; lvx vr4,r4,r11
VPERM(vr11,vr5,vr4,vr16)
err4; lvx vr3,r4,r12
VPERM(vr12,vr4,vr3,vr16)
err4; lvx vr2,r4,r14
VPERM(vr13,vr3,vr2,vr16)
err4; lvx vr1,r4,r15
VPERM(vr14,vr2,vr1,vr16)
err4; lvx vr0,r4,r16
VPERM(vr15,vr1,vr0,vr16)
addi r4,r4,128
err4; stvx vr8,r0,r3
err4; stvx vr9,r3,r9
err4; stvx vr10,r3,r10
err4; stvx vr11,r3,r11
err4; stvx vr12,r3,r12
err4; stvx vr13,r3,r14
err4; stvx vr14,r3,r15
err4; stvx vr15,r3,r16
addi r3,r3,128
bdnz 8b
ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)
/* Up to 127B to go */
clrldi r5,r5,(64-7)
srdi r6,r5,4
mtocrf 0x01,r6
bf cr7*4+1,9f
err3; lvx vr3,r0,r4
VPERM(vr8,vr0,vr3,vr16)
err3; lvx vr2,r4,r9
VPERM(vr9,vr3,vr2,vr16)
err3; lvx vr1,r4,r10
VPERM(vr10,vr2,vr1,vr16)
err3; lvx vr0,r4,r11
VPERM(vr11,vr1,vr0,vr16)
addi r4,r4,64
err3; stvx vr8,r0,r3
err3; stvx vr9,r3,r9
err3; stvx vr10,r3,r10
err3; stvx vr11,r3,r11
addi r3,r3,64
9: bf cr7*4+2,10f
err3; lvx vr1,r0,r4
VPERM(vr8,vr0,vr1,vr16)
err3; lvx vr0,r4,r9
VPERM(vr9,vr1,vr0,vr16)
addi r4,r4,32
err3; stvx vr8,r0,r3
err3; stvx vr9,r3,r9
addi r3,r3,32
10: bf cr7*4+3,11f
err3; lvx vr1,r0,r4
VPERM(vr8,vr0,vr1,vr16)
addi r4,r4,16
err3; stvx vr8,r0,r3
addi r3,r3,16
/* Up to 15B to go */
11: clrldi r5,r5,(64-4)
addi r4,r4,-16 /* Unwind the +16 load offset */
mtocrf 0x01,r5
bf cr7*4+0,12f
err3; lwz r0,0(r4) /* Less chance of a reject with word ops */
err3; lwz r6,4(r4)
addi r4,r4,8
err3; stw r0,0(r3)
err3; stw r6,4(r3)
addi r3,r3,8
12: bf cr7*4+1,13f
err3; lwz r0,0(r4)
addi r4,r4,4
err3; stw r0,0(r3)
addi r3,r3,4
13: bf cr7*4+2,14f
err3; lhz r0,0(r4)
addi r4,r4,2
err3; sth r0,0(r3)
addi r3,r3,2
14: bf cr7*4+3,15f
err3; lbz r0,0(r4)
err3; stb r0,0(r3)
15: addi r1,r1,STACKFRAMESIZE
b exit_vmx_usercopy /* tail call optimise */
#endif /* CONFIG_ALTIVEC */

View file

@ -0,0 +1,547 @@
/*
* Special support for eabi and SVR4
*
* Copyright (C) 1995, 1996, 1998, 2000, 2001 Free Software Foundation, Inc.
* Copyright 2008 Freescale Semiconductor, Inc.
* Written By Michael Meissner
*
* Based on gcc/config/rs6000/crtsavres.asm from gcc
* 64 bit additions from reading the PPC elf64abi document.
*
* This file is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* In addition to the permissions in the GNU General Public License, the
* Free Software Foundation gives you unlimited permission to link the
* compiled version of this file with other programs, and to distribute
* those programs without any restriction coming from the use of this
* file. (The General Public License restrictions do apply in other
* respects; for example, they cover modification of the file, and
* distribution when not linked into another program.)
*
* This file is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
* As a special exception, if you link this library with files
* compiled with GCC to produce an executable, this does not cause
* the resulting executable to be covered by the GNU General Public License.
* This exception does not however invalidate any other reasons why
* the executable file might be covered by the GNU General Public License.
*/
#include <asm/ppc_asm.h>
.file "crtsavres.S"
#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
#ifndef CONFIG_PPC64
.section ".text"
/* Routines for saving integer registers, called by the compiler. */
/* Called with r11 pointing to the stack header word of the caller of the */
/* function, just beyond the end of the integer save area. */
/*
 * Each entry point _savegpr_N (also exported as _save32gpr_N) stores rN and
 * then falls through the following entry points, so entering at N saves
 * rN..r31 before the single blr at the end.  Register rN is stored at
 * offset -(32 - N) * 4 from r11.
 */
_GLOBAL(_savegpr_14)
_GLOBAL(_save32gpr_14)
stw 14,-72(11) /* save gp registers */
_GLOBAL(_savegpr_15)
_GLOBAL(_save32gpr_15)
stw 15,-68(11)
_GLOBAL(_savegpr_16)
_GLOBAL(_save32gpr_16)
stw 16,-64(11)
_GLOBAL(_savegpr_17)
_GLOBAL(_save32gpr_17)
stw 17,-60(11)
_GLOBAL(_savegpr_18)
_GLOBAL(_save32gpr_18)
stw 18,-56(11)
_GLOBAL(_savegpr_19)
_GLOBAL(_save32gpr_19)
stw 19,-52(11)
_GLOBAL(_savegpr_20)
_GLOBAL(_save32gpr_20)
stw 20,-48(11)
_GLOBAL(_savegpr_21)
_GLOBAL(_save32gpr_21)
stw 21,-44(11)
_GLOBAL(_savegpr_22)
_GLOBAL(_save32gpr_22)
stw 22,-40(11)
_GLOBAL(_savegpr_23)
_GLOBAL(_save32gpr_23)
stw 23,-36(11)
_GLOBAL(_savegpr_24)
_GLOBAL(_save32gpr_24)
stw 24,-32(11)
_GLOBAL(_savegpr_25)
_GLOBAL(_save32gpr_25)
stw 25,-28(11)
_GLOBAL(_savegpr_26)
_GLOBAL(_save32gpr_26)
stw 26,-24(11)
_GLOBAL(_savegpr_27)
_GLOBAL(_save32gpr_27)
stw 27,-20(11)
_GLOBAL(_savegpr_28)
_GLOBAL(_save32gpr_28)
stw 28,-16(11)
_GLOBAL(_savegpr_29)
_GLOBAL(_save32gpr_29)
stw 29,-12(11)
_GLOBAL(_savegpr_30)
_GLOBAL(_save32gpr_30)
stw 30,-8(11)
_GLOBAL(_savegpr_31)
_GLOBAL(_save32gpr_31)
stw 31,-4(11)
blr
/* Routines for restoring integer registers, called by the compiler. */
/* Called with r11 pointing to the stack header word of the caller of the */
/* function, just beyond the end of the integer restore area. */
/*
 * Mirror image of the _savegpr_N chain: entering at _restgpr_N (also
 * exported as _rest32gpr_N) reloads rN..r31 by falling through to the final
 * blr.  Register rN is loaded from offset -(32 - N) * 4 from r11.  These
 * variants only reload the registers; r1 and LR are left untouched.
 */
_GLOBAL(_restgpr_14)
_GLOBAL(_rest32gpr_14)
lwz 14,-72(11) /* restore gp registers */
_GLOBAL(_restgpr_15)
_GLOBAL(_rest32gpr_15)
lwz 15,-68(11)
_GLOBAL(_restgpr_16)
_GLOBAL(_rest32gpr_16)
lwz 16,-64(11)
_GLOBAL(_restgpr_17)
_GLOBAL(_rest32gpr_17)
lwz 17,-60(11)
_GLOBAL(_restgpr_18)
_GLOBAL(_rest32gpr_18)
lwz 18,-56(11)
_GLOBAL(_restgpr_19)
_GLOBAL(_rest32gpr_19)
lwz 19,-52(11)
_GLOBAL(_restgpr_20)
_GLOBAL(_rest32gpr_20)
lwz 20,-48(11)
_GLOBAL(_restgpr_21)
_GLOBAL(_rest32gpr_21)
lwz 21,-44(11)
_GLOBAL(_restgpr_22)
_GLOBAL(_rest32gpr_22)
lwz 22,-40(11)
_GLOBAL(_restgpr_23)
_GLOBAL(_rest32gpr_23)
lwz 23,-36(11)
_GLOBAL(_restgpr_24)
_GLOBAL(_rest32gpr_24)
lwz 24,-32(11)
_GLOBAL(_restgpr_25)
_GLOBAL(_rest32gpr_25)
lwz 25,-28(11)
_GLOBAL(_restgpr_26)
_GLOBAL(_rest32gpr_26)
lwz 26,-24(11)
_GLOBAL(_restgpr_27)
_GLOBAL(_rest32gpr_27)
lwz 27,-20(11)
_GLOBAL(_restgpr_28)
_GLOBAL(_rest32gpr_28)
lwz 28,-16(11)
_GLOBAL(_restgpr_29)
_GLOBAL(_rest32gpr_29)
lwz 29,-12(11)
_GLOBAL(_restgpr_30)
_GLOBAL(_rest32gpr_30)
lwz 30,-8(11)
_GLOBAL(_restgpr_31)
_GLOBAL(_rest32gpr_31)
lwz 31,-4(11)
blr
/* Routines for restoring integer registers and exiting, called by the */
/* compiler.  Called with r11 pointing to the stack header word of the */
/* caller of the function, just beyond the end of the integer restore area. */
/*
 * "_x" (exit) variants: after reloading rN..r31 the tail also loads the
 * word at 4(r11) into LR, copies r11 into r1 and executes blr.  The blr
 * therefore goes to the address loaded from 4(r11), not back to the code
 * that branched here.
 */
_GLOBAL(_restgpr_14_x)
_GLOBAL(_rest32gpr_14_x)
lwz 14,-72(11) /* restore gp registers */
_GLOBAL(_restgpr_15_x)
_GLOBAL(_rest32gpr_15_x)
lwz 15,-68(11)
_GLOBAL(_restgpr_16_x)
_GLOBAL(_rest32gpr_16_x)
lwz 16,-64(11)
_GLOBAL(_restgpr_17_x)
_GLOBAL(_rest32gpr_17_x)
lwz 17,-60(11)
_GLOBAL(_restgpr_18_x)
_GLOBAL(_rest32gpr_18_x)
lwz 18,-56(11)
_GLOBAL(_restgpr_19_x)
_GLOBAL(_rest32gpr_19_x)
lwz 19,-52(11)
_GLOBAL(_restgpr_20_x)
_GLOBAL(_rest32gpr_20_x)
lwz 20,-48(11)
_GLOBAL(_restgpr_21_x)
_GLOBAL(_rest32gpr_21_x)
lwz 21,-44(11)
_GLOBAL(_restgpr_22_x)
_GLOBAL(_rest32gpr_22_x)
lwz 22,-40(11)
_GLOBAL(_restgpr_23_x)
_GLOBAL(_rest32gpr_23_x)
lwz 23,-36(11)
_GLOBAL(_restgpr_24_x)
_GLOBAL(_rest32gpr_24_x)
lwz 24,-32(11)
_GLOBAL(_restgpr_25_x)
_GLOBAL(_rest32gpr_25_x)
lwz 25,-28(11)
_GLOBAL(_restgpr_26_x)
_GLOBAL(_rest32gpr_26_x)
lwz 26,-24(11)
_GLOBAL(_restgpr_27_x)
_GLOBAL(_rest32gpr_27_x)
lwz 27,-20(11)
_GLOBAL(_restgpr_28_x)
_GLOBAL(_rest32gpr_28_x)
lwz 28,-16(11)
_GLOBAL(_restgpr_29_x)
_GLOBAL(_rest32gpr_29_x)
lwz 29,-12(11)
_GLOBAL(_restgpr_30_x)
_GLOBAL(_rest32gpr_30_x)
lwz 30,-8(11)
_GLOBAL(_restgpr_31_x)
_GLOBAL(_rest32gpr_31_x)
lwz 0,4(11) /* return address for the final blr */
lwz 31,-4(11)
mtlr 0
mr 1,11 /* r1 = r11 */
blr
#ifdef CONFIG_ALTIVEC
/* Called with r0 pointing just beyond the end of the vector save area. */
/*
 * Entering at _savevr_N stores vrN..vr31 by falling through to the final
 * blr.  vrN is stored at r0 - (32 - N) * 16; r11 is clobbered because it is
 * used to hold each offset.
 */
_GLOBAL(_savevr_20)
li r11,-192
stvx vr20,r11,r0
_GLOBAL(_savevr_21)
li r11,-176
stvx vr21,r11,r0
_GLOBAL(_savevr_22)
li r11,-160
stvx vr22,r11,r0
_GLOBAL(_savevr_23)
li r11,-144
stvx vr23,r11,r0
_GLOBAL(_savevr_24)
li r11,-128
stvx vr24,r11,r0
_GLOBAL(_savevr_25)
li r11,-112
stvx vr25,r11,r0
_GLOBAL(_savevr_26)
li r11,-96
stvx vr26,r11,r0
_GLOBAL(_savevr_27)
li r11,-80
stvx vr27,r11,r0
_GLOBAL(_savevr_28)
li r11,-64
stvx vr28,r11,r0
_GLOBAL(_savevr_29)
li r11,-48
stvx vr29,r11,r0
_GLOBAL(_savevr_30)
li r11,-32
stvx vr30,r11,r0
_GLOBAL(_savevr_31)
li r11,-16
stvx vr31,r11,r0
blr
/*
 * Vector restore chain, the counterpart of _savevr_N: r0 points just beyond
 * the end of the vector save area and entering at _restvr_N reloads
 * vrN..vr31 from r0 - (32 - N) * 16.  r11 is clobbered (offset scratch).
 */
_GLOBAL(_restvr_20)
li r11,-192
lvx vr20,r11,r0
_GLOBAL(_restvr_21)
li r11,-176
lvx vr21,r11,r0
_GLOBAL(_restvr_22)
li r11,-160
lvx vr22,r11,r0
_GLOBAL(_restvr_23)
li r11,-144
lvx vr23,r11,r0
_GLOBAL(_restvr_24)
li r11,-128
lvx vr24,r11,r0
_GLOBAL(_restvr_25)
li r11,-112
lvx vr25,r11,r0
_GLOBAL(_restvr_26)
li r11,-96
lvx vr26,r11,r0
_GLOBAL(_restvr_27)
li r11,-80
lvx vr27,r11,r0
_GLOBAL(_restvr_28)
li r11,-64
lvx vr28,r11,r0
_GLOBAL(_restvr_29)
li r11,-48
lvx vr29,r11,r0
_GLOBAL(_restvr_30)
li r11,-32
lvx vr30,r11,r0
_GLOBAL(_restvr_31)
li r11,-16
lvx vr31,r11,r0
blr
#endif /* CONFIG_ALTIVEC */
#else /* CONFIG_PPC64 */
.section ".text.save.restore","ax",@progbits
/*
 * 64-bit GPR save chain.  Entering at _savegpr0_N stores rN..r31 below r1
 * (rN at offset -(32 - N) * 8) by falling through the later entry points,
 * then stores r0 at 16(r1) and returns.
 * NOTE(review): r0 is presumably the caller's saved LR value, as in the
 * ELF64 ABI save/restore helpers -- nothing in this file sets it; confirm.
 */
.globl _savegpr0_14
_savegpr0_14:
std r14,-144(r1)
.globl _savegpr0_15
_savegpr0_15:
std r15,-136(r1)
.globl _savegpr0_16
_savegpr0_16:
std r16,-128(r1)
.globl _savegpr0_17
_savegpr0_17:
std r17,-120(r1)
.globl _savegpr0_18
_savegpr0_18:
std r18,-112(r1)
.globl _savegpr0_19
_savegpr0_19:
std r19,-104(r1)
.globl _savegpr0_20
_savegpr0_20:
std r20,-96(r1)
.globl _savegpr0_21
_savegpr0_21:
std r21,-88(r1)
.globl _savegpr0_22
_savegpr0_22:
std r22,-80(r1)
.globl _savegpr0_23
_savegpr0_23:
std r23,-72(r1)
.globl _savegpr0_24
_savegpr0_24:
std r24,-64(r1)
.globl _savegpr0_25
_savegpr0_25:
std r25,-56(r1)
.globl _savegpr0_26
_savegpr0_26:
std r26,-48(r1)
.globl _savegpr0_27
_savegpr0_27:
std r27,-40(r1)
.globl _savegpr0_28
_savegpr0_28:
std r28,-32(r1)
.globl _savegpr0_29
_savegpr0_29:
std r29,-24(r1)
.globl _savegpr0_30
_savegpr0_30:
std r30,-16(r1)
.globl _savegpr0_31
_savegpr0_31:
std r31,-8(r1)
std r0,16(r1)
blr
/*
 * 64-bit GPR restore chain.  Entering at _restgpr0_N reloads rN..r31 from
 * below r1 (rN at offset -(32 - N) * 8), loads the doubleword at 16(r1)
 * into LR and returns through it.
 * Entry points 14..28 fall through into _restgpr0_29, which has its own
 * complete tail (r29, r30, r31, LR, blr).  _restgpr0_30 falls through into
 * _restgpr0_31, which carries a second copy of the LR reload and blr.
 */
.globl _restgpr0_14
_restgpr0_14:
ld r14,-144(r1)
.globl _restgpr0_15
_restgpr0_15:
ld r15,-136(r1)
.globl _restgpr0_16
_restgpr0_16:
ld r16,-128(r1)
.globl _restgpr0_17
_restgpr0_17:
ld r17,-120(r1)
.globl _restgpr0_18
_restgpr0_18:
ld r18,-112(r1)
.globl _restgpr0_19
_restgpr0_19:
ld r19,-104(r1)
.globl _restgpr0_20
_restgpr0_20:
ld r20,-96(r1)
.globl _restgpr0_21
_restgpr0_21:
ld r21,-88(r1)
.globl _restgpr0_22
_restgpr0_22:
ld r22,-80(r1)
.globl _restgpr0_23
_restgpr0_23:
ld r23,-72(r1)
.globl _restgpr0_24
_restgpr0_24:
ld r24,-64(r1)
.globl _restgpr0_25
_restgpr0_25:
ld r25,-56(r1)
.globl _restgpr0_26
_restgpr0_26:
ld r26,-48(r1)
.globl _restgpr0_27
_restgpr0_27:
ld r27,-40(r1)
.globl _restgpr0_28
_restgpr0_28:
ld r28,-32(r1)
.globl _restgpr0_29
_restgpr0_29:
ld r0,16(r1)
ld r29,-24(r1)
mtlr r0
ld r30,-16(r1)
ld r31,-8(r1)
blr
/* Separate short tails for the 30 and 31 entry points */
.globl _restgpr0_30
_restgpr0_30:
ld r30,-16(r1)
.globl _restgpr0_31
_restgpr0_31:
ld r0,16(r1)
ld r31,-8(r1)
mtlr r0
blr
#ifdef CONFIG_ALTIVEC
/* Called with r0 pointing just beyond the end of the vector save area. */
/*
 * 64-bit build of the vector save chain: entering at _savevr_N stores
 * vrN..vr31 at r0 - (32 - N) * 16 by falling through to the final blr.
 * r12 is clobbered because it holds each offset.
 */
.globl _savevr_20
_savevr_20:
li r12,-192
stvx vr20,r12,r0
.globl _savevr_21
_savevr_21:
li r12,-176
stvx vr21,r12,r0
.globl _savevr_22
_savevr_22:
li r12,-160
stvx vr22,r12,r0
.globl _savevr_23
_savevr_23:
li r12,-144
stvx vr23,r12,r0
.globl _savevr_24
_savevr_24:
li r12,-128
stvx vr24,r12,r0
.globl _savevr_25
_savevr_25:
li r12,-112
stvx vr25,r12,r0
.globl _savevr_26
_savevr_26:
li r12,-96
stvx vr26,r12,r0
.globl _savevr_27
_savevr_27:
li r12,-80
stvx vr27,r12,r0
.globl _savevr_28
_savevr_28:
li r12,-64
stvx vr28,r12,r0
.globl _savevr_29
_savevr_29:
li r12,-48
stvx vr29,r12,r0
.globl _savevr_30
_savevr_30:
li r12,-32
stvx vr30,r12,r0
.globl _savevr_31
_savevr_31:
li r12,-16
stvx vr31,r12,r0
blr
/*
 * 64-bit build of the vector restore chain: r0 points just beyond the end
 * of the vector save area and entering at _restvr_N reloads vrN..vr31 from
 * r0 - (32 - N) * 16.  r12 is clobbered (offset scratch).
 */
.globl _restvr_20
_restvr_20:
li r12,-192
lvx vr20,r12,r0
.globl _restvr_21
_restvr_21:
li r12,-176
lvx vr21,r12,r0
.globl _restvr_22
_restvr_22:
li r12,-160
lvx vr22,r12,r0
.globl _restvr_23
_restvr_23:
li r12,-144
lvx vr23,r12,r0
.globl _restvr_24
_restvr_24:
li r12,-128
lvx vr24,r12,r0
.globl _restvr_25
_restvr_25:
li r12,-112
lvx vr25,r12,r0
.globl _restvr_26
_restvr_26:
li r12,-96
lvx vr26,r12,r0
.globl _restvr_27
_restvr_27:
li r12,-80
lvx vr27,r12,r0
.globl _restvr_28
_restvr_28:
li r12,-64
lvx vr28,r12,r0
.globl _restvr_29
_restvr_29:
li r12,-48
lvx vr29,r12,r0
.globl _restvr_30
_restvr_30:
li r12,-32
lvx vr30,r12,r0
.globl _restvr_31
_restvr_31:
li r12,-16
lvx vr31,r12,r0
blr
#endif /* CONFIG_ALTIVEC */
#endif /* CONFIG_PPC64 */
#endif

43
arch/powerpc/lib/devres.c Normal file
View file

@ -0,0 +1,43 @@
/*
* Copyright (C) 2008 Freescale Semiconductor, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/device.h> /* devres_*(), devm_ioremap_release() */
#include <linux/gfp.h>
#include <linux/io.h> /* ioremap_prot() */
#include <linux/export.h> /* EXPORT_SYMBOL() */
/**
 * devm_ioremap_prot - device-managed variant of ioremap_prot()
 * @dev: Generic device the mapping is tied to
 * @offset: BUS offset to map
 * @size: Size of map
 * @flags: Page flags
 *
 * Creates the mapping with ioremap_prot() and registers it as a device
 * resource with devm_ioremap_release() as the release callback, so the
 * map is automatically unmapped on driver detach.
 *
 * Returns the mapped address, or NULL if either the resource record or
 * the mapping could not be obtained.
 */
void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
				 size_t size, unsigned long flags)
{
	void __iomem **slot;
	void __iomem *mapping;

	/* Get the tracking record first; without it nothing is mapped. */
	slot = devres_alloc(devm_ioremap_release, sizeof(*slot), GFP_KERNEL);
	if (!slot)
		return NULL;

	mapping = ioremap_prot(offset, size, flags);
	if (!mapping) {
		/* Mapping failed: the unused record must not be leaked. */
		devres_free(slot);
		return mapping;
	}

	/* Remember the address and hand the record over to the device. */
	*slot = mapping;
	devres_add(dev, slot);

	return mapping;
}
EXPORT_SYMBOL(devm_ioremap_prot);

59
arch/powerpc/lib/div64.S Normal file
View file

@ -0,0 +1,59 @@
/*
* Divide a 64-bit unsigned number by a 32-bit unsigned number.
* This routine assumes that the top 32 bits of the dividend are
* non-zero to start with.
* On entry, r3 points to the dividend, which gets overwritten with
* the 64-bit quotient, and r4 contains the divisor.
* On exit, r3 contains the remainder.
*
* Copyright (C) 2002 Paul Mackerras, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <asm/ppc_asm.h>
#include <asm/processor.h>
/*
 * Register use:
 *   r3      in: pointer to the 64-bit dividend (overwritten with the
 *           quotient); out: remainder
 *   r4      32-bit divisor
 *   r5:r6   running dividend, high:low word
 *   r7:r8   accumulated quotient, high:low word
 *   r0, r9-r11  scratch
 *
 * The loop at 1:/2: repeatedly forms a quotient estimate in r11, subtracts
 * estimate * divisor from r5:r6 and adds the estimate to r8, until the
 * high word r5 reaches zero; a final 32-bit divide at 3: finishes the job.
 */
_GLOBAL(__div64_32)
lwz r5,0(r3) # get the dividend into r5/r6
lwz r6,4(r3)
cmplw r5,r4
li r7,0
li r8,0
blt 1f
divwu r7,r5,r4 # if dividend.hi >= divisor,
mullw r0,r7,r4 # quotient.hi = dividend.hi / divisor
subf. r5,r0,r5 # dividend.hi %= divisor
beq 3f
1: mr r11,r5 # here dividend.hi != 0
andis. r0,r5,0xc000
bne 2f
cntlzw r0,r5 # we are shifting the dividend right
li r10,-1 # to make it < 2^32, and shifting
srw r10,r10,r0 # the divisor right the same amount,
addc r9,r4,r10 # rounding up (so the estimate cannot
andc r11,r6,r10 # ever be too large, only too small)
andc r9,r9,r10
addze r9,r9
or r11,r5,r11
rotlw r9,r9,r0
rotlw r11,r11,r0
divwu r11,r11,r9 # then we divide the shifted quantities
2: mullw r10,r11,r4 # to get an estimate of the quotient,
mulhwu r9,r11,r4 # multiply the estimate by the divisor,
subfc r6,r10,r6 # take the product from the dividend,
add r8,r8,r11 # and add the estimate to the accumulated
subfe. r5,r9,r5 # quotient
bne 1b
3: cmplw r6,r4
blt 4f
divwu r0,r6,r4 # perform the remaining 32-bit division
mullw r10,r0,r4 # and get the remainder
add r8,r8,r0
subf r6,r10,r6
4: stw r7,0(r3) # return the quotient in *r3
stw r8,4(r3)
mr r3,r6 # return the remainder in r3
blr

View file

@ -0,0 +1,761 @@
/*
* Copyright 2008 Michael Ellerman, IBM Corporation.
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <asm/feature-fixups.h>
#include <asm/ppc_asm.h>
#include <asm/synch.h>
.text
/* Declare x as a global symbol and define it as a label at this point. */
#define globl(x) \
.globl x; \
x:
/*
 * Test 1: a single instruction is nop'ed out.
 * ftr_fixup_test1 .. end_ftr_fixup_test1 is the code that gets patched,
 * _orig is an identical unpatched copy and _expected is the image with the
 * middle instruction replaced by nop.
 * NOTE(review): the harness that patches and compares these images is not
 * in this file.
 */
globl(ftr_fixup_test1)
or 1,1,1
or 2,2,2 /* fixup will nop out this instruction */
or 3,3,3
globl(end_ftr_fixup_test1)
globl(ftr_fixup_test1_orig)
or 1,1,1
or 2,2,2
or 3,3,3
globl(ftr_fixup_test1_expected)
or 1,1,1
nop
or 3,3,3
/*
 * Test 2: a single instruction is replaced by a one-instruction
 * alternative.  _orig is the unpatched copy, _alt the replacement
 * instruction and _expected the image with the middle instruction swapped
 * for the alternative.
 */
globl(ftr_fixup_test2)
or 1,1,1
or 2,2,2 /* fixup will replace this with ftr_fixup_test2_alt */
or 3,3,3
globl(end_ftr_fixup_test2)
globl(ftr_fixup_test2_orig)
or 1,1,1
or 2,2,2
or 3,3,3
globl(ftr_fixup_test2_alt)
or 31,31,31
globl(ftr_fixup_test2_expected)
or 1,1,1
or 31,31,31
or 3,3,3
/*
 * Test 3: the replacement is expected to fail.  The alternative below is
 * two instructions long while the comment marks a single instruction as
 * the patch target.  There is no _expected image here.
 * NOTE(review): presumably the harness compares against _orig instead --
 * confirm in the C side of the selftest.
 */
globl(ftr_fixup_test3)
or 1,1,1
or 2,2,2 /* fixup will fail to replace this */
or 3,3,3
globl(end_ftr_fixup_test3)
globl(ftr_fixup_test3_orig)
or 1,1,1
or 2,2,2
or 3,3,3
globl(ftr_fixup_test3_alt)
or 31,31,31
or 31,31,31
/*
 * Test 4: the alternative is shorter than the code it replaces.  In the
 * _expected image the four "or 2,2,2" instructions have become the two
 * alternative instructions followed by two nops.
 */
globl(ftr_fixup_test4)
or 1,1,1
or 2,2,2
or 2,2,2
or 2,2,2
or 2,2,2
or 3,3,3
globl(end_ftr_fixup_test4)
globl(ftr_fixup_test4_expected)
or 1,1,1
or 31,31,31
or 31,31,31
nop
nop
or 3,3,3
globl(ftr_fixup_test4_orig)
or 1,1,1
or 2,2,2
or 2,2,2
or 2,2,2
or 2,2,2
or 3,3,3
globl(ftr_fixup_test4_alt)
or 31,31,31
or 31,31,31
/*
 * Test 5: alternative section whose else case contains branches.  Every
 * branch in the else case targets a label (1, 2 or 3) that is itself inside
 * the else case.  The _expected image is the else-case code placed between
 * the two "or 1,1,1" markers.
 */
globl(ftr_fixup_test5)
or 1,1,1
BEGIN_FTR_SECTION
or 2,2,2
or 2,2,2
or 2,2,2
or 2,2,2
or 2,2,2
or 2,2,2
or 2,2,2
FTR_SECTION_ELSE
2: b 3f
3: or 5,5,5
beq 3b
b 1f
or 6,6,6
b 2b
1: bdnz 3b
ALT_FTR_SECTION_END(0, 1)
or 1,1,1
globl(end_ftr_fixup_test5)
globl(ftr_fixup_test5_expected)
or 1,1,1
2: b 3f
3: or 5,5,5
beq 3b
b 1f
or 6,6,6
b 2b
1: bdnz 3b
or 1,1,1
/*
 * Test 6: alternative section whose else case branches both inside the
 * section ("blt 2b") and to labels outside it ("b 1b" before the section,
 * "beq 3f" / "b 3f" after it).  The _expected image is the else-case code
 * placed between the leading "1:" marker and the trailing instructions.
 * NOTE(review): in the test code label 3 sits on "or 1,1,1" and label 4 on
 * "or 3,3,3", but in the _expected image the same instructions carry
 * labels 2 and 3, so "beq 3f" / "b 3f" there resolve to "or 3,3,3" rather
 * than "or 1,1,1".  Confirm that this is the intended expectation.
 */
globl(ftr_fixup_test6)
1: or 1,1,1
BEGIN_FTR_SECTION
or 5,5,5
2: PPC_LCMPI r3,0
beq 4f
blt 2b
b 1b
b 4f
FTR_SECTION_ELSE
2: or 2,2,2
PPC_LCMPI r3,1
beq 3f
blt 2b
b 3f
b 1b
ALT_FTR_SECTION_END(0, 1)
3: or 1,1,1
or 2,2,2
4: or 3,3,3
globl(end_ftr_fixup_test6)
globl(ftr_fixup_test6_expected)
1: or 1,1,1
2: or 2,2,2
PPC_LCMPI r3,1
beq 3f
blt 2b
b 3f
b 1b
2: or 1,1,1
or 2,2,2
3: or 3,3,3
#if 0
/* Test that if we have a larger else case the assembler spots it and
* reports an error. #if 0'ed so as not to break the build normally.
*/
ftr_fixup_test7:
or 1,1,1
BEGIN_FTR_SECTION
or 2,2,2
or 2,2,2
or 2,2,2
FTR_SECTION_ELSE
or 3,3,3
or 3,3,3
or 3,3,3
or 3,3,3
ALT_FTR_SECTION_END(0, 1)
or 1,1,1
#endif
#define MAKE_MACRO_TEST(TYPE) \
globl(ftr_fixup_test_ ##TYPE##_macros) \
or 1,1,1; \
/* Basic test, this section should all be nop'ed */ \
BEGIN_##TYPE##_SECTION \
or 2,2,2; \
or 2,2,2; \
or 2,2,2; \
END_##TYPE##_SECTION(0, 1) \
or 1,1,1; \
or 1,1,1; \
/* Basic test, this section should NOT be nop'ed */ \
BEGIN_##TYPE##_SECTION \
or 2,2,2; \
or 2,2,2; \
or 2,2,2; \
END_##TYPE##_SECTION(0, 0) \
or 1,1,1; \
or 1,1,1; \
/* Nesting test, inner section should be nop'ed */ \
BEGIN_##TYPE##_SECTION \
or 2,2,2; \
or 2,2,2; \
BEGIN_##TYPE##_SECTION_NESTED(80) \
or 3,3,3; \
or 3,3,3; \
END_##TYPE##_SECTION_NESTED(0, 1, 80) \
or 2,2,2; \
or 2,2,2; \
END_##TYPE##_SECTION(0, 0) \
or 1,1,1; \
or 1,1,1; \
/* Nesting test, whole section should be nop'ed */ \
BEGIN_##TYPE##_SECTION \
or 2,2,2; \
or 2,2,2; \
BEGIN_##TYPE##_SECTION_NESTED(80) \
or 3,3,3; \
or 3,3,3; \
END_##TYPE##_SECTION_NESTED(0, 0, 80) \
or 2,2,2; \
or 2,2,2; \
END_##TYPE##_SECTION(0, 1) \
or 1,1,1; \
or 1,1,1; \
/* Nesting test, none should be nop'ed */ \
BEGIN_##TYPE##_SECTION \
or 2,2,2; \
or 2,2,2; \
BEGIN_##TYPE##_SECTION_NESTED(80) \
or 3,3,3; \
or 3,3,3; \
END_##TYPE##_SECTION_NESTED(0, 0, 80) \
or 2,2,2; \
or 2,2,2; \
END_##TYPE##_SECTION(0, 0) \
or 1,1,1; \
or 1,1,1; \
/* Basic alt section test, default case should be taken */ \
BEGIN_##TYPE##_SECTION \
or 3,3,3; \
or 3,3,3; \
or 3,3,3; \
##TYPE##_SECTION_ELSE \
or 5,5,5; \
or 5,5,5; \
ALT_##TYPE##_SECTION_END(0, 0) \
or 1,1,1; \
or 1,1,1; \
/* Basic alt section test, else case should be taken */ \
BEGIN_##TYPE##_SECTION \
or 3,3,3; \
or 3,3,3; \
or 3,3,3; \
##TYPE##_SECTION_ELSE \
or 31,31,31; \
or 31,31,31; \
or 31,31,31; \
ALT_##TYPE##_SECTION_END(0, 1) \
or 1,1,1; \
or 1,1,1; \
/* Alt with smaller else case, should be padded with nops */ \
BEGIN_##TYPE##_SECTION \
or 3,3,3; \
or 3,3,3; \
or 3,3,3; \
##TYPE##_SECTION_ELSE \
or 31,31,31; \
ALT_##TYPE##_SECTION_END(0, 1) \
or 1,1,1; \
or 1,1,1; \
/* Alt section with nested section in default case */ \
/* Default case should be taken, with nop'ed inner section */ \
BEGIN_##TYPE##_SECTION \
or 3,3,3; \
BEGIN_##TYPE##_SECTION_NESTED(95) \
or 3,3,3; \
or 3,3,3; \
END_##TYPE##_SECTION_NESTED(0, 1, 95) \
or 3,3,3; \
##TYPE##_SECTION_ELSE \
or 2,2,2; \
or 2,2,2; \
ALT_##TYPE##_SECTION_END(0, 0) \
or 1,1,1; \
or 1,1,1; \
/* Alt section with nested section in else, default taken */ \
BEGIN_##TYPE##_SECTION \
or 3,3,3; \
or 3,3,3; \
or 3,3,3; \
##TYPE##_SECTION_ELSE \
or 5,5,5; \
BEGIN_##TYPE##_SECTION_NESTED(95) \
or 3,3,3; \
END_##TYPE##_SECTION_NESTED(0, 1, 95) \
or 5,5,5; \
ALT_##TYPE##_SECTION_END(0, 0) \
or 1,1,1; \
or 1,1,1; \
/* Alt section with nested section in else, else taken & nop */ \
BEGIN_##TYPE##_SECTION \
or 3,3,3; \
or 3,3,3; \
or 3,3,3; \
##TYPE##_SECTION_ELSE \
or 5,5,5; \
BEGIN_##TYPE##_SECTION_NESTED(95) \
or 3,3,3; \
END_##TYPE##_SECTION_NESTED(0, 1, 95) \
or 5,5,5; \
ALT_##TYPE##_SECTION_END(0, 1) \
or 1,1,1; \
or 1,1,1; \
/* Feature section with nested alt section, default taken */ \
BEGIN_##TYPE##_SECTION \
or 2,2,2; \
BEGIN_##TYPE##_SECTION_NESTED(95) \
or 1,1,1; \
##TYPE##_SECTION_ELSE_NESTED(95) \
or 5,5,5; \
ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) \
or 2,2,2; \
END_##TYPE##_SECTION(0, 0) \
or 1,1,1; \
or 1,1,1; \
/* Feature section with nested alt section, else taken */ \
BEGIN_##TYPE##_SECTION \
or 2,2,2; \
BEGIN_##TYPE##_SECTION_NESTED(95) \
or 1,1,1; \
##TYPE##_SECTION_ELSE_NESTED(95) \
or 5,5,5; \
ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) \
or 2,2,2; \
END_##TYPE##_SECTION(0, 0) \
or 1,1,1; \
or 1,1,1; \
/* Feature section with nested alt section, all nop'ed */ \
BEGIN_##TYPE##_SECTION \
or 2,2,2; \
BEGIN_##TYPE##_SECTION_NESTED(95) \
or 1,1,1; \
##TYPE##_SECTION_ELSE_NESTED(95) \
or 5,5,5; \
ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) \
or 2,2,2; \
END_##TYPE##_SECTION(0, 1) \
or 1,1,1; \
or 1,1,1; \
/* Nested alt sections, default with inner default taken */ \
BEGIN_##TYPE##_SECTION \
or 2,2,2; \
BEGIN_##TYPE##_SECTION_NESTED(95) \
or 1,1,1; \
##TYPE##_SECTION_ELSE_NESTED(95) \
or 5,5,5; \
ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) \
or 2,2,2; \
##TYPE##_SECTION_ELSE \
or 31,31,31; \
BEGIN_##TYPE##_SECTION_NESTED(94) \
or 5,5,5; \
##TYPE##_SECTION_ELSE_NESTED(94) \
or 1,1,1; \
ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) \
or 31,31,31; \
ALT_##TYPE##_SECTION_END(0, 0) \
or 1,1,1; \
or 1,1,1; \
/* Nested alt sections, default with inner else taken */ \
BEGIN_##TYPE##_SECTION \
or 2,2,2; \
BEGIN_##TYPE##_SECTION_NESTED(95) \
or 1,1,1; \
##TYPE##_SECTION_ELSE_NESTED(95) \
or 5,5,5; \
ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) \
or 2,2,2; \
##TYPE##_SECTION_ELSE \
or 31,31,31; \
BEGIN_##TYPE##_SECTION_NESTED(94) \
or 5,5,5; \
##TYPE##_SECTION_ELSE_NESTED(94) \
or 1,1,1; \
ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) \
or 31,31,31; \
ALT_##TYPE##_SECTION_END(0, 0) \
or 1,1,1; \
or 1,1,1; \
/* Nested alt sections, else with inner default taken */ \
BEGIN_##TYPE##_SECTION \
or 2,2,2; \
BEGIN_##TYPE##_SECTION_NESTED(95) \
or 1,1,1; \
##TYPE##_SECTION_ELSE_NESTED(95) \
or 5,5,5; \
ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) \
or 2,2,2; \
##TYPE##_SECTION_ELSE \
or 31,31,31; \
BEGIN_##TYPE##_SECTION_NESTED(94) \
or 5,5,5; \
##TYPE##_SECTION_ELSE_NESTED(94) \
or 1,1,1; \
ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) \
or 31,31,31; \
ALT_##TYPE##_SECTION_END(0, 1) \
or 1,1,1; \
or 1,1,1; \
/* Nested alt sections, else with inner else taken */ \
BEGIN_##TYPE##_SECTION \
or 2,2,2; \
BEGIN_##TYPE##_SECTION_NESTED(95) \
or 1,1,1; \
##TYPE##_SECTION_ELSE_NESTED(95) \
or 5,5,5; \
ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) \
or 2,2,2; \
##TYPE##_SECTION_ELSE \
or 31,31,31; \
BEGIN_##TYPE##_SECTION_NESTED(94) \
or 5,5,5; \
##TYPE##_SECTION_ELSE_NESTED(94) \
or 1,1,1; \
ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94) \
or 31,31,31; \
ALT_##TYPE##_SECTION_END(0, 1) \
or 1,1,1; \
or 1,1,1; \
/* Nested alt sections, else can have large else case */ \
BEGIN_##TYPE##_SECTION \
or 2,2,2; \
or 2,2,2; \
or 2,2,2; \
or 2,2,2; \
##TYPE##_SECTION_ELSE \
BEGIN_##TYPE##_SECTION_NESTED(94) \
or 5,5,5; \
or 5,5,5; \
or 5,5,5; \
or 5,5,5; \
##TYPE##_SECTION_ELSE_NESTED(94) \
or 1,1,1; \
or 1,1,1; \
or 1,1,1; \
or 1,1,1; \
ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94) \
ALT_##TYPE##_SECTION_END(0, 1) \
or 1,1,1; \
or 1,1,1;
/*
 * MAKE_MACRO_TEST_EXPECTED(TYPE) emits, under the label
 * ftr_fixup_test_<TYPE>_macros_expected, the instruction stream that the
 * self-test compares against the ftr_fixup_test_<TYPE>_macros region.
 * The BEGIN/ELSE/END section markers appear here only as comments so the
 * layout can be read side by side with the test source; instructions shown
 * inside comments are the ones that are expected NOT to be present after
 * patching, and "nop" marks an instruction slot expected to be nop'ed out.
 * The "or 1,1,1" pairs separate the individual test cases.
 */
#define MAKE_MACRO_TEST_EXPECTED(TYPE) \
globl(ftr_fixup_test_ ##TYPE##_macros_expected) \
or 1,1,1; \
/* Basic test, this section should all be nop'ed */ \
/* BEGIN_##TYPE##_SECTION */ \
nop; \
nop; \
nop; \
/* END_##TYPE##_SECTION(0, 1) */ \
or 1,1,1; \
or 1,1,1; \
/* Basic test, this section should NOT be nop'ed */ \
/* BEGIN_##TYPE##_SECTION */ \
or 2,2,2; \
or 2,2,2; \
or 2,2,2; \
/* END_##TYPE##_SECTION(0, 0) */ \
or 1,1,1; \
or 1,1,1; \
/* Nesting test, inner section should be nop'ed */ \
/* BEGIN_##TYPE##_SECTION */ \
or 2,2,2; \
or 2,2,2; \
/* BEGIN_##TYPE##_SECTION_NESTED(80) */ \
nop; \
nop; \
/* END_##TYPE##_SECTION_NESTED(0, 1, 80) */ \
or 2,2,2; \
or 2,2,2; \
/* END_##TYPE##_SECTION(0, 0) */ \
or 1,1,1; \
or 1,1,1; \
/* Nesting test, whole section should be nop'ed */ \
/* NB. inner section is not nop'ed, but then entire outer is */ \
/* BEGIN_##TYPE##_SECTION */ \
nop; \
nop; \
/* BEGIN_##TYPE##_SECTION_NESTED(80) */ \
nop; \
nop; \
/* END_##TYPE##_SECTION_NESTED(0, 0, 80) */ \
nop; \
nop; \
/* END_##TYPE##_SECTION(0, 1) */ \
or 1,1,1; \
or 1,1,1; \
/* Nesting test, none should be nop'ed */ \
/* BEGIN_##TYPE##_SECTION */ \
or 2,2,2; \
or 2,2,2; \
/* BEGIN_##TYPE##_SECTION_NESTED(80) */ \
or 3,3,3; \
or 3,3,3; \
/* END_##TYPE##_SECTION_NESTED(0, 0, 80) */ \
or 2,2,2; \
or 2,2,2; \
/* END_##TYPE##_SECTION(0, 0) */ \
or 1,1,1; \
or 1,1,1; \
/* Basic alt section test, default case should be taken */ \
/* BEGIN_##TYPE##_SECTION */ \
or 3,3,3; \
or 3,3,3; \
or 3,3,3; \
/* ##TYPE##_SECTION_ELSE */ \
/* or 5,5,5; */ \
/* or 5,5,5; */ \
/* ALT_##TYPE##_SECTION_END(0, 0) */ \
or 1,1,1; \
or 1,1,1; \
/* Basic alt section test, else case should be taken */ \
/* BEGIN_##TYPE##_SECTION */ \
/* or 3,3,3; */ \
/* or 3,3,3; */ \
/* or 3,3,3; */ \
/* ##TYPE##_SECTION_ELSE */ \
or 31,31,31; \
or 31,31,31; \
or 31,31,31; \
/* ALT_##TYPE##_SECTION_END(0, 1) */ \
or 1,1,1; \
or 1,1,1; \
/* Alt with smaller else case, should be padded with nops */ \
/* BEGIN_##TYPE##_SECTION */ \
/* or 3,3,3; */ \
/* or 3,3,3; */ \
/* or 3,3,3; */ \
/* ##TYPE##_SECTION_ELSE */ \
or 31,31,31; \
nop; \
nop; \
/* ALT_##TYPE##_SECTION_END(0, 1) */ \
or 1,1,1; \
or 1,1,1; \
/* Alt section with nested section in default case */ \
/* Default case should be taken, with nop'ed inner section */ \
/* BEGIN_##TYPE##_SECTION */ \
or 3,3,3; \
/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
nop; \
nop; \
/* END_##TYPE##_SECTION_NESTED(0, 1, 95) */ \
or 3,3,3; \
/* ##TYPE##_SECTION_ELSE */ \
/* or 2,2,2; */ \
/* or 2,2,2; */ \
/* ALT_##TYPE##_SECTION_END(0, 0) */ \
or 1,1,1; \
or 1,1,1; \
/* Alt section with nested section in else, default taken */ \
/* BEGIN_##TYPE##_SECTION */ \
or 3,3,3; \
or 3,3,3; \
or 3,3,3; \
/* ##TYPE##_SECTION_ELSE */ \
/* or 5,5,5; */ \
/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
/* or 3,3,3; */ \
/* END_##TYPE##_SECTION_NESTED(0, 1, 95) */ \
/* or 5,5,5; */ \
/* ALT_##TYPE##_SECTION_END(0, 0) */ \
or 1,1,1; \
or 1,1,1; \
/* Alt section with nested section in else, else taken & nop */ \
/* BEGIN_##TYPE##_SECTION */ \
/* or 3,3,3; */ \
/* or 3,3,3; */ \
/* or 3,3,3; */ \
/* ##TYPE##_SECTION_ELSE */ \
or 5,5,5; \
/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
nop; \
/* END_##TYPE##_SECTION_NESTED(0, 1, 95) */ \
or 5,5,5; \
/* ALT_##TYPE##_SECTION_END(0, 1) */ \
or 1,1,1; \
or 1,1,1; \
/* Feature section with nested alt section, default taken */ \
/* BEGIN_##TYPE##_SECTION */ \
or 2,2,2; \
/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
or 1,1,1; \
/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
/* or 5,5,5; */ \
/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) */ \
or 2,2,2; \
/* END_##TYPE##_SECTION(0, 0) */ \
or 1,1,1; \
or 1,1,1; \
/* Feature section with nested alt section, else taken */ \
/* BEGIN_##TYPE##_SECTION */ \
or 2,2,2; \
/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
/* or 1,1,1; */ \
/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
or 5,5,5; \
/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */ \
or 2,2,2; \
/* END_##TYPE##_SECTION(0, 0) */ \
or 1,1,1; \
or 1,1,1; \
/* Feature section with nested alt section, all nop'ed */ \
/* BEGIN_##TYPE##_SECTION */ \
nop; \
/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
nop; \
/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
/* or 5,5,5; */ \
/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) */ \
nop; \
/* END_##TYPE##_SECTION(0, 1) */ \
or 1,1,1; \
or 1,1,1; \
/* Nested alt sections, default with inner default taken */ \
/* BEGIN_##TYPE##_SECTION */ \
or 2,2,2; \
/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
or 1,1,1; \
/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
/* or 5,5,5; */ \
/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) */ \
or 2,2,2; \
/* ##TYPE##_SECTION_ELSE */ \
/* or 31,31,31; */ \
/* BEGIN_##TYPE##_SECTION_NESTED(94) */ \
/* or 5,5,5; */ \
/* ##TYPE##_SECTION_ELSE_NESTED(94) */ \
/* or 1,1,1; */ \
/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) */ \
/* or 31,31,31; */ \
/* ALT_##TYPE##_SECTION_END(0, 0) */ \
or 1,1,1; \
or 1,1,1; \
/* Nested alt sections, default with inner else taken */ \
/* BEGIN_##TYPE##_SECTION */ \
or 2,2,2; \
/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
/* or 1,1,1; */ \
/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
or 5,5,5; \
/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */ \
or 2,2,2; \
/* ##TYPE##_SECTION_ELSE */ \
/* or 31,31,31; */ \
/* BEGIN_##TYPE##_SECTION_NESTED(94) */ \
/* or 5,5,5; */ \
/* ##TYPE##_SECTION_ELSE_NESTED(94) */ \
/* or 1,1,1; */ \
/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) */ \
/* or 31,31,31; */ \
/* ALT_##TYPE##_SECTION_END(0, 0) */ \
or 1,1,1; \
or 1,1,1; \
/* Nested alt sections, else with inner default taken */ \
/* BEGIN_##TYPE##_SECTION */ \
/* or 2,2,2; */ \
/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
/* or 1,1,1; */ \
/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
/* or 5,5,5; */ \
/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */ \
/* or 2,2,2; */ \
/* ##TYPE##_SECTION_ELSE */ \
or 31,31,31; \
/* BEGIN_##TYPE##_SECTION_NESTED(94) */ \
or 5,5,5; \
/* ##TYPE##_SECTION_ELSE_NESTED(94) */ \
/* or 1,1,1; */ \
/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) */ \
or 31,31,31; \
/* ALT_##TYPE##_SECTION_END(0, 1) */ \
or 1,1,1; \
or 1,1,1; \
/* Nested alt sections, else with inner else taken */ \
/* BEGIN_##TYPE##_SECTION */ \
/* or 2,2,2; */ \
/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
/* or 1,1,1; */ \
/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
/* or 5,5,5; */ \
/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */ \
/* or 2,2,2; */ \
/* ##TYPE##_SECTION_ELSE */ \
or 31,31,31; \
/* BEGIN_##TYPE##_SECTION_NESTED(94) */ \
/* or 5,5,5; */ \
/* ##TYPE##_SECTION_ELSE_NESTED(94) */ \
or 1,1,1; \
/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94) */ \
or 31,31,31; \
/* ALT_##TYPE##_SECTION_END(0, 1) */ \
or 1,1,1; \
or 1,1,1; \
/* Nested alt sections, else can have large else case */ \
/* BEGIN_##TYPE##_SECTION */ \
/* or 2,2,2; */ \
/* or 2,2,2; */ \
/* or 2,2,2; */ \
/* or 2,2,2; */ \
/* ##TYPE##_SECTION_ELSE */ \
/* BEGIN_##TYPE##_SECTION_NESTED(94) */ \
/* or 5,5,5; */ \
/* or 5,5,5; */ \
/* or 5,5,5; */ \
/* or 5,5,5; */ \
/* ##TYPE##_SECTION_ELSE_NESTED(94) */ \
or 1,1,1; \
or 1,1,1; \
or 1,1,1; \
or 1,1,1; \
/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94) */ \
/* ALT_##TYPE##_SECTION_END(0, 1) */ \
or 1,1,1; \
or 1,1,1;
/* Instantiate the macro test and its expected image for the FTR macros. */
MAKE_MACRO_TEST(FTR);
MAKE_MACRO_TEST_EXPECTED(FTR);
#ifdef CONFIG_PPC64
/* Same pair for the FW_FTR macros; only built when CONFIG_PPC64 is set. */
MAKE_MACRO_TEST(FW_FTR);
MAKE_MACRO_TEST_EXPECTED(FW_FTR);
#endif
/*
 * lwsync fixup test.  The region lwsync_fixup_test..end_lwsync_fixup_test
 * is compared by the C self-test against one of the two images below:
 * the _LWSYNC image when the CPU has CPU_FTR_LWSYNC, otherwise the _SYNC
 * image.
 */
globl(lwsync_fixup_test)
1: or 1,1,1
LWSYNC
globl(end_lwsync_fixup_test)
globl(lwsync_fixup_test_expected_LWSYNC)
1: or 1,1,1
lwsync
globl(lwsync_fixup_test_expected_SYNC)
1: or 1,1,1
sync

View file

@ -0,0 +1,376 @@
/*
* Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
*
* Modifications for ppc64:
* Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
*
* Copyright 2008 Michael Ellerman, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/init.h>
#include <asm/cputable.h>
#include <asm/code-patching.h>
#include <asm/page.h>
#include <asm/sections.h>
/*
 * One entry of a feature-fixup table.
 *
 * A section is left untouched when (features & mask) == value; otherwise
 * it is overwritten with the alternative instructions and padded with nops.
 * All four *_off fields are byte offsets relative to the address of the
 * entry itself (see calc_addr()).
 */
struct fixup_entry {
/* feature bits that are examined */
unsigned long mask;
/* value the masked features must equal for the section to be kept */
unsigned long value;
/* start (inclusive) of the section to patch */
long start_off;
/* end (exclusive) of the section to patch */
long end_off;
/* start (inclusive) of the alternative instructions */
long alt_start_off;
/* end (exclusive) of the alternative; equal to alt_start_off if none */
long alt_end_off;
};
/*
 * Turn an offset stored in a fixup entry back into an instruction pointer.
 *
 * Offsets are recorded relative to the entry itself (typically negative,
 * which is what lets the same tables work for the VDSO), so the address is
 * simply the entry's own address plus the offset.
 */
static unsigned int *calc_addr(struct fixup_entry *fcur, long offset)
{
	unsigned long entry_addr = (unsigned long)fcur;

	return (unsigned int *)(entry_addr + offset);
}
/*
 * Copy one instruction of an alternative sequence from @src to @dest.
 *
 * A relative branch whose target lies outside [alt_start, alt_end) has to
 * be re-encoded for its new location; a branch that stays inside the
 * alternative is copied verbatim.
 *
 * Returns 0 on success, 1 if the branch could not be translated (in which
 * case nothing is written to @dest).
 */
static int patch_alt_instruction(unsigned int *src, unsigned int *dest,
				 unsigned int *alt_start, unsigned int *alt_end)
{
	unsigned int insn = *src;

	if (instr_is_relative_branch(insn)) {
		unsigned int *tgt = (unsigned int *)branch_target(src);
		int internal = (tgt >= alt_start && tgt < alt_end);

		if (!internal) {
			/* Leaves the alternative: recompute the displacement */
			insn = translate_branch(dest, src);
			if (insn == 0)
				return 1;
		}
	}

	patch_instruction(dest, insn);
	return 0;
}
/*
 * Apply a single fixup entry against the feature word @value.
 *
 * Returns 1 if the alternative is larger than the section it would replace
 * or if an instruction could not be copied; returns 0 otherwise, including
 * the case where the features match and nothing needs patching.  The size
 * check is done first, so an oversized alternative is reported even when
 * the section would have been left alone.
 */
static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
{
	unsigned int *sect_begin = calc_addr(fcur, fcur->start_off);
	unsigned int *sect_end = calc_addr(fcur, fcur->end_off);
	unsigned int *alt_begin = calc_addr(fcur, fcur->alt_start_off);
	unsigned int *alt_finish = calc_addr(fcur, fcur->alt_end_off);
	unsigned int *from, *to;

	if ((alt_finish - alt_begin) > (sect_end - sect_begin))
		return 1;

	/* Features match: the default code stays as it is */
	if ((value & fcur->mask) == fcur->value)
		return 0;

	/* Overwrite the section with the alternative ... */
	to = sect_begin;
	for (from = alt_begin; from < alt_finish; from++, to++) {
		if (patch_alt_instruction(from, to, alt_begin, alt_finish))
			return 1;
	}

	/* ... and fill whatever is left of it with nops */
	while (to < sect_end) {
		patch_instruction(to, PPC_INST_NOP);
		to++;
	}

	return 0;
}
/*
 * Walk the fixup table [fixup_start, fixup_end) and patch every section
 * according to the feature word @value.  A section that cannot be patched
 * triggers a warning and a message naming the section and its alternative;
 * processing then continues with the next entry.
 */
void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
{
	struct fixup_entry *entry = fixup_start;
	struct fixup_entry *limit = fixup_end;

	while (entry < limit) {
		if (patch_feature_section(value, entry)) {
			WARN_ON(1);
			printk("Unable to patch feature section at %p - %p"
			       " with %p - %p\n",
			       calc_addr(entry, entry->start_off),
			       calc_addr(entry, entry->end_off),
			       calc_addr(entry, entry->alt_start_off),
			       calc_addr(entry, entry->alt_end_off));
		}
		entry++;
	}
}
/*
 * Patch an lwsync instruction into every site listed in the table
 * [fixup_start, fixup_end), but only when @value has CPU_FTR_LWSYNC set.
 * Each table slot holds the offset from the slot itself to the instruction
 * that is to be replaced.
 */
void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
{
	long *slot = fixup_start;
	long *last = fixup_end;
	unsigned int *site;

	if ((value & CPU_FTR_LWSYNC) == 0)
		return;

	while (slot < last) {
		site = (void *)slot + *slot;
		patch_instruction(site, PPC_INST_LWSYNC);
		slot++;
	}
}
/*
 * On a relocatable 64-bit kernel that is not running at physical address
 * zero, copy the instructions between _stext and __end_interrupts from
 * KERNELBASE + PHYSICAL_START to KERNELBASE, one instruction at a time.
 * Does nothing in any other configuration.
 */
void do_final_fixups(void)
{
#if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE)
	int *from, *to;
	unsigned long i, count;

	if (PHYSICAL_START == 0)
		return;

	from = (int *)(KERNELBASE + PHYSICAL_START);
	to = (int *)KERNELBASE;
	count = (__end_interrupts - _stext) / sizeof(int);

	for (i = 0; i < count; i++)
		patch_instruction(to + i, from[i]);
#endif
}
#ifdef CONFIG_FTR_FIXUP_SELFTEST
/*
 * Report a failed self-test expectation together with the source line.
 * Wrapped in do { } while (0) so that "check(x);" is exactly one statement
 * and stays safe inside an unbraced if/else.
 */
#define check(x)							\
	do {								\
		if (!(x))						\
			printk("feature-fixups: test failed at line %d\n", \
			       __LINE__);				\
	} while (0)
/*
 * Scratch fixup entry shared by the self-tests below; each test fills in
 * the fields it needs before calling patch_feature_section().
 * This must be after the text it fixes up, vmlinux.lds.S enforces that atm.
 */
static struct fixup_entry fixup;
/*
 * Inverse of calc_addr(): the byte offset of instruction @p relative to
 * the fixup entry @entry, in the form stored in the entry's *_off fields.
 */
static long calc_offset(struct fixup_entry *entry, unsigned int *p)
{
	unsigned long target = (unsigned long)p;
	unsigned long base = (unsigned long)entry;

	return target - base;
}
/*
 * Section without an alternative: the single instruction at
 * ftr_fixup_test1 + 1 must be left alone when the features match and be
 * patched (giving the _expected image) when they do not.
 */
static void test_basic_patching(void)
{
	extern unsigned int ftr_fixup_test1;
	extern unsigned int end_ftr_fixup_test1;
	extern unsigned int ftr_fixup_test1_orig;
	extern unsigned int ftr_fixup_test1_expected;
	/*
	 * Length of the test region in BYTES, which is what memcmp() and
	 * memcpy() take.  Subtracting the unsigned int pointers directly
	 * yields an instruction count, so only the first
	 * 1/sizeof(unsigned int) of the region would be checked/restored.
	 */
	unsigned long size = (unsigned long)&end_ftr_fixup_test1 -
			     (unsigned long)&ftr_fixup_test1;

	fixup.value = fixup.mask = 8;
	fixup.start_off = calc_offset(&fixup, &ftr_fixup_test1 + 1);
	fixup.end_off = calc_offset(&fixup, &ftr_fixup_test1 + 2);
	fixup.alt_start_off = fixup.alt_end_off = 0;

	/* Sanity check */
	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);

	/* Check we don't patch if the value matches */
	patch_feature_section(8, &fixup);
	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);

	/* Check we do patch if the value doesn't match */
	patch_feature_section(0, &fixup);
	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);

	/* Check we do patch if the mask doesn't match */
	memcpy(&ftr_fixup_test1, &ftr_fixup_test1_orig, size);
	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
	patch_feature_section(~8, &fixup);
	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);
}
/*
 * Section with a same-sized alternative: one instruction at
 * ftr_fixup_test2 + 1 is replaced by the one at ftr_fixup_test2_alt when
 * the features do not match.
 */
static void test_alternative_patching(void)
{
	extern unsigned int ftr_fixup_test2;
	extern unsigned int end_ftr_fixup_test2;
	extern unsigned int ftr_fixup_test2_orig;
	extern unsigned int ftr_fixup_test2_alt;
	extern unsigned int ftr_fixup_test2_expected;
	/*
	 * Region length in bytes for memcmp()/memcpy(); plain subtraction
	 * of the unsigned int pointers would give an instruction count.
	 */
	unsigned long size = (unsigned long)&end_ftr_fixup_test2 -
			     (unsigned long)&ftr_fixup_test2;

	fixup.value = fixup.mask = 0xF;
	fixup.start_off = calc_offset(&fixup, &ftr_fixup_test2 + 1);
	fixup.end_off = calc_offset(&fixup, &ftr_fixup_test2 + 2);
	fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test2_alt);
	fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test2_alt + 1);

	/* Sanity check */
	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);

	/* Check we don't patch if the value matches */
	patch_feature_section(0xF, &fixup);
	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);

	/* Check we do patch if the value doesn't match */
	patch_feature_section(0, &fixup);
	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);

	/* Check we do patch if the mask doesn't match */
	memcpy(&ftr_fixup_test2, &ftr_fixup_test2_orig, size);
	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
	patch_feature_section(~0xF, &fixup);
	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);
}
/*
 * Alternative (two instructions) larger than the section (one
 * instruction): patch_feature_section() must refuse, return 1 and leave
 * the code untouched whatever the feature value is.
 */
static void test_alternative_case_too_big(void)
{
	extern unsigned int ftr_fixup_test3;
	extern unsigned int end_ftr_fixup_test3;
	extern unsigned int ftr_fixup_test3_orig;
	extern unsigned int ftr_fixup_test3_alt;
	/*
	 * Region length in bytes for memcmp(); plain subtraction of the
	 * unsigned int pointers would give an instruction count.
	 */
	unsigned long size = (unsigned long)&end_ftr_fixup_test3 -
			     (unsigned long)&ftr_fixup_test3;

	fixup.value = fixup.mask = 0xC;
	fixup.start_off = calc_offset(&fixup, &ftr_fixup_test3 + 1);
	fixup.end_off = calc_offset(&fixup, &ftr_fixup_test3 + 2);
	fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test3_alt);
	fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test3_alt + 2);

	/* Sanity check */
	check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);

	/* Expect nothing to be patched, and the error returned to us */
	check(patch_feature_section(0xF, &fixup) == 1);
	check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
	check(patch_feature_section(0, &fixup) == 1);
	check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
	check(patch_feature_section(~0xF, &fixup) == 1);
	check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
}
/*
 * Alternative (two instructions) smaller than the section (four
 * instructions): the remainder must be nop-padded.  Also uses a feature
 * bit in the most significant byte of the feature word.
 */
static void test_alternative_case_too_small(void)
{
	extern unsigned int ftr_fixup_test4;
	extern unsigned int end_ftr_fixup_test4;
	extern unsigned int ftr_fixup_test4_orig;
	extern unsigned int ftr_fixup_test4_alt;
	extern unsigned int ftr_fixup_test4_expected;
	/*
	 * Region length in bytes for memcmp()/memcpy(); plain subtraction
	 * of the unsigned int pointers would give an instruction count.
	 */
	unsigned long size = (unsigned long)&end_ftr_fixup_test4 -
			     (unsigned long)&ftr_fixup_test4;
	unsigned long flag;

	/* Check a high-bit flag */
	flag = 1UL << ((sizeof(unsigned long) - 1) * 8);
	fixup.value = fixup.mask = flag;
	fixup.start_off = calc_offset(&fixup, &ftr_fixup_test4 + 1);
	fixup.end_off = calc_offset(&fixup, &ftr_fixup_test4 + 5);
	fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test4_alt);
	fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test4_alt + 2);

	/* Sanity check */
	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);

	/* Check we don't patch if the value matches */
	patch_feature_section(flag, &fixup);
	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);

	/* Check we do patch if the value doesn't match */
	patch_feature_section(0, &fixup);
	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);

	/* Check we do patch if the mask doesn't match */
	memcpy(&ftr_fixup_test4, &ftr_fixup_test4_orig, size);
	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
	patch_feature_section(~flag, &fixup);
	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);
}
/*
 * Compare the ftr_fixup_test5 region against its expected image.  No
 * patching is requested here, so the region is presumably fixed up by the
 * regular boot-time pass before this test runs.
 */
static void test_alternative_case_with_branch(void)
{
	extern unsigned int ftr_fixup_test5;
	extern unsigned int end_ftr_fixup_test5;
	extern unsigned int ftr_fixup_test5_expected;
	/*
	 * Region length in bytes for memcmp(); plain subtraction of the
	 * unsigned int pointers would give an instruction count.
	 */
	unsigned long size = (unsigned long)&end_ftr_fixup_test5 -
			     (unsigned long)&ftr_fixup_test5;

	check(memcmp(&ftr_fixup_test5, &ftr_fixup_test5_expected, size) == 0);
}
/*
 * Compare the ftr_fixup_test6 region against its expected image.  No
 * patching is requested here, so the region is presumably fixed up by the
 * regular boot-time pass before this test runs.
 */
static void test_alternative_case_with_external_branch(void)
{
	extern unsigned int ftr_fixup_test6;
	extern unsigned int end_ftr_fixup_test6;
	extern unsigned int ftr_fixup_test6_expected;
	/*
	 * Region length in bytes for memcmp(); plain subtraction of the
	 * unsigned int pointers would give an instruction count.
	 */
	unsigned long size = (unsigned long)&end_ftr_fixup_test6 -
			     (unsigned long)&ftr_fixup_test6;

	check(memcmp(&ftr_fixup_test6, &ftr_fixup_test6_expected, size) == 0);
}
/*
 * Verify the FTR section macros: the code between
 * ftr_fixup_test_FTR_macros and ftr_fixup_test_FTR_macros_expected must be
 * byte-for-byte equal to the expected image that starts at the latter
 * symbol.
 */
static void test_cpu_macros(void)
{
	extern u8 ftr_fixup_test_FTR_macros;
	extern u8 ftr_fixup_test_FTR_macros_expected;
	u8 *patched = &ftr_fixup_test_FTR_macros;
	u8 *wanted = &ftr_fixup_test_FTR_macros_expected;
	unsigned long nbytes = wanted - patched;

	/* Boot has already applied the fixups; only the result is compared */
	check(memcmp(patched, wanted, nbytes) == 0);
}
/*
 * Verify the FW_FTR section macros in the same way as test_cpu_macros().
 * Compiles to an empty function unless CONFIG_PPC64 is set.
 */
static void test_fw_macros(void)
{
#ifdef CONFIG_PPC64
	extern u8 ftr_fixup_test_FW_FTR_macros;
	extern u8 ftr_fixup_test_FW_FTR_macros_expected;
	u8 *patched = &ftr_fixup_test_FW_FTR_macros;
	u8 *wanted = &ftr_fixup_test_FW_FTR_macros_expected;
	unsigned long nbytes = wanted - patched;

	/* Boot has already applied the fixups; only the result is compared */
	check(memcmp(patched, wanted, nbytes) == 0);
#endif
}
/*
 * Verify the lwsync fixup: the test region must equal the lwsync image on
 * CPUs with CPU_FTR_LWSYNC and the sync image on all others.
 */
static void test_lwsync_macros(void)
{
	extern u8 lwsync_fixup_test;
	extern u8 end_lwsync_fixup_test;
	extern u8 lwsync_fixup_test_expected_LWSYNC;
	extern u8 lwsync_fixup_test_expected_SYNC;
	unsigned long nbytes = &end_lwsync_fixup_test - &lwsync_fixup_test;
	u8 *wanted;

	/* Boot has already applied the fixups; pick the matching image */
	if (cur_cpu_spec->cpu_features & CPU_FTR_LWSYNC)
		wanted = &lwsync_fixup_test_expected_LWSYNC;
	else
		wanted = &lwsync_fixup_test_expected_SYNC;

	check(memcmp(&lwsync_fixup_test, wanted, nbytes) == 0);
}
/*
 * Entry point of the feature-fixup self-tests, registered as a late
 * initcall.  Runs every test in turn; failures are only reported through
 * printk by check(), the function itself always returns 0.
 */
static int __init test_feature_fixups(void)
{
printk(KERN_DEBUG "Running feature fixup self-tests ...\n");
/* Tests that drive patch_feature_section() directly */
test_basic_patching();
test_alternative_patching();
test_alternative_case_too_big();
test_alternative_case_too_small();
/* Tests that only compare code against an expected image */
test_alternative_case_with_branch();
test_alternative_case_with_external_branch();
test_cpu_macros();
test_fw_macros();
test_lwsync_macros();
return 0;
}
late_initcall(test_feature_fixups);
#endif /* CONFIG_FTR_FIXUP_SELFTEST */

View file

@ -0,0 +1,110 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2010
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
#include <asm/processor.h>
#include <asm/ppc_asm.h>
/* Note: This code relies on -mminimal-toc */
/*
 * __arch_hweight8: bit count of the low byte of r3, result in r3.
 *
 * Default code tail-calls __sw_hweight8; the alternative uses popcntb and
 * keeps only the low byte of the result.  Going by the _IFCLR suffix the
 * default is presumably the variant kept when CPU_FTR_POPCNTB is clear.
 * The nops pad the default sequence to the length of the alternative.
 */
_GLOBAL(__arch_hweight8)
BEGIN_FTR_SECTION
b __sw_hweight8
nop
nop
FTR_SECTION_ELSE
PPC_POPCNTB(R3,R3)
clrldi r3,r3,64-8
blr
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
/*
 * __arch_hweight16: bit count of the low 16 bits of r3, result in r3.
 *
 * Default code tail-calls __sw_hweight16.  The alternative is itself a
 * nested alternative keyed on CPU_FTR_POPCNTD:
 *  - popcntb variant: per-byte counts, then the counts of the two low
 *    bytes are added and the low byte of the sum is kept;
 *  - popcntw variant: the value is first truncated to 16 bits, counted
 *    with popcntw, and the low byte of the result is kept.
 * The nops pad the default sequence to the length of the alternative.
 */
_GLOBAL(__arch_hweight16)
BEGIN_FTR_SECTION
b __sw_hweight16
nop
nop
nop
nop
FTR_SECTION_ELSE
BEGIN_FTR_SECTION_NESTED(50)
PPC_POPCNTB(R3,R3)
srdi r4,r3,8
add r3,r4,r3
clrldi r3,r3,64-8
blr
FTR_SECTION_ELSE_NESTED(50)
clrlwi r3,r3,16
PPC_POPCNTW(R3,R3)
clrldi r3,r3,64-8
blr
ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 50)
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
/*
 * __arch_hweight32: bit count of the low 32 bits of r3, result in r3.
 *
 * Default code tail-calls __sw_hweight32.  The alternative is a nested
 * alternative keyed on CPU_FTR_POPCNTD:
 *  - popcntb variant: per-byte counts, folded together by adding the
 *    value shifted right by 16 and then by 8; the low byte is the result;
 *  - popcntw variant: per-word count, of which the low byte is kept.
 * The nops pad the default sequence to the length of the alternative.
 */
_GLOBAL(__arch_hweight32)
BEGIN_FTR_SECTION
b __sw_hweight32
nop
nop
nop
nop
nop
nop
FTR_SECTION_ELSE
BEGIN_FTR_SECTION_NESTED(51)
PPC_POPCNTB(R3,R3)
srdi r4,r3,16
add r3,r4,r3
srdi r4,r3,8
add r3,r4,r3
clrldi r3,r3,64-8
blr
FTR_SECTION_ELSE_NESTED(51)
PPC_POPCNTW(R3,R3)
clrldi r3,r3,64-8
blr
ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 51)
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
/*
 * __arch_hweight64: bit count of all 64 bits of r3, result in r3.
 *
 * Default code tail-calls __sw_hweight64.  The alternative is a nested
 * alternative keyed on CPU_FTR_POPCNTD:
 *  - popcntb variant: per-byte counts, folded together by adding the
 *    value shifted right by 32, 16 and 8; the low byte is the result;
 *  - popcntd variant: doubleword count, of which the low byte is kept.
 * The nops pad the default sequence to the length of the alternative.
 */
_GLOBAL(__arch_hweight64)
BEGIN_FTR_SECTION
b __sw_hweight64
nop
nop
nop
nop
nop
nop
nop
nop
FTR_SECTION_ELSE
BEGIN_FTR_SECTION_NESTED(52)
PPC_POPCNTB(R3,R3)
srdi r4,r3,32
add r3,r4,r3
srdi r4,r3,16
add r3,r4,r3
srdi r4,r3,8
add r3,r4,r3
clrldi r3,r3,64-8
blr
FTR_SECTION_ELSE_NESTED(52)
PPC_POPCNTD(R3,R3)
clrldi r3,r3,64-8
blr
ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 52)
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)

379
arch/powerpc/lib/ldstfp.S Normal file
View file

@ -0,0 +1,379 @@
/*
* Floating-point, VMX/Altivec and VSX loads and stores
* for use in instruction emulation.
*
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/ppc-opcode.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <linux/errno.h>
#ifdef CONFIG_PPC_FPU
#define STKFRM (PPC_MIN_STKFRM + 16)
/*
 * extab: emit one __ex_table entry consisting of the address \instr and
 * the address \handler, then switch back to the previous section.
 */
.macro extab instr,handler
.section __ex_table,"a"
PPC_LONG \instr,\handler
.previous
.endm
/*
 * inst32: expand "\op reg,0,r4" 32 times with reg counting from 0 to 31.
 * Each copy is followed by a branch to local label 3 and gets an
 * exception-table entry whose handler is local label 99.
 * NOTE(review): no invocation of this macro appears in this file; it looks
 * unused -- confirm before relying on it.
 */
.macro inst32 op
reg = 0
.rept 32
20: \op reg,0,r4
b 3f
extab 20b,99f
reg = reg + 1
.endr
.endm
/*
 * Get the contents of frN into fr0; N is in r3.
 *
 * Implemented as a computed jump into a table of 32 two-instruction
 * entries (8 bytes each): r3 is turned into N * 8, "bcl 20,31,1f" leaves
 * the address of the table in LR, and the bctr at the end lands on entry
 * N, whose blr returns to the caller (LR is restored from r0 first).
 * Entry 0 is just "blr; nop".
 * Clobbers r0, r3, r5 and CTR.
 */
_GLOBAL(get_fpr)
mflr r0
rlwinm r3,r3,3,0xf8
bcl 20,31,1f
blr /* fr0 is already in fr0 */
nop
reg = 1
.rept 31
fmr fr0,reg
blr
reg = reg + 1
.endr
1: mflr r5
add r5,r3,r5
mtctr r5
mtlr r0
bctr
/*
 * Put the contents of fr0 into frN; N is in r3.
 *
 * Same computed-jump scheme as get_fpr: 32 table entries of 8 bytes each,
 * entry N doing "fmr N,fr0; blr" and entry 0 being "blr; nop".
 * Clobbers r0, r3, r5 and CTR.
 */
_GLOBAL(put_fpr)
mflr r0
rlwinm r3,r3,3,0xf8
bcl 20,31,1f
blr /* fr0 is already in fr0 */
nop
reg = 1
.rept 31
fmr reg,fr0
blr
reg = reg + 1
.endr
1: mflr r5
add r5,r3,r5
mtctr r5
mtlr r0
bctr
/*
 * Load FP reg N from float at *p.  N is in r3, p in r4.
 * Returns 0 in r3 on success, -EFAULT if the load faulted.
 *
 * MSR_FP is switched on for the duration and the original MSR (kept in r6)
 * is restored before returning.  The value is loaded into fr0 and moved to
 * frN by put_fpr; when N != 0 the caller's fr0 is saved on the stack
 * before and reloaded afterwards.
 * r9 is preset to -EFAULT: the exception-table entry for the load at
 * label 2 resumes at label 3, which skips the "li r9,0".
 * NOTE(review): label 3 still calls put_fpr, so after a fault frN receives
 * whatever fr0 currently holds -- confirm this is acceptable.
 */
_GLOBAL(do_lfs)
PPC_STLU r1,-STKFRM(r1)
mflr r0
PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
mfmsr r6
ori r7,r6,MSR_FP
cmpwi cr7,r3,0
MTMSRD(r7)
isync
beq cr7,1f
stfd fr0,STKFRM-16(r1)
1: li r9,-EFAULT
2: lfs fr0,0(r4)
li r9,0
3: bl put_fpr
beq cr7,4f
lfd fr0,STKFRM-16(r1)
4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
mtlr r0
MTMSRD(r6)
isync
mr r3,r9
addi r1,r1,STKFRM
blr
extab 2b,3b
/*
 * Load FP reg N from double at *p.  N is in r3, p in r4.
 * Returns 0 in r3 on success, -EFAULT if the load faulted.
 *
 * MSR_FP is switched on for the duration and the original MSR (kept in r6)
 * is restored before returning.  The value is loaded into fr0; for N != 0
 * it is then moved to frN by put_fpr and the caller's fr0, saved on the
 * stack beforehand, is reloaded.  For N == 0 the loaded value simply stays
 * in fr0.
 * r9 is preset to -EFAULT: the exception-table entry for the load at
 * label 2 resumes at label 3, which skips the "li r9,0".
 * NOTE(review): for N != 0 the fault path still runs put_fpr, copying the
 * current fr0 into frN -- confirm this is acceptable.
 */
_GLOBAL(do_lfd)
PPC_STLU r1,-STKFRM(r1)
mflr r0
PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
mfmsr r6
ori r7,r6,MSR_FP
cmpwi cr7,r3,0
MTMSRD(r7)
isync
beq cr7,1f
stfd fr0,STKFRM-16(r1)
1: li r9,-EFAULT
2: lfd fr0,0(r4)
li r9,0
3: beq cr7,4f
bl put_fpr
lfd fr0,STKFRM-16(r1)
4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
mtlr r0
MTMSRD(r6)
isync
mr r3,r9
addi r1,r1,STKFRM
blr
extab 2b,3b
/*
 * Store FP reg N to float at *p.  N is in r3, p in r4.
 * Returns 0 in r3 on success, -EFAULT if the store faulted.
 *
 * MSR_FP is switched on for the duration and the original MSR (kept in r6)
 * is restored before returning.  For N != 0 the caller's fr0 is saved on
 * the stack, frN is fetched into fr0 by get_fpr, and fr0 is reloaded after
 * the store; for N == 0 fr0 is stored directly.
 * r9 is preset to -EFAULT: the exception-table entry for the store at
 * label 2 resumes at label 3, which skips the "li r9,0".
 */
_GLOBAL(do_stfs)
PPC_STLU r1,-STKFRM(r1)
mflr r0
PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
mfmsr r6
ori r7,r6,MSR_FP
cmpwi cr7,r3,0
MTMSRD(r7)
isync
beq cr7,1f
stfd fr0,STKFRM-16(r1)
bl get_fpr
1: li r9,-EFAULT
2: stfs fr0,0(r4)
li r9,0
3: beq cr7,4f
lfd fr0,STKFRM-16(r1)
4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
mtlr r0
MTMSRD(r6)
isync
mr r3,r9
addi r1,r1,STKFRM
blr
extab 2b,3b
/*
 * Store FP reg N to double at *p.  N is in r3, p in r4.
 * Returns 0 in r3 on success, -EFAULT if the store faulted.
 *
 * MSR_FP is switched on for the duration and the original MSR (kept in r6)
 * is restored before returning.  For N != 0 the caller's fr0 is saved on
 * the stack, frN is fetched into fr0 by get_fpr, and fr0 is reloaded after
 * the store; for N == 0 fr0 is stored directly.
 * r9 is preset to -EFAULT: the exception-table entry for the store at
 * label 2 resumes at label 3, which skips the "li r9,0".
 */
_GLOBAL(do_stfd)
PPC_STLU r1,-STKFRM(r1)
mflr r0
PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
mfmsr r6
ori r7,r6,MSR_FP
cmpwi cr7,r3,0
MTMSRD(r7)
isync
beq cr7,1f
stfd fr0,STKFRM-16(r1)
bl get_fpr
1: li r9,-EFAULT
2: stfd fr0,0(r4)
li r9,0
3: beq cr7,4f
lfd fr0,STKFRM-16(r1)
4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
mtlr r0
MTMSRD(r6)
isync
mr r3,r9
addi r1,r1,STKFRM
blr
extab 2b,3b
#ifdef CONFIG_ALTIVEC
/*
 * Get the contents of vrN into vr0; N is in r3.
 *
 * Computed jump into a table of 32 two-instruction (8-byte) entries:
 * r3 becomes N * 8, "bcl 20,31,1f" leaves the table address in LR, and
 * the final bctr lands on entry N, whose blr returns to the caller.
 * Clobbers r0, r3, r5 and CTR.
 */
_GLOBAL(get_vr)
mflr r0
rlwinm r3,r3,3,0xf8
bcl 20,31,1f
blr /* vr0 is already in vr0 */
nop
reg = 1
.rept 31
vor vr0,reg,reg /* assembler doesn't know vmr? */
blr
reg = reg + 1
.endr
1: mflr r5
add r5,r3,r5
mtctr r5
mtlr r0
bctr
/*
 * Put the contents of vr0 into vrN; N is in r3.
 *
 * Same computed-jump scheme as get_vr, with entry N doing
 * "vor N,vr0,vr0; blr" and entry 0 being "blr; nop".
 * Clobbers r0, r3, r5 and CTR.
 */
_GLOBAL(put_vr)
mflr r0
rlwinm r3,r3,3,0xf8
bcl 20,31,1f
blr /* vr0 is already in vr0 */
nop
reg = 1
.rept 31
vor reg,vr0,vr0
blr
reg = reg + 1
.endr
1: mflr r5
add r5,r3,r5
mtctr r5
mtlr r0
bctr
/*
 * Load vector reg N from *p.  N is in r3, p in r4.
 * Returns 0 in r3 on success, -EFAULT if the load faulted.
 *
 * MSR_VEC is switched on for the duration and the original MSR (kept in
 * r6) is restored before returning.  The value is loaded into vr0; for
 * N != 0 it is moved to vrN by put_vr and the caller's vr0, saved at
 * r1 + r8 beforehand, is reloaded.
 * r9 is preset to -EFAULT: the exception-table entry for the load at
 * label 2 resumes at label 3, which skips the "li r9,0".
 * NOTE(review): for N != 0 the fault path still runs put_vr, copying the
 * current vr0 into vrN -- confirm this is acceptable.
 */
_GLOBAL(do_lvx)
PPC_STLU r1,-STKFRM(r1)
mflr r0
PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
mfmsr r6
oris r7,r6,MSR_VEC@h
cmpwi cr7,r3,0
li r8,STKFRM-16
MTMSRD(r7)
isync
beq cr7,1f
stvx vr0,r1,r8
1: li r9,-EFAULT
2: lvx vr0,0,r4
li r9,0
3: beq cr7,4f
bl put_vr
lvx vr0,r1,r8
4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
mtlr r0
MTMSRD(r6)
isync
mr r3,r9
addi r1,r1,STKFRM
blr
extab 2b,3b
/*
 * Store vector reg N to *p.  N is in r3, p in r4.
 * Returns 0 in r3 on success, -EFAULT if the store faulted.
 *
 * MSR_VEC is switched on for the duration and the original MSR (kept in
 * r6) is restored before returning.  For N != 0 the caller's vr0 is saved
 * at r1 + r8, vrN is fetched into vr0 by get_vr, and vr0 is reloaded after
 * the store; for N == 0 vr0 is stored directly.
 * r9 is preset to -EFAULT: the exception-table entry for the store at
 * label 2 resumes at label 3, which skips the "li r9,0".
 */
_GLOBAL(do_stvx)
PPC_STLU r1,-STKFRM(r1)
mflr r0
PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
mfmsr r6
oris r7,r6,MSR_VEC@h
cmpwi cr7,r3,0
li r8,STKFRM-16
MTMSRD(r7)
isync
beq cr7,1f
stvx vr0,r1,r8
bl get_vr
1: li r9,-EFAULT
2: stvx vr0,0,r4
li r9,0
3: beq cr7,4f
lvx vr0,r1,r8
4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
mtlr r0
MTMSRD(r6)
isync
mr r3,r9
addi r1,r1,STKFRM
blr
extab 2b,3b
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
/*
 * Get the contents of vsrN into vsr0; N is in r3.
 *
 * Computed jump into a table of 64 two-instruction (8-byte) entries:
 * r3 becomes N * 8 (mask 0x1f8), "bcl 20,31,1f" leaves the table address
 * in LR, and the final bctr lands on entry N, whose blr returns to the
 * caller.
 * Clobbers r0, r3, r5 and CTR.
 */
_GLOBAL(get_vsr)
mflr r0
rlwinm r3,r3,3,0x1f8
bcl 20,31,1f
blr /* vsr0 is already in vsr0 */
nop
reg = 1
.rept 63
XXLOR(0,reg,reg)
blr
reg = reg + 1
.endr
1: mflr r5
add r5,r3,r5
mtctr r5
mtlr r0
bctr
/*
 * Put the contents of vsr0 into vsrN; N is in r3.
 *
 * Same computed-jump scheme as get_vsr (64 entries of 8 bytes), with
 * entry N doing "XXLOR(N,0,0); blr" and entry 0 being "blr; nop".
 * Clobbers r0, r3, r5 and CTR.
 */
_GLOBAL(put_vsr)
mflr r0
rlwinm r3,r3,3,0x1f8
bcl 20,31,1f
blr /* vsr0 is already in vsr0 */
nop
reg = 1
.rept 63
XXLOR(reg,0,0)
blr
reg = reg + 1
.endr
1: mflr r5
add r5,r3,r5
mtctr r5
mtlr r0
bctr
/*
 * Load VSX reg N from vector doubleword *p.  N is in r3, p in r4.
 * Returns 0 in r3 on success, -EFAULT if the load faulted.
 *
 * MSR_VSX is switched on for the duration and the original MSR (kept in
 * r6) is restored before returning.  The value is loaded into vsr0; for
 * N != 0 it is moved to vsrN by put_vsr and the caller's vsr0, saved at
 * r1 + r8 beforehand, is reloaded.
 * r9 is preset to -EFAULT: the exception-table entry for the load at
 * label 2 resumes at label 3, which skips the "li r9,0".
 * NOTE(review): for N != 0 the fault path still runs put_vsr, copying the
 * current vsr0 into vsrN -- confirm this is acceptable.
 */
_GLOBAL(do_lxvd2x)
PPC_STLU r1,-STKFRM(r1)
mflr r0
PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
mfmsr r6
oris r7,r6,MSR_VSX@h
cmpwi cr7,r3,0
li r8,STKFRM-16
MTMSRD(r7)
isync
beq cr7,1f
STXVD2X(0,R1,R8)
1: li r9,-EFAULT
2: LXVD2X(0,R0,R4)
li r9,0
3: beq cr7,4f
bl put_vsr
LXVD2X(0,R1,R8)
4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
mtlr r0
MTMSRD(r6)
isync
mr r3,r9
addi r1,r1,STKFRM
blr
extab 2b,3b
/*
 * Store VSX reg N to vector doubleword *p.  N is in r3, p in r4.
 * Returns 0 in r3 on success, -EFAULT if the store faulted.
 *
 * MSR_VSX is switched on for the duration and the original MSR (kept in
 * r6) is restored before returning.  For N != 0 the caller's vsr0 is
 * saved at r1 + r8, vsrN is fetched into vsr0 by get_vsr, and vsr0 is
 * reloaded after the store; for N == 0 vsr0 is stored directly.
 * r9 is preset to -EFAULT: the exception-table entry for the store at
 * label 2 resumes at label 3, which skips the "li r9,0".
 */
_GLOBAL(do_stxvd2x)
PPC_STLU r1,-STKFRM(r1)
mflr r0
PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
mfmsr r6
oris r7,r6,MSR_VSX@h
cmpwi cr7,r3,0
li r8,STKFRM-16
MTMSRD(r7)
isync
beq cr7,1f
STXVD2X(0,R1,R8)
bl get_vsr
1: li r9,-EFAULT
2: STXVD2X(0,R0,R4)
li r9,0
3: beq cr7,4f
LXVD2X(0,R1,R8)
4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
mtlr r0
MTMSRD(r6)
isync
mr r3,r9
addi r1,r1,STKFRM
blr
extab 2b,3b
#endif /* CONFIG_VSX */
#endif /* CONFIG_PPC_FPU */

85
arch/powerpc/lib/locks.c Normal file
View file

@ -0,0 +1,85 @@
/*
* Spin and read/write lock operations.
*
* Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
* Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
* Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
* Rework to support virtual processors
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/stringify.h>
#include <linux/smp.h>
/* waiting for a spinlock... */
#if defined(CONFIG_PPC_SPLPAR)
#include <asm/hvcall.h>
#include <asm/smp.h>
/*
 * Called while spinning on @lock: if the lock is held and the holder's
 * virtual cpu is not currently running, confer our time slice to it via
 * the H_CONFER hypercall.  Does nothing when the lock is free, the holder
 * is running, or the lock word changed while we were looking.
 */
void __spin_yield(arch_spinlock_t *lock)
{
	unsigned int snapshot, owner, yields;

	snapshot = lock->slock;
	if (!snapshot)
		return;

	/* The low 16 bits of the lock word identify the holding cpu */
	owner = snapshot & 0xffff;
	BUG_ON(owner >= NR_CPUS);

	yields = be32_to_cpu(lppaca_of(owner).yield_count);
	if (!(yields & 1))
		return;		/* virtual cpu is currently running */

	rmb();
	if (lock->slock != snapshot)
		return;		/* something has changed */

	plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(owner),
			   yields);
}
/*
 * Called while waiting for either a read lock or a write lock on @rw.
 * Both cases are handled identically because the holder is only known
 * when the lock is write-locked (negative lock word); in that case, and
 * if the holder's virtual cpu is not running, our time slice is conferred
 * to it via H_CONFER.
 */
void __rw_yield(arch_rwlock_t *rw)
{
	int snapshot;
	unsigned int owner, yields;

	snapshot = rw->lock;
	if (snapshot >= 0)
		return;		/* no write lock at present */

	/* The low 16 bits of the lock word identify the holding cpu */
	owner = snapshot & 0xffff;
	BUG_ON(owner >= NR_CPUS);

	yields = be32_to_cpu(lppaca_of(owner).yield_count);
	if (!(yields & 1))
		return;		/* virtual cpu is currently running */

	rmb();
	if (rw->lock != snapshot)
		return;		/* something has changed */

	plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(owner),
			   yields);
}
#endif
/*
 * Spin until @lock is observed free, without taking it.  The wait runs at
 * low thread priority and, on shared processors, yields to the lock
 * holder; priority is set back to medium afterwards.  Full barriers
 * bracket the wait.
 */
void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
	smp_mb();

	for (;;) {
		if (!lock->slock)
			break;
		HMT_low();
		if (SHARED_PROCESSOR)
			__spin_yield(lock);
	}
	HMT_medium();

	smp_mb();
}
EXPORT_SYMBOL(arch_spin_unlock_wait);

119
arch/powerpc/lib/mem_64.S Normal file
View file

@ -0,0 +1,119 @@
/*
* String handling functions for PowerPC.
*
* Copyright (C) 1996 Paul Mackerras.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
/*
 * memset: r3 = destination, r4 = fill byte, r5 = length.
 * r3 is never modified, so the destination pointer is what is returned;
 * r6 is the running store pointer.
 *
 * The fill byte is first replicated across all 8 bytes of r4.  Then:
 *   - up to 7 bytes are stored to bring r6 to an 8-byte boundary,
 *   - the bulk is stored 64 bytes per loop iteration (label 4),
 *   - the remaining doublewords (32/16/8 bytes) follow,
 *   - and finally the last 0-7 bytes (label 8).
 * The bf/bflr tests read CR bits 29-31, loaded by PPC_MTOCRF from the low
 * bits of the relevant count.
 * NOTE(review): "cmplw cr1,r5,r0" compares only the low 32 bits of the
 * length -- confirm that is intended.
 */
_GLOBAL(memset)
neg r0,r3
rlwimi r4,r4,8,16,23
andi. r0,r0,7 /* # bytes to be 8-byte aligned */
rlwimi r4,r4,16,0,15
cmplw cr1,r5,r0 /* do we get that far? */
rldimi r4,r4,32,0
PPC_MTOCRF(1,r0)
mr r6,r3
blt cr1,8f
beq+ 3f /* if already 8-byte aligned */
subf r5,r0,r5
bf 31,1f
stb r4,0(r6)
addi r6,r6,1
1: bf 30,2f
sth r4,0(r6)
addi r6,r6,2
2: bf 29,3f
stw r4,0(r6)
addi r6,r6,4
/* r0 = number of 64-byte blocks, r5 = bytes left after them */
3: srdi. r0,r5,6
clrldi r5,r5,58
mtctr r0
beq 5f
4: std r4,0(r6)
std r4,8(r6)
std r4,16(r6)
std r4,24(r6)
std r4,32(r6)
std r4,40(r6)
std r4,48(r6)
std r4,56(r6)
addi r6,r6,64
bdnz 4b
/* r0 = number of remaining doublewords, r5 = trailing bytes (0-7) */
5: srwi. r0,r5,3
clrlwi r5,r5,29
PPC_MTOCRF(1,r0)
beq 8f
bf 29,6f
std r4,0(r6)
std r4,8(r6)
std r4,16(r6)
std r4,24(r6)
addi r6,r6,32
6: bf 30,7f
std r4,0(r6)
std r4,8(r6)
addi r6,r6,16
7: bf 31,8f
std r4,0(r6)
addi r6,r6,8
/* trailing bytes: word, halfword, byte as selected by the bits of r5 */
8: cmpwi r5,0
PPC_MTOCRF(1,r5)
beqlr+
bf 29,9f
stw r4,0(r6)
addi r6,r6,4
9: bf 30,10f
sth r4,0(r6)
addi r6,r6,2
10: bflr 31
stb r4,0(r6)
blr
/*
 * memmove: r3 = destination, r4 = source, r5 = length.
 * When the destination is above the source the copy runs from the end
 * (backwards_memcpy); otherwise it is handed to memcpy.
 * NOTE(review): cmplw is a 32-bit unsigned compare, so only the low words
 * of the two pointers are compared -- confirm that is intended.
 */
_GLOBAL_TOC(memmove)
cmplw 0,r3,r4
bgt backwards_memcpy
b memcpy
/*
 * backwards_memcpy: r3 = destination, r4 = source, r5 = length.
 * Copies from the highest address downwards.  r6 and r4 are first moved to
 * one past the end of destination and source; r3 is left untouched.
 *
 *   label 1: main loop, 8 bytes per iteration as two word loads/stores
 *   label 2: one more word if at least 4 bytes remain
 *   label 4: remaining bytes one at a time
 *   label 5: entered when the destination end is not 4-byte aligned;
 *            copies (end & 3) bytes singly, then recomputes the 8-byte
 *            count and rejoins the main loop
 */
_GLOBAL(backwards_memcpy)
rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3 */
add r6,r3,r5
add r4,r4,r5
beq 2f
andi. r0,r6,3
mtctr r7
bne 5f
1: lwz r7,-4(r4)
lwzu r8,-8(r4)
stw r7,-4(r6)
stwu r8,-8(r6)
bdnz 1b
andi. r5,r5,7
2: cmplwi 0,r5,4
blt 3f
lwzu r0,-4(r4)
subi r5,r5,4
stwu r0,-4(r6)
3: cmpwi 0,r5,0
beqlr
mtctr r5
4: lbzu r0,-1(r4)
stbu r0,-1(r6)
bdnz 4b
blr
5: mtctr r0
6: lbzu r7,-1(r4)
stbu r7,-1(r6)
bdnz 6b
subf r5,r0,r5
rlwinm. r7,r5,32-3,3,31
beq 2b
mtctr r7
b 1b

View file

@ -0,0 +1,221 @@
/*
* Copyright (C) 2002 Paul Mackerras, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
#include <asm/ppc_asm.h>
/*
 * memcpy(dest=r3, src=r4, n=r5)
 *
 * Entry point and destination-aligned path.  The first feature section
 * is the code used while CPU_FTR_VMX_COPY is clear; its alternative is a
 * branch to memcpy_power7 (omitted when SELFTEST is defined).
 *
 * Big-endian build: the original dest is stashed below the stack pointer
 * and reloaded into r3 before every return.
 * Little-endian build: a plain byte loop; r3 is left untouched.
 */
.align 7
_GLOBAL_TOC(memcpy)
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
cmpdi cr7,r5,0
#else
std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* save destination pointer for return value */
#endif
FTR_SECTION_ELSE
#ifndef SELFTEST
b memcpy_power7
#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#ifdef __LITTLE_ENDIAN__
/* dumb little-endian memcpy that will get replaced at runtime */
/* pointers are pre-decremented because lbzu/stbu update before access */
addi r9,r3,-1
addi r4,r4,-1
/* cr7 was set by the cmpdi above: return at once for n == 0 */
beqlr cr7
mtctr r5
1: lbzu r10,1(r4)
stbu r10,1(r9)
bdnz 1b
blr
#else
/* cr7 = low four bits of n (bit 0: 8, bit 1: 4, bit 2: 2, bit 3: 1) */
PPC_MTOCRF(0x01,r5)
/* cr1 = n compared with 16 */
cmpldi cr1,r5,16
neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
andi. r6,r6,7
dcbt 0,r4
blt cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
cleared.
At the time of writing the only CPU that has this combination of bits
set is Power6. */
BEGIN_FTR_SECTION
nop
FTR_SECTION_ELSE
bne .Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
/* bias dest by -16 to suit the 8/16 displacements used in the loop */
addi r3,r3,-16
BEGIN_FTR_SECTION
/* r0 = src & 7; a misaligned source takes the shift-and-merge path */
andi. r0,r4,7
bne .Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
/* ctr = number of 16-byte pairs; r9 = first source doubleword */
srdi r7,r5,4
ld r9,0(r4)
addi r4,r4,-8
mtctr r7
/* r5 = n & 7; cr0.eq is tested by the "beq 3f" after the loop */
andi. r5,r5,7
/* bit 8 of n clear: even number of doublewords, enter loop at 2: */
bf cr7*4+0,2f
addi r3,r3,8
addi r4,r4,8
mr r8,r9
/* fewer than 16 bytes: only the single doubleword in r8 to store */
blt cr1,3f
/* two doublewords per iteration, loads running ahead of stores */
1: ld r9,8(r4)
std r8,8(r3)
2: ldu r8,16(r4)
stdu r9,16(r3)
bdnz 1b
3: std r8,8(r3)
/* no sub-doubleword tail: return via the final 3: label */
beq 3f
addi r3,r3,16
/* copy the last 4, 2 and 1 bytes as selected by the cr7 bits */
.Ldo_tail:
bf cr7*4+1,1f
lwz r9,8(r4)
addi r4,r4,4
stw r9,0(r3)
addi r3,r3,4
1: bf cr7*4+2,2f
lhz r9,8(r4)
addi r4,r4,2
sth r9,0(r3)
addi r3,r3,2
2: bf cr7*4+3,3f
lbz r9,8(r4)
stb r9,0(r3)
3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
blr
/*
 * Source not 8-byte aligned (dest is).  On entry r0 = src & 7, set by the
 * andi. before the branch here.  The source pointer is rounded down by r0
 * and every output doubleword is assembled from two neighbouring aligned
 * loads: one shifted left by r10 = 8*r0 bits, the next shifted right by
 * r11 = 64 - r10 bits, then OR-ed together.
 */
.Lsrc_unaligned:
srdi r6,r5,3
addi r5,r5,-16
subf r4,r0,r4
srdi r7,r5,4
sldi r10,r0,3
/* cr6 = (n >> 3) compared with 3; picks the short exits below */
cmpdi cr6,r6,3
/* cr0.eq (no sub-doubleword tail) is consumed by the "beq 4f" below */
andi. r5,r5,7
mtctr r7
subfic r11,r10,64
add r5,r5,r0
bt cr7*4+0,0f
ld r9,0(r4) # 3+2n loads, 2+2n stores
ld r0,8(r4)
sld r6,r9,r10
ldu r9,16(r4)
srd r7,r0,r11
sld r8,r0,r10
or r7,r7,r6
blt cr6,4f
ld r0,8(r4)
# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
b 2f
0: ld r0,0(r4) # 4+2n loads, 3+2n stores
ldu r9,8(r4)
sld r8,r0,r10
addi r3,r3,-8
blt cr6,5f
ld r0,8(r4)
srd r12,r9,r11
sld r6,r9,r10
ldu r9,16(r4)
or r12,r8,r12
srd r7,r0,r11
sld r8,r0,r10
addi r3,r3,16
beq cr6,3f
# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
/* main loop: two merged doublewords stored per iteration */
1: or r7,r7,r6
ld r0,8(r4)
std r12,8(r3)
2: srd r12,r9,r11
sld r6,r9,r10
ldu r9,16(r4)
or r12,r8,r12
stdu r7,16(r3)
srd r7,r0,r11
sld r8,r0,r10
bdnz 1b
/* drain the doublewords still held in registers */
3: std r12,8(r3)
or r7,r7,r6
4: std r7,16(r3)
5: srd r12,r9,r11
or r12,r8,r12
std r12,24(r3)
/* "4f" resolves to the next "4:" label further down the file, which
   reloads the saved dest pointer and returns */
beq 4f
cmpwi cr1,r5,8
addi r3,r3,32
sld r9,r9,r10
/* r5 <= 8: the tail bytes are already in r9; otherwise merge in one more load */
ble cr1,6f
ld r0,8(r4)
srd r7,r0,r11
or r9,r7,r9
6:
/* r9 holds the tail bytes in its high-order end; each rotate brings the
   next 4/2/1 bytes down to the low-order end for the store */
bf cr7*4+1,1f
rotldi r9,r9,32
stw r9,0(r3)
addi r3,r3,4
1: bf cr7*4+2,2f
rotldi r9,r9,16
sth r9,0(r3)
addi r3,r3,2
2: bf cr7*4+3,3f
rotldi r9,r9,8
stb r9,0(r3)
3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
blr
/*
 * Destination not 8-byte aligned.  r6 = number of bytes (1..7) needed to
 * reach the next 8-byte boundary.  Those bytes are copied as a 1-, 2- and
 * 4-byte piece (selected by the bits of r6 in cr7), r7 tracking the
 * offset already copied.  Afterwards cr7 and cr1 are recomputed for the
 * reduced length and control rejoins the aligned path.
 */
.Ldst_unaligned:
PPC_MTOCRF(0x01,r6) # put #bytes to 8B bdry into cr7
subf r5,r6,r5
li r7,0
cmpldi cr1,r5,16
bf cr7*4+3,1f
lbz r0,0(r4)
stb r0,0(r3)
addi r7,r7,1
1: bf cr7*4+2,2f
lhzx r0,r7,r4
sthx r0,r7,r3
addi r7,r7,2
2: bf cr7*4+1,3f
lwzx r0,r7,r4
stwx r0,r7,r3
/* reload cr7 with the low bits of the remaining length */
3: PPC_MTOCRF(0x01,r5)
/* advance both pointers past the bytes just copied */
add r4,r6,r4
add r3,r6,r3
b .Ldst_aligned
/*
 * Copies of fewer than 16 bytes.  cr7 holds the low four bits of the
 * length, so each set bit selects one fixed-size piece: 8 bytes (done as
 * two words), then 4, 2 and 1.  The saved dest pointer is reloaded into
 * r3 before returning.
 */
.Lshort_copy:
bf cr7*4+0,1f
lwz r0,0(r4)
lwz r9,4(r4)
addi r4,r4,8
stw r0,0(r3)
stw r9,4(r3)
addi r3,r3,8
1: bf cr7*4+1,2f
lwz r0,0(r4)
addi r4,r4,4
stw r0,0(r3)
addi r3,r3,4
2: bf cr7*4+2,3f
lhz r0,0(r4)
addi r4,r4,2
sth r0,0(r3)
addi r3,r3,2
3: bf cr7*4+3,4f
lbz r0,0(r4)
stb r0,0(r3)
4: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
blr
#endif

View file

@ -0,0 +1,656 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2012
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
#include <asm/ppc_asm.h>
/*
 * memcpy_power7(dest=r3, src=r4, n=r5)
 *
 * Entry: saves dest below the stack pointer (reloaded into r3 before
 * returning) and dispatches on the length:
 *   n < 16                        -> .Lshort_copy
 *   n > 4096 with CONFIG_ALTIVEC  -> .Lvmx_copy
 *   otherwise                     -> falls through to .Lnonvmx_copy
 *
 * LVS/VPERM wrap the permute-control and permute instructions so the
 * unaligned VMX loop works for either endianness.
 */
_GLOBAL(memcpy_power7)
#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB) lvsl VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
#else
/* little endian: lvsr instead of lvsl, and the two vperm sources swapped */
#define LVS(VRT,RA,RB) lvsr VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC
#endif
#ifdef CONFIG_ALTIVEC
/* cr0 = n vs 16, cr1 = n vs 4096 */
cmpldi r5,16
cmpldi cr1,r5,4096
std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
blt .Lshort_copy
bgt cr1,.Lvmx_copy
#else
cmpldi r5,16
std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
blt .Lshort_copy
#endif
/*
 * Integer-register copy for n >= 16.
 *  1. Copy 1/2/4 bytes as needed so the SOURCE becomes 8-byte aligned.
 *  2. If at least 128 bytes remain, build a stack frame, save r14-r22 and
 *     move 128 bytes per loop iteration through 16 registers.
 *  3. Copy the remaining 64/32/16-byte pieces, then fall through to
 *     .Lshort_copy with the final 0..15 bytes in r5.
 */
.Lnonvmx_copy:
/* Get the source 8B aligned */
neg r6,r4
mtocrf 0x01,r6
/* r6 = number of bytes to the next 8-byte source boundary (0..7) */
clrldi r6,r6,(64-3)
bf cr7*4+3,1f
lbz r0,0(r4)
addi r4,r4,1
stb r0,0(r3)
addi r3,r3,1
1: bf cr7*4+2,2f
lhz r0,0(r4)
addi r4,r4,2
sth r0,0(r3)
addi r3,r3,2
2: bf cr7*4+1,3f
lwz r0,0(r4)
addi r4,r4,4
stw r0,0(r3)
addi r3,r3,4
/* account for the alignment bytes just copied */
3: sub r5,r5,r6
cmpldi r5,128
blt 5f
/* the 128-byte loop uses non-volatile registers: make a frame, save them */
mflr r0
stdu r1,-STACKFRAMESIZE(r1)
std r14,STK_REG(R14)(r1)
std r15,STK_REG(R15)(r1)
std r16,STK_REG(R16)(r1)
std r17,STK_REG(R17)(r1)
std r18,STK_REG(R18)(r1)
std r19,STK_REG(R19)(r1)
std r20,STK_REG(R20)(r1)
std r21,STK_REG(R21)(r1)
std r22,STK_REG(R22)(r1)
std r0,STACKFRAMESIZE+16(r1)
/* ctr = number of whole 128-byte blocks */
srdi r6,r5,7
mtctr r6
/* Now do cacheline (128B) sized loads and stores. */
.align 5
4:
ld r0,0(r4)
ld r6,8(r4)
ld r7,16(r4)
ld r8,24(r4)
ld r9,32(r4)
ld r10,40(r4)
ld r11,48(r4)
ld r12,56(r4)
ld r14,64(r4)
ld r15,72(r4)
ld r16,80(r4)
ld r17,88(r4)
ld r18,96(r4)
ld r19,104(r4)
ld r20,112(r4)
ld r21,120(r4)
addi r4,r4,128
std r0,0(r3)
std r6,8(r3)
std r7,16(r3)
std r8,24(r3)
std r9,32(r3)
std r10,40(r3)
std r11,48(r3)
std r12,56(r3)
std r14,64(r3)
std r15,72(r3)
std r16,80(r3)
std r17,88(r3)
std r18,96(r3)
std r19,104(r3)
std r20,112(r3)
std r21,120(r3)
addi r3,r3,128
bdnz 4b
/* r5 = bytes left after the 128-byte blocks (n mod 128) */
clrldi r5,r5,(64-7)
/* restore the saved registers and drop the frame */
ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)
ld r17,STK_REG(R17)(r1)
ld r18,STK_REG(R18)(r1)
ld r19,STK_REG(R19)(r1)
ld r20,STK_REG(R20)(r1)
ld r21,STK_REG(R21)(r1)
ld r22,STK_REG(R22)(r1)
addi r1,r1,STACKFRAMESIZE
/* Up to 127B to go */
/* cr7 = (remaining >> 4): bit 1 -> 64 bytes, bit 2 -> 32, bit 3 -> 16 */
5: srdi r6,r5,4
mtocrf 0x01,r6
6: bf cr7*4+1,7f
ld r0,0(r4)
ld r6,8(r4)
ld r7,16(r4)
ld r8,24(r4)
ld r9,32(r4)
ld r10,40(r4)
ld r11,48(r4)
ld r12,56(r4)
addi r4,r4,64
std r0,0(r3)
std r6,8(r3)
std r7,16(r3)
std r8,24(r3)
std r9,32(r3)
std r10,40(r3)
std r11,48(r3)
std r12,56(r3)
addi r3,r3,64
/* Up to 63B to go */
7: bf cr7*4+2,8f
ld r0,0(r4)
ld r6,8(r4)
ld r7,16(r4)
ld r8,24(r4)
addi r4,r4,32
std r0,0(r3)
std r6,8(r3)
std r7,16(r3)
std r8,24(r3)
addi r3,r3,32
/* Up to 31B to go */
8: bf cr7*4+3,9f
ld r0,0(r4)
ld r6,8(r4)
addi r4,r4,16
std r0,0(r3)
std r6,8(r3)
addi r3,r3,16
9: clrldi r5,r5,(64-4)
/* Up to 15B to go */
/*
 * Final 0..15 bytes (also the direct entry for n < 16).  The low four
 * bits of r5 are moved into cr7 and each set bit copies one piece of
 * 8 (as two words), 4, 2 or 1 bytes.  Returns the saved dest in r3.
 */
.Lshort_copy:
mtocrf 0x01,r5
bf cr7*4+0,12f
lwz r0,0(r4) /* Less chance of a reject with word ops */
lwz r6,4(r4)
addi r4,r4,8
stw r0,0(r3)
stw r6,4(r3)
addi r3,r3,8
12: bf cr7*4+1,13f
lwz r0,0(r4)
addi r4,r4,4
stw r0,0(r3)
addi r3,r3,4
13: bf cr7*4+2,14f
lhz r0,0(r4)
addi r4,r4,2
sth r0,0(r3)
addi r3,r3,2
14: bf cr7*4+3,15f
lbz r0,0(r4)
stb r0,0(r3)
/* reload the original destination pointer as the return value */
15: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
blr
/*
 * Reached from .Lvmx_copy when enter_vmx_copy returned 0: pop the frame
 * that .Lvmx_copy pushed and do the copy with integer registers instead.
 */
.Lunwind_stack_nonvmx_copy:
addi r1,r1,STACKFRAMESIZE
b .Lnonvmx_copy
#ifdef CONFIG_ALTIVEC
/*
 * VMX (Altivec) copy, entered for n > 4096.
 *  - Saves src/len/LR, pushes a frame and calls enter_vmx_copy; a zero
 *    return sends the copy back to the integer path.
 *  - Starts prefetch streams for source and destination.
 *  - If src and dest do not share the same offset within 16 bytes, jumps
 *    to .Lvmx_unaligned_copy.
 *  - Otherwise aligns dest to 16 bytes, then to 128 bytes, copies 128
 *    bytes per iteration with vector loads/stores, and finishes the tail.
 *  - Tail-calls exit_vmx_copy with r3 = original dest.
 */
.Lvmx_copy:
mflr r0
std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
bl enter_vmx_copy
/* cr1.eq set when enter_vmx_copy returned 0; tested after the prefetch setup */
cmpwi cr1,r3,0
ld r0,STACKFRAMESIZE+16(r1)
ld r3,STK_REG(R31)(r1)
ld r4,STK_REG(R30)(r1)
ld r5,STK_REG(R29)(r1)
mtlr r0
/*
* We prefetch both the source and destination using enhanced touch
* instructions. We use a stream ID of 0 for the load side and
* 1 for the store side.
*/
clrrdi r6,r4,7
clrrdi r9,r3,7
ori r9,r9,1 /* stream=1 */
srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */
cmpldi r7,0x3FF
ble 1f
li r7,0x3FF
1: lis r0,0x0E00 /* depth=7 */
sldi r7,r7,7
or r7,r7,r0
ori r10,r7,1 /* stream=1 */
lis r8,0x8000 /* GO=1 */
clrldi r8,r8,32
.machine push
.machine "power4"
dcbt r0,r6,0b01000
dcbt r0,r7,0b01010
dcbtst r0,r9,0b01000
dcbtst r0,r10,0b01010
eieio
dcbt r0,r8,0b01010 /* GO */
.machine pop
beq cr1,.Lunwind_stack_nonvmx_copy
/*
* If source and destination are not relatively aligned we use a
* slower permute loop.
*/
xor r6,r4,r3
rldicl. r6,r6,0,(64-4)
bne .Lvmx_unaligned_copy
/* Get the destination 16B aligned */
neg r6,r3
mtocrf 0x01,r6
/* r6 = bytes to the next 16-byte dest boundary (0..15) */
clrldi r6,r6,(64-4)
bf cr7*4+3,1f
lbz r0,0(r4)
addi r4,r4,1
stb r0,0(r3)
addi r3,r3,1
1: bf cr7*4+2,2f
lhz r0,0(r4)
addi r4,r4,2
sth r0,0(r3)
addi r3,r3,2
2: bf cr7*4+1,3f
lwz r0,0(r4)
addi r4,r4,4
stw r0,0(r3)
addi r3,r3,4
3: bf cr7*4+0,4f
ld r0,0(r4)
addi r4,r4,8
std r0,0(r3)
addi r3,r3,8
4: sub r5,r5,r6
/* Get the destination 128B aligned */
neg r6,r3
srdi r7,r6,4
/* cr7 = number of 16-byte units to the 128-byte boundary */
mtocrf 0x01,r7
clrldi r6,r6,(64-7)
/* r9/r10/r11 are index registers for the 16/32/48 byte offsets */
li r9,16
li r10,32
li r11,48
bf cr7*4+3,5f
lvx vr1,r0,r4
addi r4,r4,16
stvx vr1,r0,r3
addi r3,r3,16
5: bf cr7*4+2,6f
lvx vr1,r0,r4
lvx vr0,r4,r9
addi r4,r4,32
stvx vr1,r0,r3
stvx vr0,r3,r9
addi r3,r3,32
6: bf cr7*4+1,7f
lvx vr3,r0,r4
lvx vr2,r4,r9
lvx vr1,r4,r10
lvx vr0,r4,r11
addi r4,r4,64
stvx vr3,r0,r3
stvx vr2,r3,r9
stvx vr1,r3,r10
stvx vr0,r3,r11
addi r3,r3,64
7: sub r5,r5,r6
/* ctr = number of whole 128-byte blocks left */
srdi r6,r5,7
/* r14-r16 are used as extra index registers, so save them first */
std r14,STK_REG(R14)(r1)
std r15,STK_REG(R15)(r1)
std r16,STK_REG(R16)(r1)
li r12,64
li r14,80
li r15,96
li r16,112
mtctr r6
/*
* Now do cacheline sized loads and stores. By this stage the
* cacheline stores are also cacheline aligned.
*/
.align 5
8:
lvx vr7,r0,r4
lvx vr6,r4,r9
lvx vr5,r4,r10
lvx vr4,r4,r11
lvx vr3,r4,r12
lvx vr2,r4,r14
lvx vr1,r4,r15
lvx vr0,r4,r16
addi r4,r4,128
stvx vr7,r0,r3
stvx vr6,r3,r9
stvx vr5,r3,r10
stvx vr4,r3,r11
stvx vr3,r3,r12
stvx vr2,r3,r14
stvx vr1,r3,r15
stvx vr0,r3,r16
addi r3,r3,128
bdnz 8b
ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)
/* Up to 127B to go */
clrldi r5,r5,(64-7)
srdi r6,r5,4
mtocrf 0x01,r6
bf cr7*4+1,9f
lvx vr3,r0,r4
lvx vr2,r4,r9
lvx vr1,r4,r10
lvx vr0,r4,r11
addi r4,r4,64
stvx vr3,r0,r3
stvx vr2,r3,r9
stvx vr1,r3,r10
stvx vr0,r3,r11
addi r3,r3,64
9: bf cr7*4+2,10f
lvx vr1,r0,r4
lvx vr0,r4,r9
addi r4,r4,32
stvx vr1,r0,r3
stvx vr0,r3,r9
addi r3,r3,32
10: bf cr7*4+3,11f
lvx vr1,r0,r4
addi r4,r4,16
stvx vr1,r0,r3
addi r3,r3,16
/* Up to 15B to go */
11: clrldi r5,r5,(64-4)
mtocrf 0x01,r5
bf cr7*4+0,12f
ld r0,0(r4)
addi r4,r4,8
std r0,0(r3)
addi r3,r3,8
12: bf cr7*4+1,13f
lwz r0,0(r4)
addi r4,r4,4
stw r0,0(r3)
addi r3,r3,4
13: bf cr7*4+2,14f
lhz r0,0(r4)
addi r4,r4,2
sth r0,0(r3)
addi r3,r3,2
14: bf cr7*4+3,15f
lbz r0,0(r4)
stb r0,0(r3)
/* pop the frame, reload the original dest as return value */
15: addi r1,r1,STACKFRAMESIZE
ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
b exit_vmx_copy /* tail call optimise */
/*
 * VMX copy for the case where src and dest have different offsets within
 * a 16-byte quadword.  The destination is aligned exactly as in the
 * aligned path, but every stored vector is built with VPERM from two
 * consecutive source quadwords, using the control vector that LVS
 * derives from the source address (vr16).  vr0 always carries the most
 * recently loaded source quadword into the next step, which is why r4
 * runs 16 bytes ahead and is pulled back before the scalar tail.
 */
.Lvmx_unaligned_copy:
/* Get the destination 16B aligned */
neg r6,r3
mtocrf 0x01,r6
clrldi r6,r6,(64-4)
bf cr7*4+3,1f
lbz r0,0(r4)
addi r4,r4,1
stb r0,0(r3)
addi r3,r3,1
1: bf cr7*4+2,2f
lhz r0,0(r4)
addi r4,r4,2
sth r0,0(r3)
addi r3,r3,2
2: bf cr7*4+1,3f
lwz r0,0(r4)
addi r4,r4,4
stw r0,0(r3)
addi r3,r3,4
3: bf cr7*4+0,4f
lwz r0,0(r4) /* Less chance of a reject with word ops */
lwz r7,4(r4)
addi r4,r4,8
stw r0,0(r3)
stw r7,4(r3)
addi r3,r3,8
4: sub r5,r5,r6
/* Get the destination 128B aligned */
neg r6,r3
srdi r7,r6,4
mtocrf 0x01,r7
clrldi r6,r6,(64-7)
li r9,16
li r10,32
li r11,48
LVS(vr16,0,r4) /* Setup permute control vector */
/* prime vr0 with the first source quadword; r4 now runs 16 bytes ahead */
lvx vr0,0,r4
addi r4,r4,16
bf cr7*4+3,5f
lvx vr1,r0,r4
VPERM(vr8,vr0,vr1,vr16)
addi r4,r4,16
stvx vr8,r0,r3
addi r3,r3,16
/* carry the last loaded quadword forward in vr0 */
vor vr0,vr1,vr1
5: bf cr7*4+2,6f
lvx vr1,r0,r4
VPERM(vr8,vr0,vr1,vr16)
lvx vr0,r4,r9
VPERM(vr9,vr1,vr0,vr16)
addi r4,r4,32
stvx vr8,r0,r3
stvx vr9,r3,r9
addi r3,r3,32
6: bf cr7*4+1,7f
lvx vr3,r0,r4
VPERM(vr8,vr0,vr3,vr16)
lvx vr2,r4,r9
VPERM(vr9,vr3,vr2,vr16)
lvx vr1,r4,r10
VPERM(vr10,vr2,vr1,vr16)
lvx vr0,r4,r11
VPERM(vr11,vr1,vr0,vr16)
addi r4,r4,64
stvx vr8,r0,r3
stvx vr9,r3,r9
stvx vr10,r3,r10
stvx vr11,r3,r11
addi r3,r3,64
7: sub r5,r5,r6
srdi r6,r5,7
/* r14-r16 are used as extra index registers, so save them first */
std r14,STK_REG(R14)(r1)
std r15,STK_REG(R15)(r1)
std r16,STK_REG(R16)(r1)
li r12,64
li r14,80
li r15,96
li r16,112
mtctr r6
/*
* Now do cacheline sized loads and stores. By this stage the
* cacheline stores are also cacheline aligned.
*/
.align 5
8:
lvx vr7,r0,r4
VPERM(vr8,vr0,vr7,vr16)
lvx vr6,r4,r9
VPERM(vr9,vr7,vr6,vr16)
lvx vr5,r4,r10
VPERM(vr10,vr6,vr5,vr16)
lvx vr4,r4,r11
VPERM(vr11,vr5,vr4,vr16)
lvx vr3,r4,r12
VPERM(vr12,vr4,vr3,vr16)
lvx vr2,r4,r14
VPERM(vr13,vr3,vr2,vr16)
lvx vr1,r4,r15
VPERM(vr14,vr2,vr1,vr16)
lvx vr0,r4,r16
VPERM(vr15,vr1,vr0,vr16)
addi r4,r4,128
stvx vr8,r0,r3
stvx vr9,r3,r9
stvx vr10,r3,r10
stvx vr11,r3,r11
stvx vr12,r3,r12
stvx vr13,r3,r14
stvx vr14,r3,r15
stvx vr15,r3,r16
addi r3,r3,128
bdnz 8b
ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)
/* Up to 127B to go */
clrldi r5,r5,(64-7)
srdi r6,r5,4
mtocrf 0x01,r6
bf cr7*4+1,9f
lvx vr3,r0,r4
VPERM(vr8,vr0,vr3,vr16)
lvx vr2,r4,r9
VPERM(vr9,vr3,vr2,vr16)
lvx vr1,r4,r10
VPERM(vr10,vr2,vr1,vr16)
lvx vr0,r4,r11
VPERM(vr11,vr1,vr0,vr16)
addi r4,r4,64
stvx vr8,r0,r3
stvx vr9,r3,r9
stvx vr10,r3,r10
stvx vr11,r3,r11
addi r3,r3,64
9: bf cr7*4+2,10f
lvx vr1,r0,r4
VPERM(vr8,vr0,vr1,vr16)
lvx vr0,r4,r9
VPERM(vr9,vr1,vr0,vr16)
addi r4,r4,32
stvx vr8,r0,r3
stvx vr9,r3,r9
addi r3,r3,32
10: bf cr7*4+3,11f
lvx vr1,r0,r4
VPERM(vr8,vr0,vr1,vr16)
addi r4,r4,16
stvx vr8,r0,r3
addi r3,r3,16
/* Up to 15B to go */
11: clrldi r5,r5,(64-4)
addi r4,r4,-16 /* Unwind the +16 load offset */
mtocrf 0x01,r5
bf cr7*4+0,12f
lwz r0,0(r4) /* Less chance of a reject with word ops */
lwz r6,4(r4)
addi r4,r4,8
stw r0,0(r3)
stw r6,4(r3)
addi r3,r3,8
12: bf cr7*4+1,13f
lwz r0,0(r4)
addi r4,r4,4
stw r0,0(r3)
addi r3,r3,4
13: bf cr7*4+2,14f
lhz r0,0(r4)
addi r4,r4,2
sth r0,0(r3)
addi r3,r3,2
14: bf cr7*4+3,15f
lbz r0,0(r4)
stb r0,0(r3)
/* pop the frame, reload the original dest as return value */
15: addi r1,r1,STACKFRAMESIZE
ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
b exit_vmx_copy /* tail call optimise */
#endif /* CONFIG_ALTIVEC */

View file

@ -0,0 +1,39 @@
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <net/checksum.h>
/*
 * Symbol exports for the library routines, so that loadable modules can
 * link against them.
 */

/* Memory block primitives */
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memcmp);
EXPORT_SYMBOL(memchr);
/* Exported for 32-bit builds only */
#ifdef CONFIG_PPC32
EXPORT_SYMBOL(cacheable_memcpy);
EXPORT_SYMBOL(cacheable_memzero);
#endif
/* String primitives */
EXPORT_SYMBOL(strcpy);
EXPORT_SYMBOL(strncpy);
EXPORT_SYMBOL(strcat);
EXPORT_SYMBOL(strlen);
EXPORT_SYMBOL(strcmp);
EXPORT_SYMBOL(strncmp);
/* Checksum helpers, exported unless the generic checksum code is configured */
#ifndef CONFIG_GENERIC_CSUM
EXPORT_SYMBOL(csum_partial);
EXPORT_SYMBOL(csum_partial_copy_generic);
EXPORT_SYMBOL(ip_fast_csum);
EXPORT_SYMBOL(csum_tcpudp_magic);
#endif
/* User-space copy/clear and page copy */
EXPORT_SYMBOL(__copy_tofrom_user);
EXPORT_SYMBOL(__clear_user);
EXPORT_SYMBOL(copy_page);
/* Population-count helpers, exported for 64-bit builds only */
#ifdef CONFIG_PPC64
EXPORT_SYMBOL(__arch_hweight8);
EXPORT_SYMBOL(__arch_hweight16);
EXPORT_SYMBOL(__arch_hweight32);
EXPORT_SYMBOL(__arch_hweight64);
#endif

747
arch/powerpc/lib/rheap.c Normal file
View file

@ -0,0 +1,747 @@
/*
* A Remote Heap. Remote means that we don't touch the memory that the
* heap points to. Normal heap implementations use the memory they manage
* to place their list. We cannot do that because the memory we manage may
* have special properties, for example it may be uncacheable or of a
* different endianness.
*
* Author: Pantelis Antoniou <panto@intracom.gr>
*
* 2004 (c) INTRACOM S.A. Greece. This file is licensed under
* the terms of the GNU General Public License version 2. This program
* is licensed "as is" without any warranty of any kind, whether express
* or implied.
*/
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <asm/rheap.h>
/*
 * Fixup a list_head, needed when copying lists. If the pointers fall
 * between s and e, apply the delta. This assumes that
 * sizeof(struct list_head *) == sizeof(unsigned long *).
 *
 * @s: first address of the old block array
 * @e: one past the last address of the old block array
 * @d: signed byte distance from the old array to the new one; it is a
 *     long so that a full pointer difference fits on 64-bit builds
 * @l: list head whose next/prev pointers are to be relocated
 */
static inline void fixup(unsigned long s, unsigned long e, long d,
			 struct list_head *l)
{
	unsigned long *pp;

	pp = (unsigned long *)&l->next;
	if (*pp >= s && *pp < e)
		*pp += d;

	pp = (unsigned long *)&l->prev;
	if (*pp >= s && *pp < e)
		*pp += d;
}

/*
 * Grow the block array to hold max_blocks descriptors.
 *
 * A new array is allocated, the old descriptors are copied over and every
 * list pointer that referred into the old array is relocated.  The extra
 * descriptors are put on the empty list.
 *
 * Returns 0 on success, -EINVAL if max_blocks does not exceed the current
 * capacity, or -ENOMEM if the allocation fails.
 */
static int grow(rh_info_t * info, int max_blocks)
{
	rh_block_t *block, *blk;
	int i, new_blocks;
	long delta;
	unsigned long blks, blke;

	if (max_blocks <= info->max_blocks)
		return -EINVAL;

	new_blocks = max_blocks - info->max_blocks;

	/* kmalloc_array() rejects a count * size product that would overflow */
	block = kmalloc_array(max_blocks, sizeof(rh_block_t), GFP_ATOMIC);
	if (block == NULL)
		return -ENOMEM;

	if (info->max_blocks > 0) {

		/* copy old block area */
		memcpy(block, info->block,
		       sizeof(rh_block_t) * info->max_blocks);

		/* full-width byte distance between the old and new arrays */
		delta = (char *)block - (char *)info->block;

		/* and fixup list pointers */
		blks = (unsigned long)info->block;
		blke = (unsigned long)(info->block + info->max_blocks);

		for (i = 0, blk = block; i < info->max_blocks; i++, blk++)
			fixup(blks, blke, delta, &blk->list);

		fixup(blks, blke, delta, &info->empty_list);
		fixup(blks, blke, delta, &info->free_list);
		fixup(blks, blke, delta, &info->taken_list);

		/* free the old allocated memory */
		if ((info->flags & RHIF_STATIC_BLOCK) == 0)
			kfree(info->block);
	}

	info->block = block;
	info->empty_slots += new_blocks;
	info->max_blocks = max_blocks;
	/* the array now comes from kmalloc, so it is no longer static */
	info->flags &= ~RHIF_STATIC_BLOCK;

	/* add all new blocks to the empty list */
	blk = block + info->max_blocks - new_blocks;
	for (i = 0; i < new_blocks; i++, blk++)
		list_add(&blk->list, &info->empty_list);

	return 0;
}
/*
 * Make sure that at least @slots unused block descriptors are available.
 *
 * Returns 0 when enough descriptors already exist, -EINVAL when four or
 * more are requested, and otherwise the result of grow().  When the
 * block area is grown, every pointer previously obtained into it
 * becomes invalid.
 */
static int assure_empty(rh_info_t * info, int slots)
{
	int wanted;

	/* Refuse requests that would let the table grow without bound */
	if (slots > 3)
		return -EINVAL;

	/* Nothing to do if the request can already be satisfied */
	if (slots <= info->empty_slots)
		return 0;

	/* New capacity: current capacity plus request, rounded up to 16 */
	wanted = info->max_blocks + slots;
	wanted = (wanted + 15) & ~15;

	return grow(info, wanted);
}
/*
 * Take one descriptor off the empty list and hand it back zeroed
 * (start 0, size 0, no owner).
 *
 * Returns NULL, after logging an error, when no empty descriptor is
 * left; callers are expected to have called assure_empty() beforehand.
 */
static rh_block_t *get_slot(rh_info_t * info)
{
	rh_block_t *slot;

	if (info->empty_slots != 0) {
		/* Detach the first descriptor on the empty list */
		slot = list_entry(info->empty_list.next, rh_block_t, list);
		list_del_init(&slot->list);
		info->empty_slots--;

		/* Hand it out in a clean state */
		slot->start = 0;
		slot->size = 0;
		slot->owner = NULL;

		return slot;
	}

	/* XXX: You should have called assure_empty before */
	printk(KERN_ERR "rh: out of slots; crash is imminent.\n");
	return NULL;
}
/* Return descriptor @blk to the empty list and count it as available. */
static inline void release_slot(rh_info_t * info, rh_block_t * blk)
{
	info->empty_slots++;
	list_add(&blk->list, &info->empty_list);
}
/*
 * Insert descriptor @blkn, covering [start, start + size), into the free
 * list, merging it with a free block that ends exactly where it starts
 * and/or one that starts exactly where it ends.  When a merge happens
 * @blkn's descriptor is returned to the empty list; when both neighbours
 * exist the "after" descriptor is released as well.
 */
static void attach_free_block(rh_info_t * info, rh_block_t * blkn)
{
rh_block_t *blk;
rh_block_t *before;
rh_block_t *after;
rh_block_t *next;
int size;
unsigned long s, e, bs, be;
struct list_head *l;
/* We assume that they are aligned properly */
size = blkn->size;
s = blkn->start;
e = s + size;
/* Find the blocks immediately before and after the given one
* (if any) */
before = NULL;
after = NULL;
next = NULL;
list_for_each(l, &info->free_list) {
blk = list_entry(l, rh_block_t, list);
bs = blk->start;
be = bs + blk->size;
/* remember the first list entry whose start is not above s; it is
 * used as the insertion point when nothing can be merged */
if (next == NULL && s >= bs)
next = blk;
if (be == s)
before = blk;
if (e == bs)
after = blk;
/* If both are not null, break now */
if (before != NULL && after != NULL)
break;
}
/* Now check if they are really adjacent */
if (before && s != (before->start + before->size))
before = NULL;
if (after && e != after->start)
after = NULL;
/* No coalescing; list insert and return */
if (before == NULL && after == NULL) {
/* list_add() links the new entry right after the given position */
if (next != NULL)
list_add(&blkn->list, &next->list);
else
list_add(&blkn->list, &info->free_list);
return;
}
/* We don't need it anymore */
release_slot(info, blkn);
/* Grow the before block */
if (before != NULL && after == NULL) {
before->size += size;
return;
}
/* Grow the after block backwards */
if (before == NULL && after != NULL) {
after->start -= size;
after->size += size;
return;
}
/* Grow the before block, and release the after block */
before->size += size + after->size;
list_del(&after->list);
release_slot(info, after);
}
/*
 * Link @blkn into the taken list in front of the first entry whose start
 * address is greater than its own, or at the tail if there is none.
 */
static void attach_taken_block(rh_info_t * info, rh_block_t * blkn)
{
	struct list_head *pos;
	/* Default insertion point: the list head, i.e. append at the tail */
	struct list_head *where = &info->taken_list;

	list_for_each(pos, &info->taken_list) {
		rh_block_t *cur = list_entry(pos, rh_block_t, list);

		if (cur->start > blkn->start) {
			where = &cur->list;
			break;
		}
	}

	/* list_add_tail() links the new entry just before @where */
	list_add_tail(&blkn->list, where);
}
/*
 * Allocate and initialise a remote heap descriptor.
 *
 * No block descriptors are allocated here: the heap starts with a NULL
 * block array, zero capacity and three empty lists.
 *
 * Returns the new descriptor, ERR_PTR(-EINVAL) if @alignment is not a
 * power of two, or ERR_PTR(-ENOMEM) if the allocation fails.
 */
rh_info_t *rh_create(unsigned int alignment)
{
	rh_info_t *heap;

	/* A power of two has no bit in common with its predecessor */
	if (alignment & (alignment - 1))
		return ERR_PTR(-EINVAL);

	heap = kmalloc(sizeof(*heap), GFP_ATOMIC);
	if (!heap)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&heap->empty_list);
	INIT_LIST_HEAD(&heap->free_list);
	INIT_LIST_HEAD(&heap->taken_list);

	/* No block storage yet */
	heap->flags = 0;
	heap->empty_slots = 0;
	heap->max_blocks = 0;
	heap->block = NULL;

	heap->alignment = alignment;

	return heap;
}
EXPORT_SYMBOL_GPL(rh_create);
/*
 * Tear down a remote heap.  The block array is freed unless it is
 * flagged RHIF_STATIC_BLOCK, and the descriptor itself is freed unless
 * it is flagged RHIF_STATIC_INFO.
 */
void rh_destroy(rh_info_t * info)
{
	if (!(info->flags & RHIF_STATIC_BLOCK) && info->block != NULL)
		kfree(info->block);

	if (!(info->flags & RHIF_STATIC_INFO))
		kfree(info);
}
EXPORT_SYMBOL_GPL(rh_destroy);
/*
 * Set up a caller-provided heap descriptor using a caller-provided array
 * of @max_blocks block descriptors.  Intended for very early boot, when
 * kmalloc cannot be used yet.  Both the descriptor and the array are
 * flagged as static.
 *
 * Returns silently, leaving @info untouched, if @alignment is not a
 * power of two.
 */
void rh_init(rh_info_t * info, unsigned int alignment, int max_blocks,
	     rh_block_t * block)
{
	int idx;

	/* A power of two has no bit in common with its predecessor */
	if (alignment & (alignment - 1))
		return;

	info->alignment = alignment;

	/* All descriptors start out unused */
	info->block = block;
	info->max_blocks = max_blocks;
	info->empty_slots = max_blocks;
	info->flags = RHIF_STATIC_INFO | RHIF_STATIC_BLOCK;

	INIT_LIST_HEAD(&info->empty_list);
	INIT_LIST_HEAD(&info->free_list);
	INIT_LIST_HEAD(&info->taken_list);

	/* Put every descriptor of the array on the empty list */
	for (idx = 0; idx < max_blocks; idx++)
		list_add(&block[idx].list, &info->empty_list);
}
EXPORT_SYMBOL_GPL(rh_init);
/*
 * Hand a memory region to the heap as free space, coalescing it with
 * adjacent free regions.  The start is rounded up and the end rounded
 * down to the heap alignment first.
 *
 * Returns 0 on success, -ERANGE if the rounded range is inverted or its
 * end looks like an error value, or the error from assure_empty().
 */
int rh_attach_region(rh_info_t * info, unsigned long start, int size)
{
	rh_block_t *blk;
	unsigned long mask, first, last;
	int err;

	mask = info->alignment - 1;

	/* Shrink the region inwards to aligned boundaries */
	first = (start + mask) & ~mask;
	last = (start + size) & ~mask;

	if (IS_ERR_VALUE(last) || (last < first))
		return -ERANGE;

	/* Continue with the aligned values */
	start = first;
	size = last - first;

	/* One descriptor is needed for the new free block */
	err = assure_empty(info, 1);
	if (err < 0)
		return err;

	blk = get_slot(info);
	blk->start = start;
	blk->size = size;
	blk->owner = NULL;

	attach_free_block(info, blk);

	return 0;
}
EXPORT_SYMBOL_GPL(rh_attach_region);
/*
 * Detach the given address range from the free space, splitting a free
 * block if needed.
 *
 * The start is rounded up and the end rounded down to the heap alignment;
 * the rounded range [s, e) must lie entirely inside a single free block.
 * All bookkeeping uses the rounded range, so the space removed from the
 * free list is exactly [s, e) even when the caller's start/size were not
 * aligned.
 *
 * Returns the (aligned) start of the detached range, or an error code
 * cast to unsigned long: -EINVAL for a non-positive size or a range that
 * is empty after rounding, -ENOMEM if no descriptor can be reserved or no
 * free block contains the range.
 */
unsigned long rh_detach_region(rh_info_t * info, unsigned long start, int size)
{
	struct list_head *l;
	rh_block_t *blk, *newblk;
	unsigned long s, e, m, bs = 0, be = 0;

	/* Validate size */
	if (size <= 0)
		return (unsigned long) -EINVAL;

	/* The region must be aligned */
	s = start;
	e = s + size;
	m = info->alignment - 1;

	/* Round start up */
	s = (s + m) & ~m;

	/* Round end down */
	e = e & ~m;

	/* Nothing left (or inverted/wrapped range) after rounding */
	if (e <= s)
		return (unsigned long) -EINVAL;

	/* A split in the middle of a free block needs one new descriptor */
	if (assure_empty(info, 1) < 0)
		return (unsigned long) -ENOMEM;

	blk = NULL;
	list_for_each(l, &info->free_list) {
		blk = list_entry(l, rh_block_t, list);
		/* The range must lie entirely inside one free block */
		bs = blk->start;
		be = blk->start + blk->size;
		if (s >= bs && e <= be)
			break;
		blk = NULL;
	}

	if (blk == NULL)
		return (unsigned long) -ENOMEM;

	/* Perfect fit */
	if (bs == s && be == e) {
		/* Delete from free list, release slot */
		list_del(&blk->list);
		release_slot(info, blk);
		return s;
	}

	/* blk still in free list, with updated start and/or size */
	if (bs == s) {
		/* Range removed from the front: the block now starts at e */
		blk->start = e;
		blk->size = be - e;
	} else if (be == e) {
		/* Range removed from the back: the block now ends at s */
		blk->size = s - bs;
	} else {
		/* The front free fragment */
		blk->size = s - bs;

		/* the back free fragment */
		newblk = get_slot(info);
		newblk->start = e;
		newblk->size = be - e;

		list_add(&newblk->list, &blk->list);
	}

	return s;
}
EXPORT_SYMBOL_GPL(rh_detach_region);
/* Allocate a block of memory at the specified alignment. The value returned
* is an offset into the buffer initialized by rh_init(), or a negative number
* if there is an error.
*
* The size is rounded up to the heap's own alignment and the free list is
* searched first-fit. Errors are returned as a negative errno cast to
* unsigned long: -EINVAL for a non-positive size or an alignment that is not
* a power of two, -ENOMEM if descriptors cannot be reserved or no free block
* is large enough.
*/
unsigned long rh_alloc_align(rh_info_t * info, int size, int alignment, const char *owner)
{
struct list_head *l;
rh_block_t *blk;
rh_block_t *newblk;
unsigned long start, sp_size;
/* Validate size, and alignment must be power of two */
if (size <= 0 || (alignment & (alignment - 1)) != 0)
return (unsigned long) -EINVAL;
/* Align to configured alignment */
size = (size + (info->alignment - 1)) & ~(info->alignment - 1);
/* up to two descriptors are taken below: the leading fragment and the
 * allocated block itself */
if (assure_empty(info, 2) < 0)
return (unsigned long) -ENOMEM;
blk = NULL;
list_for_each(l, &info->free_list) {
blk = list_entry(l, rh_block_t, list);
if (size <= blk->size) {
/* first address inside blk that satisfies the requested alignment */
start = (blk->start + alignment - 1) & ~(alignment - 1);
if (start + size <= blk->start + blk->size)
break;
}
blk = NULL;
}
if (blk == NULL)
return (unsigned long) -ENOMEM;
/* Just fits */
if (blk->size == size) {
/* Move from free list to taken list */
list_del(&blk->list);
newblk = blk;
} else {
/* Fragment caused, split if needed */
/* Create block for fragment in the beginning */
sp_size = start - blk->start;
if (sp_size) {
rh_block_t *spblk;
spblk = get_slot(info);
spblk->start = blk->start;
spblk->size = sp_size;
/* add before the blk */
list_add(&spblk->list, blk->list.prev);
}
newblk = get_slot(info);
newblk->start = start;
newblk->size = size;
/* blk still in free list, with updated start and size
* for fragment in the end */
blk->start = start + size;
blk->size -= sp_size + size;
/* No fragment in the end, remove blk */
if (blk->size == 0) {
list_del(&blk->list);
release_slot(info, blk);
}
}
newblk->owner = owner;
attach_taken_block(info, newblk);
return start;
}
EXPORT_SYMBOL_GPL(rh_alloc_align);
/*
 * Allocate @size bytes using the heap's default alignment.  This is a
 * thin wrapper around rh_alloc_align(); the return value is whatever
 * that function returns (an offset, or an error code cast to unsigned
 * long).
 */
unsigned long rh_alloc(rh_info_t * info, int size, const char *owner)
{
	const int default_align = info->alignment;

	return rh_alloc_align(info, size, default_align, owner);
}
EXPORT_SYMBOL_GPL(rh_alloc);
/*
 * Allocate a block of memory at the given offset.
 *
 * The start is rounded up and the end rounded down to the heap alignment;
 * the rounded range [s, e) must lie entirely inside a single free block.
 * The taken block and the space removed from the free list are both
 * exactly [s, e), so no bytes are lost from the heap's accounting when
 * the caller's start/size were not aligned.
 *
 * Returns the (aligned) start of the allocated block, or an error code
 * cast to unsigned long: -EINVAL for a non-positive size or a range that
 * is empty after rounding, -ENOMEM if descriptors cannot be reserved or
 * no free block contains the range.
 */
unsigned long rh_alloc_fixed(rh_info_t * info, unsigned long start, int size, const char *owner)
{
	struct list_head *l;
	rh_block_t *blk, *newblk1, *newblk2;
	unsigned long s, e, m, bs = 0, be = 0;

	/* Validate size */
	if (size <= 0)
		return (unsigned long) -EINVAL;

	/* The region must be aligned */
	s = start;
	e = s + size;
	m = info->alignment - 1;

	/* Round start up */
	s = (s + m) & ~m;

	/* Round end down */
	e = e & ~m;

	/* Nothing left (or inverted/wrapped range) after rounding */
	if (e <= s)
		return (unsigned long) -EINVAL;

	/* Worst case: one descriptor for the back fragment, one for the
	 * taken block */
	if (assure_empty(info, 2) < 0)
		return (unsigned long) -ENOMEM;

	blk = NULL;
	list_for_each(l, &info->free_list) {
		blk = list_entry(l, rh_block_t, list);
		/* The range must lie entirely inside one free block */
		bs = blk->start;
		be = blk->start + blk->size;
		if (s >= bs && e <= be)
			break;
		blk = NULL;
	}

	if (blk == NULL)
		return (unsigned long) -ENOMEM;

	/* Perfect fit */
	if (bs == s && be == e) {
		/* Move from free list to taken list */
		list_del(&blk->list);
		blk->owner = owner;

		start = blk->start;
		attach_taken_block(info, blk);

		return start;
	}

	/* blk still in free list, with updated start and/or size */
	if (bs == s) {
		/* Range taken from the front: the free block now starts at e */
		blk->start = e;
		blk->size = be - e;
	} else if (be == e) {
		/* Range taken from the back: the free block now ends at s */
		blk->size = s - bs;
	} else {
		/* The front free fragment */
		blk->size = s - bs;

		/* The back free fragment */
		newblk2 = get_slot(info);
		newblk2->start = e;
		newblk2->size = be - e;

		list_add(&newblk2->list, &blk->list);
	}

	/* Descriptor for the block that is now taken */
	newblk1 = get_slot(info);
	newblk1->start = s;
	newblk1->size = e - s;
	newblk1->owner = owner;

	start = newblk1->start;
	attach_taken_block(info, newblk1);

	return start;
}
EXPORT_SYMBOL_GPL(rh_alloc_fixed);
/*
 * Deallocate the memory previously allocated by one of the rh_alloc
 * functions.
 *
 * The taken list is searched for the last block whose start is not above
 * @start.  @start must then either be that block's start address or lie
 * strictly inside it; an address at or past the block's end is rejected,
 * so the block below a gap is never freed by mistake.
 *
 * Returns the size of the deallocated block, or -EINVAL if no taken
 * block matches.
 */
int rh_free(rh_info_t * info, unsigned long start)
{
	rh_block_t *blk, *blk2;
	struct list_head *l;
	int size;

	/* Linear search for block */
	blk = NULL;
	list_for_each(l, &info->taken_list) {
		blk2 = list_entry(l, rh_block_t, list);
		if (start < blk2->start)
			break;
		blk = blk2;
	}

	/* The end address (start + size) is one past the block, not part of it */
	if (blk == NULL ||
	    (start != blk->start && start >= blk->start + blk->size))
		return -EINVAL;

	/* Remove from taken list */
	list_del(&blk->list);

	/* Get size of freed block */
	size = blk->size;
	attach_free_block(info, blk);

	return size;
}
EXPORT_SYMBOL_GPL(rh_free);
/*
 * Report the blocks on the free list (RHGS_FREE) or the taken list
 * (RHGS_TAKEN).
 *
 * At most @max_stats entries are written to @stats (none if @stats is
 * NULL), but every block on the list is counted.
 *
 * Returns the total number of blocks on the selected list, which may
 * exceed @max_stats, or -EINVAL for an unknown @what.
 */
int rh_get_stats(rh_info_t * info, int what, int max_stats, rh_stats_t * stats)
{
	struct list_head *head;
	struct list_head *pos;
	int count = 0;

	if (what == RHGS_FREE)
		head = &info->free_list;
	else if (what == RHGS_TAKEN)
		head = &info->taken_list;
	else
		return -EINVAL;

	list_for_each(pos, head) {
		/* Record the entry only while there is room in the output */
		if (stats != NULL && count < max_stats) {
			const rh_block_t *b = list_entry(pos, rh_block_t, list);

			stats->start = b->start;
			stats->size = b->size;
			stats->owner = b->owner;
			stats++;
		}
		count++;
	}

	return count;
}
EXPORT_SYMBOL_GPL(rh_get_stats);
/*
 * Change the owner string of the taken block that contains @start.
 *
 * The taken list is searched for the last block whose start is not above
 * @start.  @start must then either be that block's start address or lie
 * strictly inside it; an address at or past the block's end is rejected.
 *
 * Returns the size of the block, or -EINVAL if no taken block matches.
 */
int rh_set_owner(rh_info_t * info, unsigned long start, const char *owner)
{
	rh_block_t *blk, *blk2;
	struct list_head *l;
	int size;

	/* Linear search for block */
	blk = NULL;
	list_for_each(l, &info->taken_list) {
		blk2 = list_entry(l, rh_block_t, list);
		if (start < blk2->start)
			break;
		blk = blk2;
	}

	/* The end address (start + size) is one past the block, not part of it */
	if (blk == NULL ||
	    (start != blk->start && start >= blk->start + blk->size))
		return -EINVAL;

	blk->owner = owner;
	size = blk->size;

	return size;
}
EXPORT_SYMBOL_GPL(rh_set_owner);
/*
 * Print the heap's slot counters followed by its free and taken blocks.
 * At most ARRAY_SIZE(st) blocks of each kind are printed; the snapshot
 * buffer is a function-local static array.
 */
void rh_dump(rh_info_t * info)
{
	static rh_stats_t st[32];	/* XXX maximum 32 blocks */
	const int cap = ARRAY_SIZE(st);
	int count;
	int idx;

	printk(KERN_INFO
	       "info @0x%p (%d slots empty / %d max)\n",
	       info, info->empty_slots, info->max_blocks);

	printk(KERN_INFO " Free:\n");
	count = rh_get_stats(info, RHGS_FREE, cap, st);
	/* rh_get_stats() counts every block; only cap of them were stored */
	if (count > cap)
		count = cap;
	for (idx = 0; idx < count; idx++)
		printk(KERN_INFO
		       " 0x%lx-0x%lx (%u)\n",
		       st[idx].start, st[idx].start + st[idx].size,
		       st[idx].size);
	printk(KERN_INFO "\n");

	printk(KERN_INFO " Taken:\n");
	count = rh_get_stats(info, RHGS_TAKEN, cap, st);
	if (count > cap)
		count = cap;
	for (idx = 0; idx < count; idx++)
		printk(KERN_INFO
		       " 0x%lx-0x%lx (%u) %s\n",
		       st[idx].start, st[idx].start + st[idx].size,
		       st[idx].size, st[idx].owner != NULL ? st[idx].owner : "");
	printk(KERN_INFO "\n");
}
EXPORT_SYMBOL_GPL(rh_dump);
/* Print the address, range and size of a single block descriptor. */
void rh_dump_blk(rh_info_t * info, rh_block_t * blk)
{
	const unsigned long first = blk->start;

	printk(KERN_INFO
	       "blk @0x%p: 0x%lx-0x%lx (%u)\n",
	       blk, first, first + blk->size, blk->size);
}
EXPORT_SYMBOL_GPL(rh_dump_blk);

2013
arch/powerpc/lib/sstep.c Normal file

File diff suppressed because it is too large Load diff

164
arch/powerpc/lib/string.S Normal file
View file

@ -0,0 +1,164 @@
/*
* String handling functions for PowerPC.
*
* Copyright (C) 1996 Paul Mackerras.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
.section __ex_table,"a"
PPC_LONG_ALIGN
.text
/*
 * strcpy(dest=r3, src=r4)
 *
 * Copies bytes up to and including the terminating NUL.  r3 is not
 * modified, so dest is returned.  Both working pointers start one byte
 * low because lbzu/stbu advance before accessing memory.
 */
_GLOBAL(strcpy)
addi r5,r3,-1
addi r4,r4,-1
1: lbzu r0,1(r4)
cmpwi 0,r0,0
/* the byte is stored before the test is acted on, so the NUL is copied too */
stbu r0,1(r5)
bne 1b
blr
/*
 * strncpy(dest=r3, src=r4, n=r5)
 *
 * Copies at most n bytes.  If a NUL is copied before n bytes were
 * written, the rest of the destination is filled with zero bytes.  If no
 * NUL is found within n bytes the destination is left unterminated.
 * r3 is not modified, so dest is returned.
 */
/* This clears out any unused part of the destination buffer,
just as the libc version does. -- paulus */
_GLOBAL(strncpy)
PPC_LCMPI 0,r5,0
/* n == 0: nothing to do */
beqlr
mtctr r5
addi r6,r3,-1
addi r4,r4,-1
1: lbzu r0,1(r4)
cmpwi 0,r0,0
stbu r0,1(r6)
bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
bnelr /* if we didn't hit a null char, we're done */
mfctr r5
PPC_LCMPI 0,r5,0 /* any space left in destination buffer? */
beqlr /* we know r0 == 0 here */
2: stbu r0,1(r6) /* clear it out if so */
bdnz 2b
blr
/*
 * strcat(dest=r3, src=r4)
 *
 * Finds the terminating NUL of dest, then copies src (including its NUL)
 * starting at that position.  r3 is not modified, so dest is returned.
 */
_GLOBAL(strcat)
addi r5,r3,-1
addi r4,r4,-1
/* scan dest for its NUL */
1: lbzu r0,1(r5)
cmpwi 0,r0,0
bne 1b
/* step back so that the first stbu overwrites that NUL */
addi r5,r5,-1
/* copy src, NUL included */
1: lbzu r0,1(r4)
cmpwi 0,r0,0
stbu r0,1(r5)
bne 1b
blr
/*
 * strcmp(a=r3, b=r4)
 *
 * Compares byte by byte.  Returns in r3 the difference (byte of a minus
 * byte of b) at the first position where the strings differ, or 0 when
 * the end of a is reached with all bytes equal.
 */
_GLOBAL(strcmp)
addi r5,r3,-1
addi r4,r4,-1
1: lbzu r3,1(r5)
/* cr1.eq = the byte from a is NUL */
cmpwi 1,r3,0
lbzu r0,1(r4)
/* r3 = a byte - b byte, cr0.eq = bytes equal */
subf. r3,r0,r3
/* end of a reached: return the difference computed above */
beqlr 1
/* bytes equal and not at the end: keep going */
beq 1b
blr
/*
 * strncmp(a=r3, b=r4, n=r5)
 *
 * Like strcmp but examines at most n bytes.  Returns 0 for n == 0;
 * otherwise the difference (byte of a minus byte of b) at the position
 * where the comparison stopped.
 */
_GLOBAL(strncmp)
PPC_LCMPI 0,r5,0
beq- 2f
mtctr r5
addi r5,r3,-1
addi r4,r4,-1
1: lbzu r3,1(r5)
/* cr1.eq = the byte from a is NUL */
cmpwi 1,r3,0
lbzu r0,1(r4)
subf. r3,r0,r3
/* end of a reached: return the difference computed above */
beqlr 1
/* dec ctr, loop while ctr != 0 and the bytes were equal */
bdnzt eq,1b
blr
2: li r3,0
blr
/*
 * strlen(s=r3)
 *
 * Returns the number of bytes before the terminating NUL.
 */
_GLOBAL(strlen)
addi r4,r3,-1
1: lbzu r0,1(r4)
cmpwi 0,r0,0
bne 1b
/* r4 points at the NUL: length = r4 - s */
subf r3,r3,r4
blr
/*
 * memcmp(a=r3, b=r4, n=r5)
 *
 * Compares n bytes one at a time.  Returns 0 if n == 0 or all bytes are
 * equal; otherwise the difference (byte of a minus byte of b) at the
 * first mismatch.
 */
_GLOBAL(memcmp)
PPC_LCMPI 0,r5,0
beq- 2f
mtctr r5
addi r6,r3,-1
addi r4,r4,-1
1: lbzu r3,1(r6)
lbzu r0,1(r4)
subf. r3,r0,r3
/* dec ctr, loop while ctr != 0 and the bytes were equal (cr0.eq) */
bdnzt 2,1b
blr
2: li r3,0
blr
/*
 * memchr(s=r3, c=r4, n=r5)
 *
 * Scans up to n bytes for a byte equal to r4.  Returns a pointer to the
 * first match, or 0 if there is none (or n == 0).
 *
 * NOTE(review): the loaded byte is zero-extended and compared against the
 * whole of r4 as a word, so a match presumably requires c to be in the
 * range 0..255 -- confirm that callers never pass a sign-extended char.
 */
_GLOBAL(memchr)
PPC_LCMPI 0,r5,0
beq- 2f
mtctr r5
addi r3,r3,-1
1: lbzu r0,1(r3)
cmpw 0,r0,r4
/* dec ctr, loop while ctr != 0 and no match yet */
bdnzf 2,1b
/* match: r3 already points at the matching byte */
beqlr
2: li r3,0
blr
#ifdef CONFIG_PPC32
/*
 * __clear_user (32-bit): store zeroes to r4 bytes starting at address r3.
 * Returns in r3 the number of bytes that were NOT cleared: 0 on success, or
 * the value computed by the fixup code at 90/91/92 when one of the stores
 * listed in the __ex_table entries at the end of this block faults.
 *
 * Strategy: one word store at the start address, then word stores from the
 * next word-aligned address, then byte stores for what is left.
 */
_GLOBAL(__clear_user)
addi r6,r3,-4	/* r6 = addr - 4, biased for the pre-incrementing stwu */
li r3,0	/* default return value: 0 bytes left uncleared */
li r5,0	/* r5 = the zero value that gets stored */
cmplwi 0,r4,4
blt 7f	/* fewer than 4 bytes: byte loop only */
/* clear a single word */
11: stwu r5,4(r6)
beqlr	/* cr0 is still from cmplwi: size was exactly 4, done */
/* clear word sized chunks */
andi. r0,r6,3	/* r0 = offset of r6 within its word */
add r4,r0,r4	/* count from the word boundary instead of from r6 */
subf r6,r0,r6	/* round r6 down to that word boundary */
srwi r0,r4,2	/* r0 = number of whole words in the adjusted count */
andi. r4,r4,3	/* r4 = trailing bytes after the last whole word */
mtctr r0
bdz 7f	/* one word was already stored at 11; skip if none remain */
1: stwu r5,4(r6)
bdnz 1b
/* clear byte sized chunks */
7: cmpwi 0,r4,0
beqlr	/* no trailing bytes: done, r3 == 0 */
mtctr r4
addi r6,r6,3	/* switch bias from -4 (stwu) to -1 (stbu) */
8: stbu r5,1(r6)
bdnz 8b
blr
/* Fixup for 11b: r4 is still the requested size, nothing was cleared. */
90: mr r3,r4
blr
/* Fixup for 1b: (words still to store in ctr) * 4 + trailing bytes in r4. */
91: mfctr r3
slwi r3,r3,2
add r3,r3,r4
blr
/* Fixup for 8b: ctr holds the number of bytes still to store. */
92: mfctr r3
blr
/* Each entry pairs a store that may fault with its fixup label above. */
.section __ex_table,"a"
PPC_LONG 11b,90b
PPC_LONG 1b,91b
PPC_LONG 8b,92b
.text
#endif

View file

@ -0,0 +1,202 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2012
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
/*
 * TOC entry holding the address of ppc64_caches.  __clear_user reads it
 * with "ld r5,PPC64_CACHES@toc(r2)" in its long-clear path to find the L1
 * data cache line size fields.
 */
.section ".toc","aw"
PPC64_CACHES:
.tc ppc64_caches[TC],ppc64_caches
.section ".text"
/**
* __clear_user: - Zero a block of memory in user space, with less checking.
* @to: Destination address, in user space.
* @n: Number of bytes to zero.
*
* Zero a block of memory in user space. Caller must check
* the specified block with access_ok() before calling this function.
*
* Returns number of bytes that could not be cleared.
* On success, this will be zero.
*/
/*
 * err1/err2/err3: prefix for a store that may fault.  Each macro drops a
 * numeric local label (100/200/300) at the current location -- i.e. on the
 * instruction that follows the macro on the same line -- and emits an
 * 8-byte-aligned __ex_table entry pairing that label with the matching
 * fixup handler (.Ldo_err1/.Ldo_err2/.Ldo_err3), then returns to the
 * previous section.
 */
.macro err1
100:
.section __ex_table,"a"
.align 3
.llong 100b,.Ldo_err1
.previous
.endm
.macro err2
200:
.section __ex_table,"a"
.align 3
.llong 200b,.Ldo_err2
.previous
.endm
.macro err3
300:
.section __ex_table,"a"
.align 3
.llong 300b,.Ldo_err3
.previous
.endm
/*
 * Fixup handlers for __clear_user.  All three end by returning r4, the
 * number of bytes not cleared, in r3.
 *
 * .Ldo_err1: the faulting code did not keep r3/r4 in step, so restart from
 *            the pointer it saved in r8, then fall into .Ldo_err2.
 * .Ldo_err2: retry one byte at a time from r3 for r4 bytes, decrementing r4
 *            after every byte that is stored successfully.
 * .Ldo_err3: a byte store in the retry loop faulted (or the loop ran to
 *            completion with r4 == 0): return what is left.
 */
.Ldo_err1:
mr r3,r8
.Ldo_err2:
mtctr r4
1:
err3; stb r0,0(r3)
addi r3,r3,1
addi r4,r4,-1
bdnz 1b
.Ldo_err3:
mr r3,r4
blr
/*
 * __clear_user (64-bit): r3 = destination address, r4 = byte count.
 * Returns 0 in r3 on success; on a faulting store the err1/err2 fixups
 * return the number of bytes not cleared instead.
 *
 * Register use: r0 = zero value being stored, r8 = restart pointer for the
 * err1 fixup, cr7 = low bits of an alignment or length value that select
 * which 8/4/2/1-byte stores to issue.
 */
_GLOBAL_TOC(__clear_user)
cmpdi r4,32
neg r6,r3	/* low bits of -addr = bytes needed to reach alignment */
li r0,0
blt .Lshort_clear	/* fewer than 32 bytes: no alignment work */
mr r8,r3	/* restart point if an alignment store faults */
mtocrf 0x01,r6	/* low 4 bits of r6 -> cr7 */
clrldi r6,r6,(64-3)	/* r6 = bytes up to the next 8-byte boundary (0..7) */
/* Get the destination 8 byte aligned */
bf cr7*4+3,1f
err1; stb r0,0(r3)
addi r3,r3,1
1: bf cr7*4+2,2f
err1; sth r0,0(r3)
addi r3,r3,2
2: bf cr7*4+1,3f
err1; stw r0,0(r3)
addi r3,r3,4
3: sub r4,r4,r6	/* account for the alignment bytes just stored */
cmpdi r4,32
cmpdi cr1,r4,512
blt .Lshort_clear
bgt cr1,.Llong_clear	/* more than 512 bytes: try the dcbz path */
.Lmedium_clear:
srdi r6,r4,5	/* r6 = number of whole 32-byte chunks */
mtctr r6
/* Do 32 byte chunks */
4:
err2; std r0,0(r3)
err2; std r0,8(r3)
err2; std r0,16(r3)
err2; std r0,24(r3)
addi r3,r3,32
addi r4,r4,-32
bdnz 4b
.Lshort_clear:
/* up to 31 bytes to go */
cmpdi r4,16
blt 6f
err2; std r0,0(r3)
err2; std r0,8(r3)
addi r3,r3,16
addi r4,r4,-16
/* Up to 15 bytes to go */
6: mr r8,r3	/* restart point if one of the tail stores faults */
clrldi r4,r4,(64-4)	/* keep only the low 4 bits of the count */
mtocrf 0x01,r4	/* ... and put them in cr7 to pick 8/4/2/1-byte stores */
bf cr7*4+0,7f
err1; std r0,0(r3)
addi r3,r3,8
7: bf cr7*4+1,8f
err1; stw r0,0(r3)
addi r3,r3,4
8: bf cr7*4+2,9f
err1; sth r0,0(r3)
addi r3,r3,2
9: bf cr7*4+3,10f
err1; stb r0,0(r3)
10: li r3,0	/* success: nothing left uncleared */
blr
.Llong_clear:
ld r5,PPC64_CACHES@toc(r2)	/* r5 = address of ppc64_caches */
bf cr7*4+0,11f	/* cr7 still holds the low bits of -addr from entry */
err2; std r0,0(r3)
addi r3,r3,8
addi r4,r4,-8
/* Destination is 16 byte aligned, need to get it cacheline aligned */
11: lwz r7,DCACHEL1LOGLINESIZE(r5)	/* r7 = log2 of the L1 dcache line size */
lwz r9,DCACHEL1LINESIZE(r5)	/* r9 = L1 dcache line size in bytes */
/*
* With worst case alignment the long clear loop takes a minimum
* of 1 byte less than 2 cachelines.
*/
sldi r10,r9,2	/* r10 = 4 * line size */
cmpd r4,r10
blt .Lmedium_clear	/* below that threshold: plain 32-byte loop */
neg r6,r3
addi r10,r9,-1	/* r10 = line size - 1, used as a mask from here on */
and. r5,r6,r10	/* r5 = bytes up to the next cache line boundary */
beq 13f	/* already line aligned */
srdi r6,r5,4	/* r6 = that distance in 16-byte steps */
mtctr r6
mr r8,r3	/* restart point: r4 is only adjusted after the loop */
12:
err1; std r0,0(r3)
err1; std r0,8(r3)
addi r3,r3,16
bdnz 12b
sub r4,r4,r5
13: srd r6,r4,r7	/* r6 = number of whole cache lines to zero */
mtctr r6
mr r8,r3	/* restart point: r4 is only adjusted after the loop */
14:
err1; dcbz r0,r3	/* zero one whole cache block at r3 */
add r3,r3,r9
bdnz 14b
and r4,r4,r10	/* r4 = bytes left after the whole cache lines */
cmpdi r4,32
blt .Lshort_clear
b .Lmedium_clear

View file

@ -0,0 +1,41 @@
/*
* Functions which are too large to be inlined.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <asm/uaccess.h>
/*
 * copy_from_user - copy @n bytes from user space @from to kernel buffer @to.
 *
 * Returns the number of bytes that could not be copied (0 on success).  If
 * the source range fails access_ok(), nothing is copied: the whole
 * destination is zero-filled and @n is returned.
 */
unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
{
	/* Bad source range: hand back zeroes rather than stale kernel data. */
	if (!likely(access_ok(VERIFY_READ, from, n))) {
		memset(to, 0, n);
		return n;
	}

	return __copy_from_user(to, from, n);
}
/*
 * copy_to_user - copy @n bytes from kernel buffer @from to user space @to.
 *
 * Returns the number of bytes that could not be copied (0 on success).  If
 * the destination range fails access_ok(), nothing is written and @n is
 * returned unchanged.
 */
unsigned long copy_to_user(void __user *to, const void *from, unsigned long n)
{
	if (!likely(access_ok(VERIFY_WRITE, to, n)))
		return n;

	return __copy_to_user(to, from, n);
}
/*
 * copy_in_user - copy @n bytes between two user-space buffers.
 *
 * Both ranges are checked with access_ok() (source for reading first, then
 * destination for writing); if either check fails nothing is copied and @n
 * is returned.  Otherwise returns whatever __copy_tofrom_user() reports.
 */
unsigned long copy_in_user(void __user *to, const void __user *from,
			   unsigned long n)
{
	might_sleep();

	if (!likely(access_ok(VERIFY_READ, from, n) &&
		    access_ok(VERIFY_WRITE, to, n)))
		return n;

	return __copy_tofrom_user(to, from, n);
}

EXPORT_SYMBOL(copy_from_user);
EXPORT_SYMBOL(copy_to_user);
EXPORT_SYMBOL(copy_in_user);

View file

@ -0,0 +1,74 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2011
*
* Authors: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
* Anton Blanchard <anton@au.ibm.com>
*/
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <asm/switch_to.h>
/*
 * enter_vmx_usercopy - prepare to use VMX for a user-space copy.
 *
 * Returns 1 when VMX may be used, 0 when called from interrupt context (in
 * which case nothing has been changed).
 */
int enter_vmx_usercopy(void)
{
	if (!in_interrupt()) {
		/*
		 * Page faults have to be disabled because handling one can
		 * call schedule() and so lose the VMX context; with them
		 * disabled a fault simply fails and the caller falls back to
		 * the normal non-VMX copy.  pagefault_disable() acts as
		 * preempt_disable() as well, so it is done before
		 * enable_kernel_altivec().
		 */
		pagefault_disable();
		enable_kernel_altivec();
		return 1;
	}

	return 0;
}
/*
 * exit_vmx_usercopy - undo enter_vmx_usercopy() by re-enabling page faults.
 *
 * The result must be 0: __copy_tofrom_user_power7 reaches this function via
 * a tail call and itself returns 0 on success.
 */
int exit_vmx_usercopy(void)
{
	const int success = 0;

	pagefault_enable();

	return success;
}
/*
 * enter_vmx_copy - prepare to use VMX for a kernel memory copy.
 *
 * Returns 1 with preemption disabled and AltiVec enabled for kernel use,
 * or 0 (nothing changed) when called from interrupt context.
 */
int enter_vmx_copy(void)
{
	if (!in_interrupt()) {
		preempt_disable();
		enable_kernel_altivec();
		return 1;
	}

	return 0;
}
/*
 * exit_vmx_copy - undo enter_vmx_copy() by re-enabling preemption.
 *
 * Every caller reaches this through a tail call, so the destination
 * pointer is passed in and handed straight back: that is the value a
 * memcpy implementation has to return.
 */
void *exit_vmx_copy(void *dest)
{
	void *const memcpy_result = dest;

	preempt_enable();

	return memcpy_result;
}

177
arch/powerpc/lib/xor_vmx.c Normal file
View file

@ -0,0 +1,177 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2012
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
#include <altivec.h>
#include <linux/preempt.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <asm/switch_to.h>
/* Element type for all buffer accesses below: one AltiVec vector. */
typedef vector signed char unative_t;
/*
 * DEFINE(V): declare, for a function parameter named V_in, a vector pointer
 * V aliasing that buffer plus four temporaries V_0..V_3.  It expands to
 * declarations, so it must be used where declarations are allowed.
 */
#define DEFINE(V) \
unative_t *V = (unative_t *)V##_in; \
unative_t V##_0, V##_1, V##_2, V##_3
/* LOAD(V): read the four vectors V[0..3] into the temporaries V_0..V_3. */
#define LOAD(V) \
do { \
V##_0 = V[0]; \
V##_1 = V[1]; \
V##_2 = V[2]; \
V##_3 = V[3]; \
} while (0)
/* STORE(V): write the temporaries V_0..V_3 back to V[0..3]. */
#define STORE(V) \
do { \
V[0] = V##_0; \
V[1] = V##_1; \
V[2] = V##_2; \
V[3] = V##_3; \
} while (0)
/* XOR(V1, V2): V1_i ^= V2_i for the four temporaries; V2's are unchanged. */
#define XOR(V1, V2) \
do { \
V1##_0 = vec_xor(V1##_0, V2##_0); \
V1##_1 = vec_xor(V1##_1, V2##_1); \
V1##_2 = vec_xor(V1##_2, V2##_2); \
V1##_3 = vec_xor(V1##_3, V2##_3); \
} while (0)
/**
 * xor_altivec_2 - XOR one source buffer into a destination using AltiVec.
 * @bytes: number of bytes to process.  Work is done in "lines" of four
 *         unative_t vectors; a trailing partial line is not processed.
 * @v1_in: destination buffer, also the first operand (v1 ^= v2).
 * @v2_in: source buffer.
 *
 * Preemption is disabled while the AltiVec unit is in use.  If @bytes is
 * smaller than one line the function returns without touching the buffers;
 * previously the do/while loop ran once anyway and then wrapped the
 * unsigned line counter, running far past the end of the buffers.
 *
 * NOTE(review): the buffers are accessed through unative_t pointers, so
 * they presumably must be aligned for vector access -- confirm with callers.
 */
void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
		   unsigned long *v2_in)
{
	DEFINE(v1);
	DEFINE(v2);
	unsigned long lines = bytes / (sizeof(unative_t)) / 4;

	/* Less than one full line: nothing to do. */
	if (lines == 0)
		return;

	preempt_disable();
	enable_kernel_altivec();

	while (lines-- > 0) {
		LOAD(v1);
		LOAD(v2);
		XOR(v1, v2);
		STORE(v1);

		v1 += 4;
		v2 += 4;
	}

	preempt_enable();
}
EXPORT_SYMBOL(xor_altivec_2);
/**
 * xor_altivec_3 - XOR two source buffers into a destination using AltiVec.
 * @bytes: number of bytes to process.  Work is done in "lines" of four
 *         unative_t vectors; a trailing partial line is not processed.
 * @v1_in: destination buffer, also the first operand (v1 ^= v2 ^ v3).
 * @v2_in: first source buffer.
 * @v3_in: second source buffer.
 *
 * Preemption is disabled while the AltiVec unit is in use.  If @bytes is
 * smaller than one line the function returns without touching the buffers;
 * previously the do/while loop ran once anyway and then wrapped the
 * unsigned line counter, running far past the end of the buffers.
 *
 * NOTE(review): the buffers are accessed through unative_t pointers, so
 * they presumably must be aligned for vector access -- confirm with callers.
 */
void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
		   unsigned long *v2_in, unsigned long *v3_in)
{
	DEFINE(v1);
	DEFINE(v2);
	DEFINE(v3);
	unsigned long lines = bytes / (sizeof(unative_t)) / 4;

	/* Less than one full line: nothing to do. */
	if (lines == 0)
		return;

	preempt_disable();
	enable_kernel_altivec();

	while (lines-- > 0) {
		LOAD(v1);
		LOAD(v2);
		LOAD(v3);
		XOR(v1, v2);
		XOR(v1, v3);
		STORE(v1);

		v1 += 4;
		v2 += 4;
		v3 += 4;
	}

	preempt_enable();
}
EXPORT_SYMBOL(xor_altivec_3);
/**
 * xor_altivec_4 - XOR three source buffers into a destination using AltiVec.
 * @bytes: number of bytes to process.  Work is done in "lines" of four
 *         unative_t vectors; a trailing partial line is not processed.
 * @v1_in: destination buffer, also the first operand
 *         (v1 ^= v2 ^ v3 ^ v4).
 * @v2_in: first source buffer.
 * @v3_in: second source buffer.
 * @v4_in: third source buffer.
 *
 * Only the v1 buffer is written; v3 is combined with v4 in temporaries.
 * Preemption is disabled while the AltiVec unit is in use.  If @bytes is
 * smaller than one line the function returns without touching the buffers;
 * previously the do/while loop ran once anyway and then wrapped the
 * unsigned line counter, running far past the end of the buffers.
 *
 * NOTE(review): the buffers are accessed through unative_t pointers, so
 * they presumably must be aligned for vector access -- confirm with callers.
 */
void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
		   unsigned long *v2_in, unsigned long *v3_in,
		   unsigned long *v4_in)
{
	DEFINE(v1);
	DEFINE(v2);
	DEFINE(v3);
	DEFINE(v4);
	unsigned long lines = bytes / (sizeof(unative_t)) / 4;

	/* Less than one full line: nothing to do. */
	if (lines == 0)
		return;

	preempt_disable();
	enable_kernel_altivec();

	while (lines-- > 0) {
		LOAD(v1);
		LOAD(v2);
		LOAD(v3);
		LOAD(v4);
		XOR(v1, v2);
		XOR(v3, v4);
		XOR(v1, v3);
		STORE(v1);

		v1 += 4;
		v2 += 4;
		v3 += 4;
		v4 += 4;
	}

	preempt_enable();
}
EXPORT_SYMBOL(xor_altivec_4);
/**
 * xor_altivec_5 - XOR four source buffers into a destination using AltiVec.
 * @bytes: number of bytes to process.  Work is done in "lines" of four
 *         unative_t vectors; a trailing partial line is not processed.
 * @v1_in: destination buffer, also the first operand
 *         (v1 ^= v2 ^ v3 ^ v4 ^ v5).
 * @v2_in: first source buffer.
 * @v3_in: second source buffer.
 * @v4_in: third source buffer.
 * @v5_in: fourth source buffer.
 *
 * Only the v1 buffer is written; v3 is combined with v4 in temporaries.
 * Preemption is disabled while the AltiVec unit is in use.  If @bytes is
 * smaller than one line the function returns without touching the buffers;
 * previously the do/while loop ran once anyway and then wrapped the
 * unsigned line counter, running far past the end of the buffers.
 *
 * NOTE(review): the buffers are accessed through unative_t pointers, so
 * they presumably must be aligned for vector access -- confirm with callers.
 */
void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
		   unsigned long *v2_in, unsigned long *v3_in,
		   unsigned long *v4_in, unsigned long *v5_in)
{
	DEFINE(v1);
	DEFINE(v2);
	DEFINE(v3);
	DEFINE(v4);
	DEFINE(v5);
	unsigned long lines = bytes / (sizeof(unative_t)) / 4;

	/* Less than one full line: nothing to do. */
	if (lines == 0)
		return;

	preempt_disable();
	enable_kernel_altivec();

	while (lines-- > 0) {
		LOAD(v1);
		LOAD(v2);
		LOAD(v3);
		LOAD(v4);
		LOAD(v5);
		XOR(v1, v2);
		XOR(v3, v4);
		XOR(v1, v5);
		XOR(v1, v3);
		STORE(v1);

		v1 += 4;
		v2 += 4;
		v3 += 4;
		v4 += 4;
		v5 += 4;
	}

	preempt_enable();
}
EXPORT_SYMBOL(xor_altivec_5);