Mirror of https://github.com/AetherDroid/android_kernel_samsung_on5xelte.git (synced 2025-10-28 14:58:52 +01:00)

Commit f6dfaef42e: Fixed MTP to work with TWRP
50820 changed files with 20846062 additions and 0 deletions
arch/powerpc/mm/40x_mmu.c (new file, 159 lines added)
|
|
@@ -0,0 +1,159 @@
|
|||
/*
|
||||
* This file contains the routines for initializing the MMU
|
||||
* on the 4xx series of chips.
|
||||
* -- paulus
|
||||
*
|
||||
* Derived from arch/ppc/mm/init.c:
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
|
||||
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
|
||||
* Copyright (C) 1996 Paul Mackerras
|
||||
*
|
||||
* Derived from "arch/i386/mm/init.c"
|
||||
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/signal.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/memblock.h>
|
||||
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/prom.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/bootx.h>
|
||||
#include <asm/machdep.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
#include "mmu_decl.h"
|
||||
|
||||
extern int __map_without_ltlbs;
|
||||
/*
|
||||
* MMU_init_hw does the chip-specific initialization of the MMU hardware.
|
||||
*/
|
||||
void __init MMU_init_hw(void)
|
||||
{
|
||||
/*
|
||||
* The Zone Protection Register (ZPR) defines how protection will
|
||||
* be applied to every page which is a member of a given zone. At
|
||||
* present, we utilize only two of the 4xx's zones.
|
||||
* The zone index bits (of ZSEL) in the PTE are used for software
|
||||
* indicators, except the LSB. For user access, zone 1 is used,
|
||||
* for kernel access, zone 0 is used. We set all but zone 1
|
||||
* to zero, allowing only kernel access as indicated in the PTE.
|
||||
* For zone 1, we set a 01 binary (a value of 10 will not work)
|
||||
* to allow user access as indicated in the PTE. This also allows
|
||||
* kernel access as indicated in the PTE.
|
||||
*/
|
||||
|
||||
mtspr(SPRN_ZPR, 0x10000000);
|
||||
|
||||
flush_instruction_cache();
|
||||
|
||||
/*
|
||||
* Set up the real-mode cache parameters for the exception vector
|
||||
* handlers (which are run in real-mode).
|
||||
*/
|
||||
|
||||
mtspr(SPRN_DCWR, 0x00000000); /* All caching is write-back */
|
||||
|
||||
/*
|
||||
* Cache instruction and data space where the exception
|
||||
* vectors and the kernel live in real-mode.
|
||||
*/
|
||||
|
||||
mtspr(SPRN_DCCR, 0xFFFF0000); /* 2GByte of data space at 0x0. */
|
||||
mtspr(SPRN_ICCR, 0xFFFF0000); /* 2GByte of instr. space at 0x0. */
|
||||
}
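The 0x10000000 written to SPRN_ZPR above packs sixteen 2-bit zone fields, with zone 0 in the most-significant bits. A minimal stand-alone C sketch (illustrative only, not part of this file) of how that value decodes to the two zones described in the comment:

#include <stdio.h>

int main(void)
{
        unsigned int zpr = 0x10000000;  /* value MMU_init_hw() writes */
        int zone;

        /* zone 0 lives in the two most-significant bits, zone 1 in the next two */
        for (zone = 0; zone < 2; zone++)
                printf("zone %d field = %u\n", zone, (zpr >> (30 - 2 * zone)) & 0x3);

        return 0;       /* zone 0 -> 0 (kernel only), zone 1 -> 1 (user access allowed) */
}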
|
||||
|
||||
#define LARGE_PAGE_SIZE_16M (1<<24)
|
||||
#define LARGE_PAGE_SIZE_4M (1<<22)
|
||||
|
||||
unsigned long __init mmu_mapin_ram(unsigned long top)
|
||||
{
|
||||
unsigned long v, s, mapped;
|
||||
phys_addr_t p;
|
||||
|
||||
v = KERNELBASE;
|
||||
p = 0;
|
||||
s = total_lowmem;
|
||||
|
||||
if (__map_without_ltlbs)
|
||||
return 0;
|
||||
|
||||
while (s >= LARGE_PAGE_SIZE_16M) {
|
||||
pmd_t *pmdp;
|
||||
unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE;
|
||||
|
||||
pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
|
||||
pmd_val(*pmdp++) = val;
|
||||
pmd_val(*pmdp++) = val;
|
||||
pmd_val(*pmdp++) = val;
|
||||
pmd_val(*pmdp++) = val;
|
||||
|
||||
v += LARGE_PAGE_SIZE_16M;
|
||||
p += LARGE_PAGE_SIZE_16M;
|
||||
s -= LARGE_PAGE_SIZE_16M;
|
||||
}
|
||||
|
||||
while (s >= LARGE_PAGE_SIZE_4M) {
|
||||
pmd_t *pmdp;
|
||||
unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE;
|
||||
|
||||
pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
|
||||
pmd_val(*pmdp) = val;
|
||||
|
||||
v += LARGE_PAGE_SIZE_4M;
|
||||
p += LARGE_PAGE_SIZE_4M;
|
||||
s -= LARGE_PAGE_SIZE_4M;
|
||||
}
|
||||
|
||||
mapped = total_lowmem - s;
|
||||
|
||||
/* If the size of RAM is not an exact power of two, we may not
|
||||
* have covered RAM in its entirety with 16 and 4 MiB
|
||||
* pages. Consequently, restrict the top end of RAM currently
|
||||
* allocable so that calls to the MEMBLOCK to allocate PTEs for "tail"
|
||||
* coverage with normal-sized pages (or other reasons) do not
|
||||
* attempt to allocate outside the allowed range.
|
||||
*/
|
||||
memblock_set_current_limit(mapped);
|
||||
|
||||
return mapped;
|
||||
}
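A small stand-alone sketch of the coverage loops above (illustrative only, not part of this file; the 70 MiB lowmem size is an assumed example), showing how a non-power-of-two lowmem leaves a tail that must later be covered with normal-sized pages:

#include <stdio.h>

int main(void)
{
        unsigned long total_lowmem = 70UL << 20;        /* assumed: 70 MiB of lowmem */
        unsigned long s = total_lowmem;

        while (s >= (1UL << 24))        /* LARGE_PAGE_SIZE_16M */
                s -= 1UL << 24;
        while (s >= (1UL << 22))        /* LARGE_PAGE_SIZE_4M */
                s -= 1UL << 22;

        printf("mapped %lu MiB, %lu MiB tail left for normal pages\n",
               (total_lowmem - s) >> 20, s >> 20);
        return 0;       /* 70 MiB -> 68 MiB large-page mapped, 2 MiB tail */
}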
|
||||
|
||||
void setup_initial_memory_limit(phys_addr_t first_memblock_base,
|
||||
phys_addr_t first_memblock_size)
|
||||
{
|
||||
/* We don't currently support the first MEMBLOCK not mapping 0
|
||||
* physical on those processors
|
||||
*/
|
||||
BUG_ON(first_memblock_base != 0);
|
||||
|
||||
/* 40x can only access 16MB at the moment (see head_40x.S) */
|
||||
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000));
|
||||
}
|
||||
arch/powerpc/mm/44x_mmu.c (new file, 254 lines added)
|
|
@@ -0,0 +1,254 @@
|
|||
/*
|
||||
* Modifications by Matt Porter (mporter@mvista.com) to support
|
||||
* PPC44x Book E processors.
|
||||
*
|
||||
* This file contains the routines for initializing the MMU
|
||||
* on the 4xx series of chips.
|
||||
* -- paulus
|
||||
*
|
||||
* Derived from arch/ppc/mm/init.c:
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
|
||||
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
|
||||
* Copyright (C) 1996 Paul Mackerras
|
||||
*
|
||||
* Derived from "arch/i386/mm/init.c"
|
||||
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/memblock.h>
|
||||
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/cacheflush.h>
|
||||
|
||||
#include "mmu_decl.h"
|
||||
|
||||
/* Used by the 44x TLB replacement exception handler.
|
||||
* Just needed it declared someplace.
|
||||
*/
|
||||
unsigned int tlb_44x_index; /* = 0 */
|
||||
unsigned int tlb_44x_hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS;
|
||||
int icache_44x_need_flush;
|
||||
|
||||
unsigned long tlb_47x_boltmap[1024/8];
|
||||
|
||||
static void ppc44x_update_tlb_hwater(void)
|
||||
{
|
||||
extern unsigned int tlb_44x_patch_hwater_D[];
|
||||
extern unsigned int tlb_44x_patch_hwater_I[];
|
||||
|
||||
/* The TLB miss handlers hard codes the watermark in a cmpli
|
||||
* instruction to improve performances rather than loading it
|
||||
* from the global variable. Thus, we patch the instructions
|
||||
* in the 2 TLB miss handlers when updating the value
|
||||
*/
|
||||
tlb_44x_patch_hwater_D[0] = (tlb_44x_patch_hwater_D[0] & 0xffff0000) |
|
||||
tlb_44x_hwater;
|
||||
flush_icache_range((unsigned long)&tlb_44x_patch_hwater_D[0],
|
||||
(unsigned long)&tlb_44x_patch_hwater_D[1]);
|
||||
tlb_44x_patch_hwater_I[0] = (tlb_44x_patch_hwater_I[0] & 0xffff0000) |
|
||||
tlb_44x_hwater;
|
||||
flush_icache_range((unsigned long)&tlb_44x_patch_hwater_I[0],
|
||||
(unsigned long)&tlb_44x_patch_hwater_I[1]);
|
||||
}
|
||||
|
||||
/*
|
||||
* "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 44x type MMU
|
||||
*/
|
||||
static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
|
||||
{
|
||||
unsigned int entry = tlb_44x_hwater--;
|
||||
|
||||
ppc44x_update_tlb_hwater();
|
||||
|
||||
mtspr(SPRN_MMUCR, 0);
|
||||
|
||||
__asm__ __volatile__(
|
||||
"tlbwe %2,%3,%4\n"
|
||||
"tlbwe %1,%3,%5\n"
|
||||
"tlbwe %0,%3,%6\n"
|
||||
:
|
||||
: "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),
|
||||
"r" (phys),
|
||||
"r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M),
|
||||
"r" (entry),
|
||||
"i" (PPC44x_TLB_PAGEID),
|
||||
"i" (PPC44x_TLB_XLAT),
|
||||
"i" (PPC44x_TLB_ATTRIB));
|
||||
}
|
||||
|
||||
static int __init ppc47x_find_free_bolted(void)
|
||||
{
|
||||
unsigned int mmube0 = mfspr(SPRN_MMUBE0);
|
||||
unsigned int mmube1 = mfspr(SPRN_MMUBE1);
|
||||
|
||||
if (!(mmube0 & MMUBE0_VBE0))
|
||||
return 0;
|
||||
if (!(mmube0 & MMUBE0_VBE1))
|
||||
return 1;
|
||||
if (!(mmube0 & MMUBE0_VBE2))
|
||||
return 2;
|
||||
if (!(mmube1 & MMUBE1_VBE3))
|
||||
return 3;
|
||||
if (!(mmube1 & MMUBE1_VBE4))
|
||||
return 4;
|
||||
if (!(mmube1 & MMUBE1_VBE5))
|
||||
return 5;
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void __init ppc47x_update_boltmap(void)
|
||||
{
|
||||
unsigned int mmube0 = mfspr(SPRN_MMUBE0);
|
||||
unsigned int mmube1 = mfspr(SPRN_MMUBE1);
|
||||
|
||||
if (mmube0 & MMUBE0_VBE0)
|
||||
__set_bit((mmube0 >> MMUBE0_IBE0_SHIFT) & 0xff,
|
||||
tlb_47x_boltmap);
|
||||
if (mmube0 & MMUBE0_VBE1)
|
||||
__set_bit((mmube0 >> MMUBE0_IBE1_SHIFT) & 0xff,
|
||||
tlb_47x_boltmap);
|
||||
if (mmube0 & MMUBE0_VBE2)
|
||||
__set_bit((mmube0 >> MMUBE0_IBE2_SHIFT) & 0xff,
|
||||
tlb_47x_boltmap);
|
||||
if (mmube1 & MMUBE1_VBE3)
|
||||
__set_bit((mmube1 >> MMUBE1_IBE3_SHIFT) & 0xff,
|
||||
tlb_47x_boltmap);
|
||||
if (mmube1 & MMUBE1_VBE4)
|
||||
__set_bit((mmube1 >> MMUBE1_IBE4_SHIFT) & 0xff,
|
||||
tlb_47x_boltmap);
|
||||
if (mmube1 & MMUBE1_VBE5)
|
||||
__set_bit((mmube1 >> MMUBE1_IBE5_SHIFT) & 0xff,
|
||||
tlb_47x_boltmap);
|
||||
}
|
||||
|
||||
/*
|
||||
* "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU
|
||||
*/
|
||||
static void ppc47x_pin_tlb(unsigned int virt, unsigned int phys)
|
||||
{
|
||||
unsigned int rA;
|
||||
int bolted;
|
||||
|
||||
/* Base rA is HW way select, way 0, bolted bit set */
|
||||
rA = 0x88000000;
|
||||
|
||||
/* Look for a bolted entry slot */
|
||||
bolted = ppc47x_find_free_bolted();
|
||||
BUG_ON(bolted < 0);
|
||||
|
||||
/* Insert bolted slot number */
|
||||
rA |= bolted << 24;
|
||||
|
||||
pr_debug("256M TLB entry for 0x%08x->0x%08x in bolt slot %d\n",
|
||||
virt, phys, bolted);
|
||||
|
||||
mtspr(SPRN_MMUCR, 0);
|
||||
|
||||
__asm__ __volatile__(
|
||||
"tlbwe %2,%3,0\n"
|
||||
"tlbwe %1,%3,1\n"
|
||||
"tlbwe %0,%3,2\n"
|
||||
:
|
||||
: "r" (PPC47x_TLB2_SW | PPC47x_TLB2_SR |
|
||||
PPC47x_TLB2_SX
|
||||
#ifdef CONFIG_SMP
|
||||
| PPC47x_TLB2_M
|
||||
#endif
|
||||
),
|
||||
"r" (phys),
|
||||
"r" (virt | PPC47x_TLB0_VALID | PPC47x_TLB0_256M),
|
||||
"r" (rA));
|
||||
}
|
||||
|
||||
void __init MMU_init_hw(void)
|
||||
{
|
||||
/* This is not useful on 47x but won't hurt either */
|
||||
ppc44x_update_tlb_hwater();
|
||||
|
||||
flush_instruction_cache();
|
||||
}
|
||||
|
||||
unsigned long __init mmu_mapin_ram(unsigned long top)
|
||||
{
|
||||
unsigned long addr;
|
||||
unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1);
|
||||
|
||||
/* Pin in enough TLBs to cover any lowmem not covered by the
|
||||
* initial 256M mapping established in head_44x.S */
|
||||
for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr;
|
||||
addr += PPC_PIN_SIZE) {
|
||||
if (mmu_has_feature(MMU_FTR_TYPE_47x))
|
||||
ppc47x_pin_tlb(addr + PAGE_OFFSET, addr);
|
||||
else
|
||||
ppc44x_pin_tlb(addr + PAGE_OFFSET, addr);
|
||||
}
|
||||
if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
|
||||
ppc47x_update_boltmap();
|
||||
|
||||
#ifdef DEBUG
|
||||
{
|
||||
int i;
|
||||
|
||||
printk(KERN_DEBUG "bolted entries: ");
|
||||
for (i = 0; i < 255; i++) {
|
||||
if (test_bit(i, tlb_47x_boltmap))
|
||||
printk("%d ", i);
|
||||
}
|
||||
printk("\n");
|
||||
}
|
||||
#endif /* DEBUG */
|
||||
}
|
||||
return total_lowmem;
|
||||
}
|
||||
|
||||
void setup_initial_memory_limit(phys_addr_t first_memblock_base,
|
||||
phys_addr_t first_memblock_size)
|
||||
{
|
||||
u64 size;
|
||||
|
||||
#ifndef CONFIG_NONSTATIC_KERNEL
|
||||
/* We don't currently support the first MEMBLOCK not mapping 0
|
||||
* physical on those processors
|
||||
*/
|
||||
BUG_ON(first_memblock_base != 0);
|
||||
#endif
|
||||
|
||||
/* 44x has a 256M TLB entry pinned at boot */
|
||||
size = (min_t(u64, first_memblock_size, PPC_PIN_SIZE));
|
||||
memblock_set_current_limit(first_memblock_base + size);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
void mmu_init_secondary(int cpu)
|
||||
{
|
||||
unsigned long addr;
|
||||
unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1);
|
||||
|
||||
/* Pin in enough TLBs to cover any lowmem not covered by the
|
||||
* initial 256M mapping established in head_44x.S
|
||||
*
|
||||
* WARNING: This is called with only the first 256M of the
|
||||
* linear mapping in the TLB and we can't take faults yet
|
||||
* so beware of what this code uses. It runs off a temporary
|
||||
* stack. current (r2) isn't initialized, smp_processor_id()
|
||||
* will not work, current thread info isn't accessible, ...
|
||||
*/
|
||||
for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr;
|
||||
addr += PPC_PIN_SIZE) {
|
||||
if (mmu_has_feature(MMU_FTR_TYPE_47x))
|
||||
ppc47x_pin_tlb(addr + PAGE_OFFSET, addr);
|
||||
else
|
||||
ppc44x_pin_tlb(addr + PAGE_OFFSET, addr);
|
||||
}
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
arch/powerpc/mm/Makefile (new file, 37 lines added)
|
|
@@ -0,0 +1,37 @@
|
|||
#
|
||||
# Makefile for the linux ppc-specific parts of the memory manager.
|
||||
#
|
||||
|
||||
subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
|
||||
|
||||
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
|
||||
|
||||
obj-y := fault.o mem.o pgtable.o gup.o mmap.o \
|
||||
init_$(CONFIG_WORD_SIZE).o \
|
||||
pgtable_$(CONFIG_WORD_SIZE).o
|
||||
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
|
||||
tlb_nohash_low.o
|
||||
obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o
|
||||
hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
|
||||
obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o slb_low.o slb.o $(hash64-y)
|
||||
obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o
|
||||
obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \
|
||||
tlb_hash$(CONFIG_WORD_SIZE).o \
|
||||
mmu_context_hash$(CONFIG_WORD_SIZE).o
|
||||
obj-$(CONFIG_PPC_ICSWX) += icswx.o
|
||||
obj-$(CONFIG_PPC_ICSWX_PID) += icswx_pid.o
|
||||
obj-$(CONFIG_40x) += 40x_mmu.o
|
||||
obj-$(CONFIG_44x) += 44x_mmu.o
|
||||
obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o
|
||||
obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
|
||||
obj-$(CONFIG_PPC_MM_SLICES) += slice.o
|
||||
obj-y += hugetlbpage.o
|
||||
ifeq ($(CONFIG_HUGETLB_PAGE),y)
|
||||
obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o
|
||||
obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o
|
||||
endif
|
||||
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o
|
||||
obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
|
||||
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
|
||||
obj-$(CONFIG_HIGHMEM) += highmem.o
|
||||
obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
|
||||
arch/powerpc/mm/copro_fault.c (new file, 148 lines added)
|
|
@@ -0,0 +1,148 @@
|
|||
/*
|
||||
* CoProcessor (SPU/AFU) mm fault handler
|
||||
*
|
||||
* (C) Copyright IBM Deutschland Entwicklung GmbH 2007
|
||||
*
|
||||
* Author: Arnd Bergmann <arndb@de.ibm.com>
|
||||
* Author: Jeremy Kerr <jk@ozlabs.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/export.h>
|
||||
#include <asm/reg.h>
|
||||
#include <asm/copro.h>
|
||||
#include <asm/spu.h>
|
||||
#include <misc/cxl.h>
|
||||
|
||||
/*
|
||||
* This ought to be kept in sync with the powerpc specific do_page_fault
|
||||
* function. Currently, there are a few corner cases that we haven't had
|
||||
* to handle fortunately.
|
||||
*/
|
||||
int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
|
||||
unsigned long dsisr, unsigned *flt)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long is_write;
|
||||
int ret;
|
||||
|
||||
if (mm == NULL)
|
||||
return -EFAULT;
|
||||
|
||||
if (mm->pgd == NULL)
|
||||
return -EFAULT;
|
||||
|
||||
down_read(&mm->mmap_sem);
|
||||
ret = -EFAULT;
|
||||
vma = find_vma(mm, ea);
|
||||
if (!vma)
|
||||
goto out_unlock;
|
||||
|
||||
if (ea < vma->vm_start) {
|
||||
if (!(vma->vm_flags & VM_GROWSDOWN))
|
||||
goto out_unlock;
|
||||
if (expand_stack(vma, ea))
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
is_write = dsisr & DSISR_ISSTORE;
|
||||
if (is_write) {
|
||||
if (!(vma->vm_flags & VM_WRITE))
|
||||
goto out_unlock;
|
||||
} else {
|
||||
if (dsisr & DSISR_PROTFAULT)
|
||||
goto out_unlock;
|
||||
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
*flt = handle_mm_fault(mm, vma, ea, is_write ? FAULT_FLAG_WRITE : 0);
|
||||
if (unlikely(*flt & VM_FAULT_ERROR)) {
|
||||
if (*flt & VM_FAULT_OOM) {
|
||||
ret = -ENOMEM;
|
||||
goto out_unlock;
|
||||
} else if (*flt & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
|
||||
ret = -EFAULT;
|
||||
goto out_unlock;
|
||||
}
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (*flt & VM_FAULT_MAJOR)
|
||||
current->maj_flt++;
|
||||
else
|
||||
current->min_flt++;
|
||||
|
||||
out_unlock:
|
||||
up_read(&mm->mmap_sem);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(copro_handle_mm_fault);
|
||||
|
||||
int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
|
||||
{
|
||||
u64 vsid;
|
||||
int psize, ssize;
|
||||
|
||||
switch (REGION_ID(ea)) {
|
||||
case USER_REGION_ID:
|
||||
pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
|
||||
psize = get_slice_psize(mm, ea);
|
||||
ssize = user_segment_size(ea);
|
||||
vsid = get_vsid(mm->context.id, ea, ssize);
|
||||
break;
|
||||
case VMALLOC_REGION_ID:
|
||||
pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea);
|
||||
if (ea < VMALLOC_END)
|
||||
psize = mmu_vmalloc_psize;
|
||||
else
|
||||
psize = mmu_io_psize;
|
||||
ssize = mmu_kernel_ssize;
|
||||
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
|
||||
break;
|
||||
case KERNEL_REGION_ID:
|
||||
pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea);
|
||||
psize = mmu_linear_psize;
|
||||
ssize = mmu_kernel_ssize;
|
||||
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
|
||||
break;
|
||||
default:
|
||||
pr_debug("%s: invalid region access at %016llx\n", __func__, ea);
|
||||
return 1;
|
||||
}
|
||||
|
||||
vsid = (vsid << slb_vsid_shift(ssize)) | SLB_VSID_USER;
|
||||
|
||||
vsid |= mmu_psize_defs[psize].sllp |
|
||||
((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0);
|
||||
|
||||
slb->esid = (ea & (ssize == MMU_SEGSIZE_1T ? ESID_MASK_1T : ESID_MASK)) | SLB_ESID_V;
|
||||
slb->vsid = vsid;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(copro_calculate_slb);
|
||||
|
||||
void copro_flush_all_slbs(struct mm_struct *mm)
|
||||
{
|
||||
#ifdef CONFIG_SPU_BASE
|
||||
spu_flush_all_slbs(mm);
|
||||
#endif
|
||||
cxl_slbia(mm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(copro_flush_all_slbs);
|
||||
arch/powerpc/mm/dma-noncoherent.c (new file, 420 lines added)
|
|
@@ -0,0 +1,420 @@
|
|||
/*
|
||||
* PowerPC version derived from arch/arm/mm/consistent.c
|
||||
* Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
|
||||
*
|
||||
* Copyright (C) 2000 Russell King
|
||||
*
|
||||
* Consistent memory allocators. Used for DMA devices that want to
|
||||
* share uncached memory with the processor core. The function return
|
||||
* is the virtual address and 'dma_handle' is the physical address.
|
||||
* Mostly stolen from the ARM port, with some changes for PowerPC.
|
||||
* -- Dan
|
||||
*
|
||||
* Reorganized to get rid of the arch-specific consistent_* functions
|
||||
* and provide non-coherent implementations for the DMA API. -Matt
|
||||
*
|
||||
* Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
|
||||
* implementation. This is pulled straight from ARM and barely
|
||||
* modified. -Matt
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/export.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/dma.h>
|
||||
|
||||
#include "mmu_decl.h"
|
||||
|
||||
/*
|
||||
* This address range defaults to a value that is safe for all
|
||||
* platforms which currently set CONFIG_NOT_COHERENT_CACHE. It
|
||||
* can be further configured for specific applications under
|
||||
* the "Advanced Setup" menu. -Matt
|
||||
*/
|
||||
#define CONSISTENT_BASE (IOREMAP_TOP)
|
||||
#define CONSISTENT_END (CONSISTENT_BASE + CONFIG_CONSISTENT_SIZE)
|
||||
#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
|
||||
|
||||
/*
|
||||
* This is the page table (2MB) covering uncached, DMA consistent allocations
|
||||
*/
|
||||
static DEFINE_SPINLOCK(consistent_lock);
|
||||
|
||||
/*
|
||||
* VM region handling support.
|
||||
*
|
||||
* This should become something generic, handling VM region allocations for
|
||||
* vmalloc and similar (ioremap, module space, etc).
|
||||
*
|
||||
* I envisage vmalloc()'s supporting vm_struct becoming:
|
||||
*
|
||||
* struct vm_struct {
|
||||
* struct vm_region region;
|
||||
* unsigned long flags;
|
||||
* struct page **pages;
|
||||
* unsigned int nr_pages;
|
||||
* unsigned long phys_addr;
|
||||
* };
|
||||
*
|
||||
* get_vm_area() would then call vm_region_alloc with an appropriate
|
||||
* struct vm_region head (eg):
|
||||
*
|
||||
* struct vm_region vmalloc_head = {
|
||||
* .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list),
|
||||
* .vm_start = VMALLOC_START,
|
||||
* .vm_end = VMALLOC_END,
|
||||
* };
|
||||
*
|
||||
* However, vmalloc_head.vm_start is variable (typically, it is dependent on
|
||||
* the amount of RAM found at boot time.) I would imagine that get_vm_area()
|
||||
* would have to initialise this each time prior to calling vm_region_alloc().
|
||||
*/
|
||||
struct ppc_vm_region {
|
||||
struct list_head vm_list;
|
||||
unsigned long vm_start;
|
||||
unsigned long vm_end;
|
||||
};
|
||||
|
||||
static struct ppc_vm_region consistent_head = {
|
||||
.vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
|
||||
.vm_start = CONSISTENT_BASE,
|
||||
.vm_end = CONSISTENT_END,
|
||||
};
|
||||
|
||||
static struct ppc_vm_region *
|
||||
ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp)
|
||||
{
|
||||
unsigned long addr = head->vm_start, end = head->vm_end - size;
|
||||
unsigned long flags;
|
||||
struct ppc_vm_region *c, *new;
|
||||
|
||||
new = kmalloc(sizeof(struct ppc_vm_region), gfp);
|
||||
if (!new)
|
||||
goto out;
|
||||
|
||||
spin_lock_irqsave(&consistent_lock, flags);
|
||||
|
||||
list_for_each_entry(c, &head->vm_list, vm_list) {
|
||||
if ((addr + size) < addr)
|
||||
goto nospc;
|
||||
if ((addr + size) <= c->vm_start)
|
||||
goto found;
|
||||
addr = c->vm_end;
|
||||
if (addr > end)
|
||||
goto nospc;
|
||||
}
|
||||
|
||||
found:
|
||||
/*
|
||||
* Insert this entry _before_ the one we found.
|
||||
*/
|
||||
list_add_tail(&new->vm_list, &c->vm_list);
|
||||
new->vm_start = addr;
|
||||
new->vm_end = addr + size;
|
||||
|
||||
spin_unlock_irqrestore(&consistent_lock, flags);
|
||||
return new;
|
||||
|
||||
nospc:
|
||||
spin_unlock_irqrestore(&consistent_lock, flags);
|
||||
kfree(new);
|
||||
out:
|
||||
return NULL;
|
||||
}
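The list walk in ppc_vm_region_alloc() is a first-fit search over the sorted list of in-use regions. A minimal stand-alone sketch of the same search (illustrative only, not from this file; the region numbers are made up):

#include <stdio.h>

struct region { unsigned long start, end; };    /* [start, end) */

/* same first-fit walk as ppc_vm_region_alloc(), on a plain sorted array */
static unsigned long first_fit(const struct region *used, int n,
                               unsigned long base, unsigned long limit,
                               unsigned long size)
{
        unsigned long addr = base;
        int i;

        for (i = 0; i < n; i++) {
                if (addr + size <= used[i].start)
                        return addr;            /* gap before this region */
                addr = used[i].end;             /* skip past it */
        }
        return (addr + size <= limit) ? addr : 0;       /* tail gap, or no space */
}

int main(void)
{
        const struct region used[] = { { 0x1000, 0x3000 }, { 0x5000, 0x6000 } };

        printf("0x%lx\n", first_fit(used, 2, 0x1000, 0x10000, 0x2000));
        return 0;       /* prints 0x3000: the hole between the two regions */
}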
|
||||
|
||||
static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr)
|
||||
{
|
||||
struct ppc_vm_region *c;
|
||||
|
||||
list_for_each_entry(c, &head->vm_list, vm_list) {
|
||||
if (c->vm_start == addr)
|
||||
goto out;
|
||||
}
|
||||
c = NULL;
|
||||
out:
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate DMA-coherent memory space and return both the kernel remapped
|
||||
* virtual and bus address for that space.
|
||||
*/
|
||||
void *
|
||||
__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
|
||||
{
|
||||
struct page *page;
|
||||
struct ppc_vm_region *c;
|
||||
unsigned long order;
|
||||
u64 mask = ISA_DMA_THRESHOLD, limit;
|
||||
|
||||
if (dev) {
|
||||
mask = dev->coherent_dma_mask;
|
||||
|
||||
/*
|
||||
* Sanity check the DMA mask - it must be non-zero, and
|
||||
* must be able to be satisfied by a DMA allocation.
|
||||
*/
|
||||
if (mask == 0) {
|
||||
dev_warn(dev, "coherent DMA mask is unset\n");
|
||||
goto no_page;
|
||||
}
|
||||
|
||||
if ((~mask) & ISA_DMA_THRESHOLD) {
|
||||
dev_warn(dev, "coherent DMA mask %#llx is smaller "
|
||||
"than system GFP_DMA mask %#llx\n",
|
||||
mask, (unsigned long long)ISA_DMA_THRESHOLD);
|
||||
goto no_page;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
size = PAGE_ALIGN(size);
|
||||
limit = (mask + 1) & ~mask;
|
||||
if ((limit && size >= limit) ||
|
||||
size >= (CONSISTENT_END - CONSISTENT_BASE)) {
|
||||
printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n",
|
||||
size, mask);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
order = get_order(size);
|
||||
|
||||
/* Might be useful if we ever have a real legacy DMA zone... */
|
||||
if (mask != 0xffffffff)
|
||||
gfp |= GFP_DMA;
|
||||
|
||||
page = alloc_pages(gfp, order);
|
||||
if (!page)
|
||||
goto no_page;
|
||||
|
||||
/*
|
||||
* Invalidate any data that might be lurking in the
|
||||
* kernel direct-mapped region for device DMA.
|
||||
*/
|
||||
{
|
||||
unsigned long kaddr = (unsigned long)page_address(page);
|
||||
memset(page_address(page), 0, size);
|
||||
flush_dcache_range(kaddr, kaddr + size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a virtual address in the consistent mapping region.
|
||||
*/
|
||||
c = ppc_vm_region_alloc(&consistent_head, size,
|
||||
gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
|
||||
if (c) {
|
||||
unsigned long vaddr = c->vm_start;
|
||||
struct page *end = page + (1 << order);
|
||||
|
||||
split_page(page, order);
|
||||
|
||||
/*
|
||||
* Set the "dma handle"
|
||||
*/
|
||||
*handle = page_to_phys(page);
|
||||
|
||||
do {
|
||||
SetPageReserved(page);
|
||||
map_page(vaddr, page_to_phys(page),
|
||||
pgprot_noncached(PAGE_KERNEL));
|
||||
page++;
|
||||
vaddr += PAGE_SIZE;
|
||||
} while (size -= PAGE_SIZE);
|
||||
|
||||
/*
|
||||
* Free the otherwise unused pages.
|
||||
*/
|
||||
while (page < end) {
|
||||
__free_page(page);
|
||||
page++;
|
||||
}
|
||||
|
||||
return (void *)c->vm_start;
|
||||
}
|
||||
|
||||
if (page)
|
||||
__free_pages(page, order);
|
||||
no_page:
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(__dma_alloc_coherent);
|
||||
|
||||
/*
|
||||
* free a page as defined by the above mapping.
|
||||
*/
|
||||
void __dma_free_coherent(size_t size, void *vaddr)
|
||||
{
|
||||
struct ppc_vm_region *c;
|
||||
unsigned long flags, addr;
|
||||
|
||||
size = PAGE_ALIGN(size);
|
||||
|
||||
spin_lock_irqsave(&consistent_lock, flags);
|
||||
|
||||
c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr);
|
||||
if (!c)
|
||||
goto no_area;
|
||||
|
||||
if ((c->vm_end - c->vm_start) != size) {
|
||||
printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
|
||||
__func__, c->vm_end - c->vm_start, size);
|
||||
dump_stack();
|
||||
size = c->vm_end - c->vm_start;
|
||||
}
|
||||
|
||||
addr = c->vm_start;
|
||||
do {
|
||||
pte_t *ptep;
|
||||
unsigned long pfn;
|
||||
|
||||
ptep = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(addr),
|
||||
addr),
|
||||
addr),
|
||||
addr);
|
||||
if (!pte_none(*ptep) && pte_present(*ptep)) {
|
||||
pfn = pte_pfn(*ptep);
|
||||
pte_clear(&init_mm, addr, ptep);
|
||||
if (pfn_valid(pfn)) {
|
||||
struct page *page = pfn_to_page(pfn);
|
||||
__free_reserved_page(page);
|
||||
}
|
||||
}
|
||||
addr += PAGE_SIZE;
|
||||
} while (size -= PAGE_SIZE);
|
||||
|
||||
flush_tlb_kernel_range(c->vm_start, c->vm_end);
|
||||
|
||||
list_del(&c->vm_list);
|
||||
|
||||
spin_unlock_irqrestore(&consistent_lock, flags);
|
||||
|
||||
kfree(c);
|
||||
return;
|
||||
|
||||
no_area:
|
||||
spin_unlock_irqrestore(&consistent_lock, flags);
|
||||
printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
|
||||
__func__, vaddr);
|
||||
dump_stack();
|
||||
}
|
||||
EXPORT_SYMBOL(__dma_free_coherent);
|
||||
|
||||
/*
|
||||
* make an area consistent.
|
||||
*/
|
||||
void __dma_sync(void *vaddr, size_t size, int direction)
|
||||
{
|
||||
unsigned long start = (unsigned long)vaddr;
|
||||
unsigned long end = start + size;
|
||||
|
||||
switch (direction) {
|
||||
case DMA_NONE:
|
||||
BUG();
|
||||
case DMA_FROM_DEVICE:
|
||||
/*
|
||||
* invalidate only when cache-line aligned otherwise there is
|
||||
* the potential for discarding uncommitted data from the cache
|
||||
*/
|
||||
if ((start & (L1_CACHE_BYTES - 1)) || (size & (L1_CACHE_BYTES - 1)))
|
||||
flush_dcache_range(start, end);
|
||||
else
|
||||
invalidate_dcache_range(start, end);
|
||||
break;
|
||||
case DMA_TO_DEVICE: /* writeback only */
|
||||
clean_dcache_range(start, end);
|
||||
break;
|
||||
case DMA_BIDIRECTIONAL: /* writeback and invalidate */
|
||||
flush_dcache_range(start, end);
|
||||
break;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(__dma_sync);
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
/*
|
||||
* __dma_sync_page() implementation for systems using highmem.
|
||||
* In this case, each page of a buffer must be kmapped/kunmapped
|
||||
* in order to have a virtual address for __dma_sync(). This must
|
||||
* not sleep so kmap_atomic()/kunmap_atomic() are used.
|
||||
*
|
||||
* Note: yes, it is possible and correct to have a buffer extend
|
||||
* beyond the first page.
|
||||
*/
|
||||
static inline void __dma_sync_page_highmem(struct page *page,
|
||||
unsigned long offset, size_t size, int direction)
|
||||
{
|
||||
size_t seg_size = min((size_t)(PAGE_SIZE - offset), size);
|
||||
size_t cur_size = seg_size;
|
||||
unsigned long flags, start, seg_offset = offset;
|
||||
int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE;
|
||||
int seg_nr = 0;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
do {
|
||||
start = (unsigned long)kmap_atomic(page + seg_nr) + seg_offset;
|
||||
|
||||
/* Sync this buffer segment */
|
||||
__dma_sync((void *)start, seg_size, direction);
|
||||
kunmap_atomic((void *)start);
|
||||
seg_nr++;
|
||||
|
||||
/* Calculate next buffer segment size */
|
||||
seg_size = min((size_t)PAGE_SIZE, size - cur_size);
|
||||
|
||||
/* Add the segment size to our running total */
|
||||
cur_size += seg_size;
|
||||
seg_offset = 0;
|
||||
} while (seg_nr < nr_segs);
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
#endif /* CONFIG_HIGHMEM */
|
||||
|
||||
/*
|
||||
* __dma_sync_page makes memory consistent. identical to __dma_sync, but
|
||||
* takes a struct page instead of a virtual address
|
||||
*/
|
||||
void __dma_sync_page(struct page *page, unsigned long offset,
|
||||
size_t size, int direction)
|
||||
{
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
__dma_sync_page_highmem(page, offset, size, direction);
|
||||
#else
|
||||
unsigned long start = (unsigned long)page_address(page) + offset;
|
||||
__dma_sync((void *)start, size, direction);
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(__dma_sync_page);
|
||||
|
||||
/*
|
||||
* Return the PFN for a given cpu virtual address returned by
|
||||
* __dma_alloc_coherent. This is used by dma_mmap_coherent()
|
||||
*/
|
||||
unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr)
|
||||
{
|
||||
/* This should always be populated, so we don't test every
|
||||
* level. If that fails, we'll have a nice crash which
|
||||
* will be as good as a BUG_ON()
|
||||
*/
|
||||
pgd_t *pgd = pgd_offset_k(cpu_addr);
|
||||
pud_t *pud = pud_offset(pgd, cpu_addr);
|
||||
pmd_t *pmd = pmd_offset(pud, cpu_addr);
|
||||
pte_t *ptep = pte_offset_kernel(pmd, cpu_addr);
|
||||
|
||||
if (pte_none(*ptep) || !pte_present(*ptep))
|
||||
return 0;
|
||||
return pte_pfn(*ptep);
|
||||
}
|
||||
arch/powerpc/mm/fault.c (new file, 557 lines added)
|
|
@@ -0,0 +1,557 @@
|
|||
/*
|
||||
* PowerPC version
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Derived from "arch/i386/mm/fault.c"
|
||||
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
||||
*
|
||||
* Modified by Cort Dougan and Paul Mackerras.
|
||||
*
|
||||
* Modified for PPC64 by Dave Engebretsen (engebret@ibm.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <linux/signal.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/kdebug.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/hugetlb.h>
|
||||
|
||||
#include <asm/firmware.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/siginfo.h>
|
||||
#include <asm/debug.h>
|
||||
#include <mm/mmu_decl.h>
|
||||
|
||||
#include "icswx.h"
|
||||
|
||||
#ifdef CONFIG_KPROBES
|
||||
static inline int notify_page_fault(struct pt_regs *regs)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
/* kprobe_running() needs smp_processor_id() */
|
||||
if (!user_mode(regs)) {
|
||||
preempt_disable();
|
||||
if (kprobe_running() && kprobe_fault_handler(regs, 11))
|
||||
ret = 1;
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
static inline int notify_page_fault(struct pt_regs *regs)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Check whether the instruction at regs->nip is a store using
|
||||
* an update addressing form which will update r1.
|
||||
*/
|
||||
static int store_updates_sp(struct pt_regs *regs)
|
||||
{
|
||||
unsigned int inst;
|
||||
|
||||
if (get_user(inst, (unsigned int __user *)regs->nip))
|
||||
return 0;
|
||||
/* check for 1 in the rA field */
|
||||
if (((inst >> 16) & 0x1f) != 1)
|
||||
return 0;
|
||||
/* check major opcode */
|
||||
switch (inst >> 26) {
|
||||
case 37: /* stwu */
|
||||
case 39: /* stbu */
|
||||
case 45: /* sthu */
|
||||
case 53: /* stfsu */
|
||||
case 55: /* stfdu */
|
||||
return 1;
|
||||
case 62: /* std or stdu */
|
||||
return (inst & 3) == 1;
|
||||
case 31:
|
||||
/* check minor opcode */
|
||||
switch ((inst >> 1) & 0x3ff) {
|
||||
case 181: /* stdux */
|
||||
case 183: /* stwux */
|
||||
case 247: /* stbux */
|
||||
case 439: /* sthux */
|
||||
case 695: /* stfsux */
|
||||
case 759: /* stfdux */
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
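A stand-alone sketch (illustrative only, not part of this file) decoding the two fields store_updates_sp() tests, using the common prologue instruction stwu r1,-16(r1), encoding 0x9421fff0, as the example word:

#include <stdio.h>

int main(void)
{
        unsigned int inst = 0x9421fff0;         /* stwu r1,-16(r1) */
        unsigned int major = inst >> 26;        /* primary opcode field */
        unsigned int ra = (inst >> 16) & 0x1f;  /* rA field checked above */

        printf("major opcode = %u, rA = r%u\n", major, ra);
        return 0;       /* prints 37 and r1, i.e. a store that updates the stack pointer */
}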
|
||||
/*
|
||||
* do_page_fault error handling helpers
|
||||
*/
|
||||
|
||||
#define MM_FAULT_RETURN 0
|
||||
#define MM_FAULT_CONTINUE -1
|
||||
#define MM_FAULT_ERR(sig) (sig)
|
||||
|
||||
static int do_sigbus(struct pt_regs *regs, unsigned long address,
|
||||
unsigned int fault)
|
||||
{
|
||||
siginfo_t info;
|
||||
unsigned int lsb = 0;
|
||||
|
||||
up_read(&current->mm->mmap_sem);
|
||||
|
||||
if (!user_mode(regs))
|
||||
return MM_FAULT_ERR(SIGBUS);
|
||||
|
||||
current->thread.trap_nr = BUS_ADRERR;
|
||||
info.si_signo = SIGBUS;
|
||||
info.si_errno = 0;
|
||||
info.si_code = BUS_ADRERR;
|
||||
info.si_addr = (void __user *)address;
|
||||
#ifdef CONFIG_MEMORY_FAILURE
|
||||
if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
|
||||
pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
|
||||
current->comm, current->pid, address);
|
||||
info.si_code = BUS_MCEERR_AR;
|
||||
}
|
||||
|
||||
if (fault & VM_FAULT_HWPOISON_LARGE)
|
||||
lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
|
||||
if (fault & VM_FAULT_HWPOISON)
|
||||
lsb = PAGE_SHIFT;
|
||||
#endif
|
||||
info.si_addr_lsb = lsb;
|
||||
force_sig_info(SIGBUS, &info, current);
|
||||
return MM_FAULT_RETURN;
|
||||
}
|
||||
|
||||
static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
|
||||
{
|
||||
/*
|
||||
* Pagefault was interrupted by SIGKILL. We have no reason to
|
||||
* continue the pagefault.
|
||||
*/
|
||||
if (fatal_signal_pending(current)) {
|
||||
/*
|
||||
* If we have retry set, the mmap semaphore will have
|
||||
* already been released in __lock_page_or_retry(). Else
|
||||
* we release it now.
|
||||
*/
|
||||
if (!(fault & VM_FAULT_RETRY))
|
||||
up_read(&current->mm->mmap_sem);
|
||||
/* Coming from kernel, we need to deal with uaccess fixups */
|
||||
if (user_mode(regs))
|
||||
return MM_FAULT_RETURN;
|
||||
return MM_FAULT_ERR(SIGKILL);
|
||||
}
|
||||
|
||||
/* No fault: be happy */
|
||||
if (!(fault & VM_FAULT_ERROR))
|
||||
return MM_FAULT_CONTINUE;
|
||||
|
||||
/* Out of memory */
|
||||
if (fault & VM_FAULT_OOM) {
|
||||
up_read(&current->mm->mmap_sem);
|
||||
|
||||
/*
|
||||
* We ran out of memory, or some other thing happened to us that
|
||||
* made us unable to handle the page fault gracefully.
|
||||
*/
|
||||
if (!user_mode(regs))
|
||||
return MM_FAULT_ERR(SIGKILL);
|
||||
pagefault_out_of_memory();
|
||||
return MM_FAULT_RETURN;
|
||||
}
|
||||
|
||||
if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE))
|
||||
return do_sigbus(regs, addr, fault);
|
||||
|
||||
/* We don't understand the fault code, this is fatal */
|
||||
BUG();
|
||||
return MM_FAULT_CONTINUE;
|
||||
}
|
||||
|
||||
/*
|
||||
* For 600- and 800-family processors, the error_code parameter is DSISR
|
||||
* for a data fault, SRR1 for an instruction fault. For 400-family processors
|
||||
* the error_code parameter is ESR for a data fault, 0 for an instruction
|
||||
* fault.
|
||||
* For 64-bit processors, the error_code parameter is
|
||||
* - DSISR for a non-SLB data access fault,
|
||||
* - SRR1 & 0x08000000 for a non-SLB instruction access fault
|
||||
* - 0 any SLB fault.
|
||||
*
|
||||
* The return value is 0 if the fault was handled, or the signal
|
||||
* number if this is a kernel fault that can't be handled here.
|
||||
*/
|
||||
int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
|
||||
unsigned long error_code)
|
||||
{
|
||||
enum ctx_state prev_state = exception_enter();
|
||||
struct vm_area_struct * vma;
|
||||
struct mm_struct *mm = current->mm;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
int code = SEGV_MAPERR;
|
||||
int is_write = 0;
|
||||
int trap = TRAP(regs);
|
||||
int is_exec = trap == 0x400;
|
||||
int fault;
|
||||
int rc = 0, store_update_sp = 0;
|
||||
|
||||
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
|
||||
/*
|
||||
* Fortunately the bit assignments in SRR1 for an instruction
|
||||
* fault and DSISR for a data fault are mostly the same for the
|
||||
* bits we are interested in. But there are some bits which
|
||||
* indicate errors in DSISR but can validly be set in SRR1.
|
||||
*/
|
||||
if (trap == 0x400)
|
||||
error_code &= 0x48200000;
|
||||
else
|
||||
is_write = error_code & DSISR_ISSTORE;
|
||||
#else
|
||||
is_write = error_code & ESR_DST;
|
||||
#endif /* CONFIG_4xx || CONFIG_BOOKE */
|
||||
|
||||
#ifdef CONFIG_PPC_ICSWX
|
||||
/*
|
||||
* we need to do this early because this "data storage
|
||||
* interrupt" does not update the DAR/DEAR so we don't want to
|
||||
* look at it
|
||||
*/
|
||||
if (error_code & ICSWX_DSI_UCT) {
|
||||
rc = acop_handle_fault(regs, address, error_code);
|
||||
if (rc)
|
||||
goto bail;
|
||||
}
|
||||
#endif /* CONFIG_PPC_ICSWX */
|
||||
|
||||
if (notify_page_fault(regs))
|
||||
goto bail;
|
||||
|
||||
if (unlikely(debugger_fault_handler(regs)))
|
||||
goto bail;
|
||||
|
||||
/* On a kernel SLB miss we can only check for a valid exception entry */
|
||||
if (!user_mode(regs) && (address >= TASK_SIZE)) {
|
||||
rc = SIGSEGV;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \
|
||||
defined(CONFIG_PPC_BOOK3S_64))
|
||||
if (error_code & DSISR_DABRMATCH) {
|
||||
/* breakpoint match */
|
||||
do_break(regs, address, error_code);
|
||||
goto bail;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* We restore the interrupt state now */
|
||||
if (!arch_irq_disabled_regs(regs))
|
||||
local_irq_enable();
|
||||
|
||||
if (in_atomic() || mm == NULL) {
|
||||
if (!user_mode(regs)) {
|
||||
rc = SIGSEGV;
|
||||
goto bail;
|
||||
}
|
||||
/* in_atomic() in user mode is really bad,
|
||||
as is current->mm == NULL. */
|
||||
printk(KERN_EMERG "Page fault in user mode with "
|
||||
"in_atomic() = %d mm = %p\n", in_atomic(), mm);
|
||||
printk(KERN_EMERG "NIP = %lx MSR = %lx\n",
|
||||
regs->nip, regs->msr);
|
||||
die("Weird page fault", regs, SIGSEGV);
|
||||
}
|
||||
|
||||
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
|
||||
|
||||
/*
|
||||
* We want to do this outside mmap_sem, because reading code around nip
|
||||
* can result in fault, which will cause a deadlock when called with
|
||||
* mmap_sem held
|
||||
*/
|
||||
if (user_mode(regs))
|
||||
store_update_sp = store_updates_sp(regs);
|
||||
|
||||
if (user_mode(regs))
|
||||
flags |= FAULT_FLAG_USER;
|
||||
|
||||
/* When running in the kernel we expect faults to occur only to
|
||||
* addresses in user space. All other faults represent errors in the
|
||||
* kernel and should generate an OOPS. Unfortunately, in the case of an
|
||||
* erroneous fault occurring in a code path which already holds mmap_sem
|
||||
* we will deadlock attempting to validate the fault against the
|
||||
* address space. Luckily the kernel only validly references user
|
||||
* space from well defined areas of code, which are listed in the
|
||||
* exceptions table.
|
||||
*
|
||||
* As the vast majority of faults will be valid we will only perform
|
||||
* the source reference check when there is a possibility of a deadlock.
|
||||
* Attempt to lock the address space, if we cannot we then validate the
|
||||
* source. If this is invalid we can skip the address space check,
|
||||
* thus avoiding the deadlock.
|
||||
*/
|
||||
if (!down_read_trylock(&mm->mmap_sem)) {
|
||||
if (!user_mode(regs) && !search_exception_tables(regs->nip))
|
||||
goto bad_area_nosemaphore;
|
||||
|
||||
retry:
|
||||
down_read(&mm->mmap_sem);
|
||||
} else {
|
||||
/*
|
||||
* The above down_read_trylock() might have succeeded in
|
||||
* which case we'll have missed the might_sleep() from
|
||||
* down_read():
|
||||
*/
|
||||
might_sleep();
|
||||
}
|
||||
|
||||
vma = find_vma(mm, address);
|
||||
if (!vma)
|
||||
goto bad_area;
|
||||
if (vma->vm_start <= address)
|
||||
goto good_area;
|
||||
if (!(vma->vm_flags & VM_GROWSDOWN))
|
||||
goto bad_area;
|
||||
|
||||
/*
|
||||
* N.B. The POWER/Open ABI allows programs to access up to
|
||||
* 288 bytes below the stack pointer.
|
||||
* The kernel signal delivery code writes up to about 1.5kB
|
||||
* below the stack pointer (r1) before decrementing it.
|
||||
* The exec code can write slightly over 640kB to the stack
|
||||
* before setting the user r1. Thus we allow the stack to
|
||||
* expand to 1MB without further checks.
|
||||
*/
|
||||
if (address + 0x100000 < vma->vm_end) {
|
||||
/* get user regs even if this fault is in kernel mode */
|
||||
struct pt_regs *uregs = current->thread.regs;
|
||||
if (uregs == NULL)
|
||||
goto bad_area;
|
||||
|
||||
/*
|
||||
* A user-mode access to an address a long way below
|
||||
* the stack pointer is only valid if the instruction
|
||||
* is one which would update the stack pointer to the
|
||||
* address accessed if the instruction completed,
|
||||
* i.e. either stwu rs,n(r1) or stwux rs,r1,rb
|
||||
* (or the byte, halfword, float or double forms).
|
||||
*
|
||||
* If we don't check this then any write to the area
|
||||
* between the last mapped region and the stack will
|
||||
* expand the stack rather than segfaulting.
|
||||
*/
|
||||
if (address + 2048 < uregs->gpr[1] && !store_update_sp)
|
||||
goto bad_area;
|
||||
}
|
||||
if (expand_stack(vma, address))
|
||||
goto bad_area;
|
||||
|
||||
good_area:
|
||||
code = SEGV_ACCERR;
|
||||
#if defined(CONFIG_6xx)
|
||||
if (error_code & 0x95700000)
|
||||
/* an error such as lwarx to I/O controller space,
|
||||
address matching DABR, eciwx, etc. */
|
||||
goto bad_area;
|
||||
#endif /* CONFIG_6xx */
|
||||
#if defined(CONFIG_8xx)
|
||||
/* 8xx sometimes needs to load invalid/non-present TLB entries.
|
||||
* These must be invalidated separately, as the Linux mm doesn't.
|
||||
*/
|
||||
if (error_code & 0x40000000) /* no translation? */
|
||||
_tlbil_va(address, 0, 0, 0);
|
||||
|
||||
/* The MPC8xx seems to always set 0x80000000, which is
|
||||
* "undefined". Of those that can be set, this is the only
|
||||
* one which seems bad.
|
||||
*/
|
||||
if (error_code & 0x10000000)
|
||||
/* Guarded storage error. */
|
||||
goto bad_area;
|
||||
#endif /* CONFIG_8xx */
|
||||
|
||||
if (is_exec) {
|
||||
#ifdef CONFIG_PPC_STD_MMU
|
||||
/* Protection fault on exec go straight to failure on
|
||||
* Hash based MMUs as they either don't support per-page
|
||||
* execute permission, or if they do, it's handled already
|
||||
* at the hash level. This test would probably have to
|
||||
* be removed if we change the way this works to make hash
|
||||
* processors use the same I/D cache coherency mechanism
|
||||
* as embedded.
|
||||
*/
|
||||
if (error_code & DSISR_PROTFAULT)
|
||||
goto bad_area;
|
||||
#endif /* CONFIG_PPC_STD_MMU */
|
||||
|
||||
/*
|
||||
* Allow execution from readable areas if the MMU does not
|
||||
* provide separate controls over reading and executing.
|
||||
*
|
||||
* Note: That code used to not be enabled for 4xx/BookE.
|
||||
* It is now as I/D cache coherency for these is done at
|
||||
* set_pte_at() time and I see no reason why the test
|
||||
* below wouldn't be valid on those processors. This -may-
|
||||
* break programs compiled with a really old ABI though.
|
||||
*/
|
||||
if (!(vma->vm_flags & VM_EXEC) &&
|
||||
(cpu_has_feature(CPU_FTR_NOEXECUTE) ||
|
||||
!(vma->vm_flags & (VM_READ | VM_WRITE))))
|
||||
goto bad_area;
|
||||
/* a write */
|
||||
} else if (is_write) {
|
||||
if (!(vma->vm_flags & VM_WRITE))
|
||||
goto bad_area;
|
||||
flags |= FAULT_FLAG_WRITE;
|
||||
/* a read */
|
||||
} else {
|
||||
/* protection fault */
|
||||
if (error_code & 0x08000000)
|
||||
goto bad_area;
|
||||
if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
|
||||
goto bad_area;
|
||||
}
|
||||
|
||||
/*
|
||||
* If for any reason at all we couldn't handle the fault,
|
||||
* make sure we exit gracefully rather than endlessly redo
|
||||
* the fault.
|
||||
*/
|
||||
fault = handle_mm_fault(mm, vma, address, flags);
|
||||
if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
|
||||
if (fault & VM_FAULT_SIGSEGV)
|
||||
goto bad_area;
|
||||
rc = mm_fault_error(regs, address, fault);
|
||||
if (rc >= MM_FAULT_RETURN)
|
||||
goto bail;
|
||||
else
|
||||
rc = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Major/minor page fault accounting is only done on the
|
||||
* initial attempt. If we go through a retry, it is extremely
|
||||
* likely that the page will be found in page cache at that point.
|
||||
*/
|
||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
||||
if (fault & VM_FAULT_MAJOR) {
|
||||
current->maj_flt++;
|
||||
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
|
||||
regs, address);
|
||||
#ifdef CONFIG_PPC_SMLPAR
|
||||
if (firmware_has_feature(FW_FEATURE_CMO)) {
|
||||
u32 page_ins;
|
||||
|
||||
preempt_disable();
|
||||
page_ins = be32_to_cpu(get_lppaca()->page_ins);
|
||||
page_ins += 1 << PAGE_FACTOR;
|
||||
get_lppaca()->page_ins = cpu_to_be32(page_ins);
|
||||
preempt_enable();
|
||||
}
|
||||
#endif /* CONFIG_PPC_SMLPAR */
|
||||
} else {
|
||||
current->min_flt++;
|
||||
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
|
||||
regs, address);
|
||||
}
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
|
||||
* of starvation. */
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
|
||||
up_read(&mm->mmap_sem);
|
||||
goto bail;
|
||||
|
||||
bad_area:
|
||||
up_read(&mm->mmap_sem);
|
||||
|
||||
bad_area_nosemaphore:
|
||||
/* User mode accesses cause a SIGSEGV */
|
||||
if (user_mode(regs)) {
|
||||
_exception(SIGSEGV, regs, code, address);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (is_exec && (error_code & DSISR_PROTFAULT))
|
||||
printk_ratelimited(KERN_CRIT "kernel tried to execute NX-protected"
|
||||
" page (%lx) - exploit attempt? (uid: %d)\n",
|
||||
address, from_kuid(&init_user_ns, current_uid()));
|
||||
|
||||
rc = SIGSEGV;
|
||||
|
||||
bail:
|
||||
exception_exit(prev_state);
|
||||
return rc;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* bad_page_fault is called when we have a bad access from the kernel.
|
||||
* It is called from the DSI and ISI handlers in head.S and from some
|
||||
* of the procedures in traps.c.
|
||||
*/
|
||||
void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
|
||||
{
|
||||
const struct exception_table_entry *entry;
|
||||
|
||||
/* Are we prepared to handle this fault? */
|
||||
if ((entry = search_exception_tables(regs->nip)) != NULL) {
|
||||
regs->nip = entry->fixup;
|
||||
return;
|
||||
}
|
||||
|
||||
/* kernel has accessed a bad area */
|
||||
|
||||
switch (regs->trap) {
|
||||
case 0x300:
|
||||
case 0x380:
|
||||
printk(KERN_ALERT "Unable to handle kernel paging request for "
|
||||
"data at address 0x%08lx\n", regs->dar);
|
||||
break;
|
||||
case 0x400:
|
||||
case 0x480:
|
||||
printk(KERN_ALERT "Unable to handle kernel paging request for "
|
||||
"instruction fetch\n");
|
||||
break;
|
||||
default:
|
||||
printk(KERN_ALERT "Unable to handle kernel paging request for "
|
||||
"unknown fault\n");
|
||||
break;
|
||||
}
|
||||
printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
|
||||
regs->nip);
|
||||
|
||||
if (task_stack_end_corrupted(current))
|
||||
printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
|
||||
|
||||
die("Kernel access of bad area", regs, sig);
|
||||
}
|
||||
arch/powerpc/mm/fsl_booke_mmu.c (new file, 318 lines added)
|
|
@@ -0,0 +1,318 @@
|
|||
/*
|
||||
* Modifications by Kumar Gala (galak@kernel.crashing.org) to support
|
||||
* E500 Book E processors.
|
||||
*
|
||||
* Copyright 2004,2010 Freescale Semiconductor, Inc.
|
||||
*
|
||||
* This file contains the routines for initializing the MMU
|
||||
* on the 4xx series of chips.
|
||||
* -- paulus
|
||||
*
|
||||
* Derived from arch/ppc/mm/init.c:
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
|
||||
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
|
||||
* Copyright (C) 1996 Paul Mackerras
|
||||
*
|
||||
* Derived from "arch/i386/mm/init.c"
|
||||
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/signal.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/memblock.h>
|
||||
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/prom.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/machdep.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/paca.h>
|
||||
|
||||
#include "mmu_decl.h"
|
||||
|
||||
unsigned int tlbcam_index;
|
||||
|
||||
#define NUM_TLBCAMS (64)
|
||||
struct tlbcam TLBCAM[NUM_TLBCAMS];
|
||||
|
||||
struct tlbcamrange {
|
||||
unsigned long start;
|
||||
unsigned long limit;
|
||||
phys_addr_t phys;
|
||||
} tlbcam_addrs[NUM_TLBCAMS];
|
||||
|
||||
extern unsigned int tlbcam_index;
|
||||
|
||||
unsigned long tlbcam_sz(int idx)
|
||||
{
|
||||
return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1;
|
||||
}

/*
 * Return PA for this VA if it is mapped by a CAM, or 0
 */
phys_addr_t v_mapped_by_tlbcam(unsigned long va)
{
	int b;
	for (b = 0; b < tlbcam_index; ++b)
		if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit)
			return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start);
	return 0;
}

/*
 * Return VA for a given PA or 0 if not mapped
 */
unsigned long p_mapped_by_tlbcam(phys_addr_t pa)
{
	int b;
	for (b = 0; b < tlbcam_index; ++b)
		if (pa >= tlbcam_addrs[b].phys
		    && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start)
			    +tlbcam_addrs[b].phys)
			return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys);
	return 0;
}

/*
 * Set up a variable-size TLB entry (tlbcam). The parameters are not checked;
 * in particular size must be a power of 4 between 4k and the max supported by
 * an implementation; max may further be limited by what can be represented in
 * an unsigned long (for example, 32-bit implementations cannot support a 4GB
 * size).
 */
static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
		unsigned long size, unsigned long flags, unsigned int pid)
{
	unsigned int tsize;

	tsize = __ilog2(size) - 10;

#ifdef CONFIG_SMP
	if ((flags & _PAGE_NO_CACHE) == 0)
		flags |= _PAGE_COHERENT;
#endif

	TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1);
	TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid);
	TLBCAM[index].MAS2 = virt & PAGE_MASK;

	TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0;
	TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0;
	TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0;
	TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0;
	TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0;

	TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SX | MAS3_SR;
	TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0);
	if (mmu_has_feature(MMU_FTR_BIG_PHYS))
		TLBCAM[index].MAS7 = (u64)phys >> 32;

	/* Below is unlikely -- only for large user pages or similar */
	if (pte_user(flags)) {
		TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR;
		TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0);
	}

	tlbcam_addrs[index].start = virt;
	tlbcam_addrs[index].limit = virt + size - 1;
	tlbcam_addrs[index].phys = phys;

	loadcam_entry(index);
}

unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
			  phys_addr_t phys)
{
	unsigned int camsize = __ilog2(ram);
	unsigned int align = __ffs(virt | phys);
	unsigned long max_cam;

	if ((mfspr(SPRN_MMUCFG) & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
		/* Convert (4^max) kB to (2^max) bytes */
		max_cam = ((mfspr(SPRN_TLB1CFG) >> 16) & 0xf) * 2 + 10;
		camsize &= ~1U;
		align &= ~1U;
	} else {
		/* Convert (2^max) kB to (2^max) bytes */
		max_cam = __ilog2(mfspr(SPRN_TLB1PS)) + 10;
	}

	if (camsize > align)
		camsize = align;
	if (camsize > max_cam)
		camsize = max_cam;

	return 1UL << camsize;
}
|
||||
|
||||
static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt,
|
||||
unsigned long ram, int max_cam_idx)
|
||||
{
|
||||
int i;
|
||||
unsigned long amount_mapped = 0;
|
||||
|
||||
/* Calculate CAM values */
|
||||
for (i = 0; ram && i < max_cam_idx; i++) {
|
||||
unsigned long cam_sz;
|
||||
|
||||
cam_sz = calc_cam_sz(ram, virt, phys);
|
||||
settlbcam(i, virt, phys, cam_sz, PAGE_KERNEL_X, 0);
|
||||
|
||||
ram -= cam_sz;
|
||||
amount_mapped += cam_sz;
|
||||
virt += cam_sz;
|
||||
phys += cam_sz;
|
||||
}
|
||||
tlbcam_index = i;
|
||||
|
||||
#ifdef CONFIG_PPC64
|
||||
get_paca()->tcd.esel_next = i;
|
||||
get_paca()->tcd.esel_max = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
|
||||
get_paca()->tcd.esel_first = i;
|
||||
#endif
|
||||
|
||||
return amount_mapped;
|
||||
}
|
||||
|
||||
unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)
|
||||
{
|
||||
unsigned long virt = PAGE_OFFSET;
|
||||
phys_addr_t phys = memstart_addr;
|
||||
|
||||
return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PPC32
|
||||
|
||||
#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS)
|
||||
#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS"
|
||||
#endif
|
||||
|
||||
unsigned long __init mmu_mapin_ram(unsigned long top)
|
||||
{
|
||||
return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* MMU_init_hw does the chip-specific initialization of the MMU hardware.
|
||||
*/
|
||||
void __init MMU_init_hw(void)
|
||||
{
|
||||
flush_instruction_cache();
|
||||
}
|
||||
|
||||
void __init adjust_total_lowmem(void)
|
||||
{
|
||||
unsigned long ram;
|
||||
int i;
|
||||
|
||||
/* adjust lowmem size to __max_low_memory */
|
||||
ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem);
|
||||
|
||||
i = switch_to_as1();
|
||||
__max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM);
|
||||
restore_to_as0(i, 0, 0, 1);
|
||||
|
||||
pr_info("Memory CAM mapping: ");
|
||||
for (i = 0; i < tlbcam_index - 1; i++)
|
||||
pr_cont("%lu/", tlbcam_sz(i) >> 20);
|
||||
pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20,
|
||||
(unsigned int)((total_lowmem - __max_low_memory) >> 20));
|
||||
|
||||
memblock_set_current_limit(memstart_addr + __max_low_memory);
|
||||
}
|
||||
|
||||
void setup_initial_memory_limit(phys_addr_t first_memblock_base,
|
||||
phys_addr_t first_memblock_size)
|
||||
{
|
||||
phys_addr_t limit = first_memblock_base + first_memblock_size;
|
||||
|
||||
/* 64M mapped initially according to head_fsl_booke.S */
|
||||
memblock_set_current_limit(min_t(u64, limit, 0x04000000));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RELOCATABLE
|
||||
int __initdata is_second_reloc;
|
||||
notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
|
||||
{
|
||||
unsigned long base = KERNELBASE;
|
||||
|
||||
kernstart_addr = start;
|
||||
if (is_second_reloc) {
|
||||
virt_phys_offset = PAGE_OFFSET - memstart_addr;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Relocatable kernel support based on processing of dynamic
|
||||
* relocation entries. Before we get the real memstart_addr,
|
||||
* we will compute the virt_phys_offset like this:
|
||||
* virt_phys_offset = stext.run - kernstart_addr
|
||||
*
|
||||
* stext.run = (KERNELBASE & ~0x3ffffff) +
|
||||
* (kernstart_addr & 0x3ffffff)
|
||||
* When we relocate, we have :
|
||||
*
|
||||
* (kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff)
|
||||
*
|
||||
* hence:
|
||||
* virt_phys_offset = (KERNELBASE & ~0x3ffffff) -
|
||||
* (kernstart_addr & ~0x3ffffff)
|
||||
*
|
||||
*/
|
||||
start &= ~0x3ffffff;
|
||||
base &= ~0x3ffffff;
|
||||
virt_phys_offset = base - start;
|
||||
early_get_first_memblock_info(__va(dt_ptr), NULL);
|
||||
/*
|
||||
* We now get the memstart_addr, then we should check if this
|
||||
* address is the same as what PAGE_OFFSET maps to now. If
|
||||
* not we have to change the map of PAGE_OFFSET to memstart_addr
|
||||
* and do a second relocation.
|
||||
*/
|
||||
if (start != memstart_addr) {
|
||||
int n;
|
||||
long offset = start - memstart_addr;
|
||||
|
||||
is_second_reloc = 1;
|
||||
n = switch_to_as1();
|
||||
/* map a 64M area for the second relocation */
|
||||
if (memstart_addr > start)
|
||||
map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM);
|
||||
else
|
||||
map_mem_in_cams_addr(start, PAGE_OFFSET + offset,
|
||||
0x4000000, CONFIG_LOWMEM_CAM_NUM);
|
||||
restore_to_as0(n, offset, __va(dt_ptr), 1);
|
||||
/* We should never reach here */
|
||||
panic("Relocation error");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
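The CAM sizing in calc_cam_sz() above clamps each entry to the remaining RAM, to the alignment of the virtual and physical addresses, and to the largest size the TLB array supports, rounding down to a power of 4 on MAV 1.0 parts. The following is a minimal standalone sketch of that clamping arithmetic; the helper names are illustrative and the hardware maximum is passed in instead of being read from SPRN_TLB1CFG/SPRN_TLB1PS, so this is not part of the kernel file.

#include <stdbool.h>

/* Integer log2 of a non-zero value, standing in for the kernel's __ilog2(). */
static unsigned int ilog2_sketch(unsigned long x)
{
	unsigned int r = 0;

	while (x >>= 1)
		r++;
	return r;
}

/*
 * Hypothetical restatement of the calc_cam_sz() clamping. 'max_cam' is the
 * log2 of the largest supported entry size; 'mav_1_0' selects the MAV 1.0
 * behaviour, which only allows power-of-4 sizes.
 */
static unsigned long cam_sz_sketch(unsigned long ram, unsigned long virt,
				   unsigned long phys, unsigned int max_cam,
				   bool mav_1_0)
{
	unsigned int camsize = ilog2_sketch(ram);
	unsigned int align = (unsigned int)__builtin_ctzl(virt | phys);

	if (mav_1_0) {
		camsize &= ~1U;		/* round down to a power of 4 */
		align &= ~1U;
	}
	if (camsize > align)
		camsize = align;
	if (camsize > max_cam)
		camsize = max_cam;

	return 1UL << camsize;		/* entry size in bytes */
}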
235
arch/powerpc/mm/gup.c
Normal file
@@ -0,0 +1,235 @@
/*
|
||||
* Lockless get_user_pages_fast for powerpc
|
||||
*
|
||||
* Copyright (C) 2008 Nick Piggin
|
||||
* Copyright (C) 2008 Novell Inc.
|
||||
*/
|
||||
#undef DEBUG
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/vmstat.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
#ifdef __HAVE_ARCH_PTE_SPECIAL
|
||||
|
||||
/*
|
||||
* The performance critical leaf functions are made noinline, otherwise gcc
|
||||
* inlines everything into a single function which results in too much
|
||||
* register pressure.
|
||||
*/
|
||||
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
|
||||
unsigned long end, int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long mask, result;
|
||||
pte_t *ptep;
|
||||
|
||||
result = _PAGE_PRESENT|_PAGE_USER;
|
||||
if (write)
|
||||
result |= _PAGE_RW;
|
||||
mask = result | _PAGE_SPECIAL;
|
||||
|
||||
ptep = pte_offset_kernel(&pmd, addr);
|
||||
do {
|
||||
pte_t pte = ACCESS_ONCE(*ptep);
|
||||
struct page *page;
|
||||
/*
|
||||
* Similar to the PMD case, NUMA hinting must take slow path
|
||||
*/
|
||||
if (pte_numa(pte))
|
||||
return 0;
|
||||
|
||||
if ((pte_val(pte) & mask) != result)
|
||||
return 0;
|
||||
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
|
||||
page = pte_page(pte);
|
||||
if (!page_cache_get_speculative(page))
|
||||
return 0;
|
||||
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
|
||||
put_page(page);
|
||||
return 0;
|
||||
}
|
||||
pages[*nr] = page;
|
||||
(*nr)++;
|
||||
|
||||
} while (ptep++, addr += PAGE_SIZE, addr != end);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long next;
|
||||
pmd_t *pmdp;
|
||||
|
||||
pmdp = pmd_offset(&pud, addr);
|
||||
do {
|
||||
pmd_t pmd = ACCESS_ONCE(*pmdp);
|
||||
|
||||
next = pmd_addr_end(addr, end);
|
||||
/*
|
||||
* If we find a splitting transparent hugepage we
|
||||
* return zero. That will result in taking the slow
|
||||
* path which will call wait_split_huge_page()
|
||||
* if the pmd is still in splitting state
|
||||
*/
|
||||
if (pmd_none(pmd) || pmd_trans_splitting(pmd))
|
||||
return 0;
|
||||
if (pmd_huge(pmd) || pmd_large(pmd)) {
|
||||
/*
|
||||
* NUMA hinting faults need to be handled in the GUP
|
||||
* slowpath for accounting purposes and so that they
|
||||
* can be serialised against THP migration.
|
||||
*/
|
||||
if (pmd_numa(pmd))
|
||||
return 0;
|
||||
|
||||
if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next,
|
||||
write, pages, nr))
|
||||
return 0;
|
||||
} else if (is_hugepd(pmdp)) {
|
||||
if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT,
|
||||
addr, next, write, pages, nr))
|
||||
return 0;
|
||||
} else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
|
||||
return 0;
|
||||
} while (pmdp++, addr = next, addr != end);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long next;
|
||||
pud_t *pudp;
|
||||
|
||||
pudp = pud_offset(&pgd, addr);
|
||||
do {
|
||||
pud_t pud = ACCESS_ONCE(*pudp);
|
||||
|
||||
next = pud_addr_end(addr, end);
|
||||
if (pud_none(pud))
|
||||
return 0;
|
||||
if (pud_huge(pud)) {
|
||||
if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next,
|
||||
write, pages, nr))
|
||||
return 0;
|
||||
} else if (is_hugepd(pudp)) {
|
||||
if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT,
|
||||
addr, next, write, pages, nr))
|
||||
return 0;
|
||||
} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
|
||||
return 0;
|
||||
} while (pudp++, addr = next, addr != end);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
||||
struct page **pages)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
unsigned long addr, len, end;
|
||||
unsigned long next;
|
||||
unsigned long flags;
|
||||
pgd_t *pgdp;
|
||||
int nr = 0;
|
||||
|
||||
pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");
|
||||
|
||||
start &= PAGE_MASK;
|
||||
addr = start;
|
||||
len = (unsigned long) nr_pages << PAGE_SHIFT;
|
||||
end = start + len;
|
||||
|
||||
if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
|
||||
start, len)))
|
||||
return 0;
|
||||
|
||||
pr_devel(" aligned: %lx .. %lx\n", start, end);
|
||||
|
||||
/*
|
||||
* XXX: batch / limit 'nr', to avoid large irq off latency
|
||||
* needs some instrumenting to determine the common sizes used by
|
||||
* important workloads (eg. DB2), and whether limiting the batch size
|
||||
* will decrease performance.
|
||||
*
|
||||
* It seems like we're in the clear for the moment. Direct-IO is
|
||||
* the main guy that batches up lots of get_user_pages, and even
|
||||
* they are limited to 64-at-a-time which is not so many.
|
||||
*/
|
||||
/*
|
||||
* This doesn't prevent pagetable teardown, but does prevent
|
||||
* the pagetables from being freed on powerpc.
|
||||
*
|
||||
* So long as we atomically load page table pointers versus teardown,
|
||||
* we can follow the address down to the page and take a ref on it.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
|
||||
pgdp = pgd_offset(mm, addr);
|
||||
do {
|
||||
pgd_t pgd = ACCESS_ONCE(*pgdp);
|
||||
|
||||
pr_devel(" %016lx: normal pgd %p\n", addr,
|
||||
(void *)pgd_val(pgd));
|
||||
next = pgd_addr_end(addr, end);
|
||||
if (pgd_none(pgd))
|
||||
break;
|
||||
if (pgd_huge(pgd)) {
|
||||
if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next,
|
||||
write, pages, &nr))
|
||||
break;
|
||||
} else if (is_hugepd(pgdp)) {
|
||||
if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
|
||||
addr, next, write, pages, &nr))
|
||||
break;
|
||||
} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
|
||||
break;
|
||||
} while (pgdp++, addr = next, addr != end);
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
return nr;
|
||||
}
|
||||
|
||||
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
||||
struct page **pages)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
int nr, ret;
|
||||
|
||||
start &= PAGE_MASK;
|
||||
nr = __get_user_pages_fast(start, nr_pages, write, pages);
|
||||
ret = nr;
|
||||
|
||||
if (nr < nr_pages) {
|
||||
pr_devel(" slow path ! nr = %d\n", nr);
|
||||
|
||||
/* Try to get the remaining pages with get_user_pages */
|
||||
start += nr << PAGE_SHIFT;
|
||||
pages += nr;
|
||||
|
||||
down_read(&mm->mmap_sem);
|
||||
ret = get_user_pages(current, mm, start,
|
||||
nr_pages - nr, write, 0, pages, NULL);
|
||||
up_read(&mm->mmap_sem);
|
||||
|
||||
/* Have to be a bit careful with return values */
|
||||
if (nr > 0) {
|
||||
if (ret < 0)
|
||||
ret = nr;
|
||||
else
|
||||
ret += nr;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif /* __HAVE_ARCH_PTE_SPECIAL */
|
||||
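Callers of the interface defined above typically pin a user buffer up front with get_user_pages_fast() and drop the page references once the I/O completes. A minimal sketch of such a caller follows; the helper name and the error handling are illustrative, not taken from this tree.

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/errno.h>

/* Hypothetical helper: pin 'nr' user pages starting at 'uaddr' for writing. */
static int pin_user_buffer_sketch(unsigned long uaddr, int nr,
				  struct page ***pagesp)
{
	struct page **pages;
	int got;

	pages = kcalloc(nr, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	/* May pin fewer pages than requested; callers must check. */
	got = get_user_pages_fast(uaddr, nr, 1, pages);
	if (got < nr) {
		int i;

		for (i = 0; i < got; i++)
			put_page(pages[i]);
		kfree(pages);
		return got < 0 ? got : -EFAULT;
	}

	*pagesp = pages;
	return 0;
}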
712
arch/powerpc/mm/hash_low_32.S
Normal file
@@ -0,0 +1,712 @@
/*
|
||||
* PowerPC version
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
* Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
|
||||
* Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
|
||||
* Adapted for Power Macintosh by Paul Mackerras.
|
||||
* Low-level exception handlers and MMU support
|
||||
* rewritten by Paul Mackerras.
|
||||
* Copyright (C) 1996 Paul Mackerras.
|
||||
*
|
||||
* This file contains low-level assembler routines for managing
|
||||
* the PowerPC MMU hash table. (PPC 8xx processors don't use a
|
||||
* hash table, so this file is not used on them.)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <asm/reg.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/cputable.h>
|
||||
#include <asm/ppc_asm.h>
|
||||
#include <asm/thread_info.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
.section .bss
|
||||
.align 2
|
||||
.globl mmu_hash_lock
|
||||
mmu_hash_lock:
|
||||
.space 4
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/*
|
||||
* Load a PTE into the hash table, if possible.
|
||||
* The address is in r4, and r3 contains an access flag:
|
||||
* _PAGE_RW (0x400) if a write.
|
||||
* r9 contains the SRR1 value, from which we use the MSR_PR bit.
|
||||
* SPRG_THREAD contains the physical address of the current task's thread.
|
||||
*
|
||||
* Returns to the caller if the access is illegal or there is no
|
||||
* mapping for the address. Otherwise it places an appropriate PTE
|
||||
* in the hash table and returns from the exception.
|
||||
* Uses r0, r3 - r8, r10, ctr, lr.
|
||||
*/
|
||||
.text
|
||||
_GLOBAL(hash_page)
|
||||
tophys(r7,0) /* gets -KERNELBASE into r7 */
|
||||
#ifdef CONFIG_SMP
|
||||
addis r8,r7,mmu_hash_lock@h
|
||||
ori r8,r8,mmu_hash_lock@l
|
||||
lis r0,0x0fff
|
||||
b 10f
|
||||
11: lwz r6,0(r8)
|
||||
cmpwi 0,r6,0
|
||||
bne 11b
|
||||
10: lwarx r6,0,r8
|
||||
cmpwi 0,r6,0
|
||||
bne- 11b
|
||||
stwcx. r0,0,r8
|
||||
bne- 10b
|
||||
isync
|
||||
#endif
|
||||
/* Get PTE (linux-style) and check access */
|
||||
lis r0,KERNELBASE@h /* check if kernel address */
|
||||
cmplw 0,r4,r0
|
||||
mfspr r8,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
|
||||
ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */
|
||||
lwz r5,PGDIR(r8) /* virt page-table root */
|
||||
blt+ 112f /* assume user more likely */
|
||||
lis r5,swapper_pg_dir@ha /* if kernel address, use */
|
||||
addi r5,r5,swapper_pg_dir@l /* kernel page table */
|
||||
rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */
|
||||
112: add r5,r5,r7 /* convert to phys addr */
|
||||
#ifndef CONFIG_PTE_64BIT
|
||||
rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */
|
||||
lwz r8,0(r5) /* get pmd entry */
|
||||
rlwinm. r8,r8,0,0,19 /* extract address of pte page */
|
||||
#else
|
||||
rlwinm r8,r4,13,19,29 /* Compute pgdir/pmd offset */
|
||||
lwzx r8,r8,r5 /* Get L1 entry */
|
||||
rlwinm. r8,r8,0,0,20 /* extract pt base address */
|
||||
#endif
|
||||
#ifdef CONFIG_SMP
|
||||
beq- hash_page_out /* return if no mapping */
|
||||
#else
|
||||
/* XXX it seems like the 601 will give a machine fault on the
|
||||
rfi if its alignment is wrong (bottom 4 bits of address are
|
||||
8 or 0xc) and we have had a not-taken conditional branch
|
||||
to the address following the rfi. */
|
||||
beqlr-
|
||||
#endif
|
||||
#ifndef CONFIG_PTE_64BIT
|
||||
rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */
|
||||
#else
|
||||
rlwimi r8,r4,23,20,28 /* compute pte address */
|
||||
#endif
|
||||
rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */
|
||||
ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE
|
||||
|
||||
/*
|
||||
* Update the linux PTE atomically. We do the lwarx up-front
|
||||
* because almost always, there won't be a permission violation
|
||||
* and there won't already be an HPTE, and thus we will have
|
||||
* to update the PTE to set _PAGE_HASHPTE. -- paulus.
|
||||
*
|
||||
* If PTE_64BIT is set, the low word is the flags word; use that
|
||||
* word for locking since it contains all the interesting bits.
|
||||
*/
|
||||
#if (PTE_FLAGS_OFFSET != 0)
|
||||
addi r8,r8,PTE_FLAGS_OFFSET
|
||||
#endif
|
||||
retry:
|
||||
lwarx r6,0,r8 /* get linux-style pte, flag word */
|
||||
andc. r5,r3,r6 /* check access & ~permission */
|
||||
#ifdef CONFIG_SMP
|
||||
bne- hash_page_out /* return if access not permitted */
|
||||
#else
|
||||
bnelr-
|
||||
#endif
|
||||
or r5,r0,r6 /* set accessed/dirty bits */
|
||||
#ifdef CONFIG_PTE_64BIT
|
||||
#ifdef CONFIG_SMP
|
||||
subf r10,r6,r8 /* create false data dependency */
|
||||
subi r10,r10,PTE_FLAGS_OFFSET
|
||||
lwzx r10,r6,r10 /* Get upper PTE word */
|
||||
#else
|
||||
lwz r10,-PTE_FLAGS_OFFSET(r8)
|
||||
#endif /* CONFIG_SMP */
|
||||
#endif /* CONFIG_PTE_64BIT */
|
||||
stwcx. r5,0,r8 /* attempt to update PTE */
|
||||
bne- retry /* retry if someone got there first */
|
||||
|
||||
mfsrin r3,r4 /* get segment reg for segment */
|
||||
mfctr r0
|
||||
stw r0,_CTR(r11)
|
||||
bl create_hpte /* add the hash table entry */
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
eieio
|
||||
addis r8,r7,mmu_hash_lock@ha
|
||||
li r0,0
|
||||
stw r0,mmu_hash_lock@l(r8)
|
||||
#endif
|
||||
|
||||
/* Return from the exception */
|
||||
lwz r5,_CTR(r11)
|
||||
mtctr r5
|
||||
lwz r0,GPR0(r11)
|
||||
lwz r7,GPR7(r11)
|
||||
lwz r8,GPR8(r11)
|
||||
b fast_exception_return
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
hash_page_out:
|
||||
eieio
|
||||
addis r8,r7,mmu_hash_lock@ha
|
||||
li r0,0
|
||||
stw r0,mmu_hash_lock@l(r8)
|
||||
blr
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/*
|
||||
* Add an entry for a particular page to the hash table.
|
||||
*
|
||||
* add_hash_page(unsigned context, unsigned long va, unsigned long pmdval)
|
||||
*
|
||||
* We assume any necessary modifications to the pte (e.g. setting
|
||||
* the accessed bit) have already been done and that there is actually
|
||||
* a hash table in use (i.e. we're not on a 603).
|
||||
*/
|
||||
_GLOBAL(add_hash_page)
|
||||
mflr r0
|
||||
stw r0,4(r1)
|
||||
|
||||
/* Convert context and va to VSID */
|
||||
mulli r3,r3,897*16 /* multiply context by context skew */
|
||||
rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */
|
||||
mulli r0,r0,0x111 /* multiply by ESID skew */
|
||||
add r3,r3,r0 /* note create_hpte trims to 24 bits */
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
CURRENT_THREAD_INFO(r8, r1) /* use cpu number to make tag */
|
||||
lwz r8,TI_CPU(r8) /* to go in mmu_hash_lock */
|
||||
oris r8,r8,12
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/*
|
||||
* We disable interrupts here, even on UP, because we don't
|
||||
* want to race with hash_page, and because we want the
|
||||
* _PAGE_HASHPTE bit to be a reliable indication of whether
|
||||
* the HPTE exists (or at least whether one did once).
|
||||
* We also turn off the MMU for data accesses so that we
|
||||
* can't take a hash table miss (assuming the code is
|
||||
* covered by a BAT). -- paulus
|
||||
*/
|
||||
mfmsr r9
|
||||
SYNC
|
||||
rlwinm r0,r9,0,17,15 /* clear bit 16 (MSR_EE) */
|
||||
rlwinm r0,r0,0,28,26 /* clear MSR_DR */
|
||||
mtmsr r0
|
||||
SYNC_601
|
||||
isync
|
||||
|
||||
tophys(r7,0)
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
addis r6,r7,mmu_hash_lock@ha
|
||||
addi r6,r6,mmu_hash_lock@l
|
||||
10: lwarx r0,0,r6 /* take the mmu_hash_lock */
|
||||
cmpi 0,r0,0
|
||||
bne- 11f
|
||||
stwcx. r8,0,r6
|
||||
beq+ 12f
|
||||
11: lwz r0,0(r6)
|
||||
cmpi 0,r0,0
|
||||
beq 10b
|
||||
b 11b
|
||||
12: isync
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Fetch the linux pte and test and set _PAGE_HASHPTE atomically.
|
||||
* If _PAGE_HASHPTE was already set, we don't replace the existing
|
||||
* HPTE, so we just unlock and return.
|
||||
*/
|
||||
mr r8,r5
|
||||
#ifndef CONFIG_PTE_64BIT
|
||||
rlwimi r8,r4,22,20,29
|
||||
#else
|
||||
rlwimi r8,r4,23,20,28
|
||||
addi r8,r8,PTE_FLAGS_OFFSET
|
||||
#endif
|
||||
1: lwarx r6,0,r8
|
||||
andi. r0,r6,_PAGE_HASHPTE
|
||||
bne 9f /* if HASHPTE already set, done */
|
||||
#ifdef CONFIG_PTE_64BIT
|
||||
#ifdef CONFIG_SMP
|
||||
subf r10,r6,r8 /* create false data dependency */
|
||||
subi r10,r10,PTE_FLAGS_OFFSET
|
||||
lwzx r10,r6,r10 /* Get upper PTE word */
|
||||
#else
|
||||
lwz r10,-PTE_FLAGS_OFFSET(r8)
|
||||
#endif /* CONFIG_SMP */
|
||||
#endif /* CONFIG_PTE_64BIT */
|
||||
ori r5,r6,_PAGE_HASHPTE
|
||||
stwcx. r5,0,r8
|
||||
bne- 1b
|
||||
|
||||
bl create_hpte
|
||||
|
||||
9:
|
||||
#ifdef CONFIG_SMP
|
||||
addis r6,r7,mmu_hash_lock@ha
|
||||
addi r6,r6,mmu_hash_lock@l
|
||||
eieio
|
||||
li r0,0
|
||||
stw r0,0(r6) /* clear mmu_hash_lock */
|
||||
#endif
|
||||
|
||||
/* reenable interrupts and DR */
|
||||
mtmsr r9
|
||||
SYNC_601
|
||||
isync
|
||||
|
||||
lwz r0,4(r1)
|
||||
mtlr r0
|
||||
blr
|
||||
|
||||
/*
|
||||
* This routine adds a hardware PTE to the hash table.
|
||||
* It is designed to be called with the MMU either on or off.
|
||||
* r3 contains the VSID, r4 contains the virtual address,
|
||||
* r5 contains the linux PTE, r6 contains the old value of the
|
||||
* linux PTE (before setting _PAGE_HASHPTE) and r7 contains the
|
||||
* offset to be added to addresses (0 if the MMU is on,
|
||||
* -KERNELBASE if it is off). r10 contains the upper half of
|
||||
* the PTE if CONFIG_PTE_64BIT.
|
||||
* On SMP, the caller should have the mmu_hash_lock held.
|
||||
* We assume that the caller has (or will) set the _PAGE_HASHPTE
|
||||
* bit in the linux PTE in memory. The value passed in r6 should
|
||||
* be the old linux PTE value; if it doesn't have _PAGE_HASHPTE set
|
||||
* this routine will skip the search for an existing HPTE.
|
||||
* This procedure modifies r0, r3 - r6, r8, cr0.
|
||||
* -- paulus.
|
||||
*
|
||||
* For speed, 4 of the instructions get patched once the size and
|
||||
* physical address of the hash table are known. These definitions
|
||||
* of Hash_base and Hash_bits below are just an example.
|
||||
*/
|
||||
Hash_base = 0xc0180000
|
||||
Hash_bits = 12 /* e.g. 256kB hash table */
|
||||
Hash_msk = (((1 << Hash_bits) - 1) * 64)
|
||||
|
||||
/* defines for the PTE format for 32-bit PPCs */
|
||||
#define HPTE_SIZE 8
|
||||
#define PTEG_SIZE 64
|
||||
#define LG_PTEG_SIZE 6
|
||||
#define LDPTEu lwzu
|
||||
#define LDPTE lwz
|
||||
#define STPTE stw
|
||||
#define CMPPTE cmpw
|
||||
#define PTE_H 0x40
|
||||
#define PTE_V 0x80000000
|
||||
#define TST_V(r) rlwinm. r,r,0,0,0
|
||||
#define SET_V(r) oris r,r,PTE_V@h
|
||||
#define CLR_V(r,t) rlwinm r,r,0,1,31
|
||||
|
||||
#define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1)
|
||||
#define HASH_RIGHT 31-LG_PTEG_SIZE
|
||||
|
||||
_GLOBAL(create_hpte)
|
||||
/* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */
|
||||
rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */
|
||||
rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
|
||||
and r8,r8,r0 /* writable if _RW & _DIRTY */
|
||||
rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */
|
||||
rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */
|
||||
ori r8,r8,0xe04 /* clear out reserved bits */
|
||||
andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */
|
||||
BEGIN_FTR_SECTION
|
||||
rlwinm r8,r8,0,~_PAGE_COHERENT /* clear M (coherence not required) */
|
||||
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
|
||||
#ifdef CONFIG_PTE_64BIT
|
||||
/* Put the XPN bits into the PTE */
|
||||
rlwimi r8,r10,8,20,22
|
||||
rlwimi r8,r10,2,29,29
|
||||
#endif
|
||||
|
||||
/* Construct the high word of the PPC-style PTE (r5) */
|
||||
rlwinm r5,r3,7,1,24 /* put VSID in 0x7fffff80 bits */
|
||||
rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */
|
||||
SET_V(r5) /* set V (valid) bit */
|
||||
|
||||
/* Get the address of the primary PTE group in the hash table (r3) */
|
||||
_GLOBAL(hash_page_patch_A)
|
||||
addis r0,r7,Hash_base@h /* base address of hash table */
|
||||
rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
|
||||
rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
|
||||
xor r3,r3,r0 /* make primary hash */
|
||||
li r0,8 /* PTEs/group */
|
||||
|
||||
/*
|
||||
* Test the _PAGE_HASHPTE bit in the old linux PTE, and skip the search
|
||||
* if it is clear, meaning that the HPTE isn't there already...
|
||||
*/
|
||||
andi. r6,r6,_PAGE_HASHPTE
|
||||
beq+ 10f /* no PTE: go look for an empty slot */
|
||||
tlbie r4
|
||||
|
||||
addis r4,r7,htab_hash_searches@ha
|
||||
lwz r6,htab_hash_searches@l(r4)
|
||||
addi r6,r6,1 /* count how many searches we do */
|
||||
stw r6,htab_hash_searches@l(r4)
|
||||
|
||||
/* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
|
||||
mtctr r0
|
||||
addi r4,r3,-HPTE_SIZE
|
||||
1: LDPTEu r6,HPTE_SIZE(r4) /* get next PTE */
|
||||
CMPPTE 0,r6,r5
|
||||
bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
|
||||
beq+ found_slot
|
||||
|
||||
/* Search the secondary PTEG for a matching PTE */
|
||||
ori r5,r5,PTE_H /* set H (secondary hash) bit */
|
||||
_GLOBAL(hash_page_patch_B)
|
||||
xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
|
||||
xori r4,r4,(-PTEG_SIZE & 0xffff)
|
||||
addi r4,r4,-HPTE_SIZE
|
||||
mtctr r0
|
||||
2: LDPTEu r6,HPTE_SIZE(r4)
|
||||
CMPPTE 0,r6,r5
|
||||
bdnzf 2,2b
|
||||
beq+ found_slot
|
||||
xori r5,r5,PTE_H /* clear H bit again */
|
||||
|
||||
/* Search the primary PTEG for an empty slot */
|
||||
10: mtctr r0
|
||||
addi r4,r3,-HPTE_SIZE /* search primary PTEG */
|
||||
1: LDPTEu r6,HPTE_SIZE(r4) /* get next PTE */
|
||||
TST_V(r6) /* test valid bit */
|
||||
bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
|
||||
beq+ found_empty
|
||||
|
||||
/* update counter of times that the primary PTEG is full */
|
||||
addis r4,r7,primary_pteg_full@ha
|
||||
lwz r6,primary_pteg_full@l(r4)
|
||||
addi r6,r6,1
|
||||
stw r6,primary_pteg_full@l(r4)
|
||||
|
||||
/* Search the secondary PTEG for an empty slot */
|
||||
ori r5,r5,PTE_H /* set H (secondary hash) bit */
|
||||
_GLOBAL(hash_page_patch_C)
|
||||
xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
|
||||
xori r4,r4,(-PTEG_SIZE & 0xffff)
|
||||
addi r4,r4,-HPTE_SIZE
|
||||
mtctr r0
|
||||
2: LDPTEu r6,HPTE_SIZE(r4)
|
||||
TST_V(r6)
|
||||
bdnzf 2,2b
|
||||
beq+ found_empty
|
||||
xori r5,r5,PTE_H /* clear H bit again */
|
||||
|
||||
/*
|
||||
* Choose an arbitrary slot in the primary PTEG to overwrite.
|
||||
* Since both the primary and secondary PTEGs are full, and we
|
||||
* have no information that the PTEs in the primary PTEG are
|
||||
* more important or useful than those in the secondary PTEG,
|
||||
* and we know there is a definite (although small) speed
|
||||
* advantage to putting the PTE in the primary PTEG, we always
|
||||
* put the PTE in the primary PTEG.
|
||||
*
|
||||
* In addition, we skip any slot that is mapping kernel text in
|
||||
* order to avoid a deadlock when not using BAT mappings if
|
||||
* trying to hash in the kernel hash code itself after it has
|
||||
* already taken the hash table lock. This works in conjunction
|
||||
* with pre-faulting of the kernel text.
|
||||
*
|
||||
* If the hash table bucket is full of kernel text entries, we'll
|
||||
* lock up here, but that shouldn't happen.
|
||||
*/
|
||||
|
||||
1: addis r4,r7,next_slot@ha /* get next evict slot */
|
||||
lwz r6,next_slot@l(r4)
|
||||
addi r6,r6,HPTE_SIZE /* search for candidate */
|
||||
andi. r6,r6,7*HPTE_SIZE
|
||||
stw r6,next_slot@l(r4)
|
||||
add r4,r3,r6
|
||||
LDPTE r0,HPTE_SIZE/2(r4) /* get PTE second word */
|
||||
clrrwi r0,r0,12
|
||||
lis r6,etext@h
|
||||
ori r6,r6,etext@l /* get etext */
|
||||
tophys(r6,r6)
|
||||
cmpl cr0,r0,r6 /* compare and try again */
|
||||
blt 1b
|
||||
|
||||
#ifndef CONFIG_SMP
|
||||
/* Store PTE in PTEG */
|
||||
found_empty:
|
||||
STPTE r5,0(r4)
|
||||
found_slot:
|
||||
STPTE r8,HPTE_SIZE/2(r4)
|
||||
|
||||
#else /* CONFIG_SMP */
|
||||
/*
|
||||
* Between the tlbie above and updating the hash table entry below,
|
||||
* another CPU could read the hash table entry and put it in its TLB.
|
||||
* There are 3 cases:
|
||||
* 1. using an empty slot
|
||||
* 2. updating an earlier entry to change permissions (i.e. enable write)
|
||||
* 3. taking over the PTE for an unrelated address
|
||||
*
|
||||
* In each case it doesn't really matter if the other CPUs have the old
|
||||
* PTE in their TLB. So we don't need to bother with another tlbie here,
|
||||
* which is convenient as we've overwritten the register that had the
|
||||
* address. :-) The tlbie above is mainly to make sure that this CPU comes
|
||||
* and gets the new PTE from the hash table.
|
||||
*
|
||||
* We do however have to make sure that the PTE is never in an invalid
|
||||
* state with the V bit set.
|
||||
*/
|
||||
found_empty:
|
||||
found_slot:
|
||||
CLR_V(r5,r0) /* clear V (valid) bit in PTE */
|
||||
STPTE r5,0(r4)
|
||||
sync
|
||||
TLBSYNC
|
||||
STPTE r8,HPTE_SIZE/2(r4) /* put in correct RPN, WIMG, PP bits */
|
||||
sync
|
||||
SET_V(r5)
|
||||
STPTE r5,0(r4) /* finally set V bit in PTE */
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
sync /* make sure pte updates get to memory */
|
||||
blr
|
||||
|
||||
.section .bss
|
||||
.align 2
|
||||
next_slot:
|
||||
.space 4
|
||||
primary_pteg_full:
|
||||
.space 4
|
||||
htab_hash_searches:
|
||||
.space 4
|
||||
.previous
|
||||
|
||||
/*
|
||||
* Flush the entry for a particular page from the hash table.
|
||||
*
|
||||
* flush_hash_pages(unsigned context, unsigned long va, unsigned long pmdval,
|
||||
* int count)
|
||||
*
|
||||
* We assume that there is a hash table in use (Hash != 0).
|
||||
*/
|
||||
_GLOBAL(flush_hash_pages)
|
||||
tophys(r7,0)
|
||||
|
||||
/*
|
||||
* We disable interrupts here, even on UP, because we want
|
||||
* the _PAGE_HASHPTE bit to be a reliable indication of
|
||||
* whether the HPTE exists (or at least whether one did once).
|
||||
* We also turn off the MMU for data accesses so that we
|
||||
* can't take a hash table miss (assuming the code is
|
||||
* covered by a BAT). -- paulus
|
||||
*/
|
||||
mfmsr r10
|
||||
SYNC
|
||||
rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
|
||||
rlwinm r0,r0,0,28,26 /* clear MSR_DR */
|
||||
mtmsr r0
|
||||
SYNC_601
|
||||
isync
|
||||
|
||||
/* First find a PTE in the range that has _PAGE_HASHPTE set */
|
||||
#ifndef CONFIG_PTE_64BIT
|
||||
rlwimi r5,r4,22,20,29
|
||||
#else
|
||||
rlwimi r5,r4,23,20,28
|
||||
#endif
|
||||
1: lwz r0,PTE_FLAGS_OFFSET(r5)
|
||||
cmpwi cr1,r6,1
|
||||
andi. r0,r0,_PAGE_HASHPTE
|
||||
bne 2f
|
||||
ble cr1,19f
|
||||
addi r4,r4,0x1000
|
||||
addi r5,r5,PTE_SIZE
|
||||
addi r6,r6,-1
|
||||
b 1b
|
||||
|
||||
/* Convert context and va to VSID */
|
||||
2: mulli r3,r3,897*16 /* multiply context by context skew */
|
||||
rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */
|
||||
mulli r0,r0,0x111 /* multiply by ESID skew */
|
||||
add r3,r3,r0 /* note code below trims to 24 bits */
|
||||
|
||||
/* Construct the high word of the PPC-style PTE (r11) */
|
||||
rlwinm r11,r3,7,1,24 /* put VSID in 0x7fffff80 bits */
|
||||
rlwimi r11,r4,10,26,31 /* put in API (abbrev page index) */
|
||||
SET_V(r11) /* set V (valid) bit */
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
addis r9,r7,mmu_hash_lock@ha
|
||||
addi r9,r9,mmu_hash_lock@l
|
||||
CURRENT_THREAD_INFO(r8, r1)
|
||||
add r8,r8,r7
|
||||
lwz r8,TI_CPU(r8)
|
||||
oris r8,r8,9
|
||||
10: lwarx r0,0,r9
|
||||
cmpi 0,r0,0
|
||||
bne- 11f
|
||||
stwcx. r8,0,r9
|
||||
beq+ 12f
|
||||
11: lwz r0,0(r9)
|
||||
cmpi 0,r0,0
|
||||
beq 10b
|
||||
b 11b
|
||||
12: isync
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Check the _PAGE_HASHPTE bit in the linux PTE. If it is
|
||||
* already clear, we're done (for this pte). If not,
|
||||
* clear it (atomically) and proceed. -- paulus.
|
||||
*/
|
||||
#if (PTE_FLAGS_OFFSET != 0)
|
||||
addi r5,r5,PTE_FLAGS_OFFSET
|
||||
#endif
|
||||
33: lwarx r8,0,r5 /* fetch the pte flags word */
|
||||
andi. r0,r8,_PAGE_HASHPTE
|
||||
beq 8f /* done if HASHPTE is already clear */
|
||||
rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */
|
||||
stwcx. r8,0,r5 /* update the pte */
|
||||
bne- 33b
|
||||
|
||||
/* Get the address of the primary PTE group in the hash table (r3) */
|
||||
_GLOBAL(flush_hash_patch_A)
|
||||
addis r8,r7,Hash_base@h /* base address of hash table */
|
||||
rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
|
||||
rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
|
||||
xor r8,r0,r8 /* make primary hash */
|
||||
|
||||
/* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
|
||||
li r0,8 /* PTEs/group */
|
||||
mtctr r0
|
||||
addi r12,r8,-HPTE_SIZE
|
||||
1: LDPTEu r0,HPTE_SIZE(r12) /* get next PTE */
|
||||
CMPPTE 0,r0,r11
|
||||
bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
|
||||
beq+ 3f
|
||||
|
||||
/* Search the secondary PTEG for a matching PTE */
|
||||
ori r11,r11,PTE_H /* set H (secondary hash) bit */
|
||||
li r0,8 /* PTEs/group */
|
||||
_GLOBAL(flush_hash_patch_B)
|
||||
xoris r12,r8,Hash_msk>>16 /* compute secondary hash */
|
||||
xori r12,r12,(-PTEG_SIZE & 0xffff)
|
||||
addi r12,r12,-HPTE_SIZE
|
||||
mtctr r0
|
||||
2: LDPTEu r0,HPTE_SIZE(r12)
|
||||
CMPPTE 0,r0,r11
|
||||
bdnzf 2,2b
|
||||
xori r11,r11,PTE_H /* clear H again */
|
||||
bne- 4f /* should rarely fail to find it */
|
||||
|
||||
3: li r0,0
|
||||
STPTE r0,0(r12) /* invalidate entry */
|
||||
4: sync
|
||||
tlbie r4 /* in hw tlb too */
|
||||
sync
|
||||
|
||||
8: ble cr1,9f /* if all ptes checked */
|
||||
81: addi r6,r6,-1
|
||||
addi r5,r5,PTE_SIZE
|
||||
addi r4,r4,0x1000
|
||||
lwz r0,0(r5) /* check next pte */
|
||||
cmpwi cr1,r6,1
|
||||
andi. r0,r0,_PAGE_HASHPTE
|
||||
bne 33b
|
||||
bgt cr1,81b
|
||||
|
||||
9:
|
||||
#ifdef CONFIG_SMP
|
||||
TLBSYNC
|
||||
li r0,0
|
||||
stw r0,0(r9) /* clear mmu_hash_lock */
|
||||
#endif
|
||||
|
||||
19: mtmsr r10
|
||||
SYNC_601
|
||||
isync
|
||||
blr
|
||||
|
||||
/*
|
||||
* Flush an entry from the TLB
|
||||
*/
|
||||
_GLOBAL(_tlbie)
|
||||
#ifdef CONFIG_SMP
|
||||
CURRENT_THREAD_INFO(r8, r1)
|
||||
lwz r8,TI_CPU(r8)
|
||||
oris r8,r8,11
|
||||
mfmsr r10
|
||||
SYNC
|
||||
rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
|
||||
rlwinm r0,r0,0,28,26 /* clear DR */
|
||||
mtmsr r0
|
||||
SYNC_601
|
||||
isync
|
||||
lis r9,mmu_hash_lock@h
|
||||
ori r9,r9,mmu_hash_lock@l
|
||||
tophys(r9,r9)
|
||||
10: lwarx r7,0,r9
|
||||
cmpwi 0,r7,0
|
||||
bne- 10b
|
||||
stwcx. r8,0,r9
|
||||
bne- 10b
|
||||
eieio
|
||||
tlbie r3
|
||||
sync
|
||||
TLBSYNC
|
||||
li r0,0
|
||||
stw r0,0(r9) /* clear mmu_hash_lock */
|
||||
mtmsr r10
|
||||
SYNC_601
|
||||
isync
|
||||
#else /* CONFIG_SMP */
|
||||
tlbie r3
|
||||
sync
|
||||
#endif /* CONFIG_SMP */
|
||||
blr
|
||||
|
||||
/*
|
||||
* Flush the entire TLB. 603/603e only
|
||||
*/
|
||||
_GLOBAL(_tlbia)
|
||||
#if defined(CONFIG_SMP)
|
||||
CURRENT_THREAD_INFO(r8, r1)
|
||||
lwz r8,TI_CPU(r8)
|
||||
oris r8,r8,10
|
||||
mfmsr r10
|
||||
SYNC
|
||||
rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
|
||||
rlwinm r0,r0,0,28,26 /* clear DR */
|
||||
mtmsr r0
|
||||
SYNC_601
|
||||
isync
|
||||
lis r9,mmu_hash_lock@h
|
||||
ori r9,r9,mmu_hash_lock@l
|
||||
tophys(r9,r9)
|
||||
10: lwarx r7,0,r9
|
||||
cmpwi 0,r7,0
|
||||
bne- 10b
|
||||
stwcx. r8,0,r9
|
||||
bne- 10b
|
||||
sync
|
||||
tlbia
|
||||
sync
|
||||
TLBSYNC
|
||||
li r0,0
|
||||
stw r0,0(r9) /* clear mmu_hash_lock */
|
||||
mtmsr r10
|
||||
SYNC_601
|
||||
isync
|
||||
#else /* CONFIG_SMP */
|
||||
sync
|
||||
tlbia
|
||||
sync
|
||||
#endif /* CONFIG_SMP */
|
||||
blr
|
||||
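The run-time-patched sequences above (hash_page_patch_A/B/C and the flush variants) carry out the classic 32-bit hashed-page-table lookup: the primary hash XORs the low VSID bits with the page index, the secondary hash is its complement, and each hash selects one 64-byte PTEG. A rough C restatement of that address arithmetic is shown below; Hash_base and Hash_bits are plain parameters here rather than patched-in constants, and the table base is assumed to be suitably aligned, so treat this as an illustration rather than the kernel's exact code.

#include <stdint.h>

#define PTEG_BYTES	64	/* eight 8-byte PTEs per group */

/*
 * Illustrative primary/secondary PTEG address computation. 'vsid' is the
 * virtual segment id, 'ea' the effective address, 'hash_bits' the patched
 * hash table size (Hash_bits in the assembly above).
 */
static uintptr_t pteg_addr_sketch(uintptr_t hash_base, unsigned int hash_bits,
				  uint32_t vsid, uint32_t ea, int secondary)
{
	uint32_t page_index = (ea >> 12) & 0xffff;	/* 16-bit page index */
	uint32_t hash_mask = (1u << hash_bits) - 1;
	uint32_t hash = ((vsid & 0x7ffff) ^ page_index) & hash_mask;

	if (secondary)
		hash = ~hash & hash_mask;	/* ones' complement hash */

	return hash_base + (uintptr_t)hash * PTEG_BYTES;
}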
1002
arch/powerpc/mm/hash_low_64.S
Normal file
File diff suppressed because it is too large
710
arch/powerpc/mm/hash_native_64.c
Normal file
@@ -0,0 +1,710 @@
/*
|
||||
* native hashtable management.
|
||||
*
|
||||
* SMP scalability work:
|
||||
* Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#undef DEBUG_LOW
|
||||
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/threads.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/machdep.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/cputable.h>
|
||||
#include <asm/udbg.h>
|
||||
#include <asm/kexec.h>
|
||||
#include <asm/ppc-opcode.h>
|
||||
|
||||
#include <misc/cxl.h>
|
||||
|
||||
#ifdef DEBUG_LOW
|
||||
#define DBG_LOW(fmt...) udbg_printf(fmt)
|
||||
#else
|
||||
#define DBG_LOW(fmt...)
|
||||
#endif
|
||||
|
||||
#ifdef __BIG_ENDIAN__
|
||||
#define HPTE_LOCK_BIT 3
|
||||
#else
|
||||
#define HPTE_LOCK_BIT (56+3)
|
||||
#endif
|
||||
|
||||
DEFINE_RAW_SPINLOCK(native_tlbie_lock);
|
||||
|
||||
static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
|
||||
{
|
||||
unsigned long va;
|
||||
unsigned int penc;
|
||||
unsigned long sllp;
|
||||
|
||||
/*
|
||||
* We need 14 to 65 bits of va for a tlbie of a 4K page
|
||||
* With vpn we ignore the lower VPN_SHIFT bits already.
|
||||
* And top two bits are already ignored because we can
|
||||
* only accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT
|
||||
* of 12.
|
||||
*/
|
||||
va = vpn << VPN_SHIFT;
|
||||
/*
|
||||
* clear top 16 bits of 64bit va, non SLS segment
|
||||
* Older versions of the architecture (2.02 and earlier) require the
|
||||
* masking of the top 16 bits.
|
||||
*/
|
||||
va &= ~(0xffffULL << 48);
|
||||
|
||||
switch (psize) {
|
||||
case MMU_PAGE_4K:
|
||||
/* clear out bits after (52) [0....52.....63] */
|
||||
va &= ~((1ul << (64 - 52)) - 1);
|
||||
va |= ssize << 8;
|
||||
sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
|
||||
((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
|
||||
va |= sllp << 5;
|
||||
asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
|
||||
: : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
|
||||
: "memory");
|
||||
break;
|
||||
default:
|
||||
/* We need 14 to 14 + i bits of va */
|
||||
penc = mmu_psize_defs[psize].penc[apsize];
|
||||
va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
|
||||
va |= penc << 12;
|
||||
va |= ssize << 8;
|
||||
/*
|
||||
* AVAL bits:
|
||||
* We don't need all the bits, but rest of the bits
|
||||
* must be ignored by the processor.
|
||||
* vpn covers up to 65 bits of va (0...65) and we need
|
||||
* 58..64 bits of va.
|
||||
*/
|
||||
va |= (vpn & 0xfe); /* AVAL */
|
||||
va |= 1; /* L */
|
||||
asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
|
||||
: : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
|
||||
: "memory");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
|
||||
{
|
||||
unsigned long va;
|
||||
unsigned int penc;
|
||||
unsigned long sllp;
|
||||
|
||||
/* VPN_SHIFT can be at most 12 */
|
||||
va = vpn << VPN_SHIFT;
|
||||
/*
|
||||
* clear top 16 bits of 64 bit va, non SLS segment
|
||||
* Older versions of the architecture (2.02 and earlier) require the
|
||||
* masking of the top 16 bits.
|
||||
*/
|
||||
va &= ~(0xffffULL << 48);
|
||||
|
||||
switch (psize) {
|
||||
case MMU_PAGE_4K:
|
||||
/* clear out bits after(52) [0....52.....63] */
|
||||
va &= ~((1ul << (64 - 52)) - 1);
|
||||
va |= ssize << 8;
|
||||
sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
|
||||
((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
|
||||
va |= sllp << 5;
|
||||
asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
|
||||
: : "r"(va) : "memory");
|
||||
break;
|
||||
default:
|
||||
/* We need 14 to 14 + i bits of va */
|
||||
penc = mmu_psize_defs[psize].penc[apsize];
|
||||
va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
|
||||
va |= penc << 12;
|
||||
va |= ssize << 8;
|
||||
/*
|
||||
* AVAL bits:
|
||||
* We don't need all the bits, but rest of the bits
|
||||
* must be ignored by the processor.
|
||||
* vpn covers up to 65 bits of va (0...65) and we need
|
||||
* 58..64 bits of va.
|
||||
*/
|
||||
va |= (vpn & 0xfe);
|
||||
va |= 1; /* L */
|
||||
asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
|
||||
: : "r"(va) : "memory");
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static inline void tlbie(unsigned long vpn, int psize, int apsize,
|
||||
int ssize, int local)
|
||||
{
|
||||
unsigned int use_local;
|
||||
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
|
||||
|
||||
use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use();
|
||||
|
||||
if (use_local)
|
||||
use_local = mmu_psize_defs[psize].tlbiel;
|
||||
if (lock_tlbie && !use_local)
|
||||
raw_spin_lock(&native_tlbie_lock);
|
||||
asm volatile("ptesync": : :"memory");
|
||||
if (use_local) {
|
||||
__tlbiel(vpn, psize, apsize, ssize);
|
||||
asm volatile("ptesync": : :"memory");
|
||||
} else {
|
||||
__tlbie(vpn, psize, apsize, ssize);
|
||||
asm volatile("eieio; tlbsync; ptesync": : :"memory");
|
||||
}
|
||||
if (lock_tlbie && !use_local)
|
||||
raw_spin_unlock(&native_tlbie_lock);
|
||||
}
|
||||
|
||||
static inline void native_lock_hpte(struct hash_pte *hptep)
|
||||
{
|
||||
unsigned long *word = (unsigned long *)&hptep->v;
|
||||
|
||||
while (1) {
|
||||
if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
|
||||
break;
|
||||
while(test_bit(HPTE_LOCK_BIT, word))
|
||||
cpu_relax();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void native_unlock_hpte(struct hash_pte *hptep)
|
||||
{
|
||||
unsigned long *word = (unsigned long *)&hptep->v;
|
||||
|
||||
clear_bit_unlock(HPTE_LOCK_BIT, word);
|
||||
}
|
||||
|
||||
static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
|
||||
unsigned long pa, unsigned long rflags,
|
||||
unsigned long vflags, int psize, int apsize, int ssize)
|
||||
{
|
||||
struct hash_pte *hptep = htab_address + hpte_group;
|
||||
unsigned long hpte_v, hpte_r;
|
||||
int i;
|
||||
|
||||
if (!(vflags & HPTE_V_BOLTED)) {
|
||||
DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx,"
|
||||
" rflags=%lx, vflags=%lx, psize=%d)\n",
|
||||
hpte_group, vpn, pa, rflags, vflags, psize);
|
||||
}
|
||||
|
||||
for (i = 0; i < HPTES_PER_GROUP; i++) {
|
||||
if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID)) {
|
||||
/* retry with lock held */
|
||||
native_lock_hpte(hptep);
|
||||
if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID))
|
||||
break;
|
||||
native_unlock_hpte(hptep);
|
||||
}
|
||||
|
||||
hptep++;
|
||||
}
|
||||
|
||||
if (i == HPTES_PER_GROUP)
|
||||
return -1;
|
||||
|
||||
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
|
||||
hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
|
||||
|
||||
if (!(vflags & HPTE_V_BOLTED)) {
|
||||
DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
|
||||
i, hpte_v, hpte_r);
|
||||
}
|
||||
|
||||
hptep->r = cpu_to_be64(hpte_r);
|
||||
/* Guarantee the second dword is visible before the valid bit */
|
||||
eieio();
|
||||
/*
|
||||
* Now set the first dword including the valid bit
|
||||
* NOTE: this also unlocks the hpte
|
||||
*/
|
||||
hptep->v = cpu_to_be64(hpte_v);
|
||||
|
||||
__asm__ __volatile__ ("ptesync" : : : "memory");
|
||||
|
||||
return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
|
||||
}
|
||||
|
||||
static long native_hpte_remove(unsigned long hpte_group)
|
||||
{
|
||||
struct hash_pte *hptep;
|
||||
int i;
|
||||
int slot_offset;
|
||||
unsigned long hpte_v;
|
||||
|
||||
DBG_LOW(" remove(group=%lx)\n", hpte_group);
|
||||
|
||||
/* pick a random entry to start at */
|
||||
slot_offset = mftb() & 0x7;
|
||||
|
||||
for (i = 0; i < HPTES_PER_GROUP; i++) {
|
||||
hptep = htab_address + hpte_group + slot_offset;
|
||||
hpte_v = be64_to_cpu(hptep->v);
|
||||
|
||||
if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
|
||||
/* retry with lock held */
|
||||
native_lock_hpte(hptep);
|
||||
hpte_v = be64_to_cpu(hptep->v);
|
||||
if ((hpte_v & HPTE_V_VALID)
|
||||
&& !(hpte_v & HPTE_V_BOLTED))
|
||||
break;
|
||||
native_unlock_hpte(hptep);
|
||||
}
|
||||
|
||||
slot_offset++;
|
||||
slot_offset &= 0x7;
|
||||
}
|
||||
|
||||
if (i == HPTES_PER_GROUP)
|
||||
return -1;
|
||||
|
||||
/* Invalidate the hpte. NOTE: this also unlocks it */
|
||||
hptep->v = 0;
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
|
||||
unsigned long vpn, int bpsize,
|
||||
int apsize, int ssize, int local)
|
||||
{
|
||||
struct hash_pte *hptep = htab_address + slot;
|
||||
unsigned long hpte_v, want_v;
|
||||
int ret = 0;
|
||||
|
||||
want_v = hpte_encode_avpn(vpn, bpsize, ssize);
|
||||
|
||||
DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
|
||||
vpn, want_v & HPTE_V_AVPN, slot, newpp);
|
||||
|
||||
native_lock_hpte(hptep);
|
||||
|
||||
hpte_v = be64_to_cpu(hptep->v);
|
||||
/*
|
||||
* We need to invalidate the TLB always because hpte_remove doesn't do
|
||||
* a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
|
||||
* random entry from it. When we do that we don't invalidate the TLB
|
||||
* (hpte_remove) because we assume the old translation is still
|
||||
* technically "valid".
|
||||
*/
|
||||
if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
|
||||
DBG_LOW(" -> miss\n");
|
||||
ret = -1;
|
||||
} else {
|
||||
DBG_LOW(" -> hit\n");
|
||||
/* Update the HPTE */
|
||||
hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) |
|
||||
(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)));
|
||||
}
|
||||
native_unlock_hpte(hptep);
|
||||
|
||||
/* Ensure it is out of the tlb too. */
|
||||
tlbie(vpn, bpsize, apsize, ssize, local);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long native_hpte_find(unsigned long vpn, int psize, int ssize)
|
||||
{
|
||||
struct hash_pte *hptep;
|
||||
unsigned long hash;
|
||||
unsigned long i;
|
||||
long slot;
|
||||
unsigned long want_v, hpte_v;
|
||||
|
||||
hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
|
||||
want_v = hpte_encode_avpn(vpn, psize, ssize);
|
||||
|
||||
/* Bolted mappings are only ever in the primary group */
|
||||
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
|
||||
for (i = 0; i < HPTES_PER_GROUP; i++) {
|
||||
hptep = htab_address + slot;
|
||||
hpte_v = be64_to_cpu(hptep->v);
|
||||
|
||||
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
|
||||
/* HPTE matches */
|
||||
return slot;
|
||||
++slot;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the page protection bits. Intended to be used to create
|
||||
* guard pages for kernel data structures on pages which are bolted
|
||||
* in the HPT. Assumes pages being operated on will not be stolen.
|
||||
*
|
||||
* No need to lock here because we should be the only user.
|
||||
*/
|
||||
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
|
||||
int psize, int ssize)
|
||||
{
|
||||
unsigned long vpn;
|
||||
unsigned long vsid;
|
||||
long slot;
|
||||
struct hash_pte *hptep;
|
||||
|
||||
vsid = get_kernel_vsid(ea, ssize);
|
||||
vpn = hpt_vpn(ea, vsid, ssize);
|
||||
|
||||
slot = native_hpte_find(vpn, psize, ssize);
|
||||
if (slot == -1)
|
||||
panic("could not find page to bolt\n");
|
||||
hptep = htab_address + slot;
|
||||
|
||||
/* Update the HPTE */
|
||||
hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
|
||||
~(HPTE_R_PP | HPTE_R_N)) |
|
||||
(newpp & (HPTE_R_PP | HPTE_R_N)));
|
||||
/*
|
||||
* Ensure it is out of the tlb too. Bolted entries base and
|
||||
* actual page size will be same.
|
||||
*/
|
||||
tlbie(vpn, psize, psize, ssize, 0);
|
||||
}
|
||||
|
||||
static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
|
||||
int bpsize, int apsize, int ssize, int local)
|
||||
{
|
||||
struct hash_pte *hptep = htab_address + slot;
|
||||
unsigned long hpte_v;
|
||||
unsigned long want_v;
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);
|
||||
|
||||
want_v = hpte_encode_avpn(vpn, bpsize, ssize);
|
||||
native_lock_hpte(hptep);
|
||||
hpte_v = be64_to_cpu(hptep->v);
|
||||
|
||||
/*
|
||||
* We need to invalidate the TLB always because hpte_remove doesn't do
|
||||
* a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
|
||||
* random entry from it. When we do that we don't invalidate the TLB
|
||||
* (hpte_remove) because we assume the old translation is still
|
||||
* technically "valid".
|
||||
*/
|
||||
if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
|
||||
native_unlock_hpte(hptep);
|
||||
else
|
||||
/* Invalidate the hpte. NOTE: this also unlocks it */
|
||||
hptep->v = 0;
|
||||
|
||||
/* Invalidate the TLB */
|
||||
tlbie(vpn, bpsize, apsize, ssize, local);
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static void native_hugepage_invalidate(unsigned long vsid,
|
||||
unsigned long addr,
|
||||
unsigned char *hpte_slot_array,
|
||||
int psize, int ssize)
|
||||
{
|
||||
int i;
|
||||
struct hash_pte *hptep;
|
||||
int actual_psize = MMU_PAGE_16M;
|
||||
unsigned int max_hpte_count, valid;
|
||||
unsigned long flags, s_addr = addr;
|
||||
unsigned long hpte_v, want_v, shift;
|
||||
unsigned long hidx, vpn = 0, hash, slot;
|
||||
|
||||
shift = mmu_psize_defs[psize].shift;
|
||||
max_hpte_count = 1U << (PMD_SHIFT - shift);
|
||||
|
||||
local_irq_save(flags);
|
||||
for (i = 0; i < max_hpte_count; i++) {
|
||||
valid = hpte_valid(hpte_slot_array, i);
|
||||
if (!valid)
|
||||
continue;
|
||||
hidx = hpte_hash_index(hpte_slot_array, i);
|
||||
|
||||
/* get the vpn */
|
||||
addr = s_addr + (i * (1ul << shift));
|
||||
vpn = hpt_vpn(addr, vsid, ssize);
|
||||
hash = hpt_hash(vpn, shift, ssize);
|
||||
if (hidx & _PTEIDX_SECONDARY)
|
||||
hash = ~hash;
|
||||
|
||||
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
|
||||
slot += hidx & _PTEIDX_GROUP_IX;
|
||||
|
||||
hptep = htab_address + slot;
|
||||
want_v = hpte_encode_avpn(vpn, psize, ssize);
|
||||
native_lock_hpte(hptep);
|
||||
hpte_v = be64_to_cpu(hptep->v);
|
||||
|
||||
/* Even if we miss, we need to invalidate the TLB */
|
||||
if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
|
||||
native_unlock_hpte(hptep);
|
||||
else
|
||||
/* Invalidate the hpte. NOTE: this also unlocks it */
|
||||
hptep->v = 0;
|
||||
/*
|
||||
* We need to do a TLB invalidate for each address; the tlbie
|
||||
* instruction compares entry_VA in tlb with the VA specified
|
||||
* here
|
||||
*/
|
||||
tlbie(vpn, psize, actual_psize, ssize, 0);
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static inline int __hpte_actual_psize(unsigned int lp, int psize)
|
||||
{
|
||||
int i, shift;
|
||||
unsigned int mask;
|
||||
|
||||
/* start from 1 ignoring MMU_PAGE_4K */
|
||||
for (i = 1; i < MMU_PAGE_COUNT; i++) {
|
||||
|
||||
/* invalid penc */
|
||||
if (mmu_psize_defs[psize].penc[i] == -1)
|
||||
continue;
|
||||
/*
|
||||
* encoding bits per actual page size
|
||||
* PTE LP actual page size
|
||||
* rrrr rrrz >=8KB
|
||||
* rrrr rrzz >=16KB
|
||||
* rrrr rzzz >=32KB
|
||||
* rrrr zzzz >=64KB
|
||||
* .......
|
||||
*/
|
||||
shift = mmu_psize_defs[i].shift - LP_SHIFT;
|
||||
if (shift > LP_BITS)
|
||||
shift = LP_BITS;
|
||||
mask = (1 << shift) - 1;
|
||||
if ((lp & mask) == mmu_psize_defs[psize].penc[i])
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
|
||||
int *psize, int *apsize, int *ssize, unsigned long *vpn)
|
||||
{
|
||||
unsigned long avpn, pteg, vpi;
|
||||
unsigned long hpte_v = be64_to_cpu(hpte->v);
|
||||
unsigned long hpte_r = be64_to_cpu(hpte->r);
|
||||
unsigned long vsid, seg_off;
|
||||
int size, a_size, shift;
|
||||
/* Look at the 8 bit LP value */
|
||||
unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
|
||||
|
||||
if (!(hpte_v & HPTE_V_LARGE)) {
|
||||
size = MMU_PAGE_4K;
|
||||
a_size = MMU_PAGE_4K;
|
||||
} else {
|
||||
for (size = 0; size < MMU_PAGE_COUNT; size++) {
|
||||
|
||||
/* valid entries have a shift value */
|
||||
if (!mmu_psize_defs[size].shift)
|
||||
continue;
|
||||
|
||||
a_size = __hpte_actual_psize(lp, size);
|
||||
if (a_size != -1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* This works for all page sizes, and for 256M and 1T segments */
|
||||
*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
|
||||
shift = mmu_psize_defs[size].shift;
|
||||
|
||||
avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
|
||||
pteg = slot / HPTES_PER_GROUP;
|
||||
if (hpte_v & HPTE_V_SECONDARY)
|
||||
pteg = ~pteg;
|
||||
|
||||
switch (*ssize) {
|
||||
case MMU_SEGSIZE_256M:
|
||||
/* We only have 28 - 23 bits of seg_off in avpn */
|
||||
seg_off = (avpn & 0x1f) << 23;
|
||||
vsid = avpn >> 5;
|
||||
/* We can find more bits from the pteg value */
|
||||
if (shift < 23) {
|
||||
vpi = (vsid ^ pteg) & htab_hash_mask;
|
||||
seg_off |= vpi << shift;
|
||||
}
|
||||
*vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
|
||||
break;
|
||||
case MMU_SEGSIZE_1T:
|
||||
/* We only have 40 - 23 bits of seg_off in avpn */
|
||||
seg_off = (avpn & 0x1ffff) << 23;
|
||||
vsid = avpn >> 17;
|
||||
if (shift < 23) {
|
||||
vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
|
||||
seg_off |= vpi << shift;
|
||||
}
|
||||
*vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
|
||||
break;
|
||||
default:
|
||||
*vpn = size = 0;
|
||||
}
|
||||
*psize = size;
|
||||
*apsize = a_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* clear all mappings on kexec. All cpus are in real mode (or they will
|
||||
* be when they isi), and we are the only one left. We rely on our kernel
|
||||
* mapping being 0xC0's and the hardware ignoring those two real bits.
|
||||
*
|
||||
* TODO: add batching support when enabled. remember, no dynamic memory here,
|
||||
* although there is the control page available...
|
||||
*/
|
||||
static void native_hpte_clear(void)
|
||||
{
|
||||
unsigned long vpn = 0;
|
||||
unsigned long slot, slots, flags;
|
||||
struct hash_pte *hptep = htab_address;
|
||||
unsigned long hpte_v;
|
||||
unsigned long pteg_count;
|
||||
int psize, apsize, ssize;
|
||||
|
||||
pteg_count = htab_hash_mask + 1;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
/* we take the tlbie lock and hold it. Some hardware will
|
||||
* deadlock if we try to tlbie from two processors at once.
|
||||
*/
|
||||
raw_spin_lock(&native_tlbie_lock);
|
||||
|
||||
slots = pteg_count * HPTES_PER_GROUP;
|
||||
|
||||
for (slot = 0; slot < slots; slot++, hptep++) {
|
||||
/*
|
||||
* we could lock the pte here, but we are the only cpu
|
||||
* running, right? and for crash dump, we probably
|
||||
* don't want to wait for a maybe bad cpu.
|
||||
*/
|
||||
hpte_v = be64_to_cpu(hptep->v);
|
||||
|
||||
/*
|
||||
* Call __tlbie() here rather than tlbie() since we
|
||||
* already hold the native_tlbie_lock.
|
||||
*/
|
||||
if (hpte_v & HPTE_V_VALID) {
|
||||
hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
|
||||
hptep->v = 0;
|
||||
__tlbie(vpn, psize, apsize, ssize);
|
||||
}
|
||||
}
|
||||
|
||||
asm volatile("eieio; tlbsync; ptesync":::"memory");
|
||||
raw_spin_unlock(&native_tlbie_lock);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Batched hash table flush, we batch the tlbie's to avoid taking/releasing
|
||||
* the lock all the time
|
||||
*/
|
||||
static void native_flush_hash_range(unsigned long number, int local)
|
||||
{
|
||||
unsigned long vpn;
|
||||
unsigned long hash, index, hidx, shift, slot;
|
||||
struct hash_pte *hptep;
|
||||
unsigned long hpte_v;
|
||||
unsigned long want_v;
|
||||
unsigned long flags;
|
||||
real_pte_t pte;
|
||||
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
|
||||
unsigned long psize = batch->psize;
|
||||
int ssize = batch->ssize;
|
||||
int i;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
for (i = 0; i < number; i++) {
|
||||
vpn = batch->vpn[i];
|
||||
pte = batch->pte[i];
|
||||
|
||||
pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
|
||||
hash = hpt_hash(vpn, shift, ssize);
|
||||
hidx = __rpte_to_hidx(pte, index);
|
||||
if (hidx & _PTEIDX_SECONDARY)
|
||||
hash = ~hash;
|
||||
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
|
||||
slot += hidx & _PTEIDX_GROUP_IX;
|
||||
hptep = htab_address + slot;
|
||||
want_v = hpte_encode_avpn(vpn, psize, ssize);
|
||||
native_lock_hpte(hptep);
|
||||
hpte_v = be64_to_cpu(hptep->v);
|
||||
if (!HPTE_V_COMPARE(hpte_v, want_v) ||
|
||||
!(hpte_v & HPTE_V_VALID))
|
||||
native_unlock_hpte(hptep);
|
||||
else
|
||||
hptep->v = 0;
|
||||
} pte_iterate_hashed_end();
|
||||
}
|
||||
|
||||
if (mmu_has_feature(MMU_FTR_TLBIEL) &&
|
||||
mmu_psize_defs[psize].tlbiel && local) {
|
||||
asm volatile("ptesync":::"memory");
|
||||
for (i = 0; i < number; i++) {
|
||||
vpn = batch->vpn[i];
|
||||
pte = batch->pte[i];
|
||||
|
||||
pte_iterate_hashed_subpages(pte, psize,
|
||||
vpn, index, shift) {
|
||||
__tlbiel(vpn, psize, psize, ssize);
|
||||
} pte_iterate_hashed_end();
|
||||
}
|
||||
asm volatile("ptesync":::"memory");
|
||||
} else {
|
||||
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
|
||||
|
||||
if (lock_tlbie)
|
||||
raw_spin_lock(&native_tlbie_lock);
|
||||
|
||||
asm volatile("ptesync":::"memory");
|
||||
for (i = 0; i < number; i++) {
|
||||
vpn = batch->vpn[i];
|
||||
pte = batch->pte[i];
|
||||
|
||||
pte_iterate_hashed_subpages(pte, psize,
|
||||
vpn, index, shift) {
|
||||
__tlbie(vpn, psize, psize, ssize);
|
||||
} pte_iterate_hashed_end();
|
||||
}
|
||||
asm volatile("eieio; tlbsync; ptesync":::"memory");
|
||||
|
||||
if (lock_tlbie)
|
||||
raw_spin_unlock(&native_tlbie_lock);
|
||||
}
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
void __init hpte_init_native(void)
|
||||
{
|
||||
ppc_md.hpte_invalidate = native_hpte_invalidate;
|
||||
ppc_md.hpte_updatepp = native_hpte_updatepp;
|
||||
ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
|
||||
ppc_md.hpte_insert = native_hpte_insert;
|
||||
ppc_md.hpte_remove = native_hpte_remove;
|
||||
ppc_md.hpte_clear_all = native_hpte_clear;
|
||||
ppc_md.flush_hash_range = native_flush_hash_range;
|
||||
ppc_md.hugepage_invalidate = native_hugepage_invalidate;
|
||||
}
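/*
 * Editor's note: an illustrative sketch, not part of the original file.
 * It restates how the routines above turn a hash value plus the per-PTE
 * "hidx" bits into an HPTE slot: the primary group starts at
 * (hash & htab_hash_mask) * HPTES_PER_GROUP, a secondary-group entry uses
 * the complemented hash, and the low hidx bits select the slot within the
 * group. The helper name example_hpte_slot is hypothetical.
 */
#if 0
static unsigned long example_hpte_slot(unsigned long hash, unsigned long hidx)
{
	if (hidx & _PTEIDX_SECONDARY)
		hash = ~hash;
	return (hash & htab_hash_mask) * HPTES_PER_GROUP +
	       (hidx & _PTEIDX_GROUP_IX);
}
#endif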
1473
arch/powerpc/mm/hash_utils_64.c
Normal file
File diff suppressed because it is too large
86
arch/powerpc/mm/highmem.c
Normal file
@@ -0,0 +1,86 @@
/*
|
||||
* highmem.c: virtual kernel memory mappings for high memory
|
||||
*
|
||||
* PowerPC version, stolen from the i386 version.
|
||||
*
|
||||
* Used in CONFIG_HIGHMEM systems for memory pages which
|
||||
* are not addressable by direct kernel virtual addresses.
|
||||
*
|
||||
* Copyright (C) 1999 Gerhard Wichert, Siemens AG
|
||||
* Gerhard.Wichert@pdb.siemens.de
|
||||
*
|
||||
*
|
||||
* Redesigned the x86 32-bit VM architecture to deal with
|
||||
* up to 16 Terabyte physical memory. With current x86 CPUs
|
||||
* we now support up to 64 Gigabytes physical RAM.
|
||||
*
|
||||
* Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
|
||||
*
|
||||
* Reworked for PowerPC by various contributors. Moved from
|
||||
* highmem.h by Benjamin Herrenschmidt (c) 2009 IBM Corp.
|
||||
*/
|
||||
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/*
|
||||
* The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap
|
||||
* gives a more generic (and caching) interface. But kmap_atomic can
|
||||
* be used in IRQ contexts, so in some (very limited) cases we need
|
||||
* it.
|
||||
*/
|
||||
void *kmap_atomic_prot(struct page *page, pgprot_t prot)
|
||||
{
|
||||
unsigned long vaddr;
|
||||
int idx, type;
|
||||
|
||||
/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
|
||||
pagefault_disable();
|
||||
if (!PageHighMem(page))
|
||||
return page_address(page);
|
||||
|
||||
type = kmap_atomic_idx_push();
|
||||
idx = type + KM_TYPE_NR*smp_processor_id();
|
||||
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
|
||||
#ifdef CONFIG_DEBUG_HIGHMEM
|
||||
BUG_ON(!pte_none(*(kmap_pte-idx)));
|
||||
#endif
|
||||
__set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1);
|
||||
local_flush_tlb_page(NULL, vaddr);
|
||||
|
||||
return (void*) vaddr;
|
||||
}
|
||||
EXPORT_SYMBOL(kmap_atomic_prot);
|
||||
|
||||
void __kunmap_atomic(void *kvaddr)
|
||||
{
|
||||
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
|
||||
int type;
|
||||
|
||||
if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
|
||||
pagefault_enable();
|
||||
return;
|
||||
}
|
||||
|
||||
type = kmap_atomic_idx();
|
||||
|
||||
#ifdef CONFIG_DEBUG_HIGHMEM
|
||||
{
|
||||
unsigned int idx;
|
||||
|
||||
idx = type + KM_TYPE_NR * smp_processor_id();
|
||||
BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
|
||||
|
||||
/*
|
||||
* force other mappings to Oops if they'll try to access
|
||||
* this pte without first remap it
|
||||
*/
|
||||
pte_clear(&init_mm, vaddr, kmap_pte-idx);
|
||||
local_flush_tlb_page(NULL, vaddr);
|
||||
}
|
||||
#endif
|
||||
|
||||
kmap_atomic_idx_pop();
|
||||
pagefault_enable();
|
||||
}
|
||||
EXPORT_SYMBOL(__kunmap_atomic);
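/*
 * Editor's note: a minimal usage sketch, not part of the original file.
 * It shows the pairing of kmap_atomic_prot() and __kunmap_atomic() defined
 * above; the helper name and the copy it performs are hypothetical. The
 * mapping is atomic, so the caller must not sleep between the two calls.
 */
#if 0
static void example_copy_to_page(struct page *page, const void *src, size_t len)
{
	void *vaddr = kmap_atomic_prot(page, PAGE_KERNEL);

	memcpy(vaddr, src, len);
	__kunmap_atomic(vaddr);
}
#endif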
245
arch/powerpc/mm/hugepage-hash64.c
Normal file
@@ -0,0 +1,245 @@
/*
|
||||
* Copyright IBM Corporation, 2013
|
||||
* Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of version 2.1 of the GNU Lesser General Public License
|
||||
* as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* PPC64 THP Support for hash based MMUs
|
||||
*/
|
||||
#include <linux/mm.h>
|
||||
#include <asm/machdep.h>
|
||||
|
||||
static void invalidate_old_hpte(unsigned long vsid, unsigned long addr,
|
||||
pmd_t *pmdp, unsigned int psize, int ssize)
|
||||
{
|
||||
int i, max_hpte_count, valid;
|
||||
unsigned long s_addr;
|
||||
unsigned char *hpte_slot_array;
|
||||
unsigned long hidx, shift, vpn, hash, slot;
|
||||
|
||||
s_addr = addr & HPAGE_PMD_MASK;
|
||||
hpte_slot_array = get_hpte_slot_array(pmdp);
|
||||
/*
|
||||
* If we try to do a HUGE PTE update after a withdraw is done,
* we will find the below NULL. This happens when we do
* split_huge_page_pmd.
|
||||
*/
|
||||
if (!hpte_slot_array)
|
||||
return;
|
||||
|
||||
if (ppc_md.hugepage_invalidate)
|
||||
return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
|
||||
psize, ssize);
|
||||
/*
|
||||
* No bulk hpte removal support, invalidate each entry
|
||||
*/
|
||||
shift = mmu_psize_defs[psize].shift;
|
||||
max_hpte_count = HPAGE_PMD_SIZE >> shift;
|
||||
for (i = 0; i < max_hpte_count; i++) {
|
||||
/*
|
||||
* 8 bits per hpte entry
|
||||
* 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
|
||||
*/
|
||||
valid = hpte_valid(hpte_slot_array, i);
|
||||
if (!valid)
|
||||
continue;
|
||||
hidx = hpte_hash_index(hpte_slot_array, i);
|
||||
|
||||
/* get the vpn */
|
||||
addr = s_addr + (i * (1ul << shift));
|
||||
vpn = hpt_vpn(addr, vsid, ssize);
|
||||
hash = hpt_hash(vpn, shift, ssize);
|
||||
if (hidx & _PTEIDX_SECONDARY)
|
||||
hash = ~hash;
|
||||
|
||||
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
|
||||
slot += hidx & _PTEIDX_GROUP_IX;
|
||||
ppc_md.hpte_invalidate(slot, vpn, psize,
|
||||
MMU_PAGE_16M, ssize, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
|
||||
pmd_t *pmdp, unsigned long trap, int local, int ssize,
|
||||
unsigned int psize)
|
||||
{
|
||||
unsigned int index, valid;
|
||||
unsigned char *hpte_slot_array;
|
||||
unsigned long rflags, pa, hidx;
|
||||
unsigned long old_pmd, new_pmd;
|
||||
int ret, lpsize = MMU_PAGE_16M;
|
||||
unsigned long vpn, hash, shift, slot;
|
||||
|
||||
/*
|
||||
* atomically mark the linux large page PMD busy and dirty
|
||||
*/
|
||||
do {
|
||||
pmd_t pmd = ACCESS_ONCE(*pmdp);
|
||||
|
||||
old_pmd = pmd_val(pmd);
|
||||
/* If PMD busy, retry the access */
|
||||
if (unlikely(old_pmd & _PAGE_BUSY))
|
||||
return 0;
|
||||
/* If PMD is trans splitting retry the access */
|
||||
if (unlikely(old_pmd & _PAGE_SPLITTING))
|
||||
return 0;
|
||||
/* If PMD permissions don't match, take page fault */
|
||||
if (unlikely(access & ~old_pmd))
|
||||
return 1;
|
||||
/*
|
||||
* Try to lock the PTE, add ACCESSED and DIRTY if it was
|
||||
* a write access
|
||||
*/
|
||||
new_pmd = old_pmd | _PAGE_BUSY | _PAGE_ACCESSED;
|
||||
if (access & _PAGE_RW)
|
||||
new_pmd |= _PAGE_DIRTY;
|
||||
} while (old_pmd != __cmpxchg_u64((unsigned long *)pmdp,
|
||||
old_pmd, new_pmd));
|
||||
/*
|
||||
* PP bits. _PAGE_USER is already PP bit 0x2, so we only
|
||||
* need to add in 0x1 if it's a read-only user page
|
||||
*/
|
||||
rflags = new_pmd & _PAGE_USER;
|
||||
if ((new_pmd & _PAGE_USER) && !((new_pmd & _PAGE_RW) &&
|
||||
(new_pmd & _PAGE_DIRTY)))
|
||||
rflags |= 0x1;
|
||||
/*
|
||||
* _PAGE_EXEC -> HW_NO_EXEC since it's inverted
|
||||
*/
|
||||
rflags |= ((new_pmd & _PAGE_EXEC) ? 0 : HPTE_R_N);
|
||||
|
||||
#if 0
|
||||
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
|
||||
|
||||
/*
|
||||
* No CPU has hugepages but lacks no execute, so we
|
||||
* don't need to worry about that case
|
||||
*/
|
||||
rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* Find the slot index details for this ea, using base page size.
|
||||
*/
|
||||
shift = mmu_psize_defs[psize].shift;
|
||||
index = (ea & ~HPAGE_PMD_MASK) >> shift;
|
||||
BUG_ON(index >= 4096);
|
||||
|
||||
vpn = hpt_vpn(ea, vsid, ssize);
|
||||
hash = hpt_hash(vpn, shift, ssize);
|
||||
hpte_slot_array = get_hpte_slot_array(pmdp);
|
||||
if (psize == MMU_PAGE_4K) {
|
||||
/*
|
||||
* invalidate the old hpte entry if we have that mapped via 64K
|
||||
* base page size. This is because demote_segment won't flush
|
||||
* hash page table entries.
|
||||
*/
|
||||
if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO))
|
||||
invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize);
|
||||
}
|
||||
|
||||
valid = hpte_valid(hpte_slot_array, index);
|
||||
if (valid) {
|
||||
/* update the hpte bits */
|
||||
hidx = hpte_hash_index(hpte_slot_array, index);
|
||||
if (hidx & _PTEIDX_SECONDARY)
|
||||
hash = ~hash;
|
||||
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
|
||||
slot += hidx & _PTEIDX_GROUP_IX;
|
||||
|
||||
ret = ppc_md.hpte_updatepp(slot, rflags, vpn,
|
||||
psize, lpsize, ssize, local);
|
||||
/*
|
||||
* We failed to update, try to insert a new entry.
|
||||
*/
|
||||
if (ret == -1) {
|
||||
/*
|
||||
* large pte is marked busy, so we can be sure
|
||||
* nobody is looking at hpte_slot_array. hence we can
|
||||
* safely update this here.
|
||||
*/
|
||||
valid = 0;
|
||||
hpte_slot_array[index] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (!valid) {
|
||||
unsigned long hpte_group;
|
||||
|
||||
/* insert new entry */
|
||||
pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
|
||||
new_pmd |= _PAGE_HASHPTE;
|
||||
|
||||
/* Add in WIMG bits */
|
||||
rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
|
||||
_PAGE_GUARDED));
|
||||
/*
|
||||
* enable the memory coherence always
|
||||
*/
|
||||
rflags |= HPTE_R_M;
|
||||
repeat:
|
||||
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
|
||||
|
||||
/* Insert into the hash table, primary slot */
|
||||
slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
|
||||
psize, lpsize, ssize);
|
||||
/*
|
||||
* Primary is full, try the secondary
|
||||
*/
|
||||
if (unlikely(slot == -1)) {
|
||||
hpte_group = ((~hash & htab_hash_mask) *
|
||||
HPTES_PER_GROUP) & ~0x7UL;
|
||||
slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
|
||||
rflags, HPTE_V_SECONDARY,
|
||||
psize, lpsize, ssize);
|
||||
if (slot == -1) {
|
||||
if (mftb() & 0x1)
|
||||
hpte_group = ((hash & htab_hash_mask) *
|
||||
HPTES_PER_GROUP) & ~0x7UL;
|
||||
|
||||
ppc_md.hpte_remove(hpte_group);
|
||||
goto repeat;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Hypervisor failure. Restore old pmd and return -1
|
||||
* similar to __hash_page_*
|
||||
*/
|
||||
if (unlikely(slot == -2)) {
|
||||
*pmdp = __pmd(old_pmd);
|
||||
hash_failure_debug(ea, access, vsid, trap, ssize,
|
||||
psize, lpsize, old_pmd);
|
||||
return -1;
|
||||
}
|
||||
/*
|
||||
* large pte is marked busy, so we can be sure
|
||||
* nobody is looking at hpte_slot_array. hence we can
|
||||
* safely update this here.
|
||||
*/
|
||||
mark_hpte_slot_valid(hpte_slot_array, index, slot);
|
||||
}
|
||||
/*
|
||||
* Mark the pte with _PAGE_COMBO, if we are trying to hash it with
|
||||
* base page size 4k.
|
||||
*/
|
||||
if (psize == MMU_PAGE_4K)
|
||||
new_pmd |= _PAGE_COMBO;
|
||||
/*
|
||||
* The hpte valid is stored in the pgtable whose address is in the
|
||||
* second half of the PMD. Order this against clearing of the busy bit in
|
||||
* huge pmd.
|
||||
*/
|
||||
smp_wmb();
|
||||
*pmdp = __pmd(new_pmd & ~_PAGE_BUSY);
|
||||
return 0;
|
||||
}
|
||||
153
arch/powerpc/mm/hugetlbpage-book3e.c
Normal file
@@ -0,0 +1,153 @@
/*
|
||||
* PPC Huge TLB Page Support for Book3E MMU
|
||||
*
|
||||
* Copyright (C) 2009 David Gibson, IBM Corporation.
|
||||
* Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
|
||||
*
|
||||
*/
|
||||
#include <linux/mm.h>
|
||||
#include <linux/hugetlb.h>
|
||||
|
||||
#ifdef CONFIG_PPC_FSL_BOOK3E
|
||||
#ifdef CONFIG_PPC64
|
||||
static inline int tlb1_next(void)
|
||||
{
|
||||
struct paca_struct *paca = get_paca();
|
||||
struct tlb_core_data *tcd;
|
||||
int this, next;
|
||||
|
||||
tcd = paca->tcd_ptr;
|
||||
this = tcd->esel_next;
|
||||
|
||||
next = this + 1;
|
||||
if (next >= tcd->esel_max)
|
||||
next = tcd->esel_first;
|
||||
|
||||
tcd->esel_next = next;
|
||||
return this;
|
||||
}
|
||||
#else
|
||||
static inline int tlb1_next(void)
|
||||
{
|
||||
int index, ncams;
|
||||
|
||||
ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
|
||||
|
||||
index = __get_cpu_var(next_tlbcam_idx);
|
||||
|
||||
/* Just round-robin the entries and wrap when we hit the end */
|
||||
if (unlikely(index == ncams - 1))
|
||||
__get_cpu_var(next_tlbcam_idx) = tlbcam_index;
|
||||
else
|
||||
__get_cpu_var(next_tlbcam_idx)++;
|
||||
|
||||
return index;
|
||||
}
|
||||
#endif /* !PPC64 */
|
||||
#endif /* FSL */
|
||||
|
||||
static inline int mmu_get_tsize(int psize)
|
||||
{
|
||||
return mmu_psize_defs[psize].enc;
|
||||
}
|
||||
|
||||
static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid)
|
||||
{
|
||||
int found = 0;
|
||||
|
||||
mtspr(SPRN_MAS6, pid << 16);
|
||||
if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) {
|
||||
asm volatile(
|
||||
"li %0,0\n"
|
||||
"tlbsx. 0,%1\n"
|
||||
"bne 1f\n"
|
||||
"li %0,1\n"
|
||||
"1:\n"
|
||||
: "=&r"(found) : "r"(ea));
|
||||
} else {
|
||||
asm volatile(
|
||||
"tlbsx 0,%1\n"
|
||||
"mfspr %0,0x271\n"
|
||||
"srwi %0,%0,31\n"
|
||||
: "=&r"(found) : "r"(ea));
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
|
||||
pte_t pte)
|
||||
{
|
||||
unsigned long mas1, mas2;
|
||||
u64 mas7_3;
|
||||
unsigned long psize, tsize, shift;
|
||||
unsigned long flags;
|
||||
struct mm_struct *mm;
|
||||
|
||||
#ifdef CONFIG_PPC_FSL_BOOK3E
|
||||
int index;
|
||||
#endif
|
||||
|
||||
if (unlikely(is_kernel_addr(ea)))
|
||||
return;
|
||||
|
||||
mm = vma->vm_mm;
|
||||
|
||||
#ifdef CONFIG_PPC_MM_SLICES
|
||||
psize = get_slice_psize(mm, ea);
|
||||
tsize = mmu_get_tsize(psize);
|
||||
shift = mmu_psize_defs[psize].shift;
|
||||
#else
|
||||
psize = vma_mmu_pagesize(vma);
|
||||
shift = __ilog2(psize);
|
||||
tsize = shift - 10;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We can't be interrupted while we're setting up the MAS
|
||||
* registers or after we've confirmed that no tlb exists.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
|
||||
if (unlikely(book3e_tlb_exists(ea, mm->context.id))) {
|
||||
local_irq_restore(flags);
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PPC_FSL_BOOK3E
|
||||
/* We have to use the CAM(TLB1) on FSL parts for hugepages */
|
||||
index = tlb1_next();
|
||||
mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1));
|
||||
#endif
|
||||
|
||||
mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize);
|
||||
mas2 = ea & ~((1UL << shift) - 1);
|
||||
mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
|
||||
mas7_3 = (u64)pte_pfn(pte) << PAGE_SHIFT;
|
||||
mas7_3 |= (pte_val(pte) >> PTE_BAP_SHIFT) & MAS3_BAP_MASK;
|
||||
if (!pte_dirty(pte))
|
||||
mas7_3 &= ~(MAS3_SW|MAS3_UW);
|
||||
|
||||
mtspr(SPRN_MAS1, mas1);
|
||||
mtspr(SPRN_MAS2, mas2);
|
||||
|
||||
if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) {
|
||||
mtspr(SPRN_MAS7_MAS3, mas7_3);
|
||||
} else {
|
||||
if (mmu_has_feature(MMU_FTR_BIG_PHYS))
|
||||
mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
|
||||
mtspr(SPRN_MAS3, lower_32_bits(mas7_3));
|
||||
}
|
||||
|
||||
asm volatile ("tlbwe");
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
|
||||
{
|
||||
struct hstate *hstate = hstate_file(vma->vm_file);
|
||||
unsigned long tsize = huge_page_shift(hstate) - 10;
|
||||
|
||||
__flush_tlb_page(vma->vm_mm, vmaddr, tsize, 0);
|
||||
}
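/*
 * Editor's note: an illustrative sketch, not part of the original file.
 * Both call sites above compute the MAS1 TSIZE field as "shift - 10",
 * i.e. TSIZE is treated here as log2 of the page size in KB: a 2^shift
 * byte page gives tsize = shift - 10 (for example a 4MB page, shift 22,
 * encodes as tsize 12). The helper name is hypothetical.
 */
#if 0
static inline unsigned long example_shift_to_tsize(unsigned int shift)
{
	return shift - 10;	/* 2^shift bytes == (1 << (shift - 10)) KB */
}
#endif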
129
arch/powerpc/mm/hugetlbpage-hash64.c
Normal file
@@ -0,0 +1,129 @@
/*
|
||||
* PPC64 Huge TLB Page Support for hash based MMUs (POWER4 and later)
|
||||
*
|
||||
* Copyright (C) 2003 David Gibson, IBM Corporation.
|
||||
*
|
||||
* Based on the IA-32 version:
|
||||
* Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/machdep.h>
|
||||
|
||||
extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
|
||||
unsigned long pa, unsigned long rflags,
|
||||
unsigned long vflags, int psize, int ssize);
|
||||
|
||||
int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
|
||||
pte_t *ptep, unsigned long trap, int local, int ssize,
|
||||
unsigned int shift, unsigned int mmu_psize)
|
||||
{
|
||||
unsigned long vpn;
|
||||
unsigned long old_pte, new_pte;
|
||||
unsigned long rflags, pa, sz;
|
||||
long slot;
|
||||
|
||||
BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
|
||||
|
||||
/* Search the Linux page table for a match with va */
|
||||
vpn = hpt_vpn(ea, vsid, ssize);
|
||||
|
||||
/* At this point, we have a pte (old_pte) which can be used to build
|
||||
* or update an HPTE. There are 2 cases:
|
||||
*
|
||||
* 1. There is a valid (present) pte with no associated HPTE (this is
|
||||
* the most common case)
|
||||
* 2. There is a valid (present) pte with an associated HPTE. The
|
||||
* current values of the pp bits in the HPTE prevent access
|
||||
* because we are doing software DIRTY bit management and the
|
||||
* page is currently not DIRTY.
|
||||
*/
|
||||
|
||||
|
||||
do {
|
||||
old_pte = pte_val(*ptep);
|
||||
/* If PTE busy, retry the access */
|
||||
if (unlikely(old_pte & _PAGE_BUSY))
|
||||
return 0;
|
||||
/* If PTE permissions don't match, take page fault */
|
||||
if (unlikely(access & ~old_pte))
|
||||
return 1;
|
||||
/* Try to lock the PTE, add ACCESSED and DIRTY if it was
|
||||
* a write access */
|
||||
new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
|
||||
if (access & _PAGE_RW)
|
||||
new_pte |= _PAGE_DIRTY;
|
||||
} while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
|
||||
old_pte, new_pte));
|
||||
|
||||
rflags = 0x2 | (!(new_pte & _PAGE_RW));
|
||||
/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
|
||||
rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
|
||||
sz = ((1UL) << shift);
|
||||
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
|
||||
/* No CPU has hugepages but lacks no execute, so we
|
||||
* don't need to worry about that case */
|
||||
rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
|
||||
|
||||
/* Check if pte already has an hpte (case 2) */
|
||||
if (unlikely(old_pte & _PAGE_HASHPTE)) {
|
||||
/* There MIGHT be an HPTE for this pte */
|
||||
unsigned long hash, slot;
|
||||
|
||||
hash = hpt_hash(vpn, shift, ssize);
|
||||
if (old_pte & _PAGE_F_SECOND)
|
||||
hash = ~hash;
|
||||
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
|
||||
slot += (old_pte & _PAGE_F_GIX) >> 12;
|
||||
|
||||
if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
|
||||
mmu_psize, ssize, local) == -1)
|
||||
old_pte &= ~_PAGE_HPTEFLAGS;
|
||||
}
|
||||
|
||||
if (likely(!(old_pte & _PAGE_HASHPTE))) {
|
||||
unsigned long hash = hpt_hash(vpn, shift, ssize);
|
||||
|
||||
pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
|
||||
|
||||
/* clear HPTE slot information in new PTE */
|
||||
#ifdef CONFIG_PPC_64K_PAGES
|
||||
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0;
|
||||
#else
|
||||
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
|
||||
#endif
|
||||
/* Add in WIMG bits */
|
||||
rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
|
||||
_PAGE_COHERENT | _PAGE_GUARDED));
|
||||
/*
|
||||
* enable the memory coherence always
|
||||
*/
|
||||
rflags |= HPTE_R_M;
|
||||
|
||||
slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0,
|
||||
mmu_psize, ssize);
|
||||
|
||||
/*
|
||||
* Hypervisor failure. Restore old pte and return -1
|
||||
* similar to __hash_page_*
|
||||
*/
|
||||
if (unlikely(slot == -2)) {
|
||||
*ptep = __pte(old_pte);
|
||||
hash_failure_debug(ea, access, vsid, trap, ssize,
|
||||
mmu_psize, mmu_psize, old_pte);
|
||||
return -1;
|
||||
}
|
||||
|
||||
new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
|
||||
}
|
||||
|
||||
/*
|
||||
* No need to use ldarx/stdcx here
|
||||
*/
|
||||
*ptep = __pte(new_pte & ~_PAGE_BUSY);
|
||||
return 0;
|
||||
}
|
||||
1095
arch/powerpc/mm/hugetlbpage.c
Normal file
File diff suppressed because it is too large
292
arch/powerpc/mm/icswx.c
Normal file
@@ -0,0 +1,292 @@
/*
|
||||
* ICSWX and ACOP Management
|
||||
*
|
||||
* Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
#include "icswx.h"
|
||||
|
||||
/*
|
||||
* The processor and its L2 cache cause the icswx instruction to
|
||||
* generate a COP_REQ transaction on PowerBus. The transaction has no
|
||||
* address, and the processor does not perform an MMU access to
|
||||
* authenticate the transaction. The command portion of the PowerBus
|
||||
* COP_REQ transaction includes the LPAR_ID (LPID) and the coprocessor
|
||||
* Process ID (PID), which the coprocessor compares to the authorized
|
||||
* LPID and PID held in the coprocessor, to determine if the process
|
||||
* is authorized to generate the transaction. The data of the COP_REQ
|
||||
* transaction is 128-byte or less in size and is placed in cacheable
|
||||
* memory on a 128-byte cache line boundary.
|
||||
*
|
||||
* A task that wants to use a coprocessor should call use_cop() to mark
* the use of the Coprocessor Type (CT) and enable context switching. On
* a server class processor, the PID register is used only for coprocessor
* management and so a coprocessor PID is allocated before executing the
* icswx instruction. drop_cop() is used to free the coprocessor PID.
|
||||
*
|
||||
* Example:
|
||||
* Host Fabric Interface (HFI) is a PowerPC network coprocessor.
|
||||
* Each HFI has multiple windows. Each HFI window serves as a
|
||||
* network device sending to and receiving from HFI network.
|
||||
* HFI immediate send function uses icswx instruction. The immediate
|
||||
* send function allows small (single cache-line) packets to be sent
|
||||
* without using the regular HFI send FIFO and doorbell, which are
|
||||
* much slower than immediate send.
|
||||
*
|
||||
* For each task intending to use HFI immediate send, the HFI driver
|
||||
* calls use_cop() to obtain a coprocessor PID for the task.
|
||||
* The HFI driver then allocates a free HFI window and saves the
|
||||
* coprocessor PID to the HFI window to allow the task to use the
|
||||
* HFI window.
|
||||
*
|
||||
* The HFI driver repeatedly creates immediate send packets and
|
||||
* issues icswx instruction to send data through the HFI window.
|
||||
* The HFI compares the coprocessor PID in the CPU PID register
|
||||
* to the PID held in the HFI window to determine if the transaction
|
||||
* is allowed.
|
||||
*
|
||||
* When the task is done with the HFI window, the HFI driver calls
|
||||
* drop_cop() to release the coprocessor PID.
|
||||
*/
|
||||
|
||||
void switch_cop(struct mm_struct *next)
|
||||
{
|
||||
#ifdef CONFIG_PPC_ICSWX_PID
|
||||
mtspr(SPRN_PID, next->context.cop_pid);
|
||||
#endif
|
||||
mtspr(SPRN_ACOP, next->context.acop);
|
||||
}
|
||||
|
||||
/**
|
||||
* Start using a coprocessor.
|
||||
* @acop: mask of coprocessor to be used.
|
||||
* @mm: The mm the coprocessor to associate with. Most likely current mm.
|
||||
*
|
||||
* Return a positive PID if successful. Negative errno otherwise.
|
||||
* The returned PID will be fed to the coprocessor to determine if an
|
||||
* icswx transaction is authenticated.
|
||||
*/
|
||||
int use_cop(unsigned long acop, struct mm_struct *mm)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!cpu_has_feature(CPU_FTR_ICSWX))
|
||||
return -ENODEV;
|
||||
|
||||
if (!mm || !acop)
|
||||
return -EINVAL;
|
||||
|
||||
/* The page_table_lock ensures mm_users won't change under us */
|
||||
spin_lock(&mm->page_table_lock);
|
||||
spin_lock(mm->context.cop_lockp);
|
||||
|
||||
ret = get_cop_pid(mm);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* update acop */
|
||||
mm->context.acop |= acop;
|
||||
|
||||
sync_cop(mm);
|
||||
|
||||
/*
|
||||
* If this is a threaded process then there might be other threads
|
||||
* running. We need to send an IPI to force them to pick up any
|
||||
* change in PID and ACOP.
|
||||
*/
|
||||
if (atomic_read(&mm->mm_users) > 1)
|
||||
smp_call_function(sync_cop, mm, 1);
|
||||
|
||||
out:
|
||||
spin_unlock(mm->context.cop_lockp);
|
||||
spin_unlock(&mm->page_table_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(use_cop);
|
||||
|
||||
/**
|
||||
* Stop using a coprocessor.
|
||||
* @acop: mask of coprocessor to be stopped.
|
||||
* @mm: The mm the coprocessor associated with.
|
||||
*/
|
||||
void drop_cop(unsigned long acop, struct mm_struct *mm)
|
||||
{
|
||||
int free_pid;
|
||||
|
||||
if (!cpu_has_feature(CPU_FTR_ICSWX))
|
||||
return;
|
||||
|
||||
if (WARN_ON_ONCE(!mm))
|
||||
return;
|
||||
|
||||
/* The page_table_lock ensures mm_users won't change under us */
|
||||
spin_lock(&mm->page_table_lock);
|
||||
spin_lock(mm->context.cop_lockp);
|
||||
|
||||
mm->context.acop &= ~acop;
|
||||
|
||||
free_pid = disable_cop_pid(mm);
|
||||
sync_cop(mm);
|
||||
|
||||
/*
|
||||
* If this is a threaded process then there might be other threads
|
||||
* running. We need to send an IPI to force them to pick up any
|
||||
* change in PID and ACOP.
|
||||
*/
|
||||
if (atomic_read(&mm->mm_users) > 1)
|
||||
smp_call_function(sync_cop, mm, 1);
|
||||
|
||||
if (free_pid != COP_PID_NONE)
|
||||
free_cop_pid(free_pid);
|
||||
|
||||
spin_unlock(mm->context.cop_lockp);
|
||||
spin_unlock(&mm->page_table_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(drop_cop);
|
||||
|
||||
static int acop_use_cop(int ct)
|
||||
{
|
||||
/* There is no alternate policy, yet */
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the instruction word at the NIP
|
||||
*/
|
||||
static u32 acop_get_inst(struct pt_regs *regs)
|
||||
{
|
||||
u32 inst;
|
||||
u32 __user *p;
|
||||
|
||||
p = (u32 __user *)regs->nip;
|
||||
if (!access_ok(VERIFY_READ, p, sizeof(*p)))
|
||||
return 0;
|
||||
|
||||
if (__get_user(inst, p))
|
||||
return 0;
|
||||
|
||||
return inst;
|
||||
}
|
||||
|
||||
/**
|
||||
* @regs: registers at time of interrupt
|
||||
* @address: storage address
|
||||
* @error_code: Fault code, usually the DSISR or ESR depending on
|
||||
* processor type
|
||||
*
|
||||
* Return 0 if we are able to resolve the data storage fault that
|
||||
* results from a CT miss in the ACOP register.
|
||||
*/
|
||||
int acop_handle_fault(struct pt_regs *regs, unsigned long address,
|
||||
unsigned long error_code)
|
||||
{
|
||||
int ct;
|
||||
u32 inst = 0;
|
||||
|
||||
if (!cpu_has_feature(CPU_FTR_ICSWX)) {
|
||||
pr_info("No coprocessors available");
|
||||
_exception(SIGILL, regs, ILL_ILLOPN, address);
|
||||
}
|
||||
|
||||
if (!user_mode(regs)) {
|
||||
/* this could happen if the HV denies the
|
||||
* kernel access, for now we just die */
|
||||
die("ICSWX from kernel failed", regs, SIGSEGV);
|
||||
}
|
||||
|
||||
/* Some implementations leave us a hint for the CT */
|
||||
ct = ICSWX_GET_CT_HINT(error_code);
|
||||
if (ct < 0) {
|
||||
/* we have to peek at the instruction word to figure out CT */
|
||||
u32 ccw;
|
||||
u32 rs;
|
||||
|
||||
inst = acop_get_inst(regs);
|
||||
if (inst == 0)
|
||||
return -1;
|
||||
|
||||
rs = (inst >> (31 - 10)) & 0x1f;
|
||||
ccw = regs->gpr[rs];
|
||||
ct = (ccw >> 16) & 0x3f;
|
||||
}
|
||||
|
||||
/*
|
||||
* We could be here because another thread has enabled acop
|
||||
* but the ACOP register has yet to be updated.
|
||||
*
|
||||
* This should have been taken care of by the IPI to sync all
|
||||
* the threads (see smp_call_function(sync_cop, mm, 1)), but
|
||||
* that could take forever if there are a significant amount
|
||||
* of threads.
|
||||
*
|
||||
* Given the number of threads on some of these systems,
|
||||
* perhaps this is the best way to sync ACOP rather than whack
|
||||
* every thread with an IPI.
|
||||
*/
|
||||
if ((acop_copro_type_bit(ct) & current->active_mm->context.acop) != 0) {
|
||||
sync_cop(current->active_mm);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* check for alternate policy */
|
||||
if (!acop_use_cop(ct))
|
||||
return 0;
|
||||
|
||||
/* at this point the CT is unknown to the system */
|
||||
pr_warn("%s[%d]: Coprocessor %d is unavailable\n",
|
||||
current->comm, current->pid, ct);
|
||||
|
||||
/* get inst if we don't already have it */
|
||||
if (inst == 0) {
|
||||
inst = acop_get_inst(regs);
|
||||
if (inst == 0)
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Check if the instruction is the "record form" */
|
||||
if (inst & 1) {
|
||||
/*
|
||||
* the instruction is "record" form so we can reject
|
||||
* using CR0
|
||||
*/
|
||||
regs->ccr &= ~(0xful << 28);
|
||||
regs->ccr |= ICSWX_RC_NOT_FOUND << 28;
|
||||
|
||||
/* Move on to the next instruction */
|
||||
regs->nip += 4;
|
||||
} else {
|
||||
/*
|
||||
* There is no architected mechanism to report a bad
|
||||
* CT so we could either SIGILL or report nothing.
|
||||
* Since the non-record version should only be used
* for "hints" or "don't care" we should probably do
* nothing. However, I could see how some people
* might want a SIGILL, so it's here if you want it.
|
||||
*/
|
||||
#ifdef CONFIG_PPC_ICSWX_USE_SIGILL
|
||||
_exception(SIGILL, regs, ILL_ILLOPN, address);
|
||||
#else
|
||||
regs->nip += 4;
|
||||
#endif
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(acop_handle_fault);
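/*
 * Editor's note: a minimal usage sketch, not part of the original file.
 * It follows the calling pattern described in the comment at the top of
 * this file: a (hypothetical) driver obtains a coprocessor PID for the
 * current mm before issuing icswx and releases it when it is done. The
 * function names and the ct_mask parameter are illustrative only.
 */
#if 0
static int example_cop_open(unsigned long ct_mask)
{
	int pid = use_cop(ct_mask, current->mm);

	if (pid < 0)
		return pid;
	/* program 'pid' into the coprocessor window hardware here */
	return 0;
}

static void example_cop_close(unsigned long ct_mask)
{
	drop_cop(ct_mask, current->mm);
}
#endif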
68
arch/powerpc/mm/icswx.h
Normal file
@@ -0,0 +1,68 @@
#ifndef _ARCH_POWERPC_MM_ICSWX_H_
|
||||
#define _ARCH_POWERPC_MM_ICSWX_H_
|
||||
|
||||
/*
|
||||
* ICSWX and ACOP Management
|
||||
*
|
||||
* Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <asm/mmu_context.h>
|
||||
|
||||
/* also used to denote that PIDs are not used */
|
||||
#define COP_PID_NONE 0
|
||||
|
||||
static inline void sync_cop(void *arg)
|
||||
{
|
||||
struct mm_struct *mm = arg;
|
||||
|
||||
if (mm == current->active_mm)
|
||||
switch_cop(current->active_mm);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PPC_ICSWX_PID
|
||||
extern int get_cop_pid(struct mm_struct *mm);
|
||||
extern int disable_cop_pid(struct mm_struct *mm);
|
||||
extern void free_cop_pid(int free_pid);
|
||||
#else
|
||||
#define get_cop_pid(m) (COP_PID_NONE)
|
||||
#define disable_cop_pid(m) (COP_PID_NONE)
|
||||
#define free_cop_pid(p)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* These are implementation bits for architected registers. If this
|
||||
* ever becomes architected, they should be moved to reg.h et al.
|
||||
*/
|
||||
/* UCT is the same bit for Server and Embedded */
|
||||
#define ICSWX_DSI_UCT 0x00004000 /* Unavailable Coprocessor Type */
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3E
|
||||
/* Embedded implementation gives us no hints as to what the CT is */
|
||||
#define ICSWX_GET_CT_HINT(x) (-1)
|
||||
#else
|
||||
/* Server implementation contains the CT value in the DSISR */
|
||||
#define ICSWX_DSISR_CTMASK 0x00003f00
|
||||
#define ICSWX_GET_CT_HINT(x) (((x) & ICSWX_DSISR_CTMASK) >> 8)
|
||||
#endif
|
||||
|
||||
#define ICSWX_RC_STARTED 0x8 /* The request has been started */
|
||||
#define ICSWX_RC_NOT_IDLE 0x4 /* No coprocessor found idle */
|
||||
#define ICSWX_RC_NOT_FOUND 0x2 /* No coprocessor found */
|
||||
#define ICSWX_RC_UNDEFINED 0x1 /* Reserved */
|
||||
|
||||
extern int acop_handle_fault(struct pt_regs *regs, unsigned long address,
|
||||
unsigned long error_code);
|
||||
|
||||
static inline u64 acop_copro_type_bit(unsigned int type)
|
||||
{
|
||||
return 1ULL << (63 - type);
|
||||
}
|
||||
|
||||
#endif /* !_ARCH_POWERPC_MM_ICSWX_H_ */
|
||||
87
arch/powerpc/mm/icswx_pid.c
Normal file
@@ -0,0 +1,87 @@
/*
|
||||
* ICSWX and ACOP/PID Management
|
||||
*
|
||||
* Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/module.h>
|
||||
#include "icswx.h"
|
||||
|
||||
#define COP_PID_MIN (COP_PID_NONE + 1)
|
||||
#define COP_PID_MAX (0xFFFF)
|
||||
|
||||
static DEFINE_SPINLOCK(mmu_context_acop_lock);
|
||||
static DEFINE_IDA(cop_ida);
|
||||
|
||||
static int new_cop_pid(struct ida *ida, int min_id, int max_id,
|
||||
spinlock_t *lock)
|
||||
{
|
||||
int index;
|
||||
int err;
|
||||
|
||||
again:
|
||||
if (!ida_pre_get(ida, GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
|
||||
spin_lock(lock);
|
||||
err = ida_get_new_above(ida, min_id, &index);
|
||||
spin_unlock(lock);
|
||||
|
||||
if (err == -EAGAIN)
|
||||
goto again;
|
||||
else if (err)
|
||||
return err;
|
||||
|
||||
if (index > max_id) {
|
||||
spin_lock(lock);
|
||||
ida_remove(ida, index);
|
||||
spin_unlock(lock);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
int get_cop_pid(struct mm_struct *mm)
|
||||
{
|
||||
int pid;
|
||||
|
||||
if (mm->context.cop_pid == COP_PID_NONE) {
|
||||
pid = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX,
|
||||
&mmu_context_acop_lock);
|
||||
if (pid >= 0)
|
||||
mm->context.cop_pid = pid;
|
||||
}
|
||||
return mm->context.cop_pid;
|
||||
}
|
||||
|
||||
int disable_cop_pid(struct mm_struct *mm)
|
||||
{
|
||||
int free_pid = COP_PID_NONE;
|
||||
|
||||
if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) {
|
||||
free_pid = mm->context.cop_pid;
|
||||
mm->context.cop_pid = COP_PID_NONE;
|
||||
}
|
||||
return free_pid;
|
||||
}
|
||||
|
||||
void free_cop_pid(int free_pid)
|
||||
{
|
||||
spin_lock(&mmu_context_acop_lock);
|
||||
ida_remove(&cop_ida, free_pid);
|
||||
spin_unlock(&mmu_context_acop_lock);
|
||||
}
|
||||
224
arch/powerpc/mm/init_32.c
Normal file
@@ -0,0 +1,224 @@
/*
|
||||
* PowerPC version
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
|
||||
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
|
||||
* Copyright (C) 1996 Paul Mackerras
|
||||
* PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
|
||||
*
|
||||
* Derived from "arch/i386/mm/init.c"
|
||||
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/initrd.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/hugetlb.h>
|
||||
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/prom.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/machdep.h>
|
||||
#include <asm/btext.h>
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/hugetlb.h>
|
||||
|
||||
#include "mmu_decl.h"
|
||||
|
||||
#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL)
|
||||
/* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */
|
||||
#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - PAGE_OFFSET))
|
||||
#error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_KERNEL_START"
|
||||
#endif
|
||||
#endif
|
||||
#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE
|
||||
|
||||
phys_addr_t total_memory;
|
||||
phys_addr_t total_lowmem;
|
||||
|
||||
phys_addr_t memstart_addr = (phys_addr_t)~0ull;
|
||||
EXPORT_SYMBOL(memstart_addr);
|
||||
phys_addr_t kernstart_addr;
|
||||
EXPORT_SYMBOL(kernstart_addr);
|
||||
|
||||
#ifdef CONFIG_RELOCATABLE_PPC32
|
||||
/* Used in __va()/__pa() */
|
||||
long long virt_phys_offset;
|
||||
EXPORT_SYMBOL(virt_phys_offset);
|
||||
#endif
|
||||
|
||||
phys_addr_t lowmem_end_addr;
|
||||
|
||||
int boot_mapsize;
|
||||
#ifdef CONFIG_PPC_PMAC
|
||||
unsigned long agp_special_page;
|
||||
EXPORT_SYMBOL(agp_special_page);
|
||||
#endif
|
||||
|
||||
void MMU_init(void);
|
||||
|
||||
/* XXX should be in current.h -- paulus */
|
||||
extern struct task_struct *current_set[NR_CPUS];
|
||||
|
||||
/*
|
||||
* this tells the system to map all of ram with the segregs
|
||||
* (i.e. page tables) instead of the bats.
|
||||
* -- Cort
|
||||
*/
|
||||
int __map_without_bats;
|
||||
int __map_without_ltlbs;
|
||||
|
||||
/*
|
||||
* This tells the system to allow ioremapping memory marked as reserved.
|
||||
*/
|
||||
int __allow_ioremap_reserved;
|
||||
|
||||
/* max amount of low RAM to map in */
|
||||
unsigned long __max_low_memory = MAX_LOW_MEM;
|
||||
|
||||
/*
|
||||
* Check for command-line options that affect what MMU_init will do.
|
||||
*/
|
||||
void __init MMU_setup(void)
|
||||
{
|
||||
/* Check for nobats option (used in mapin_ram). */
|
||||
if (strstr(boot_command_line, "nobats")) {
|
||||
__map_without_bats = 1;
|
||||
}
|
||||
|
||||
if (strstr(boot_command_line, "noltlbs")) {
|
||||
__map_without_ltlbs = 1;
|
||||
}
|
||||
#ifdef CONFIG_DEBUG_PAGEALLOC
|
||||
__map_without_bats = 1;
|
||||
__map_without_ltlbs = 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* MMU_init sets up the basic memory mappings for the kernel,
|
||||
* including both RAM and possibly some I/O regions,
|
||||
* and sets up the page tables and the MMU hardware ready to go.
|
||||
*/
|
||||
void __init MMU_init(void)
|
||||
{
|
||||
if (ppc_md.progress)
|
||||
ppc_md.progress("MMU:enter", 0x111);
|
||||
|
||||
/* parse args from command line */
|
||||
MMU_setup();
|
||||
|
||||
/*
|
||||
* Reserve gigantic pages for hugetlb. This MUST occur before
|
||||
* lowmem_end_addr is initialized below.
|
||||
*/
|
||||
reserve_hugetlb_gpages();
|
||||
|
||||
if (memblock.memory.cnt > 1) {
|
||||
#ifndef CONFIG_WII
|
||||
memblock_enforce_memory_limit(memblock.memory.regions[0].size);
|
||||
printk(KERN_WARNING "Only using first contiguous memory region");
|
||||
#else
|
||||
wii_memory_fixups();
|
||||
#endif
|
||||
}
|
||||
|
||||
total_lowmem = total_memory = memblock_end_of_DRAM() - memstart_addr;
|
||||
lowmem_end_addr = memstart_addr + total_lowmem;
|
||||
|
||||
#ifdef CONFIG_FSL_BOOKE
|
||||
/* Freescale Book-E parts expect lowmem to be mapped by fixed TLB
|
||||
* entries, so we need to adjust lowmem to match the amount we can map
|
||||
* in the fixed entries */
|
||||
adjust_total_lowmem();
|
||||
#endif /* CONFIG_FSL_BOOKE */
|
||||
|
||||
if (total_lowmem > __max_low_memory) {
|
||||
total_lowmem = __max_low_memory;
|
||||
lowmem_end_addr = memstart_addr + total_lowmem;
|
||||
#ifndef CONFIG_HIGHMEM
|
||||
total_memory = total_lowmem;
|
||||
memblock_enforce_memory_limit(total_lowmem);
|
||||
#endif /* CONFIG_HIGHMEM */
|
||||
}
|
||||
|
||||
/* Initialize the MMU hardware */
|
||||
if (ppc_md.progress)
|
||||
ppc_md.progress("MMU:hw init", 0x300);
|
||||
MMU_init_hw();
|
||||
|
||||
/* Map in all of RAM starting at KERNELBASE */
|
||||
if (ppc_md.progress)
|
||||
ppc_md.progress("MMU:mapin", 0x301);
|
||||
mapin_ram();
|
||||
|
||||
/* Initialize early top-down ioremap allocator */
|
||||
ioremap_bot = IOREMAP_TOP;
|
||||
|
||||
/* Map in I/O resources */
|
||||
if (ppc_md.progress)
|
||||
ppc_md.progress("MMU:setio", 0x302);
|
||||
|
||||
if (ppc_md.progress)
|
||||
ppc_md.progress("MMU:exit", 0x211);
|
||||
|
||||
/* From now on, btext is no longer BAT mapped if it was at all */
|
||||
#ifdef CONFIG_BOOTX_TEXT
|
||||
btext_unmap();
|
||||
#endif
|
||||
|
||||
/* Shortly after that, the entire linear mapping will be available */
|
||||
memblock_set_current_limit(lowmem_end_addr);
|
||||
}
|
||||
|
||||
/* This is only called until mem_init is done. */
|
||||
void __init *early_get_page(void)
|
||||
{
|
||||
if (init_bootmem_done)
|
||||
return alloc_bootmem_pages(PAGE_SIZE);
|
||||
else
|
||||
return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */
|
||||
void setup_initial_memory_limit(phys_addr_t first_memblock_base,
|
||||
phys_addr_t first_memblock_size)
|
||||
{
|
||||
/* We don't currently support the first MEMBLOCK not mapping 0
|
||||
* physical on those processors
|
||||
*/
|
||||
BUG_ON(first_memblock_base != 0);
|
||||
|
||||
#ifdef CONFIG_PIN_TLB
|
||||
/* 8xx can only access 24MB at the moment */
|
||||
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01800000));
|
||||
#else
|
||||
/* 8xx can only access 8MB at the moment */
|
||||
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000));
|
||||
#endif
|
||||
}
|
||||
#endif /* CONFIG_8xx */
|
||||
462
arch/powerpc/mm/init_64.c
Normal file
@@ -0,0 +1,462 @@
/*
|
||||
* PowerPC version
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
|
||||
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
|
||||
* Copyright (C) 1996 Paul Mackerras
|
||||
*
|
||||
* Derived from "arch/i386/mm/init.c"
|
||||
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
||||
*
|
||||
* Dave Engebretsen <engebret@us.ibm.com>
|
||||
* Rework for PPC64 port.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#undef DEBUG
|
||||
|
||||
#include <linux/signal.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/nodemask.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/poison.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/prom.h>
|
||||
#include <asm/rtas.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/machdep.h>
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/eeh.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/mmzone.h>
|
||||
#include <asm/cputable.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/iommu.h>
|
||||
#include <asm/vdso.h>
|
||||
|
||||
#include "mmu_decl.h"
|
||||
|
||||
#ifdef CONFIG_PPC_STD_MMU_64
|
||||
#if PGTABLE_RANGE > USER_VSID_RANGE
|
||||
#warning Limited user VSID range means pagetable space is wasted
|
||||
#endif
|
||||
|
||||
#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
|
||||
#warning TASK_SIZE is smaller than it needs to be.
|
||||
#endif
|
||||
#endif /* CONFIG_PPC_STD_MMU_64 */
|
||||
|
||||
phys_addr_t memstart_addr = ~0;
|
||||
EXPORT_SYMBOL_GPL(memstart_addr);
|
||||
phys_addr_t kernstart_addr;
|
||||
EXPORT_SYMBOL_GPL(kernstart_addr);
|
||||
|
||||
static void pgd_ctor(void *addr)
|
||||
{
|
||||
memset(addr, 0, PGD_TABLE_SIZE);
|
||||
}
|
||||
|
||||
static void pmd_ctor(void *addr)
|
||||
{
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
memset(addr, 0, PMD_TABLE_SIZE * 2);
|
||||
#else
|
||||
memset(addr, 0, PMD_TABLE_SIZE);
|
||||
#endif
|
||||
}
|
||||
|
||||
struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
|
||||
|
||||
/*
|
||||
* Create a kmem_cache() for pagetables. This is not used for PTE
|
||||
* pages - they're linked to struct page, come from the normal free
|
||||
* pages pool and have a different entry size (see real_pte_t) to
|
||||
* everything else. Caches created by this function are used for all
|
||||
* the higher level pagetables, and for hugepage pagetables.
|
||||
*/
|
||||
void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
|
||||
{
|
||||
char *name;
|
||||
unsigned long table_size = sizeof(void *) << shift;
|
||||
unsigned long align = table_size;
|
||||
|
||||
/* When batching pgtable pointers for RCU freeing, we store
|
||||
* the index size in the low bits. Table alignment must be
|
||||
* big enough to fit it.
|
||||
*
|
||||
* Likewise, hugepage pagetable pointers contain a (different)
|
||||
* shift value in the low bits. All tables must be aligned so
|
||||
* as to leave enough 0 bits in the address to contain it. */
|
||||
unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1,
|
||||
HUGEPD_SHIFT_MASK + 1);
|
||||
struct kmem_cache *new;
|
||||
|
||||
/* It would be nice if this was a BUILD_BUG_ON(), but at the
|
||||
* moment, gcc doesn't seem to recognize is_power_of_2 as a
|
||||
* constant expression, so so much for that. */
|
||||
BUG_ON(!is_power_of_2(minalign));
|
||||
BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE));
|
||||
|
||||
if (PGT_CACHE(shift))
|
||||
return; /* Already have a cache of this size */
|
||||
|
||||
align = max_t(unsigned long, align, minalign);
|
||||
name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
|
||||
new = kmem_cache_create(name, table_size, align, 0, ctor);
|
||||
pgtable_cache[shift - 1] = new;
|
||||
pr_debug("Allocated pgtable cache for order %d\n", shift);
|
||||
}
|
||||
|
||||
|
||||
void pgtable_cache_init(void)
|
||||
{
|
||||
pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
|
||||
pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
|
||||
if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX))
|
||||
panic("Couldn't allocate pgtable caches");
|
||||
/* In all current configs, when the PUD index exists it's the
|
||||
* same size as either the pgd or pmd index. Verify that the
|
||||
* initialization above has also created a PUD cache. This
|
||||
* will need re-examination if we add new possibilities for
|
||||
* the pagetable layout. */
|
||||
BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
||||
/*
|
||||
* Given an address within the vmemmap, determine the pfn of the page that
|
||||
* represents the start of the section it is within. Note that we have to
|
||||
* do this by hand as the proffered address may not be correctly aligned.
|
||||
* Subtraction of non-aligned pointers produces undefined results.
|
||||
*/
|
||||
static unsigned long __meminit vmemmap_section_start(unsigned long page)
|
||||
{
|
||||
unsigned long offset = page - ((unsigned long)(vmemmap));
|
||||
|
||||
/* Return the pfn of the start of the section. */
|
||||
return (offset / sizeof(struct page)) & PAGE_SECTION_MASK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if this vmemmap page is already initialised. If any section
|
||||
* which overlaps this vmemmap page is initialised then this page is
|
||||
* initialised already.
|
||||
*/
|
||||
static int __meminit vmemmap_populated(unsigned long start, int page_size)
|
||||
{
|
||||
unsigned long end = start + page_size;
|
||||
start = (unsigned long)(pfn_to_page(vmemmap_section_start(start)));
|
||||
|
||||
for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page)))
|
||||
if (pfn_valid(page_to_pfn((struct page *)start)))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* On hash-based CPUs, the vmemmap is bolted in the hash table.
|
||||
*
|
||||
* On Book3E CPUs, the vmemmap is currently mapped in the top half of
|
||||
* the vmalloc space using normal page tables, though the size of
|
||||
* pages encoded in the PTEs can be different
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3E
|
||||
static void __meminit vmemmap_create_mapping(unsigned long start,
|
||||
unsigned long page_size,
|
||||
unsigned long phys)
|
||||
{
|
||||
/* Create a PTE encoding without page size */
|
||||
unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED |
|
||||
_PAGE_KERNEL_RW;
|
||||
|
||||
/* PTEs only contain page size encodings up to 32M */
|
||||
BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].enc > 0xf);
|
||||
|
||||
/* Encode the size in the PTE */
|
||||
flags |= mmu_psize_defs[mmu_vmemmap_psize].enc << 8;
|
||||
|
||||
/* For each PTE for that area, map things. Note that we don't
|
||||
* increment phys because all PTEs are of the large size and
|
||||
* thus must have the low bits clear
|
||||
*/
|
||||
for (i = 0; i < page_size; i += PAGE_SIZE)
|
||||
BUG_ON(map_kernel_page(start + i, phys, flags));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
static void vmemmap_remove_mapping(unsigned long start,
|
||||
unsigned long page_size)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
#else /* CONFIG_PPC_BOOK3E */
|
||||
static void __meminit vmemmap_create_mapping(unsigned long start,
|
||||
unsigned long page_size,
|
||||
unsigned long phys)
|
||||
{
|
||||
int mapped = htab_bolt_mapping(start, start + page_size, phys,
|
||||
pgprot_val(PAGE_KERNEL),
|
||||
mmu_vmemmap_psize,
|
||||
mmu_kernel_ssize);
|
||||
BUG_ON(mapped < 0);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
static void vmemmap_remove_mapping(unsigned long start,
|
||||
unsigned long page_size)
|
||||
{
|
||||
int mapped = htab_remove_mapping(start, start + page_size,
|
||||
mmu_vmemmap_psize,
|
||||
mmu_kernel_ssize);
|
||||
BUG_ON(mapped < 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_PPC_BOOK3E */
|
||||
|
||||
struct vmemmap_backing *vmemmap_list;
|
||||
static struct vmemmap_backing *next;
|
||||
static int num_left;
|
||||
static int num_freed;
|
||||
|
||||
static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node)
|
||||
{
|
||||
struct vmemmap_backing *vmem_back;
|
||||
/* get from freed entries first */
|
||||
if (num_freed) {
|
||||
num_freed--;
|
||||
vmem_back = next;
|
||||
next = next->list;
|
||||
|
||||
return vmem_back;
|
||||
}
|
||||
|
||||
/* allocate a page when required and hand out chunks */
|
||||
if (!num_left) {
|
||||
next = vmemmap_alloc_block(PAGE_SIZE, node);
|
||||
if (unlikely(!next)) {
|
||||
WARN_ON(1);
|
||||
return NULL;
|
||||
}
|
||||
num_left = PAGE_SIZE / sizeof(struct vmemmap_backing);
|
||||
}
|
||||
|
||||
num_left--;
|
||||
|
||||
return next++;
|
||||
}
|
||||
|
||||
static __meminit void vmemmap_list_populate(unsigned long phys,
|
||||
unsigned long start,
|
||||
int node)
|
||||
{
|
||||
struct vmemmap_backing *vmem_back;
|
||||
|
||||
vmem_back = vmemmap_list_alloc(node);
|
||||
if (unlikely(!vmem_back)) {
|
||||
WARN_ON(1);
|
||||
return;
|
||||
}
|
||||
|
||||
vmem_back->phys = phys;
|
||||
vmem_back->virt_addr = start;
|
||||
vmem_back->list = vmemmap_list;
|
||||
|
||||
vmemmap_list = vmem_back;
|
||||
}
|
||||
|
||||
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
|
||||
{
|
||||
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
|
||||
|
||||
/* Align to the page size of the linear mapping. */
|
||||
start = _ALIGN_DOWN(start, page_size);
|
||||
|
||||
pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
|
||||
|
||||
for (; start < end; start += page_size) {
|
||||
void *p;
|
||||
|
||||
if (vmemmap_populated(start, page_size))
|
||||
continue;
|
||||
|
||||
p = vmemmap_alloc_block(page_size, node);
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
|
||||
vmemmap_list_populate(__pa(p), start, node);
|
||||
|
||||
pr_debug(" * %016lx..%016lx allocated at %p\n",
|
||||
start, start + page_size, p);
|
||||
|
||||
vmemmap_create_mapping(start, page_size, __pa(p));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
static unsigned long vmemmap_list_free(unsigned long start)
|
||||
{
|
||||
struct vmemmap_backing *vmem_back, *vmem_back_prev;
|
||||
|
||||
vmem_back_prev = vmem_back = vmemmap_list;
|
||||
|
||||
/* look for it with prev pointer recorded */
|
||||
for (; vmem_back; vmem_back = vmem_back->list) {
|
||||
if (vmem_back->virt_addr == start)
|
||||
break;
|
||||
vmem_back_prev = vmem_back;
|
||||
}
|
||||
|
||||
if (unlikely(!vmem_back)) {
|
||||
WARN_ON(1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* remove it from vmemmap_list */
|
||||
if (vmem_back == vmemmap_list) /* remove head */
|
||||
vmemmap_list = vmem_back->list;
|
||||
else
|
||||
vmem_back_prev->list = vmem_back->list;
|
||||
|
||||
/* next point to this freed entry */
|
||||
vmem_back->list = next;
|
||||
next = vmem_back;
|
||||
num_freed++;
|
||||
|
||||
return vmem_back->phys;
|
||||
}
|
||||
|
||||
void __ref vmemmap_free(unsigned long start, unsigned long end)
|
||||
{
|
||||
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
|
||||
|
||||
start = _ALIGN_DOWN(start, page_size);
|
||||
|
||||
pr_debug("vmemmap_free %lx...%lx\n", start, end);
|
||||
|
||||
for (; start < end; start += page_size) {
|
||||
unsigned long addr;
|
||||
|
||||
		/*
		 * The section has already been marked as invalid, so if
		 * vmemmap_populated() returns true some other sections
		 * still live in this page; skip it.
		 */
|
||||
if (vmemmap_populated(start, page_size))
|
||||
continue;
|
||||
|
||||
addr = vmemmap_list_free(start);
|
||||
if (addr) {
|
||||
struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
|
||||
|
||||
if (PageReserved(page)) {
|
||||
/* allocated from bootmem */
|
||||
if (page_size < PAGE_SIZE) {
|
||||
/*
|
||||
* this shouldn't happen, but if it is
|
||||
* the case, leave the memory there
|
||||
*/
|
||||
WARN_ON_ONCE(1);
|
||||
} else {
|
||||
unsigned int nr_pages =
|
||||
1 << get_order(page_size);
|
||||
while (nr_pages--)
|
||||
free_reserved_page(page++);
|
||||
}
|
||||
} else
|
||||
free_pages((unsigned long)(__va(addr)),
|
||||
get_order(page_size));
|
||||
|
||||
vmemmap_remove_mapping(start, page_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
void register_page_bootmem_memmap(unsigned long section_nr,
|
||||
struct page *start_page, unsigned long size)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* We do not have access to the sparsemem vmemmap, so we fallback to
|
||||
* walking the list of sparsemem blocks which we already maintain for
|
||||
* the sake of crashdump. In the long run, we might want to maintain
|
||||
* a tree if performance of that linear walk becomes a problem.
|
||||
*
|
||||
* realmode_pfn_to_page functions can fail due to:
|
||||
* 1) As real sparsemem blocks do not lie in RAM contiguously (they
|
||||
* are in virtual address space which is not available in the real mode),
|
||||
* the requested page struct can be split between blocks so get_page/put_page
|
||||
* may fail.
|
||||
* 2) When huge pages are used, the get_page/put_page API will fail
|
||||
* in real mode as the linked addresses in the page struct are virtual
|
||||
* too.
|
||||
*/
|
||||
struct page *realmode_pfn_to_page(unsigned long pfn)
|
||||
{
|
||||
struct vmemmap_backing *vmem_back;
|
||||
struct page *page;
|
||||
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
|
||||
unsigned long pg_va = (unsigned long) pfn_to_page(pfn);
|
||||
|
||||
for (vmem_back = vmemmap_list; vmem_back; vmem_back = vmem_back->list) {
|
||||
if (pg_va < vmem_back->virt_addr)
|
||||
continue;
|
||||
|
||||
/* Entries can be freed from vmemmap_list, so we need to check them all */
|
||||
if ((pg_va + sizeof(struct page)) <=
|
||||
(vmem_back->virt_addr + page_size)) {
|
||||
page = (struct page *) (vmem_back->phys + pg_va -
|
||||
vmem_back->virt_addr);
|
||||
return page;
|
||||
}
|
||||
}
|
||||
|
||||
/* The page struct is probably split across real pages */
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
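/*
 * Illustrative example (not part of the original file), with made-up
 * numbers purely to show the phys + (pg_va - virt_addr) arithmetic
 * above: if a list entry records virt_addr == V and phys == P, and
 * pfn_to_page(pfn) lands 0x1240 bytes into that vmemmap block, the
 * function returns (struct page *)(P + 0x1240), i.e. the
 * physical-address alias of the page struct, which is the only form a
 * real-mode caller can safely dereference.
 */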
|
||||
|
||||
#elif defined(CONFIG_FLATMEM)
|
||||
|
||||
struct page *realmode_pfn_to_page(unsigned long pfn)
|
||||
{
|
||||
struct page *page = pfn_to_page(pfn);
|
||||
return page;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
|
||||
|
||||
#endif /* CONFIG_SPARSEMEM_VMEMMAP/CONFIG_FLATMEM */
|
||||
621
arch/powerpc/mm/mem.c
Normal file
@ -0,0 +1,621 @@
/*
|
||||
* PowerPC version
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
|
||||
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
|
||||
* Copyright (C) 1996 Paul Mackerras
|
||||
* PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
|
||||
*
|
||||
* Derived from "arch/i386/mm/init.c"
|
||||
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/export.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/initrd.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/prom.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/machdep.h>
|
||||
#include <asm/btext.h>
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/sparsemem.h>
|
||||
#include <asm/vdso.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/swiotlb.h>
|
||||
#include <asm/rtas.h>
|
||||
|
||||
#include "mmu_decl.h"
|
||||
|
||||
#ifndef CPU_FTR_COHERENT_ICACHE
|
||||
#define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */
|
||||
#define CPU_FTR_NOEXECUTE 0
|
||||
#endif
|
||||
|
||||
int init_bootmem_done;
|
||||
int mem_init_done;
|
||||
unsigned long long memory_limit;
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
pte_t *kmap_pte;
|
||||
EXPORT_SYMBOL(kmap_pte);
|
||||
pgprot_t kmap_prot;
|
||||
EXPORT_SYMBOL(kmap_prot);
|
||||
|
||||
static inline pte_t *virt_to_kpte(unsigned long vaddr)
|
||||
{
|
||||
return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
|
||||
vaddr), vaddr), vaddr);
|
||||
}
|
||||
#endif
|
||||
|
||||
int page_is_ram(unsigned long pfn)
|
||||
{
|
||||
#ifndef CONFIG_PPC64 /* XXX for now */
|
||||
return pfn < max_pfn;
|
||||
#else
|
||||
unsigned long paddr = (pfn << PAGE_SHIFT);
|
||||
struct memblock_region *reg;
|
||||
|
||||
for_each_memblock(memory, reg)
|
||||
if (paddr >= reg->base && paddr < (reg->base + reg->size))
|
||||
return 1;
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
|
||||
unsigned long size, pgprot_t vma_prot)
|
||||
{
|
||||
if (ppc_md.phys_mem_access_prot)
|
||||
return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot);
|
||||
|
||||
if (!page_is_ram(pfn))
|
||||
vma_prot = pgprot_noncached(vma_prot);
|
||||
|
||||
return vma_prot;
|
||||
}
|
||||
EXPORT_SYMBOL(phys_mem_access_prot);
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
int memory_add_physaddr_to_nid(u64 start)
|
||||
{
|
||||
return hot_add_scn_to_nid(start);
|
||||
}
|
||||
#endif
|
||||
|
||||
int arch_add_memory(int nid, u64 start, u64 size)
|
||||
{
|
||||
struct pglist_data *pgdata;
|
||||
struct zone *zone;
|
||||
unsigned long start_pfn = start >> PAGE_SHIFT;
|
||||
unsigned long nr_pages = size >> PAGE_SHIFT;
|
||||
|
||||
pgdata = NODE_DATA(nid);
|
||||
|
||||
start = (unsigned long)__va(start);
|
||||
if (create_section_mapping(start, start + size))
|
||||
return -EINVAL;
|
||||
|
||||
/* this should work for most non-highmem platforms */
|
||||
zone = pgdata->node_zones +
|
||||
zone_for_memory(nid, start, size, 0);
|
||||
|
||||
return __add_pages(nid, zone, start_pfn, nr_pages);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
int arch_remove_memory(u64 start, u64 size)
|
||||
{
|
||||
unsigned long start_pfn = start >> PAGE_SHIFT;
|
||||
unsigned long nr_pages = size >> PAGE_SHIFT;
|
||||
struct zone *zone;
|
||||
int ret;
|
||||
|
||||
zone = page_zone(pfn_to_page(start_pfn));
|
||||
ret = __remove_pages(zone, start_pfn, nr_pages);
|
||||
if (!ret && (ppc_md.remove_memory))
|
||||
ret = ppc_md.remove_memory(start, size);
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
#endif /* CONFIG_MEMORY_HOTPLUG */
|
||||
|
||||
/*
|
||||
* walk_memory_resource() needs to make sure there are no holes in a given
|
||||
* memory range. PPC64 does not maintain the memory layout in /proc/iomem.
|
||||
* Instead it maintains it in memblock.memory structures. Walk through the
|
||||
* memory regions, find holes and callback for contiguous regions.
|
||||
*/
|
||||
int
|
||||
walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
|
||||
void *arg, int (*func)(unsigned long, unsigned long, void *))
|
||||
{
|
||||
struct memblock_region *reg;
|
||||
unsigned long end_pfn = start_pfn + nr_pages;
|
||||
unsigned long tstart, tend;
|
||||
int ret = -1;
|
||||
|
||||
for_each_memblock(memory, reg) {
|
||||
tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
|
||||
tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
|
||||
if (tstart >= tend)
|
||||
continue;
|
||||
ret = (*func)(tstart, tend - tstart, arg);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(walk_system_ram_range);
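/*
 * Illustrative sketch (not part of the original file): how a caller
 * might use walk_system_ram_range() to count the RAM pages backing an
 * arbitrary pfn window.  count_ram_chunk()/count_ram_pages() are
 * hypothetical names used only for this example.
 */
#if 0
static int count_ram_chunk(unsigned long start_pfn, unsigned long nr_pages,
			   void *arg)
{
	*(unsigned long *)arg += nr_pages;
	return 0;		/* zero keeps the walk going */
}

static unsigned long count_ram_pages(unsigned long start_pfn,
				     unsigned long nr_pages)
{
	unsigned long count = 0;

	/* Returns -1 if no memblock region overlapped the window. */
	walk_system_ram_range(start_pfn, nr_pages, &count, count_ram_chunk);
	return count;
}
#endif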
|
||||
|
||||
/*
|
||||
* Initialize the bootmem system and give it all the memory we
|
||||
* have available. If we are using highmem, we only put the
|
||||
* lowmem into the bootmem system.
|
||||
*/
|
||||
#ifndef CONFIG_NEED_MULTIPLE_NODES
|
||||
void __init do_init_bootmem(void)
|
||||
{
|
||||
unsigned long start, bootmap_pages;
|
||||
unsigned long total_pages;
|
||||
struct memblock_region *reg;
|
||||
int boot_mapsize;
|
||||
|
||||
max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
|
||||
total_pages = (memblock_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT;
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
total_pages = total_lowmem >> PAGE_SHIFT;
|
||||
max_low_pfn = lowmem_end_addr >> PAGE_SHIFT;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Find an area to use for the bootmem bitmap. Calculate the size of
|
||||
* bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE.
|
||||
* Add 1 additional page in case the address isn't page-aligned.
|
||||
*/
|
||||
bootmap_pages = bootmem_bootmap_pages(total_pages);
|
||||
|
||||
start = memblock_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE);
|
||||
|
||||
min_low_pfn = MEMORY_START >> PAGE_SHIFT;
|
||||
boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn);
|
||||
|
||||
/* Place all memblock_regions in the same node and merge contiguous
|
||||
* memblock_regions
|
||||
*/
|
||||
memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
|
||||
|
||||
/* Add all physical memory to the bootmem map, mark each area
|
||||
* present.
|
||||
*/
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT);
|
||||
|
||||
/* reserve the sections we're already using */
|
||||
for_each_memblock(reserved, reg) {
|
||||
unsigned long top = reg->base + reg->size - 1;
|
||||
if (top < lowmem_end_addr)
|
||||
reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
|
||||
else if (reg->base < lowmem_end_addr) {
|
||||
unsigned long trunc_size = lowmem_end_addr - reg->base;
|
||||
reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT);
|
||||
}
|
||||
}
|
||||
#else
|
||||
free_bootmem_with_active_regions(0, max_pfn);
|
||||
|
||||
/* reserve the sections we're already using */
|
||||
for_each_memblock(reserved, reg)
|
||||
reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
|
||||
#endif
|
||||
/* XXX need to clip this if using highmem? */
|
||||
sparse_memory_present_with_active_regions(0);
|
||||
|
||||
init_bootmem_done = 1;
|
||||
}
|
||||
|
||||
/* mark pages that don't exist as nosave */
|
||||
static int __init mark_nonram_nosave(void)
|
||||
{
|
||||
struct memblock_region *reg, *prev = NULL;
|
||||
|
||||
for_each_memblock(memory, reg) {
|
||||
if (prev &&
|
||||
memblock_region_memory_end_pfn(prev) < memblock_region_memory_base_pfn(reg))
|
||||
register_nosave_region(memblock_region_memory_end_pfn(prev),
|
||||
memblock_region_memory_base_pfn(reg));
|
||||
prev = reg;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#else /* CONFIG_NEED_MULTIPLE_NODES */
|
||||
static int __init mark_nonram_nosave(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static bool zone_limits_final;
|
||||
|
||||
static unsigned long max_zone_pfns[MAX_NR_ZONES] = {
|
||||
[0 ... MAX_NR_ZONES - 1] = ~0UL
|
||||
};
|
||||
|
||||
/*
|
||||
* Restrict the specified zone and all more restrictive zones
|
||||
* to be below the specified pfn. May not be called after
|
||||
* paging_init().
|
||||
*/
|
||||
void __init limit_zone_pfn(enum zone_type zone, unsigned long pfn_limit)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (WARN_ON(zone_limits_final))
|
||||
return;
|
||||
|
||||
for (i = zone; i >= 0; i--) {
|
||||
if (max_zone_pfns[i] > pfn_limit)
|
||||
max_zone_pfns[i] = pfn_limit;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the least restrictive zone that is entirely below the
|
||||
* specified pfn limit. Returns < 0 if no suitable zone is found.
|
||||
*
|
||||
* pfn_limit must be u64 because it can exceed 32 bits even on 32-bit
|
||||
* systems -- the DMA limit can be higher than any possible real pfn.
|
||||
*/
|
||||
int dma_pfn_limit_to_zone(u64 pfn_limit)
|
||||
{
|
||||
enum zone_type top_zone = ZONE_NORMAL;
|
||||
int i;
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
top_zone = ZONE_HIGHMEM;
|
||||
#endif
|
||||
|
||||
for (i = top_zone; i >= 0; i--) {
|
||||
if (max_zone_pfns[i] <= pfn_limit)
|
||||
return i;
|
||||
}
|
||||
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
/*
|
||||
* paging_init() sets up the page tables - in fact we've already done this.
|
||||
*/
|
||||
void __init paging_init(void)
|
||||
{
|
||||
unsigned long long total_ram = memblock_phys_mem_size();
|
||||
phys_addr_t top_of_ram = memblock_end_of_DRAM();
|
||||
enum zone_type top_zone;
|
||||
|
||||
#ifdef CONFIG_PPC32
|
||||
unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1);
|
||||
unsigned long end = __fix_to_virt(FIX_HOLE);
|
||||
|
||||
for (; v < end; v += PAGE_SIZE)
|
||||
map_page(v, 0, 0); /* XXX gross */
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
map_page(PKMAP_BASE, 0, 0); /* XXX gross */
|
||||
pkmap_page_table = virt_to_kpte(PKMAP_BASE);
|
||||
|
||||
kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
|
||||
kmap_prot = PAGE_KERNEL;
|
||||
#endif /* CONFIG_HIGHMEM */
|
||||
|
||||
printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%llx\n",
|
||||
(unsigned long long)top_of_ram, total_ram);
|
||||
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
|
||||
(long int)((top_of_ram - total_ram) >> 20));
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
top_zone = ZONE_HIGHMEM;
|
||||
limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT);
|
||||
#else
|
||||
top_zone = ZONE_NORMAL;
|
||||
#endif
|
||||
|
||||
limit_zone_pfn(top_zone, top_of_ram >> PAGE_SHIFT);
|
||||
zone_limits_final = true;
|
||||
free_area_init_nodes(max_zone_pfns);
|
||||
|
||||
mark_nonram_nosave();
|
||||
}
|
||||
|
||||
static void __init register_page_bootmem_info(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for_each_online_node(i)
|
||||
register_page_bootmem_info_node(NODE_DATA(i));
|
||||
}
|
||||
|
||||
void __init mem_init(void)
|
||||
{
|
||||
/*
|
||||
* book3s is limited to 16 page sizes due to encoding this in
|
||||
* a 4-bit field for slices.
|
||||
*/
|
||||
BUILD_BUG_ON(MMU_PAGE_COUNT > 16);
|
||||
|
||||
#ifdef CONFIG_SWIOTLB
|
||||
swiotlb_init(0);
|
||||
#endif
|
||||
|
||||
register_page_bootmem_info();
|
||||
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
|
||||
set_max_mapnr(max_pfn);
|
||||
free_all_bootmem();
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
{
|
||||
unsigned long pfn, highmem_mapnr;
|
||||
|
||||
highmem_mapnr = lowmem_end_addr >> PAGE_SHIFT;
|
||||
for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) {
|
||||
phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT;
|
||||
struct page *page = pfn_to_page(pfn);
|
||||
if (!memblock_is_reserved(paddr))
|
||||
free_highmem_page(page);
|
||||
}
|
||||
}
|
||||
#endif /* CONFIG_HIGHMEM */
|
||||
|
||||
#if defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_SMP)
|
||||
/*
|
||||
	 * If SMP is enabled, next_tlbcam_idx is initialized in the cpu up
	 * functions; do it here for the non-SMP case.
|
||||
*/
|
||||
per_cpu(next_tlbcam_idx, smp_processor_id()) =
|
||||
(mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
|
||||
#endif
|
||||
|
||||
mem_init_print_info(NULL);
|
||||
#ifdef CONFIG_PPC32
|
||||
pr_info("Kernel virtual memory layout:\n");
|
||||
pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP);
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n",
|
||||
PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP));
|
||||
#endif /* CONFIG_HIGHMEM */
|
||||
#ifdef CONFIG_NOT_COHERENT_CACHE
|
||||
pr_info(" * 0x%08lx..0x%08lx : consistent mem\n",
|
||||
IOREMAP_TOP, IOREMAP_TOP + CONFIG_CONSISTENT_SIZE);
|
||||
#endif /* CONFIG_NOT_COHERENT_CACHE */
|
||||
pr_info(" * 0x%08lx..0x%08lx : early ioremap\n",
|
||||
ioremap_bot, IOREMAP_TOP);
|
||||
pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n",
|
||||
VMALLOC_START, VMALLOC_END);
|
||||
#endif /* CONFIG_PPC32 */
|
||||
|
||||
mem_init_done = 1;
|
||||
}
|
||||
|
||||
void free_initmem(void)
|
||||
{
|
||||
ppc_md.progress = ppc_printk_progress;
|
||||
free_initmem_default(POISON_FREE_INITMEM);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_INITRD
|
||||
void __init free_initrd_mem(unsigned long start, unsigned long end)
|
||||
{
|
||||
free_reserved_area((void *)start, (void *)end, -1, "initrd");
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This is called when a page has been modified by the kernel.
|
||||
* It just marks the page as not i-cache clean. We do the i-cache
|
||||
* flush later when the page is given to a user process, if necessary.
|
||||
*/
|
||||
void flush_dcache_page(struct page *page)
|
||||
{
|
||||
if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
|
||||
return;
|
||||
/* avoid an atomic op if possible */
|
||||
if (test_bit(PG_arch_1, &page->flags))
|
||||
clear_bit(PG_arch_1, &page->flags);
|
||||
}
|
||||
EXPORT_SYMBOL(flush_dcache_page);
|
||||
|
||||
void flush_dcache_icache_page(struct page *page)
|
||||
{
|
||||
#ifdef CONFIG_HUGETLB_PAGE
|
||||
if (PageCompound(page)) {
|
||||
flush_dcache_icache_hugepage(page);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#ifdef CONFIG_BOOKE
|
||||
{
|
||||
void *start = kmap_atomic(page);
|
||||
__flush_dcache_icache(start);
|
||||
kunmap_atomic(start);
|
||||
}
|
||||
#elif defined(CONFIG_8xx) || defined(CONFIG_PPC64)
|
||||
/* On 8xx there is no need to kmap since highmem is not supported */
|
||||
__flush_dcache_icache(page_address(page));
|
||||
#else
|
||||
__flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT);
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(flush_dcache_icache_page);
|
||||
|
||||
void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
|
||||
{
|
||||
clear_page(page);
|
||||
|
||||
/*
|
||||
* We shouldn't have to do this, but some versions of glibc
|
||||
* require it (ld.so assumes zero filled pages are icache clean)
|
||||
* - Anton
|
||||
*/
|
||||
flush_dcache_page(pg);
|
||||
}
|
||||
EXPORT_SYMBOL(clear_user_page);
|
||||
|
||||
void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
|
||||
struct page *pg)
|
||||
{
|
||||
copy_page(vto, vfrom);
|
||||
|
||||
/*
|
||||
* We should be able to use the following optimisation, however
|
||||
* there are two problems.
|
||||
* Firstly a bug in some versions of binutils meant PLT sections
|
||||
* were not marked executable.
|
||||
* Secondly the first word in the GOT section is blrl, used
|
||||
* to establish the GOT address. Until recently the GOT was
|
||||
* not marked executable.
|
||||
* - Anton
|
||||
*/
|
||||
#if 0
|
||||
if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
|
||||
return;
|
||||
#endif
|
||||
|
||||
flush_dcache_page(pg);
|
||||
}
|
||||
|
||||
void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
|
||||
unsigned long addr, int len)
|
||||
{
|
||||
unsigned long maddr;
|
||||
|
||||
maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK);
|
||||
flush_icache_range(maddr, maddr + len);
|
||||
kunmap(page);
|
||||
}
|
||||
EXPORT_SYMBOL(flush_icache_user_range);
|
||||
|
||||
/*
|
||||
* This is called at the end of handling a user page fault, when the
|
||||
* fault has been handled by updating a PTE in the linux page tables.
|
||||
* We use it to preload an HPTE into the hash table corresponding to
|
||||
* the updated linux PTE.
|
||||
*
|
||||
* This must always be called with the pte lock held.
|
||||
*/
|
||||
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
|
||||
pte_t *ptep)
|
||||
{
|
||||
#ifdef CONFIG_PPC_STD_MMU
|
||||
/*
|
||||
* We don't need to worry about _PAGE_PRESENT here because we are
|
||||
* called with either mm->page_table_lock held or ptl lock held
|
||||
*/
|
||||
unsigned long access = 0, trap;
|
||||
|
||||
/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
|
||||
if (!pte_young(*ptep) || address >= TASK_SIZE)
|
||||
return;
|
||||
|
||||
/* We try to figure out if we are coming from an instruction
|
||||
* access fault and pass that down to __hash_page so we avoid
|
||||
* double-faulting on execution of fresh text. We have to test
|
||||
* for regs NULL since init will get here first thing at boot
|
||||
*
|
||||
* We also avoid filling the hash if not coming from a fault
|
||||
*/
|
||||
if (current->thread.regs == NULL)
|
||||
return;
|
||||
trap = TRAP(current->thread.regs);
|
||||
if (trap == 0x400)
|
||||
access |= _PAGE_EXEC;
|
||||
else if (trap != 0x300)
|
||||
return;
|
||||
hash_preload(vma->vm_mm, address, access, trap);
|
||||
#endif /* CONFIG_PPC_STD_MMU */
|
||||
#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
|
||||
&& defined(CONFIG_HUGETLB_PAGE)
|
||||
if (is_vm_hugetlb_page(vma))
|
||||
book3e_hugetlb_preload(vma, address, *ptep);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* System memory should not be in /proc/iomem but various tools expect it
|
||||
* (eg kdump).
|
||||
*/
|
||||
static int __init add_system_ram_resources(void)
|
||||
{
|
||||
struct memblock_region *reg;
|
||||
|
||||
for_each_memblock(memory, reg) {
|
||||
struct resource *res;
|
||||
unsigned long base = reg->base;
|
||||
unsigned long size = reg->size;
|
||||
|
||||
res = kzalloc(sizeof(struct resource), GFP_KERNEL);
|
||||
WARN_ON(!res);
|
||||
|
||||
if (res) {
|
||||
res->name = "System RAM";
|
||||
res->start = base;
|
||||
res->end = base + size - 1;
|
||||
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
|
||||
WARN_ON(request_resource(&iomem_resource, res) < 0);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
subsys_initcall(add_system_ram_resources);
|
||||
|
||||
#ifdef CONFIG_STRICT_DEVMEM
|
||||
/*
|
||||
* devmem_is_allowed(): check to see if /dev/mem access to a certain address
|
||||
* is valid. The argument is a physical page number.
|
||||
*
|
||||
* Access has to be given to non-kernel-ram areas as well, these contain the
|
||||
* PCI mmio resources as well as potential bios/acpi data regions.
|
||||
*/
|
||||
int devmem_is_allowed(unsigned long pfn)
|
||||
{
|
||||
if (iomem_is_exclusive(pfn << PAGE_SHIFT))
|
||||
return 0;
|
||||
if (!page_is_ram(pfn))
|
||||
return 1;
|
||||
if (page_is_rtas_user_buf(pfn))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_STRICT_DEVMEM */
|
||||
99
arch/powerpc/mm/mmap.c
Normal file
@ -0,0 +1,99 @@
/*
 *  flexible mmap layout support
 *
 * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 *
 *
 * Started by Ingo Molnar <mingo@elte.hu>
 */

#include <linux/personality.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/sched.h>

/*
 * Top of mmap area (just below the process stack).
 *
 * Leave at least a ~128 MB hole on 32bit applications.
 *
 * On 64bit applications we randomise the stack by 1GB so we need to
 * space our mmap start address by a further 1GB, otherwise there is a
 * chance the mmap area will end up closer to the stack than our ulimit
 * requires.
 */
#define MIN_GAP32 (128*1024*1024)
#define MIN_GAP64 ((128 + 1024)*1024*1024UL)
#define MIN_GAP ((is_32bit_task()) ? MIN_GAP32 : MIN_GAP64)
#define MAX_GAP (TASK_SIZE/6*5)

static inline int mmap_is_legacy(void)
{
	if (current->personality & ADDR_COMPAT_LAYOUT)
		return 1;

	if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
		return 1;

	return sysctl_legacy_va_layout;
}

static unsigned long mmap_rnd(void)
{
	unsigned long rnd = 0;

	if (current->flags & PF_RANDOMIZE) {
		/* 8MB for 32bit, 1GB for 64bit */
		if (is_32bit_task())
			rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT)));
		else
			rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT)));
	}
	return rnd << PAGE_SHIFT;
}

static inline unsigned long mmap_base(void)
{
	unsigned long gap = rlimit(RLIMIT_STACK);

	if (gap < MIN_GAP)
		gap = MIN_GAP;
	else if (gap > MAX_GAP)
		gap = MAX_GAP;

	return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd());
}

/*
 * This function, called very early during the creation of a new
 * process VM image, sets up which VM layout function to use:
 */
void arch_pick_mmap_layout(struct mm_struct *mm)
{
	/*
	 * Fall back to the standard layout if the personality
	 * bit is set, or if the expected stack growth is unlimited:
	 */
	if (mmap_is_legacy()) {
		mm->mmap_base = TASK_UNMAPPED_BASE;
		mm->get_unmapped_area = arch_get_unmapped_area;
	} else {
		mm->mmap_base = mmap_base();
		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
	}
}
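/*
 * Worked example (not part of the original file), assuming a 64-bit
 * task with 64K pages (PAGE_SHIFT == 16) and an 8MB RLIMIT_STACK: the
 * rlimit is below MIN_GAP64 == (128 + 1024)MB, so the gap is clamped up
 * to 1152MB, while mmap_rnd() contributes up to (1 << 14) - 1 pages,
 * i.e. just under 1GB of randomisation in 64K steps.  mmap_base() then
 * returns PAGE_ALIGN(TASK_SIZE - 1152MB - rnd), so the top-down mmap
 * region starts somewhere between roughly 1.1GB and 2.1GB below
 * TASK_SIZE.
 */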
119
arch/powerpc/mm/mmu_context_hash32.c
Normal file
@ -0,0 +1,119 @@
/*
|
||||
* This file contains the routines for handling the MMU on those
|
||||
* PowerPC implementations where the MMU substantially follows the
|
||||
* architecture specification. This includes the 6xx, 7xx, 7xxx,
|
||||
* and 8260 implementations but excludes the 8xx and 4xx.
|
||||
* -- paulus
|
||||
*
|
||||
* Derived from arch/ppc/mm/init.c:
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
|
||||
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
|
||||
* Copyright (C) 1996 Paul Mackerras
|
||||
*
|
||||
* Derived from "arch/i386/mm/init.c"
|
||||
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/export.h>
|
||||
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
/*
|
||||
* On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs
|
||||
* (virtual segment identifiers) for each context. Although the
|
||||
* hardware supports 24-bit VSIDs, and thus >1 million contexts,
|
||||
* we only use 32,768 of them. That is ample, since there can be
|
||||
* at most around 30,000 tasks in the system anyway, and it means
|
||||
* that we can use a bitmap to indicate which contexts are in use.
|
||||
* Using a bitmap means that we entirely avoid all of the problems
|
||||
* that we used to have when the context number overflowed,
|
||||
* particularly on SMP systems.
|
||||
* -- paulus.
|
||||
*/
|
||||
#define NO_CONTEXT ((unsigned long) -1)
|
||||
#define LAST_CONTEXT 32767
|
||||
#define FIRST_CONTEXT 1
|
||||
|
||||
/*
|
||||
* This function defines the mapping from contexts to VSIDs (virtual
|
||||
* segment IDs). We use a skew on both the context and the high 4 bits
|
||||
* of the 32-bit virtual address (the "effective segment ID") in order
|
||||
* to spread out the entries in the MMU hash table. Note, if this
|
||||
* function is changed then arch/ppc/mm/hashtable.S will have to be
|
||||
* changed to correspond.
|
||||
*
|
||||
*
|
||||
* CTX_TO_VSID(ctx, va) (((ctx) * (897 * 16) + ((va) >> 28) * 0x111) \
|
||||
* & 0xffffff)
|
||||
*/
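/*
 * Sketch (not part of the original file): the mapping documented above,
 * written out as a helper purely for illustration.  For ctx == 3 and a
 * user address va == 0x10000000 (effective segment 1) it gives
 * (3 * 14352 + 1 * 0x111) & 0xffffff == 43329, so consecutive contexts
 * and segments get well-separated VSIDs.
 */
#if 0
static inline unsigned long ctx_to_vsid(unsigned long ctx, unsigned long va)
{
	return ((ctx * (897 * 16)) + ((va >> 28) * 0x111)) & 0xffffff;
}
#endif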
|
||||
|
||||
static unsigned long next_mmu_context;
|
||||
static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
|
||||
|
||||
unsigned long __init_new_context(void)
|
||||
{
|
||||
unsigned long ctx = next_mmu_context;
|
||||
|
||||
while (test_and_set_bit(ctx, context_map)) {
|
||||
ctx = find_next_zero_bit(context_map, LAST_CONTEXT+1, ctx);
|
||||
if (ctx > LAST_CONTEXT)
|
||||
ctx = 0;
|
||||
}
|
||||
next_mmu_context = (ctx + 1) & LAST_CONTEXT;
|
||||
|
||||
return ctx;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__init_new_context);
|
||||
|
||||
/*
|
||||
* Set up the context for a new address space.
|
||||
*/
|
||||
int init_new_context(struct task_struct *t, struct mm_struct *mm)
|
||||
{
|
||||
mm->context.id = __init_new_context();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free a context ID. Make sure to call this with preempt disabled!
|
||||
*/
|
||||
void __destroy_context(unsigned long ctx)
|
||||
{
|
||||
clear_bit(ctx, context_map);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__destroy_context);
|
||||
|
||||
/*
|
||||
* We're finished using the context for an address space.
|
||||
*/
|
||||
void destroy_context(struct mm_struct *mm)
|
||||
{
|
||||
preempt_disable();
|
||||
if (mm->context.id != NO_CONTEXT) {
|
||||
__destroy_context(mm->context.id);
|
||||
mm->context.id = NO_CONTEXT;
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the context management stuff.
|
||||
*/
|
||||
void __init mmu_context_init(void)
|
||||
{
|
||||
/* Reserve context 0 for kernel use */
|
||||
context_map[0] = (1 << FIRST_CONTEXT) - 1;
|
||||
next_mmu_context = FIRST_CONTEXT;
|
||||
}
|
||||
146
arch/powerpc/mm/mmu_context_hash64.c
Normal file
@ -0,0 +1,146 @@
/*
|
||||
* MMU context allocation for 64-bit kernels.
|
||||
*
|
||||
* Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/pgalloc.h>
|
||||
|
||||
#include "icswx.h"
|
||||
|
||||
static DEFINE_SPINLOCK(mmu_context_lock);
|
||||
static DEFINE_IDA(mmu_context_ida);
|
||||
|
||||
int __init_new_context(void)
|
||||
{
|
||||
int index;
|
||||
int err;
|
||||
|
||||
again:
|
||||
if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
|
||||
spin_lock(&mmu_context_lock);
|
||||
err = ida_get_new_above(&mmu_context_ida, 1, &index);
|
||||
spin_unlock(&mmu_context_lock);
|
||||
|
||||
if (err == -EAGAIN)
|
||||
goto again;
|
||||
else if (err)
|
||||
return err;
|
||||
|
||||
if (index > MAX_USER_CONTEXT) {
|
||||
spin_lock(&mmu_context_lock);
|
||||
ida_remove(&mmu_context_ida, index);
|
||||
spin_unlock(&mmu_context_lock);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__init_new_context);
|
||||
|
||||
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
|
||||
{
|
||||
int index;
|
||||
|
||||
index = __init_new_context();
|
||||
if (index < 0)
|
||||
return index;
|
||||
|
||||
	/* The old code would re-promote on fork; we don't do that when
	 * using slices, as it could cause problems promoting slices that
	 * have been forced down to 4K.
	 */
|
||||
if (slice_mm_new_context(mm))
|
||||
slice_set_user_psize(mm, mmu_virtual_psize);
|
||||
subpage_prot_init_new_context(mm);
|
||||
mm->context.id = index;
|
||||
#ifdef CONFIG_PPC_ICSWX
|
||||
mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
|
||||
if (!mm->context.cop_lockp) {
|
||||
__destroy_context(index);
|
||||
subpage_prot_free(mm);
|
||||
mm->context.id = MMU_NO_CONTEXT;
|
||||
return -ENOMEM;
|
||||
}
|
||||
spin_lock_init(mm->context.cop_lockp);
|
||||
#endif /* CONFIG_PPC_ICSWX */
|
||||
|
||||
#ifdef CONFIG_PPC_64K_PAGES
|
||||
mm->context.pte_frag = NULL;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __destroy_context(int context_id)
|
||||
{
|
||||
spin_lock(&mmu_context_lock);
|
||||
ida_remove(&mmu_context_ida, context_id);
|
||||
spin_unlock(&mmu_context_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__destroy_context);
|
||||
|
||||
#ifdef CONFIG_PPC_64K_PAGES
|
||||
static void destroy_pagetable_page(struct mm_struct *mm)
|
||||
{
|
||||
int count;
|
||||
void *pte_frag;
|
||||
struct page *page;
|
||||
|
||||
pte_frag = mm->context.pte_frag;
|
||||
if (!pte_frag)
|
||||
return;
|
||||
|
||||
page = virt_to_page(pte_frag);
|
||||
/* drop all the pending references */
|
||||
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
|
||||
/* We allow PTE_FRAG_NR fragments from a PTE page */
|
||||
count = atomic_sub_return(PTE_FRAG_NR - count, &page->_count);
|
||||
if (!count) {
|
||||
pgtable_page_dtor(page);
|
||||
free_hot_cold_page(page, 0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
static inline void destroy_pagetable_page(struct mm_struct *mm)
|
||||
{
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void destroy_context(struct mm_struct *mm)
|
||||
{
|
||||
|
||||
#ifdef CONFIG_PPC_ICSWX
|
||||
drop_cop(mm->context.acop, mm);
|
||||
kfree(mm->context.cop_lockp);
|
||||
mm->context.cop_lockp = NULL;
|
||||
#endif /* CONFIG_PPC_ICSWX */
|
||||
|
||||
destroy_pagetable_page(mm);
|
||||
__destroy_context(mm->context.id);
|
||||
subpage_prot_free(mm);
|
||||
mm->context.id = MMU_NO_CONTEXT;
|
||||
}
|
||||
449
arch/powerpc/mm/mmu_context_nohash.c
Normal file
@ -0,0 +1,449 @@
/*
|
||||
* This file contains the routines for handling the MMU on those
|
||||
* PowerPC implementations where the MMU is not using the hash
|
||||
* table, such as 8xx, 4xx, BookE's etc...
|
||||
*
|
||||
* Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
|
||||
* IBM Corp.
|
||||
*
|
||||
* Derived from previous arch/powerpc/mm/mmu_context.c
|
||||
* and arch/powerpc/include/asm/mmu_context.h
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* TODO:
|
||||
*
|
||||
* - The global context lock will not scale very well
|
||||
* - The maps should be dynamically allocated to allow for processors
|
||||
* that support more PID bits at runtime
|
||||
* - Implement flush_tlb_mm() by making the context stale and picking
|
||||
* a new one
|
||||
* - More aggressively clear stale map bits and maybe find some way to
|
||||
* also clear mm->cpu_vm_mask bits when processes are migrated
|
||||
*/
|
||||
|
||||
//#define DEBUG_MAP_CONSISTENCY
|
||||
//#define DEBUG_CLAMP_LAST_CONTEXT 31
|
||||
//#define DEBUG_HARDER
|
||||
|
||||
/* We don't use DEBUG because it tends to be always compiled in nowadays
|
||||
* and this would generate way too much output
|
||||
*/
|
||||
#ifdef DEBUG_HARDER
|
||||
#define pr_hard(args...) printk(KERN_DEBUG args)
|
||||
#define pr_hardcont(args...) printk(KERN_CONT args)
|
||||
#else
|
||||
#define pr_hard(args...) do { } while(0)
|
||||
#define pr_hardcont(args...) do { } while(0)
|
||||
#endif
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
static unsigned int first_context, last_context;
|
||||
static unsigned int next_context, nr_free_contexts;
|
||||
static unsigned long *context_map;
|
||||
static unsigned long *stale_map[NR_CPUS];
|
||||
static struct mm_struct **context_mm;
|
||||
static DEFINE_RAW_SPINLOCK(context_lock);
|
||||
|
||||
#define CTX_MAP_SIZE \
|
||||
(sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1))
|
||||
|
||||
|
||||
/* Steal a context from a task that has one at the moment.
|
||||
*
|
||||
* This is used when we are running out of available PID numbers
|
||||
* on the processors.
|
||||
*
|
||||
* This isn't an LRU system, it just frees up each context in
|
||||
* turn (sort-of pseudo-random replacement :). This would be the
|
||||
* place to implement an LRU scheme if anyone was motivated to do it.
|
||||
* -- paulus
|
||||
*
|
||||
* For context stealing, we use a slightly different approach for
|
||||
* SMP and UP. Basically, the UP one is simpler and doesn't use
|
||||
* the stale map as we can just flush the local CPU
|
||||
* -- benh
|
||||
*/
|
||||
#ifdef CONFIG_SMP
|
||||
static unsigned int steal_context_smp(unsigned int id)
|
||||
{
|
||||
struct mm_struct *mm;
|
||||
unsigned int cpu, max, i;
|
||||
|
||||
max = last_context - first_context;
|
||||
|
||||
/* Attempt to free next_context first and then loop until we manage */
|
||||
while (max--) {
|
||||
/* Pick up the victim mm */
|
||||
mm = context_mm[id];
|
||||
|
||||
/* We have a candidate victim, check if it's active, on SMP
|
||||
* we cannot steal active contexts
|
||||
*/
|
||||
if (mm->context.active) {
|
||||
id++;
|
||||
if (id > last_context)
|
||||
id = first_context;
|
||||
continue;
|
||||
}
|
||||
pr_hardcont(" | steal %d from 0x%p", id, mm);
|
||||
|
||||
/* Mark this mm as having no context anymore */
|
||||
mm->context.id = MMU_NO_CONTEXT;
|
||||
|
||||
/* Mark it stale on all CPUs that used this mm. For threaded
|
||||
* implementations, we set it on all threads on each core
|
||||
* represented in the mask. A future implementation will use
|
||||
* a core map instead but this will do for now.
|
||||
*/
|
||||
for_each_cpu(cpu, mm_cpumask(mm)) {
|
||||
for (i = cpu_first_thread_sibling(cpu);
|
||||
i <= cpu_last_thread_sibling(cpu); i++) {
|
||||
if (stale_map[i])
|
||||
__set_bit(id, stale_map[i]);
|
||||
}
|
||||
cpu = i - 1;
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
/* This will happen if you have more CPUs than available contexts,
|
||||
* all we can do here is wait a bit and try again
|
||||
*/
|
||||
raw_spin_unlock(&context_lock);
|
||||
cpu_relax();
|
||||
raw_spin_lock(&context_lock);
|
||||
|
||||
/* This will cause the caller to try again */
|
||||
return MMU_NO_CONTEXT;
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/* Note that this will also be called on SMP if all other CPUs are
|
||||
* offlined, which means that it may be called for cpu != 0. For
|
||||
* this to work, we somewhat assume that CPUs that are onlined
|
||||
* come up with a fully clean TLB (or are cleaned when offlined)
|
||||
*/
|
||||
static unsigned int steal_context_up(unsigned int id)
|
||||
{
|
||||
struct mm_struct *mm;
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
/* Pick up the victim mm */
|
||||
mm = context_mm[id];
|
||||
|
||||
pr_hardcont(" | steal %d from 0x%p", id, mm);
|
||||
|
||||
/* Flush the TLB for that context */
|
||||
local_flush_tlb_mm(mm);
|
||||
|
||||
/* Mark this mm as having no context anymore */
|
||||
mm->context.id = MMU_NO_CONTEXT;
|
||||
|
||||
/* XXX This clear should ultimately be part of local_flush_tlb_mm */
|
||||
__clear_bit(id, stale_map[cpu]);
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MAP_CONSISTENCY
|
||||
static void context_check_map(void)
|
||||
{
|
||||
unsigned int id, nrf, nact;
|
||||
|
||||
nrf = nact = 0;
|
||||
for (id = first_context; id <= last_context; id++) {
|
||||
int used = test_bit(id, context_map);
|
||||
if (!used)
|
||||
nrf++;
|
||||
if (used != (context_mm[id] != NULL))
|
||||
pr_err("MMU: Context %d is %s and MM is %p !\n",
|
||||
id, used ? "used" : "free", context_mm[id]);
|
||||
if (context_mm[id] != NULL)
|
||||
nact += context_mm[id]->context.active;
|
||||
}
|
||||
if (nrf != nr_free_contexts) {
|
||||
pr_err("MMU: Free context count out of sync ! (%d vs %d)\n",
|
||||
nr_free_contexts, nrf);
|
||||
nr_free_contexts = nrf;
|
||||
}
|
||||
if (nact > num_online_cpus())
|
||||
pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
|
||||
nact, num_online_cpus());
|
||||
if (first_context > 0 && !test_bit(0, context_map))
|
||||
pr_err("MMU: Context 0 has been freed !!!\n");
|
||||
}
|
||||
#else
|
||||
static void context_check_map(void) { }
|
||||
#endif
|
||||
|
||||
void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
|
||||
{
|
||||
unsigned int i, id, cpu = smp_processor_id();
|
||||
unsigned long *map;
|
||||
|
||||
/* No lockless fast path .. yet */
|
||||
raw_spin_lock(&context_lock);
|
||||
|
||||
pr_hard("[%d] activating context for mm @%p, active=%d, id=%d",
|
||||
cpu, next, next->context.active, next->context.id);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/* Mark us active and the previous one not anymore */
|
||||
next->context.active++;
|
||||
if (prev) {
|
||||
pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active);
|
||||
WARN_ON(prev->context.active < 1);
|
||||
prev->context.active--;
|
||||
}
|
||||
|
||||
again:
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/* If we already have a valid assigned context, skip all that */
|
||||
id = next->context.id;
|
||||
if (likely(id != MMU_NO_CONTEXT)) {
|
||||
#ifdef DEBUG_MAP_CONSISTENCY
|
||||
if (context_mm[id] != next)
|
||||
pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n",
|
||||
next, id, id, context_mm[id]);
|
||||
#endif
|
||||
goto ctxt_ok;
|
||||
}
|
||||
|
||||
/* We really don't have a context, let's try to acquire one */
|
||||
id = next_context;
|
||||
if (id > last_context)
|
||||
id = first_context;
|
||||
map = context_map;
|
||||
|
||||
/* No more free contexts, let's try to steal one */
|
||||
if (nr_free_contexts == 0) {
|
||||
#ifdef CONFIG_SMP
|
||||
if (num_online_cpus() > 1) {
|
||||
id = steal_context_smp(id);
|
||||
if (id == MMU_NO_CONTEXT)
|
||||
goto again;
|
||||
goto stolen;
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
id = steal_context_up(id);
|
||||
goto stolen;
|
||||
}
|
||||
nr_free_contexts--;
|
||||
|
||||
/* We know there's at least one free context, try to find it */
|
||||
while (__test_and_set_bit(id, map)) {
|
||||
id = find_next_zero_bit(map, last_context+1, id);
|
||||
if (id > last_context)
|
||||
id = first_context;
|
||||
}
|
||||
stolen:
|
||||
next_context = id + 1;
|
||||
context_mm[id] = next;
|
||||
next->context.id = id;
|
||||
pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts);
|
||||
|
||||
context_check_map();
|
||||
ctxt_ok:
|
||||
|
||||
/* If that context got marked stale on this CPU, then flush the
|
||||
* local TLB for it and unmark it before we use it
|
||||
*/
|
||||
if (test_bit(id, stale_map[cpu])) {
|
||||
pr_hardcont(" | stale flush %d [%d..%d]",
|
||||
id, cpu_first_thread_sibling(cpu),
|
||||
cpu_last_thread_sibling(cpu));
|
||||
|
||||
local_flush_tlb_mm(next);
|
||||
|
||||
/* XXX This clear should ultimately be part of local_flush_tlb_mm */
|
||||
for (i = cpu_first_thread_sibling(cpu);
|
||||
i <= cpu_last_thread_sibling(cpu); i++) {
|
||||
if (stale_map[i])
|
||||
__clear_bit(id, stale_map[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Flick the MMU and release lock */
|
||||
pr_hardcont(" -> %d\n", id);
|
||||
set_context(id, next->pgd);
|
||||
raw_spin_unlock(&context_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set up the context for a new address space.
|
||||
*/
|
||||
int init_new_context(struct task_struct *t, struct mm_struct *mm)
|
||||
{
|
||||
pr_hard("initing context for mm @%p\n", mm);
|
||||
|
||||
mm->context.id = MMU_NO_CONTEXT;
|
||||
mm->context.active = 0;
|
||||
|
||||
#ifdef CONFIG_PPC_MM_SLICES
|
||||
if (slice_mm_new_context(mm))
|
||||
slice_set_user_psize(mm, mmu_virtual_psize);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* We're finished using the context for an address space.
|
||||
*/
|
||||
void destroy_context(struct mm_struct *mm)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned int id;
|
||||
|
||||
if (mm->context.id == MMU_NO_CONTEXT)
|
||||
return;
|
||||
|
||||
WARN_ON(mm->context.active != 0);
|
||||
|
||||
raw_spin_lock_irqsave(&context_lock, flags);
|
||||
id = mm->context.id;
|
||||
if (id != MMU_NO_CONTEXT) {
|
||||
__clear_bit(id, context_map);
|
||||
mm->context.id = MMU_NO_CONTEXT;
|
||||
#ifdef DEBUG_MAP_CONSISTENCY
|
||||
mm->context.active = 0;
|
||||
#endif
|
||||
context_mm[id] = NULL;
|
||||
nr_free_contexts++;
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&context_lock, flags);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
static int mmu_context_cpu_notify(struct notifier_block *self,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
unsigned int cpu = (unsigned int)(long)hcpu;
|
||||
|
||||
/* We don't touch the CPU 0 map, it's allocated at boot and kept
|
||||
* around forever
|
||||
*/
|
||||
if (cpu == boot_cpuid)
|
||||
return NOTIFY_OK;
|
||||
|
||||
switch (action) {
|
||||
case CPU_UP_PREPARE:
|
||||
case CPU_UP_PREPARE_FROZEN:
|
||||
pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
|
||||
stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
|
||||
break;
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_UP_CANCELED_FROZEN:
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
|
||||
kfree(stale_map[cpu]);
|
||||
stale_map[cpu] = NULL;
|
||||
|
||||
/* We also clear the cpu_vm_mask bits of CPUs going away */
|
||||
clear_tasks_mm_cpumask(cpu);
|
||||
break;
|
||||
#endif /* CONFIG_HOTPLUG_CPU */
|
||||
}
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block mmu_context_cpu_nb = {
|
||||
.notifier_call = mmu_context_cpu_notify,
|
||||
};
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/*
|
||||
* Initialize the context management stuff.
|
||||
*/
|
||||
void __init mmu_context_init(void)
|
||||
{
|
||||
/* Mark init_mm as being active on all possible CPUs since
|
||||
* we'll get called with prev == init_mm the first time
|
||||
* we schedule on a given CPU
|
||||
*/
|
||||
init_mm.context.active = NR_CPUS;
|
||||
|
||||
/*
|
||||
* The MPC8xx has only 16 contexts. We rotate through them on each
|
||||
* task switch. A better way would be to keep track of tasks that
|
||||
* own contexts, and implement an LRU usage. That way very active
|
||||
* tasks don't always have to pay the TLB reload overhead. The
|
||||
* kernel pages are mapped shared, so the kernel can run on behalf
|
||||
* of any task that makes a kernel entry. Shared does not mean they
|
||||
* are not protected, just that the ASID comparison is not performed.
|
||||
* -- Dan
|
||||
*
|
||||
* The IBM4xx has 256 contexts, so we can just rotate through these
|
||||
* as a way of "switching" contexts. If the TID of the TLB is zero,
|
||||
* the PID/TID comparison is disabled, so we can use a TID of zero
|
||||
* to represent all kernel pages as shared among all contexts.
|
||||
* -- Dan
|
||||
*
|
||||
* The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We
|
||||
* should normally never have to steal though the facility is
|
||||
* present if needed.
|
||||
* -- BenH
|
||||
*/
|
||||
if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
|
||||
first_context = 0;
|
||||
last_context = 15;
|
||||
} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
|
||||
first_context = 1;
|
||||
last_context = 65535;
|
||||
} else {
|
||||
first_context = 1;
|
||||
last_context = 255;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_CLAMP_LAST_CONTEXT
|
||||
last_context = DEBUG_CLAMP_LAST_CONTEXT;
|
||||
#endif
|
||||
/*
|
||||
* Allocate the maps used by context management
|
||||
*/
|
||||
context_map = alloc_bootmem(CTX_MAP_SIZE);
|
||||
context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1));
|
||||
#ifndef CONFIG_SMP
|
||||
stale_map[0] = alloc_bootmem(CTX_MAP_SIZE);
|
||||
#else
|
||||
stale_map[boot_cpuid] = alloc_bootmem(CTX_MAP_SIZE);
|
||||
|
||||
register_cpu_notifier(&mmu_context_cpu_nb);
|
||||
#endif
|
||||
|
||||
printk(KERN_INFO
|
||||
"MMU: Allocated %zu bytes of context maps for %d contexts\n",
|
||||
2 * CTX_MAP_SIZE + (sizeof(void *) * (last_context + 1)),
|
||||
last_context - first_context + 1);
|
||||
|
||||
/*
|
||||
* Some processors have too few contexts to reserve one for
|
||||
* init_mm, and require using context 0 for a normal task.
|
||||
* Other processors reserve the use of context zero for the kernel.
|
||||
* This code assumes first_context < 32.
|
||||
*/
|
||||
context_map[0] = (1 << first_context) - 1;
|
||||
next_context = first_context;
|
||||
nr_free_contexts = last_context - first_context + 1;
|
||||
}
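/*
 * Worked example (not part of the original file), assuming a 32-bit
 * build that takes the default branch above (last_context == 255):
 * CTX_MAP_SIZE is sizeof(unsigned long) * (255 / 32 + 1) == 32 bytes,
 * context_mm takes sizeof(void *) * 256 == 1024 bytes, so the printk
 * above reports 2 * 32 + 1024 == 1088 bytes of context maps for 255
 * contexts.
 */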
|
||||
|
||||
167
arch/powerpc/mm/mmu_decl.h
Normal file
@ -0,0 +1,167 @@
/*
|
||||
* Declarations of procedures and variables shared between files
|
||||
* in arch/ppc/mm/.
|
||||
*
|
||||
* Derived from arch/ppc/mm/init.c:
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
|
||||
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
|
||||
* Copyright (C) 1996 Paul Mackerras
|
||||
*
|
||||
* Derived from "arch/i386/mm/init.c"
|
||||
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
#include <linux/mm.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/mmu.h>
|
||||
|
||||
#ifdef CONFIG_PPC_MMU_NOHASH
|
||||
|
||||
/*
|
||||
* On 40x and 8xx, we directly inline tlbia and tlbivax
|
||||
*/
|
||||
#if defined(CONFIG_40x) || defined(CONFIG_8xx)
|
||||
static inline void _tlbil_all(void)
|
||||
{
|
||||
asm volatile ("sync; tlbia; isync" : : : "memory");
|
||||
}
|
||||
static inline void _tlbil_pid(unsigned int pid)
|
||||
{
|
||||
asm volatile ("sync; tlbia; isync" : : : "memory");
|
||||
}
|
||||
#define _tlbil_pid_noind(pid) _tlbil_pid(pid)
|
||||
|
||||
#else /* CONFIG_40x || CONFIG_8xx */
|
||||
extern void _tlbil_all(void);
|
||||
extern void _tlbil_pid(unsigned int pid);
|
||||
#ifdef CONFIG_PPC_BOOK3E
|
||||
extern void _tlbil_pid_noind(unsigned int pid);
|
||||
#else
|
||||
#define _tlbil_pid_noind(pid) _tlbil_pid(pid)
|
||||
#endif
|
||||
#endif /* !(CONFIG_40x || CONFIG_8xx) */
|
||||
|
||||
/*
|
||||
* On 8xx, we directly inline tlbie, on others, it's extern
|
||||
*/
|
||||
#ifdef CONFIG_8xx
|
||||
static inline void _tlbil_va(unsigned long address, unsigned int pid,
|
||||
unsigned int tsize, unsigned int ind)
|
||||
{
|
||||
asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
|
||||
}
|
||||
#elif defined(CONFIG_PPC_BOOK3E)
|
||||
extern void _tlbil_va(unsigned long address, unsigned int pid,
|
||||
unsigned int tsize, unsigned int ind);
|
||||
#else
|
||||
extern void __tlbil_va(unsigned long address, unsigned int pid);
|
||||
static inline void _tlbil_va(unsigned long address, unsigned int pid,
|
||||
unsigned int tsize, unsigned int ind)
|
||||
{
|
||||
__tlbil_va(address, pid);
|
||||
}
|
||||
#endif /* CONFIG_8xx */
|
||||
|
||||
#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_PPC_47x)
|
||||
extern void _tlbivax_bcast(unsigned long address, unsigned int pid,
|
||||
unsigned int tsize, unsigned int ind);
|
||||
#else
|
||||
static inline void _tlbivax_bcast(unsigned long address, unsigned int pid,
|
||||
unsigned int tsize, unsigned int ind)
|
||||
{
|
||||
BUG();
|
||||
}
|
||||
#endif
|
||||
|
||||
#else /* CONFIG_PPC_MMU_NOHASH */
|
||||
|
||||
extern void hash_preload(struct mm_struct *mm, unsigned long ea,
|
||||
unsigned long access, unsigned long trap);
|
||||
|
||||
|
||||
extern void _tlbie(unsigned long address);
|
||||
extern void _tlbia(void);
|
||||
|
||||
#endif /* CONFIG_PPC_MMU_NOHASH */
|
||||
|
||||
#ifdef CONFIG_PPC32
|
||||
|
||||
extern void mapin_ram(void);
|
||||
extern int map_page(unsigned long va, phys_addr_t pa, int flags);
|
||||
extern void setbat(int index, unsigned long virt, phys_addr_t phys,
|
||||
unsigned int size, int flags);
|
||||
|
||||
extern int __map_without_bats;
|
||||
extern int __allow_ioremap_reserved;
|
||||
extern unsigned long ioremap_base;
|
||||
extern unsigned int rtas_data, rtas_size;
|
||||
|
||||
struct hash_pte;
|
||||
extern struct hash_pte *Hash, *Hash_end;
|
||||
extern unsigned long Hash_size, Hash_mask;
|
||||
|
||||
#endif /* CONFIG_PPC32 */
|
||||
|
||||
#ifdef CONFIG_PPC64
|
||||
extern int map_kernel_page(unsigned long ea, unsigned long pa, int flags);
|
||||
#endif /* CONFIG_PPC64 */
|
||||
|
||||
extern unsigned long ioremap_bot;
|
||||
extern unsigned long __max_low_memory;
|
||||
extern phys_addr_t __initial_memory_limit_addr;
|
||||
extern phys_addr_t total_memory;
|
||||
extern phys_addr_t total_lowmem;
|
||||
extern phys_addr_t memstart_addr;
|
||||
extern phys_addr_t lowmem_end_addr;
|
||||
|
||||
#ifdef CONFIG_WII
|
||||
extern unsigned long wii_hole_start;
|
||||
extern unsigned long wii_hole_size;
|
||||
|
||||
extern unsigned long wii_mmu_mapin_mem2(unsigned long top);
|
||||
extern void wii_memory_fixups(void);
|
||||
#endif
|
||||
|
||||
/* ...and now those things that may be slightly different between processor
|
||||
* architectures. -- Dan
|
||||
*/
|
||||
#if defined(CONFIG_8xx)
|
||||
#define MMU_init_hw() do { } while(0)
|
||||
#define mmu_mapin_ram(top) (0UL)
|
||||
|
||||
#elif defined(CONFIG_4xx)
|
||||
extern void MMU_init_hw(void);
|
||||
extern unsigned long mmu_mapin_ram(unsigned long top);
|
||||
|
||||
#elif defined(CONFIG_PPC_FSL_BOOK3E)
|
||||
extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx);
|
||||
extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
|
||||
phys_addr_t phys);
|
||||
#ifdef CONFIG_PPC32
|
||||
extern void MMU_init_hw(void);
|
||||
extern unsigned long mmu_mapin_ram(unsigned long top);
|
||||
extern void adjust_total_lowmem(void);
|
||||
extern int switch_to_as1(void);
|
||||
extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
|
||||
#endif
|
||||
extern void loadcam_entry(unsigned int index);
|
||||
|
||||
struct tlbcam {
|
||||
u32 MAS0;
|
||||
u32 MAS1;
|
||||
unsigned long MAS2;
|
||||
u32 MAS3;
|
||||
u32 MAS7;
|
||||
};
|
||||
#elif defined(CONFIG_PPC32)
|
||||
/* anything 32-bit except 4xx or 8xx */
|
||||
extern void MMU_init_hw(void);
|
||||
extern unsigned long mmu_mapin_ram(unsigned long top);
|
||||
#endif
|
||||
1849
arch/powerpc/mm/numa.c
Normal file
File diff suppressed because it is too large
236
arch/powerpc/mm/pgtable.c
Normal file
@@ -0,0 +1,236 @@
/*
|
||||
* This file contains common routines for dealing with free of page tables
|
||||
* Along with common page table handling code
|
||||
*
|
||||
* Derived from arch/powerpc/mm/tlb_64.c:
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
|
||||
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
|
||||
* Copyright (C) 1996 Paul Mackerras
|
||||
*
|
||||
* Derived from "arch/i386/mm/init.c"
|
||||
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
||||
*
|
||||
* Dave Engebretsen <engebret@us.ibm.com>
|
||||
* Rework for PPC64 port.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/tlb.h>
|
||||
|
||||
static inline int is_exec_fault(void)
{
	return current->thread.regs && TRAP(current->thread.regs) == 0x400;
}
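
/* In is_exec_fault() above, 0x400 is the instruction storage interrupt
 * vector: the trap was taken on an instruction fetch, not a data access.
 */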
|
||||
|
||||
/* We only try to do i/d cache coherency on stuff that looks like
|
||||
* reasonably "normal" PTEs. We currently require a PTE to be present
|
||||
* and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE. We also only do that
|
||||
* on userspace PTEs
|
||||
*/
|
||||
static inline int pte_looks_normal(pte_t pte)
{
	return (pte_val(pte) &
		(_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
		(_PAGE_PRESENT | _PAGE_USER);
}
|
||||
|
||||
static struct page *maybe_pte_to_page(pte_t pte)
|
||||
{
|
||||
unsigned long pfn = pte_pfn(pte);
|
||||
struct page *page;
|
||||
|
||||
if (unlikely(!pfn_valid(pfn)))
|
||||
return NULL;
|
||||
page = pfn_to_page(pfn);
|
||||
if (PageReserved(page))
|
||||
return NULL;
|
||||
return page;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0
|
||||
|
||||
/* Server-style MMU handles coherency when hashing if HW exec permission
 * is supported per page (currently 64-bit only). If not, then we always
 * flush the cache for valid PTEs in set_pte. Embedded CPUs without HW exec
 * support fall into the same category.
 */
|
||||
|
||||
static pte_t set_pte_filter(pte_t pte)
|
||||
{
|
||||
pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
|
||||
if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
|
||||
cpu_has_feature(CPU_FTR_NOEXECUTE))) {
|
||||
struct page *pg = maybe_pte_to_page(pte);
|
||||
if (!pg)
|
||||
return pte;
|
||||
if (!test_bit(PG_arch_1, &pg->flags)) {
|
||||
flush_dcache_icache_page(pg);
|
||||
set_bit(PG_arch_1, &pg->flags);
|
||||
}
|
||||
}
|
||||
return pte;
|
||||
}
|
||||
|
||||
static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
|
||||
int dirty)
|
||||
{
|
||||
return pte;
|
||||
}
|
||||
|
||||
#else /* defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 */
|
||||
|
||||
/* Embedded type MMU with HW exec support. This is a bit more complicated
|
||||
* as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
|
||||
* instead we "filter out" the exec permission for non clean pages.
|
||||
*/
|
||||
static pte_t set_pte_filter(pte_t pte)
|
||||
{
|
||||
struct page *pg;
|
||||
|
||||
/* No exec permission in the first place, move on */
|
||||
if (!(pte_val(pte) & _PAGE_EXEC) || !pte_looks_normal(pte))
|
||||
return pte;
|
||||
|
||||
/* If you set _PAGE_EXEC on weird pages you're on your own */
|
||||
pg = maybe_pte_to_page(pte);
|
||||
if (unlikely(!pg))
|
||||
return pte;
|
||||
|
||||
	/* If the page is clean, we move on */
|
||||
if (test_bit(PG_arch_1, &pg->flags))
|
||||
return pte;
|
||||
|
||||
/* If it's an exec fault, we flush the cache and make it clean */
|
||||
if (is_exec_fault()) {
|
||||
flush_dcache_icache_page(pg);
|
||||
set_bit(PG_arch_1, &pg->flags);
|
||||
return pte;
|
||||
}
|
||||
|
||||
/* Else, we filter out _PAGE_EXEC */
|
||||
return __pte(pte_val(pte) & ~_PAGE_EXEC);
|
||||
}
|
||||
|
||||
static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
|
||||
int dirty)
|
||||
{
|
||||
struct page *pg;
|
||||
|
||||
/* So here, we only care about exec faults, as we use them
|
||||
* to recover lost _PAGE_EXEC and perform I$/D$ coherency
|
||||
* if necessary. Also if _PAGE_EXEC is already set, same deal,
|
||||
* we just bail out
|
||||
*/
|
||||
if (dirty || (pte_val(pte) & _PAGE_EXEC) || !is_exec_fault())
|
||||
return pte;
|
||||
|
||||
#ifdef CONFIG_DEBUG_VM
|
||||
/* So this is an exec fault, _PAGE_EXEC is not set. If it was
|
||||
* an error we would have bailed out earlier in do_page_fault()
|
||||
* but let's make sure of it
|
||||
*/
|
||||
if (WARN_ON(!(vma->vm_flags & VM_EXEC)))
|
||||
return pte;
|
||||
#endif /* CONFIG_DEBUG_VM */
|
||||
|
||||
/* If you set _PAGE_EXEC on weird pages you're on your own */
|
||||
pg = maybe_pte_to_page(pte);
|
||||
if (unlikely(!pg))
|
||||
goto bail;
|
||||
|
||||
/* If the page is already clean, we move on */
|
||||
if (test_bit(PG_arch_1, &pg->flags))
|
||||
goto bail;
|
||||
|
||||
/* Clean the page and set PG_arch_1 */
|
||||
flush_dcache_icache_page(pg);
|
||||
set_bit(PG_arch_1, &pg->flags);
|
||||
|
||||
bail:
|
||||
return __pte(pte_val(pte) | _PAGE_EXEC);
|
||||
}
|
||||
|
||||
#endif /* !(defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0) */
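
/*
 * Hedged walk-through of the lazy I$/D$ protocol implemented by the two
 * filters above (embedded MMU with HW exec; a typical sequence, not a
 * trace from this kernel):
 *
 *  1. A PROT_EXEC page is first touched by a read or write: set_pte_filter()
 *     sees PG_arch_1 clear and no exec fault, so the PTE is installed with
 *     _PAGE_EXEC stripped.
 *  2. The task then branches into the page and takes a 0x400 exec fault:
 *     set_access_flags_filter() flushes the dcache to the icache, sets
 *     PG_arch_1, and re-installs the PTE with _PAGE_EXEC restored.
 *  3. Later mappings of the same page keep _PAGE_EXEC immediately, because
 *     PG_arch_1 now records that the icache is clean for that page.
 */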
|
||||
|
||||
/*
|
||||
* set_pte stores a linux PTE into the linux page table.
|
||||
*/
|
||||
void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
|
||||
pte_t pte)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_VM
|
||||
WARN_ON(pte_val(*ptep) & _PAGE_PRESENT);
|
||||
#endif
|
||||
/* Note: mm->context.id might not yet have been assigned as
|
||||
* this context might not have been activated yet when this
|
||||
* is called.
|
||||
*/
|
||||
pte = set_pte_filter(pte);
|
||||
|
||||
/* Perform the setting of the PTE */
|
||||
__set_pte_at(mm, addr, ptep, pte, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called when relaxing access to a PTE. It's also called in the page
|
||||
* fault path when we don't hit any of the major fault cases, ie, a minor
|
||||
* update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
|
||||
* handled those two for us, we additionally deal with missing execute
|
||||
* permission here on some processors
|
||||
*/
|
||||
int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
|
||||
pte_t *ptep, pte_t entry, int dirty)
|
||||
{
|
||||
int changed;
|
||||
entry = set_access_flags_filter(entry, vma, dirty);
|
||||
changed = !pte_same(*(ptep), entry);
|
||||
if (changed) {
|
||||
if (!is_vm_hugetlb_page(vma))
|
||||
assert_pte_locked(vma->vm_mm, address);
|
||||
__ptep_set_access_flags(ptep, entry);
|
||||
flush_tlb_page_nohash(vma, address);
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_VM
|
||||
void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
|
||||
if (mm == &init_mm)
|
||||
return;
|
||||
pgd = mm->pgd + pgd_index(addr);
|
||||
BUG_ON(pgd_none(*pgd));
|
||||
pud = pud_offset(pgd, addr);
|
||||
BUG_ON(pud_none(*pud));
|
||||
pmd = pmd_offset(pud, addr);
|
||||
/*
|
||||
* khugepaged to collapse normal pages to hugepage, first set
|
||||
* pmd to none to force page fault/gup to take mmap_sem. After
|
||||
* pmd is set to none, we do a pte_clear which does this assertion
|
||||
* so if we find pmd none, return.
|
||||
*/
|
||||
if (pmd_none(*pmd))
|
||||
return;
|
||||
BUG_ON(!pmd_present(*pmd));
|
||||
assert_spin_locked(pte_lockptr(mm, pmd));
|
||||
}
|
||||
#endif /* CONFIG_DEBUG_VM */
|
||||
|
||||
460
arch/powerpc/mm/pgtable_32.c
Normal file
@@ -0,0 +1,460 @@
/*
|
||||
* This file contains the routines setting up the linux page tables.
|
||||
* -- paulus
|
||||
*
|
||||
* Derived from arch/ppc/mm/init.c:
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
|
||||
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
|
||||
* Copyright (C) 1996 Paul Mackerras
|
||||
*
|
||||
* Derived from "arch/i386/mm/init.c"
|
||||
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
#include "mmu_decl.h"
|
||||
|
||||
unsigned long ioremap_base;
|
||||
unsigned long ioremap_bot;
|
||||
EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
|
||||
|
||||
#ifdef CONFIG_6xx
|
||||
#define HAVE_BATS 1
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_FSL_BOOKE)
|
||||
#define HAVE_TLBCAM 1
|
||||
#endif
|
||||
|
||||
extern char etext[], _stext[];
|
||||
|
||||
#ifdef HAVE_BATS
|
||||
extern phys_addr_t v_mapped_by_bats(unsigned long va);
|
||||
extern unsigned long p_mapped_by_bats(phys_addr_t pa);
|
||||
void setbat(int index, unsigned long virt, phys_addr_t phys,
|
||||
unsigned int size, int flags);
|
||||
|
||||
#else /* !HAVE_BATS */
|
||||
#define v_mapped_by_bats(x) (0UL)
|
||||
#define p_mapped_by_bats(x) (0UL)
|
||||
#endif /* HAVE_BATS */
|
||||
|
||||
#ifdef HAVE_TLBCAM
|
||||
extern unsigned int tlbcam_index;
|
||||
extern phys_addr_t v_mapped_by_tlbcam(unsigned long va);
|
||||
extern unsigned long p_mapped_by_tlbcam(phys_addr_t pa);
|
||||
#else /* !HAVE_TLBCAM */
|
||||
#define v_mapped_by_tlbcam(x) (0UL)
|
||||
#define p_mapped_by_tlbcam(x) (0UL)
|
||||
#endif /* HAVE_TLBCAM */
|
||||
|
||||
#define PGDIR_ORDER (32 + PGD_T_LOG2 - PGDIR_SHIFT)
|
||||
|
||||
pgd_t *pgd_alloc(struct mm_struct *mm)
|
||||
{
|
||||
pgd_t *ret;
|
||||
|
||||
	/* pgdir takes a page or two with 4K pages and a page fraction otherwise */
|
||||
#ifndef CONFIG_PPC_4K_PAGES
|
||||
ret = kzalloc(1 << PGDIR_ORDER, GFP_KERNEL);
|
||||
#else
|
||||
ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
|
||||
PGDIR_ORDER - PAGE_SHIFT);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
|
||||
{
|
||||
#ifndef CONFIG_PPC_4K_PAGES
|
||||
kfree((void *)pgd);
|
||||
#else
|
||||
free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT);
|
||||
#endif
|
||||
}
|
||||
|
||||
__init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
|
||||
{
|
||||
pte_t *pte;
|
||||
extern int mem_init_done;
|
||||
extern void *early_get_page(void);
|
||||
|
||||
if (mem_init_done) {
|
||||
pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
|
||||
} else {
|
||||
pte = (pte_t *)early_get_page();
|
||||
if (pte)
|
||||
clear_page(pte);
|
||||
}
|
||||
return pte;
|
||||
}
|
||||
|
||||
pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
|
||||
{
|
||||
struct page *ptepage;
|
||||
|
||||
gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO;
|
||||
|
||||
ptepage = alloc_pages(flags, 0);
|
||||
if (!ptepage)
|
||||
return NULL;
|
||||
if (!pgtable_page_ctor(ptepage)) {
|
||||
__free_page(ptepage);
|
||||
return NULL;
|
||||
}
|
||||
return ptepage;
|
||||
}
|
||||
|
||||
void __iomem *
|
||||
ioremap(phys_addr_t addr, unsigned long size)
|
||||
{
|
||||
return __ioremap_caller(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED,
|
||||
__builtin_return_address(0));
|
||||
}
|
||||
EXPORT_SYMBOL(ioremap);
|
||||
|
||||
void __iomem *
|
||||
ioremap_wc(phys_addr_t addr, unsigned long size)
|
||||
{
|
||||
return __ioremap_caller(addr, size, _PAGE_NO_CACHE,
|
||||
__builtin_return_address(0));
|
||||
}
|
||||
EXPORT_SYMBOL(ioremap_wc);
|
||||
|
||||
void __iomem *
|
||||
ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
|
||||
{
|
||||
/* writeable implies dirty for kernel addresses */
|
||||
if (flags & _PAGE_RW)
|
||||
flags |= _PAGE_DIRTY | _PAGE_HWWRITE;
|
||||
|
||||
/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
|
||||
flags &= ~(_PAGE_USER | _PAGE_EXEC);
|
||||
|
||||
#ifdef _PAGE_BAP_SR
|
||||
/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
|
||||
* which means that we just cleared supervisor access... oops ;-) This
|
||||
* restores it
|
||||
*/
|
||||
flags |= _PAGE_BAP_SR;
|
||||
#endif
|
||||
|
||||
return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
|
||||
}
|
||||
EXPORT_SYMBOL(ioremap_prot);
|
||||
|
||||
void __iomem *
|
||||
__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
|
||||
{
|
||||
return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
|
||||
}
|
||||
|
||||
void __iomem *
|
||||
__ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
|
||||
void *caller)
|
||||
{
|
||||
unsigned long v, i;
|
||||
phys_addr_t p;
|
||||
int err;
|
||||
|
||||
/* Make sure we have the base flags */
|
||||
if ((flags & _PAGE_PRESENT) == 0)
|
||||
flags |= PAGE_KERNEL;
|
||||
|
||||
/* Non-cacheable page cannot be coherent */
|
||||
if (flags & _PAGE_NO_CACHE)
|
||||
flags &= ~_PAGE_COHERENT;
|
||||
|
||||
/*
|
||||
* Choose an address to map it to.
|
||||
* Once the vmalloc system is running, we use it.
|
||||
* Before then, we use space going down from ioremap_base
|
||||
* (ioremap_bot records where we're up to).
|
||||
*/
|
||||
p = addr & PAGE_MASK;
|
||||
size = PAGE_ALIGN(addr + size) - p;
|
||||
|
||||
/*
|
||||
* If the address lies within the first 16 MB, assume it's in ISA
|
||||
* memory space
|
||||
*/
|
||||
if (p < 16*1024*1024)
|
||||
p += _ISA_MEM_BASE;
|
||||
|
||||
#ifndef CONFIG_CRASH_DUMP
|
||||
/*
|
||||
* Don't allow anybody to remap normal RAM that we're using.
|
||||
* mem_init() sets high_memory so only do the check after that.
|
||||
*/
|
||||
if (mem_init_done && (p < virt_to_phys(high_memory)) &&
|
||||
!(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) {
|
||||
printk("__ioremap(): phys addr 0x%llx is RAM lr %pf\n",
|
||||
(unsigned long long)p, __builtin_return_address(0));
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (size == 0)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Is it already mapped? Perhaps overlapped by a previous
|
||||
* BAT mapping. If the whole area is mapped then we're done,
|
||||
* otherwise remap it since we want to keep the virt addrs for
|
||||
* each request contiguous.
|
||||
*
|
||||
* We make the assumption here that if the bottom and top
|
||||
* of the range we want are mapped then it's mapped to the
|
||||
* same virt address (and this is contiguous).
|
||||
* -- Cort
|
||||
*/
|
||||
if ((v = p_mapped_by_bats(p)) /*&& p_mapped_by_bats(p+size-1)*/ )
|
||||
goto out;
|
||||
|
||||
if ((v = p_mapped_by_tlbcam(p)))
|
||||
goto out;
|
||||
|
||||
if (mem_init_done) {
|
||||
struct vm_struct *area;
|
||||
area = get_vm_area_caller(size, VM_IOREMAP, caller);
|
||||
if (area == 0)
|
||||
return NULL;
|
||||
area->phys_addr = p;
|
||||
v = (unsigned long) area->addr;
|
||||
} else {
|
||||
v = (ioremap_bot -= size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Should check if it is a candidate for a BAT mapping
|
||||
*/
|
||||
|
||||
err = 0;
|
||||
for (i = 0; i < size && err == 0; i += PAGE_SIZE)
|
||||
err = map_page(v+i, p+i, flags);
|
||||
if (err) {
|
||||
if (mem_init_done)
|
||||
vunmap((void *)v);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
out:
|
||||
return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK));
|
||||
}
|
||||
EXPORT_SYMBOL(__ioremap);
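
/*
 * Hedged worked example of the alignment done in __ioremap_caller() above,
 * assuming 4K pages (the physical address is made up for illustration):
 *   addr = 0xfe000123, size = 0x200
 *   p    = addr & PAGE_MASK                = 0xfe000000
 *   size = PAGE_ALIGN(addr + size) - p     = 0x1000  (one 4K page)
 *   return value = v + (addr & ~PAGE_MASK) = v + 0x123
 * so the caller gets back a pointer carrying the original sub-page offset.
 */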
|
||||
|
||||
void iounmap(volatile void __iomem *addr)
|
||||
{
|
||||
/*
|
||||
* If mapped by BATs then there is nothing to do.
|
||||
* Calling vfree() generates a benign warning.
|
||||
*/
|
||||
if (v_mapped_by_bats((unsigned long)addr)) return;
|
||||
|
||||
if (addr > high_memory && (unsigned long) addr < ioremap_bot)
|
||||
vunmap((void *) (PAGE_MASK & (unsigned long)addr));
|
||||
}
|
||||
EXPORT_SYMBOL(iounmap);
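
/*
 * Hedged usage sketch: how a hypothetical platform driver would map a
 * device register block with the helpers above.  The physical address,
 * size and register offset are invented for illustration.
 */
static inline int example_mmio_probe(void)
{
	void __iomem *regs = ioremap(0xf1000000, 0x1000);

	if (!regs)
		return -ENOMEM;
	out_be32(regs + 0x10, 0x1);	/* poke an assumed control register */
	iounmap(regs);
	return 0;
}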
|
||||
|
||||
int map_page(unsigned long va, phys_addr_t pa, int flags)
|
||||
{
|
||||
pmd_t *pd;
|
||||
pte_t *pg;
|
||||
int err = -ENOMEM;
|
||||
|
||||
/* Use upper 10 bits of VA to index the first level map */
|
||||
pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va);
|
||||
/* Use middle 10 bits of VA to index the second-level map */
|
||||
pg = pte_alloc_kernel(pd, va);
|
||||
if (pg != 0) {
|
||||
err = 0;
|
||||
/* The PTE should never be already set nor present in the
|
||||
* hash table
|
||||
*/
|
||||
BUG_ON((pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE)) &&
|
||||
flags);
|
||||
set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT,
|
||||
__pgprot(flags)));
|
||||
}
|
||||
smp_wmb();
|
||||
return err;
|
||||
}
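
/*
 * Hedged worked example of the two-level split used by map_page() above
 * (32-bit kernel, 4K pages, 1024-entry tables; the address is made up):
 *   va = 0xc0123456
 *   first-level index  = va >> 22           = 0x300  (upper 10 bits)
 *   second-level index = (va >> 12) & 0x3ff = 0x123  (middle 10 bits)
 *   page offset        = va & 0xfff         = 0x456  (low 12 bits)
 */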
|
||||
|
||||
/*
|
||||
* Map in a chunk of physical memory starting at start.
|
||||
*/
|
||||
void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
|
||||
{
|
||||
unsigned long v, s, f;
|
||||
phys_addr_t p;
|
||||
int ktext;
|
||||
|
||||
s = offset;
|
||||
v = PAGE_OFFSET + s;
|
||||
p = memstart_addr + s;
|
||||
for (; s < top; s += PAGE_SIZE) {
|
||||
ktext = ((char *) v >= _stext && (char *) v < etext);
|
||||
f = ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL;
|
||||
map_page(v, p, f);
|
||||
#ifdef CONFIG_PPC_STD_MMU_32
|
||||
if (ktext)
|
||||
hash_preload(&init_mm, v, 0, 0x300);
|
||||
#endif
|
||||
v += PAGE_SIZE;
|
||||
p += PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
void __init mapin_ram(void)
|
||||
{
|
||||
unsigned long s, top;
|
||||
|
||||
#ifndef CONFIG_WII
|
||||
top = total_lowmem;
|
||||
s = mmu_mapin_ram(top);
|
||||
__mapin_ram_chunk(s, top);
|
||||
#else
|
||||
if (!wii_hole_size) {
|
||||
s = mmu_mapin_ram(total_lowmem);
|
||||
__mapin_ram_chunk(s, total_lowmem);
|
||||
} else {
|
||||
top = wii_hole_start;
|
||||
s = mmu_mapin_ram(top);
|
||||
__mapin_ram_chunk(s, top);
|
||||
|
||||
top = memblock_end_of_DRAM();
|
||||
s = wii_mmu_mapin_mem2(top);
|
||||
__mapin_ram_chunk(s, top);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Scan the real Linux page tables and return a PTE pointer for
|
||||
* a virtual address in a context.
|
||||
* Returns true (1) if PTE was found, zero otherwise. The pointer to
|
||||
* the PTE pointer is unmodified if PTE is not found.
|
||||
*/
|
||||
int
|
||||
get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
int retval = 0;
|
||||
|
||||
pgd = pgd_offset(mm, addr & PAGE_MASK);
|
||||
if (pgd) {
|
||||
pud = pud_offset(pgd, addr & PAGE_MASK);
|
||||
if (pud && pud_present(*pud)) {
|
||||
pmd = pmd_offset(pud, addr & PAGE_MASK);
|
||||
if (pmd_present(*pmd)) {
|
||||
pte = pte_offset_map(pmd, addr & PAGE_MASK);
|
||||
if (pte) {
|
||||
retval = 1;
|
||||
*ptep = pte;
|
||||
if (pmdp)
|
||||
*pmdp = pmd;
|
||||
/* XXX caller needs to do pte_unmap, yuck */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return(retval);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_PAGEALLOC
|
||||
|
||||
static int __change_page_attr(struct page *page, pgprot_t prot)
|
||||
{
|
||||
pte_t *kpte;
|
||||
pmd_t *kpmd;
|
||||
unsigned long address;
|
||||
|
||||
BUG_ON(PageHighMem(page));
|
||||
address = (unsigned long)page_address(page);
|
||||
|
||||
if (v_mapped_by_bats(address) || v_mapped_by_tlbcam(address))
|
||||
return 0;
|
||||
if (!get_pteptr(&init_mm, address, &kpte, &kpmd))
|
||||
return -EINVAL;
|
||||
__set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0);
|
||||
wmb();
|
||||
flush_tlb_page(NULL, address);
|
||||
pte_unmap(kpte);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
 * Change the page attributes of a page in the linear mapping.
|
||||
*
|
||||
* THIS CONFLICTS WITH BAT MAPPINGS, DEBUG USE ONLY
|
||||
*/
|
||||
static int change_page_attr(struct page *page, int numpages, pgprot_t prot)
|
||||
{
|
||||
int i, err = 0;
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
for (i = 0; i < numpages; i++, page++) {
|
||||
err = __change_page_attr(page, prot);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
void kernel_map_pages(struct page *page, int numpages, int enable)
|
||||
{
|
||||
if (PageHighMem(page))
|
||||
return;
|
||||
|
||||
change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
|
||||
}
|
||||
#endif /* CONFIG_DEBUG_PAGEALLOC */
|
||||
|
||||
static int fixmaps;
|
||||
|
||||
void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
|
||||
{
|
||||
unsigned long address = __fix_to_virt(idx);
|
||||
|
||||
if (idx >= __end_of_fixed_addresses) {
|
||||
BUG();
|
||||
return;
|
||||
}
|
||||
|
||||
map_page(address, phys, pgprot_val(flags));
|
||||
fixmaps++;
|
||||
}
|
||||
|
||||
void __this_fixmap_does_not_exist(void)
|
||||
{
|
||||
WARN_ON(1);
|
||||
}
|
||||
904
arch/powerpc/mm/pgtable_64.c
Normal file
@@ -0,0 +1,904 @@
/*
|
||||
* This file contains ioremap and related functions for 64-bit machines.
|
||||
*
|
||||
* Derived from arch/ppc64/mm/init.c
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
|
||||
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
|
||||
* Copyright (C) 1996 Paul Mackerras
|
||||
*
|
||||
* Derived from "arch/i386/mm/init.c"
|
||||
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
||||
*
|
||||
* Dave Engebretsen <engebret@us.ibm.com>
|
||||
* Rework for PPC64 port.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/signal.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/prom.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/machdep.h>
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/cputable.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/firmware.h>
|
||||
|
||||
#include "mmu_decl.h"
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/thp.h>
|
||||
|
||||
/* Some sanity checking */
|
||||
#if TASK_SIZE_USER64 > PGTABLE_RANGE
|
||||
#error TASK_SIZE_USER64 exceeds pagetable range
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PPC_STD_MMU_64
|
||||
#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
|
||||
#error TASK_SIZE_USER64 exceeds user VSID range
|
||||
#endif
|
||||
#endif
|
||||
|
||||
unsigned long ioremap_bot = IOREMAP_BASE;
|
||||
|
||||
#ifdef CONFIG_PPC_MMU_NOHASH
|
||||
static __ref void *early_alloc_pgtable(unsigned long size)
|
||||
{
|
||||
void *pt;
|
||||
|
||||
if (init_bootmem_done)
|
||||
pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS));
|
||||
else
|
||||
pt = __va(memblock_alloc_base(size, size,
|
||||
__pa(MAX_DMA_ADDRESS)));
|
||||
memset(pt, 0, size);
|
||||
|
||||
return pt;
|
||||
}
|
||||
#endif /* CONFIG_PPC_MMU_NOHASH */
|
||||
|
||||
/*
|
||||
* map_kernel_page currently only called by __ioremap
|
||||
* map_kernel_page adds an entry to the ioremap page table
|
||||
* and adds an entry to the HPT, possibly bolting it
|
||||
*/
|
||||
int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
|
||||
{
|
||||
pgd_t *pgdp;
|
||||
pud_t *pudp;
|
||||
pmd_t *pmdp;
|
||||
pte_t *ptep;
|
||||
|
||||
if (slab_is_available()) {
|
||||
pgdp = pgd_offset_k(ea);
|
||||
pudp = pud_alloc(&init_mm, pgdp, ea);
|
||||
if (!pudp)
|
||||
return -ENOMEM;
|
||||
pmdp = pmd_alloc(&init_mm, pudp, ea);
|
||||
if (!pmdp)
|
||||
return -ENOMEM;
|
||||
ptep = pte_alloc_kernel(pmdp, ea);
|
||||
if (!ptep)
|
||||
return -ENOMEM;
|
||||
set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
|
||||
__pgprot(flags)));
|
||||
} else {
|
||||
#ifdef CONFIG_PPC_MMU_NOHASH
|
||||
		/* Warning! This will blow up if bootmem is not initialized,
		 * which our ppc64 code is keen to do. We'll need to
		 * fix it and/or be more careful.
		 */
|
||||
pgdp = pgd_offset_k(ea);
|
||||
#ifdef PUD_TABLE_SIZE
|
||||
if (pgd_none(*pgdp)) {
|
||||
pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
|
||||
BUG_ON(pudp == NULL);
|
||||
pgd_populate(&init_mm, pgdp, pudp);
|
||||
}
|
||||
#endif /* PUD_TABLE_SIZE */
|
||||
pudp = pud_offset(pgdp, ea);
|
||||
if (pud_none(*pudp)) {
|
||||
pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
|
||||
BUG_ON(pmdp == NULL);
|
||||
pud_populate(&init_mm, pudp, pmdp);
|
||||
}
|
||||
pmdp = pmd_offset(pudp, ea);
|
||||
if (!pmd_present(*pmdp)) {
|
||||
ptep = early_alloc_pgtable(PAGE_SIZE);
|
||||
BUG_ON(ptep == NULL);
|
||||
pmd_populate_kernel(&init_mm, pmdp, ptep);
|
||||
}
|
||||
ptep = pte_offset_kernel(pmdp, ea);
|
||||
set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
|
||||
__pgprot(flags)));
|
||||
#else /* CONFIG_PPC_MMU_NOHASH */
|
||||
/*
|
||||
* If the mm subsystem is not fully up, we cannot create a
|
||||
* linux page table entry for this mapping. Simply bolt an
|
||||
* entry in the hardware page table.
|
||||
*
|
||||
*/
|
||||
if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
|
||||
mmu_io_psize, mmu_kernel_ssize)) {
|
||||
printk(KERN_ERR "Failed to do bolted mapping IO "
|
||||
"memory at %016lx !\n", pa);
|
||||
return -ENOMEM;
|
||||
}
|
||||
#endif /* !CONFIG_PPC_MMU_NOHASH */
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3E_64
|
||||
/*
|
||||
* With hardware tablewalk, a sync is needed to ensure that
|
||||
* subsequent accesses see the PTE we just wrote. Unlike userspace
|
||||
* mappings, we can't tolerate spurious faults, so make sure
|
||||
* the new PTE will be seen the first time.
|
||||
*/
|
||||
mb();
|
||||
#else
|
||||
smp_wmb();
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* __ioremap_at - Low level function to establish the page tables
|
||||
* for an IO mapping
|
||||
*/
|
||||
void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
|
||||
unsigned long flags)
|
||||
{
|
||||
unsigned long i;
|
||||
|
||||
/* Make sure we have the base flags */
|
||||
if ((flags & _PAGE_PRESENT) == 0)
|
||||
flags |= pgprot_val(PAGE_KERNEL);
|
||||
|
||||
/* Non-cacheable page cannot be coherent */
|
||||
if (flags & _PAGE_NO_CACHE)
|
||||
flags &= ~_PAGE_COHERENT;
|
||||
|
||||
/* We don't support the 4K PFN hack with ioremap */
|
||||
if (flags & _PAGE_4K_PFN)
|
||||
return NULL;
|
||||
|
||||
WARN_ON(pa & ~PAGE_MASK);
|
||||
WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
|
||||
WARN_ON(size & ~PAGE_MASK);
|
||||
|
||||
for (i = 0; i < size; i += PAGE_SIZE)
|
||||
if (map_kernel_page((unsigned long)ea+i, pa+i, flags))
|
||||
return NULL;
|
||||
|
||||
return (void __iomem *)ea;
|
||||
}
|
||||
|
||||
/**
|
||||
* __iounmap_from - Low level function to tear down the page tables
|
||||
* for an IO mapping. This is used for mappings that
|
||||
* are manipulated manually, like partial unmapping of
|
||||
* PCI IOs or ISA space.
|
||||
*/
|
||||
void __iounmap_at(void *ea, unsigned long size)
|
||||
{
|
||||
WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
|
||||
WARN_ON(size & ~PAGE_MASK);
|
||||
|
||||
unmap_kernel_range((unsigned long)ea, size);
|
||||
}
|
||||
|
||||
void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
|
||||
unsigned long flags, void *caller)
|
||||
{
|
||||
phys_addr_t paligned;
|
||||
void __iomem *ret;
|
||||
|
||||
/*
|
||||
* Choose an address to map it to.
|
||||
* Once the imalloc system is running, we use it.
|
||||
* Before that, we map using addresses going
|
||||
* up from ioremap_bot. imalloc will use
|
||||
* the addresses from ioremap_bot through
|
||||
* IMALLOC_END
|
||||
*
|
||||
*/
|
||||
paligned = addr & PAGE_MASK;
|
||||
size = PAGE_ALIGN(addr + size) - paligned;
|
||||
|
||||
if ((size == 0) || (paligned == 0))
|
||||
return NULL;
|
||||
|
||||
if (mem_init_done) {
|
||||
struct vm_struct *area;
|
||||
|
||||
area = __get_vm_area_caller(size, VM_IOREMAP,
|
||||
ioremap_bot, IOREMAP_END,
|
||||
caller);
|
||||
if (area == NULL)
|
||||
return NULL;
|
||||
|
||||
area->phys_addr = paligned;
|
||||
ret = __ioremap_at(paligned, area->addr, size, flags);
|
||||
if (!ret)
|
||||
vunmap(area->addr);
|
||||
} else {
|
||||
ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
|
||||
if (ret)
|
||||
ioremap_bot += size;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
ret += addr & ~PAGE_MASK;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
|
||||
unsigned long flags)
|
||||
{
|
||||
return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
|
||||
}
|
||||
|
||||
void __iomem * ioremap(phys_addr_t addr, unsigned long size)
|
||||
{
|
||||
unsigned long flags = _PAGE_NO_CACHE | _PAGE_GUARDED;
|
||||
void *caller = __builtin_return_address(0);
|
||||
|
||||
if (ppc_md.ioremap)
|
||||
return ppc_md.ioremap(addr, size, flags, caller);
|
||||
return __ioremap_caller(addr, size, flags, caller);
|
||||
}
|
||||
|
||||
void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
|
||||
{
|
||||
unsigned long flags = _PAGE_NO_CACHE;
|
||||
void *caller = __builtin_return_address(0);
|
||||
|
||||
if (ppc_md.ioremap)
|
||||
return ppc_md.ioremap(addr, size, flags, caller);
|
||||
return __ioremap_caller(addr, size, flags, caller);
|
||||
}
|
||||
|
||||
void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
|
||||
unsigned long flags)
|
||||
{
|
||||
void *caller = __builtin_return_address(0);
|
||||
|
||||
/* writeable implies dirty for kernel addresses */
|
||||
if (flags & _PAGE_RW)
|
||||
flags |= _PAGE_DIRTY;
|
||||
|
||||
/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
|
||||
flags &= ~(_PAGE_USER | _PAGE_EXEC);
|
||||
|
||||
#ifdef _PAGE_BAP_SR
|
||||
/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
|
||||
* which means that we just cleared supervisor access... oops ;-) This
|
||||
* restores it
|
||||
*/
|
||||
flags |= _PAGE_BAP_SR;
|
||||
#endif
|
||||
|
||||
if (ppc_md.ioremap)
|
||||
return ppc_md.ioremap(addr, size, flags, caller);
|
||||
return __ioremap_caller(addr, size, flags, caller);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Unmap an IO region and remove it from imalloc'd list.
|
||||
* Access to IO memory should be serialized by driver.
|
||||
*/
|
||||
void __iounmap(volatile void __iomem *token)
|
||||
{
|
||||
void *addr;
|
||||
|
||||
if (!mem_init_done)
|
||||
return;
|
||||
|
||||
addr = (void *) ((unsigned long __force)
|
||||
PCI_FIX_ADDR(token) & PAGE_MASK);
|
||||
if ((unsigned long)addr < ioremap_bot) {
|
||||
printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
|
||||
" at 0x%p\n", addr);
|
||||
return;
|
||||
}
|
||||
vunmap(addr);
|
||||
}
|
||||
|
||||
void iounmap(volatile void __iomem *token)
|
||||
{
|
||||
if (ppc_md.iounmap)
|
||||
ppc_md.iounmap(token);
|
||||
else
|
||||
__iounmap(token);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(ioremap);
|
||||
EXPORT_SYMBOL(ioremap_wc);
|
||||
EXPORT_SYMBOL(ioremap_prot);
|
||||
EXPORT_SYMBOL(__ioremap);
|
||||
EXPORT_SYMBOL(__ioremap_at);
|
||||
EXPORT_SYMBOL(iounmap);
|
||||
EXPORT_SYMBOL(__iounmap);
|
||||
EXPORT_SYMBOL(__iounmap_at);
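
/*
 * Hedged usage sketch for the fixed-address variants above: mapping a
 * hypothetical 64K ISA I/O window at a caller-chosen virtual address.
 * The helper name and the flag choice are assumptions for illustration;
 * __ioremap_at() expects page-aligned phys/virt/size.
 */
static inline void __iomem *example_map_isa_io(phys_addr_t io_phys,
					       void *io_virt)
{
	return __ioremap_at(io_phys, io_virt, 0x10000,
			    _PAGE_NO_CACHE | _PAGE_GUARDED);
}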
|
||||
|
||||
/*
|
||||
* For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags
|
||||
* For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address.
|
||||
*/
|
||||
struct page *pmd_page(pmd_t pmd)
|
||||
{
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
if (pmd_trans_huge(pmd))
|
||||
return pfn_to_page(pmd_pfn(pmd));
|
||||
#endif
|
||||
return virt_to_page(pmd_page_vaddr(pmd));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PPC_64K_PAGES
|
||||
static pte_t *get_from_cache(struct mm_struct *mm)
|
||||
{
|
||||
void *pte_frag, *ret;
|
||||
|
||||
spin_lock(&mm->page_table_lock);
|
||||
ret = mm->context.pte_frag;
|
||||
if (ret) {
|
||||
pte_frag = ret + PTE_FRAG_SIZE;
|
||||
/*
|
||||
* If we have taken up all the fragments mark PTE page NULL
|
||||
*/
|
||||
if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
|
||||
pte_frag = NULL;
|
||||
mm->context.pte_frag = pte_frag;
|
||||
}
|
||||
spin_unlock(&mm->page_table_lock);
|
||||
return (pte_t *)ret;
|
||||
}
|
||||
|
||||
static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
|
||||
{
|
||||
void *ret = NULL;
|
||||
struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
|
||||
__GFP_REPEAT | __GFP_ZERO);
|
||||
if (!page)
|
||||
return NULL;
|
||||
if (!kernel && !pgtable_page_ctor(page)) {
|
||||
__free_page(page);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ret = page_address(page);
|
||||
spin_lock(&mm->page_table_lock);
|
||||
/*
|
||||
* If we find pgtable_page set, we return
|
||||
	 * the allocated page with a single fragment
|
||||
* count.
|
||||
*/
|
||||
if (likely(!mm->context.pte_frag)) {
|
||||
atomic_set(&page->_count, PTE_FRAG_NR);
|
||||
mm->context.pte_frag = ret + PTE_FRAG_SIZE;
|
||||
}
|
||||
spin_unlock(&mm->page_table_lock);
|
||||
|
||||
return (pte_t *)ret;
|
||||
}
|
||||
|
||||
pte_t *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
|
||||
{
|
||||
pte_t *pte;
|
||||
|
||||
pte = get_from_cache(mm);
|
||||
if (pte)
|
||||
return pte;
|
||||
|
||||
return __alloc_for_cache(mm, kernel);
|
||||
}
|
||||
|
||||
void page_table_free(struct mm_struct *mm, unsigned long *table, int kernel)
|
||||
{
|
||||
struct page *page = virt_to_page(table);
|
||||
if (put_page_testzero(page)) {
|
||||
if (!kernel)
|
||||
pgtable_page_dtor(page);
|
||||
free_hot_cold_page(page, 0);
|
||||
}
|
||||
}
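
/*
 * Hedged worked example of the PTE-fragment accounting above, assuming the
 * usual 64K base page with PTE_FRAG_SIZE = 4K:
 *   PTE_FRAG_NR = 64K / 4K = 16 fragments per backing page
 *   __alloc_for_cache() sets page->_count to 16 when it installs the page
 *   as the per-mm fragment pool, one reference per 4K fragment
 *   page_table_free() drops one reference per fragment; the backing page is
 *   only freed when the last of the 16 fragments is released.
 */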
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static void page_table_free_rcu(void *table)
|
||||
{
|
||||
struct page *page = virt_to_page(table);
|
||||
if (put_page_testzero(page)) {
|
||||
pgtable_page_dtor(page);
|
||||
free_hot_cold_page(page, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
|
||||
{
|
||||
unsigned long pgf = (unsigned long)table;
|
||||
|
||||
BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
|
||||
pgf |= shift;
|
||||
tlb_remove_table(tlb, (void *)pgf);
|
||||
}
|
||||
|
||||
void __tlb_remove_table(void *_table)
|
||||
{
|
||||
void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
|
||||
unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
|
||||
|
||||
if (!shift)
|
||||
/* PTE page needs special handling */
|
||||
page_table_free_rcu(table);
|
||||
else {
|
||||
BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
|
||||
kmem_cache_free(PGT_CACHE(shift), table);
|
||||
}
|
||||
}
|
||||
#else
|
||||
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
|
||||
{
|
||||
if (!shift) {
|
||||
/* PTE page needs special handling */
|
||||
struct page *page = virt_to_page(table);
|
||||
if (put_page_testzero(page)) {
|
||||
pgtable_page_dtor(page);
|
||||
free_hot_cold_page(page, 0);
|
||||
}
|
||||
} else {
|
||||
BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
|
||||
kmem_cache_free(PGT_CACHE(shift), table);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif /* CONFIG_PPC_64K_PAGES */
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
|
||||
/*
|
||||
* This is called when relaxing access to a hugepage. It's also called in the page
|
||||
* fault path when we don't hit any of the major fault cases, ie, a minor
|
||||
* update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
|
||||
* handled those two for us, we additionally deal with missing execute
|
||||
* permission here on some processors
|
||||
*/
|
||||
int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
|
||||
pmd_t *pmdp, pmd_t entry, int dirty)
|
||||
{
|
||||
int changed;
|
||||
#ifdef CONFIG_DEBUG_VM
|
||||
WARN_ON(!pmd_trans_huge(*pmdp));
|
||||
assert_spin_locked(&vma->vm_mm->page_table_lock);
|
||||
#endif
|
||||
changed = !pmd_same(*(pmdp), entry);
|
||||
if (changed) {
|
||||
__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
|
||||
/*
|
||||
* Since we are not supporting SW TLB systems, we don't
|
||||
* have any thing similar to flush_tlb_page_nohash()
|
||||
*/
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
|
||||
pmd_t *pmdp, unsigned long clr,
|
||||
unsigned long set)
|
||||
{
|
||||
|
||||
unsigned long old, tmp;
|
||||
|
||||
#ifdef CONFIG_DEBUG_VM
|
||||
WARN_ON(!pmd_trans_huge(*pmdp));
|
||||
assert_spin_locked(&mm->page_table_lock);
|
||||
#endif
|
||||
|
||||
#ifdef PTE_ATOMIC_UPDATES
|
||||
__asm__ __volatile__(
|
||||
"1: ldarx %0,0,%3\n\
|
||||
andi. %1,%0,%6\n\
|
||||
bne- 1b \n\
|
||||
andc %1,%0,%4 \n\
|
||||
or %1,%1,%7\n\
|
||||
stdcx. %1,0,%3 \n\
|
||||
bne- 1b"
|
||||
: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
|
||||
: "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY), "r" (set)
|
||||
: "cc" );
|
||||
#else
|
||||
old = pmd_val(*pmdp);
|
||||
*pmdp = __pmd((old & ~clr) | set);
|
||||
#endif
|
||||
trace_hugepage_update(addr, old, clr, set);
|
||||
if (old & _PAGE_HASHPTE)
|
||||
hpte_do_hugepage_flush(mm, addr, pmdp, old);
|
||||
return old;
|
||||
}
|
||||
|
||||
pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address,
|
||||
pmd_t *pmdp)
|
||||
{
|
||||
pmd_t pmd;
|
||||
|
||||
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
|
||||
if (pmd_trans_huge(*pmdp)) {
|
||||
pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp);
|
||||
} else {
|
||||
/*
|
||||
* khugepaged calls this for normal pmd
|
||||
*/
|
||||
pmd = *pmdp;
|
||||
pmd_clear(pmdp);
|
||||
/*
|
||||
* Wait for all pending hash_page to finish. This is needed
|
||||
* in case of subpage collapse. When we collapse normal pages
|
||||
* to hugepage, we first clear the pmd, then invalidate all
|
||||
* the PTE entries. The assumption here is that any low level
|
||||
* page fault will see a none pmd and take the slow path that
|
||||
* will wait on mmap_sem. But we could very well be in a
|
||||
* hash_page with local ptep pointer value. Such a hash page
|
||||
* can result in adding new HPTE entries for normal subpages.
|
||||
* That means we could be modifying the page content as we
|
||||
* copy them to a huge page. So wait for parallel hash_page
|
||||
* to finish before invalidating HPTE entries. We can do this
|
||||
* by sending an IPI to all the cpus and executing a dummy
|
||||
* function there.
|
||||
*/
|
||||
kick_all_cpus_sync();
|
||||
/*
|
||||
* Now invalidate the hpte entries in the range
|
||||
* covered by pmd. This make sure we take a
|
||||
* fault and will find the pmd as none, which will
|
||||
* result in a major fault which takes mmap_sem and
|
||||
* hence wait for collapse to complete. Without this
|
||||
* the __collapse_huge_page_copy can result in copying
|
||||
* the old content.
|
||||
*/
|
||||
flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
|
||||
}
|
||||
return pmd;
|
||||
}
|
||||
|
||||
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
||||
unsigned long address, pmd_t *pmdp)
|
||||
{
|
||||
return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
|
||||
}
|
||||
|
||||
/*
|
||||
* We currently remove entries from the hashtable regardless of whether
|
||||
* the entry was young or dirty. The generic routines only flush if the
|
||||
* entry was young or dirty which is not good enough.
|
||||
*
|
||||
* We should be more intelligent about this but for the moment we override
|
||||
* these functions and force a tlb flush unconditionally
|
||||
*/
|
||||
int pmdp_clear_flush_young(struct vm_area_struct *vma,
|
||||
unsigned long address, pmd_t *pmdp)
|
||||
{
|
||||
return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
|
||||
}
|
||||
|
||||
/*
|
||||
* We mark the pmd splitting and invalidate all the hpte
|
||||
* entries for this hugepage.
|
||||
*/
|
||||
void pmdp_splitting_flush(struct vm_area_struct *vma,
|
||||
unsigned long address, pmd_t *pmdp)
|
||||
{
|
||||
unsigned long old, tmp;
|
||||
|
||||
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
|
||||
|
||||
#ifdef CONFIG_DEBUG_VM
|
||||
WARN_ON(!pmd_trans_huge(*pmdp));
|
||||
assert_spin_locked(&vma->vm_mm->page_table_lock);
|
||||
#endif
|
||||
|
||||
#ifdef PTE_ATOMIC_UPDATES
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: ldarx %0,0,%3\n\
|
||||
andi. %1,%0,%6\n\
|
||||
bne- 1b \n\
|
||||
ori %1,%0,%4 \n\
|
||||
stdcx. %1,0,%3 \n\
|
||||
bne- 1b"
|
||||
: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
|
||||
: "r" (pmdp), "i" (_PAGE_SPLITTING), "m" (*pmdp), "i" (_PAGE_BUSY)
|
||||
: "cc" );
|
||||
#else
|
||||
old = pmd_val(*pmdp);
|
||||
*pmdp = __pmd(old | _PAGE_SPLITTING);
|
||||
#endif
|
||||
/*
|
||||
	 * If we didn't have the splitting flag set, go and flush the
|
||||
* HPTE entries.
|
||||
*/
|
||||
trace_hugepage_splitting(address, old);
|
||||
if (!(old & _PAGE_SPLITTING)) {
|
||||
/* We need to flush the hpte */
|
||||
if (old & _PAGE_HASHPTE)
|
||||
hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old);
|
||||
}
|
||||
/*
|
||||
	 * This ensures that generic code that relies on IRQ disabling
	 * to prevent a parallel THP split works as expected.
|
||||
*/
|
||||
kick_all_cpus_sync();
|
||||
}
|
||||
|
||||
/*
|
||||
* We want to put the pgtable in pmd and use pgtable for tracking
|
||||
* the base page size hptes
|
||||
*/
|
||||
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
|
||||
pgtable_t pgtable)
|
||||
{
|
||||
pgtable_t *pgtable_slot;
|
||||
assert_spin_locked(&mm->page_table_lock);
|
||||
/*
|
||||
* we store the pgtable in the second half of PMD
|
||||
*/
|
||||
pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
|
||||
*pgtable_slot = pgtable;
|
||||
/*
|
||||
	 * Expose the deposited pgtable to other cpus before we set the
	 * hugepage PTE at the pmd level; the hash fault code looks at the
	 * deposited pgtable to store hash index values.
|
||||
*/
|
||||
smp_wmb();
|
||||
}
|
||||
|
||||
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
|
||||
{
|
||||
pgtable_t pgtable;
|
||||
pgtable_t *pgtable_slot;
|
||||
|
||||
assert_spin_locked(&mm->page_table_lock);
|
||||
pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
|
||||
pgtable = *pgtable_slot;
|
||||
/*
|
||||
* Once we withdraw, mark the entry NULL.
|
||||
*/
|
||||
*pgtable_slot = NULL;
|
||||
/*
|
||||
	 * We store HPTE information in the deposited PTE fragment;
	 * zero out the content on withdraw.
|
||||
*/
|
||||
memset(pgtable, 0, PTE_FRAG_SIZE);
|
||||
return pgtable;
|
||||
}
|
||||
|
||||
/*
|
||||
* set a new huge pmd. We should not be called for updating
|
||||
* an existing pmd entry. That should go via pmd_hugepage_update.
|
||||
*/
|
||||
void set_pmd_at(struct mm_struct *mm, unsigned long addr,
|
||||
pmd_t *pmdp, pmd_t pmd)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_VM
|
||||
WARN_ON(pmd_val(*pmdp) & _PAGE_PRESENT);
|
||||
assert_spin_locked(&mm->page_table_lock);
|
||||
WARN_ON(!pmd_trans_huge(pmd));
|
||||
#endif
|
||||
trace_hugepage_set_pmd(addr, pmd);
|
||||
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
|
||||
}
|
||||
|
||||
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
|
||||
pmd_t *pmdp)
|
||||
{
|
||||
pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* A linux hugepage PMD was changed and the corresponding hash table entries
|
||||
 * need to be flushed.
|
||||
*/
|
||||
void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
|
||||
pmd_t *pmdp, unsigned long old_pmd)
|
||||
{
|
||||
int ssize, i;
|
||||
unsigned long s_addr;
|
||||
int max_hpte_count;
|
||||
unsigned int psize, valid;
|
||||
unsigned char *hpte_slot_array;
|
||||
unsigned long hidx, vpn, vsid, hash, shift, slot;
|
||||
|
||||
/*
|
||||
* Flush all the hptes mapping this hugepage
|
||||
*/
|
||||
s_addr = addr & HPAGE_PMD_MASK;
|
||||
hpte_slot_array = get_hpte_slot_array(pmdp);
|
||||
/*
|
||||
	 * If we try to do a HUGE PTE update after a withdraw is done,
	 * we will find the below NULL. This happens when we do
|
||||
* split_huge_page_pmd
|
||||
*/
|
||||
if (!hpte_slot_array)
|
||||
return;
|
||||
|
||||
/* get the base page size,vsid and segment size */
|
||||
#ifdef CONFIG_DEBUG_VM
|
||||
psize = get_slice_psize(mm, s_addr);
|
||||
BUG_ON(psize == MMU_PAGE_16M);
|
||||
#endif
|
||||
if (old_pmd & _PAGE_COMBO)
|
||||
psize = MMU_PAGE_4K;
|
||||
else
|
||||
psize = MMU_PAGE_64K;
|
||||
|
||||
if (!is_kernel_addr(s_addr)) {
|
||||
ssize = user_segment_size(s_addr);
|
||||
vsid = get_vsid(mm->context.id, s_addr, ssize);
|
||||
WARN_ON(vsid == 0);
|
||||
} else {
|
||||
vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize);
|
||||
ssize = mmu_kernel_ssize;
|
||||
}
|
||||
|
||||
if (ppc_md.hugepage_invalidate)
|
||||
return ppc_md.hugepage_invalidate(vsid, s_addr,
|
||||
hpte_slot_array,
|
||||
psize, ssize);
|
||||
/*
|
||||
	 * No bulk hpte removal support, invalidate each entry
|
||||
*/
|
||||
shift = mmu_psize_defs[psize].shift;
|
||||
max_hpte_count = HPAGE_PMD_SIZE >> shift;
|
||||
for (i = 0; i < max_hpte_count; i++) {
|
||||
/*
|
||||
		 * 8 bits per hpte entry
|
||||
* 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
|
||||
*/
|
||||
valid = hpte_valid(hpte_slot_array, i);
|
||||
if (!valid)
|
||||
continue;
|
||||
hidx = hpte_hash_index(hpte_slot_array, i);
|
||||
|
||||
/* get the vpn */
|
||||
addr = s_addr + (i * (1ul << shift));
|
||||
vpn = hpt_vpn(addr, vsid, ssize);
|
||||
hash = hpt_hash(vpn, shift, ssize);
|
||||
if (hidx & _PTEIDX_SECONDARY)
|
||||
hash = ~hash;
|
||||
|
||||
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
|
||||
slot += hidx & _PTEIDX_GROUP_IX;
|
||||
ppc_md.hpte_invalidate(slot, vpn, psize,
|
||||
MMU_PAGE_16M, ssize, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
|
||||
{
|
||||
pmd_val(pmd) |= pgprot_val(pgprot);
|
||||
return pmd;
|
||||
}
|
||||
|
||||
pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
|
||||
{
|
||||
pmd_t pmd;
|
||||
/*
|
||||
* For a valid pte, we would have _PAGE_PRESENT or _PAGE_FILE always
|
||||
* set. We use this to check THP page at pmd level.
|
||||
	 * A leaf pte for a huge page has its bottom two bits != 00.
|
||||
*/
|
||||
pmd_val(pmd) = pfn << PTE_RPN_SHIFT;
|
||||
pmd_val(pmd) |= _PAGE_THP_HUGE;
|
||||
pmd = pmd_set_protbits(pmd, pgprot);
|
||||
return pmd;
|
||||
}
|
||||
|
||||
pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
|
||||
{
|
||||
return pfn_pmd(page_to_pfn(page), pgprot);
|
||||
}
|
||||
|
||||
pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
|
||||
{
|
||||
|
||||
pmd_val(pmd) &= _HPAGE_CHG_MASK;
|
||||
pmd = pmd_set_protbits(pmd, newprot);
|
||||
return pmd;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called at the end of handling a user page fault, when the
|
||||
* fault has been handled by updating a HUGE PMD entry in the linux page tables.
|
||||
* We use it to preload an HPTE into the hash table corresponding to
|
||||
* the updated linux HUGE PMD entry.
|
||||
*/
|
||||
void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
|
||||
pmd_t *pmd)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
pmd_t pmdp_get_and_clear(struct mm_struct *mm,
|
||||
unsigned long addr, pmd_t *pmdp)
|
||||
{
|
||||
pmd_t old_pmd;
|
||||
pgtable_t pgtable;
|
||||
unsigned long old;
|
||||
pgtable_t *pgtable_slot;
|
||||
|
||||
old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
|
||||
old_pmd = __pmd(old);
|
||||
/*
|
||||
* We have pmd == none and we are holding page_table_lock.
|
||||
* So we can safely go and clear the pgtable hash
|
||||
* index info.
|
||||
*/
|
||||
pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
|
||||
pgtable = *pgtable_slot;
|
||||
/*
|
||||
	 * Let's zero out the old valid and hash index details;
	 * the hash fault code looks at them.
|
||||
*/
|
||||
memset(pgtable, 0, PTE_FRAG_SIZE);
|
||||
return old_pmd;
|
||||
}
|
||||
|
||||
int has_transparent_hugepage(void)
|
||||
{
|
||||
if (!mmu_has_feature(MMU_FTR_16M_PAGE))
|
||||
return 0;
|
||||
/*
|
||||
* We support THP only if PMD_SIZE is 16MB.
|
||||
*/
|
||||
if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
|
||||
return 0;
|
||||
/*
|
||||
	 * We need to make sure that we support 16MB hugepage in a segment
|
||||
* with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
|
||||
* of 64K.
|
||||
*/
|
||||
/*
|
||||
* If we have 64K HPTE, we will be using that by default
|
||||
*/
|
||||
if (mmu_psize_defs[MMU_PAGE_64K].shift &&
|
||||
(mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
|
||||
return 0;
|
||||
/*
|
||||
* Ok we only have 4K HPTE
|
||||
*/
|
||||
if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
288
arch/powerpc/mm/ppc_mmu_32.c
Normal file
@@ -0,0 +1,288 @@
/*
|
||||
* This file contains the routines for handling the MMU on those
|
||||
* PowerPC implementations where the MMU substantially follows the
|
||||
* architecture specification. This includes the 6xx, 7xx, 7xxx,
|
||||
* and 8260 implementations but excludes the 8xx and 4xx.
|
||||
* -- paulus
|
||||
*
|
||||
* Derived from arch/ppc/mm/init.c:
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
|
||||
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
|
||||
* Copyright (C) 1996 Paul Mackerras
|
||||
*
|
||||
* Derived from "arch/i386/mm/init.c"
|
||||
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/memblock.h>
|
||||
|
||||
#include <asm/prom.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/machdep.h>
|
||||
|
||||
#include "mmu_decl.h"
|
||||
|
||||
struct hash_pte *Hash, *Hash_end;
|
||||
unsigned long Hash_size, Hash_mask;
|
||||
unsigned long _SDR1;
|
||||
|
||||
struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */
|
||||
|
||||
struct batrange { /* stores address ranges mapped by BATs */
|
||||
unsigned long start;
|
||||
unsigned long limit;
|
||||
phys_addr_t phys;
|
||||
} bat_addrs[8];
|
||||
|
||||
/*
|
||||
* Return PA for this VA if it is mapped by a BAT, or 0
|
||||
*/
|
||||
phys_addr_t v_mapped_by_bats(unsigned long va)
|
||||
{
|
||||
int b;
|
||||
for (b = 0; b < 4; ++b)
|
||||
if (va >= bat_addrs[b].start && va < bat_addrs[b].limit)
|
||||
return bat_addrs[b].phys + (va - bat_addrs[b].start);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return VA for a given PA or 0 if not mapped
|
||||
*/
|
||||
unsigned long p_mapped_by_bats(phys_addr_t pa)
|
||||
{
|
||||
int b;
|
||||
for (b = 0; b < 4; ++b)
|
||||
if (pa >= bat_addrs[b].phys
|
||||
&& pa < (bat_addrs[b].limit-bat_addrs[b].start)
|
||||
+bat_addrs[b].phys)
|
||||
return bat_addrs[b].start+(pa-bat_addrs[b].phys);
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned long __init mmu_mapin_ram(unsigned long top)
|
||||
{
|
||||
unsigned long tot, bl, done;
|
||||
unsigned long max_size = (256<<20);
|
||||
|
||||
if (__map_without_bats) {
|
||||
printk(KERN_DEBUG "RAM mapped without BATs\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Set up BAT2 and if necessary BAT3 to cover RAM. */
|
||||
|
||||
/* Make sure we don't map a block larger than the
|
||||
smallest alignment of the physical address. */
|
||||
tot = top;
|
||||
for (bl = 128<<10; bl < max_size; bl <<= 1) {
|
||||
if (bl * 2 > tot)
|
||||
break;
|
||||
}
|
||||
|
||||
setbat(2, PAGE_OFFSET, 0, bl, PAGE_KERNEL_X);
|
||||
done = (unsigned long)bat_addrs[2].limit - PAGE_OFFSET + 1;
|
||||
if ((done < tot) && !bat_addrs[3].limit) {
|
||||
/* use BAT3 to cover a bit more */
|
||||
tot -= done;
|
||||
for (bl = 128<<10; bl < max_size; bl <<= 1)
|
||||
if (bl * 2 > tot)
|
||||
break;
|
||||
setbat(3, PAGE_OFFSET+done, done, bl, PAGE_KERNEL_X);
|
||||
done = (unsigned long)bat_addrs[3].limit - PAGE_OFFSET + 1;
|
||||
}
|
||||
|
||||
return done;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set up one of the I/D BAT (block address translation) register pairs.
|
||||
* The parameters are not checked; in particular size must be a power
|
||||
* of 2 between 128k and 256M.
|
||||
*/
|
||||
void __init setbat(int index, unsigned long virt, phys_addr_t phys,
|
||||
unsigned int size, int flags)
|
||||
{
|
||||
unsigned int bl;
|
||||
int wimgxpp;
|
||||
struct ppc_bat *bat = BATS[index];
|
||||
|
||||
if ((flags & _PAGE_NO_CACHE) ||
|
||||
(cpu_has_feature(CPU_FTR_NEED_COHERENT) == 0))
|
||||
flags &= ~_PAGE_COHERENT;
|
||||
|
||||
bl = (size >> 17) - 1;
|
||||
if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
|
||||
/* 603, 604, etc. */
|
||||
/* Do DBAT first */
|
||||
wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
|
||||
| _PAGE_COHERENT | _PAGE_GUARDED);
|
||||
wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
|
||||
bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
|
||||
bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
|
||||
if (flags & _PAGE_USER)
|
||||
bat[1].batu |= 1; /* Vp = 1 */
|
||||
if (flags & _PAGE_GUARDED) {
|
||||
/* G bit must be zero in IBATs */
|
||||
bat[0].batu = bat[0].batl = 0;
|
||||
} else {
|
||||
/* make IBAT same as DBAT */
|
||||
bat[0] = bat[1];
|
||||
}
|
||||
} else {
|
||||
/* 601 cpu */
|
||||
if (bl > BL_8M)
|
||||
bl = BL_8M;
|
||||
wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
|
||||
| _PAGE_COHERENT);
|
||||
wimgxpp |= (flags & _PAGE_RW)?
|
||||
((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX;
|
||||
bat->batu = virt | wimgxpp | 4; /* Ks=0, Ku=1 */
|
||||
bat->batl = phys | bl | 0x40; /* V=1 */
|
||||
}
|
||||
|
||||
bat_addrs[index].start = virt;
|
||||
bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1;
|
||||
bat_addrs[index].phys = phys;
|
||||
}
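
/*
 * Sketch of the BL arithmetic used by setbat() above: for a power-of-two
 * size between 128KB and 256MB, BL is (size / 128KB) - 1, i.e. a block
 * mask in 128KB units.  The sizes printed are examples only; BL_8M and
 * the BAT register layout are defined elsewhere and not redefined here.
 */
#include <stdio.h>

static unsigned int ex_bat_bl(unsigned int size)
{
	return (size >> 17) - 1;
}

int main(void)
{
	printf("128KB -> BL 0x%03x\n", ex_bat_bl(128 << 10));	/* 0x000 */
	printf("8MB   -> BL 0x%03x\n", ex_bat_bl(8 << 20));	/* 0x03f */
	printf("256MB -> BL 0x%03x\n", ex_bat_bl(256 << 20));	/* 0x7ff */
	return 0;
}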
|
||||
|
||||
/*
|
||||
* Preload a translation in the hash table
|
||||
*/
|
||||
void hash_preload(struct mm_struct *mm, unsigned long ea,
|
||||
unsigned long access, unsigned long trap)
|
||||
{
|
||||
pmd_t *pmd;
|
||||
|
||||
if (Hash == 0)
|
||||
return;
|
||||
pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea);
|
||||
if (!pmd_none(*pmd))
|
||||
add_hash_page(mm->context.id, ea, pmd_val(*pmd));
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the hash table and patch the instructions in hashtable.S.
|
||||
*/
|
||||
void __init MMU_init_hw(void)
|
||||
{
|
||||
unsigned int hmask, mb, mb2;
|
||||
unsigned int n_hpteg, lg_n_hpteg;
|
||||
|
||||
extern unsigned int hash_page_patch_A[];
|
||||
extern unsigned int hash_page_patch_B[], hash_page_patch_C[];
|
||||
extern unsigned int hash_page[];
|
||||
extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[];
|
||||
|
||||
if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
|
||||
/*
|
||||
* Put a blr (procedure return) instruction at the
|
||||
* start of hash_page, since we can still get DSI
|
||||
* exceptions on a 603.
|
||||
*/
|
||||
hash_page[0] = 0x4e800020;
|
||||
flush_icache_range((unsigned long) &hash_page[0],
|
||||
(unsigned long) &hash_page[1]);
|
||||
return;
|
||||
}
|
||||
|
||||
if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105);
|
||||
|
||||
#define LG_HPTEG_SIZE 6 /* 64 bytes per HPTEG */
|
||||
#define SDR1_LOW_BITS ((n_hpteg - 1) >> 10)
|
||||
#define MIN_N_HPTEG 1024 /* min 64kB hash table */
|
||||
|
||||
/*
|
||||
* Allow 1 HPTE (1/8 HPTEG) for each page of memory.
|
||||
* This is less than the recommended amount, but then
|
||||
* Linux ain't AIX.
|
||||
*/
|
||||
n_hpteg = total_memory / (PAGE_SIZE * 8);
|
||||
if (n_hpteg < MIN_N_HPTEG)
|
||||
n_hpteg = MIN_N_HPTEG;
|
||||
lg_n_hpteg = __ilog2(n_hpteg);
|
||||
if (n_hpteg & (n_hpteg - 1)) {
|
||||
++lg_n_hpteg; /* round up if not power of 2 */
|
||||
n_hpteg = 1 << lg_n_hpteg;
|
||||
}
|
||||
Hash_size = n_hpteg << LG_HPTEG_SIZE;
|
||||
|
||||
/*
|
||||
* Find some memory for the hash table.
|
||||
*/
|
||||
if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
|
||||
Hash = __va(memblock_alloc(Hash_size, Hash_size));
|
||||
cacheable_memzero(Hash, Hash_size);
|
||||
_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
|
||||
|
||||
Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
|
||||
|
||||
printk("Total memory = %lldMB; using %ldkB for hash table (at %p)\n",
|
||||
(unsigned long long)(total_memory >> 20), Hash_size >> 10, Hash);
|
||||
|
||||
|
||||
/*
|
||||
* Patch up the instructions in hashtable.S:create_hpte
|
||||
*/
|
||||
if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345);
|
||||
Hash_mask = n_hpteg - 1;
|
||||
hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
|
||||
mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
|
||||
if (lg_n_hpteg > 16)
|
||||
mb2 = 16 - LG_HPTEG_SIZE;
|
||||
|
||||
hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff)
|
||||
| ((unsigned int)(Hash) >> 16);
|
||||
hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6);
|
||||
hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6);
|
||||
hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask;
|
||||
hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask;
|
||||
|
||||
/*
|
||||
* Ensure that the locations we've patched have been written
|
||||
* out from the data cache and invalidated in the instruction
|
||||
* cache, on those machines with split caches.
|
||||
*/
|
||||
flush_icache_range((unsigned long) &hash_page_patch_A[0],
|
||||
(unsigned long) &hash_page_patch_C[1]);
|
||||
|
||||
/*
|
||||
* Patch up the instructions in hashtable.S:flush_hash_page
|
||||
*/
|
||||
flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff)
|
||||
| ((unsigned int)(Hash) >> 16);
|
||||
flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6);
|
||||
flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6);
|
||||
flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask;
|
||||
flush_icache_range((unsigned long) &flush_hash_patch_A[0],
|
||||
(unsigned long) &flush_hash_patch_B[1]);
|
||||
|
||||
if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205);
|
||||
}
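
/*
 * Stand-alone sketch of the hash-table sizing policy in MMU_init_hw()
 * above: one HPTE (1/8 of an HPTEG) per page of RAM, at least 1024 HPTEGs,
 * rounded up to a power of two, 64 bytes per HPTEG.  The 256MB / 4KB
 * figures in main() are illustrative inputs, not values from this file.
 */
#include <stdio.h>

#define EX_LG_HPTEG_SIZE	6	/* 64 bytes per HPTEG */
#define EX_MIN_N_HPTEG		1024	/* min 64kB hash table */

static unsigned long ex_hash_size_bytes(unsigned long total_memory,
					unsigned long page_size)
{
	unsigned long n_hpteg = total_memory / (page_size * 8);
	unsigned long lg = 0;

	if (n_hpteg < EX_MIN_N_HPTEG)
		n_hpteg = EX_MIN_N_HPTEG;
	while ((1UL << lg) < n_hpteg)		/* round up to a power of two */
		lg++;
	return (1UL << lg) << EX_LG_HPTEG_SIZE;
}

int main(void)
{
	/* 256MB of RAM with 4KB pages -> 8192 HPTEGs -> a 512KB hash table */
	printf("%lu KB\n", ex_hash_size_bytes(256UL << 20, 4096) >> 10);
	return 0;
}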
|
||||
|
||||
void setup_initial_memory_limit(phys_addr_t first_memblock_base,
|
||||
phys_addr_t first_memblock_size)
|
||||
{
|
||||
/* We don't currently support the first MEMBLOCK not mapping 0
|
||||
* physical on those processors
|
||||
*/
|
||||
BUG_ON(first_memblock_base != 0);
|
||||
|
||||
/* 601 can only access 16MB at the moment */
|
||||
if (PVR_VER(mfspr(SPRN_PVR)) == 1)
|
||||
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000));
|
||||
else /* Anything else has 256M mapped */
|
||||
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000));
|
||||
}

332
arch/powerpc/mm/slb.c
Normal file
@@ -0,0 +1,332 @@
/*
|
||||
* PowerPC64 SLB support.
|
||||
*
|
||||
* Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
|
||||
* Based on earlier code written by:
|
||||
* Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
|
||||
* Copyright (c) 2001 Dave Engebretsen
|
||||
* Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
|
||||
*
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/paca.h>
|
||||
#include <asm/cputable.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/smp.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <asm/udbg.h>
|
||||
#include <asm/code-patching.h>
|
||||
|
||||
|
||||
extern void slb_allocate_realmode(unsigned long ea);
|
||||
extern void slb_allocate_user(unsigned long ea);
|
||||
|
||||
static void slb_allocate(unsigned long ea)
|
||||
{
|
||||
/* Currently, we do real mode for all SLBs including user, but
|
||||
* that will change if we bring back dynamic VSIDs
|
||||
*/
|
||||
slb_allocate_realmode(ea);
|
||||
}
|
||||
|
||||
#define slb_esid_mask(ssize) \
|
||||
(((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T)
|
||||
|
||||
static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
|
||||
unsigned long slot)
|
||||
{
|
||||
return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot;
|
||||
}
|
||||
|
||||
static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
|
||||
unsigned long flags)
|
||||
{
|
||||
return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags |
|
||||
((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
|
||||
}
|
||||
|
||||
static inline void slb_shadow_update(unsigned long ea, int ssize,
				     unsigned long flags,
				     unsigned long entry)
{
	/*
	 * Clear the ESID first so the entry is not valid while we are
	 * updating it. No write barriers are needed here, provided
	 * we only update the current CPU's SLB shadow buffer.
	 */
	get_slb_shadow()->save_area[entry].esid = 0;
	get_slb_shadow()->save_area[entry].vsid =
		cpu_to_be64(mk_vsid_data(ea, ssize, flags));
	get_slb_shadow()->save_area[entry].esid =
		cpu_to_be64(mk_esid_data(ea, ssize, entry));
}
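
/*
 * The ordering in slb_shadow_update() is the interesting part: the entry
 * is marked invalid before either half is rewritten, so a concurrent
 * reader of the shadow buffer never sees a valid ESID paired with a stale
 * VSID.  As the comment above notes, this relies on only the owning CPU
 * writing its own shadow buffer.  A generic sketch of the same update
 * discipline; the structure and function names here are illustrative,
 * not kernel interfaces.
 */
#include <stdint.h>

struct ex_shadow_entry {
	volatile uint64_t esid;			/* valid bit lives in the ESID word */
	volatile uint64_t vsid;
};

static void ex_shadow_update(struct ex_shadow_entry *e,
			     uint64_t new_esid_with_valid, uint64_t new_vsid)
{
	e->esid = 0;				/* 1. entry no longer claims validity */
	e->vsid = new_vsid;			/* 2. safe: nobody trusts the entry now */
	e->esid = new_esid_with_valid;		/* 3. republish with the valid bit set */
}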
|
||||
|
||||
static inline void slb_shadow_clear(unsigned long entry)
|
||||
{
|
||||
get_slb_shadow()->save_area[entry].esid = 0;
|
||||
}
|
||||
|
||||
static inline void create_shadowed_slbe(unsigned long ea, int ssize,
|
||||
unsigned long flags,
|
||||
unsigned long entry)
|
||||
{
|
||||
/*
|
||||
* Updating the shadow buffer before writing the SLB ensures
|
||||
* we don't get a stale entry here if we get preempted by PHYP
|
||||
* between these two statements.
|
||||
*/
|
||||
slb_shadow_update(ea, ssize, flags, entry);
|
||||
|
||||
asm volatile("slbmte %0,%1" :
|
||||
: "r" (mk_vsid_data(ea, ssize, flags)),
|
||||
"r" (mk_esid_data(ea, ssize, entry))
|
||||
: "memory" );
|
||||
}
|
||||
|
||||
static void __slb_flush_and_rebolt(void)
|
||||
{
|
||||
/* If you change this make sure you change SLB_NUM_BOLTED
|
||||
* and PR KVM appropriately too. */
|
||||
unsigned long linear_llp, vmalloc_llp, lflags, vflags;
|
||||
unsigned long ksp_esid_data, ksp_vsid_data;
|
||||
|
||||
linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
|
||||
vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
|
||||
lflags = SLB_VSID_KERNEL | linear_llp;
|
||||
vflags = SLB_VSID_KERNEL | vmalloc_llp;
|
||||
|
||||
ksp_esid_data = mk_esid_data(get_paca()->kstack, mmu_kernel_ssize, 2);
|
||||
if ((ksp_esid_data & ~0xfffffffUL) <= PAGE_OFFSET) {
|
||||
ksp_esid_data &= ~SLB_ESID_V;
|
||||
ksp_vsid_data = 0;
|
||||
slb_shadow_clear(2);
|
||||
} else {
|
||||
/* Update stack entry; others don't change */
|
||||
slb_shadow_update(get_paca()->kstack, mmu_kernel_ssize, lflags, 2);
|
||||
ksp_vsid_data =
|
||||
be64_to_cpu(get_slb_shadow()->save_area[2].vsid);
|
||||
}
|
||||
|
||||
/* We need to do this all in asm, so we're sure we don't touch
|
||||
* the stack between the slbia and rebolting it. */
|
||||
asm volatile("isync\n"
|
||||
"slbia\n"
|
||||
/* Slot 1 - first VMALLOC segment */
|
||||
"slbmte %0,%1\n"
|
||||
/* Slot 2 - kernel stack */
|
||||
"slbmte %2,%3\n"
|
||||
"isync"
|
||||
:: "r"(mk_vsid_data(VMALLOC_START, mmu_kernel_ssize, vflags)),
|
||||
"r"(mk_esid_data(VMALLOC_START, mmu_kernel_ssize, 1)),
|
||||
"r"(ksp_vsid_data),
|
||||
"r"(ksp_esid_data)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
void slb_flush_and_rebolt(void)
|
||||
{
|
||||
|
||||
WARN_ON(!irqs_disabled());
|
||||
|
||||
/*
|
||||
* We can't take a PMU exception in the following code, so hard
|
||||
* disable interrupts.
|
||||
*/
|
||||
hard_irq_disable();
|
||||
|
||||
__slb_flush_and_rebolt();
|
||||
get_paca()->slb_cache_ptr = 0;
|
||||
}
|
||||
|
||||
void slb_vmalloc_update(void)
|
||||
{
|
||||
unsigned long vflags;
|
||||
|
||||
vflags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmalloc_psize].sllp;
|
||||
slb_shadow_update(VMALLOC_START, mmu_kernel_ssize, vflags, 1);
|
||||
slb_flush_and_rebolt();
|
||||
}
|
||||
|
||||
/* Helper function to compare esids. There are four cases to handle.
 * 1. The system is not 1T segment size capable. Use the GET_ESID compare.
 * 2. The system is 1T capable, both addresses are < 1T, use the GET_ESID compare.
 * 3. The system is 1T capable, only one of the two addresses is > 1T. This is not a match.
 * 4. The system is 1T capable, both addresses are > 1T, use the GET_ESID_1T macro to compare.
 */
static inline int esids_match(unsigned long addr1, unsigned long addr2)
{
	int esid_1t_count;

	/* System is not 1T segment size capable. */
	if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
		return (GET_ESID(addr1) == GET_ESID(addr2));

	esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) +
				((addr2 >> SID_SHIFT_1T) != 0));

	/* both addresses are < 1T */
	if (esid_1t_count == 0)
		return (GET_ESID(addr1) == GET_ESID(addr2));

	/* One address < 1T, the other > 1T. Not a match */
	if (esid_1t_count == 1)
		return 0;

	/* Both addresses are > 1T. */
	return (GET_ESID_1T(addr1) == GET_ESID_1T(addr2));
}
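
/*
 * Stand-alone sketch of the four-way comparison above for a 1T-capable
 * MMU, assuming 64-bit addresses with 256MB segments (shift 28) and 1TB
 * segments (shift 40); those shift values are assumptions here, the real
 * ones come from the MMU headers.  The addresses in main() are examples.
 */
#include <stdio.h>

#define EX_SID_SHIFT	28
#define EX_SID_SHIFT_1T	40

static int ex_esids_match(unsigned long a, unsigned long b)
{
	int count_1t = ((a >> EX_SID_SHIFT_1T) != 0) +
		       ((b >> EX_SID_SHIFT_1T) != 0);

	if (count_1t == 0)		/* both below 1TB: compare 256MB ESIDs */
		return (a >> EX_SID_SHIFT) == (b >> EX_SID_SHIFT);
	if (count_1t == 1)		/* one on each side of the 1TB boundary */
		return 0;
	return (a >> EX_SID_SHIFT_1T) == (b >> EX_SID_SHIFT_1T);
}

int main(void)
{
	printf("%d\n", ex_esids_match(0x10000000UL, 0x1ffffff0UL));	/* 1 */
	printf("%d\n", ex_esids_match(0x10000000UL, 0x20000000UL));	/* 0 */
	return 0;
}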
|
||||
|
||||
/* Flush all user entries from the segment table of the current processor. */
|
||||
void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
|
||||
{
|
||||
unsigned long offset;
|
||||
unsigned long slbie_data = 0;
|
||||
unsigned long pc = KSTK_EIP(tsk);
|
||||
unsigned long stack = KSTK_ESP(tsk);
|
||||
unsigned long exec_base;
|
||||
|
||||
/*
|
||||
* We need interrupts hard-disabled here, not just soft-disabled,
|
||||
* so that a PMU interrupt can't occur, which might try to access
|
||||
* user memory (to get a stack trace) and possibly cause an SLB miss
|
||||
* which would update the slb_cache/slb_cache_ptr fields in the PACA.
|
||||
*/
|
||||
hard_irq_disable();
|
||||
offset = get_paca()->slb_cache_ptr;
|
||||
if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
|
||||
offset <= SLB_CACHE_ENTRIES) {
|
||||
int i;
|
||||
asm volatile("isync" : : : "memory");
|
||||
for (i = 0; i < offset; i++) {
|
||||
slbie_data = (unsigned long)get_paca()->slb_cache[i]
|
||||
<< SID_SHIFT; /* EA */
|
||||
slbie_data |= user_segment_size(slbie_data)
|
||||
<< SLBIE_SSIZE_SHIFT;
|
||||
slbie_data |= SLBIE_C; /* C set for user addresses */
|
||||
asm volatile("slbie %0" : : "r" (slbie_data));
|
||||
}
|
||||
asm volatile("isync" : : : "memory");
|
||||
} else {
|
||||
__slb_flush_and_rebolt();
|
||||
}
|
||||
|
||||
/* Workaround POWER5 < DD2.1 issue */
|
||||
if (offset == 1 || offset > SLB_CACHE_ENTRIES)
|
||||
asm volatile("slbie %0" : : "r" (slbie_data));
|
||||
|
||||
get_paca()->slb_cache_ptr = 0;
|
||||
get_paca()->context = mm->context;
|
||||
|
||||
/*
|
||||
* preload some userspace segments into the SLB.
|
||||
* Almost all 32 and 64bit PowerPC executables are linked at
|
||||
* 0x10000000 so it makes sense to preload this segment.
|
||||
*/
|
||||
exec_base = 0x10000000;
|
||||
|
||||
if (is_kernel_addr(pc) || is_kernel_addr(stack) ||
|
||||
is_kernel_addr(exec_base))
|
||||
return;
|
||||
|
||||
slb_allocate(pc);
|
||||
|
||||
if (!esids_match(pc, stack))
|
||||
slb_allocate(stack);
|
||||
|
||||
if (!esids_match(pc, exec_base) &&
|
||||
!esids_match(stack, exec_base))
|
||||
slb_allocate(exec_base);
|
||||
}
|
||||
|
||||
static inline void patch_slb_encoding(unsigned int *insn_addr,
				      unsigned int immed)
{
	int insn = (*insn_addr & 0xffff0000) | immed;
	patch_instruction(insn_addr, insn);
}
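
/*
 * The instructions patched above ("li rX,0", "cmpldi rX,0") are D-form:
 * the low 16 bits are an immediate, the high 16 bits carry the opcode and
 * register fields, so masking with 0xffff0000 and OR-ing in a new value
 * rewrites only the immediate.  A user-space sketch of that masking; the
 * encoding of "li r11,0" is shown for illustration, and the 0x0490 flags
 * value is made up.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t ex_set_immediate(uint32_t insn, uint16_t immed)
{
	return (insn & 0xffff0000u) | immed;
}

int main(void)
{
	uint32_t li_r11_0 = 0x39600000;		/* "li r11,0" == addi r11,0,0 */

	printf("0x%08x\n", ex_set_immediate(li_r11_0, 0x0490));	/* 0x39600490 */
	return 0;
}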
|
||||
|
||||
extern u32 slb_compare_rr_to_size[];
|
||||
extern u32 slb_miss_kernel_load_linear[];
|
||||
extern u32 slb_miss_kernel_load_io[];
|
||||
extern u32 slb_compare_rr_to_size[];
|
||||
extern u32 slb_miss_kernel_load_vmemmap[];
|
||||
|
||||
void slb_set_size(u16 size)
|
||||
{
|
||||
if (mmu_slb_size == size)
|
||||
return;
|
||||
|
||||
mmu_slb_size = size;
|
||||
patch_slb_encoding(slb_compare_rr_to_size, mmu_slb_size);
|
||||
}
|
||||
|
||||
void slb_initialize(void)
|
||||
{
|
||||
unsigned long linear_llp, vmalloc_llp, io_llp;
|
||||
unsigned long lflags, vflags;
|
||||
static int slb_encoding_inited;
|
||||
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
||||
unsigned long vmemmap_llp;
|
||||
#endif
|
||||
|
||||
/* Prepare our SLB miss handler based on our page size */
|
||||
linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
|
||||
io_llp = mmu_psize_defs[mmu_io_psize].sllp;
|
||||
vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
|
||||
get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp;
|
||||
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
||||
vmemmap_llp = mmu_psize_defs[mmu_vmemmap_psize].sllp;
|
||||
#endif
|
||||
if (!slb_encoding_inited) {
|
||||
slb_encoding_inited = 1;
|
||||
patch_slb_encoding(slb_miss_kernel_load_linear,
|
||||
SLB_VSID_KERNEL | linear_llp);
|
||||
patch_slb_encoding(slb_miss_kernel_load_io,
|
||||
SLB_VSID_KERNEL | io_llp);
|
||||
patch_slb_encoding(slb_compare_rr_to_size,
|
||||
mmu_slb_size);
|
||||
|
||||
pr_devel("SLB: linear LLP = %04lx\n", linear_llp);
|
||||
pr_devel("SLB: io LLP = %04lx\n", io_llp);
|
||||
|
||||
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
||||
patch_slb_encoding(slb_miss_kernel_load_vmemmap,
|
||||
SLB_VSID_KERNEL | vmemmap_llp);
|
||||
pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
|
||||
#endif
|
||||
}
|
||||
|
||||
get_paca()->stab_rr = SLB_NUM_BOLTED;
|
||||
|
||||
lflags = SLB_VSID_KERNEL | linear_llp;
|
||||
vflags = SLB_VSID_KERNEL | vmalloc_llp;
|
||||
|
||||
/* Invalidate the entire SLB (even slot 0) & all the ERATS */
|
||||
asm volatile("isync":::"memory");
|
||||
asm volatile("slbmte %0,%0"::"r" (0) : "memory");
|
||||
asm volatile("isync; slbia; isync":::"memory");
|
||||
create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, 0);
|
||||
|
||||
create_shadowed_slbe(VMALLOC_START, mmu_kernel_ssize, vflags, 1);
|
||||
|
||||
/* For the boot cpu, we're running on the stack in init_thread_union,
|
||||
* which is in the first segment of the linear mapping, and also
|
||||
* get_paca()->kstack hasn't been initialized yet.
|
||||
* For secondary cpus, we need to bolt the kernel stack entry now.
|
||||
*/
|
||||
slb_shadow_clear(2);
|
||||
if (raw_smp_processor_id() != boot_cpuid &&
|
||||
(get_paca()->kstack & slb_esid_mask(mmu_kernel_ssize)) > PAGE_OFFSET)
|
||||
create_shadowed_slbe(get_paca()->kstack,
|
||||
mmu_kernel_ssize, lflags, 2);
|
||||
|
||||
asm volatile("isync":::"memory");
|
||||
}

321
arch/powerpc/mm/slb_low.S
Normal file
@@ -0,0 +1,321 @@
/*
|
||||
* Low-level SLB routines
|
||||
*
|
||||
* Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
|
||||
*
|
||||
* Based on earlier C version:
|
||||
* Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
|
||||
* Copyright (c) 2001 Dave Engebretsen
|
||||
* Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/ppc_asm.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/cputable.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/firmware.h>
|
||||
|
||||
/* void slb_allocate_realmode(unsigned long ea);
|
||||
*
|
||||
* Create an SLB entry for the given EA (user or kernel).
|
||||
* r3 = faulting address, r13 = PACA
|
||||
* r9, r10, r11 are clobbered by this function
|
||||
* No other registers are examined or changed.
|
||||
*/
|
||||
_GLOBAL(slb_allocate_realmode)
|
||||
/*
|
||||
* check for bad kernel/user address
|
||||
* (ea & ~REGION_MASK) >= PGTABLE_RANGE
|
||||
*/
|
||||
rldicr. r9,r3,4,(63 - PGTABLE_EADDR_SIZE - 4)
|
||||
bne- 8f
|
||||
|
||||
srdi r9,r3,60 /* get region */
|
||||
srdi r10,r3,SID_SHIFT /* get esid */
|
||||
cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */
|
||||
|
||||
/* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */
|
||||
blt cr7,0f /* user or kernel? */
|
||||
|
||||
/* kernel address: proto-VSID = ESID */
|
||||
/* WARNING - MAGIC: we don't use the VSID 0xfffffffff, but
|
||||
* this code will generate the protoVSID 0xfffffffff for the
|
||||
* top segment. That's ok, the scramble below will translate
|
||||
* it to VSID 0, which is reserved as a bad VSID - one which
|
||||
* will never have any pages in it. */
|
||||
|
||||
/* Check if hitting the linear mapping or some other kernel space
|
||||
*/
|
||||
bne cr7,1f
|
||||
|
||||
/* Linear mapping encoding bits, the "li" instruction below will
|
||||
* be patched by the kernel at boot
|
||||
*/
|
||||
.globl slb_miss_kernel_load_linear
|
||||
slb_miss_kernel_load_linear:
|
||||
li r11,0
|
||||
/*
|
||||
* context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
|
||||
* r9 = region id.
|
||||
*/
|
||||
addis r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha
|
||||
addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l
|
||||
|
||||
|
||||
BEGIN_FTR_SECTION
|
||||
b slb_finish_load
|
||||
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
|
||||
b slb_finish_load_1T
|
||||
|
||||
1:
|
||||
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
||||
/* Check virtual memmap region. To be patched at kernel boot */
|
||||
cmpldi cr0,r9,0xf
|
||||
bne 1f
|
||||
.globl slb_miss_kernel_load_vmemmap
|
||||
slb_miss_kernel_load_vmemmap:
|
||||
li r11,0
|
||||
b 6f
|
||||
1:
|
||||
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
|
||||
|
||||
/* vmalloc mapping gets the encoding from the PACA as the mapping
|
||||
* can be demoted from 64K -> 4K dynamically on some machines
|
||||
*/
|
||||
clrldi r11,r10,48
|
||||
cmpldi r11,(VMALLOC_SIZE >> 28) - 1
|
||||
bgt 5f
|
||||
lhz r11,PACAVMALLOCSLLP(r13)
|
||||
b 6f
|
||||
5:
|
||||
/* IO mapping */
|
||||
.globl slb_miss_kernel_load_io
|
||||
slb_miss_kernel_load_io:
|
||||
li r11,0
|
||||
6:
|
||||
/*
|
||||
* context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
|
||||
* r9 = region id.
|
||||
*/
|
||||
addis r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha
|
||||
addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l
|
||||
|
||||
BEGIN_FTR_SECTION
|
||||
b slb_finish_load
|
||||
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
|
||||
b slb_finish_load_1T
|
||||
|
||||
0:
|
||||
/* when using slices, we extract the psize off the slice bitmaps
|
||||
* and then we need to get the sllp encoding off the mmu_psize_defs
|
||||
* array.
|
||||
*
|
||||
* XXX This is a bit inefficient especially for the normal case,
|
||||
* so we should try to implement a fast path for the standard page
|
||||
* size using the old sllp value so we avoid the array. We cannot
|
||||
* really do dynamic patching unfortunately as processes might flip
|
||||
* between 4k and 64k standard page size
|
||||
*/
|
||||
#ifdef CONFIG_PPC_MM_SLICES
|
||||
/* r10 holds the esid */
|
||||
cmpldi r10,16
|
||||
/* below SLICE_LOW_TOP */
|
||||
blt 5f
|
||||
/*
|
||||
* Handle hpsizes,
|
||||
* r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index
|
||||
*/
|
||||
srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */
|
||||
addi r9,r11,PACAHIGHSLICEPSIZE
|
||||
lbzx r9,r13,r9 /* r9 is hpsizes[r11] */
|
||||
/* r11 = (r10 >> (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)) & 0x1 */
|
||||
rldicl r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63
|
||||
b 6f
|
||||
|
||||
5:
|
||||
/*
|
||||
* Handle lpsizes
|
||||
* r9 is get_paca()->context.low_slices_psize, r11 is index
|
||||
*/
|
||||
ld r9,PACALOWSLICESPSIZE(r13)
|
||||
mr r11,r10
|
||||
6:
|
||||
sldi r11,r11,2 /* index * 4 */
|
||||
/* Extract the psize and multiply to get an array offset */
|
||||
srd r9,r9,r11
|
||||
andi. r9,r9,0xf
|
||||
mulli r9,r9,MMUPSIZEDEFSIZE
|
||||
|
||||
/* Now get to the array and obtain the sllp
|
||||
*/
|
||||
ld r11,PACATOC(r13)
|
||||
ld r11,mmu_psize_defs@got(r11)
|
||||
add r11,r11,r9
|
||||
ld r11,MMUPSIZESLLP(r11)
|
||||
ori r11,r11,SLB_VSID_USER
|
||||
#else
|
||||
/* paca context sllp already contains the SLB_VSID_USER bits */
|
||||
lhz r11,PACACONTEXTSLLP(r13)
|
||||
#endif /* CONFIG_PPC_MM_SLICES */
|
||||
|
||||
ld r9,PACACONTEXTID(r13)
|
||||
BEGIN_FTR_SECTION
|
||||
cmpldi r10,0x1000
|
||||
bge slb_finish_load_1T
|
||||
END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
|
||||
b slb_finish_load
|
||||
|
||||
8: /* invalid EA */
|
||||
li r10,0 /* BAD_VSID */
|
||||
li r9,0 /* BAD_VSID */
|
||||
li r11,SLB_VSID_USER /* flags don't much matter */
|
||||
b slb_finish_load
|
||||
|
||||
#ifdef __DISABLED__
|
||||
|
||||
/* void slb_allocate_user(unsigned long ea);
|
||||
*
|
||||
* Create an SLB entry for the given EA (user or kernel).
|
||||
* r3 = faulting address, r13 = PACA
|
||||
* r9, r10, r11 are clobbered by this function
|
||||
* No other registers are examined or changed.
|
||||
*
|
||||
* It is called with translation enabled in order to be able to walk the
|
||||
* page tables. This is not currently used.
|
||||
*/
|
||||
_GLOBAL(slb_allocate_user)
|
||||
/* r3 = faulting address */
|
||||
srdi r10,r3,28 /* get esid */
|
||||
|
||||
crset 4*cr7+lt /* set "user" flag for later */
|
||||
|
||||
/* check if we fit in the range covered by the pagetables*/
|
||||
srdi. r9,r3,PGTABLE_EADDR_SIZE
|
||||
crnot 4*cr0+eq,4*cr0+eq
|
||||
beqlr
|
||||
|
||||
/* now we need to get to the page tables in order to get the page
|
||||
* size encoding from the PMD. In the future, we'll be able to deal
|
||||
* with 1T segments too by getting the encoding from the PGD instead
|
||||
*/
|
||||
ld r9,PACAPGDIR(r13)
|
||||
cmpldi cr0,r9,0
|
||||
beqlr
|
||||
rlwinm r11,r10,8,25,28
|
||||
ldx r9,r9,r11 /* get pgd_t */
|
||||
cmpldi cr0,r9,0
|
||||
beqlr
|
||||
rlwinm r11,r10,3,17,28
|
||||
ldx r9,r9,r11 /* get pmd_t */
|
||||
cmpldi cr0,r9,0
|
||||
beqlr
|
||||
|
||||
/* build vsid flags */
|
||||
andi. r11,r9,SLB_VSID_LLP
|
||||
ori r11,r11,SLB_VSID_USER
|
||||
|
||||
/* get context to calculate proto-VSID */
|
||||
ld r9,PACACONTEXTID(r13)
|
||||
/* fall through slb_finish_load */
|
||||
|
||||
#endif /* __DISABLED__ */
|
||||
|
||||
|
||||
/*
|
||||
* Finish loading of an SLB entry and return
|
||||
*
|
||||
* r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET
|
||||
*/
|
||||
slb_finish_load:
|
||||
rldimi r10,r9,ESID_BITS,0
|
||||
ASM_VSID_SCRAMBLE(r10,r9,256M)
|
||||
/*
|
||||
* bits above VSID_BITS_256M need to be ignored from r10
|
||||
* also combine VSID and flags
|
||||
*/
|
||||
rldimi r11,r10,SLB_VSID_SHIFT,(64 - (SLB_VSID_SHIFT + VSID_BITS_256M))
|
||||
|
||||
/* r3 = EA, r11 = VSID data */
|
||||
/*
|
||||
* Find a slot, round robin. Previously we tried to find a
|
||||
* free slot first but that took too long. Unfortunately we
|
||||
* dont have any LRU information to help us choose a slot.
|
||||
*/
|
||||
|
||||
7: ld r10,PACASTABRR(r13)
|
||||
addi r10,r10,1
|
||||
/* This gets soft patched on boot. */
|
||||
.globl slb_compare_rr_to_size
|
||||
slb_compare_rr_to_size:
|
||||
cmpldi r10,0
|
||||
|
||||
blt+ 4f
|
||||
li r10,SLB_NUM_BOLTED
|
||||
|
||||
4:
|
||||
std r10,PACASTABRR(r13)
|
||||
|
||||
3:
|
||||
rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */
|
||||
oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */
|
||||
|
||||
/* r3 = ESID data, r11 = VSID data */
|
||||
|
||||
/*
|
||||
* No need for an isync before or after this slbmte. The exception
|
||||
* we enter with and the rfid we exit with are context synchronizing.
|
||||
*/
|
||||
slbmte r11,r10
|
||||
|
||||
/* we're done for kernel addresses */
|
||||
crclr 4*cr0+eq /* set result to "success" */
|
||||
bgelr cr7
|
||||
|
||||
/* Update the slb cache */
|
||||
lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
|
||||
cmpldi r3,SLB_CACHE_ENTRIES
|
||||
bge 1f
|
||||
|
||||
/* still room in the slb cache */
|
||||
sldi r11,r3,2 /* r11 = offset * sizeof(u32) */
|
||||
srdi r10,r10,28 /* get the 36 bits of the ESID */
|
||||
add r11,r11,r13 /* r11 = (u32 *)paca + offset */
|
||||
stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */
|
||||
addi r3,r3,1 /* offset++ */
|
||||
b 2f
|
||||
1: /* offset >= SLB_CACHE_ENTRIES */
|
||||
li r3,SLB_CACHE_ENTRIES+1
|
||||
2:
|
||||
sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
|
||||
crclr 4*cr0+eq /* set result to "success" */
|
||||
blr
|
||||
|
||||
/*
|
||||
* Finish loading of a 1T SLB entry (for the kernel linear mapping) and return.
|
||||
*
|
||||
* r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9
|
||||
*/
|
||||
slb_finish_load_1T:
|
||||
srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */
|
||||
rldimi r10,r9,ESID_BITS_1T,0
|
||||
ASM_VSID_SCRAMBLE(r10,r9,1T)
|
||||
/*
|
||||
* bits above VSID_BITS_1T need to be ignored from r10
|
||||
* also combine VSID and flags
|
||||
*/
|
||||
rldimi r11,r10,SLB_VSID_SHIFT_1T,(64 - (SLB_VSID_SHIFT_1T + VSID_BITS_1T))
|
||||
li r10,MMU_SEGSIZE_1T
|
||||
rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */
|
||||
|
||||
/* r3 = EA, r11 = VSID data */
|
||||
clrrdi r3,r3,SID_SHIFT_1T /* clear out non-ESID bits */
|
||||
b 7b

730
arch/powerpc/mm/slice.c
Normal file
@@ -0,0 +1,730 @@
/*
|
||||
* address space "slices" (meta-segments) support
|
||||
*
|
||||
* Copyright (C) 2007 Benjamin Herrenschmidt, IBM Corporation.
|
||||
*
|
||||
* Based on hugetlb implementation
|
||||
*
|
||||
* Copyright (C) 2003 David Gibson, IBM Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#undef DEBUG
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <asm/mman.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/copro.h>
|
||||
#include <asm/hugetlb.h>
|
||||
|
||||
/* some sanity checks */
|
||||
#if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE
|
||||
#error PGTABLE_RANGE exceeds slice_mask high_slices size
|
||||
#endif
|
||||
|
||||
static DEFINE_SPINLOCK(slice_convert_lock);
|
||||
|
||||
|
||||
#ifdef DEBUG
|
||||
int _slice_debug = 1;
|
||||
|
||||
static void slice_print_mask(const char *label, struct slice_mask mask)
|
||||
{
|
||||
char *p, buf[16 + 3 + 64 + 1];
|
||||
int i;
|
||||
|
||||
if (!_slice_debug)
|
||||
return;
|
||||
p = buf;
|
||||
for (i = 0; i < SLICE_NUM_LOW; i++)
|
||||
*(p++) = (mask.low_slices & (1 << i)) ? '1' : '0';
|
||||
*(p++) = ' ';
|
||||
*(p++) = '-';
|
||||
*(p++) = ' ';
|
||||
for (i = 0; i < SLICE_NUM_HIGH; i++)
|
||||
*(p++) = (mask.high_slices & (1ul << i)) ? '1' : '0';
|
||||
*(p++) = 0;
|
||||
|
||||
printk(KERN_DEBUG "%s:%s\n", label, buf);
|
||||
}
|
||||
|
||||
#define slice_dbg(fmt...) do { if (_slice_debug) pr_debug(fmt); } while(0)
|
||||
|
||||
#else
|
||||
|
||||
static void slice_print_mask(const char *label, struct slice_mask mask) {}
|
||||
#define slice_dbg(fmt...)
|
||||
|
||||
#endif
|
||||
|
||||
static struct slice_mask slice_range_to_mask(unsigned long start,
					     unsigned long len)
{
	unsigned long end = start + len - 1;
	struct slice_mask ret = { 0, 0 };

	if (start < SLICE_LOW_TOP) {
		unsigned long mend = min(end, SLICE_LOW_TOP);
		unsigned long mstart = min(start, SLICE_LOW_TOP);

		ret.low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
			- (1u << GET_LOW_SLICE_INDEX(mstart));
	}

	if ((start + len) > SLICE_LOW_TOP)
		ret.high_slices = (1ul << (GET_HIGH_SLICE_INDEX(end) + 1))
			- (1ul << GET_HIGH_SLICE_INDEX(start));

	return ret;
}
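
/*
 * Stand-alone sketch of the mask construction above, assuming the usual
 * geometry of 16 low slices of 256MB (shift 28, covering the first 4GB)
 * and 1TB high slices (shift 40).  Those constants and the range used in
 * main() are illustrative assumptions; the real values live in the mmu
 * headers rather than in this file.
 */
#include <stdio.h>

#define EX_SLICE_LOW_SHIFT	28
#define EX_SLICE_HIGH_SHIFT	40
#define EX_SLICE_LOW_TOP	0x100000000ul

struct ex_slice_mask { unsigned int low_slices; unsigned long high_slices; };

static struct ex_slice_mask ex_range_to_mask(unsigned long start, unsigned long len)
{
	unsigned long end = start + len - 1;
	struct ex_slice_mask ret = { 0, 0 };

	if (start < EX_SLICE_LOW_TOP) {
		unsigned long mend = end < EX_SLICE_LOW_TOP ? end : EX_SLICE_LOW_TOP;

		ret.low_slices = (1u << ((mend >> EX_SLICE_LOW_SHIFT) + 1))
			- (1u << (start >> EX_SLICE_LOW_SHIFT));
	}
	if (start + len > EX_SLICE_LOW_TOP)
		ret.high_slices = (1ul << ((end >> EX_SLICE_HIGH_SHIFT) + 1))
			- (1ul << (start >> EX_SLICE_HIGH_SHIFT));
	return ret;
}

int main(void)
{
	/* 512MB starting at 256MB covers exactly low slices 1 and 2 */
	struct ex_slice_mask m = ex_range_to_mask(0x10000000ul, 0x20000000ul);

	printf("low=0x%04x high=0x%lx\n", m.low_slices, m.high_slices);	/* low=0x0006 */
	return 0;
}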
|
||||
|
||||
static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
|
||||
unsigned long len)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
if ((mm->task_size - len) < addr)
|
||||
return 0;
|
||||
vma = find_vma(mm, addr);
|
||||
return (!vma || (addr + len) <= vma->vm_start);
|
||||
}
|
||||
|
||||
static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
|
||||
{
|
||||
return !slice_area_is_free(mm, slice << SLICE_LOW_SHIFT,
|
||||
1ul << SLICE_LOW_SHIFT);
|
||||
}
|
||||
|
||||
static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
|
||||
{
|
||||
unsigned long start = slice << SLICE_HIGH_SHIFT;
|
||||
unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);
|
||||
|
||||
/* Hack, so that each address is controlled by exactly one
|
||||
* of the high or low area bitmaps, the first high area starts
|
||||
* at 4GB, not 0 */
|
||||
if (start == 0)
|
||||
start = SLICE_LOW_TOP;
|
||||
|
||||
return !slice_area_is_free(mm, start, end - start);
|
||||
}
|
||||
|
||||
static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
|
||||
{
|
||||
struct slice_mask ret = { 0, 0 };
|
||||
unsigned long i;
|
||||
|
||||
for (i = 0; i < SLICE_NUM_LOW; i++)
|
||||
if (!slice_low_has_vma(mm, i))
|
||||
ret.low_slices |= 1u << i;
|
||||
|
||||
if (mm->task_size <= SLICE_LOW_TOP)
|
||||
return ret;
|
||||
|
||||
for (i = 0; i < SLICE_NUM_HIGH; i++)
|
||||
if (!slice_high_has_vma(mm, i))
|
||||
ret.high_slices |= 1ul << i;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
|
||||
{
|
||||
unsigned char *hpsizes;
|
||||
int index, mask_index;
|
||||
struct slice_mask ret = { 0, 0 };
|
||||
unsigned long i;
|
||||
u64 lpsizes;
|
||||
|
||||
lpsizes = mm->context.low_slices_psize;
|
||||
for (i = 0; i < SLICE_NUM_LOW; i++)
|
||||
if (((lpsizes >> (i * 4)) & 0xf) == psize)
|
||||
ret.low_slices |= 1u << i;
|
||||
|
||||
hpsizes = mm->context.high_slices_psize;
|
||||
for (i = 0; i < SLICE_NUM_HIGH; i++) {
|
||||
mask_index = i & 0x1;
|
||||
index = i >> 1;
|
||||
if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
|
||||
ret.high_slices |= 1ul << i;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
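
/*
 * Sketch of the nibble packing read back by slice_mask_for_size() above:
 * low_slices_psize holds one 4-bit page-size index per 256MB slice in a
 * single 64-bit word, and high_slices_psize packs two 4-bit indices per
 * byte.  The packed values in main() are made-up examples.
 */
#include <stdio.h>
#include <stdint.h>

static unsigned int ex_low_slice_psize(uint64_t lpsizes, unsigned int i)
{
	return (lpsizes >> (i * 4)) & 0xf;
}

static unsigned int ex_high_slice_psize(const unsigned char *hpsizes, unsigned int i)
{
	unsigned int index = i >> 1, mask_index = i & 1;

	return (hpsizes[index] >> (mask_index * 4)) & 0xf;
}

int main(void)
{
	uint64_t lpsizes = 0x0000000000000430ull;
	unsigned char hpsizes[2] = { 0x43, 0x00 };

	printf("low slice 1  -> psize %u\n", ex_low_slice_psize(lpsizes, 1));	/* 3 */
	printf("high slice 1 -> psize %u\n", ex_high_slice_psize(hpsizes, 1));	/* 4 */
	return 0;
}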
|
||||
|
||||
static int slice_check_fit(struct slice_mask mask, struct slice_mask available)
|
||||
{
|
||||
return (mask.low_slices & available.low_slices) == mask.low_slices &&
|
||||
(mask.high_slices & available.high_slices) == mask.high_slices;
|
||||
}
|
||||
|
||||
static void slice_flush_segments(void *parm)
|
||||
{
|
||||
struct mm_struct *mm = parm;
|
||||
unsigned long flags;
|
||||
|
||||
if (mm != current->active_mm)
|
||||
return;
|
||||
|
||||
/* update the paca copy of the context struct */
|
||||
get_paca()->context = current->active_mm->context;
|
||||
|
||||
local_irq_save(flags);
|
||||
slb_flush_and_rebolt();
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
|
||||
{
|
||||
int index, mask_index;
|
||||
/* Write the new slice psize bits */
|
||||
unsigned char *hpsizes;
|
||||
u64 lpsizes;
|
||||
unsigned long i, flags;
|
||||
|
||||
slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
|
||||
slice_print_mask(" mask", mask);
|
||||
|
||||
/* We need to use a spinlock here to protect against
|
||||
* concurrent 64k -> 4k demotion ...
|
||||
*/
|
||||
spin_lock_irqsave(&slice_convert_lock, flags);
|
||||
|
||||
lpsizes = mm->context.low_slices_psize;
|
||||
for (i = 0; i < SLICE_NUM_LOW; i++)
|
||||
if (mask.low_slices & (1u << i))
|
||||
lpsizes = (lpsizes & ~(0xful << (i * 4))) |
|
||||
(((unsigned long)psize) << (i * 4));
|
||||
|
||||
/* Assign the value back */
|
||||
mm->context.low_slices_psize = lpsizes;
|
||||
|
||||
hpsizes = mm->context.high_slices_psize;
|
||||
for (i = 0; i < SLICE_NUM_HIGH; i++) {
|
||||
mask_index = i & 0x1;
|
||||
index = i >> 1;
|
||||
if (mask.high_slices & (1ul << i))
|
||||
hpsizes[index] = (hpsizes[index] &
|
||||
~(0xf << (mask_index * 4))) |
|
||||
(((unsigned long)psize) << (mask_index * 4));
|
||||
}
|
||||
|
||||
slice_dbg(" lsps=%lx, hsps=%lx\n",
|
||||
mm->context.low_slices_psize,
|
||||
mm->context.high_slices_psize);
|
||||
|
||||
spin_unlock_irqrestore(&slice_convert_lock, flags);
|
||||
|
||||
copro_flush_all_slbs(mm);
|
||||
}
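
/*
 * The conversion loop above is a read-modify-write of one 4-bit field per
 * selected slice.  A minimal sketch of that single update, mirroring the
 * low-slice case of slice_convert(); the 0x430 starting value and the
 * slice/psize numbers are illustrative.
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t ex_set_low_slice_psize(uint64_t lpsizes, unsigned int i,
				       unsigned int psize)
{
	lpsizes &= ~(0xfull << (i * 4));		/* clear the old 4-bit field */
	lpsizes |= (uint64_t)psize << (i * 4);		/* write the new page-size index */
	return lpsizes;
}

int main(void)
{
	/* convert slice 2 to page-size index 5: 0x430 -> 0x530 */
	printf("0x%llx\n",
	       (unsigned long long)ex_set_low_slice_psize(0x430, 2, 5));
	return 0;
}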
|
||||
|
||||
/*
|
||||
* Compute which slice addr is part of;
|
||||
* set *boundary_addr to the start or end boundary of that slice
|
||||
* (depending on 'end' parameter);
|
||||
* return boolean indicating if the slice is marked as available in the
|
||||
* 'available' slice_mark.
|
||||
*/
|
||||
static bool slice_scan_available(unsigned long addr,
|
||||
struct slice_mask available,
|
||||
int end,
|
||||
unsigned long *boundary_addr)
|
||||
{
|
||||
unsigned long slice;
|
||||
if (addr < SLICE_LOW_TOP) {
|
||||
slice = GET_LOW_SLICE_INDEX(addr);
|
||||
*boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
|
||||
return !!(available.low_slices & (1u << slice));
|
||||
} else {
|
||||
slice = GET_HIGH_SLICE_INDEX(addr);
|
||||
*boundary_addr = (slice + end) ?
|
||||
((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
|
||||
return !!(available.high_slices & (1ul << slice));
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
|
||||
unsigned long len,
|
||||
struct slice_mask available,
|
||||
int psize)
|
||||
{
|
||||
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
|
||||
unsigned long addr, found, next_end;
|
||||
struct vm_unmapped_area_info info;
|
||||
|
||||
info.flags = 0;
|
||||
info.length = len;
|
||||
info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
|
||||
info.align_offset = 0;
|
||||
|
||||
addr = TASK_UNMAPPED_BASE;
|
||||
while (addr < TASK_SIZE) {
|
||||
info.low_limit = addr;
|
||||
if (!slice_scan_available(addr, available, 1, &addr))
|
||||
continue;
|
||||
|
||||
next_slice:
|
||||
/*
|
||||
* At this point [info.low_limit; addr) covers
|
||||
* available slices only and ends at a slice boundary.
|
||||
* Check if we need to reduce the range, or if we can
|
||||
* extend it to cover the next available slice.
|
||||
*/
|
||||
if (addr >= TASK_SIZE)
|
||||
addr = TASK_SIZE;
|
||||
else if (slice_scan_available(addr, available, 1, &next_end)) {
|
||||
addr = next_end;
|
||||
goto next_slice;
|
||||
}
|
||||
info.high_limit = addr;
|
||||
|
||||
found = vm_unmapped_area(&info);
|
||||
if (!(found & ~PAGE_MASK))
|
||||
return found;
|
||||
}
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static unsigned long slice_find_area_topdown(struct mm_struct *mm,
|
||||
unsigned long len,
|
||||
struct slice_mask available,
|
||||
int psize)
|
||||
{
|
||||
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
|
||||
unsigned long addr, found, prev;
|
||||
struct vm_unmapped_area_info info;
|
||||
|
||||
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
|
||||
info.length = len;
|
||||
info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
|
||||
info.align_offset = 0;
|
||||
|
||||
addr = mm->mmap_base;
|
||||
while (addr > PAGE_SIZE) {
|
||||
info.high_limit = addr;
|
||||
if (!slice_scan_available(addr - 1, available, 0, &addr))
|
||||
continue;
|
||||
|
||||
prev_slice:
|
||||
/*
|
||||
* At this point [addr; info.high_limit) covers
|
||||
* available slices only and starts at a slice boundary.
|
||||
* Check if we need to reduce the range, or if we can
|
||||
* extend it to cover the previous available slice.
|
||||
*/
|
||||
if (addr < PAGE_SIZE)
|
||||
addr = PAGE_SIZE;
|
||||
else if (slice_scan_available(addr - 1, available, 0, &prev)) {
|
||||
addr = prev;
|
||||
goto prev_slice;
|
||||
}
|
||||
info.low_limit = addr;
|
||||
|
||||
found = vm_unmapped_area(&info);
|
||||
if (!(found & ~PAGE_MASK))
|
||||
return found;
|
||||
}
|
||||
|
||||
/*
|
||||
* A failed mmap() very likely causes application failure,
|
||||
* so fall back to the bottom-up function here. This scenario
|
||||
* can happen with large stack limits and large mmap()
|
||||
* allocations.
|
||||
*/
|
||||
return slice_find_area_bottomup(mm, len, available, psize);
|
||||
}
|
||||
|
||||
|
||||
static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
|
||||
struct slice_mask mask, int psize,
|
||||
int topdown)
|
||||
{
|
||||
if (topdown)
|
||||
return slice_find_area_topdown(mm, len, mask, psize);
|
||||
else
|
||||
return slice_find_area_bottomup(mm, len, mask, psize);
|
||||
}
|
||||
|
||||
#define or_mask(dst, src) do { \
|
||||
(dst).low_slices |= (src).low_slices; \
|
||||
(dst).high_slices |= (src).high_slices; \
|
||||
} while (0)
|
||||
|
||||
#define andnot_mask(dst, src) do { \
|
||||
(dst).low_slices &= ~(src).low_slices; \
|
||||
(dst).high_slices &= ~(src).high_slices; \
|
||||
} while (0)
|
||||
|
||||
#ifdef CONFIG_PPC_64K_PAGES
|
||||
#define MMU_PAGE_BASE MMU_PAGE_64K
|
||||
#else
|
||||
#define MMU_PAGE_BASE MMU_PAGE_4K
|
||||
#endif
|
||||
|
||||
unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
|
||||
unsigned long flags, unsigned int psize,
|
||||
int topdown)
|
||||
{
|
||||
struct slice_mask mask = {0, 0};
|
||||
struct slice_mask good_mask;
|
||||
struct slice_mask potential_mask = {0,0} /* silence stupid warning */;
|
||||
struct slice_mask compat_mask = {0, 0};
|
||||
int fixed = (flags & MAP_FIXED);
|
||||
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
|
||||
struct mm_struct *mm = current->mm;
|
||||
unsigned long newaddr;
|
||||
|
||||
/* Sanity checks */
|
||||
BUG_ON(mm->task_size == 0);
|
||||
|
||||
slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
|
||||
slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n",
|
||||
addr, len, flags, topdown);
|
||||
|
||||
if (len > mm->task_size)
|
||||
return -ENOMEM;
|
||||
if (len & ((1ul << pshift) - 1))
|
||||
return -EINVAL;
|
||||
if (fixed && (addr & ((1ul << pshift) - 1)))
|
||||
return -EINVAL;
|
||||
if (fixed && addr > (mm->task_size - len))
|
||||
return -ENOMEM;
|
||||
|
||||
/* If hint, make sure it matches our alignment restrictions */
|
||||
if (!fixed && addr) {
|
||||
addr = _ALIGN_UP(addr, 1ul << pshift);
|
||||
slice_dbg(" aligned addr=%lx\n", addr);
|
||||
/* Ignore hint if it's too large or overlaps a VMA */
|
||||
if (addr > mm->task_size - len ||
|
||||
!slice_area_is_free(mm, addr, len))
|
||||
addr = 0;
|
||||
}
|
||||
|
||||
/* First make up a "good" mask of slices that have the right size
|
||||
* already
|
||||
*/
|
||||
good_mask = slice_mask_for_size(mm, psize);
|
||||
slice_print_mask(" good_mask", good_mask);
|
||||
|
||||
/*
|
||||
* Here "good" means slices that are already the right page size,
|
||||
* "compat" means slices that have a compatible page size (i.e.
|
||||
* 4k in a 64k pagesize kernel), and "free" means slices without
|
||||
* any VMAs.
|
||||
*
|
||||
* If MAP_FIXED:
|
||||
* check if fits in good | compat => OK
|
||||
* check if fits in good | compat | free => convert free
|
||||
* else bad
|
||||
* If have hint:
|
||||
* check if hint fits in good => OK
|
||||
* check if hint fits in good | free => convert free
|
||||
* Otherwise:
|
||||
* search in good, found => OK
|
||||
* search in good | free, found => convert free
|
||||
* search in good | compat | free, found => convert free.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_PPC_64K_PAGES
|
||||
/* If we support combo pages, we can allow 64k pages in 4k slices */
|
||||
if (psize == MMU_PAGE_64K) {
|
||||
compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
|
||||
if (fixed)
|
||||
or_mask(good_mask, compat_mask);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* First check hint if it's valid or if we have MAP_FIXED */
|
||||
if (addr != 0 || fixed) {
|
||||
/* Build a mask for the requested range */
|
||||
mask = slice_range_to_mask(addr, len);
|
||||
slice_print_mask(" mask", mask);
|
||||
|
||||
/* Check if we fit in the good mask. If we do, we just return,
|
||||
* nothing else to do
|
||||
*/
|
||||
if (slice_check_fit(mask, good_mask)) {
|
||||
slice_dbg(" fits good !\n");
|
||||
return addr;
|
||||
}
|
||||
} else {
|
||||
/* Now let's see if we can find something in the existing
|
||||
* slices for that size
|
||||
*/
|
||||
newaddr = slice_find_area(mm, len, good_mask, psize, topdown);
|
||||
if (newaddr != -ENOMEM) {
|
||||
/* Found within the good mask, we don't have to setup,
|
||||
* we thus return directly
|
||||
*/
|
||||
slice_dbg(" found area at 0x%lx\n", newaddr);
|
||||
return newaddr;
|
||||
}
|
||||
}
|
||||
|
||||
/* We don't fit in the good mask, check what other slices are
|
||||
* empty and thus can be converted
|
||||
*/
|
||||
potential_mask = slice_mask_for_free(mm);
|
||||
or_mask(potential_mask, good_mask);
|
||||
slice_print_mask(" potential", potential_mask);
|
||||
|
||||
if ((addr != 0 || fixed) && slice_check_fit(mask, potential_mask)) {
|
||||
slice_dbg(" fits potential !\n");
|
||||
goto convert;
|
||||
}
|
||||
|
||||
/* If we have MAP_FIXED and failed the above steps, then error out */
|
||||
if (fixed)
|
||||
return -EBUSY;
|
||||
|
||||
slice_dbg(" search...\n");
|
||||
|
||||
/* If we had a hint that didn't work out, see if we can fit
|
||||
* anywhere in the good area.
|
||||
*/
|
||||
if (addr) {
|
||||
addr = slice_find_area(mm, len, good_mask, psize, topdown);
|
||||
if (addr != -ENOMEM) {
|
||||
slice_dbg(" found area at 0x%lx\n", addr);
|
||||
return addr;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now let's see if we can find something in the existing slices
|
||||
* for that size plus free slices
|
||||
*/
|
||||
addr = slice_find_area(mm, len, potential_mask, psize, topdown);
|
||||
|
||||
#ifdef CONFIG_PPC_64K_PAGES
|
||||
if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
|
||||
/* retry the search with 4k-page slices included */
|
||||
or_mask(potential_mask, compat_mask);
|
||||
addr = slice_find_area(mm, len, potential_mask, psize,
|
||||
topdown);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (addr == -ENOMEM)
|
||||
return -ENOMEM;
|
||||
|
||||
mask = slice_range_to_mask(addr, len);
|
||||
slice_dbg(" found potential area at 0x%lx\n", addr);
|
||||
slice_print_mask(" mask", mask);
|
||||
|
||||
convert:
|
||||
andnot_mask(mask, good_mask);
|
||||
andnot_mask(mask, compat_mask);
|
||||
if (mask.low_slices || mask.high_slices) {
|
||||
slice_convert(mm, mask, psize);
|
||||
if (psize > MMU_PAGE_BASE)
|
||||
on_each_cpu(slice_flush_segments, mm, 1);
|
||||
}
|
||||
return addr;
|
||||
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
|
||||
|
||||
unsigned long arch_get_unmapped_area(struct file *filp,
|
||||
unsigned long addr,
|
||||
unsigned long len,
|
||||
unsigned long pgoff,
|
||||
unsigned long flags)
|
||||
{
|
||||
return slice_get_unmapped_area(addr, len, flags,
|
||||
current->mm->context.user_psize, 0);
|
||||
}
|
||||
|
||||
unsigned long arch_get_unmapped_area_topdown(struct file *filp,
|
||||
const unsigned long addr0,
|
||||
const unsigned long len,
|
||||
const unsigned long pgoff,
|
||||
const unsigned long flags)
|
||||
{
|
||||
return slice_get_unmapped_area(addr0, len, flags,
|
||||
current->mm->context.user_psize, 1);
|
||||
}
|
||||
|
||||
unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
|
||||
{
|
||||
unsigned char *hpsizes;
|
||||
int index, mask_index;
|
||||
|
||||
if (addr < SLICE_LOW_TOP) {
|
||||
u64 lpsizes;
|
||||
lpsizes = mm->context.low_slices_psize;
|
||||
index = GET_LOW_SLICE_INDEX(addr);
|
||||
return (lpsizes >> (index * 4)) & 0xf;
|
||||
}
|
||||
hpsizes = mm->context.high_slices_psize;
|
||||
index = GET_HIGH_SLICE_INDEX(addr);
|
||||
mask_index = index & 0x1;
|
||||
return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(get_slice_psize);
|
||||
|
||||
/*
|
||||
* This is called by hash_page when it needs to do a lazy conversion of
|
||||
* an address space from real 64K pages to combo 4K pages (typically
|
||||
* when hitting a non cacheable mapping on a processor or hypervisor
|
||||
* that won't allow them for 64K pages).
|
||||
*
|
||||
* This is also called in init_new_context() to change back the user
|
||||
* psize from whatever the parent context had it set to
|
||||
* N.B. This may be called before mm->context.id has been set.
|
||||
*
|
||||
* This function will only change the content of the {low,high)_slice_psize
|
||||
* masks, it will not flush SLBs as this shall be handled lazily by the
|
||||
* caller.
|
||||
*/
|
||||
void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
|
||||
{
|
||||
int index, mask_index;
|
||||
unsigned char *hpsizes;
|
||||
unsigned long flags, lpsizes;
|
||||
unsigned int old_psize;
|
||||
int i;
|
||||
|
||||
slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize);
|
||||
|
||||
spin_lock_irqsave(&slice_convert_lock, flags);
|
||||
|
||||
old_psize = mm->context.user_psize;
|
||||
slice_dbg(" old_psize=%d\n", old_psize);
|
||||
if (old_psize == psize)
|
||||
goto bail;
|
||||
|
||||
mm->context.user_psize = psize;
|
||||
wmb();
|
||||
|
||||
lpsizes = mm->context.low_slices_psize;
|
||||
for (i = 0; i < SLICE_NUM_LOW; i++)
|
||||
if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
|
||||
lpsizes = (lpsizes & ~(0xful << (i * 4))) |
|
||||
(((unsigned long)psize) << (i * 4));
|
||||
/* Assign the value back */
|
||||
mm->context.low_slices_psize = lpsizes;
|
||||
|
||||
hpsizes = mm->context.high_slices_psize;
|
||||
for (i = 0; i < SLICE_NUM_HIGH; i++) {
|
||||
mask_index = i & 0x1;
|
||||
index = i >> 1;
|
||||
if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize)
|
||||
hpsizes[index] = (hpsizes[index] &
|
||||
~(0xf << (mask_index * 4))) |
|
||||
(((unsigned long)psize) << (mask_index * 4));
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
slice_dbg(" lsps=%lx, hsps=%lx\n",
|
||||
mm->context.low_slices_psize,
|
||||
mm->context.high_slices_psize);
|
||||
|
||||
bail:
|
||||
spin_unlock_irqrestore(&slice_convert_lock, flags);
|
||||
}
|
||||
|
||||
void slice_set_psize(struct mm_struct *mm, unsigned long address,
|
||||
unsigned int psize)
|
||||
{
|
||||
unsigned char *hpsizes;
|
||||
unsigned long i, flags;
|
||||
u64 *lpsizes;
|
||||
|
||||
spin_lock_irqsave(&slice_convert_lock, flags);
|
||||
if (address < SLICE_LOW_TOP) {
|
||||
i = GET_LOW_SLICE_INDEX(address);
|
||||
lpsizes = &mm->context.low_slices_psize;
|
||||
*lpsizes = (*lpsizes & ~(0xful << (i * 4))) |
|
||||
((unsigned long) psize << (i * 4));
|
||||
} else {
|
||||
int index, mask_index;
|
||||
i = GET_HIGH_SLICE_INDEX(address);
|
||||
hpsizes = mm->context.high_slices_psize;
|
||||
mask_index = i & 0x1;
|
||||
index = i >> 1;
|
||||
hpsizes[index] = (hpsizes[index] &
|
||||
~(0xf << (mask_index * 4))) |
|
||||
(((unsigned long)psize) << (mask_index * 4));
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&slice_convert_lock, flags);
|
||||
|
||||
copro_flush_all_slbs(mm);
|
||||
}
|
||||
|
||||
void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
|
||||
unsigned long len, unsigned int psize)
|
||||
{
|
||||
struct slice_mask mask = slice_range_to_mask(start, len);
|
||||
|
||||
slice_convert(mm, mask, psize);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HUGETLB_PAGE
|
||||
/*
|
||||
* is_hugepage_only_range() is used by generic code to verify whether
|
||||
* a normal mmap mapping (non hugetlbfs) is valid on a given area.
|
||||
*
|
||||
* until the generic code provides a more generic hook and/or starts
|
||||
* calling arch get_unmapped_area for MAP_FIXED (which our implementation
|
||||
* here knows how to deal with), we hijack it to keep standard mappings
|
||||
* away from us.
|
||||
*
|
||||
* because of that generic code limitation, MAP_FIXED mapping cannot
|
||||
* "convert" back a slice with no VMAs to the standard page size, only
|
||||
* get_unmapped_area() can. It would be possible to fix it here but I
|
||||
* prefer working on fixing the generic code instead.
|
||||
*
|
||||
* WARNING: This will not work if hugetlbfs isn't enabled since the
|
||||
* generic code will redefine that function as 0 in that case. This is ok
|
||||
* for now as we only use slices with hugetlbfs enabled. This should
|
||||
* be fixed as the generic code gets fixed.
|
||||
*/
|
||||
int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
|
||||
unsigned long len)
|
||||
{
|
||||
struct slice_mask mask, available;
|
||||
unsigned int psize = mm->context.user_psize;
|
||||
|
||||
mask = slice_range_to_mask(addr, len);
|
||||
available = slice_mask_for_size(mm, psize);
|
||||
#ifdef CONFIG_PPC_64K_PAGES
|
||||
/* We need to account for 4k slices too */
|
||||
if (psize == MMU_PAGE_64K) {
|
||||
struct slice_mask compat_mask;
|
||||
compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
|
||||
or_mask(available, compat_mask);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0 /* too verbose */
|
||||
slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n",
|
||||
mm, addr, len);
|
||||
slice_print_mask(" mask", mask);
|
||||
slice_print_mask(" available", available);
|
||||
#endif
|
||||
return !slice_check_fit(mask, available);
|
||||
}
|
||||
#endif

269
arch/powerpc/mm/subpage-prot.c
Normal file
@@ -0,0 +1,269 @@
/*
|
||||
* Copyright 2007-2008 Paul Mackerras, IBM Corp.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <linux/errno.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/hugetlb.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
/*
|
||||
* Free all pages allocated for subpage protection maps and pointers.
|
||||
* Also makes sure that the subpage_prot_table structure is
|
||||
* reinitialized for the next user.
|
||||
*/
|
||||
void subpage_prot_free(struct mm_struct *mm)
|
||||
{
|
||||
struct subpage_prot_table *spt = &mm->context.spt;
|
||||
unsigned long i, j, addr;
|
||||
u32 **p;
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
if (spt->low_prot[i]) {
|
||||
free_page((unsigned long)spt->low_prot[i]);
|
||||
spt->low_prot[i] = NULL;
|
||||
}
|
||||
}
|
||||
addr = 0;
|
||||
for (i = 0; i < 2; ++i) {
|
||||
p = spt->protptrs[i];
|
||||
if (!p)
|
||||
continue;
|
||||
spt->protptrs[i] = NULL;
|
||||
for (j = 0; j < SBP_L2_COUNT && addr < spt->maxaddr;
|
||||
++j, addr += PAGE_SIZE)
|
||||
if (p[j])
|
||||
free_page((unsigned long)p[j]);
|
||||
free_page((unsigned long)p);
|
||||
}
|
||||
spt->maxaddr = 0;
|
||||
}
|
||||
|
||||
void subpage_prot_init_new_context(struct mm_struct *mm)
|
||||
{
|
||||
struct subpage_prot_table *spt = &mm->context.spt;
|
||||
|
||||
memset(spt, 0, sizeof(*spt));
|
||||
}
|
||||
|
||||
static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
|
||||
int npages)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
spinlock_t *ptl;
|
||||
|
||||
pgd = pgd_offset(mm, addr);
|
||||
if (pgd_none(*pgd))
|
||||
return;
|
||||
pud = pud_offset(pgd, addr);
|
||||
if (pud_none(*pud))
|
||||
return;
|
||||
pmd = pmd_offset(pud, addr);
|
||||
if (pmd_none(*pmd))
|
||||
return;
|
||||
pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
|
||||
arch_enter_lazy_mmu_mode();
|
||||
for (; npages > 0; --npages) {
|
||||
pte_update(mm, addr, pte, 0, 0, 0);
|
||||
addr += PAGE_SIZE;
|
||||
++pte;
|
||||
}
|
||||
arch_leave_lazy_mmu_mode();
|
||||
pte_unmap_unlock(pte - 1, ptl);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear the subpage protection map for an address range, allowing
|
||||
* all accesses that are allowed by the pte permissions.
|
||||
*/
|
||||
static void subpage_prot_clear(unsigned long addr, unsigned long len)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct subpage_prot_table *spt = &mm->context.spt;
|
||||
u32 **spm, *spp;
|
||||
unsigned long i;
|
||||
size_t nw;
|
||||
unsigned long next, limit;
|
||||
|
||||
down_write(&mm->mmap_sem);
|
||||
limit = addr + len;
|
||||
if (limit > spt->maxaddr)
|
||||
limit = spt->maxaddr;
|
||||
for (; addr < limit; addr = next) {
|
||||
next = pmd_addr_end(addr, limit);
|
||||
if (addr < 0x100000000UL) {
|
||||
spm = spt->low_prot;
|
||||
} else {
|
||||
spm = spt->protptrs[addr >> SBP_L3_SHIFT];
|
||||
if (!spm)
|
||||
continue;
|
||||
}
|
||||
spp = spm[(addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
|
||||
if (!spp)
|
||||
continue;
|
||||
spp += (addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1);
|
||||
|
||||
i = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
|
||||
nw = PTRS_PER_PTE - i;
|
||||
if (addr + (nw << PAGE_SHIFT) > next)
|
||||
nw = (next - addr) >> PAGE_SHIFT;
|
||||
|
||||
memset(spp, 0, nw * sizeof(u32));
|
||||
|
||||
/* now flush any existing HPTEs for the range */
|
||||
hpte_flush_range(mm, addr, nw);
|
||||
}
|
||||
up_write(&mm->mmap_sem);
|
||||
}
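
/*
 * Sketch of the three-level index walk used by subpage_prot_clear() above
 * and sys_subpage_prot() below: protptrs[] selects a directory page for
 * addresses above 4GB, the directory selects a leaf page of u32 words, and
 * each word protects one page.  The SBP_* values below are assumed for a
 * 64KB-page build (the real ones live in the mmu headers), and the address
 * in main() is arbitrary.
 */
#include <stdio.h>

#define EX_PAGE_SHIFT	16			/* 64KB pages assumed */
#define EX_SBP_L1_BITS	(EX_PAGE_SHIFT - 2)
#define EX_SBP_L2_BITS	(EX_PAGE_SHIFT - 3)
#define EX_SBP_L1_COUNT	(1u << EX_SBP_L1_BITS)
#define EX_SBP_L2_COUNT	(1u << EX_SBP_L2_BITS)
#define EX_SBP_L2_SHIFT	(EX_PAGE_SHIFT + EX_SBP_L1_BITS)
#define EX_SBP_L3_SHIFT	(EX_SBP_L2_SHIFT + EX_SBP_L2_BITS)

int main(void)
{
	unsigned long addr = 0x123456780000ul;

	printf("protptrs index %lu\n", addr >> EX_SBP_L3_SHIFT);
	printf("directory slot %lu\n",
	       (addr >> EX_SBP_L2_SHIFT) & (EX_SBP_L2_COUNT - 1));
	printf("leaf word      %lu\n",
	       (addr >> EX_PAGE_SHIFT) & (EX_SBP_L1_COUNT - 1));
	return 0;
}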
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
|
||||
unsigned long end, struct mm_walk *walk)
|
||||
{
|
||||
struct vm_area_struct *vma = walk->private;
|
||||
split_huge_page_pmd(vma, addr, pmd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
|
||||
unsigned long len)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
struct mm_walk subpage_proto_walk = {
|
||||
.mm = mm,
|
||||
.pmd_entry = subpage_walk_pmd_entry,
|
||||
};
|
||||
|
||||
/*
|
||||
* We don't try too hard; we just mark all the VMAs in that range
|
||||
* VM_NOHUGEPAGE and split them.
|
||||
*/
|
||||
vma = find_vma(mm, addr);
|
||||
/*
|
||||
* If the whole range is unmapped, just return
|
||||
*/
|
||||
if (vma && ((addr + len) <= vma->vm_start))
|
||||
return;
|
||||
|
||||
while (vma) {
|
||||
if (vma->vm_start >= (addr + len))
|
||||
break;
|
||||
vma->vm_flags |= VM_NOHUGEPAGE;
|
||||
subpage_proto_walk.private = vma;
|
||||
walk_page_range(vma->vm_start, vma->vm_end,
|
||||
&subpage_proto_walk);
|
||||
vma = vma->vm_next;
|
||||
}
|
||||
}
|
||||
#else
|
||||
static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
|
||||
unsigned long len)
|
||||
{
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Copy in a subpage protection map for an address range.
|
||||
* The map has 2 bits per 4k subpage, so 32 bits per 64k page.
|
||||
* Each 2-bit field is 0 to allow any access, 1 to prevent writes,
|
||||
* 2 or 3 to prevent all accesses.
|
||||
* Note that the normal page protections also apply; the subpage
|
||||
* protection mechanism is an additional constraint, so putting 0
|
||||
* in a 2-bit field won't allow writes to a page that is otherwise
|
||||
* write-protected.
|
||||
*/
|
||||
long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct subpage_prot_table *spt = &mm->context.spt;
|
||||
u32 **spm, *spp;
|
||||
unsigned long i;
|
||||
size_t nw;
|
||||
unsigned long next, limit;
|
||||
int err;
|
||||
|
||||
/* Check parameters */
|
||||
if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) ||
|
||||
addr >= TASK_SIZE || len >= TASK_SIZE || addr + len > TASK_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
if (is_hugepage_only_range(mm, addr, len))
|
||||
return -EINVAL;
|
||||
|
||||
if (!map) {
|
||||
/* Clear out the protection map for the address range */
|
||||
subpage_prot_clear(addr, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!access_ok(VERIFY_READ, map, (len >> PAGE_SHIFT) * sizeof(u32)))
|
||||
return -EFAULT;
|
||||
|
||||
down_write(&mm->mmap_sem);
|
||||
subpage_mark_vma_nohuge(mm, addr, len);
|
||||
for (limit = addr + len; addr < limit; addr = next) {
|
||||
next = pmd_addr_end(addr, limit);
|
||||
err = -ENOMEM;
|
||||
if (addr < 0x100000000UL) {
|
||||
spm = spt->low_prot;
|
||||
} else {
|
||||
spm = spt->protptrs[addr >> SBP_L3_SHIFT];
|
||||
if (!spm) {
|
||||
spm = (u32 **)get_zeroed_page(GFP_KERNEL);
|
||||
if (!spm)
|
||||
goto out;
|
||||
spt->protptrs[addr >> SBP_L3_SHIFT] = spm;
|
||||
}
|
||||
}
|
||||
spm += (addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1);
|
||||
spp = *spm;
|
||||
if (!spp) {
|
||||
spp = (u32 *)get_zeroed_page(GFP_KERNEL);
|
||||
if (!spp)
|
||||
goto out;
|
||||
*spm = spp;
|
||||
}
|
||||
spp += (addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1);
|
||||
|
||||
local_irq_disable();
|
||||
demote_segment_4k(mm, addr);
|
||||
local_irq_enable();
|
||||
|
||||
i = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
|
||||
nw = PTRS_PER_PTE - i;
|
||||
if (addr + (nw << PAGE_SHIFT) > next)
|
||||
nw = (next - addr) >> PAGE_SHIFT;
|
||||
|
||||
up_write(&mm->mmap_sem);
|
||||
err = -EFAULT;
|
||||
if (__copy_from_user(spp, map, nw * sizeof(u32)))
|
||||
goto out2;
|
||||
map += nw;
|
||||
down_write(&mm->mmap_sem);
|
||||
|
||||
/* now flush any existing HPTEs for the range */
|
||||
hpte_flush_range(mm, addr, nw);
|
||||
}
|
||||
if (limit > spt->maxaddr)
|
||||
spt->maxaddr = limit;
|
||||
err = 0;
|
||||
out:
|
||||
up_write(&mm->mmap_sem);
|
||||
out2:
|
||||
return err;
|
||||
}
|
||||
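The block comment above sys_subpage_prot() describes its map format: 2 bits per 4K subpage, 32 bits per 64K page, with 0 allowing access, 1 preventing writes and 2/3 preventing all access. Purely as a reading aid (not part of this commit), here is a minimal user-space sketch of how that syscall might be driven on a 64K-page powerpc kernel; the syscall number macro __NR_subpage_prot and the 64K page size are assumptions about the target ABI.

#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	size_t len = 0x10000;        /* one 64K page, i.e. sixteen 4K subpages */
	uint32_t map = 0;            /* 2 bits per subpage -> one u32 per 64K page */
	void *p;
	int i;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	/* 01 in every 2-bit field: keep reads, prevent writes per subpage */
	for (i = 0; i < 16; i++)
		map |= 1u << (2 * i);

	/* __NR_subpage_prot is assumed to exist for this (powerpc) ABI */
	if (syscall(__NR_subpage_prot, (unsigned long)p, len, &map) != 0)
		perror("subpage_prot");
	return 0;
}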
184
arch/powerpc/mm/tlb_hash32.c
Normal file
@ -0,0 +1,184 @@
/*
|
||||
* This file contains the routines for TLB flushing.
|
||||
* On machines where the MMU uses a hash table to store virtual to
|
||||
* physical translations, these routines flush entries from the
|
||||
* hash table also.
|
||||
* -- paulus
|
||||
*
|
||||
* Derived from arch/ppc/mm/init.c:
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
|
||||
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
|
||||
* Copyright (C) 1996 Paul Mackerras
|
||||
*
|
||||
* Derived from "arch/i386/mm/init.c"
|
||||
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/export.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/tlb.h>
|
||||
|
||||
#include "mmu_decl.h"
|
||||
|
||||
/*
|
||||
* Called when unmapping pages to flush entries from the TLB/hash table.
|
||||
*/
|
||||
void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr)
|
||||
{
|
||||
unsigned long ptephys;
|
||||
|
||||
if (Hash != 0) {
|
||||
ptephys = __pa(ptep) & PAGE_MASK;
|
||||
flush_hash_pages(mm->context.id, addr, ptephys, 1);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(flush_hash_entry);
|
||||
|
||||
/*
|
||||
* Called by ptep_set_access_flags, must flush on CPUs for which the
|
||||
* DSI handler can't just "fixup" the TLB on a write fault
|
||||
*/
|
||||
void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr)
|
||||
{
|
||||
if (Hash != 0)
|
||||
return;
|
||||
_tlbie(addr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called at the end of a mmu_gather operation to make sure the
|
||||
* TLB flush is completely done.
|
||||
*/
|
||||
void tlb_flush(struct mmu_gather *tlb)
|
||||
{
|
||||
if (Hash == 0) {
|
||||
/*
|
||||
* 603 needs to flush the whole TLB here since
|
||||
* it doesn't use a hash table.
|
||||
*/
|
||||
_tlbia();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* TLB flushing:
|
||||
*
|
||||
* - flush_tlb_mm(mm) flushes the specified mm context TLB's
|
||||
* - flush_tlb_page(vma, vmaddr) flushes one page
|
||||
* - flush_tlb_range(vma, start, end) flushes a range of pages
|
||||
* - flush_tlb_kernel_range(start, end) flushes kernel pages
|
||||
*
|
||||
* since the hardware hash table functions as an extension of the
|
||||
* tlb as far as the linux tables are concerned, flush it too.
|
||||
* -- Cort
|
||||
*/
|
||||
|
||||
static void flush_range(struct mm_struct *mm, unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
pmd_t *pmd;
|
||||
unsigned long pmd_end;
|
||||
int count;
|
||||
unsigned int ctx = mm->context.id;
|
||||
|
||||
if (Hash == 0) {
|
||||
_tlbia();
|
||||
return;
|
||||
}
|
||||
start &= PAGE_MASK;
|
||||
if (start >= end)
|
||||
return;
|
||||
end = (end - 1) | ~PAGE_MASK;
|
||||
pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start);
|
||||
for (;;) {
|
||||
pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1;
|
||||
if (pmd_end > end)
|
||||
pmd_end = end;
|
||||
if (!pmd_none(*pmd)) {
|
||||
count = ((pmd_end - start) >> PAGE_SHIFT) + 1;
|
||||
flush_hash_pages(ctx, start, pmd_val(*pmd), count);
|
||||
}
|
||||
if (pmd_end == end)
|
||||
break;
|
||||
start = pmd_end + 1;
|
||||
++pmd;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Flush kernel TLB entries in the given range
|
||||
*/
|
||||
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
|
||||
{
|
||||
flush_range(&init_mm, start, end);
|
||||
}
|
||||
EXPORT_SYMBOL(flush_tlb_kernel_range);
|
||||
|
||||
/*
|
||||
* Flush all the (user) entries for the address space described by mm.
|
||||
*/
|
||||
void flush_tlb_mm(struct mm_struct *mm)
|
||||
{
|
||||
struct vm_area_struct *mp;
|
||||
|
||||
if (Hash == 0) {
|
||||
_tlbia();
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* It is safe to go down the mm's list of vmas when called
|
||||
* from dup_mmap, holding mmap_sem. It would also be safe from
|
||||
* unmap_region or exit_mmap, but not from vmtruncate on SMP -
|
||||
* but it seems dup_mmap is the only SMP case which gets here.
|
||||
*/
|
||||
for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
|
||||
flush_range(mp->vm_mm, mp->vm_start, mp->vm_end);
|
||||
}
|
||||
EXPORT_SYMBOL(flush_tlb_mm);
|
||||
|
||||
void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
|
||||
{
|
||||
struct mm_struct *mm;
|
||||
pmd_t *pmd;
|
||||
|
||||
if (Hash == 0) {
|
||||
_tlbie(vmaddr);
|
||||
return;
|
||||
}
|
||||
mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm;
|
||||
pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr);
|
||||
if (!pmd_none(*pmd))
|
||||
flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1);
|
||||
}
|
||||
EXPORT_SYMBOL(flush_tlb_page);
|
||||
|
||||
/*
|
||||
* For each address in the range, find the pte for the address
|
||||
* and check _PAGE_HASHPTE bit; if it is set, find and destroy
|
||||
* the corresponding HPTE.
|
||||
*/
|
||||
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
flush_range(vma->vm_mm, start, end);
|
||||
}
|
||||
EXPORT_SYMBOL(flush_tlb_range);
|
||||
|
||||
void __init early_init_mmu(void)
|
||||
{
|
||||
}
|
||||
256
arch/powerpc/mm/tlb_hash64.c
Normal file
@ -0,0 +1,256 @@
/*
|
||||
* This file contains the routines for flushing entries from the
|
||||
* TLB and MMU hash table.
|
||||
*
|
||||
* Derived from arch/ppc64/mm/init.c:
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
|
||||
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
|
||||
* Copyright (C) 1996 Paul Mackerras
|
||||
*
|
||||
* Derived from "arch/i386/mm/init.c"
|
||||
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
||||
*
|
||||
* Dave Engebretsen <engebret@us.ibm.com>
|
||||
* Rework for PPC64 port.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/bug.h>
|
||||
|
||||
#include <trace/events/thp.h>
|
||||
|
||||
DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
|
||||
|
||||
/*
|
||||
* A linux PTE was changed and the corresponding hash table entry
|
||||
* needs to be flushed. This function will either perform the flush
|
||||
* immediately or will batch it up if the current CPU has an active
|
||||
* batch on it.
|
||||
*/
|
||||
void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, unsigned long pte, int huge)
|
||||
{
|
||||
unsigned long vpn;
|
||||
struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
|
||||
unsigned long vsid;
|
||||
unsigned int psize;
|
||||
int ssize;
|
||||
real_pte_t rpte;
|
||||
int i;
|
||||
|
||||
i = batch->index;
|
||||
|
||||
/* Get page size (maybe move back to caller).
|
||||
*
|
||||
* NOTE: when using special 64K mappings in 4K environment like
|
||||
* for SPEs, we obtain the page size from the slice, which thus
|
||||
* must still exist (and thus the VMA not reused) at the time
|
||||
* of this call
|
||||
*/
|
||||
if (huge) {
|
||||
#ifdef CONFIG_HUGETLB_PAGE
|
||||
psize = get_slice_psize(mm, addr);
|
||||
/* Mask the address for the correct page size */
|
||||
addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
|
||||
#else
|
||||
BUG();
|
||||
psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
|
||||
#endif
|
||||
} else {
|
||||
psize = pte_pagesize_index(mm, addr, pte);
|
||||
/* Mask the address for the standard page size. If we
|
||||
* have a 64k page kernel, but the hardware does not
|
||||
* support 64k pages, this might be different from the
|
||||
* hardware page size encoded in the slice table. */
|
||||
addr &= PAGE_MASK;
|
||||
}
|
||||
|
||||
|
||||
/* Build full vaddr */
|
||||
if (!is_kernel_addr(addr)) {
|
||||
ssize = user_segment_size(addr);
|
||||
vsid = get_vsid(mm->context.id, addr, ssize);
|
||||
} else {
|
||||
vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
|
||||
ssize = mmu_kernel_ssize;
|
||||
}
|
||||
WARN_ON(vsid == 0);
|
||||
vpn = hpt_vpn(addr, vsid, ssize);
|
||||
rpte = __real_pte(__pte(pte), ptep);
|
||||
|
||||
/*
|
||||
* Check if we have an active batch on this CPU. If not, just
|
||||
* flush now and return. For now, we do global invalidates
|
||||
* in that case, might be worth testing the mm cpu mask though
|
||||
* and decide to use local invalidates instead...
|
||||
*/
|
||||
if (!batch->active) {
|
||||
flush_hash_page(vpn, rpte, psize, ssize, 0);
|
||||
put_cpu_var(ppc64_tlb_batch);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* This can happen when we are in the middle of a TLB batch and
|
||||
* we encounter memory pressure (eg copy_page_range when it tries
|
||||
* to allocate a new pte). If we have to reclaim memory and end
|
||||
* up scanning and resetting referenced bits then our batch context
|
||||
* will change mid stream.
|
||||
*
|
||||
* We also need to ensure only one page size is present in a given
|
||||
* batch
|
||||
*/
|
||||
if (i != 0 && (mm != batch->mm || batch->psize != psize ||
|
||||
batch->ssize != ssize)) {
|
||||
__flush_tlb_pending(batch);
|
||||
i = 0;
|
||||
}
|
||||
if (i == 0) {
|
||||
batch->mm = mm;
|
||||
batch->psize = psize;
|
||||
batch->ssize = ssize;
|
||||
}
|
||||
batch->pte[i] = rpte;
|
||||
batch->vpn[i] = vpn;
|
||||
batch->index = ++i;
|
||||
if (i >= PPC64_TLB_BATCH_NR)
|
||||
__flush_tlb_pending(batch);
|
||||
put_cpu_var(ppc64_tlb_batch);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called when terminating an mmu batch or when a batch
|
||||
* is full. It will perform the flush of all the entries currently stored
|
||||
* in a batch.
|
||||
*
|
||||
* Must be called from within some kind of spinlock/non-preempt region...
|
||||
*/
|
||||
void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
|
||||
{
|
||||
const struct cpumask *tmp;
|
||||
int i, local = 0;
|
||||
|
||||
i = batch->index;
|
||||
tmp = cpumask_of(smp_processor_id());
|
||||
if (cpumask_equal(mm_cpumask(batch->mm), tmp))
|
||||
local = 1;
|
||||
if (i == 1)
|
||||
flush_hash_page(batch->vpn[0], batch->pte[0],
|
||||
batch->psize, batch->ssize, local);
|
||||
else
|
||||
flush_hash_range(i, local);
|
||||
batch->index = 0;
|
||||
}
|
||||
|
||||
void tlb_flush(struct mmu_gather *tlb)
|
||||
{
|
||||
struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
|
||||
|
||||
/* If there's a TLB batch pending, then we must flush it because the
|
||||
* pages are going to be freed and we really don't want to have a CPU
|
||||
* access a freed page because it has a stale TLB
|
||||
*/
|
||||
if (tlbbatch->index)
|
||||
__flush_tlb_pending(tlbbatch);
|
||||
|
||||
put_cpu_var(ppc64_tlb_batch);
|
||||
}
|
||||
|
||||
/**
|
||||
* __flush_hash_table_range - Flush all HPTEs for a given address range
|
||||
* from the hash table (and the TLB). But keeps
|
||||
* the linux PTEs intact.
|
||||
*
|
||||
* @mm : mm_struct of the target address space (generally init_mm)
|
||||
* @start : starting address
|
||||
* @end : ending address (not included in the flush)
|
||||
*
|
||||
* This function is mostly to be used by some IO hotplug code in order
|
||||
* to remove all hash entries from a given address range used to map IO
|
||||
* space on a removed PCI-PCI bridge without tearing down the full mapping
|
||||
* since 64K pages may overlap with other bridges when using 64K pages
|
||||
* with 4K HW pages on IO space.
|
||||
*
|
||||
* Because of that usage pattern, it is implemented for small size rather
|
||||
* than speed.
|
||||
*/
|
||||
void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
int hugepage_shift;
|
||||
unsigned long flags;
|
||||
|
||||
start = _ALIGN_DOWN(start, PAGE_SIZE);
|
||||
end = _ALIGN_UP(end, PAGE_SIZE);
|
||||
|
||||
BUG_ON(!mm->pgd);
|
||||
|
||||
/* Note: Normally, we should only ever use a batch within a
|
||||
* PTE locked section. This violates the rule, but will work
|
||||
* since we don't actually modify the PTEs, we just flush the
|
||||
* hash while leaving the PTEs intact (including their reference
|
||||
* to being hashed). This is not the most performance oriented
|
||||
* way to do things but is fine for our needs here.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
arch_enter_lazy_mmu_mode();
|
||||
for (; start < end; start += PAGE_SIZE) {
|
||||
pte_t *ptep = find_linux_pte_or_hugepte(mm->pgd, start,
|
||||
&hugepage_shift);
|
||||
unsigned long pte;
|
||||
|
||||
if (ptep == NULL)
|
||||
continue;
|
||||
pte = pte_val(*ptep);
|
||||
if (hugepage_shift)
|
||||
trace_hugepage_invalidate(start, pte_val(pte));
|
||||
if (!(pte & _PAGE_HASHPTE))
|
||||
continue;
|
||||
if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte)))
|
||||
hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte);
|
||||
else
|
||||
hpte_need_flush(mm, start, ptep, pte, 0);
|
||||
}
|
||||
arch_leave_lazy_mmu_mode();
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
|
||||
{
|
||||
pte_t *pte;
|
||||
pte_t *start_pte;
|
||||
unsigned long flags;
|
||||
|
||||
addr = _ALIGN_DOWN(addr, PMD_SIZE);
|
||||
/* Note: Normally, we should only ever use a batch within a
|
||||
* PTE locked section. This violates the rule, but will work
|
||||
* since we don't actually modify the PTEs, we just flush the
|
||||
* hash while leaving the PTEs intact (including their reference
|
||||
* to being hashed). This is not the most performance oriented
|
||||
* way to do things but is fine for our needs here.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
arch_enter_lazy_mmu_mode();
|
||||
start_pte = pte_offset_map(pmd, addr);
|
||||
for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) {
|
||||
unsigned long pteval = pte_val(*pte);
|
||||
if (pteval & _PAGE_HASHPTE)
|
||||
hpte_need_flush(mm, addr, pte, pteval, 0);
|
||||
addr += PAGE_SIZE;
|
||||
}
|
||||
arch_leave_lazy_mmu_mode();
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
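As a reading aid only (not part of this commit), the following stand-alone C sketch models the batching policy that hpte_need_flush() and __flush_tlb_pending() implement above: entries accumulate per batch, the batch is flushed early whenever the mm, page size or segment size changes, and always once it hits the size limit. All names and values here are illustrative stand-ins, not kernel symbols.

#include <stdio.h>

#define BATCH_NR 192            /* stands in for PPC64_TLB_BATCH_NR */

struct tlb_batch {
	unsigned long vpn[BATCH_NR];
	int index;
	int psize, ssize;
	void *mm;
};

static void flush_pending(struct tlb_batch *b)
{
	if (b->index)
		printf("flushing %d entries\n", b->index); /* stands in for flush_hash_range() */
	b->index = 0;
}

static void batch_add(struct tlb_batch *b, void *mm, unsigned long vpn,
		      int psize, int ssize)
{
	/* Only one mm and one page/segment size may live in a batch at a time. */
	if (b->index && (b->mm != mm || b->psize != psize || b->ssize != ssize))
		flush_pending(b);
	if (!b->index) {
		b->mm = mm;
		b->psize = psize;
		b->ssize = ssize;
	}
	b->vpn[b->index++] = vpn;
	if (b->index >= BATCH_NR)
		flush_pending(b);
}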
1218
arch/powerpc/mm/tlb_low_64e.S
Normal file
File diff suppressed because it is too large
753
arch/powerpc/mm/tlb_nohash.c
Normal file
@ -0,0 +1,753 @@
/*
|
||||
* This file contains the routines for TLB flushing.
|
||||
* On machines where the MMU does not use a hash table to store virtual to
|
||||
* physical translations (ie, SW loaded TLBs or Book3E compliant processors;
|
||||
* this does -not- include 603 however which shares the implementation with
|
||||
* hash based processors)
|
||||
*
|
||||
* -- BenH
|
||||
*
|
||||
* Copyright 2008,2009 Ben Herrenschmidt <benh@kernel.crashing.org>
|
||||
* IBM Corp.
|
||||
*
|
||||
* Derived from arch/ppc/mm/init.c:
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
|
||||
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
|
||||
* Copyright (C) 1996 Paul Mackerras
|
||||
*
|
||||
* Derived from "arch/i386/mm/init.c"
|
||||
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/preempt.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/of_fdt.h>
|
||||
#include <linux/hugetlb.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/code-patching.h>
|
||||
#include <asm/hugetlb.h>
|
||||
#include <asm/paca.h>
|
||||
|
||||
#include "mmu_decl.h"
|
||||
|
||||
/*
|
||||
* This struct lists the sw-supported page sizes. The hardware MMU may support
|
||||
* other sizes not listed here. The .ind field is only used on MMUs that have
|
||||
* indirect page table entries.
|
||||
*/
|
||||
#ifdef CONFIG_PPC_BOOK3E_MMU
|
||||
#ifdef CONFIG_PPC_FSL_BOOK3E
|
||||
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
|
||||
[MMU_PAGE_4K] = {
|
||||
.shift = 12,
|
||||
.enc = BOOK3E_PAGESZ_4K,
|
||||
},
|
||||
[MMU_PAGE_2M] = {
|
||||
.shift = 21,
|
||||
.enc = BOOK3E_PAGESZ_2M,
|
||||
},
|
||||
[MMU_PAGE_4M] = {
|
||||
.shift = 22,
|
||||
.enc = BOOK3E_PAGESZ_4M,
|
||||
},
|
||||
[MMU_PAGE_16M] = {
|
||||
.shift = 24,
|
||||
.enc = BOOK3E_PAGESZ_16M,
|
||||
},
|
||||
[MMU_PAGE_64M] = {
|
||||
.shift = 26,
|
||||
.enc = BOOK3E_PAGESZ_64M,
|
||||
},
|
||||
[MMU_PAGE_256M] = {
|
||||
.shift = 28,
|
||||
.enc = BOOK3E_PAGESZ_256M,
|
||||
},
|
||||
[MMU_PAGE_1G] = {
|
||||
.shift = 30,
|
||||
.enc = BOOK3E_PAGESZ_1GB,
|
||||
},
|
||||
};
|
||||
#else
|
||||
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
|
||||
[MMU_PAGE_4K] = {
|
||||
.shift = 12,
|
||||
.ind = 20,
|
||||
.enc = BOOK3E_PAGESZ_4K,
|
||||
},
|
||||
[MMU_PAGE_16K] = {
|
||||
.shift = 14,
|
||||
.enc = BOOK3E_PAGESZ_16K,
|
||||
},
|
||||
[MMU_PAGE_64K] = {
|
||||
.shift = 16,
|
||||
.ind = 28,
|
||||
.enc = BOOK3E_PAGESZ_64K,
|
||||
},
|
||||
[MMU_PAGE_1M] = {
|
||||
.shift = 20,
|
||||
.enc = BOOK3E_PAGESZ_1M,
|
||||
},
|
||||
[MMU_PAGE_16M] = {
|
||||
.shift = 24,
|
||||
.ind = 36,
|
||||
.enc = BOOK3E_PAGESZ_16M,
|
||||
},
|
||||
[MMU_PAGE_256M] = {
|
||||
.shift = 28,
|
||||
.enc = BOOK3E_PAGESZ_256M,
|
||||
},
|
||||
[MMU_PAGE_1G] = {
|
||||
.shift = 30,
|
||||
.enc = BOOK3E_PAGESZ_1GB,
|
||||
},
|
||||
};
|
||||
#endif /* CONFIG_PPC_FSL_BOOK3E */
|
||||
|
||||
static inline int mmu_get_tsize(int psize)
|
||||
{
|
||||
return mmu_psize_defs[psize].enc;
|
||||
}
|
||||
#else
|
||||
static inline int mmu_get_tsize(int psize)
|
||||
{
|
||||
/* This isn't used on !Book3E for now */
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_PPC_BOOK3E_MMU */
|
||||
|
||||
/* The variables below are currently only used on 64-bit Book3E
|
||||
* though this will probably be made common with other nohash
|
||||
* implementations at some point
|
||||
*/
|
||||
#ifdef CONFIG_PPC64
|
||||
|
||||
int mmu_linear_psize; /* Page size used for the linear mapping */
|
||||
int mmu_pte_psize; /* Page size used for PTE pages */
|
||||
int mmu_vmemmap_psize; /* Page size used for the virtual mem map */
|
||||
int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */
|
||||
unsigned long linear_map_top; /* Top of linear mapping */
|
||||
|
||||
|
||||
/*
|
||||
* Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug
|
||||
* exceptions. This is used for bolted and e6500 TLB miss handlers which
|
||||
* do not modify this SPRG in the TLB miss code; for other TLB miss handlers,
|
||||
* this is set to zero.
|
||||
*/
|
||||
int extlb_level_exc;
|
||||
|
||||
#endif /* CONFIG_PPC64 */
|
||||
|
||||
#ifdef CONFIG_PPC_FSL_BOOK3E
|
||||
/* next_tlbcam_idx is used to round-robin tlbcam entry assignment */
|
||||
DEFINE_PER_CPU(int, next_tlbcam_idx);
|
||||
EXPORT_PER_CPU_SYMBOL(next_tlbcam_idx);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Base TLB flushing operations:
|
||||
*
|
||||
* - flush_tlb_mm(mm) flushes the specified mm context TLB's
|
||||
* - flush_tlb_page(vma, vmaddr) flushes one page
|
||||
* - flush_tlb_range(vma, start, end) flushes a range of pages
|
||||
* - flush_tlb_kernel_range(start, end) flushes kernel pages
|
||||
*
|
||||
* - local_* variants of page and mm only apply to the current
|
||||
* processor
|
||||
*/
|
||||
|
||||
/*
|
||||
* These are the base non-SMP variants of page and mm flushing
|
||||
*/
|
||||
void local_flush_tlb_mm(struct mm_struct *mm)
|
||||
{
|
||||
unsigned int pid;
|
||||
|
||||
preempt_disable();
|
||||
pid = mm->context.id;
|
||||
if (pid != MMU_NO_CONTEXT)
|
||||
_tlbil_pid(pid);
|
||||
preempt_enable();
|
||||
}
|
||||
EXPORT_SYMBOL(local_flush_tlb_mm);
|
||||
|
||||
void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
|
||||
int tsize, int ind)
|
||||
{
|
||||
unsigned int pid;
|
||||
|
||||
preempt_disable();
|
||||
pid = mm ? mm->context.id : 0;
|
||||
if (pid != MMU_NO_CONTEXT)
|
||||
_tlbil_va(vmaddr, pid, tsize, ind);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
|
||||
{
|
||||
__local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
|
||||
mmu_get_tsize(mmu_virtual_psize), 0);
|
||||
}
|
||||
EXPORT_SYMBOL(local_flush_tlb_page);
|
||||
|
||||
/*
|
||||
* And here are the SMP non-local implementations
|
||||
*/
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
static DEFINE_RAW_SPINLOCK(tlbivax_lock);
|
||||
|
||||
static int mm_is_core_local(struct mm_struct *mm)
|
||||
{
|
||||
return cpumask_subset(mm_cpumask(mm),
|
||||
topology_thread_cpumask(smp_processor_id()));
|
||||
}
|
||||
|
||||
struct tlb_flush_param {
|
||||
unsigned long addr;
|
||||
unsigned int pid;
|
||||
unsigned int tsize;
|
||||
unsigned int ind;
|
||||
};
|
||||
|
||||
static void do_flush_tlb_mm_ipi(void *param)
|
||||
{
|
||||
struct tlb_flush_param *p = param;
|
||||
|
||||
_tlbil_pid(p ? p->pid : 0);
|
||||
}
|
||||
|
||||
static void do_flush_tlb_page_ipi(void *param)
|
||||
{
|
||||
struct tlb_flush_param *p = param;
|
||||
|
||||
_tlbil_va(p->addr, p->pid, p->tsize, p->ind);
|
||||
}
|
||||
|
||||
|
||||
/* Note on invalidations and PID:
|
||||
*
|
||||
* We snapshot the PID with preempt disabled. At this point, it can still
|
||||
* change either because:
|
||||
* - our context is being stolen (PID -> NO_CONTEXT) on another CPU
|
||||
* - we are invalidating some target that isn't currently running here
|
||||
* and is concurrently acquiring a new PID on another CPU
|
||||
* - some other CPU is re-acquiring a lost PID for this mm
|
||||
* etc...
|
||||
*
|
||||
* However, this shouldn't be a problem as we only guarantee
|
||||
* invalidation of TLB entries present prior to this call, so we
|
||||
* don't care about the PID changing, and invalidating a stale PID
|
||||
* is generally harmless.
|
||||
*/
|
||||
|
||||
void flush_tlb_mm(struct mm_struct *mm)
|
||||
{
|
||||
unsigned int pid;
|
||||
|
||||
preempt_disable();
|
||||
pid = mm->context.id;
|
||||
if (unlikely(pid == MMU_NO_CONTEXT))
|
||||
goto no_context;
|
||||
if (!mm_is_core_local(mm)) {
|
||||
struct tlb_flush_param p = { .pid = pid };
|
||||
/* Ignores smp_processor_id() even if set. */
|
||||
smp_call_function_many(mm_cpumask(mm),
|
||||
do_flush_tlb_mm_ipi, &p, 1);
|
||||
}
|
||||
_tlbil_pid(pid);
|
||||
no_context:
|
||||
preempt_enable();
|
||||
}
|
||||
EXPORT_SYMBOL(flush_tlb_mm);
|
||||
|
||||
void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
|
||||
int tsize, int ind)
|
||||
{
|
||||
struct cpumask *cpu_mask;
|
||||
unsigned int pid;
|
||||
|
||||
preempt_disable();
|
||||
pid = mm ? mm->context.id : 0;
|
||||
if (unlikely(pid == MMU_NO_CONTEXT))
|
||||
goto bail;
|
||||
cpu_mask = mm_cpumask(mm);
|
||||
if (!mm_is_core_local(mm)) {
|
||||
/* If broadcast tlbivax is supported, use it */
|
||||
if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) {
|
||||
int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL);
|
||||
if (lock)
|
||||
raw_spin_lock(&tlbivax_lock);
|
||||
_tlbivax_bcast(vmaddr, pid, tsize, ind);
|
||||
if (lock)
|
||||
raw_spin_unlock(&tlbivax_lock);
|
||||
goto bail;
|
||||
} else {
|
||||
struct tlb_flush_param p = {
|
||||
.pid = pid,
|
||||
.addr = vmaddr,
|
||||
.tsize = tsize,
|
||||
.ind = ind,
|
||||
};
|
||||
/* Ignores smp_processor_id() even if set in cpu_mask */
|
||||
smp_call_function_many(cpu_mask,
|
||||
do_flush_tlb_page_ipi, &p, 1);
|
||||
}
|
||||
}
|
||||
_tlbil_va(vmaddr, pid, tsize, ind);
|
||||
bail:
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
|
||||
{
|
||||
#ifdef CONFIG_HUGETLB_PAGE
|
||||
if (vma && is_vm_hugetlb_page(vma))
|
||||
flush_hugetlb_page(vma, vmaddr);
|
||||
#endif
|
||||
|
||||
__flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
|
||||
mmu_get_tsize(mmu_virtual_psize), 0);
|
||||
}
|
||||
EXPORT_SYMBOL(flush_tlb_page);
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#ifdef CONFIG_PPC_47x
|
||||
void __init early_init_mmu_47x(void)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
unsigned long root = of_get_flat_dt_root();
|
||||
if (of_get_flat_dt_prop(root, "cooperative-partition", NULL))
|
||||
mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST);
|
||||
#endif /* CONFIG_SMP */
|
||||
}
|
||||
#endif /* CONFIG_PPC_47x */
|
||||
|
||||
/*
|
||||
* Flush kernel TLB entries in the given range
|
||||
*/
|
||||
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
preempt_disable();
|
||||
smp_call_function(do_flush_tlb_mm_ipi, NULL, 1);
|
||||
_tlbil_pid(0);
|
||||
preempt_enable();
|
||||
#else
|
||||
_tlbil_pid(0);
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(flush_tlb_kernel_range);
|
||||
|
||||
/*
|
||||
* Currently, for range flushing, we just do a full mm flush. This should
|
||||
* be optimized based on a threshold on the size of the range, since
|
||||
* some implementations can stack multiple tlbivax before a tlbsync, but
|
||||
* for now, we keep it that way
|
||||
*/
|
||||
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
|
||||
unsigned long end)
|
||||
|
||||
{
|
||||
flush_tlb_mm(vma->vm_mm);
|
||||
}
|
||||
EXPORT_SYMBOL(flush_tlb_range);
|
||||
|
||||
void tlb_flush(struct mmu_gather *tlb)
|
||||
{
|
||||
flush_tlb_mm(tlb->mm);
|
||||
}
|
||||
|
||||
/*
|
||||
* Below are functions specific to the 64-bit variant of Book3E though that
|
||||
* may change in the future
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_PPC64
|
||||
|
||||
/*
|
||||
* Handling of virtual linear page tables or indirect TLB entries
|
||||
* flushing when PTE pages are freed
|
||||
*/
|
||||
void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
|
||||
{
|
||||
int tsize = mmu_psize_defs[mmu_pte_psize].enc;
|
||||
|
||||
if (book3e_htw_mode != PPC_HTW_NONE) {
|
||||
unsigned long start = address & PMD_MASK;
|
||||
unsigned long end = address + PMD_SIZE;
|
||||
unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift;
|
||||
|
||||
/* This isn't the most optimal, ideally we would factor out the
|
||||
* while preempt & CPU mask mucking around, or even the IPI but
|
||||
* it will do for now
|
||||
*/
|
||||
while (start < end) {
|
||||
__flush_tlb_page(tlb->mm, start, tsize, 1);
|
||||
start += size;
|
||||
}
|
||||
} else {
|
||||
unsigned long rmask = 0xf000000000000000ul;
|
||||
unsigned long rid = (address & rmask) | 0x1000000000000000ul;
|
||||
unsigned long vpte = address & ~rmask;
|
||||
|
||||
#ifdef CONFIG_PPC_64K_PAGES
|
||||
vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful;
|
||||
#else
|
||||
vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;
|
||||
#endif
|
||||
vpte |= rid;
|
||||
__flush_tlb_page(tlb->mm, vpte, tsize, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void setup_page_sizes(void)
|
||||
{
|
||||
unsigned int tlb0cfg;
|
||||
unsigned int tlb0ps;
|
||||
unsigned int eptcfg;
|
||||
int i, psize;
|
||||
|
||||
#ifdef CONFIG_PPC_FSL_BOOK3E
|
||||
unsigned int mmucfg = mfspr(SPRN_MMUCFG);
|
||||
int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E);
|
||||
|
||||
if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
|
||||
unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG);
|
||||
unsigned int min_pg, max_pg;
|
||||
|
||||
min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT;
|
||||
max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT;
|
||||
|
||||
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
|
||||
struct mmu_psize_def *def;
|
||||
unsigned int shift;
|
||||
|
||||
def = &mmu_psize_defs[psize];
|
||||
shift = def->shift;
|
||||
|
||||
if (shift == 0 || shift & 1)
|
||||
continue;
|
||||
|
||||
/* adjust to be in terms of 4^shift Kb */
|
||||
shift = (shift - 10) >> 1;
|
||||
|
||||
if ((shift >= min_pg) && (shift <= max_pg))
|
||||
def->flags |= MMU_PAGE_SIZE_DIRECT;
|
||||
}
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) {
|
||||
u32 tlb1cfg, tlb1ps;
|
||||
|
||||
tlb0cfg = mfspr(SPRN_TLB0CFG);
|
||||
tlb1cfg = mfspr(SPRN_TLB1CFG);
|
||||
tlb1ps = mfspr(SPRN_TLB1PS);
|
||||
eptcfg = mfspr(SPRN_EPTCFG);
|
||||
|
||||
if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT))
|
||||
book3e_htw_mode = PPC_HTW_E6500;
|
||||
|
||||
/*
|
||||
* We expect 4K subpage size and unrestricted indirect size.
|
||||
* The lack of a restriction on indirect size is a Freescale
|
||||
* extension, indicated by PSn = 0 but SPSn != 0.
|
||||
*/
|
||||
if (eptcfg != 2)
|
||||
book3e_htw_mode = PPC_HTW_NONE;
|
||||
|
||||
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
|
||||
struct mmu_psize_def *def = &mmu_psize_defs[psize];
|
||||
|
||||
if (tlb1ps & (1U << (def->shift - 10))) {
|
||||
def->flags |= MMU_PAGE_SIZE_DIRECT;
|
||||
|
||||
if (book3e_htw_mode && psize == MMU_PAGE_2M)
|
||||
def->flags |= MMU_PAGE_SIZE_INDIRECT;
|
||||
}
|
||||
}
|
||||
|
||||
goto out;
|
||||
}
|
||||
#endif
|
||||
|
||||
tlb0cfg = mfspr(SPRN_TLB0CFG);
|
||||
tlb0ps = mfspr(SPRN_TLB0PS);
|
||||
eptcfg = mfspr(SPRN_EPTCFG);
|
||||
|
||||
/* Look for supported direct sizes */
|
||||
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
|
||||
struct mmu_psize_def *def = &mmu_psize_defs[psize];
|
||||
|
||||
if (tlb0ps & (1U << (def->shift - 10)))
|
||||
def->flags |= MMU_PAGE_SIZE_DIRECT;
|
||||
}
|
||||
|
||||
/* Indirect page sizes supported ? */
|
||||
if ((tlb0cfg & TLBnCFG_IND) == 0 ||
|
||||
(tlb0cfg & TLBnCFG_PT) == 0)
|
||||
goto out;
|
||||
|
||||
book3e_htw_mode = PPC_HTW_IBM;
|
||||
|
||||
/* Now, we only deal with one IND page size for each
|
||||
* direct size. Hopefully all implementations today are
|
||||
* unambiguous, but we might want to be careful in the
|
||||
* future.
|
||||
*/
|
||||
for (i = 0; i < 3; i++) {
|
||||
unsigned int ps, sps;
|
||||
|
||||
sps = eptcfg & 0x1f;
|
||||
eptcfg >>= 5;
|
||||
ps = eptcfg & 0x1f;
|
||||
eptcfg >>= 5;
|
||||
if (!ps || !sps)
|
||||
continue;
|
||||
for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
|
||||
struct mmu_psize_def *def = &mmu_psize_defs[psize];
|
||||
|
||||
if (ps == (def->shift - 10))
|
||||
def->flags |= MMU_PAGE_SIZE_INDIRECT;
|
||||
if (sps == (def->shift - 10))
|
||||
def->ind = ps + 10;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
/* Cleanup array and print summary */
|
||||
pr_info("MMU: Supported page sizes\n");
|
||||
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
|
||||
struct mmu_psize_def *def = &mmu_psize_defs[psize];
|
||||
const char *__page_type_names[] = {
|
||||
"unsupported",
|
||||
"direct",
|
||||
"indirect",
|
||||
"direct & indirect"
|
||||
};
|
||||
if (def->flags == 0) {
|
||||
def->shift = 0;
|
||||
continue;
|
||||
}
|
||||
pr_info(" %8ld KB as %s\n", 1ul << (def->shift - 10),
|
||||
__page_type_names[def->flags & 0x3]);
|
||||
}
|
||||
}
|
||||
|
||||
static void setup_mmu_htw(void)
|
||||
{
|
||||
/*
|
||||
* If we want to use HW tablewalk, enable it by patching the TLB miss
|
||||
* handlers to branch to the one dedicated to it.
|
||||
*/
|
||||
|
||||
switch (book3e_htw_mode) {
|
||||
case PPC_HTW_IBM:
|
||||
patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e);
|
||||
patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e);
|
||||
break;
|
||||
#ifdef CONFIG_PPC_FSL_BOOK3E
|
||||
case PPC_HTW_E6500:
|
||||
extlb_level_exc = EX_TLB_SIZE;
|
||||
patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e);
|
||||
patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e);
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
pr_info("MMU: Book3E HW tablewalk %s\n",
|
||||
book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported");
|
||||
}
|
||||
|
||||
/*
|
||||
* Early initialization of the MMU TLB code
|
||||
*/
|
||||
static void early_init_this_mmu(void)
|
||||
{
|
||||
unsigned int mas4;
|
||||
|
||||
/* Set MAS4 based on page table setting */
|
||||
|
||||
mas4 = 0x4 << MAS4_WIMGED_SHIFT;
|
||||
switch (book3e_htw_mode) {
|
||||
case PPC_HTW_E6500:
|
||||
mas4 |= MAS4_INDD;
|
||||
mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT;
|
||||
mas4 |= MAS4_TLBSELD(1);
|
||||
mmu_pte_psize = MMU_PAGE_2M;
|
||||
break;
|
||||
|
||||
case PPC_HTW_IBM:
|
||||
mas4 |= MAS4_INDD;
|
||||
#ifdef CONFIG_PPC_64K_PAGES
|
||||
mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT;
|
||||
mmu_pte_psize = MMU_PAGE_256M;
|
||||
#else
|
||||
mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT;
|
||||
mmu_pte_psize = MMU_PAGE_1M;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PPC_HTW_NONE:
|
||||
#ifdef CONFIG_PPC_64K_PAGES
|
||||
mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT;
|
||||
#else
|
||||
mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
|
||||
#endif
|
||||
mmu_pte_psize = mmu_virtual_psize;
|
||||
break;
|
||||
}
|
||||
mtspr(SPRN_MAS4, mas4);
|
||||
|
||||
#ifdef CONFIG_PPC_FSL_BOOK3E
|
||||
if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
|
||||
unsigned int num_cams;
|
||||
|
||||
/* use a quarter of the TLBCAM for bolted linear map */
|
||||
num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
|
||||
linear_map_top = map_mem_in_cams(linear_map_top, num_cams);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* A sync won't hurt us after mucking around with
|
||||
* the MMU configuration
|
||||
*/
|
||||
mb();
|
||||
}
|
||||
|
||||
static void __init early_init_mmu_global(void)
|
||||
{
|
||||
/* XXX This will have to be decided at runtime, but right
|
||||
* now our boot and TLB miss code hard wires it. Ideally
|
||||
* we should find out a suitable page size and patch the
|
||||
* TLB miss code (either that or use the PACA to store
|
||||
* the value we want)
|
||||
*/
|
||||
mmu_linear_psize = MMU_PAGE_1G;
|
||||
|
||||
/* XXX This should be decided at runtime based on supported
|
||||
* page sizes in the TLB, but for now let's assume 16M is
|
||||
* always there and a good fit (which it probably is)
|
||||
*
|
||||
* Freescale booke only supports 4K pages in TLB0, so use that.
|
||||
*/
|
||||
if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
|
||||
mmu_vmemmap_psize = MMU_PAGE_4K;
|
||||
else
|
||||
mmu_vmemmap_psize = MMU_PAGE_16M;
|
||||
|
||||
/* XXX This code only checks for TLB 0 capabilities and doesn't
|
||||
* check what page size combos are supported by the HW. It
|
||||
* also doesn't handle the case where a separate array holds
|
||||
* the IND entries from the array loaded by the PT.
|
||||
*/
|
||||
/* Look for supported page sizes */
|
||||
setup_page_sizes();
|
||||
|
||||
/* Look for HW tablewalk support */
|
||||
setup_mmu_htw();
|
||||
|
||||
#ifdef CONFIG_PPC_FSL_BOOK3E
|
||||
if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
|
||||
if (book3e_htw_mode == PPC_HTW_NONE) {
|
||||
extlb_level_exc = EX_TLB_SIZE;
|
||||
patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
|
||||
patch_exception(0x1e0,
|
||||
exc_instruction_tlb_miss_bolted_book3e);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Set the global containing the top of the linear mapping
|
||||
* for use by the TLB miss code
|
||||
*/
|
||||
linear_map_top = memblock_end_of_DRAM();
|
||||
}
|
||||
|
||||
static void __init early_mmu_set_memory_limit(void)
|
||||
{
|
||||
#ifdef CONFIG_PPC_FSL_BOOK3E
|
||||
if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
|
||||
/*
|
||||
* Limit memory so we don't have linear faults.
|
||||
* Unlike memblock_set_current_limit, which limits
|
||||
* memory available during early boot, this permanently
|
||||
* reduces the memory available to Linux. We need to
|
||||
* do this because highmem is not supported on 64-bit.
|
||||
*/
|
||||
memblock_enforce_memory_limit(linear_map_top);
|
||||
}
|
||||
#endif
|
||||
|
||||
memblock_set_current_limit(linear_map_top);
|
||||
}
|
||||
|
||||
/* boot cpu only */
|
||||
void __init early_init_mmu(void)
|
||||
{
|
||||
early_init_mmu_global();
|
||||
early_init_this_mmu();
|
||||
early_mmu_set_memory_limit();
|
||||
}
|
||||
|
||||
void early_init_mmu_secondary(void)
|
||||
{
|
||||
early_init_this_mmu();
|
||||
}
|
||||
|
||||
void setup_initial_memory_limit(phys_addr_t first_memblock_base,
|
||||
phys_addr_t first_memblock_size)
|
||||
{
|
||||
/* On non-FSL Embedded 64-bit, we adjust the RMA size to match
|
||||
* the bolted TLB entry. We know for now that only 1G
|
||||
* entries are supported though that may eventually
|
||||
* change.
|
||||
*
|
||||
* on FSL Embedded 64-bit, we adjust the RMA size to match the
|
||||
* first bolted TLB entry size. We still limit max to 1G even if
|
||||
* the TLB could cover more. This is due to what the early init
|
||||
* code is setup to do.
|
||||
*
|
||||
* We crop it to the size of the first MEMBLOCK to
|
||||
* avoid going over total available memory just in case...
|
||||
*/
|
||||
#ifdef CONFIG_PPC_FSL_BOOK3E
|
||||
if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
|
||||
unsigned long linear_sz;
|
||||
linear_sz = calc_cam_sz(first_memblock_size, PAGE_OFFSET,
|
||||
first_memblock_base);
|
||||
ppc64_rma_size = min_t(u64, linear_sz, 0x40000000);
|
||||
} else
|
||||
#endif
|
||||
ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
|
||||
|
||||
/* Finally limit subsequent allocations */
|
||||
memblock_set_current_limit(first_memblock_base + ppc64_rma_size);
|
||||
}
|
||||
#else /* ! CONFIG_PPC64 */
|
||||
void __init early_init_mmu(void)
|
||||
{
|
||||
#ifdef CONFIG_PPC_47x
|
||||
early_init_mmu_47x();
|
||||
#endif
|
||||
}
|
||||
#endif /* CONFIG_PPC64 */
|
||||
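For orientation only (not part of this commit), here is a small user-space sketch of the summary logic at the end of setup_page_sizes() above: each supported size prints as shift-derived kilobytes, and the low two flag bits select the direct/indirect label. The constants, flag values and table contents below are illustrative, not the kernel's.

#include <stdio.h>

#define SIZE_DIRECT   0x1      /* stands in for MMU_PAGE_SIZE_DIRECT */
#define SIZE_INDIRECT 0x2      /* stands in for MMU_PAGE_SIZE_INDIRECT */

struct psize_def {
	unsigned int shift;    /* log2 of the page size in bytes */
	unsigned int flags;
};

int main(void)
{
	static const struct psize_def defs[] = {
		{ 12, SIZE_DIRECT },                 /* 4K  */
		{ 21, SIZE_DIRECT | SIZE_INDIRECT }, /* 2M  */
		{ 24, SIZE_DIRECT },                 /* 16M */
		{ 30, 0 },                           /* 1G, unsupported here */
	};
	static const char *names[] = {
		"unsupported", "direct", "indirect", "direct & indirect"
	};
	unsigned int i;

	for (i = 0; i < sizeof(defs) / sizeof(defs[0]); i++) {
		if (!defs[i].flags)
			continue;  /* same as the kernel zeroing .shift and skipping */
		printf("%8lu KB as %s\n", 1ul << (defs[i].shift - 10),
		       names[defs[i].flags & 0x3]);
	}
	return 0;
}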
426
arch/powerpc/mm/tlb_nohash_low.S
Normal file
@ -0,0 +1,426 @@
/*
|
||||
* This file contains low-level functions for performing various
|
||||
* types of TLB invalidations on various processors with no hash
|
||||
* table.
|
||||
*
|
||||
* This file implements the following functions for all no-hash
|
||||
* processors. Some aren't implemented for some variants. Some
|
||||
* are inline in tlbflush.h
|
||||
*
|
||||
* - tlbil_va
|
||||
* - tlbil_pid
|
||||
* - tlbil_all
|
||||
* - tlbivax_bcast
|
||||
*
|
||||
* Code mostly moved over from misc_32.S
|
||||
*
|
||||
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
||||
*
|
||||
* Partially rewritten by Cort Dougan (cort@cs.nmt.edu)
|
||||
* Paul Mackerras, Kumar Gala and Benjamin Herrenschmidt.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <asm/reg.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/cputable.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/ppc_asm.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/bug.h>
|
||||
|
||||
#if defined(CONFIG_40x)
|
||||
|
||||
/*
|
||||
* 40x implementation needs only tlbil_va
|
||||
*/
|
||||
_GLOBAL(__tlbil_va)
|
||||
/* We run the search with interrupts disabled because we have to change
|
||||
* the PID and I don't want to preempt when that happens.
|
||||
*/
|
||||
mfmsr r5
|
||||
mfspr r6,SPRN_PID
|
||||
wrteei 0
|
||||
mtspr SPRN_PID,r4
|
||||
tlbsx. r3, 0, r3
|
||||
mtspr SPRN_PID,r6
|
||||
wrtee r5
|
||||
bne 1f
|
||||
sync
|
||||
/* There are only 64 TLB entries, so r3 < 64, which means bit 25 is
|
||||
* clear. Since 25 is the V bit in the TLB_TAG, loading this value
|
||||
* will invalidate the TLB entry. */
|
||||
tlbwe r3, r3, TLB_TAG
|
||||
isync
|
||||
1: blr
|
||||
|
||||
#elif defined(CONFIG_8xx)
|
||||
|
||||
/*
|
||||
* Nothing to do for 8xx, everything is inline
|
||||
*/
|
||||
|
||||
#elif defined(CONFIG_44x) /* Includes 47x */
|
||||
|
||||
/*
|
||||
* 440 implementation uses tlbsx/we for tlbil_va and a full sweep
|
||||
* of the TLB for everything else.
|
||||
*/
|
||||
_GLOBAL(__tlbil_va)
|
||||
mfspr r5,SPRN_MMUCR
|
||||
mfmsr r10
|
||||
|
||||
/*
|
||||
* We write 16 bits of STID since 47x supports that much, we
|
||||
* will never be passed out of bounds values on 440 (hopefully)
|
||||
*/
|
||||
rlwimi r5,r4,0,16,31
|
||||
|
||||
/* We have to run the search with interrupts disabled, otherwise
|
||||
* an interrupt which causes a TLB miss can clobber the MMUCR
|
||||
* between the mtspr and the tlbsx.
|
||||
*
|
||||
* Critical and Machine Check interrupts take care of saving
|
||||
* and restoring MMUCR, so only normal interrupts have to be
|
||||
* taken care of.
|
||||
*/
|
||||
wrteei 0
|
||||
mtspr SPRN_MMUCR,r5
|
||||
tlbsx. r6,0,r3
|
||||
bne 10f
|
||||
sync
|
||||
BEGIN_MMU_FTR_SECTION
|
||||
b 2f
|
||||
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
|
||||
/* On 440 there are only 64 TLB entries, so r3 < 64, which means bit
|
||||
* 22 is clear. Since 22 is the V bit in the TLB_PAGEID, loading this
|
||||
* value will invalidate the TLB entry.
|
||||
*/
|
||||
tlbwe r6,r6,PPC44x_TLB_PAGEID
|
||||
isync
|
||||
10: wrtee r10
|
||||
blr
|
||||
2:
|
||||
#ifdef CONFIG_PPC_47x
|
||||
oris r7,r6,0x8000 /* specify way explicitly */
|
||||
clrrwi r4,r3,12 /* get an EPN for the hashing with V = 0 */
|
||||
ori r4,r4,PPC47x_TLBE_SIZE
|
||||
tlbwe r4,r7,0 /* write it */
|
||||
isync
|
||||
wrtee r10
|
||||
blr
|
||||
#else /* CONFIG_PPC_47x */
|
||||
1: trap
|
||||
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0;
|
||||
#endif /* !CONFIG_PPC_47x */
|
||||
|
||||
_GLOBAL(_tlbil_all)
|
||||
_GLOBAL(_tlbil_pid)
|
||||
BEGIN_MMU_FTR_SECTION
|
||||
b 2f
|
||||
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
|
||||
li r3,0
|
||||
sync
|
||||
|
||||
/* Load high watermark */
|
||||
lis r4,tlb_44x_hwater@ha
|
||||
lwz r5,tlb_44x_hwater@l(r4)
|
||||
|
||||
1: tlbwe r3,r3,PPC44x_TLB_PAGEID
|
||||
addi r3,r3,1
|
||||
cmpw 0,r3,r5
|
||||
ble 1b
|
||||
|
||||
isync
|
||||
blr
|
||||
2:
|
||||
#ifdef CONFIG_PPC_47x
|
||||
/* 476 variant. There's no simple way to do this; hopefully we'll
|
||||
* try to limit the amount of such full invalidates
|
||||
*/
|
||||
mfmsr r11 /* Interrupts off */
|
||||
wrteei 0
|
||||
li r3,-1 /* Current set */
|
||||
lis r10,tlb_47x_boltmap@h
|
||||
ori r10,r10,tlb_47x_boltmap@l
|
||||
lis r7,0x8000 /* Specify way explicitly */
|
||||
|
||||
b 9f /* For each set */
|
||||
|
||||
1: li r9,4 /* Number of ways */
|
||||
li r4,0 /* Current way */
|
||||
li r6,0 /* Default entry value 0 */
|
||||
andi. r0,r8,1 /* Check if way 0 is bolted */
|
||||
mtctr r9 /* Load way counter */
|
||||
bne- 3f /* Bolted, skip loading it */
|
||||
|
||||
2: /* For each way */
|
||||
or r5,r3,r4 /* Make way|index for tlbre */
|
||||
rlwimi r5,r5,16,8,15 /* Copy index into position */
|
||||
tlbre r6,r5,0 /* Read entry */
|
||||
3: addis r4,r4,0x2000 /* Next way */
|
||||
andi. r0,r6,PPC47x_TLB0_VALID /* Valid entry ? */
|
||||
beq 4f /* Nope, skip it */
|
||||
rlwimi r7,r5,0,1,2 /* Insert way number */
|
||||
rlwinm r6,r6,0,21,19 /* Clear V */
|
||||
tlbwe r6,r7,0 /* Write it */
|
||||
4: bdnz 2b /* Loop for each way */
|
||||
srwi r8,r8,1 /* Next boltmap bit */
|
||||
9: cmpwi cr1,r3,255 /* Last set done ? */
|
||||
addi r3,r3,1 /* Next set */
|
||||
beq cr1,1f /* End of loop */
|
||||
andi. r0,r3,0x1f /* Need to load a new boltmap word ? */
|
||||
bne 1b /* No, loop */
|
||||
lwz r8,0(r10) /* Load boltmap entry */
|
||||
addi r10,r10,4 /* Next word */
|
||||
b 1b /* Then loop */
|
||||
1: isync /* Sync shadows */
|
||||
wrtee r11
|
||||
#else /* CONFIG_PPC_47x */
|
||||
1: trap
|
||||
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0;
|
||||
#endif /* !CONFIG_PPC_47x */
|
||||
blr
|
||||
|
||||
#ifdef CONFIG_PPC_47x
|
||||
|
||||
/*
|
||||
* _tlbivax_bcast is only on 47x. We don't bother doing a runtime
|
||||
* check, though; it will blow up soon enough if we mistakenly try
|
||||
* to use it on a 440.
|
||||
*/
|
||||
_GLOBAL(_tlbivax_bcast)
|
||||
mfspr r5,SPRN_MMUCR
|
||||
mfmsr r10
|
||||
rlwimi r5,r4,0,16,31
|
||||
wrteei 0
|
||||
mtspr SPRN_MMUCR,r5
|
||||
isync
|
||||
PPC_TLBIVAX(0, R3)
|
||||
isync
|
||||
eieio
|
||||
tlbsync
|
||||
BEGIN_FTR_SECTION
|
||||
b 1f
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_476_DD2)
|
||||
sync
|
||||
wrtee r10
|
||||
blr
|
||||
/*
|
||||
* DD2 HW could hang if an instruction fetch happens before msync completes.
|
||||
* Touch enough instruction cache lines to ensure cache hits
|
||||
*/
|
||||
1: mflr r9
|
||||
bl 2f
|
||||
2: mflr r6
|
||||
li r7,32
|
||||
PPC_ICBT(0,R6,R7) /* touch next cache line */
|
||||
add r6,r6,r7
|
||||
PPC_ICBT(0,R6,R7) /* touch next cache line */
|
||||
add r6,r6,r7
|
||||
PPC_ICBT(0,R6,R7) /* touch next cache line */
|
||||
sync
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
mtlr r9
|
||||
wrtee r10
|
||||
blr
|
||||
#endif /* CONFIG_PPC_47x */
|
||||
|
||||
#elif defined(CONFIG_FSL_BOOKE)
|
||||
/*
|
||||
* FSL BookE implementations.
|
||||
*
|
||||
* Since feature sections are using _SECTION_ELSE we need
|
||||
* to have the larger code path before the _SECTION_ELSE
|
||||
*/
|
||||
|
||||
/*
|
||||
* Flush MMU TLB on the local processor
|
||||
*/
|
||||
_GLOBAL(_tlbil_all)
|
||||
BEGIN_MMU_FTR_SECTION
|
||||
li r3,(MMUCSR0_TLBFI)@l
|
||||
mtspr SPRN_MMUCSR0, r3
|
||||
1:
|
||||
mfspr r3,SPRN_MMUCSR0
|
||||
andi. r3,r3,MMUCSR0_TLBFI@l
|
||||
bne 1b
|
||||
MMU_FTR_SECTION_ELSE
|
||||
PPC_TLBILX_ALL(0,R0)
|
||||
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX)
|
||||
msync
|
||||
isync
|
||||
blr
|
||||
|
||||
_GLOBAL(_tlbil_pid)
|
||||
BEGIN_MMU_FTR_SECTION
|
||||
slwi r3,r3,16
|
||||
mfmsr r10
|
||||
wrteei 0
|
||||
mfspr r4,SPRN_MAS6 /* save MAS6 */
|
||||
mtspr SPRN_MAS6,r3
|
||||
PPC_TLBILX_PID(0,R0)
|
||||
mtspr SPRN_MAS6,r4 /* restore MAS6 */
|
||||
wrtee r10
|
||||
MMU_FTR_SECTION_ELSE
|
||||
li r3,(MMUCSR0_TLBFI)@l
|
||||
mtspr SPRN_MMUCSR0, r3
|
||||
1:
|
||||
mfspr r3,SPRN_MMUCSR0
|
||||
andi. r3,r3,MMUCSR0_TLBFI@l
|
||||
bne 1b
|
||||
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBILX)
|
||||
msync
|
||||
isync
|
||||
blr
|
||||
|
||||
/*
|
||||
* Flush MMU TLB for a particular address, but only on the local processor
|
||||
* (no broadcast)
|
||||
*/
|
||||
_GLOBAL(__tlbil_va)
|
||||
mfmsr r10
|
||||
wrteei 0
|
||||
slwi r4,r4,16
|
||||
ori r4,r4,(MAS6_ISIZE(BOOK3E_PAGESZ_4K))@l
|
||||
mtspr SPRN_MAS6,r4 /* assume AS=0 for now */
|
||||
BEGIN_MMU_FTR_SECTION
|
||||
tlbsx 0,r3
|
||||
mfspr r4,SPRN_MAS1 /* check valid */
|
||||
andis. r3,r4,MAS1_VALID@h
|
||||
beq 1f
|
||||
rlwinm r4,r4,0,1,31
|
||||
mtspr SPRN_MAS1,r4
|
||||
tlbwe
|
||||
MMU_FTR_SECTION_ELSE
|
||||
PPC_TLBILX_VA(0,R3)
|
||||
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX)
|
||||
msync
|
||||
isync
|
||||
1: wrtee r10
|
||||
blr
|
||||
#elif defined(CONFIG_PPC_BOOK3E)
|
||||
/*
|
||||
* New Book3E (>= 2.06) implementation
|
||||
*
|
||||
* Note: We may be able to get away without the interrupt masking stuff
|
||||
* if we save/restore MAS6 on exceptions that might modify it
|
||||
*/
|
||||
_GLOBAL(_tlbil_pid)
|
||||
slwi r4,r3,MAS6_SPID_SHIFT
|
||||
mfmsr r10
|
||||
wrteei 0
|
||||
mtspr SPRN_MAS6,r4
|
||||
PPC_TLBILX_PID(0,R0)
|
||||
wrtee r10
|
||||
msync
|
||||
isync
|
||||
blr
|
||||
|
||||
_GLOBAL(_tlbil_pid_noind)
|
||||
slwi r4,r3,MAS6_SPID_SHIFT
|
||||
mfmsr r10
|
||||
ori r4,r4,MAS6_SIND
|
||||
wrteei 0
|
||||
mtspr SPRN_MAS6,r4
|
||||
PPC_TLBILX_PID(0,R0)
|
||||
wrtee r10
|
||||
msync
|
||||
isync
|
||||
blr
|
||||
|
||||
_GLOBAL(_tlbil_all)
|
||||
PPC_TLBILX_ALL(0,R0)
|
||||
msync
|
||||
isync
|
||||
blr
|
||||
|
||||
_GLOBAL(_tlbil_va)
|
||||
mfmsr r10
|
||||
wrteei 0
|
||||
cmpwi cr0,r6,0
|
||||
slwi r4,r4,MAS6_SPID_SHIFT
|
||||
rlwimi r4,r5,MAS6_ISIZE_SHIFT,MAS6_ISIZE_MASK
|
||||
beq 1f
|
||||
rlwimi r4,r6,MAS6_SIND_SHIFT,MAS6_SIND
|
||||
1: mtspr SPRN_MAS6,r4 /* assume AS=0 for now */
|
||||
PPC_TLBILX_VA(0,R3)
|
||||
msync
|
||||
isync
|
||||
wrtee r10
|
||||
blr
|
||||
|
||||
_GLOBAL(_tlbivax_bcast)
|
||||
mfmsr r10
|
||||
wrteei 0
|
||||
cmpwi cr0,r6,0
|
||||
slwi r4,r4,MAS6_SPID_SHIFT
|
||||
rlwimi r4,r5,MAS6_ISIZE_SHIFT,MAS6_ISIZE_MASK
|
||||
beq 1f
|
||||
rlwimi r4,r6,MAS6_SIND_SHIFT,MAS6_SIND
|
||||
1: mtspr SPRN_MAS6,r4 /* assume AS=0 for now */
|
||||
PPC_TLBIVAX(0,R3)
|
||||
eieio
|
||||
tlbsync
|
||||
sync
|
||||
wrtee r10
|
||||
blr
|
||||
|
||||
_GLOBAL(set_context)
|
||||
#ifdef CONFIG_BDI_SWITCH
|
||||
/* Context switch the PTE pointer for the Abatron BDI2000.
|
||||
* The PGDIR is the second parameter.
|
||||
*/
|
||||
lis r5, abatron_pteptrs@h
|
||||
ori r5, r5, abatron_pteptrs@l
|
||||
stw r4, 0x4(r5)
|
||||
#endif
|
||||
mtspr SPRN_PID,r3
|
||||
isync /* Force context change */
|
||||
blr
|
||||
#else
|
||||
#error Unsupported processor type !
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_PPC_FSL_BOOK3E)
|
||||
/*
|
||||
* extern void loadcam_entry(unsigned int index)
|
||||
*
|
||||
* Load TLBCAM[index] entry in to the L2 CAM MMU
|
||||
*/
|
||||
_GLOBAL(loadcam_entry)
|
||||
mflr r5
|
||||
LOAD_REG_ADDR_PIC(r4, TLBCAM)
|
||||
mtlr r5
|
||||
mulli r5,r3,TLBCAM_SIZE
|
||||
add r3,r5,r4
|
||||
lwz r4,TLBCAM_MAS0(r3)
|
||||
mtspr SPRN_MAS0,r4
|
||||
lwz r4,TLBCAM_MAS1(r3)
|
||||
mtspr SPRN_MAS1,r4
|
||||
PPC_LL r4,TLBCAM_MAS2(r3)
|
||||
mtspr SPRN_MAS2,r4
|
||||
lwz r4,TLBCAM_MAS3(r3)
|
||||
mtspr SPRN_MAS3,r4
|
||||
BEGIN_MMU_FTR_SECTION
|
||||
lwz r4,TLBCAM_MAS7(r3)
|
||||
mtspr SPRN_MAS7,r4
|
||||
END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
|
||||
isync
|
||||
tlbwe
|
||||
isync
|
||||
blr
|
||||
#endif
|
||||