Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

113
arch/ia64/kernel/Makefile Normal file
View file

@ -0,0 +1,113 @@
#
# Makefile for the linux kernel.
#
# kbuild fragment for arch/ia64/kernel: it only fills in kbuild list
# variables (extra-y, obj-y, obj-$(CONFIG_...)) and per-object flags;
# the generic kbuild rules do the actual compiling and linking.
#
# ftrace.o is compiled without -pg when CONFIG_DYNAMIC_FTRACE is set
# (presumably so the tracer itself is not instrumented - confirm).
ifdef CONFIG_DYNAMIC_FTRACE
CFLAGS_REMOVE_ftrace.o = -pg
endif
# Extra build targets of this directory that are not part of obj-y.
extra-y := head.o init_task.o vmlinux.lds
# Objects that are built unconditionally.
obj-y := entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
irq_lsapic.o ivt.o machvec.o pal.o paravirt_patchlist.o patch.o process.o perfmon.o ptrace.o sal.o \
salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
unwind.o mca.o mca_asm.o topology.o dma-mapping.o
# Objects selected by the kernel configuration: each CONFIG_ symbol
# expands to y, m or empty, which picks the list the object lands in.
obj-$(CONFIG_ACPI) += acpi.o acpi-ext.o
obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o
obj-$(CONFIG_IA64_PALINFO) += palinfo.o
obj-$(CONFIG_IOSAPIC) += iosapic.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_SMP) += smp.o smpboot.o
obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o
obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
obj-$(CONFIG_AUDIT) += audit.o
obj-$(CONFIG_PCI_MSI) += msi_ia64.o
# mca_recovery.o is a composite object made of these two parts.
mca_recovery-y += mca_drv.o mca_drv_asm.o
obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirtentry.o \
paravirt_patch.o
obj-$(CONFIG_IA64_ESI) += esi.o
# esi.o follows CONFIG_IA64_ESI (y or m), but esi_stub.o is forced into
# obj-y whenever the option is non-empty, i.e. even for a modular esi.o.
ifneq ($(CONFIG_IA64_ESI),)
obj-y += esi_stub.o # must be in kernel proper
endif
obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
obj-$(CONFIG_BINFMT_ELF) += elfcore.o
# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31
# The gate DSO image is built using a special linker script.
include $(srctree)/arch/ia64/kernel/Makefile.gate
# tell compiled for native
CPPFLAGS_gate.lds += -D__IA64_GATE_PARAVIRTUALIZED_NATIVE
# Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) based on config
#
# Pipeline: nr-irqs.c is compiled to assembly (nr-irqs.s), and the marker
# lines in that assembly are then turned into include/generated/nr-irqs.h.
#
# sed-y is the sed program used for that conversion. For every input line
# that starts with "->" it rewrites
#     ->NAME VALUE TEXT      into      #define NAME VALUE /* TEXT */
# (leading '$' or '#' characters in front of VALUE are dropped), removes
# any "->" that is still left, and prints the line. Other lines are
# discarded because sed is run with -n.
define sed-y
"/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
endef
# Short message shown for this command in quiet builds.
quiet_cmd_nr_irqs = GEN $@
# Writes the target file: an include guard, a "DO NOT MODIFY" banner, the
# sed-converted content of the first prerequisite, and the closing #endif.
# "set -e" makes the subshell stop at the first failing command.
define cmd_nr_irqs
(set -e; \
echo "#ifndef __ASM_NR_IRQS_H__"; \
echo "#define __ASM_NR_IRQS_H__"; \
echo "/*"; \
echo " * DO NOT MODIFY."; \
echo " *"; \
echo " * This file was generated by Kbuild"; \
echo " *"; \
echo " */"; \
echo ""; \
sed -ne $(sed-y) $<; \
echo ""; \
echo "#endif" ) > $@
endef
# We use internal kbuild rules to avoid the "is up to date" message from make
# Step 1: compile nr-irqs.c to assembly; the output directory is created first.
arch/$(SRCARCH)/kernel/nr-irqs.s: arch/$(SRCARCH)/kernel/nr-irqs.c
$(Q)mkdir -p $(dir $@)
$(call if_changed_dep,cc_s_c)
# Step 2: generate the header from the assembly with cmd_nr_irqs.
include/generated/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s
$(Q)mkdir -p $(dir $@)
$(call cmd,nr_irqs)
#
# native ivt.S, entry.S and fsys.S
#
# Objects that get the paravirtualization treatment below.
ASM_PARAVIRT_OBJS = ivt.o entry.o fsys.o
# Template instantiated once per object name $(1):
#  - the object itself is assembled with __IA64_ASM_PARAVIRTUALIZED_NATIVE,
#  - its pvchk-sed- variant is assembled with
#    __IA64_ASM_PARAVIRTUALIZED_PVCHECK,
#  - pvchk-$(1) is registered as an additional build target.
define paravirtualized_native
AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE
AFLAGS_pvchk-sed-$(1) += -D__IA64_ASM_PARAVIRTUALIZED_PVCHECK
extra-y += pvchk-$(1)
endef
# Expand and evaluate the template for every entry of ASM_PARAVIRT_OBJS.
$(foreach obj,$(ASM_PARAVIRT_OBJS),$(eval $(call paravirtualized_native,$(obj))))
#
# Checker for paravirtualizations of privileged operations.
#
# Three chained pattern rules produce pvchk-<name>.o from <name>.S:
#   <name>.S -> pvchk-sed-<name>.s -> pvchk-<name>.s -> pvchk-<name>.o
# The middle step filters the intermediate file through
# arch/$(SRCARCH)/scripts/pvcheck.sed; what that script checks is not
# visible from this Makefile.
quiet_cmd_pv_check_sed = PVCHK $@
# Run pvcheck.sed over the first prerequisite and write the result to
# the target.
define cmd_pv_check_sed
sed -f $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed $< > $@
endef
# Stage 1: uses the kbuild as_s_S command; the sed script is listed as a
# prerequisite so that editing it triggers a rebuild.
$(obj)/pvchk-sed-%.s: $(src)/%.S $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed FORCE
$(call if_changed_dep,as_s_S)
# Stage 2: apply pvcheck.sed.
$(obj)/pvchk-%.s: $(obj)/pvchk-sed-%.s FORCE
$(call if_changed,pv_check_sed)
# Stage 3: uses the kbuild as_o_S command to produce the object file.
$(obj)/pvchk-%.o: $(obj)/pvchk-%.s FORCE
$(call if_changed,as_o_S)
# Keep the files of all three stages instead of letting make delete
# them as intermediates.
.PRECIOUS: $(obj)/pvchk-sed-%.s $(obj)/pvchk-%.s $(obj)/pvchk-%.o

View file

@ -0,0 +1,27 @@
# The gate DSO image is built using a special linker script.
# This fragment builds two artefacts from the same inputs (gate.lds and
# gate.o): gate.so, a shared object, and gate-syms.o, a relocatable
# object that is handed to the built-in.o link with -R.
targets += gate.so gate-syms.o
extra-y += gate.so gate-syms.o gate.lds gate.o
# Preprocessor flags used when generating gate.lds.
CPPFLAGS_gate.lds := -P -C -U$(ARCH)
quiet_cmd_gate = GATE $@
# Link through $(CC). Flags are looked up per target file name via
# GATECFLAGS_$(@F). All prerequisites except FORCE follow "-Wl,-T,", so
# the first one (gate.lds in both rules below) becomes the linker script
# and the rest are ordinary inputs.
cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
# gate.so: stripped shared object with soname linux-gate.so.1; the
# --hash-style=sysv option is passed through cc-ldoption.
GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
$(call if_changed,gate)
# built-in.o depends on gate-syms.o and is linked with "-R gate-syms.o".
$(obj)/built-in.o: $(obj)/gate-syms.o
$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
# gate-syms.o: same inputs as gate.so, linked with -r (relocatable output).
GATECFLAGS_gate-syms.o = -r
$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
$(call if_changed,gate)
# gate-data.o contains the gate DSO image as data in section .data..gate.
# We must build gate.so before we can assemble it.
# Note: kbuild does not track this dependency due to usage of .incbin
$(obj)/gate-data.o: $(obj)/gate.so

104
arch/ia64/kernel/acpi-ext.c Normal file
View file

@ -0,0 +1,104 @@
/*
* (c) Copyright 2003, 2006 Hewlett-Packard Development Company, L.P.
* Alex Williamson <alex.williamson@hp.com>
* Bjorn Helgaas <bjorn.helgaas@hp.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <asm/acpi-ext.h>
/*
* Device CSRs that do not appear in PCI config space should be described
* via ACPI. This would normally be done with Address Space Descriptors
* marked as "consumer-only," but old versions of Windows and Linux ignore
* the producer/consumer flag, so HP invented a vendor-defined resource to
* describe the location and size of CSR space.
*
* hp_ccsr_uuid is the key for that vendor-defined resource: a subtype
* plus 16 UUID bytes. hp_ccsr_locate() passes it to
* acpi_get_vendor_resource() to pick the matching _CRS entry.
*/
struct acpi_vendor_uuid hp_ccsr_uuid = {
.subtype = 2,
.data = { 0xf9, 0xad, 0xe9, 0x69, 0x4f, 0x92, 0x5f, 0xab, 0xf6, 0x4a,
0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad },
};
/*
 * hp_ccsr_locate - find the CSR space through HP's vendor-defined resource
 * @obj:    ACPI handle of the object whose _CRS is searched
 * @base:   receives bytes 0..7 of the vendor payload
 * @length: receives bytes 8..15 of the vendor payload
 *
 * Asks ACPI for the _CRS vendor resource tagged with hp_ccsr_uuid and
 * copies the two 64-bit values out of its payload.
 *
 * Returns the status of acpi_get_vendor_resource() on success, or
 * AE_NOT_FOUND when the lookup fails, returns no buffer, or the payload
 * is shorter than the 16 bytes that are read.  @base and @length are
 * left untouched in the failure case.
 */
static acpi_status hp_ccsr_locate(acpi_handle obj, u64 *base, u64 *length)
{
	acpi_status status;
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
	struct acpi_resource *resource;
	struct acpi_resource_vendor_typed *vendor;

	status = acpi_get_vendor_resource(obj, METHOD_NAME__CRS, &hp_ccsr_uuid,
					  &buffer);

	/*
	 * Check the outcome before touching the buffer: on failure
	 * buffer.pointer may still be NULL, and deriving a member address
	 * from it would be pointer arithmetic on a null pointer.
	 */
	if (ACPI_FAILURE(status) || !buffer.pointer) {
		status = AE_NOT_FOUND;
		goto exit;
	}

	resource = buffer.pointer;
	vendor = &resource->data.vendor_typed;

	/* Two u64 values are read below, so at least 16 bytes are needed. */
	if (vendor->byte_length < 16) {
		status = AE_NOT_FOUND;
		goto exit;
	}

	memcpy(base, vendor->byte_data, sizeof(*base));
	memcpy(length, vendor->byte_data + 8, sizeof(*length));

exit:
	/* The buffer was allocated on our behalf (ACPI_ALLOCATE_BUFFER). */
	kfree(buffer.pointer);
	return status;
}
/*
* Result slot filled in by find_csr_space() while _CRS is being walked.
* hp_crs_locate() starts with both fields zero and treats a zero length
* as "nothing found".
*/
struct csr_space {
u64 base; /* copied from the descriptor's "minimum" field */
u64 length; /* copied from the descriptor's "address_length" field */
};
static acpi_status find_csr_space(struct acpi_resource *resource, void *data)
{
struct csr_space *space = data;
struct acpi_resource_address64 addr;
acpi_status status;
status = acpi_resource_to_address64(resource, &addr);
if (ACPI_SUCCESS(status) &&
addr.resource_type == ACPI_MEMORY_RANGE &&
addr.address_length &&
addr.producer_consumer == ACPI_CONSUMER) {
space->base = addr.minimum;
space->length = addr.address_length;
return AE_CTRL_TERMINATE;
}
return AE_OK; /* keep looking */
}
/*
 * hp_crs_locate - find the CSR space by walking the plain _CRS entries
 * @obj:    ACPI handle of the object whose _CRS is walked
 * @base:   receives the base of the window that was found
 * @length: receives the length of the window that was found
 *
 * Runs find_csr_space() over every _CRS resource.  The status of the
 * walk itself is ignored; success is judged only by whether a non-zero
 * length was recorded.  Returns AE_OK on a hit, AE_NOT_FOUND otherwise
 * (in which case @base and @length are not written).
 */
static acpi_status hp_crs_locate(acpi_handle obj, u64 *base, u64 *length)
{
	struct csr_space window = { 0, 0 };

	acpi_walk_resources(obj, METHOD_NAME__CRS, find_csr_space, &window);

	if (window.length) {
		*base = window.base;
		*length = window.length;
		return AE_OK;
	}

	return AE_NOT_FOUND;
}
/*
 * hp_acpi_csr_space - locate the CSR space of an ACPI object
 * @obj:        ACPI handle to query
 * @csr_base:   receives the base of the CSR space
 * @csr_length: receives the length of the CSR space
 *
 * Tries the HP vendor-defined resource first and falls back to a scan
 * of the ordinary _CRS address descriptors when that fails.  Returns
 * the status of whichever lookup produced the final answer.
 */
acpi_status hp_acpi_csr_space(acpi_handle obj, u64 *csr_base, u64 *csr_length)
{
	acpi_status rc = hp_ccsr_locate(obj, csr_base, csr_length);

	if (!ACPI_SUCCESS(rc))
		rc = hp_crs_locate(obj, csr_base, csr_length);

	return rc;
}
EXPORT_SYMBOL(hp_acpi_csr_space);

1001
arch/ia64/kernel/acpi.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,290 @@
/*
* Generate definitions needed by assembly language modules.
* This code generates raw asm output which is post-processed
* to extract and format the required data.
*/
#define ASM_OFFSETS_C 1
#include <linux/sched.h>
#include <linux/pid.h>
#include <linux/clocksource.h>
#include <linux/kbuild.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/siginfo.h>
#include <asm/sigcontext.h>
#include <asm/mca.h>
#include "../kernel/sigframe.h"
#include "../kernel/fsyscall_gtod_data.h"
/*
* foo() exists only for its compiler output: as the file header says, the
* generated asm is post-processed to extract the values, so every
* DEFINE(NAME, value) below becomes one named constant for assembly code.
* DEFINE() and BLANK() are not defined in this file; presumably they come
* from <linux/kbuild.h>, which is included above - confirm.
*
* The statements are grouped by the structure they describe. Their order
* is the order of the generated output, so keep new entries inside the
* matching group.
*/
void foo(void)
{
/* Sizes of whole structures. */
DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct));
DEFINE(IA64_THREAD_INFO_SIZE, sizeof (struct thread_info));
DEFINE(IA64_PT_REGS_SIZE, sizeof (struct pt_regs));
DEFINE(IA64_SWITCH_STACK_SIZE, sizeof (struct switch_stack));
DEFINE(IA64_SIGINFO_SIZE, sizeof (struct siginfo));
DEFINE(IA64_CPU_SIZE, sizeof (struct cpuinfo_ia64));
DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
/*
* IA64_UPID_SHIFT is hard-coded to 5 (1 << 5 == 32); the BUILD_BUG_ON
* breaks the build if struct upid stops being exactly 32 bytes.
*/
BUILD_BUG_ON(sizeof(struct upid) != 32);
DEFINE(IA64_UPID_SHIFT, 5);
BLANK();
/* Field offsets inside struct thread_info. */
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
/* The ac_* fields are only referenced with CONFIG_VIRT_CPU_ACCOUNTING_NATIVE. */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime));
#endif
BLANK();
/* Field offsets inside struct task_struct and struct pid. */
DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
BLANK();
/* Field offsets inside struct sighand_struct. */
DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock));
BLANK();
/* Field offsets inside struct signal_struct. */
DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct,
group_stop_count));
DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending));
BLANK();
/* Field offsets inside struct pt_regs. */
DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6));
DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7));
DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd));
DEFINE(IA64_PT_REGS_AR_SSD_OFFSET, offsetof (struct pt_regs, ar_ssd));
DEFINE(IA64_PT_REGS_R8_OFFSET, offsetof (struct pt_regs, r8));
DEFINE(IA64_PT_REGS_R9_OFFSET, offsetof (struct pt_regs, r9));
DEFINE(IA64_PT_REGS_R10_OFFSET, offsetof (struct pt_regs, r10));
DEFINE(IA64_PT_REGS_R11_OFFSET, offsetof (struct pt_regs, r11));
DEFINE(IA64_PT_REGS_CR_IPSR_OFFSET, offsetof (struct pt_regs, cr_ipsr));
DEFINE(IA64_PT_REGS_CR_IIP_OFFSET, offsetof (struct pt_regs, cr_iip));
DEFINE(IA64_PT_REGS_CR_IFS_OFFSET, offsetof (struct pt_regs, cr_ifs));
DEFINE(IA64_PT_REGS_AR_UNAT_OFFSET, offsetof (struct pt_regs, ar_unat));
DEFINE(IA64_PT_REGS_AR_PFS_OFFSET, offsetof (struct pt_regs, ar_pfs));
DEFINE(IA64_PT_REGS_AR_RSC_OFFSET, offsetof (struct pt_regs, ar_rsc));
DEFINE(IA64_PT_REGS_AR_RNAT_OFFSET, offsetof (struct pt_regs, ar_rnat));
DEFINE(IA64_PT_REGS_AR_BSPSTORE_OFFSET, offsetof (struct pt_regs, ar_bspstore));
DEFINE(IA64_PT_REGS_PR_OFFSET, offsetof (struct pt_regs, pr));
DEFINE(IA64_PT_REGS_B0_OFFSET, offsetof (struct pt_regs, b0));
DEFINE(IA64_PT_REGS_LOADRS_OFFSET, offsetof (struct pt_regs, loadrs));
DEFINE(IA64_PT_REGS_R1_OFFSET, offsetof (struct pt_regs, r1));
DEFINE(IA64_PT_REGS_R12_OFFSET, offsetof (struct pt_regs, r12));
DEFINE(IA64_PT_REGS_R13_OFFSET, offsetof (struct pt_regs, r13));
DEFINE(IA64_PT_REGS_AR_FPSR_OFFSET, offsetof (struct pt_regs, ar_fpsr));
DEFINE(IA64_PT_REGS_R15_OFFSET, offsetof (struct pt_regs, r15));
DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14));
DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2));
DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3));
DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16));
DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17));
DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18));
DEFINE(IA64_PT_REGS_R19_OFFSET, offsetof (struct pt_regs, r19));
DEFINE(IA64_PT_REGS_R20_OFFSET, offsetof (struct pt_regs, r20));
DEFINE(IA64_PT_REGS_R21_OFFSET, offsetof (struct pt_regs, r21));
DEFINE(IA64_PT_REGS_R22_OFFSET, offsetof (struct pt_regs, r22));
DEFINE(IA64_PT_REGS_R23_OFFSET, offsetof (struct pt_regs, r23));
DEFINE(IA64_PT_REGS_R24_OFFSET, offsetof (struct pt_regs, r24));
DEFINE(IA64_PT_REGS_R25_OFFSET, offsetof (struct pt_regs, r25));
DEFINE(IA64_PT_REGS_R26_OFFSET, offsetof (struct pt_regs, r26));
DEFINE(IA64_PT_REGS_R27_OFFSET, offsetof (struct pt_regs, r27));
DEFINE(IA64_PT_REGS_R28_OFFSET, offsetof (struct pt_regs, r28));
DEFINE(IA64_PT_REGS_R29_OFFSET, offsetof (struct pt_regs, r29));
DEFINE(IA64_PT_REGS_R30_OFFSET, offsetof (struct pt_regs, r30));
DEFINE(IA64_PT_REGS_R31_OFFSET, offsetof (struct pt_regs, r31));
DEFINE(IA64_PT_REGS_AR_CCV_OFFSET, offsetof (struct pt_regs, ar_ccv));
DEFINE(IA64_PT_REGS_F6_OFFSET, offsetof (struct pt_regs, f6));
DEFINE(IA64_PT_REGS_F7_OFFSET, offsetof (struct pt_regs, f7));
DEFINE(IA64_PT_REGS_F8_OFFSET, offsetof (struct pt_regs, f8));
DEFINE(IA64_PT_REGS_F9_OFFSET, offsetof (struct pt_regs, f9));
DEFINE(IA64_PT_REGS_F10_OFFSET, offsetof (struct pt_regs, f10));
DEFINE(IA64_PT_REGS_F11_OFFSET, offsetof (struct pt_regs, f11));
BLANK();
/* Field offsets inside struct switch_stack. */
DEFINE(IA64_SWITCH_STACK_CALLER_UNAT_OFFSET, offsetof (struct switch_stack, caller_unat));
DEFINE(IA64_SWITCH_STACK_AR_FPSR_OFFSET, offsetof (struct switch_stack, ar_fpsr));
DEFINE(IA64_SWITCH_STACK_F2_OFFSET, offsetof (struct switch_stack, f2));
DEFINE(IA64_SWITCH_STACK_F3_OFFSET, offsetof (struct switch_stack, f3));
DEFINE(IA64_SWITCH_STACK_F4_OFFSET, offsetof (struct switch_stack, f4));
DEFINE(IA64_SWITCH_STACK_F5_OFFSET, offsetof (struct switch_stack, f5));
DEFINE(IA64_SWITCH_STACK_F12_OFFSET, offsetof (struct switch_stack, f12));
DEFINE(IA64_SWITCH_STACK_F13_OFFSET, offsetof (struct switch_stack, f13));
DEFINE(IA64_SWITCH_STACK_F14_OFFSET, offsetof (struct switch_stack, f14));
DEFINE(IA64_SWITCH_STACK_F15_OFFSET, offsetof (struct switch_stack, f15));
DEFINE(IA64_SWITCH_STACK_F16_OFFSET, offsetof (struct switch_stack, f16));
DEFINE(IA64_SWITCH_STACK_F17_OFFSET, offsetof (struct switch_stack, f17));
DEFINE(IA64_SWITCH_STACK_F18_OFFSET, offsetof (struct switch_stack, f18));
DEFINE(IA64_SWITCH_STACK_F19_OFFSET, offsetof (struct switch_stack, f19));
DEFINE(IA64_SWITCH_STACK_F20_OFFSET, offsetof (struct switch_stack, f20));
DEFINE(IA64_SWITCH_STACK_F21_OFFSET, offsetof (struct switch_stack, f21));
DEFINE(IA64_SWITCH_STACK_F22_OFFSET, offsetof (struct switch_stack, f22));
DEFINE(IA64_SWITCH_STACK_F23_OFFSET, offsetof (struct switch_stack, f23));
DEFINE(IA64_SWITCH_STACK_F24_OFFSET, offsetof (struct switch_stack, f24));
DEFINE(IA64_SWITCH_STACK_F25_OFFSET, offsetof (struct switch_stack, f25));
DEFINE(IA64_SWITCH_STACK_F26_OFFSET, offsetof (struct switch_stack, f26));
DEFINE(IA64_SWITCH_STACK_F27_OFFSET, offsetof (struct switch_stack, f27));
DEFINE(IA64_SWITCH_STACK_F28_OFFSET, offsetof (struct switch_stack, f28));
DEFINE(IA64_SWITCH_STACK_F29_OFFSET, offsetof (struct switch_stack, f29));
DEFINE(IA64_SWITCH_STACK_F30_OFFSET, offsetof (struct switch_stack, f30));
DEFINE(IA64_SWITCH_STACK_F31_OFFSET, offsetof (struct switch_stack, f31));
DEFINE(IA64_SWITCH_STACK_R4_OFFSET, offsetof (struct switch_stack, r4));
DEFINE(IA64_SWITCH_STACK_R5_OFFSET, offsetof (struct switch_stack, r5));
DEFINE(IA64_SWITCH_STACK_R6_OFFSET, offsetof (struct switch_stack, r6));
DEFINE(IA64_SWITCH_STACK_R7_OFFSET, offsetof (struct switch_stack, r7));
DEFINE(IA64_SWITCH_STACK_B0_OFFSET, offsetof (struct switch_stack, b0));
DEFINE(IA64_SWITCH_STACK_B1_OFFSET, offsetof (struct switch_stack, b1));
DEFINE(IA64_SWITCH_STACK_B2_OFFSET, offsetof (struct switch_stack, b2));
DEFINE(IA64_SWITCH_STACK_B3_OFFSET, offsetof (struct switch_stack, b3));
DEFINE(IA64_SWITCH_STACK_B4_OFFSET, offsetof (struct switch_stack, b4));
DEFINE(IA64_SWITCH_STACK_B5_OFFSET, offsetof (struct switch_stack, b5));
DEFINE(IA64_SWITCH_STACK_AR_PFS_OFFSET, offsetof (struct switch_stack, ar_pfs));
DEFINE(IA64_SWITCH_STACK_AR_LC_OFFSET, offsetof (struct switch_stack, ar_lc));
DEFINE(IA64_SWITCH_STACK_AR_UNAT_OFFSET, offsetof (struct switch_stack, ar_unat));
DEFINE(IA64_SWITCH_STACK_AR_RNAT_OFFSET, offsetof (struct switch_stack, ar_rnat));
DEFINE(IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET, offsetof (struct switch_stack, ar_bspstore));
DEFINE(IA64_SWITCH_STACK_PR_OFFSET, offsetof (struct switch_stack, pr));
BLANK();
/* Field offsets inside struct sigcontext. */
DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip));
DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, offsetof (struct sigcontext, sc_ar_bsp));
DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, sc_ar_fpsr));
DEFINE(IA64_SIGCONTEXT_AR_RNAT_OFFSET, offsetof (struct sigcontext, sc_ar_rnat));
DEFINE(IA64_SIGCONTEXT_AR_UNAT_OFFSET, offsetof (struct sigcontext, sc_ar_unat));
DEFINE(IA64_SIGCONTEXT_B0_OFFSET, offsetof (struct sigcontext, sc_br[0]));
DEFINE(IA64_SIGCONTEXT_CFM_OFFSET, offsetof (struct sigcontext, sc_cfm));
DEFINE(IA64_SIGCONTEXT_FLAGS_OFFSET, offsetof (struct sigcontext, sc_flags));
DEFINE(IA64_SIGCONTEXT_FR6_OFFSET, offsetof (struct sigcontext, sc_fr[6]));
DEFINE(IA64_SIGCONTEXT_PR_OFFSET, offsetof (struct sigcontext, sc_pr));
DEFINE(IA64_SIGCONTEXT_R12_OFFSET, offsetof (struct sigcontext, sc_gr[12]));
DEFINE(IA64_SIGCONTEXT_RBS_BASE_OFFSET,offsetof (struct sigcontext, sc_rbs_base));
DEFINE(IA64_SIGCONTEXT_LOADRS_OFFSET, offsetof (struct sigcontext, sc_loadrs));
BLANK();
/* Field offsets inside struct sigpending. */
DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal));
BLANK();
/* Field offsets inside struct sigframe. */
DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0));
DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1));
DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2));
DEFINE(IA64_SIGFRAME_HANDLER_OFFSET, offsetof (struct sigframe, handler));
DEFINE(IA64_SIGFRAME_SIGCONTEXT_OFFSET, offsetof (struct sigframe, sc));
BLANK();
/* for assembly files which can't include sched.h: */
DEFINE(IA64_CLONE_VFORK, CLONE_VFORK);
DEFINE(IA64_CLONE_VM, CLONE_VM);
BLANK();
/* Field offsets inside struct cpuinfo_ia64. */
DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET,
offsetof (struct cpuinfo_ia64, nsec_per_cyc));
DEFINE(IA64_CPUINFO_PTCE_BASE_OFFSET,
offsetof (struct cpuinfo_ia64, ptce_base));
DEFINE(IA64_CPUINFO_PTCE_COUNT_OFFSET,
offsetof (struct cpuinfo_ia64, ptce_count));
DEFINE(IA64_CPUINFO_PTCE_STRIDE_OFFSET,
offsetof (struct cpuinfo_ia64, ptce_stride));
BLANK();
/* Field offsets inside struct timespec. */
DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET,
offsetof (struct timespec, tv_nsec));
/*
* CLONE_SETTLS_BIT is hard-coded; the preprocessor check below stops
* the build if CLONE_SETTLS is ever anything other than bit 19.
*/
DEFINE(CLONE_SETTLS_BIT, 19);
#if CLONE_SETTLS != (1<<19)
# error "CLONE_SETTLS_BIT incorrect, please fix"
#endif
BLANK();
/* Field offsets inside struct ia64_mca_cpu. */
DEFINE(IA64_MCA_CPU_MCA_STACK_OFFSET,
offsetof (struct ia64_mca_cpu, mca_stack));
DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET,
offsetof (struct ia64_mca_cpu, init_stack));
BLANK();
/* Field offsets inside, and size of, struct ia64_sal_os_state. */
DEFINE(IA64_SAL_OS_STATE_OS_GP_OFFSET,
offsetof (struct ia64_sal_os_state, os_gp));
DEFINE(IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET,
offsetof (struct ia64_sal_os_state, proc_state_param));
DEFINE(IA64_SAL_OS_STATE_SAL_RA_OFFSET,
offsetof (struct ia64_sal_os_state, sal_ra));
DEFINE(IA64_SAL_OS_STATE_SAL_GP_OFFSET,
offsetof (struct ia64_sal_os_state, sal_gp));
DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET,
offsetof (struct ia64_sal_os_state, pal_min_state));
DEFINE(IA64_SAL_OS_STATE_OS_STATUS_OFFSET,
offsetof (struct ia64_sal_os_state, os_status));
DEFINE(IA64_SAL_OS_STATE_CONTEXT_OFFSET,
offsetof (struct ia64_sal_os_state, context));
DEFINE(IA64_SAL_OS_STATE_SIZE,
sizeof (struct ia64_sal_os_state));
BLANK();
/* Field offsets inside struct pal_min_state_area_s. */
DEFINE(IA64_PMSA_GR_OFFSET,
offsetof (struct pal_min_state_area_s, pmsa_gr));
DEFINE(IA64_PMSA_BANK1_GR_OFFSET,
offsetof (struct pal_min_state_area_s, pmsa_bank1_gr));
DEFINE(IA64_PMSA_PR_OFFSET,
offsetof (struct pal_min_state_area_s, pmsa_pr));
DEFINE(IA64_PMSA_BR0_OFFSET,
offsetof (struct pal_min_state_area_s, pmsa_br0));
DEFINE(IA64_PMSA_RSC_OFFSET,
offsetof (struct pal_min_state_area_s, pmsa_rsc));
DEFINE(IA64_PMSA_IIP_OFFSET,
offsetof (struct pal_min_state_area_s, pmsa_iip));
DEFINE(IA64_PMSA_IPSR_OFFSET,
offsetof (struct pal_min_state_area_s, pmsa_ipsr));
DEFINE(IA64_PMSA_IFS_OFFSET,
offsetof (struct pal_min_state_area_s, pmsa_ifs));
DEFINE(IA64_PMSA_XIP_OFFSET,
offsetof (struct pal_min_state_area_s, pmsa_xip));
BLANK();
/* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
DEFINE(IA64_GTOD_SEQ_OFFSET,
offsetof (struct fsyscall_gtod_data_t, seq));
DEFINE(IA64_GTOD_WALL_TIME_OFFSET,
offsetof (struct fsyscall_gtod_data_t, wall_time));
DEFINE(IA64_GTOD_MONO_TIME_OFFSET,
offsetof (struct fsyscall_gtod_data_t, monotonic_time));
DEFINE(IA64_CLKSRC_MASK_OFFSET,
offsetof (struct fsyscall_gtod_data_t, clk_mask));
DEFINE(IA64_CLKSRC_MULT_OFFSET,
offsetof (struct fsyscall_gtod_data_t, clk_mult));
DEFINE(IA64_CLKSRC_SHIFT_OFFSET,
offsetof (struct fsyscall_gtod_data_t, clk_shift));
DEFINE(IA64_CLKSRC_MMIO_OFFSET,
offsetof (struct fsyscall_gtod_data_t, clk_fsys_mmio));
DEFINE(IA64_CLKSRC_CYCLE_LAST_OFFSET,
offsetof (struct fsyscall_gtod_data_t, clk_cycle_last));
/* Field offsets inside struct itc_jitter_data_t. */
DEFINE(IA64_ITC_JITTER_OFFSET,
offsetof (struct itc_jitter_data_t, itc_jitter));
DEFINE(IA64_ITC_LASTCYCLE_OFFSET,
offsetof (struct itc_jitter_data_t, itc_lastcycle));
}

60
arch/ia64/kernel/audit.c Normal file
View file

@ -0,0 +1,60 @@
#include <linux/init.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <asm/unistd.h>
/*
* Syscall-number tables, one per audit class. The entries are pulled in
* from the matching asm-generic header and each table ends with ~0U
* (presumably the end-of-list marker expected by audit_register_class()
* - confirm). The tables are registered in audit_classes_init().
*/
/* Registered as AUDIT_CLASS_DIR_WRITE. */
static unsigned dir_class[] = {
#include <asm-generic/audit_dir_write.h>
~0U
};
/* Registered as AUDIT_CLASS_READ. */
static unsigned read_class[] = {
#include <asm-generic/audit_read.h>
~0U
};
/* Registered as AUDIT_CLASS_WRITE. */
static unsigned write_class[] = {
#include <asm-generic/audit_write.h>
~0U
};
/* Registered as AUDIT_CLASS_CHATTR. */
static unsigned chattr_class[] = {
#include <asm-generic/audit_change_attr.h>
~0U
};
/* Registered as AUDIT_CLASS_SIGNAL. */
static unsigned signal_class[] = {
#include <asm-generic/audit_signal.h>
~0U
};
/*
 * audit_classify_arch - classify an audit architecture value
 * @arch: architecture identifier; not inspected here
 *
 * Every value maps to class 0 on this architecture.
 */
int audit_classify_arch(int arch)
{
	(void)arch;	/* deliberately unused */

	return 0;
}
/*
 * audit_classify_syscall - map a syscall number to an audit class code
 * @abi:     ABI selector; not inspected here
 * @syscall: syscall number to classify
 *
 * Returns 2 for open, 3 for openat, 5 for execve and 0 for everything
 * else.  The meaning of these codes is defined by the caller (the
 * generic audit code) and is not visible in this file.
 */
int audit_classify_syscall(int abi, unsigned syscall)
{
	if (syscall == __NR_open)
		return 2;

	if (syscall == __NR_openat)
		return 3;

	if (syscall == __NR_execve)
		return 5;

	return 0;
}
/*
* audit_classes_init - register the five syscall class tables defined
* above with the audit core, one audit_register_class() call per class.
*
* The return values of audit_register_class() are not checked; the
* function always returns 0. It is hooked up through __initcall() below.
*/
static int __init audit_classes_init(void)
{
audit_register_class(AUDIT_CLASS_WRITE, write_class);
audit_register_class(AUDIT_CLASS_READ, read_class);
audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
return 0;
}
__initcall(audit_classes_init);

234
arch/ia64/kernel/brl_emu.c Normal file
View file

@ -0,0 +1,234 @@
/*
* Emulation of the "brl" instruction for IA64 processors that
* don't support it in hardware.
* Author: Stephan Zeisset, Intel Corp. <Stephan.Zeisset@intel.com>
*
* 02/22/02 D. Mosberger Clear si_flgs, si_isr, and si_imm to avoid
* leaking kernel bits.
*/
#include <linux/kernel.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
/*
* Symbols defined elsewhere; only their addresses are used here. For a
* long call that links through b1..b5, ia64_emulate_brl() hands the
* address of the matching symbol back to its caller in rv.fkt.
*/
extern char ia64_set_b1, ia64_set_b2, ia64_set_b3, ia64_set_b4, ia64_set_b5;
/*
* Return value of ia64_emulate_brl().
* fkt: (unsigned long) -1 if the fault was not handled, 0 if it was
* handled with nothing left to do, otherwise the address of one
* of the ia64_set_bN symbols above.
* arg1: set to the return address (IP + 16) when a long call is emulated.
* arg2, arg3: never written by ia64_emulate_brl().
*/
struct illegal_op_return {
unsigned long fkt, arg1, arg2, arg3;
};
/*
* The unimplemented bits of a virtual address must be set
* to the value of the most significant implemented bit.
* unimpl_va_mask includes all unimplemented bits and
* the most significant implemented bit, so the result
* of an and operation with the mask must be all 0's
* or all 1's for the address to be valid.
*
* The macro is true when the address is NOT valid. Its argument is
* expanded twice, so do not pass an expression with side effects.
*/
#define unimplemented_virtual_address(va) ( \
((va) & local_cpu_data->unimpl_va_mask) != 0 && \
((va) & local_cpu_data->unimpl_va_mask) != local_cpu_data->unimpl_va_mask \
)
/*
* The unimplemented bits of a physical address must be 0.
* unimpl_pa_mask includes all unimplemented bits, so the result
* of an and operation with the mask must be all 0's for the
* address to be valid.
*
* The macro is true when the address is NOT valid.
*/
#define unimplemented_physical_address(pa) ( \
((pa) & local_cpu_data->unimpl_pa_mask) != 0 \
)
/*
 * Handle an illegal operation fault that was caused by an
 * unimplemented "brl" instruction.
 * If we are not successful (e.g because the illegal operation
 * wasn't caused by a "brl" after all), we return -1.
 * If we are successful, we return either 0 or the address
 * of a "fixup" function for manipulating preserved register
 * state.
 *
 * @regs:  saved register state of the faulting context.  On a
 *         successful emulation cr_iip and psr.ri are updated, and for
 *         a long call also b0/b6/b7 (when selected), ar_pfs and cr_ifs.
 * @ar_ec: value stored into the pec field of AR[PFS] for a long call.
 *
 * The status is carried in the fkt member of the returned structure;
 * arg1 holds the return address when a long call was emulated.
 *
 * After a taken branch at most one signal is forced on the current
 * task: SIGILL if the target address has unimplemented bits, else
 * SIGTRAP if branch tracing (psr.tb) or single stepping (psr.ss) is on.
 */
struct illegal_op_return
ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec)
{
	unsigned long bundle[2];
	unsigned long opcode, btype, qp, offset, cpl;
	unsigned long next_ip;
	struct siginfo siginfo;
	struct illegal_op_return rv;
	long tmp_taken, unimplemented_address;

	/* Assume failure until the bundle is known to hold a "brl". */
	rv.fkt = (unsigned long) -1;

	/*
	 * Decode the instruction bundle.
	 */

	if (copy_from_user(bundle, (void *) (regs->cr_iip), sizeof(bundle)))
		return rv;

	next_ip = (unsigned long) regs->cr_iip + 16;

	/* "brl" must be in slot 2. */
	if (ia64_psr(regs)->ri != 1) return rv;

	/* Must be "mlx" template */
	if ((bundle[0] & 0x1e) != 0x4) return rv;

	opcode = (bundle[1] >> 60);
	btype = ((bundle[1] >> 29) & 0x7);
	qp = ((bundle[1] >> 23) & 0x3f);
	/* Reassemble the branch displacement from its pieces in the bundle. */
	offset = ((bundle[1] & 0x0800000000000000L) << 4)
		| ((bundle[1] & 0x00fffff000000000L) >> 32)
		| ((bundle[1] & 0x00000000007fffffL) << 40)
		| ((bundle[0] & 0xffff000000000000L) >> 24);

	/* Non-zero when the qualifying predicate of the instruction is set. */
	tmp_taken = regs->pr & (1L << qp);

	switch(opcode) {

	case 0xC:
		/*
		 * Long Branch.
		 */
		if (btype != 0) return rv;
		rv.fkt = 0;
		if (!(tmp_taken)) {
			/*
			 * Qualifying predicate is 0.
			 * Skip instruction.
			 */
			regs->cr_iip = next_ip;
			ia64_psr(regs)->ri = 0;
			return rv;
		}
		break;

	case 0xD:
		/*
		 * Long Call.
		 */
		rv.fkt = 0;
		if (!(tmp_taken)) {
			/*
			 * Qualifying predicate is 0.
			 * Skip instruction.
			 */
			regs->cr_iip = next_ip;
			ia64_psr(regs)->ri = 0;
			return rv;
		}

		/*
		 * BR[btype] = IP+16
		 *
		 * b0, b6 and b7 are written through pt_regs; for b1-b5 the
		 * caller is handed the address of a fixup symbol instead.
		 */
		switch(btype) {
		case 0:
			regs->b0 = next_ip;
			break;
		case 1:
			rv.fkt = (unsigned long) &ia64_set_b1;
			break;
		case 2:
			rv.fkt = (unsigned long) &ia64_set_b2;
			break;
		case 3:
			rv.fkt = (unsigned long) &ia64_set_b3;
			break;
		case 4:
			rv.fkt = (unsigned long) &ia64_set_b4;
			break;
		case 5:
			rv.fkt = (unsigned long) &ia64_set_b5;
			break;
		case 6:
			regs->b6 = next_ip;
			break;
		case 7:
			regs->b7 = next_ip;
			break;
		}
		rv.arg1 = next_ip;

		/*
		 * AR[PFS].pfm = CFM
		 * AR[PFS].pec = AR[EC]
		 * AR[PFS].ppl = PSR.cpl
		 */
		cpl = ia64_psr(regs)->cpl;
		regs->ar_pfs = ((regs->cr_ifs & 0x3fffffffff)
				| (ar_ec << 52) | (cpl << 62));

		/*
		 * CFM.sof -= CFM.sol
		 * CFM.sol = 0
		 * CFM.sor = 0
		 * CFM.rrb.gr = 0
		 * CFM.rrb.fr = 0
		 * CFM.rrb.pr = 0
		 */
		regs->cr_ifs = ((regs->cr_ifs & 0xffffffc00000007f)
				- ((regs->cr_ifs >> 7) & 0x7f));

		break;

	default:
		/*
		 * Unknown opcode.
		 */
		return rv;

	}

	/* The branch is taken: move to the target, slot 0. */
	regs->cr_iip += offset;
	ia64_psr(regs)->ri = 0;

	/* psr.it selects whether cr_iip is checked as physical or virtual. */
	if (ia64_psr(regs)->it == 0)
		unimplemented_address = unimplemented_physical_address(regs->cr_iip);
	else
		unimplemented_address = unimplemented_virtual_address(regs->cr_iip);

	if (unimplemented_address) {
		/*
		 * The target address contains unimplemented bits.
		 */
		printk(KERN_DEBUG "Woah! Unimplemented Instruction Address Trap!\n");
		siginfo.si_signo = SIGILL;
		siginfo.si_errno = 0;
		siginfo.si_flags = 0;
		siginfo.si_isr = 0;
		/*
		 * siginfo lives on the stack and is not otherwise
		 * initialised: clear si_addr here as the SIGTRAP cases
		 * below do, so no stale kernel stack content is sent out.
		 */
		siginfo.si_addr = 0;
		siginfo.si_imm = 0;
		siginfo.si_code = ILL_BADIADDR;
		force_sig_info(SIGILL, &siginfo, current);
	} else if (ia64_psr(regs)->tb) {
		/*
		 * Branch Tracing is enabled.
		 * Force a taken branch signal.
		 */
		siginfo.si_signo = SIGTRAP;
		siginfo.si_errno = 0;
		siginfo.si_code = TRAP_BRANCH;
		siginfo.si_flags = 0;
		siginfo.si_isr = 0;
		siginfo.si_addr = 0;
		siginfo.si_imm = 0;
		force_sig_info(SIGTRAP, &siginfo, current);
	} else if (ia64_psr(regs)->ss) {
		/*
		 * Single Step is enabled.
		 * Force a trace signal.
		 */
		siginfo.si_signo = SIGTRAP;
		siginfo.si_errno = 0;
		siginfo.si_code = TRAP_TRACE;
		siginfo.si_flags = 0;
		siginfo.si_isr = 0;
		siginfo.si_addr = 0;
		siginfo.si_imm = 0;
		force_sig_info(SIGTRAP, &siginfo, current);
	}
	return rv;
}

286
arch/ia64/kernel/crash.c Normal file
View file

@ -0,0 +1,286 @@
/*
* arch/ia64/kernel/crash.c
*
* Architecture specific (ia64) functions for kexec based crash dumps.
*
* Created by: Khalid Aziz <khalid.aziz@hp.com>
* Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
* Copyright (C) 2005 Intel Corp Zou Nan hai <nanhai.zou@intel.com>
*
*/
#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/crash_dump.h>
#include <linux/bootmem.h>
#include <linux/kexec.h>
#include <linux/elfcore.h>
#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/kdebug.h>
#include <asm/mca.h>
/* Per-cpu flag, set to 1 by kdump_cpu_freeze() once that cpu is parked. */
int kdump_status[NR_CPUS];
/* Number of cpus that have entered kdump_cpu_freeze(). */
static atomic_t kdump_cpu_frozen;
/* Incremented by every path that wants to start kdump; the one that sees 1 proceeds. */
atomic_t kdump_in_progress;
/* Set when the INIT monarch must freeze because kdump was already started elsewhere. */
static int kdump_freeze_monarch;
/* Policy switches, both enabled by default: start kdump on INIT / on a fatal MCA. */
static int kdump_on_init = 1;
static int kdump_on_fatal_mca = 1;
/*
 * append_elf_note - write one ELF note at @buf
 * @buf:      where the note starts
 * @name:     NUL-terminated note name
 * @type:     value stored in n_type
 * @data:     note payload
 * @data_len: payload size in bytes
 *
 * Layout written: note header, then the name including its NUL, then
 * the payload.  After each part the position moves on by the part's
 * size rounded up to a multiple of 4, counted in Elf64_Word units
 * (assumes Elf64_Word is 4 bytes wide - confirm).  Padding bytes are
 * not written.  Returns the position just past the payload.
 */
static inline Elf64_Word
*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
		size_t data_len)
{
	struct elf_note *hdr = (struct elf_note *)buf;
	Elf64_Word *pos = buf;

	hdr->n_namesz = strlen(name) + 1;
	hdr->n_descsz = data_len;
	hdr->n_type = type;

	pos += (sizeof(*hdr) + 3) / 4;
	memcpy(pos, name, hdr->n_namesz);

	pos += (hdr->n_namesz + 3) / 4;
	memcpy(pos, data, data_len);

	return pos + (data_len + 3) / 4;
}
/*
 * final_note - terminate a note list
 * @buf: position right after the last note
 *
 * Writes an all-zero note header at @buf.
 */
static void
final_note(void *buf)
{
	struct elf_note *terminator = buf;

	memset(terminator, 0, sizeof(*terminator));
}
extern void ia64_dump_cpu_regs(void *);

/* Per-cpu scratch area the register dump is assembled in. */
static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);

/*
 * crash_save_this_cpu - record the state of the calling cpu for the dump
 *
 * Fills this cpu's elf_prstatus with the current pid and the registers
 * delivered by ia64_dump_cpu_regs(), adjusts register slot 46 with
 * ia64_rse_skip_regs() by (sof - sol) taken from slot 43, and then, if
 * this cpu has a crash_notes buffer, stores the result there as an
 * NT_PRSTATUS note followed by a terminating empty note.
 *
 * NOTE(review): slots 43 and 46 follow the layout produced by
 * ia64_dump_cpu_regs(), which is not visible here; the code treats
 * slot 43 as a frame marker (sof in bits 0-6, sol in bits 7-13).
 */
void
crash_save_this_cpu(void)
{
	int this_cpu = smp_processor_id();
	struct elf_prstatus *status = &per_cpu(elf_prstatus, this_cpu);
	elf_greg_t *gregs = (elf_greg_t *)&(status->pr_reg);
	unsigned long cfm, sof, sol;
	void *notes;

	memset(status, 0, sizeof(*status));
	status->pr_pid = current->pid;

	ia64_dump_cpu_regs(gregs);

	cfm = gregs[43];
	sof = cfm & 0x7f;
	sol = (cfm >> 7) & 0x7f;
	gregs[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)gregs[46],
			sof - sol);

	notes = (u64 *) per_cpu_ptr(crash_notes, this_cpu);
	if (notes) {
		notes = append_elf_note(notes, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
				status, sizeof(*status));
		final_note(notes);
	}
}
#ifdef CONFIG_SMP
/*
 * kdump_wait_cpu_freeze - wait for all other online cpus to freeze
 *
 * Polls kdump_cpu_frozen up to 1000 times, 1000 microseconds apart,
 * until it equals the number of online cpus minus one.
 * Returns 0 once that happens and 1 if the polls run out first.
 */
static int
kdump_wait_cpu_freeze(void)
{
	int others = num_online_cpus() - 1;
	int polls_left;

	for (polls_left = 1000; polls_left > 0; polls_left--) {
		if (atomic_read(&kdump_cpu_frozen) == others)
			return 0;
		udelay(1000);
	}

	return 1;
}
#endif
/*
* machine_crash_shutdown - quiesce the machine before the crash kernel runs.
* @pt: register state supplied by the caller; not referenced in this function.
*
* Disables the IOSAPIC and, on SMP builds, stops every other cpu: first
* through kdump_smp_send_stop(), then through kdump_smp_send_init() if
* not all of them report frozen in time.
*/
void
machine_crash_shutdown(struct pt_regs *pt)
{
/* This function is only called after the system
* has panicked or is otherwise in a critical state.
* The minimum amount of code to allow a kexec'd kernel
* to run successfully needs to happen here.
*
* In practice this means shooting down the other cpus in
* an SMP system.
*/
kexec_disable_iosapic();
#ifdef CONFIG_SMP
/*
* If kdump_on_init is set and an INIT is asserted here, kdump will
* be started again via INIT monarch.
*/
local_irq_disable();
ia64_set_psr_mc(); /* mask MCA/INIT */
/*
* Somebody else already started kdump: park this cpu in
* kdump_cpu_freeze() instead of continuing.
*/
if (atomic_inc_return(&kdump_in_progress) != 1)
unw_init_running(kdump_cpu_freeze, NULL);
/*
* Now this cpu is ready for kdump.
* Stop all others by IPI or INIT. They could receive INIT from
* outside and might be the INIT monarch, but the only thing they
* have to do is fall into kdump_cpu_freeze().
*
* If an INIT is asserted here:
* - All receivers might be slaves, since some of the cpus could
* already be frozen and INIT might be masked on the monarch. In
* this case, all slaves will be frozen soon since
* kdump_in_progress will let them into DIE_INIT_SLAVE_LEAVE.
* - One might be a monarch, but the INIT rendezvous will fail since
* at least this cpu already has INIT masked, so it never joins
* the rendezvous. In this case, all slaves and the monarch will
* be frozen soon with no wait since the INIT rendezvous is skipped
* by kdump_in_progress.
*/
kdump_smp_send_stop();
/* not all cpus responded to the IPI, send INIT to freeze them */
if (kdump_wait_cpu_freeze()) {
kdump_smp_send_init();
/* wait again; the result is ignored and we go ahead either way */
kdump_wait_cpu_freeze();
}
#endif
}
/*
* machine_kdump_on_init - start the crash kernel from the INIT/MCA notifier.
*
* Saves vmcoreinfo, disables local interrupts and the IOSAPIC, and then
* hands control to machine_kexec() with the loaded image (ia64_kimage).
* Callers check that ia64_kimage is set before getting here.
*/
static void
machine_kdump_on_init(void)
{
crash_save_vmcoreinfo();
local_irq_disable();
kexec_disable_iosapic();
machine_kexec(ia64_kimage);
}
/*
* kdump_cpu_freeze - park the calling cpu for the rest of the crash dump.
* @info: unwind frame info; its switch-stack pointer (info->sw) is used
* to set the saved kernel stack pointer of the current task.
* @arg: unused.
*
* Used as the callback of unw_init_running(). Saves this cpu's registers
* into its crash note, masks MCA/INIT, announces itself through
* kdump_cpu_frozen and kdump_status[], and then spins forever: this
* function never returns.
*/
void
kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
{
int cpuid;
local_irq_disable();
cpuid = smp_processor_id();
crash_save_this_cpu();
current->thread.ksp = (__u64)info->sw - 16;
ia64_set_psr_mc(); /* mask MCA/INIT and stop reentrance */
atomic_inc(&kdump_cpu_frozen);
kdump_status[cpuid] = 1;
/* publish the two stores above before entering the endless loop */
mb();
for (;;)
cpu_relax();
}
/*
 * kdump_init_notifier - die-chain callback that ties INIT/MCA events to kdump.
 * @self: the registered notifier block (unused)
 * @val:  DIE_* event code
 * @data: struct die_args; its ->err field carries a pointer to
 *        struct ia64_mca_notify_die
 *
 * While a kdump is already in progress, cpus arriving through the listed
 * INIT/MCA events are parked in kdump_cpu_freeze(). Otherwise, if
 * kdump_on_init or kdump_on_fatal_mca is enabled and a crash image is loaded,
 * the monarch events start the crash kernel.
 *
 * Always returns NOTIFY_DONE.
 */
static int
kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
{
struct ia64_mca_notify_die *nd;
struct die_args *args = data;
/* Part 1: a kdump is already running -- freeze cpus that show up here. */
if (atomic_read(&kdump_in_progress)) {
switch (val) {
case DIE_INIT_MONARCH_LEAVE:
/* The monarch is frozen only if DIE_INIT_MONARCH_PROCESS lost the race. */
if (!kdump_freeze_monarch)
break;
/* fall through */
case DIE_INIT_SLAVE_LEAVE:
case DIE_INIT_MONARCH_ENTER:
case DIE_MCA_RENDZVOUS_LEAVE:
unw_init_running(kdump_cpu_freeze, NULL);
break;
}
}
/* Part 2: decide whether this event should start a kdump. */
if (!kdump_on_init && !kdump_on_fatal_mca)
return NOTIFY_DONE;
if (!ia64_kimage) {
if (val == DIE_INIT_MONARCH_LEAVE)
ia64_mca_printk(KERN_NOTICE
"%s: kdump not configured\n",
__func__);
return NOTIFY_DONE;
}
if (val != DIE_INIT_MONARCH_LEAVE &&
val != DIE_INIT_MONARCH_PROCESS &&
val != DIE_MCA_MONARCH_LEAVE)
return NOTIFY_DONE;
nd = (struct ia64_mca_notify_die *)args->err;
switch (val) {
case DIE_INIT_MONARCH_PROCESS:
/* Reason code 1 means machine check rendezvous */
if (kdump_on_init && (nd->sos->rv_rc != 1)) {
/* Someone else already owns the kdump: freeze this monarch on LEAVE. */
if (atomic_inc_return(&kdump_in_progress) != 1)
kdump_freeze_monarch = 1;
}
break;
case DIE_INIT_MONARCH_LEAVE:
/* Reason code 1 means machine check rendezvous */
if (kdump_on_init && (nd->sos->rv_rc != 1))
machine_kdump_on_init();
break;
case DIE_MCA_MONARCH_LEAVE:
/* *(nd->data) indicates whether the MCA is recoverable */
if (kdump_on_fatal_mca && !(*(nd->data))) {
if (atomic_inc_return(&kdump_in_progress) == 1)
machine_kdump_on_init();
/* We got a fatal MCA while kdump was running!? No way!! */
}
break;
}
return NOTIFY_DONE;
}
#ifdef CONFIG_SYSCTL
static struct ctl_table kdump_ctl_table[] = {
{
.procname = "kdump_on_init",
.data = &kdump_on_init,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "kdump_on_fatal_mca",
.data = &kdump_on_fatal_mca,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{ }
};
static struct ctl_table sys_table[] = {
{
.procname = "kernel",
.mode = 0555,
.child = kdump_ctl_table,
},
{ }
};
#endif
/*
 * machine_crash_setup - initcall that wires kdump into the die-notifier chain.
 *
 * Registers kdump_init_notifier() and, when CONFIG_SYSCTL is set, registers
 * sys_table.
 *
 * Returns 0 on success or the error from register_die_notifier().
 */
static int
machine_crash_setup(void)
{
	/* be notified before default_monarch_init_process */
	static struct notifier_block kdump_init_notifier_nb = {
		.notifier_call = kdump_init_notifier,
		.priority = 1,
	};
	int err = register_die_notifier(&kdump_init_notifier_nb);

	if (err != 0)
		return err;

#ifdef CONFIG_SYSCTL
	register_sysctl_table(sys_table);
#endif
	return 0;
}
__initcall(machine_crash_setup);

View file

@ -0,0 +1,50 @@
/*
* kernel/crash_dump.c - Memory preserving reboot related code.
*
* Created by: Simon Horman <horms@verge.net.au>
* Original code moved from kernel/crash.c
* Original code comment copied from the i386 version of this file
*/
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/crash_dump.h>
#include <asm/page.h>
#include <asm/uaccess.h>
/**
 * copy_oldmem_page - copy part of one page of the old kernel's memory
 * @pfn: page frame number of the source page
 * @buf: destination buffer, in kernel or user address space (see @userbuf)
 * @csize: number of bytes to copy
 * @offset: byte offset into the page at which the copy starts
 * @userbuf: non-zero if @buf is a user-space address (copy_to_user() is
 *	used), zero if it is a kernel address (memcpy() is used)
 *
 * The source page is addressed through __va() on the page's physical
 * address.
 *
 * Returns @csize on success (0 when @csize is 0), or -EFAULT if the copy
 * to user space fails.
 */
ssize_t
copy_oldmem_page(unsigned long pfn, char *buf,
		size_t csize, unsigned long offset, int userbuf)
{
	void *src;

	if (csize == 0)
		return 0;

	src = __va(pfn<<PAGE_SHIFT) + offset;

	if (!userbuf) {
		memcpy(buf, src, csize);
		return csize;
	}

	if (copy_to_user(buf, src, csize))
		return -EFAULT;

	return csize;
}

124
arch/ia64/kernel/cyclone.c Normal file
View file

@ -0,0 +1,124 @@
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/time.h>
#include <linux/errno.h>
#include <linux/timex.h>
#include <linux/clocksource.h>
#include <asm/io.h>
/* IBM Summit (EXA) Cyclone counter code*/
#define CYCLONE_CBAR_ADDR 0xFEB00CD0
#define CYCLONE_PMCC_OFFSET 0x51A0
#define CYCLONE_MPMC_OFFSET 0x51D0
#define CYCLONE_MPCS_OFFSET 0x51A8
#define CYCLONE_TIMER_FREQ 100000000
/*
 * Non-zero when the Cyclone counter should be used. Set by cyclone_setup()
 * and cleared again by init_cyclone_clock() when the hardware cannot be
 * brought up.
 */
int use_cyclone;
/* Request use of the Cyclone counter; init_cyclone_clock() does the setup. */
void __init cyclone_setup(void)
{
use_cyclone = 1;
}
/* Mapped counter register; assigned by init_cyclone_clock() on success. */
static void __iomem *cyclone_mc;
/* Clocksource read hook: one 64-bit MMIO read of the mapped counter. */
static cycle_t read_cyclone(struct clocksource *cs)
{
return (cycle_t)readq((void __iomem *)cyclone_mc);
}
/* Clocksource descriptor; the mask keeps the low 40 bits of each read. */
static struct clocksource clocksource_cyclone = {
.name = "cyclone",
.rating = 300,
.read = read_cyclone,
.mask = (1LL << 40) - 1,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
int __init init_cyclone_clock(void)
{
u64 __iomem *reg;
u64 base; /* saved cyclone base address */
u64 offset; /* offset from pageaddr to cyclone_timer register */
int i;
u32 __iomem *cyclone_timer; /* Cyclone MPMC0 register */
if (!use_cyclone)
return 0;
printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
/* find base address */
offset = (CYCLONE_CBAR_ADDR);
reg = ioremap_nocache(offset, sizeof(u64));
if(!reg){
printk(KERN_ERR "Summit chipset: Could not find valid CBAR"
" register.\n");
use_cyclone = 0;
return -ENODEV;
}
base = readq(reg);
iounmap(reg);
if(!base){
printk(KERN_ERR "Summit chipset: Could not find valid CBAR"
" value.\n");
use_cyclone = 0;
return -ENODEV;
}
/* setup PMCC */
offset = (base + CYCLONE_PMCC_OFFSET);
reg = ioremap_nocache(offset, sizeof(u64));
if(!reg){
printk(KERN_ERR "Summit chipset: Could not find valid PMCC"
" register.\n");
use_cyclone = 0;
return -ENODEV;
}
writel(0x00000001,reg);
iounmap(reg);
/* setup MPCS */
offset = (base + CYCLONE_MPCS_OFFSET);
reg = ioremap_nocache(offset, sizeof(u64));
if(!reg){
printk(KERN_ERR "Summit chipset: Could not find valid MPCS"
" register.\n");
use_cyclone = 0;
return -ENODEV;
}
writel(0x00000001,reg);
iounmap(reg);
/* map in cyclone_timer */
offset = (base + CYCLONE_MPMC_OFFSET);
cyclone_timer = ioremap_nocache(offset, sizeof(u32));
if(!cyclone_timer){
printk(KERN_ERR "Summit chipset: Could not find valid MPMC"
" register.\n");
use_cyclone = 0;
return -ENODEV;
}
/*quick test to make sure its ticking*/
for(i=0; i<3; i++){
u32 old = readl(cyclone_timer);
int stall = 100;
while(stall--) barrier();
if(readl(cyclone_timer) == old){
printk(KERN_ERR "Summit chipset: Counter not counting!"
" DISABLED\n");
iounmap(cyclone_timer);
cyclone_timer = NULL;
use_cyclone = 0;
return -ENODEV;
}
}
/* initialize last tick */
cyclone_mc = cyclone_timer;
clocksource_cyclone.archdata.fsys_mmio = cyclone_timer;
clocksource_register_hz(&clocksource_cyclone, CYCLONE_TIMER_FREQ);
return 0;
}
__initcall(init_cyclone_clock);

View file

@ -0,0 +1,24 @@
#include <linux/dma-mapping.h>
#include <linux/export.h>
/* Set this to 1 if there is a HW IOMMU in the system */
int iommu_detected __read_mostly;
/* DMA operations table handed out by dma_get_ops(); exported for modules. */
struct dma_map_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
/* Number of entries passed to dma_debug_init() (1 << 16 = 65536). */
#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
/* fs_initcall: initialise DMA debugging with the entry count above. */
static int __init dma_init(void)
{
dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
return 0;
}
fs_initcall(dma_init);
/* Return the DMA operations for @dev; this version ignores @dev and returns dma_ops. */
struct dma_map_ops *dma_get_ops(struct device *dev)
{
return dma_ops;
}
EXPORT_SYMBOL(dma_get_ops);

1342
arch/ia64/kernel/efi.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,86 @@
/*
* EFI call stub.
*
* Copyright (C) 1999-2001 Hewlett-Packard Co
* David Mosberger <davidm@hpl.hp.com>
*
* This stub allows us to make EFI calls in physical mode with interrupts
* turned off. We need this because we can't call SetVirtualMap() until
* the kernel has booted far enough to allow allocation of struct vma_struct
* entries (which we would need to map stuff with memory attributes other
* than uncached or writeback...). Since the GetTime() service gets called
* earlier than that, we need to be able to make physical mode EFI calls from
* the kernel.
*/
/*
* PSR settings as per SAL spec (Chapter 8 in the "IA-64 System
* Abstraction Layer Specification", revision 2.6e). Note that
* psr.dfl and psr.dfh MUST be cleared, despite what this manual says.
* Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call
* (the br.ia instruction fails unless psr.dfl and psr.dfh are
* cleared). Fortunately, SAL promises not to touch the floating
* point regs, so at least we don't have to save f2-f127.
*/
#define PSR_BITS_TO_CLEAR \
(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \
IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
IA64_PSR_DFL | IA64_PSR_DFH)
#define PSR_BITS_TO_SET \
(IA64_PSR_BN)
#include <asm/processor.h>
#include <asm/asmmacro.h>
/*
* Inputs:
* in0 = address of function descriptor of EFI routine to call
* in1..in7 = arguments to routine
*
* Outputs:
* r8 = EFI_STATUS returned by called function
*/
/*
 * efi_call_phys: call an EFI routine through its function descriptor after
 * switching via ia64_switch_mode_phys, then switch back via
 * ia64_switch_mode_virt before returning to the caller.
 *
 * Register frame: 8 inputs, 7 locals, 7 outputs.
 *   loc0 = saved rp       loc1 = saved ar.pfs   loc2 = saved gp
 *   loc3 = saved psr with PSR_BITS_TO_SET or'ed in
 *   loc4 = saved ar.rsc   loc5/loc6 = r19/r20 as left by ia64_switch_mode_phys
 */
GLOBAL_ENTRY(efi_call_phys)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
alloc loc1=ar.pfs,8,7,7,0
ld8 r2=[in0],8 // load EFI function's entry point
mov loc0=rp
.body
;;
mov loc2=gp // save global pointer
mov loc4=ar.rsc // save RSE configuration
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
;;
ld8 gp=[in0] // load EFI function's global pointer
movl r16=PSR_BITS_TO_CLEAR
mov loc3=psr // save processor status word
movl r17=PSR_BITS_TO_SET
;;
or loc3=loc3,r17
mov b6=r2
;;
andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared
br.call.sptk.many rp=ia64_switch_mode_phys
// Shift the routine's arguments: in1..in7 become out0..out6.
.ret0: mov out4=in5
mov out0=in1
mov out1=in2
mov out2=in3
mov out3=in4
mov out5=in6
mov out6=in7
// NOTE(review): presumably the old ar.bsp and old sp, as labelled in esi_stub.S -- confirm.
mov loc5=r19
mov loc6=r20
br.call.sptk.many rp=b6 // call the EFI function
.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode
// Hand the saved psr and the saved r19/r20 back for the switch to virtual mode.
mov r16=loc3
mov r19=loc5
mov r20=loc6
br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
.ret2: mov ar.rsc=loc4 // restore RSE configuration
mov ar.pfs=loc1
mov rp=loc0
mov gp=loc2
br.ret.sptk.many rp
END(efi_call_phys)

View file

@ -0,0 +1,76 @@
#include <linux/elf.h>
#include <linux/coredump.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <asm/elf.h>
/* Number of extra program headers a core dump needs: one per gate-page phdr. */
Elf64_Half elf_core_extra_phdrs(void)
{
return GATE_EHDR->e_phnum;
}
/*
 * elf_core_write_extra_phdrs - emit the gate page's program headers.
 * @cprm:   core dump state passed through to dump_emit()
 * @offset: file offset at which the gate page data will be written
 *
 * Each header is copied and adjusted before it is written. PT_LOAD headers
 * get a page-aligned size and all point at the same file offset, that of
 * the first PT_LOAD. Other headers have their offset shifted by that same
 * amount.
 *
 * Returns 1 on success, 0 as soon as dump_emit() fails.
 */
int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
{
	const struct elf_phdr *const gate_phdrs =
		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
	Elf64_Off load_ofs = 0;
	int idx;

	for (idx = 0; idx < GATE_EHDR->e_phnum; ++idx) {
		struct elf_phdr hdr = gate_phdrs[idx];

		if (hdr.p_type != PT_LOAD) {
			hdr.p_offset += load_ofs;
		} else {
			hdr.p_memsz = PAGE_ALIGN(hdr.p_memsz);
			hdr.p_filesz = hdr.p_memsz;
			/* The first PT_LOAD claims the file offset. */
			if (load_ofs == 0) {
				load_ofs = offset;
				offset += hdr.p_filesz;
			}
			hdr.p_offset = load_ofs;
		}

		hdr.p_paddr = 0;	/* match other core phdrs */

		if (!dump_emit(cprm, &hdr, sizeof(hdr)))
			return 0;
	}

	return 1;
}
/*
 * elf_core_write_extra_data - emit the contents of the gate page.
 * @cprm: core dump state passed through to dump_emit()
 *
 * Writes the page-aligned memory of the first PT_LOAD gate segment only.
 *
 * Returns 1 on success (including when no PT_LOAD segment exists), 0 if
 * dump_emit() fails.
 */
int elf_core_write_extra_data(struct coredump_params *cprm)
{
	const struct elf_phdr *const gate_phdrs =
		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
	int idx;

	for (idx = 0; idx < GATE_EHDR->e_phnum; ++idx) {
		const struct elf_phdr *hdr = &gate_phdrs[idx];

		if (hdr->p_type != PT_LOAD)
			continue;

		/* Only the first loadable segment is dumped. */
		return dump_emit(cprm, (void *)hdr->p_vaddr,
				 PAGE_ALIGN(hdr->p_memsz)) ? 1 : 0;
	}

	return 1;
}
/*
 * elf_core_extra_data_size - size of the gate page data in a core dump.
 *
 * Returns the page-aligned memory size of the first PT_LOAD gate segment,
 * or 0 if there is none.
 */
size_t elf_core_extra_data_size(void)
{
	const struct elf_phdr *const gate_phdrs =
		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
	int idx;

	for (idx = 0; idx < GATE_EHDR->e_phnum; ++idx) {
		if (gate_phdrs[idx].p_type == PT_LOAD)
			return PAGE_ALIGN(gate_phdrs[idx].p_memsz);
	}

	return 0;
}

1784
arch/ia64/kernel/entry.S Normal file

File diff suppressed because it is too large Load diff

82
arch/ia64/kernel/entry.h Normal file
View file

@ -0,0 +1,82 @@
/*
* Preserved registers that are shared between code in ivt.S and
* entry.S. Be careful not to step on these!
*/
#define PRED_LEAVE_SYSCALL 1 /* TRUE iff leave from syscall */
#define PRED_KERNEL_STACK 2 /* returning to kernel-stacks? */
#define PRED_USER_STACK 3 /* returning to user-stacks? */
#define PRED_SYSCALL 4 /* inside a system call? */
#define PRED_NON_SYSCALL 5 /* complement of PRED_SYSCALL */
#ifdef __ASSEMBLY__
# define PASTE2(x,y) x##y
# define PASTE(x,y) PASTE2(x,y)
# define pLvSys PASTE(p,PRED_LEAVE_SYSCALL)
# define pKStk PASTE(p,PRED_KERNEL_STACK)
# define pUStk PASTE(p,PRED_USER_STACK)
# define pSys PASTE(p,PRED_SYSCALL)
# define pNonSys PASTE(p,PRED_NON_SYSCALL)
#endif
#define PT(f) (IA64_PT_REGS_##f##_OFFSET)
#define SW(f) (IA64_SWITCH_STACK_##f##_OFFSET)
#define SOS(f) (IA64_SAL_OS_STATE_##f##_OFFSET)
/*
 * PT_REGS_SAVES(off): unwind directives for a frame of IA64_PT_REGS_SIZE+16+off
 * bytes, giving the spill locations of rp, ar.pfs, ar.unat, ar.fpsr and pr at
 * their PT() offsets plus 16+off.
 */
#define PT_REGS_SAVES(off) \
.unwabi 3, 'i'; \
.fframe IA64_PT_REGS_SIZE+16+(off); \
.spillsp rp, PT(CR_IIP)+16+(off); \
.spillsp ar.pfs, PT(CR_IFS)+16+(off); \
.spillsp ar.unat, PT(AR_UNAT)+16+(off); \
.spillsp ar.fpsr, PT(AR_FPSR)+16+(off); \
.spillsp pr, PT(PR)+16+(off);
/* PT_REGS_UNWIND_INFO(off): the directives above wrapped in .prologue/.body. */
#define PT_REGS_UNWIND_INFO(off) \
.prologue; \
PT_REGS_SAVES(off); \
.body
/*
 * SWITCH_STACK_SAVES(off): unwind directives giving the save location of each
 * listed register at its SW() offset plus 16+off: ar.unat, ar.fpsr, f2-f5,
 * f16-f31, r4-r7, b0-b5, ar.pfs, ar.lc, @priunat, ar.rnat, ar.bspstore and pr.
 */
#define SWITCH_STACK_SAVES(off) \
.savesp ar.unat,SW(CALLER_UNAT)+16+(off); \
.savesp ar.fpsr,SW(AR_FPSR)+16+(off); \
.spillsp f2,SW(F2)+16+(off); .spillsp f3,SW(F3)+16+(off); \
.spillsp f4,SW(F4)+16+(off); .spillsp f5,SW(F5)+16+(off); \
.spillsp f16,SW(F16)+16+(off); .spillsp f17,SW(F17)+16+(off); \
.spillsp f18,SW(F18)+16+(off); .spillsp f19,SW(F19)+16+(off); \
.spillsp f20,SW(F20)+16+(off); .spillsp f21,SW(F21)+16+(off); \
.spillsp f22,SW(F22)+16+(off); .spillsp f23,SW(F23)+16+(off); \
.spillsp f24,SW(F24)+16+(off); .spillsp f25,SW(F25)+16+(off); \
.spillsp f26,SW(F26)+16+(off); .spillsp f27,SW(F27)+16+(off); \
.spillsp f28,SW(F28)+16+(off); .spillsp f29,SW(F29)+16+(off); \
.spillsp f30,SW(F30)+16+(off); .spillsp f31,SW(F31)+16+(off); \
.spillsp r4,SW(R4)+16+(off); .spillsp r5,SW(R5)+16+(off); \
.spillsp r6,SW(R6)+16+(off); .spillsp r7,SW(R7)+16+(off); \
.spillsp b0,SW(B0)+16+(off); .spillsp b1,SW(B1)+16+(off); \
.spillsp b2,SW(B2)+16+(off); .spillsp b3,SW(B3)+16+(off); \
.spillsp b4,SW(B4)+16+(off); .spillsp b5,SW(B5)+16+(off); \
.spillsp ar.pfs,SW(AR_PFS)+16+(off); .spillsp ar.lc,SW(AR_LC)+16+(off); \
.spillsp @priunat,SW(AR_UNAT)+16+(off); \
.spillsp ar.rnat,SW(AR_RNAT)+16+(off); \
.spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off); \
.spillsp pr,SW(PR)+16+(off)
/*
 * DO_SAVE_SWITCH_STACK: lower sp by IA64_SWITCH_STACK_SIZE, put the address
 * of the local label "1:" into r28 and b7, and branch to save_switch_stack.
 * NOTE(review): save_switch_stack presumably returns through b7 to "1:" --
 * confirm against its definition.
 */
#define DO_SAVE_SWITCH_STACK \
movl r28=1f; \
;; \
.fframe IA64_SWITCH_STACK_SIZE; \
adds sp=-IA64_SWITCH_STACK_SIZE,sp; \
mov.ret.sptk b7=r28,1f; \
SWITCH_STACK_SAVES(0); \
br.cond.sptk.many save_switch_stack; \
1:
/*
 * DO_LOAD_SWITCH_STACK: counterpart of the macro above. Branches to
 * load_switch_stack with the address of "1:" in r28 and b7, then raises sp
 * by IA64_SWITCH_STACK_SIZE at "1:".
 */
#define DO_LOAD_SWITCH_STACK \
movl r28=1f; \
;; \
invala; \
mov.ret.sptk b7=r28,1f; \
br.cond.sptk.many load_switch_stack; \
1: .restore sp; \
adds sp=IA64_SWITCH_STACK_SIZE,sp

View file

@ -0,0 +1,314 @@
/*
* err_inject.c -
* 1.) Inject errors to a processor.
* 2.) Query error injection capabilities.
* This driver along with user space code can be acting as an error
* injection tool.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Written by: Fenghua Yu <fenghua.yu@intel.com>, Intel Corporation
* Copyright (C) 2006, Intel Corp. All rights reserved.
*
*/
#include <linux/device.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/module.h>
/* Defined unconditionally: the debug printk()s in this file are always built. */
#define ERR_INJ_DEBUG
#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte;
/* Declare a read-only (0444) device attribute backed by show_<name>(). */
#define define_one_ro(name) \
static DEVICE_ATTR(name, 0444, show_##name, NULL)
/* Declare a read-write (0644) device attribute backed by show_/store_<name>(). */
#define define_one_rw(name) \
static DEVICE_ATTR(name, 0644, show_##name, store_##name)
/* State of the sysfs interface, one slot per cpu, indexed by dev->id. */
static u64 call_start[NR_CPUS];
static u64 phys_addr[NR_CPUS];
static u64 err_type_info[NR_CPUS];
static u64 err_struct_info[NR_CPUS];
/* Three 8-byte words whose physical address is passed to the PAL call; 16-byte aligned. */
static struct {
u64 data1;
u64 data2;
u64 data3;
} __attribute__((__aligned__(16))) err_data_buffer[NR_CPUS];
/* Results of the last PAL call made through store_call_start(). */
static s64 status[NR_CPUS];
static u64 capabilities[NR_CPUS];
static u64 resources[NR_CPUS];
/* Generate show_<name>(): print <name>[cpu] as hex followed by a newline. */
#define show(name) \
static ssize_t \
show_##name(struct device *dev, struct device_attribute *attr, \
char *buf) \
{ \
u32 cpu=dev->id; \
return sprintf(buf, "%lx\n", name[cpu]); \
}
/* Generate store_<name>(): parse the buffer as hex into <name>[cpu]. */
#define store(name) \
static ssize_t \
store_##name(struct device *dev, struct device_attribute *attr, \
const char *buf, size_t size) \
{ \
unsigned int cpu=dev->id; \
name[cpu] = simple_strtoull(buf, NULL, 16); \
return size; \
}
show(call_start)
/*
 * store_call_start - trigger a PAL error injection call for this cpu slot.
 * @dev:  cpu device; dev->id selects the per-cpu slot
 * @attr: attribute being written (unused)
 * @buf:  hex number: 0 = do nothing, 1 = call pal_mc_error_inject in
 *        physical mode, 2 = call it in virtual mode
 * @size: length of @buf
 *
 * The written value is stored in call_start[cpu] so that show_call_start()
 * reports it. The result of the PAL call is stored in status[cpu],
 * capabilities[cpu] and resources[cpu]; any other value sets status[cpu]
 * to -EINVAL.
 *
 * It's user's responsibility to call the PAL procedure on a specific
 * processor. The cpu number in driver is only used for storing data.
 *
 * Always returns @size.
 */
static ssize_t
store_call_start(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t size)
{
	unsigned int cpu = dev->id;
	unsigned long cmd = simple_strtoull(buf, NULL, 16);

	/*
	 * Record the request. Previously a local named call_start shadowed
	 * the per-cpu array, so the array was never written and reads of the
	 * attribute always returned 0.
	 */
	call_start[cpu] = cmd;

#ifdef ERR_INJ_DEBUG
	printk(KERN_DEBUG "pal_mc_err_inject for cpu%d:\n", cpu);
	printk(KERN_DEBUG "err_type_info=%lx,\n", err_type_info[cpu]);
	printk(KERN_DEBUG "err_struct_info=%lx,\n", err_struct_info[cpu]);
	printk(KERN_DEBUG "err_data_buffer=%lx, %lx, %lx.\n",
			err_data_buffer[cpu].data1,
			err_data_buffer[cpu].data2,
			err_data_buffer[cpu].data3);
#endif
	switch (cmd) {
	case 0:	/* Do nothing. */
		break;
	case 1:	/* Call pal_mc_error_inject in physical mode. */
		status[cpu]=ia64_pal_mc_error_inject_phys(err_type_info[cpu],
					err_struct_info[cpu],
					ia64_tpa(&err_data_buffer[cpu]),
					&capabilities[cpu],
					&resources[cpu]);
		break;
	case 2:	/* Call pal_mc_error_inject in virtual mode. */
		status[cpu]=ia64_pal_mc_error_inject_virt(err_type_info[cpu],
					err_struct_info[cpu],
					ia64_tpa(&err_data_buffer[cpu]),
					&capabilities[cpu],
					&resources[cpu]);
		break;
	default:
		status[cpu] = -EINVAL;
		break;
	}

#ifdef ERR_INJ_DEBUG
	printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]);
	printk(KERN_DEBUG "capapbilities=%lx,\n", capabilities[cpu]);
	printk(KERN_DEBUG "resources=%lx\n", resources[cpu]);
#endif
	return size;
}
show(err_type_info)
store(err_type_info)

/* Report the physical address last computed by store_virtual_to_phys(). */
static ssize_t
show_virtual_to_phys(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	unsigned int slot = dev->id;

	return sprintf(buf, "%lx\n", phys_addr[slot]);
}
/*
 * store_virtual_to_phys - translate a virtual address of the writing task.
 * @dev:  cpu device; dev->id selects the per-cpu slot
 * @attr: attribute being written (unused)
 * @buf:  virtual address as a hex number
 * @size: length of @buf
 *
 * Probes one page at the address with get_user_pages() and, if that
 * succeeds, stores ia64_tpa() of the address in phys_addr[cpu].
 *
 * NOTE(review): VM_READ is passed in the position that appears to be the
 * "write" argument of get_user_pages(), and no locking is visible around
 * the call -- confirm against the get_user_pages() contract.
 *
 * Returns @size on success or -EINVAL if the page cannot be obtained.
 */
static ssize_t
store_virtual_to_phys(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t size)
{
	unsigned int slot = dev->id;
	u64 uaddr = simple_strtoull(buf, NULL, 16);
	int npages = get_user_pages(current, current->mm, uaddr,
			1, VM_READ, 0, NULL, NULL);

	if (npages > 0) {
		phys_addr[slot] = ia64_tpa(uaddr);
		return size;
	}

#ifdef ERR_INJ_DEBUG
	printk("Virtual address %lx is not existing.\n",uaddr);
#endif
	return -EINVAL;
}
show(err_struct_info)
store(err_struct_info)

/* Print the three data words of this cpu's error data buffer as hex. */
static ssize_t
show_err_data_buffer(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	unsigned int slot = dev->id;

	return sprintf(buf, "%lx, %lx, %lx\n",
			err_data_buffer[slot].data1,
			err_data_buffer[slot].data2,
			err_data_buffer[slot].data3);
}
/*
 * store_err_data_buffer - set the three data words of the error data buffer.
 * @dev:  cpu device; dev->id selects the per-cpu slot
 * @attr: attribute being written (unused)
 * @buf:  three hex numbers in the form "a, b, c"
 * @size: length of @buf
 *
 * Returns @size when exactly ERR_DATA_BUFFER_SIZE values were parsed,
 * -EINVAL otherwise. Words parsed before a failure stay written.
 */
static ssize_t
store_err_data_buffer(struct device *dev,
		struct device_attribute *attr,
		const char *buf, size_t size)
{
	unsigned int slot = dev->id;
	int nparsed;

#ifdef ERR_INJ_DEBUG
	printk("write err_data_buffer=[%lx,%lx,%lx] on cpu%d\n",
			err_data_buffer[slot].data1,
			err_data_buffer[slot].data2,
			err_data_buffer[slot].data3,
			slot);
#endif
	nparsed = sscanf(buf, "%lx, %lx, %lx",
			&err_data_buffer[slot].data1,
			&err_data_buffer[slot].data2,
			&err_data_buffer[slot].data3);

	if (nparsed == ERR_DATA_BUFFER_SIZE)
		return size;

	return -EINVAL;
}
/* Read-only result attributes only need a show_<name>() handler. */
show(status)
show(capabilities)
show(resources)
/* Attribute objects: five read-write inputs, three read-only results. */
define_one_rw(call_start);
define_one_rw(err_type_info);
define_one_rw(err_struct_info);
define_one_rw(err_data_buffer);
define_one_rw(virtual_to_phys);
define_one_ro(status);
define_one_ro(capabilities);
define_one_ro(resources);
/* NULL-terminated list of the attributes placed in the group below. */
static struct attribute *default_attrs[] = {
&dev_attr_call_start.attr,
&dev_attr_virtual_to_phys.attr,
&dev_attr_err_type_info.attr,
&dev_attr_err_struct_info.attr,
&dev_attr_err_data_buffer.attr,
&dev_attr_status.attr,
&dev_attr_capabilities.attr,
&dev_attr_resources.attr,
NULL
};
/* Named group: the attributes appear in an "err_inject" subdirectory. */
static struct attribute_group err_inject_attr_group = {
.attrs = default_attrs,
.name = "err_inject"
};
/* Add/Remove err_inject interface for CPU device */
/* Create the err_inject attribute group on @sys_dev; returns sysfs_create_group()'s result. */
static int err_inject_add_dev(struct device *sys_dev)
{
return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group);
}
/* Remove the err_inject attribute group from @sys_dev; always returns 0. */
static int err_inject_remove_dev(struct device *sys_dev)
{
sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group);
return 0;
}
/*
 * err_inject_cpu_callback - cpu hotplug notifier for the err_inject group.
 * @nfb:    notifier block (unused)
 * @action: hotplug event
 * @hcpu:   cpu number, passed as a pointer-sized integer
 *
 * Adds the attribute group when a cpu comes online and removes it when a
 * cpu is dead; all other events are ignored. Always returns NOTIFY_OK.
 */
static int err_inject_cpu_callback(struct notifier_block *nfb,
		unsigned long action, void *hcpu)
{
	unsigned int cpu_nr = (unsigned long)hcpu;
	struct device *cpu_dev = get_cpu_device(cpu_nr);

	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
		err_inject_add_dev(cpu_dev);
	else if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
		err_inject_remove_dev(cpu_dev);

	return NOTIFY_OK;
}

static struct notifier_block err_inject_cpu_notifier =
{
	.notifier_call = err_inject_cpu_callback,
};
/*
 * err_inject_init - module init: expose err_inject on every online cpu.
 *
 * Between cpu_notifier_register_begin() and cpu_notifier_register_done(),
 * runs the hotplug callback with CPU_ONLINE for each online cpu and then
 * registers the notifier. Always returns 0.
 */
static int __init
err_inject_init(void)
{
	int cpu;

#ifdef ERR_INJ_DEBUG
	printk(KERN_INFO "Enter error injection driver.\n");
#endif

	cpu_notifier_register_begin();

	for_each_online_cpu(cpu)
		err_inject_cpu_callback(&err_inject_cpu_notifier, CPU_ONLINE,
				(void *)(long)cpu);

	__register_hotcpu_notifier(&err_inject_cpu_notifier);

	cpu_notifier_register_done();

	return 0;
}
/*
 * err_inject_exit - module exit: remove err_inject from every online cpu.
 *
 * Between cpu_notifier_register_begin() and cpu_notifier_register_done(),
 * removes the attribute group from each online cpu's device and then
 * unregisters the hotplug notifier.
 */
static void __exit
err_inject_exit(void)
{
	int cpu;

#ifdef ERR_INJ_DEBUG
	printk(KERN_INFO "Exit error injection driver.\n");
#endif

	cpu_notifier_register_begin();

	for_each_online_cpu(cpu) {
		struct device *cpu_dev = get_cpu_device(cpu);

		sysfs_remove_group(&cpu_dev->kobj, &err_inject_attr_group);
	}

	__unregister_hotcpu_notifier(&err_inject_cpu_notifier);

	cpu_notifier_register_done();
}

module_init(err_inject_init);
module_exit(err_inject_exit);

MODULE_AUTHOR("Fenghua Yu <fenghua.yu@intel.com>");
MODULE_DESCRIPTION("MC error injection kernel sysfs interface");
MODULE_LICENSE("GPL");

205
arch/ia64/kernel/esi.c Normal file
View file

@ -0,0 +1,205 @@
/*
* Extensible SAL Interface (ESI) support routines.
*
* Copyright (C) 2006 Hewlett-Packard Co
* Alex Williamson <alex.williamson@hp.com>
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <asm/esi.h>
#include <asm/sal.h>
MODULE_AUTHOR("Alex Williamson <alex.williamson@hp.com>");
MODULE_DESCRIPTION("Extensible SAL Interface (ESI) support");
MODULE_LICENSE("GPL");
#define MODULE_NAME "esi"
/* GUID of the ESI table; esi_init() searches the EFI configuration tables for it. */
#define ESI_TABLE_GUID \
EFI_GUID(0x43EA58DC, 0xCF28, 0x4b06, 0xB3, \
0x91, 0xB7, 0x50, 0x59, 0x34, 0x2B, 0xD4)
/* Descriptor types that may appear in the ESI system table. */
enum esi_systab_entry_type {
ESI_DESC_ENTRY_POINT = 0
};
/*
* Entry type: Size:
* 0 48
*/
/* Size lookup by type: indexes a string whose only character is '\060' (octal 60 = 48). */
#define ESI_DESC_SIZE(type) "\060"[(unsigned) (type)]
/* Layout of an entry-point descriptor (1 + 15 + 8 + 8 + sizeof(guid) bytes). */
typedef struct ia64_esi_desc_entry_point {
u8 type;
u8 reserved1[15];
u64 esi_proc;
u64 gp;
efi_guid_t guid;
} ia64_esi_desc_entry_point_t;
/* Function descriptor built on the stack to call an ESI procedure: entry address plus gp. */
struct pdesc {
void *addr;
void *gp;
};
/* Validated ESI system table; NULL until esi_init() succeeds. */
static struct ia64_sal_systab *esi_systab;
/*
 * esi_init - locate and validate the ESI system table.
 *
 * Searches the EFI configuration tables for ESI_TABLE_GUID, checks the
 * "ESIT" signature and walks every descriptor to make sure only known
 * types are present. The table is published in esi_systab only if all
 * checks pass.
 *
 * Returns 0 on success, -ENODEV if the table is missing, has a bad
 * signature or contains an unknown descriptor type.
 */
static int __init esi_init (void)
{
	efi_config_table_t *config_tables;
	struct ia64_sal_systab *systab;
	unsigned long esi = 0;
	char *p;
	int i;

	config_tables = __va(efi.systab->tables);

	for (i = 0; i < (int) efi.systab->nr_tables; ++i) {
		if (efi_guidcmp(config_tables[i].guid, ESI_TABLE_GUID) == 0) {
			esi = config_tables[i].table;
			break;
		}
	}

	if (!esi)
		return -ENODEV;

	systab = __va(esi);

	if (strncmp(systab->signature, "ESIT", 4) != 0) {
		/* Terminate the message so it is not merged with the next one. */
		printk(KERN_ERR "bad signature in ESI system table!\n");
		return -ENODEV;
	}

	/* Descriptors start right after the table header. */
	p = (char *) (systab + 1);
	for (i = 0; i < systab->entry_count; i++) {
		/*
		 * The first byte of each entry type contains the type
		 * descriptor.
		 */
		switch (*p) {
		case ESI_DESC_ENTRY_POINT:
			break;
		default:
			/* An unknown type rejects the whole table. */
			printk(KERN_WARNING "Unknown table type %d found in "
			       "ESI table, ignoring rest of table\n", *p);
			return -ENODEV;
		}

		p += ESI_DESC_SIZE(*p);
	}

	esi_systab = systab;
	return 0;
}
/*
 * ia64_esi_call - call the ESI procedure identified by @guid.
 * @guid:      GUID of the entry-point descriptor to call
 * @isrvp:     where the procedure's return value is stored
 * @proc_type: selects the protection used around the call:
 *             ESI_PROC_SERIALIZED takes sal_lock with interrupts saved,
 *             ESI_PROC_MP_SAFE only saves/disables local interrupts,
 *             anything else only disables preemption
 * @func:      function code passed as the first argument
 * @arg1..@arg7: remaining arguments passed through unchanged
 *
 * Returns 0 if a matching descriptor was found and called, -1 if the ESI
 * table is not available or no descriptor matches @guid.
 */
int ia64_esi_call (efi_guid_t guid, struct ia64_sal_retval *isrvp,
enum esi_proc_type proc_type, u64 func,
u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6,
u64 arg7)
{
struct ia64_fpreg fr[6];
unsigned long flags = 0;
int i;
char *p;
if (!esi_systab)
return -1;
/* Descriptors start right after the table header. */
p = (char *) (esi_systab + 1);
for (i = 0; i < esi_systab->entry_count; i++) {
if (*p == ESI_DESC_ENTRY_POINT) {
ia64_esi_desc_entry_point_t *esi = (void *)p;
if (!efi_guidcmp(guid, esi->guid)) {
ia64_sal_handler esi_proc;
struct pdesc pdesc;
/* Build a function descriptor from the table's addresses, converted with __va(). */
pdesc.addr = __va(esi->esi_proc);
pdesc.gp = __va(esi->gp);
esi_proc = (ia64_sal_handler) &pdesc;
/* Scratch FP registers are saved before the call and reloaded after it. */
ia64_save_scratch_fpregs(fr);
if (proc_type == ESI_PROC_SERIALIZED)
spin_lock_irqsave(&sal_lock, flags);
else if (proc_type == ESI_PROC_MP_SAFE)
local_irq_save(flags);
else
preempt_disable();
*isrvp = (*esi_proc)(func, arg1, arg2, arg3,
arg4, arg5, arg6, arg7);
/* Undo exactly the protection taken above. */
if (proc_type == ESI_PROC_SERIALIZED)
spin_unlock_irqrestore(&sal_lock,
flags);
else if (proc_type == ESI_PROC_MP_SAFE)
local_irq_restore(flags);
else
preempt_enable();
ia64_load_scratch_fpregs(fr);
return 0;
}
}
p += ESI_DESC_SIZE(*p);
}
return -1;
}
EXPORT_SYMBOL_GPL(ia64_esi_call);
/*
 * ia64_esi_call_phys - call the ESI procedure identified by @guid through
 * the esi_call_phys() stub.
 * @guid:  GUID of the entry-point descriptor to call
 * @isrvp: where the procedure's return value is stored
 * @func:  function code, passed as parameter 0
 * @arg1..@arg7: parameters 1..7
 *
 * The eight parameters are passed to the stub through an array in memory.
 * The call is always made with sal_lock held and interrupts saved.
 *
 * Returns 0 if a matching descriptor was found and called, -1 if the ESI
 * table is not available or no descriptor matches @guid.
 */
int ia64_esi_call_phys (efi_guid_t guid, struct ia64_sal_retval *isrvp,
u64 func, u64 arg1, u64 arg2, u64 arg3, u64 arg4,
u64 arg5, u64 arg6, u64 arg7)
{
struct ia64_fpreg fr[6];
unsigned long flags;
u64 esi_params[8];
char *p;
int i;
if (!esi_systab)
return -1;
/* Descriptors start right after the table header. */
p = (char *) (esi_systab + 1);
for (i = 0; i < esi_systab->entry_count; i++) {
if (*p == ESI_DESC_ENTRY_POINT) {
ia64_esi_desc_entry_point_t *esi = (void *)p;
if (!efi_guidcmp(guid, esi->guid)) {
ia64_sal_handler esi_proc;
struct pdesc pdesc;
/* Unlike ia64_esi_call(), the table's addresses are used without __va(). */
pdesc.addr = (void *)esi->esi_proc;
pdesc.gp = (void *)esi->gp;
esi_proc = (ia64_sal_handler) &pdesc;
esi_params[0] = func;
esi_params[1] = arg1;
esi_params[2] = arg2;
esi_params[3] = arg3;
esi_params[4] = arg4;
esi_params[5] = arg5;
esi_params[6] = arg6;
esi_params[7] = arg7;
/* Scratch FP registers are saved before the call and reloaded after it. */
ia64_save_scratch_fpregs(fr);
spin_lock_irqsave(&sal_lock, flags);
*isrvp = esi_call_phys(esi_proc, esi_params);
spin_unlock_irqrestore(&sal_lock, flags);
ia64_load_scratch_fpregs(fr);
return 0;
}
}
p += ESI_DESC_SIZE(*p);
}
return -1;
}
EXPORT_SYMBOL_GPL(ia64_esi_call_phys);
/* Nothing to undo on unload; the empty exit handler exists so the module can be removed. */
static void __exit esi_exit (void)
{
}
module_init(esi_init);
module_exit(esi_exit); /* makes module removable... */

View file

@ -0,0 +1,96 @@
/*
* ESI call stub.
*
* Copyright (C) 2005 Hewlett-Packard Co
* Alex Williamson <alex.williamson@hp.com>
*
* Based on EFI call stub by David Mosberger. The stub is virtually
* identical to the one for EFI phys-mode calls, except that ESI
* calls may have up to 8 arguments, so they get passed to this routine
* through memory.
*
* This stub allows us to make ESI calls in physical mode with interrupts
* turned off. ESI calls may not support calling from virtual mode.
*
* Google for "Extensible SAL specification" for a document describing the
* ESI standard.
*/
/*
* PSR settings as per SAL spec (Chapter 8 in the "IA-64 System
* Abstraction Layer Specification", revision 2.6e). Note that
* psr.dfl and psr.dfh MUST be cleared, despite what this manual says.
* Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call
* (the br.ia instruction fails unless psr.dfl and psr.dfh are
* cleared). Fortunately, SAL promises not to touch the floating
* point regs, so at least we don't have to save f2-f127.
*/
#define PSR_BITS_TO_CLEAR \
(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \
IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
IA64_PSR_DFL | IA64_PSR_DFH)
#define PSR_BITS_TO_SET \
(IA64_PSR_BN)
#include <asm/processor.h>
#include <asm/asmmacro.h>
/*
* Inputs:
* in0 = address of function descriptor of ESI routine to call
* in1 = address of array of ESI parameters
*
* Outputs:
* r8 = result returned by called function
*/
/*
 * esi_call_phys: load the eight ESI parameters from the array at in1 into
 * out0..out7, switch via ia64_switch_mode_phys, call the ESI routine through
 * its function descriptor at in0, and switch back via ia64_switch_mode_virt.
 *
 * Register frame: 2 inputs, 7 locals, 8 outputs.
 *   loc0 = saved rp       loc1 = saved ar.pfs   loc2 = saved gp
 *   loc3 = saved psr with PSR_BITS_TO_SET or'ed in
 *   loc4 = saved ar.rsc   loc5 = old ar.bsp     loc6 = old sp
 */
GLOBAL_ENTRY(esi_call_phys)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
alloc loc1=ar.pfs,2,7,8,0
ld8 r2=[in0],8 // load ESI function's entry point
mov loc0=rp
.body
;;
ld8 out0=[in1],8 // ESI params loaded from array
;; // passing all as inputs doesn't work
ld8 out1=[in1],8
;;
ld8 out2=[in1],8
;;
ld8 out3=[in1],8
;;
ld8 out4=[in1],8
;;
ld8 out5=[in1],8
;;
ld8 out6=[in1],8
;;
ld8 out7=[in1]
mov loc2=gp // save global pointer
mov loc4=ar.rsc // save RSE configuration
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
;;
ld8 gp=[in0] // load ESI function's global pointer
movl r16=PSR_BITS_TO_CLEAR
mov loc3=psr // save processor status word
movl r17=PSR_BITS_TO_SET
;;
or loc3=loc3,r17
mov b6=r2
;;
andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared
br.call.sptk.many rp=ia64_switch_mode_phys
.ret0: mov loc5=r19 // old ar.bsp
mov loc6=r20 // old sp
br.call.sptk.many rp=b6 // call the ESI function
.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov r16=loc3 // save virtual mode psr
mov r19=loc5 // save virtual mode bspstore
mov r20=loc6 // save virtual mode sp
br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
.ret2: mov ar.rsc=loc4 // restore RSE configuration
mov ar.pfs=loc1
mov rp=loc0
mov gp=loc2
br.ret.sptk.many rp
END(esi_call_phys)

836
arch/ia64/kernel/fsys.S Normal file
View file

@ -0,0 +1,836 @@
/*
* This file contains the light-weight system call handlers (fsyscall-handlers).
*
* Copyright (C) 2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* 25-Sep-03 davidm Implement fsys_rt_sigprocmask().
* 18-Feb-03 louisk Implement fsys_gettimeofday().
* 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more,
* probably broke it along the way... ;-)
* 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make
* it capable of using memory based clocks without falling back to C code.
* 08-Feb-07 Fenghua Yu Implement fsys_getcpu.
*
*/
#include <asm/asmmacro.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/percpu.h>
#include <asm/thread_info.h>
#include <asm/sal.h>
#include <asm/signal.h>
#include <asm/unistd.h>
#include "entry.h"
#include "paravirt_inst.h"
/*
* See Documentation/ia64/fsys.txt for details on fsyscalls.
*
* On entry to an fsyscall handler:
* r10 = 0 (i.e., defaults to "successful syscall return")
* r11 = saved ar.pfs (a user-level value)
* r15 = system call number
* r16 = "current" task pointer (in normal kernel-mode, this is in r13)
* r32-r39 = system call arguments
* b6 = return address (a user-level value)
* ar.pfs = previous frame-state (a user-level value)
* PSR.be = cleared to zero (i.e., little-endian byte order is in effect)
* all other registers may contain values passed in from user-mode
*
* On return from an fsyscall handler:
* r11 = saved ar.pfs (as passed into the fsyscall handler)
* r15 = system call number (as passed into the fsyscall handler)
* r32-r39 = system call arguments (as passed into the fsyscall handler)
* b6 = return address (as passed into the fsyscall handler)
* ar.pfs = previous frame-state (as passed into the fsyscall handler)
*/
/*
 * fsys_ni_syscall: handler for fsyscalls that are not implemented.
 * Returns with r8 = ENOSYS and r10 = -1 (r10 is 0, i.e. "successful syscall
 * return", on entry to every handler).
 */
ENTRY(fsys_ni_syscall)
.prologue
.altrp b6
.body
mov r8=ENOSYS
mov r10=-1
FSYS_RETURN
END(fsys_ni_syscall)
/*
 * fsys_getpid: light-weight getpid().
 * Follows current->group_leader to its struct pid and returns in r8 the
 * number stored at numbers[pid->level]. If any TIF_ALLWORK_MASK flag is set
 * in the thread flags, branches to fsys_fallback_syscall instead.
 */
ENTRY(fsys_getpid)
.prologue
.altrp b6
.body
add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
;;
ld8 r17=[r17] // r17 = current->group_leader
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;;
ld4 r9=[r9]
add r17=IA64_TASK_TGIDLINK_OFFSET,r17
;;
and r9=TIF_ALLWORK_MASK,r9
ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid
;;
add r8=IA64_PID_LEVEL_OFFSET,r17
;;
ld4 r8=[r8] // r8 = pid->level
add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
;;
shl r8=r8,IA64_UPID_SHIFT
;;
add r17=r17,r8 // r17 = &pid->numbers[pid->level]
;;
ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
;;
mov r17=0 // clear the kernel pointer held in r17 before returning
;;
cmp.ne p8,p0=0,r9 // p8 = some work flag is pending
(p8) br.spnt.many fsys_fallback_syscall
FSYS_RETURN
END(fsys_getpid)
/*
 * fsys_set_tid_address: light-weight set_tid_address().
 * Stores the argument r32 into current's clear_child_tid field (or -1 if r32
 * is NaT) and returns in r8 the number stored at numbers[pid->level] of
 * current's struct pid. If any TIF_ALLWORK_MASK flag is set in the thread
 * flags, branches to fsys_fallback_syscall.
 */
ENTRY(fsys_set_tid_address)
.prologue
.altrp b6
.body
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
add r17=IA64_TASK_TGIDLINK_OFFSET,r16
;;
ld4 r9=[r9]
tnat.z p6,p7=r32 // check argument register for being NaT
ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid
;;
and r9=TIF_ALLWORK_MASK,r9
add r8=IA64_PID_LEVEL_OFFSET,r17
add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
;;
ld4 r8=[r8] // r8 = pid->level
add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
;;
shl r8=r8,IA64_UPID_SHIFT
;;
add r17=r17,r8 // r17 = &pid->numbers[pid->level]
;;
ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
;;
cmp.ne p8,p0=0,r9 // p8 = some work flag is pending
mov r17=-1
;;
(p6) st8 [r18]=r32 // argument is valid: store it
(p7) st8 [r18]=r17 // argument is NaT: store -1 instead
(p8) br.spnt.many fsys_fallback_syscall
;;
mov r17=0 // i must not leak kernel bits...
mov r18=0 // i must not leak kernel bits...
FSYS_RETURN
END(fsys_set_tid_address)
#if IA64_GTOD_SEQ_OFFSET !=0
#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t
#endif
#if IA64_ITC_JITTER_OFFSET !=0
#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t
#endif
/*
 * Clock ids accepted by fsys_clock_gettime, and the processing flags held in
 * r30 on entry to .gettime.  The flags are copied into predicates with
 * "mov pr = r30,0xc000", so 0x4000 selects p14 (divide by 1000) and 0x8000
 * selects p15 (add monotonic).  fsys_clock_gettime builds the flag word as
 * clock_id << 15, which maps CLOCK_MONOTONIC onto CLOCK_ADD_MONOTONIC.
 */
#define CLOCK_REALTIME 0
#define CLOCK_MONOTONIC 1
#define CLOCK_DIVIDE_BY_1000 0x4000
#define CLOCK_ADD_MONOTONIC 0x8000
/*
 * fsys_gettimeofday: light-weight gettimeofday; also hosts the shared
 * .gettime body that fsys_clock_gettime branches into.
 *
 * r32 is the result pointer (copied to r31) and r33 is the second argument,
 * which is only checked for NaT.  The time is read from fsyscall_gtod_data
 * inside a sequence-counter retry loop (.time_redo), scaled with the
 * clocksource mult/shift, normalized so that nsec < 1000000000, and stored as
 * two 8-byte values: seconds at [r31] and nanoseconds (or microseconds when
 * p14 is set) at [r31 + IA64_TIMESPEC_TV_NSEC_OFFSET].
 *
 * Outcomes visible here: success returns r8 = 0; a NaT argument returns
 * r8 = EINVAL, r10 = -1; a faulting user pointer returns r8 = EFAULT,
 * r10 = -1; pending work flags divert to fsys_fallback_syscall.
 */
ENTRY(fsys_gettimeofday)
.prologue
.altrp b6
.body
mov r31 = r32
tnat.nz p6,p0 = r33 // guard against NaT argument
(p6) br.cond.spnt.few .fail_einval
mov r30 = CLOCK_DIVIDE_BY_1000
;;
.gettime:
// Register map
// Incoming r31 = pointer to address where to place result
// r30 = flags determining how time is processed
// r2,r3 = temp r4-r7 preserved
// r8 = result nanoseconds
// r9 = result seconds
// r10 = temporary storage for clock difference
// r11 = preserved: saved ar.pfs
// r12 = preserved: memory stack
// r13 = preserved: thread pointer
// r14 = address of mask / mask value
// r15 = preserved: system call number
// r16 = preserved: current task pointer
// r17 = (not used)
// r18 = (not used)
// r19 = address of itc_lastcycle
// r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence)
// r21 = address of mmio_ptr
// r22 = address of wall_time or monotonic_time
// r23 = address of shift / value
// r24 = address mult factor / cycle_last value
// r25 = itc_lastcycle value
// r26 = address clocksource cycle_last
// r27 = (not used)
// r28 = sequence number at the beginning of critical section
// r29 = address of itc_jitter
// r30 = time processing flags / memory address
// r31 = pointer to result
// Predicates
// p6,p7 short term use
// p8 = timesource ar.itc
// p9 = timesource mmio64
// p10 = timesource mmio32 - not used
// p11 = timesource not to be handled by asm code
// p12 = memory time source ( = p9 | p10) - not used
// p13 = do cmpxchg with itc_lastcycle
// p14 = Divide by 1000
// p15 = Add monotonic
//
// Note that instructions are optimized for McKinley. McKinley can
// process two bundles simultaneously and therefore we continuously
// try to feed the CPU two bundles and then a stop.
add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
tnat.nz p6,p0 = r31 // guard against Nat argument
(p6) br.cond.spnt.few .fail_einval
movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
;;
ld4 r2 = [r2] // process work pending flags
movl r29 = itc_jitter_data // itc_jitter
add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time
add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
mov pr = r30,0xc000 // Set predicates according to function
;;
and r2 = TIF_ALLWORK_MASK,r2
add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
;;
add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
(p6) br.cond.spnt.many fsys_fallback_syscall
;;
// Begin critical section
.time_redo:
ld4.acq r28 = [r20] // gtod_lock.sequence, Must take first
;;
and r28 = ~1,r28 // And make sequence even to force retry if odd
;;
ld8 r30 = [r21] // clocksource->mmio_ptr
add r24 = IA64_CLKSRC_MULT_OFFSET,r20
ld4 r2 = [r29] // itc_jitter value
add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20
add r14 = IA64_CLKSRC_MASK_OFFSET,r20
;;
ld4 r3 = [r24] // clocksource mult value
ld8 r14 = [r14] // clocksource mask value
cmp.eq p8,p9 = 0,r30 // use cpu timer if no mmio_ptr
;;
setf.sig f7 = r3 // Setup for mult scaling of counter
(p8) cmp.ne p13,p0 = r2,r0 // need itc_jitter compensation, set p13
ld4 r23 = [r23] // clocksource shift value
ld8 r24 = [r26] // get clksrc_cycle_last value
(p9) cmp.eq p13,p0 = 0,r30 // if mmio_ptr, clear p13 jitter control
;;
.pred.rel.mutex p8,p9
MOV_FROM_ITC(p8, p6, r2, r10) // CPU_TIMER. 36 clocks latency!!!
(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues..
(p13) ld8 r25 = [r19] // get itc_lastcycle value
ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec
;;
ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec
(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm)
;;
(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
sub r10 = r2,r24 // current_cycle - last_cycle
;;
(p6) sub r10 = r25,r24 // time we got was less than last_cycle
(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg
;;
(p7) cmpxchg8.rel r3 = [r19],r2,ar.ccv
;;
(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful
;;
(p7) sub r10 = r3,r24 // then use new last_cycle instead
;;
and r10 = r10,r14 // Apply mask
;;
setf.sig f8 = r10
nop.i 123
;;
// fault check takes 5 cycles and we have spare time
EX(.fail_efault, probe.w.fault r31, 3)
xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
;;
getf.sig r2 = f8
mf
;;
ld4 r10 = [r20] // gtod_lock.sequence
shr.u r2 = r2,r23 // shift by factor
;;
add r8 = r8,r2 // Add xtime.nsecs
cmp4.ne p7,p0 = r28,r10
(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo
// End critical section.
// Now r8=tv->tv_nsec and r9=tv->tv_sec
mov r10 = r0
movl r2 = 1000000000
add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
(p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack
;;
.time_normalize:
mov r21 = r8
cmp.ge p6,p0 = r8,r2
(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time
;;
(p14) setf.sig f8 = r20
(p6) sub r8 = r8,r2
(p6) add r9 = 1,r9 // two nops before the branch.
(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod
(p6) br.cond.dpnt.few .time_normalize
;;
// Divided by 8 though shift. Now divide by 125
// The compiler was able to do that with a multiply
// and a shift and we do the same
EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it
;;
(p14) getf.sig r2 = f8
;;
mov r8 = r0
(p14) shr.u r21 = r2, 4
;;
EX(.fail_efault, st8 [r31] = r9)
EX(.fail_efault, st8 [r23] = r21)
FSYS_RETURN
.fail_einval:
mov r8 = EINVAL
mov r10 = -1
FSYS_RETURN
.fail_efault:
mov r8 = EFAULT
mov r10 = -1
FSYS_RETURN
END(fsys_gettimeofday)
/*
 * fsys_clock_gettime: light-weight clock_gettime for CLOCK_REALTIME and
 * CLOCK_MONOTONIC only.
 *
 * r32 is the clock id and r33 the result pointer.  Any clock id above
 * CLOCK_MONOTONIC (unsigned 4-byte compare) goes to fsys_fallback_syscall.
 * Otherwise it sets r31 = result pointer and r30 = clock_id << 15 (so
 * CLOCK_MONOTONIC becomes the 0x8000 "add monotonic" flag) and jumps into the
 * shared .gettime body of fsys_gettimeofday.
 */
ENTRY(fsys_clock_gettime)
.prologue
.altrp b6
.body
cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
// Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
(p6) br.spnt.few fsys_fallback_syscall
mov r31 = r33
shl r30 = r32,15
br.many .gettime
END(fsys_clock_gettime)
/*
 * fsys_getcpu doesn't use the third parameter in this implementation. It reads
 * current_thread_info()->cpu and corresponding node in cpu_to_node_map.
 *
 * r32 and r33 are the two output pointers; each is written only when it is
 * non-zero.  A NaT in either argument yields EINVAL, a faulting pointer
 * yields EFAULT (both through the labels in fsys_gettimeofday), and pending
 * TIF_ALLWORK_MASK flags divert to fsys_fallback_syscall.  On success r8 = 0.
 * Without CONFIG_NUMA the node written is always 0.
 *
 * NOTE(review): the node is stored with st2 (2 bytes) while the cpu is stored
 * with st4 (4 bytes).  If the user-visible node type is wider than 2 bytes
 * the upper bytes are left untouched -- confirm against the syscall ABI.
 */
ENTRY(fsys_getcpu)
.prologue
.altrp b6
.body
;;
add r2=TI_FLAGS+IA64_TASK_SIZE,r16
tnat.nz p6,p0 = r32 // guard against NaT argument
add r3=TI_CPU+IA64_TASK_SIZE,r16
;;
ld4 r3=[r3] // M r3 = thread_info->cpu
ld4 r2=[r2] // M r2 = thread_info->flags
(p6) br.cond.spnt.few .fail_einval // B
;;
tnat.nz p7,p0 = r33 // I guard against NaT argument
(p7) br.cond.spnt.few .fail_einval // B
;;
cmp.ne p6,p0=r32,r0 // p6 = first output pointer is non-zero
cmp.ne p7,p0=r33,r0 // p7 = second output pointer is non-zero
;;
#ifdef CONFIG_NUMA
movl r17=cpu_to_node_map
;;
EX(.fail_efault, (p6) probe.w.fault r32, 3) // M This takes 5 cycles
EX(.fail_efault, (p7) probe.w.fault r33, 3) // M This takes 5 cycles
shladd r18=r3,1,r17 // r18 = &cpu_to_node_map[cpu] (2-byte entries)
;;
ld2 r20=[r18] // r20 = cpu_to_node_map[cpu]
and r2 = TIF_ALLWORK_MASK,r2
;;
cmp.ne p8,p0=0,r2 // p8 = some work flag is set
(p8) br.spnt.many fsys_fallback_syscall
;;
;;
EX(.fail_efault, (p6) st4 [r32] = r3)
EX(.fail_efault, (p7) st2 [r33] = r20)
mov r8=0
;;
#else
EX(.fail_efault, (p6) probe.w.fault r32, 3) // M This takes 5 cycles
EX(.fail_efault, (p7) probe.w.fault r33, 3) // M This takes 5 cycles
and r2 = TIF_ALLWORK_MASK,r2
;;
cmp.ne p8,p0=0,r2 // p8 = some work flag is set
(p8) br.spnt.many fsys_fallback_syscall
;;
EX(.fail_efault, (p6) st4 [r32] = r3)
EX(.fail_efault, (p7) st2 [r33] = r0)
mov r8=0
;;
#endif
FSYS_RETURN
END(fsys_getcpu)
/*
 * fsys_fallback_syscall: reached from the light-weight handlers when they
 * cannot complete the call themselves.
 *
 * Looks up the heavy-weight entry point sys_call_table[r15 - 1024] into r18,
 * captures psr (r29), ar.rsc (r27), ar.fpsr (r21) and ar.pfs (r26), and then
 * falls through into paravirt_fsys_bubble_down, which expects exactly those
 * registers on entry.  There is no return instruction here by design.
 */
ENTRY(fsys_fallback_syscall)
.prologue
.altrp b6
.body
/*
* We only get here from light-weight syscall handlers. Thus, we already
* know that r15 contains a valid syscall number. No need to re-check.
*/
adds r17=-1024,r15 // r17 = zero-based index into sys_call_table
movl r14=sys_call_table
;;
RSM_PSR_I(p0, r26, r27) // presumably clears psr.i (interrupts off); see the PSR.I note in paravirt_fsys_bubble_down
shladd r18=r17,3,r14 // r18 = &sys_call_table[r17] (8-byte entries)
;;
ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point
MOV_FROM_PSR(p0, r29, r26) // read psr (12 cyc load latency)
mov r27=ar.rsc
mov r21=ar.fpsr
mov r26=ar.pfs
END(fsys_fallback_syscall)
/* FALL THROUGH */
/*
 * paravirt_fsys_bubble_down: convert an fsyscall entry into a normal
 * (heavy-weight) system call.  Entered by fall-through from
 * fsys_fallback_syscall and stored in the slot just before
 * paravirt_fsyscall_table.  It switches to the kernel register backing store
 * and memory stack, calls ia64_syscall_setup, re-enables interrupts and then
 * either calls the handler in b6 (returning to ia64_ret_from_syscall) or
 * branches to ia64_trace_syscall when a _TIF_SYSCALL_TRACEAUDIT flag is set.
 */
GLOBAL_ENTRY(paravirt_fsys_bubble_down)
.prologue
.altrp b6
.body
/*
* We get here for syscalls that don't have a lightweight
* handler. For those, we need to bubble down into the kernel
* and that requires setting up a minimal pt_regs structure,
* and initializing the CPU state more or less as if an
* interruption had occurred. To make syscall-restarts work,
* we setup pt_regs such that cr_iip points to the second
* instruction in syscall_via_break. Decrementing the IP
* hence will restart the syscall via break and not
* decrementing IP will return us to the caller, as usual.
* Note that we preserve the value of psr.pp rather than
* initializing it from dcr.pp. This makes it possible to
* distinguish fsyscall execution from other privileged
* execution.
*
* On entry:
* - normal fsyscall handler register usage, except
* that we also have:
* - r18: address of syscall entry point
* - r21: ar.fpsr
* - r26: ar.pfs
* - r27: ar.rsc
* - r29: psr
*
* We used to clear some PSR bits here but that requires slow
* serialization. Fortunately, that isn't really necessary.
* The rationale is as follows: we used to clear bits
* ~PSR_PRESERVED_BITS in PSR.L. Since
* PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
* ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
* However,
*
* PSR.BE : already is turned off in __kernel_syscall_via_epc()
* PSR.AC : don't care (kernel normally turns PSR.AC on)
* PSR.I : already turned off by the time paravirt_fsys_bubble_down gets
* invoked
* PSR.DFL: always 0 (kernel never turns it on)
* PSR.DFH: don't care --- kernel never touches f32-f127 on its own
* initiative
* PSR.DI : always 0 (kernel never turns it on)
* PSR.SI : always 0 (kernel never turns it on)
* PSR.DB : don't care --- kernel never enables kernel-level
* breakpoints
* PSR.TB : must be 0 already; if it wasn't zero on entry to
* __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down
* will trigger a taken branch; the taken-trap-handler then
* converts the syscall into a break-based system-call.
*/
/*
* Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
* The rest we have to synthesize.
*/
# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) \
| (0x1 << IA64_PSR_RI_BIT) \
| IA64_PSR_BN | IA64_PSR_I)
invala // M0|1
movl r14=ia64_ret_from_syscall // X
nop.m 0
movl r28=__kernel_syscall_via_break // X create cr.iip
;;
mov r2=r16 // A get task addr to addl-addressable register
adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
mov r31=pr // I0 save pr (2 cyc)
;;
st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
addl r22=IA64_RBS_OFFSET,r2 // A compute base of RBS
add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // A
;;
ld4 r3=[r3] // M0|1 r3 = current_thread_info()->flags
lfetch.fault.excl.nt1 [r22] // M0|1 prefetch register backing-store
nop.i 0
;;
mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting
#else
nop.m 0
#endif
nop.i 0
;;
mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore
mov.m r24=ar.rnat // M2 (5 cyc) read ar.rnat (dual-issues!)
nop.i 0
;;
mov ar.bspstore=r22 // M2 (6 cyc) switch to kernel RBS
movl r8=PSR_ONE_BITS // X
;;
mov r25=ar.unat // M2 (5 cyc) save ar.unat
mov r19=b6 // I0 save b6 (2 cyc)
mov r20=r1 // A save caller's gp in r20
;;
or r29=r8,r29 // A construct cr.ipsr value to save
mov b6=r18 // I0 copy syscall entry-point to b6 (7 cyc)
addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
mov r18=ar.bsp // M2 save (kernel) ar.bsp (12 cyc)
cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
br.call.sptk.many b7=ia64_syscall_setup // B
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
// mov.m r30=ar.itc is called in advance
add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
;;
ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel
ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel
;;
ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime
ld8 r21=[r17] // cumulated utime
sub r22=r19,r18 // stime before leave kernel
;;
st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp
sub r18=r30,r19 // elapsed time in user mode
;;
add r20=r20,r22 // sum stime
add r21=r21,r18 // sum utime
;;
st8 [r16]=r20 // update stime
st8 [r17]=r21 // update utime
;;
#endif
mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
mov rp=r14 // I0 set the real return addr
and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A
;;
SSM_PSR_I(p0, p6, r22) // M2 we're on kernel stacks now, reenable irqs
cmp.eq p8,p0=r3,r0 // A
(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT
nop.m 0
(p8) br.call.sptk.many b6=b6 // B (ignore return address)
br.cond.spnt ia64_trace_syscall // B
END(paravirt_fsys_bubble_down)
/*
 * paravirt_fsyscall_table: read-only table of light-weight syscall handlers,
 * one 8-byte slot per syscall number.  Slot 0 corresponds to syscall 1024
 * (the "// 10xx" markers on the entries below count from there).  A slot
 * holding 0 has no light-weight handler in this file; presumably the consumer
 * then uses paravirt_fsys_bubble_down, whose address is stored in the 8 bytes
 * immediately before the table label -- the lookup code is not in this file.
 * The tail of the table is zero-filled up to NR_syscalls entries.
 */
.rodata
.align 8
.globl paravirt_fsyscall_table
data8 paravirt_fsys_bubble_down
paravirt_fsyscall_table:
data8 fsys_ni_syscall
data8 0 // exit // 1025
data8 0 // read
data8 0 // write
data8 0 // open
data8 0 // close
data8 0 // creat // 1030
data8 0 // link
data8 0 // unlink
data8 0 // execve
data8 0 // chdir
data8 0 // fchdir // 1035
data8 0 // utimes
data8 0 // mknod
data8 0 // chmod
data8 0 // chown
data8 0 // lseek // 1040
data8 fsys_getpid // getpid
data8 0 // getppid
data8 0 // mount
data8 0 // umount
data8 0 // setuid // 1045
data8 0 // getuid
data8 0 // geteuid
data8 0 // ptrace
data8 0 // access
data8 0 // sync // 1050
data8 0 // fsync
data8 0 // fdatasync
data8 0 // kill
data8 0 // rename
data8 0 // mkdir // 1055
data8 0 // rmdir
data8 0 // dup
data8 0 // pipe
data8 0 // times
data8 0 // brk // 1060
data8 0 // setgid
data8 0 // getgid
data8 0 // getegid
data8 0 // acct
data8 0 // ioctl // 1065
data8 0 // fcntl
data8 0 // umask
data8 0 // chroot
data8 0 // ustat
data8 0 // dup2 // 1070
data8 0 // setreuid
data8 0 // setregid
data8 0 // getresuid
data8 0 // setresuid
data8 0 // getresgid // 1075
data8 0 // setresgid
data8 0 // getgroups
data8 0 // setgroups
data8 0 // getpgid
data8 0 // setpgid // 1080
data8 0 // setsid
data8 0 // getsid
data8 0 // sethostname
data8 0 // setrlimit
data8 0 // getrlimit // 1085
data8 0 // getrusage
data8 fsys_gettimeofday // gettimeofday
data8 0 // settimeofday
data8 0 // select
data8 0 // poll // 1090
data8 0 // symlink
data8 0 // readlink
data8 0 // uselib
data8 0 // swapon
data8 0 // swapoff // 1095
data8 0 // reboot
data8 0 // truncate
data8 0 // ftruncate
data8 0 // fchmod
data8 0 // fchown // 1100
data8 0 // getpriority
data8 0 // setpriority
data8 0 // statfs
data8 0 // fstatfs
data8 0 // gettid // 1105
data8 0 // semget
data8 0 // semop
data8 0 // semctl
data8 0 // msgget
data8 0 // msgsnd // 1110
data8 0 // msgrcv
data8 0 // msgctl
data8 0 // shmget
data8 0 // shmat
data8 0 // shmdt // 1115
data8 0 // shmctl
data8 0 // syslog
data8 0 // setitimer
data8 0 // getitimer
data8 0 // 1120
data8 0
data8 0
data8 0 // vhangup
data8 0 // lchown
data8 0 // remap_file_pages // 1125
data8 0 // wait4
data8 0 // sysinfo
data8 0 // clone
data8 0 // setdomainname
data8 0 // newuname // 1130
data8 0 // adjtimex
data8 0
data8 0 // init_module
data8 0 // delete_module
data8 0 // 1135
data8 0
data8 0 // quotactl
data8 0 // bdflush
data8 0 // sysfs
data8 0 // personality // 1140
data8 0 // afs_syscall
data8 0 // setfsuid
data8 0 // setfsgid
data8 0 // getdents
data8 0 // flock // 1145
data8 0 // readv
data8 0 // writev
data8 0 // pread64
data8 0 // pwrite64
data8 0 // sysctl // 1150
data8 0 // mmap
data8 0 // munmap
data8 0 // mlock
data8 0 // mlockall
data8 0 // mprotect // 1155
data8 0 // mremap
data8 0 // msync
data8 0 // munlock
data8 0 // munlockall
data8 0 // sched_getparam // 1160
data8 0 // sched_setparam
data8 0 // sched_getscheduler
data8 0 // sched_setscheduler
data8 0 // sched_yield
data8 0 // sched_get_priority_max // 1165
data8 0 // sched_get_priority_min
data8 0 // sched_rr_get_interval
data8 0 // nanosleep
data8 0 // nfsservctl
data8 0 // prctl // 1170
data8 0 // getpagesize
data8 0 // mmap2
data8 0 // pciconfig_read
data8 0 // pciconfig_write
data8 0 // perfmonctl // 1175
data8 0 // sigaltstack
data8 0 // rt_sigaction
data8 0 // rt_sigpending
data8 0 // rt_sigprocmask
data8 0 // rt_sigqueueinfo // 1180
data8 0 // rt_sigreturn
data8 0 // rt_sigsuspend
data8 0 // rt_sigtimedwait
data8 0 // getcwd
data8 0 // capget // 1185
data8 0 // capset
data8 0 // sendfile
data8 0
data8 0
data8 0 // socket // 1190
data8 0 // bind
data8 0 // connect
data8 0 // listen
data8 0 // accept
data8 0 // getsockname // 1195
data8 0 // getpeername
data8 0 // socketpair
data8 0 // send
data8 0 // sendto
data8 0 // recv // 1200
data8 0 // recvfrom
data8 0 // shutdown
data8 0 // setsockopt
data8 0 // getsockopt
data8 0 // sendmsg // 1205
data8 0 // recvmsg
data8 0 // pivot_root
data8 0 // mincore
data8 0 // madvise
data8 0 // newstat // 1210
data8 0 // newlstat
data8 0 // newfstat
data8 0 // clone2
data8 0 // getdents64
data8 0 // getunwind // 1215
data8 0 // readahead
data8 0 // setxattr
data8 0 // lsetxattr
data8 0 // fsetxattr
data8 0 // getxattr // 1220
data8 0 // lgetxattr
data8 0 // fgetxattr
data8 0 // listxattr
data8 0 // llistxattr
data8 0 // flistxattr // 1225
data8 0 // removexattr
data8 0 // lremovexattr
data8 0 // fremovexattr
data8 0 // tkill
data8 0 // futex // 1230
data8 0 // sched_setaffinity
data8 0 // sched_getaffinity
data8 fsys_set_tid_address // set_tid_address
data8 0 // fadvise64_64
data8 0 // tgkill // 1235
data8 0 // exit_group
data8 0 // lookup_dcookie
data8 0 // io_setup
data8 0 // io_destroy
data8 0 // io_getevents // 1240
data8 0 // io_submit
data8 0 // io_cancel
data8 0 // epoll_create
data8 0 // epoll_ctl
data8 0 // epoll_wait // 1245
data8 0 // restart_syscall
data8 0 // semtimedop
data8 0 // timer_create
data8 0 // timer_settime
data8 0 // timer_gettime // 1250
data8 0 // timer_getoverrun
data8 0 // timer_delete
data8 0 // clock_settime
data8 fsys_clock_gettime // clock_gettime
data8 0 // clock_getres // 1255
data8 0 // clock_nanosleep
data8 0 // fstatfs64
data8 0 // statfs64
data8 0 // mbind
data8 0 // get_mempolicy // 1260
data8 0 // set_mempolicy
data8 0 // mq_open
data8 0 // mq_unlink
data8 0 // mq_timedsend
data8 0 // mq_timedreceive // 1265
data8 0 // mq_notify
data8 0 // mq_getsetattr
data8 0 // kexec_load
data8 0 // vserver
data8 0 // waitid // 1270
data8 0 // add_key
data8 0 // request_key
data8 0 // keyctl
data8 0 // ioprio_set
data8 0 // ioprio_get // 1275
data8 0 // move_pages
data8 0 // inotify_init
data8 0 // inotify_add_watch
data8 0 // inotify_rm_watch
data8 0 // migrate_pages // 1280
data8 0 // openat
data8 0 // mkdirat
data8 0 // mknodat
data8 0 // fchownat
data8 0 // futimesat // 1285
data8 0 // newfstatat
data8 0 // unlinkat
data8 0 // renameat
data8 0 // linkat
data8 0 // symlinkat // 1290
data8 0 // readlinkat
data8 0 // fchmodat
data8 0 // faccessat
data8 0
data8 0 // 1295
data8 0 // unshare
data8 0 // splice
data8 0 // set_robust_list
data8 0 // get_robust_list
data8 0 // sync_file_range // 1300
data8 0 // tee
data8 0 // vmsplice
data8 0
data8 fsys_getcpu // getcpu // 1304
// fill in zeros for the remaining entries
.zero:
.space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0

View file

@ -0,0 +1,23 @@
/*
* (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
* Contributed by Peter Keilty <peter.keilty@hp.com>
*
* fsyscall gettimeofday data
*/
/*
 * Time-keeping snapshot read by the fsys_gettimeofday/fsys_clock_gettime
 * assembly fast path (fsys.S).
 *
 * fsys.S refuses to build (#error) unless IA64_GTOD_SEQ_OFFSET is 0, so `seq`
 * presumably has to stay the first member; the mapping from members to the
 * IA64_GTOD_* / IA64_CLKSRC_* offsets is generated elsewhere -- confirm there
 * before reordering or resizing anything.
 */
struct fsyscall_gtod_data_t {
seqcount_t seq; /* sequence counter; the asm reader retries if it changed */
struct timespec wall_time; /* base time used for the realtime clock */
struct timespec monotonic_time; /* base time used when "add monotonic" is requested */
cycle_t clk_mask; /* mask applied to the cycle delta */
u32 clk_mult; /* multiplier applied to the masked cycle delta */
u32 clk_shift; /* right shift applied after the multiply */
void *clk_fsys_mmio; /* MMIO counter address; the asm uses the CPU timer (ar.itc) when this is NULL */
cycle_t clk_cycle_last; /* counter value the delta is measured from */
} ____cacheline_aligned;
/*
 * ITC jitter compensation state used by the fsys_gettimeofday fast path.
 *
 * fsys.S refuses to build (#error) unless IA64_ITC_JITTER_OFFSET is 0, so
 * `itc_jitter` presumably has to stay the first member.
 */
struct itc_jitter_data_t {
int itc_jitter; /* non-zero: the asm compares against and updates itc_lastcycle */
cycle_t itc_lastcycle; /* last ITC value handed out; updated with cmpxchg by the asm */
} ____cacheline_aligned;

204
arch/ia64/kernel/ftrace.c Normal file
View file

@ -0,0 +1,204 @@
/*
* Dynamic function tracing support.
*
* Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
*
* For licencing details, see COPYING.
*
* Defines low-level handling of mcount calls when the kernel
* is compiled with the -pg flag. When using dynamic ftrace, the
* mcount call-sites get patched lazily with NOP till they are
* enabled. All code mutation routines here take effect atomically.
*/
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <asm/cacheflush.h>
#include <asm/patch.h>
/*
 * With the -pg option, each function on IA64 starts with the two bundles
 * below (MCOUNT_INSN_SIZE bytes in total).  This is the template for that
 * original, unpatched call site.  It is deliberately not const:
 * ftrace_make_nop_check() copies the per-site branch displacement fields
 * into it (through struct ftrace_orig_insn) before comparing, which is also
 * why it is 8-byte aligned.
 */
static unsigned char __attribute__((aligned(8)))
ftrace_orig_code[MCOUNT_INSN_SIZE] = {
0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
0x05, 0x00, 0xc4, 0x00, /* mov r42=b0 */
0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
0x08, 0x00, 0x00, 0x50 /* br.call.sptk.many b0 = _mcount;; */
};
/*
 * Bitfield overlay for the two-bundle ftrace_orig_code template (256 bits in
 * total).  Only `imm20` and `sign` are ever accessed: ftrace_make_nop_check()
 * copies them from the live call site into the template, since they differ
 * per site.  The dummy members exist only to position those two fields.
 */
struct ftrace_orig_insn {
u64 dummy1, dummy2, dummy3;
u64 dummy4:64-41+13; /* 36 bits of padding up to imm20 */
u64 imm20:20; /* per-site field of the br.call to _mcount */
u64 dummy5:3;
u64 sign:1; /* per-site field of the br.call to _mcount */
u64 dummy6:4;
};
/*
 * Replacement written over an mcount call site when tracing is disabled for
 * it.  The first bundle is identical to the first bundle of ftrace_call_code;
 * the second bundle is a nop.m/nop.x pair instead of the brl.
 */
static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */
0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */
0x00, 0x00, 0x04, 0x00
};
/*
 * Return the MCOUNT_INSN_SIZE-byte NOP template (ftrace_nop_code).
 * The same static buffer is returned on every call.
 */
static unsigned char *ftrace_nop_replace(void)
{
return ftrace_nop_code;
}
/*
 * Template written over an mcount call site when tracing is enabled for it.
 * Note: only the last instruction differs from the nop template.  The brl
 * displacement fields are placeholders here; ftrace_call_replace() fills
 * them in through struct ftrace_call_insn, which is why the buffer is
 * writable and 8-byte aligned.
 */
static unsigned char __attribute__((aligned(8)))
ftrace_call_code[MCOUNT_INSN_SIZE] = {
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */
0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/
0xf8, 0xff, 0xff, 0xc8
};
/*
 * Bitfield overlay for the two-bundle ftrace_call_code template (256 bits in
 * total).  The named fields carry the brl displacement; ftrace_call_replace()
 * fills them from (target - (ip + 0x10)) as:
 *   imm20   = offset >> 4   (low 20 bits kept)
 *   imm39_l = offset >> 24  (low 16 bits kept)
 *   imm39_h = offset >> 40  (low 23 bits kept)
 *   i       = offset >> 63
 * The dummy members exist only to position those fields.
 */
struct ftrace_call_insn {
u64 dummy1, dummy2;
u64 dummy3:48;
u64 imm39_l:16;
u64 imm39_h:23;
u64 dummy4:13;
u64 imm20:20;
u64 dummy5:3;
u64 i:1;
u64 dummy6:4;
};
/*
 * Build the "call" replacement for the mcount site at @ip so that its brl
 * targets @addr.
 *
 * The displacement is taken relative to ip + 0x10, i.e. the second of the
 * two 16-byte bundles, which is where the brl lives.  The result is written
 * into the shared static ftrace_call_code buffer, which is returned; the
 * contents are therefore only valid until the next call.
 */
static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	struct ftrace_call_insn *insn = (void *)ftrace_call_code;
	unsigned long disp = addr - (ip + 0x10);

	/* Scatter the displacement into the brl immediate fields. */
	insn->i = disp >> 63;
	insn->imm39_h = disp >> 40;
	insn->imm39_l = disp >> 24;
	insn->imm20 = disp >> 4;

	return ftrace_call_code;
}
/*
 * Replace the MCOUNT_INSN_SIZE bytes of kernel text at @ip with @new_code.
 *
 * When @do_check is non-zero the current text is read first and must equal
 * @old_code; when it is zero @old_code is not touched at all.
 *
 * Due to modules and __init, code can disappear and change, so both the read
 * and the write go through the probe_kernel_* helpers to protect against
 * faulting.  No real locking is needed: this runs through kstop_machine, or
 * before SMP starts.
 *
 * Returns 0 on success, -EFAULT if the text cannot be read, -EINVAL if it
 * does not match @old_code, and -EPERM if the write fails.
 */
static int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
		   unsigned char *new_code, int do_check)
{
	unsigned char current_text[MCOUNT_INSN_SIZE];
	void *site = (void *)ip;

	if (do_check) {
		/* read the text we want to modify */
		if (probe_kernel_read(current_text, site, MCOUNT_INSN_SIZE))
			return -EFAULT;

		/* make sure it is what we expect it to be */
		if (memcmp(current_text, old_code, MCOUNT_INSN_SIZE) != 0)
			return -EINVAL;
	}

	/* replace the text with the new text */
	if (probe_kernel_write(site, new_code, MCOUNT_INSN_SIZE))
		return -EPERM;

	flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
	return 0;
}
/*
 * Verify that the call site described by @rec currently holds what we expect
 * before it is turned into a NOP.
 *
 * A site flagged FTRACE_FL_CONVERTED must match the ftrace_call_code
 * template; any other site must match the compiler-generated
 * ftrace_orig_code template.  The branch displacement differs per site, so
 * those fields are first copied from the live text into the (static)
 * template and only then are the two compared.
 *
 * @addr is unused.
 *
 * Returns 0 on a match, -EFAULT if the site cannot be read, -EINVAL on a
 * mismatch.
 */
static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned char __attribute__((aligned(8))) site_text[MCOUNT_INSN_SIZE];
	unsigned char *expected;

	if (probe_kernel_read(site_text, (void *)rec->ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	if (rec->flags & FTRACE_FL_CONVERTED) {
		struct ftrace_call_insn *tmpl = (void *)ftrace_call_code;
		struct ftrace_call_insn *live = (void *)site_text;

		tmpl->imm39_l = live->imm39_l;
		tmpl->imm39_h = live->imm39_h;
		tmpl->imm20 = live->imm20;
		tmpl->i = live->i;
		expected = ftrace_call_code;
	} else {
		struct ftrace_orig_insn *tmpl = (void *)ftrace_orig_code;
		struct ftrace_orig_insn *live = (void *)site_text;

		tmpl->sign = live->sign;
		tmpl->imm20 = live->imm20;
		expected = ftrace_orig_code;
	}

	if (memcmp(site_text, expected, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;
	return 0;
}
/*
 * Turn the mcount call site described by @rec into a NOP.
 *
 * The site is validated first by ftrace_make_nop_check(); only if that
 * succeeds is the NOP template written.  The write itself is done with
 * do_check == 0, so the NULL old_code pointer is never dereferenced by
 * ftrace_modify_code().
 *
 * @mod is unused here; @addr is only forwarded to the check.
 *
 * Returns 0 on success or the negative errno from the check or the write.
 */
int ftrace_make_nop(struct module *mod,
		    struct dyn_ftrace *rec, unsigned long addr)
{
	int ret;
	/* unsigned char, matching ftrace_nop_replace() and ftrace_modify_code() */
	unsigned char *new;

	ret = ftrace_make_nop_check(rec, addr);
	if (ret)
		return ret;

	new = ftrace_nop_replace();
	return ftrace_modify_code(rec->ip, NULL, new, 0);
}
/*
 * Turn the (currently NOP) mcount site described by @rec into a call to
 * @addr.  The site must presently contain the NOP template; this is checked
 * by ftrace_modify_code() (do_check == 1).
 *
 * Returns 0 on success or the negative errno from ftrace_modify_code().
 */
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long site = rec->ip;
	unsigned char *expected_nop = ftrace_nop_replace();
	unsigned char *call_text = ftrace_call_replace(site, addr);

	return ftrace_modify_code(site, expected_nop, call_text, 1);
}
/*
 * Install @func as the tracer invoked from ftrace_call.
 *
 * In IA64, _mcount can't directly call ftrace_stub; only a jump is OK.  So
 * a request to install ftrace_stub is a no-op.  Otherwise the 64-bit
 * immediate at (entry address of ftrace_call) + 2 is patched to the entry
 * address of @func and the affected 16 bytes are flushed from the icache.
 * Both addresses are taken from the function descriptors (struct fnptr).
 *
 * Always returns 0.
 */
int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long patch_site;
	unsigned long target;

	if (func == ftrace_stub)
		return 0;

	patch_site = ((struct fnptr *)ftrace_call)->ip;
	target = ((struct fnptr *)func)->ip;

	ia64_patch_imm64(patch_site + 2, target);
	flush_icache_range(patch_site, patch_site + 16);
	return 0;
}
/*
 * Arch hook for dynamic ftrace initialisation; run from kstop_machine.
 * Nothing needs to be set up here, so it just reports success (0).
 */
int __init ftrace_dyn_arch_init(void)
{
return 0;
}

View file

@ -0,0 +1,3 @@
/*
 * Embed the pre-built gate shared object verbatim into the kernel image,
 * in the allocatable, writable ".data..gate" section.
 */
.section .data..gate, "aw"
.incbin "arch/ia64/kernel/gate.so"

386
arch/ia64/kernel/gate.S Normal file
View file

@ -0,0 +1,386 @@
/*
* This file contains the code that gets mapped at the upper end of each task's text
* region. For now, it contains the signal trampoline code only.
*
* Copyright (C) 1999-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <asm/asmmacro.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/sigcontext.h>
#include <asm/unistd.h>
#include <asm/kregs.h>
#include <asm/page.h>
#include "paravirt_inst.h"
/*
* We can't easily refer to symbols inside the kernel. To avoid full runtime relocation,
* complications with the linker (which likes to create PLT stubs for branches
* to targets outside the shared object) and to avoid multi-phase kernel builds, we
* simply create minimalistic "patch lists" in special ELF sections.
*/
/*
 * LOAD_FSYSCALL_TABLE(reg): emit "movl reg=0" as a placeholder and append a
 * 4-byte entry to the ".data..patch.fsyscall_table" patch list holding the
 * offset from that entry back to the movl (1b-.).  Presumably the kernel
 * later rewrites the immediate with the real fsyscall table address -- the
 * patching code is not in this file.
 *
 * The .section/.previous pair just makes sure the patch-list section exists
 * with the "a" (allocatable) flag.
 */
.section ".data..patch.fsyscall_table", "a"
.previous
#define LOAD_FSYSCALL_TABLE(reg) \
[1:] movl reg=0; \
.xdata4 ".data..patch.fsyscall_table", 1b-.
/*
 * BRL_COND_FSYS_BUBBLE_DOWN(pr): emit a long branch with a zero placeholder
 * target, predicated on pr and followed by a stop, and append a 4-byte entry
 * to the ".data..patch.brl_fsys_bubble_down" patch list holding the offset
 * from that entry back to the branch (1b-.).  Presumably the kernel later
 * patches the target to the fsys bubble-down routine -- the patching code is
 * not in this file.
 */
.section ".data..patch.brl_fsys_bubble_down", "a"
.previous
#define BRL_COND_FSYS_BUBBLE_DOWN(pr) \
[1:](pr)brl.cond.sptk 0; \
;; \
.xdata4 ".data..patch.brl_fsys_bubble_down", 1b-.
/*
 * __kernel_syscall_via_break: enter the kernel with "break 0x100000" and
 * return to the caller through b6.  The whole routine is a single .mib
 * bundle.
 */
GLOBAL_ENTRY(__kernel_syscall_via_break)
.prologue
.altrp b6
.body
/*
* Note: for (fast) syscall restart to work, the break instruction must be
* the first one in the bundle addressed by syscall_via_break.
*/
{ .mib
break 0x100000
nop.i 0
br.ret.sptk.many b6
}
END(__kernel_syscall_via_break)
# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET)
# define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET)
# define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET)
# define SIGHANDLER_OFF (16 + IA64_SIGFRAME_HANDLER_OFFSET)
# define SIGCONTEXT_OFF (16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET)
# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET
# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET
# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET
# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET
# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET
# define UNAT_OFF IA64_SIGCONTEXT_AR_UNAT_OFFSET
# define FPSR_OFF IA64_SIGCONTEXT_AR_FPSR_OFFSET
# define PR_OFF IA64_SIGCONTEXT_PR_OFFSET
# define RP_OFF IA64_SIGCONTEXT_IP_OFFSET
# define SP_OFF IA64_SIGCONTEXT_R12_OFFSET
# define RBS_BASE_OFF IA64_SIGCONTEXT_RBS_BASE_OFFSET
# define LOADRS_OFF IA64_SIGCONTEXT_LOADRS_OFFSET
# define base0 r2
# define base1 r3
/*
* When we get here, the memory stack looks like this:
*
* +===============================+
* | |
* // struct sigframe //
* | |
* +-------------------------------+ <-- sp+16
* | 16 byte of scratch |
* | space |
* +-------------------------------+ <-- sp
*
* The register stack looks _exactly_ the way it looked at the time the signal
* occurred. In other words, we're treading on a potential mine-field: each
* incoming general register may be a NaT value (including sp, in which case the
* process ends up dying with a SIGSEGV).
*
* The first thing need to do is a cover to get the registers onto the backing
* store. Once that is done, we invoke the signal handler which may modify some
* of the machine state. After returning from the signal handler, we return
* control to the previous context by executing a sigreturn system call. A signal
* handler may call the rt_sigreturn() function to directly return to a given
* sigcontext. However, the user-level sigreturn() needs to do much more than
* calling the rt_sigreturn() system call as it needs to unwind the stack to
* restore preserved registers that may have been saved on the signal handler's
* call stack.
*/
/*
 * SIGTRAMP_SAVES: unwind directives shared by every prologue region of
 * __kernel_sigtramp.  They mark the frame as a signal trampoline and state
 * that ar.unat, ar.fpsr, pr, rp and ar.pfs are saved at sp-relative offsets
 * inside the sigcontext, with the previous sp itself found at
 * SP_OFF+SIGCONTEXT_OFF.
 */
#define SIGTRAMP_SAVES \
.unwabi 3, 's'; /* mark this as a sigtramp handler (saves scratch regs) */ \
.unwabi @svr4, 's'; /* backwards compatibility with old unwinders (remove in v2.7) */ \
.savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF; \
.savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF; \
.savesp pr, PR_OFF+SIGCONTEXT_OFF; \
.savesp rp, RP_OFF+SIGCONTEXT_OFF; \
.savesp ar.pfs, CFM_OFF+SIGCONTEXT_OFF; \
.vframesp SP_OFF+SIGCONTEXT_OFF
/*
 * __kernel_sigtramp: user-level signal trampoline.
 *
 * Flow, as implemented below:
 *  1. "cover" the interrupted frame and, if the sigframe supplies a new RBS
 *     base, switch the register backing store (setup_rbs).
 *  2. Load the handler's entry point and gp from its plabel, load the three
 *     handler arguments from the sigframe, record ar.bsp in the sigcontext
 *     and spill f6-f15 there.
 *  3. Call the handler.
 *  4. If ar.bsp differs from the recorded value, switch the backing store
 *     back (restore_rbs); then refill f6-f15 and issue the rt_sigreturn
 *     system call via break.
 *
 * setup_rbs and restore_rbs are out-of-line helpers inside this routine that
 * branch back to back_from_setup_rbs / back_from_restore_rbs.
 */
GLOBAL_ENTRY(__kernel_sigtramp)
// describe the state that is active when we get here:
.prologue
SIGTRAMP_SAVES
.body
.label_state 1
adds base0=SIGHANDLER_OFF,sp
adds base1=RBS_BASE_OFF+SIGCONTEXT_OFF,sp
br.call.sptk.many rp=1f
1:
ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF) // get pointer to signal handler's plabel
ld8 r15=[base1] // get address of new RBS base (or NULL)
cover // push args in interrupted frame onto backing store
;;
cmp.ne p1,p0=r15,r0 // do we need to switch rbs? (note: pr is saved by kernel)
mov.m r9=ar.bsp // fetch ar.bsp
.spillsp.p p1, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
(p1) br.cond.spnt setup_rbs // yup -> (clobbers p8, r14-r16, and r18-r20)
back_from_setup_rbs:
alloc r8=ar.pfs,0,0,3,0
ld8 out0=[base0],16 // load arg0 (signum)
adds base1=(ARG1_OFF-(RBS_BASE_OFF+SIGCONTEXT_OFF)),base1
;;
ld8 out1=[base1] // load arg1 (siginfop)
ld8 r10=[r17],8 // get signal handler entry point
;;
ld8 out2=[base0] // load arg2 (sigcontextp)
ld8 gp=[r17] // get signal handler's global pointer
adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
;;
.spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF
st8 [base0]=r9 // save sc_ar_bsp
adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
;;
// spill f6-f15 into the sigcontext, two registers per group via base0/base1
stf.spill [base0]=f6,32
stf.spill [base1]=f7,32
;;
stf.spill [base0]=f8,32
stf.spill [base1]=f9,32
mov b6=r10
;;
stf.spill [base0]=f10,32
stf.spill [base1]=f11,32
;;
stf.spill [base0]=f12,32
stf.spill [base1]=f13,32
;;
stf.spill [base0]=f14,32
stf.spill [base1]=f15,32
br.call.sptk.many rp=b6 // call the signal handler
.ret0: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
;;
ld8 r15=[base0] // fetch sc_ar_bsp
mov r14=ar.bsp
;;
cmp.ne p1,p0=r14,r15 // do we need to restore the rbs?
(p1) br.cond.spnt restore_rbs // yup -> (clobbers r14-r18, f6 & f7)
;;
back_from_restore_rbs:
adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
;;
// refill f6-f15 from the sigcontext (mirror of the spill sequence above)
ldf.fill f6=[base0],32
ldf.fill f7=[base1],32
;;
ldf.fill f8=[base0],32
ldf.fill f9=[base1],32
;;
ldf.fill f10=[base0],32
ldf.fill f11=[base1],32
;;
ldf.fill f12=[base0],32
ldf.fill f13=[base1],32
;;
ldf.fill f14=[base0],32
ldf.fill f15=[base1],32
mov r15=__NR_rt_sigreturn
.restore sp // pop .prologue
break __BREAK_SYSCALL
.prologue
SIGTRAMP_SAVES
setup_rbs:
mov ar.rsc=0 // put RSE into enforced lazy mode
;;
.save ar.rnat, r19
mov r19=ar.rnat // save RNaT before switching backing store area
adds r14=(RNAT_OFF+SIGCONTEXT_OFF),sp
mov r18=ar.bspstore
mov ar.bspstore=r15 // switch over to new register backing store area
;;
.spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
st8 [r14]=r19 // save sc_ar_rnat
.body
mov.m r16=ar.bsp // sc_loadrs <- (new bsp - new bspstore) << 16
adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp
;;
invala
sub r15=r16,r15
extr.u r20=r18,3,6
;;
mov ar.rsc=0xf // set RSE into eager mode, pl 3
cmp.eq p8,p0=63,r20
shl r15=r15,16
;;
st8 [r14]=r15 // save sc_loadrs
(p8) st8 [r18]=r19 // if bspstore points at RNaT slot, store RNaT there now
.restore sp // pop .prologue
br.cond.sptk back_from_setup_rbs
.prologue
SIGTRAMP_SAVES
.spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
.body
restore_rbs:
// On input:
// r14 = bsp1 (bsp at the time of return from signal handler)
// r15 = bsp0 (bsp at the time the signal occurred)
//
// Here, we need to calculate bspstore0, the value that ar.bspstore needs
// to be set to, based on bsp0 and the size of the dirty partition on
// the alternate stack (sc_loadrs >> 16). This can be done with the
// following algorithm:
//
// bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1));
//
// This is what the code below does.
//
alloc r2=ar.pfs,0,0,0,0 // alloc null frame
adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp
adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp
;;
ld8 r17=[r16]
ld8 r16=[r18] // get new rnat
extr.u r18=r15,3,6 // r18 <- rse_slot_num(bsp0)
;;
mov ar.rsc=r17 // put RSE into enforced lazy mode
shr.u r17=r17,16
;;
sub r14=r14,r17 // r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16)
shr.u r17=r17,3 // r17 <- (sc_loadrs >> 19)
;;
loadrs // restore dirty partition
extr.u r14=r14,3,6 // r14 <- rse_slot_num(bspstore1)
;;
add r14=r14,r17 // r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19)
;;
shr.u r14=r14,6 // r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40
;;
sub r14=r14,r17 // r14 <- -rse_num_regs(bspstore1, bsp1)
movl r17=0x8208208208208209
;;
add r18=r18,r14 // r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1)
setf.sig f7=r17
cmp.lt p7,p0=r14,r0 // p7 <- (r14 < 0)?
;;
(p7) adds r18=-62,r18 // delta -= 62
;;
setf.sig f6=r18
;;
xmpy.h f6=f6,f7
;;
getf.sig r17=f6
;;
add r17=r17,r18
shr r18=r18,63
;;
shr r17=r17,5
;;
sub r17=r17,r18 // r17 = delta/63
;;
add r17=r14,r17 // r17 <- delta/63 - rse_num_regs(bspstore1, bsp1)
;;
shladd r15=r17,3,r15 // r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1))
;;
mov ar.bspstore=r15 // switch back to old register backing store area
;;
mov ar.rnat=r16 // restore RNaT
mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc)
// invala not necessary as that will happen when returning to user-mode
br.cond.sptk back_from_restore_rbs
END(__kernel_sigtramp)
/*
* Light-weight system call entry point based on the "epc" instruction.
*
* The system call number in r15 is biased by -1024 and used as an index into
* the fsyscall table. If the index is in range and bit 0 of the table entry
* is clear, control branches directly to that entry; an in-range call whose
* entry has bit 0 set is handed to fsys_bubble_down instead. Otherwise the
* routine returns with r10=-1 and r8=EINVAL (r15 is a NaT) or ENOSYS.
*
* On entry:
* r11 = saved ar.pfs
* r15 = system call #
* b0 = saved return address
* b6 = return address
* On exit:
* r11 = saved ar.pfs
* r15 = system call #
* b0 = saved return address
* all other "scratch" registers: undefined
* all "preserved" registers: same as on entry
*/
GLOBAL_ENTRY(__kernel_syscall_via_epc)
.prologue
.altrp b6
.body
{
/*
* Note: the kernel cannot assume that the first two instructions in this
* bundle get executed. The remaining code must be safe even if
* they do not get executed.
*/
adds r17=-1024,r15 // A
mov r10=0 // A default to successful syscall execution
epc // B causes split-issue
}
;;
RSM_PSR_BE_I(r20, r22) // M2 (5 cyc to srlz.d)
LOAD_FSYSCALL_TABLE(r14) // X
;;
mov r16=IA64_KR(CURRENT) // M2 (12 cyc)
shladd r18=r17,3,r14 // A r18 <- &fsyscall_table[sysnr - 1024]
mov r19=NR_syscalls-1 // A
;;
lfetch [r18] // M0|1
MOV_FROM_PSR(p0, r29, r8) // M2 (12 cyc)
// If r17 is a NaT, p6 will be zero
cmp.geu p6,p7=r19,r17 // A (sysnr >= 1024 && sysnr < 1024+NR_syscalls)?
;;
mov r21=ar.fpsr // M2 (12 cyc)
tnat.nz p10,p9=r15 // I0 p10 <- r15 is a NaT, p9 <- r15 is not a NaT
mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...)
;;
srlz.d // M0 (forces split-issue) ensure PSR.BE==0
(p6) ld8 r18=[r18] // M0|1
nop.i 0
;;
nop.m 0
(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!)
nop.i 0
;;
SSM_PSR_I(p8, p14, r25)
(p6) mov b7=r18 // I0
(p8) br.dptk.many b7 // B
mov r27=ar.rsc // M2 (12 cyc)
/*
* brl.cond doesn't work as intended because the linker would convert this branch
* into a branch to a PLT. Perhaps there will be a way to avoid this with some
* future version of the linker. In the meantime, we just use an indirect branch
* instead.
*/
#ifdef CONFIG_ITANIUM
(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
;;
(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down
;;
(p6) mov b7=r14
(p6) br.sptk.many b7
#else
BRL_COND_FSYS_BUBBLE_DOWN(p6)
#endif
// Only reached when p6 is clear: report the failure to the caller.
SSM_PSR_I(p0, p14, r10)
mov r10=-1
(p10) mov r8=EINVAL
(p9) mov r8=ENOSYS
FSYS_RETURN
#ifdef CONFIG_PARAVIRT
/*
* Pad to make the size of this symbol constant,
* independent of paravirtualization.
*/
.align PAGE_SIZE / 8
#endif
END(__kernel_syscall_via_epc)

108
arch/ia64/kernel/gate.lds.S Normal file
View file

@ -0,0 +1,108 @@
/*
* Linker script for gate DSO. The gate pages are an ELF shared object
* prelinked to its virtual address, with only one read-only segment and
* one execute-only segment (both fit in one page). This script controls
* its layout.
*/
#include <asm/page.h>
#include "paravirt_patchlist.h"
/*
 * Output section layout of the gate DSO, starting right after the ELF
 * headers at GATE_ADDR.
 */
SECTIONS
{
. = GATE_ADDR + SIZEOF_HEADERS;
/* Dynamic-linking metadata; everything here goes into the "readable" segment. */
.hash : { *(.hash) } :readable
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
.note : { *(.note*) } :readable :note
.dynamic : { *(.dynamic) } :readable :dynamic
/*
* This linker script is used both with -r and with -shared. For
* the layouts to match, we need to skip more than enough space for
* the dynamic symbol table et al. If this amount is insufficient,
* ld -shared will barf. Just increase it here.
*/
. = GATE_ADDR + 0x600;
/* Patch lists; each list is bracketed by a start and an end symbol. */
.data..patch : {
__paravirt_start_gate_mckinley_e9_patchlist = .;
*(.data..patch.mckinley_e9)
__paravirt_end_gate_mckinley_e9_patchlist = .;
__paravirt_start_gate_vtop_patchlist = .;
*(.data..patch.vtop)
__paravirt_end_gate_vtop_patchlist = .;
__paravirt_start_gate_fsyscall_patchlist = .;
*(.data..patch.fsyscall_table)
__paravirt_end_gate_fsyscall_patchlist = .;
__paravirt_start_gate_brl_fsys_bubble_down_patchlist = .;
*(.data..patch.brl_fsys_bubble_down)
__paravirt_end_gate_brl_fsys_bubble_down_patchlist = .;
} :readable
.IA_64.unwind_info : { *(.IA_64.unwind_info*) }
.IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind
/*
 * With HAVE_BUGGY_SEGREL the text is placed at GATE_ADDR + PAGE_SIZE and stays
 * in the "readable" segment; otherwise it gets its own "epc" segment.
 */
#ifdef HAVE_BUGGY_SEGREL
.text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) } :readable
#else
. = ALIGN(PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1));
.text : { *(.text) *(.text.*) } :epc
#endif
/* Input sections listed here are dropped from the output image. */
/DISCARD/ : {
*(.got.plt) *(.got)
*(.data .data.* .gnu.linkonce.d.*)
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
*(__ex_table)
*(__mca_table)
}
}
/*
* ld does not recognize this name token; use the constant.
*/
#define PT_IA_64_UNWIND 0x70000001
/*
* We must supply the ELF program headers explicitly to control the number
* of PT_LOAD segments (one "readable" segment, plus an execute-only "epc"
* segment unless HAVE_BUGGY_SEGREL is defined), and set the flags
* explicitly to make segments read-only.
*/
PHDRS
{
readable PT_LOAD FILEHDR PHDRS FLAGS(4); /* PF_R */
#ifndef HAVE_BUGGY_SEGREL
epc PT_LOAD FILEHDR PHDRS FLAGS(1); /* PF_X */
#endif
dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
note PT_NOTE FLAGS(4); /* PF_R */
unwind PT_IA_64_UNWIND;
}
/*
* This controls what symbols we export from the DSO: the three entry points
* listed under "global" are visible under version LINUX_2.5, every other
* symbol is made local.
*/
VERSION
{
LINUX_2.5 {
global:
__kernel_syscall_via_break;
__kernel_syscall_via_epc;
__kernel_sigtramp;
local: *;
};
}
/* The ELF entry point can be used to set the AT_SYSINFO value. */
ENTRY(__kernel_syscall_via_epc)

1212
arch/ia64/kernel/head.S Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,98 @@
/*
* Architecture-specific kernel symbols
*
* Don't put any exports here unless it's defined in an assembler file.
* All other exports should be put directly after the definition.
*/
#include <linux/module.h>
#include <linux/string.h>
/* Basic memory/string primitives. */
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(strlen);
#include <asm/pgtable.h>
EXPORT_SYMBOL_GPL(empty_zero_page);
#include <asm/checksum.h>
EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */
EXPORT_SYMBOL(csum_ipv6_magic);
#include <asm/page.h>
EXPORT_SYMBOL(clear_page);
EXPORT_SYMBOL(copy_page);
#ifdef CONFIG_VIRTUAL_MEM_MAP
#include <linux/bootmem.h>
EXPORT_SYMBOL(min_low_pfn); /* defined by bootmem.c, but not exported by generic code */
EXPORT_SYMBOL(max_low_pfn); /* defined by bootmem.c, but not exported by generic code */
#endif
#include <asm/processor.h>
EXPORT_SYMBOL(ia64_cpu_info);
#ifdef CONFIG_SMP
EXPORT_SYMBOL(local_per_cpu_offset);
#endif
#include <asm/uaccess.h>
/* User-space access helpers. */
EXPORT_SYMBOL(__copy_user);
EXPORT_SYMBOL(__do_clear_user);
EXPORT_SYMBOL(__strlen_user);
EXPORT_SYMBOL(__strncpy_from_user);
EXPORT_SYMBOL(__strnlen_user);
/*
 * from arch/ia64/lib
 * NOTE(review): presumably the integer divide/modulo helper routines the
 * compiler emits calls to -- confirm against arch/ia64/lib. They are declared
 * with a dummy prototype here only so that they can be exported.
 */
extern void __divsi3(void);
extern void __udivsi3(void);
extern void __modsi3(void);
extern void __umodsi3(void);
extern void __divdi3(void);
extern void __udivdi3(void);
extern void __moddi3(void);
extern void __umoddi3(void);
EXPORT_SYMBOL(__divsi3);
EXPORT_SYMBOL(__udivsi3);
EXPORT_SYMBOL(__modsi3);
EXPORT_SYMBOL(__umodsi3);
EXPORT_SYMBOL(__divdi3);
EXPORT_SYMBOL(__udivdi3);
EXPORT_SYMBOL(__moddi3);
EXPORT_SYMBOL(__umoddi3);
/* XOR routines, exported only when RAID456 is built in or built as a module. */
#if defined(CONFIG_MD_RAID456) || defined(CONFIG_MD_RAID456_MODULE)
extern void xor_ia64_2(void);
extern void xor_ia64_3(void);
extern void xor_ia64_4(void);
extern void xor_ia64_5(void);
EXPORT_SYMBOL(xor_ia64_2);
EXPORT_SYMBOL(xor_ia64_3);
EXPORT_SYMBOL(xor_ia64_4);
EXPORT_SYMBOL(xor_ia64_5);
#endif
#include <asm/pal.h>
/* PAL call stubs and scratch floating-point register save/restore. */
EXPORT_SYMBOL(ia64_pal_call_phys_stacked);
EXPORT_SYMBOL(ia64_pal_call_phys_static);
EXPORT_SYMBOL(ia64_pal_call_stacked);
EXPORT_SYMBOL(ia64_pal_call_static);
EXPORT_SYMBOL(ia64_load_scratch_fpregs);
EXPORT_SYMBOL(ia64_save_scratch_fpregs);
#include <asm/unwind.h>
EXPORT_SYMBOL(unw_init_running);
#if defined(CONFIG_IA64_ESI) || defined(CONFIG_IA64_ESI_MODULE)
extern void esi_call_phys (void);
EXPORT_SYMBOL_GPL(esi_call_phys);
#endif
/* Declared as a plain char array here just to be able to export the symbol. */
extern char ia64_ivt[];
EXPORT_SYMBOL(ia64_ivt);
#include <asm/ftrace.h>
#ifdef CONFIG_FUNCTION_TRACER
/* mcount is defined in assembly */
EXPORT_SYMBOL(_mcount);
#endif

View file

@ -0,0 +1,42 @@
/*
* This is where we statically allocate and initialize the initial
* task.
*
* Copyright (C) 1999, 2002-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/init_task.h>
#include <linux/mqueue.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
/* Signal state referenced by name from the initial task's initializer. */
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
/*
* Initial task structure.
*
* We need to make sure that this is properly aligned due to the way process stacks are
* handled. This is done by having a special ".data..init_task" section...
*
* The union overlays the task_struct and its thread_info with an array of
* KERNEL_STACK_SIZE bytes; the object is emitted under the assembler name
* "init_task", which is the symbol exported below.
*/
#define init_thread_info init_task_mem.s.thread_info
union {
struct {
struct task_struct task;
struct thread_info thread_info;
} s;
unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
} init_task_mem asm ("init_task") __init_task_data =
{{
.task = INIT_TASK(init_task_mem.s.task),
.thread_info = INIT_THREAD_INFO(init_task_mem.s.task)
}};
EXPORT_SYMBOL(init_task);

1141
arch/ia64/kernel/iosapic.c Normal file

File diff suppressed because it is too large Load diff

202
arch/ia64/kernel/irq.c Normal file
View file

@ -0,0 +1,202 @@
/*
* linux/arch/ia64/kernel/irq.c
*
* Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
*
* This file contains the code used by various IRQ handling routines:
* asking for different IRQs should be done through these routines
* instead of just grabbing them. Thus setups with different IRQ numbers
* shouldn't result in any weird surprises, and installing new handlers
* should be easier.
*
* Copyright (C) Ashok Raj<ashok.raj@intel.com>, Intel Corporation 2004
*
* 4/14/2004: Added code to handle cpu migration and do safe irq
* migration without losing interrupts for iosapic
* architecture.
*/
#include <asm/delay.h>
#include <asm/uaccess.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <asm/mca.h>
/*
 * Policy hook for a hardware interrupt that arrives on a vector nobody
 * expects. Every architecture decides for itself what to do; here the
 * event is merely logged together with the CPU that saw it.
 */
void ack_bad_irq(unsigned int irq)
{
	int cpu = smp_processor_id();

	printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, cpu);
}
#ifdef CONFIG_IA64_GENERIC
ia64_vector __ia64_irq_to_vector(int irq)
{
return irq_cfg[irq].vector;
}
/* Return the IRQ that the executing CPU's vector_irq table maps @vec to. */
unsigned int __ia64_local_vector_to_irq (ia64_vector vec)
{
	int mapped_irq = __this_cpu_read(vector_irq[vec]);

	return mapped_irq;
}
#endif
/*
* Interrupt statistics:
*/
atomic_t irq_err_count;
/*
 * Architecture part of the /proc/interrupts output: a single "ERR" line
 * carrying the current value of irq_err_count. Always returns 0.
 */
int arch_show_interrupts(struct seq_file *p, int prec)
{
	int errors = atomic_read(&irq_err_count);

	seq_printf(p, "ERR: %10u\n", errors);
	return 0;
}
#ifdef CONFIG_SMP
static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
/*
 * Record that @irq is targeted at the CPU whose hardware id is @hwid and
 * remember the low byte of @redir for it. Out-of-range irq numbers are
 * silently ignored.
 */
void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
{
	if (irq >= NR_IRQS)
		return;

	cpumask_copy(irq_get_irq_data(irq)->affinity,
		     cpumask_of(cpu_logical_id(hwid)));
	irq_redir[irq] = (char) (redir & 0xff);
}
/*
 * Tell whether @cpumask is acceptable as an smp_affinity mask. Every mask
 * is accepted, except on the "sn2" platform where exactly one CPU must be
 * specified.
 */
bool is_affinity_mask_valid(const struct cpumask *cpumask)
{
	if (!ia64_platform_is("sn2"))
		return true;

	/* Only allow one CPU to be specified in the smp_affinity mask */
	return cpumask_weight(cpumask) == 1;
}
#endif /* CONFIG_SMP */
/*
 * Early architecture IRQ setup: only the MCA interrupt initialization is
 * done here. Always returns 0.
 */
int __init arch_early_irq_init(void)
{
ia64_mca_irq_init();
return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
unsigned int vectors_in_migration[NR_IRQS];
/*
* Re-target every enabled, non-per-CPU interrupt whose affinity mask no
* longer contains an online CPU.
*
* Since cpu_online_mask is already updated, we just need to check for
* affinity that has zeros
*/
static void migrate_irqs(void)
{
int irq, new_cpu;
for (irq=0; irq < NR_IRQS; irq++) {
struct irq_desc *desc = irq_to_desc(irq);
struct irq_data *data = irq_desc_get_irq_data(desc);
struct irq_chip *chip = irq_data_get_irq_chip(data);
if (irqd_irq_disabled(data))
continue;
/*
* No handling for now.
* TBD: Implement a disable function so we can now
* tell CPU not to respond to these local intr sources.
* such as ITV,CPEI,MCA etc.
*/
if (irqd_is_per_cpu(data))
continue;
if (cpumask_any_and(data->affinity, cpu_online_mask)
>= nr_cpu_ids) {
/*
* Save it for the later phases of fixup_irqs().
* NOTE(review): the irq number itself is used as the
* "pending" marker, so irq 0 is stored as 0 and looks
* unmarked to the consumer -- confirm that irq 0 can
* never take this path.
*/
vectors_in_migration[irq] = irq;
new_cpu = cpumask_any(cpu_online_mask);
/*
* All three are essential, currently WARN_ON.. maybe panic?
*/
if (chip && chip->irq_disable &&
chip->irq_enable && chip->irq_set_affinity) {
/* Mask the source while it is being re-targeted. */
chip->irq_disable(data);
chip->irq_set_affinity(data,
cpumask_of(new_cpu), false);
chip->irq_enable(data);
} else {
WARN_ON((!chip || !chip->irq_disable ||
!chip->irq_enable ||
!chip->irq_set_affinity));
}
}
}
}
/*
 * Prepare the executing CPU for going offline: mask its timer vector, hand
 * over the time-keeper role if it holds it, move interrupts to other CPUs
 * and process whatever is still pending before interrupts are disabled.
 */
void fixup_irqs(void)
{
unsigned int irq;
extern void ia64_process_pending_intr(void);
extern volatile int time_keeper_id;
/* Mask ITV to disable timer */
ia64_set_itv(1 << 16);
/*
* Find a new timesync master
*/
if (smp_processor_id() == time_keeper_id) {
time_keeper_id = cpumask_first(cpu_online_mask);
printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id);
}
/*
* Phase 1: Locate IRQs bound to this cpu and
* relocate them for cpu removal.
*/
migrate_irqs();
/*
* Phase 2: Perform interrupt processing for all entries reported in
* local APIC.
*/
ia64_process_pending_intr();
/*
* Phase 3: Now handle any interrupts not captured in local APIC.
* This is to account for cases that device interrupted during the time the
* rte was being disabled and re-programmed.
*/
for (irq=0; irq < NR_IRQS; irq++) {
if (vectors_in_migration[irq]) {
/* Run the handler without a register frame, clearing the marker first. */
struct pt_regs *old_regs = set_irq_regs(NULL);
vectors_in_migration[irq]=0;
generic_handle_irq(irq);
set_irq_regs(old_regs);
}
}
/*
* Now let processor die. We do irq disable and max_xtp() to
* ensure there is no more interrupts routed to this processor.
* But the local timer interrupt can have 1 pending which we
* take care in timer_interrupt().
*/
max_xtp();
local_irq_disable();
}
#endif

672
arch/ia64/kernel/irq_ia64.c Normal file
View file

@ -0,0 +1,672 @@
/*
* linux/arch/ia64/kernel/irq_ia64.c
*
* Copyright (C) 1998-2001 Hewlett-Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* 6/10/99: Updated to bring in sync with x86 version to facilitate
* support for SMP and different interrupt controllers.
*
* 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector
* PCI to vector allocation routine.
* 04/14/2004 Ashok Raj <ashok.raj@intel.com>
* Added CPU Hotplug handling for IPF.
*/
#include <linux/module.h>
#include <linux/jiffies.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/ioport.h>
#include <linux/kernel_stat.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/smp.h>
#include <linux/threads.h>
#include <linux/bitops.h>
#include <linux/irq.h>
#include <linux/ratelimit.h>
#include <linux/acpi.h>
#include <linux/sched.h>
#include <asm/delay.h>
#include <asm/intrinsics.h>
#include <asm/io.h>
#include <asm/hw_irq.h>
#include <asm/machvec.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#ifdef CONFIG_PERFMON
# include <asm/perfmon.h>
#endif
/* Set to non-zero to compile in the stack-space check in ia64_handle_irq(). */
#define IRQ_DEBUG 0
/* Value of irq_cfg[].vector while no vector is bound to the irq. */
#define IRQ_VECTOR_UNASSIGNED (0)
/* States kept in irq_status[]. */
#define IRQ_UNUSED (0)
#define IRQ_USED (1)
#define IRQ_RSVD (2)
/* These can be overridden in platform_irq_init */
int ia64_first_device_vector = IA64_DEF_FIRST_DEVICE_VECTOR;
int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR;
/* default base addr of IPI table */
void __iomem *ipi_base_addr = ((void __iomem *)
(__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR));
static cpumask_t vector_allocation_domain(int cpu);
/*
* Legacy IRQ to IA-64 vector translation table.
*/
__u8 isa_irq_to_vector_map[16] = {
/* 8259 IRQ translation, first 16 entries */
0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29,
0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21
};
EXPORT_SYMBOL(isa_irq_to_vector_map);
/* Serializes updates of the irq/vector tables below. */
DEFINE_SPINLOCK(vector_lock);
/* Per-irq binding: the vector in use and the CPU domain it is bound on. */
struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
[0 ... NR_IRQS - 1] = {
.vector = IRQ_VECTOR_UNASSIGNED,
.domain = CPU_MASK_NONE
}
};
/* Per-CPU reverse map from vector to irq; -1 marks an unmapped vector. */
DEFINE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq) = {
[0 ... IA64_NUM_VECTORS - 1] = -1
};
/* For each vector, the set of CPUs on which it is currently taken. */
static cpumask_t vector_table[IA64_NUM_VECTORS] = {
[0 ... IA64_NUM_VECTORS - 1] = CPU_MASK_NONE
};
/* Allocation state of each irq number (IRQ_UNUSED / IRQ_USED / IRQ_RSVD). */
static int irq_status[NR_IRQS] = {
[0 ... NR_IRQS -1] = IRQ_UNUSED
};
/*
 * Return the lowest irq number, starting at IA64_FIRST_DEVICE_VECTOR, whose
 * status is IRQ_UNUSED, or -ENOSPC when there is none.
 */
static inline int find_unassigned_irq(void)
{
	int candidate = IA64_FIRST_DEVICE_VECTOR;

	while (candidate < NR_IRQS) {
		if (irq_status[candidate] == IRQ_UNUSED)
			return candidate;
		candidate++;
	}
	return -ENOSPC;
}
/*
 * Find the first device vector that is free on every CPU of @domain.
 *
 * Returns the vector, -EINVAL when no CPU of @domain is online, or -ENOSPC
 * when every device vector is already taken somewhere in @domain.
 */
static inline int find_unassigned_vector(cpumask_t domain)
{
	cpumask_t overlap;
	int offset;

	cpumask_and(&overlap, &domain, cpu_online_mask);
	if (cpus_empty(overlap))
		return -EINVAL;

	for (offset = 0; offset < IA64_NUM_DEVICE_VECTORS; offset++) {
		int candidate = IA64_FIRST_DEVICE_VECTOR + offset;

		/* The vector is usable only if no CPU of the domain holds it. */
		cpus_and(overlap, domain, vector_table[candidate]);
		if (cpus_empty(overlap))
			return candidate;
	}
	return -ENOSPC;
}
/*
 * Bind @irq to @vector on the CPUs of @domain and update all lookup tables.
 *
 * Returns 0 on success (including when exactly this binding already
 * exists), -EINVAL when no CPU of @domain is online and -EBUSY when @irq is
 * already bound to something else.
 */
static int __bind_irq_vector(int irq, int vector, cpumask_t domain)
{
	struct irq_cfg *entry = &irq_cfg[irq];
	cpumask_t online;
	int target;

	BUG_ON((unsigned)irq >= NR_IRQS);
	BUG_ON((unsigned)vector >= IA64_NUM_VECTORS);

	cpumask_and(&online, &domain, cpu_online_mask);
	if (cpus_empty(online))
		return -EINVAL;

	if (entry->vector != vector || !cpus_equal(entry->domain, domain)) {
		if (entry->vector != IRQ_VECTOR_UNASSIGNED)
			return -EBUSY;

		/* Only the online CPUs get their reverse map updated. */
		for_each_cpu_mask(target, online)
			per_cpu(vector_irq, target)[vector] = irq;

		entry->vector = vector;
		entry->domain = domain;
		irq_status[irq] = IRQ_USED;
		cpus_or(vector_table[vector], vector_table[vector], domain);
	}
	return 0;
}
/* Locked wrapper: performs __bind_irq_vector() with vector_lock held. */
int bind_irq_vector(int irq, int vector, cpumask_t domain)
{
	unsigned long irqflags;
	int status;

	spin_lock_irqsave(&vector_lock, irqflags);
	status = __bind_irq_vector(irq, vector, domain);
	spin_unlock_irqrestore(&vector_lock, irqflags);

	return status;
}
/*
 * Undo the binding of @irq: unmap its vector on the online CPUs of its
 * domain, reset the irq_cfg entry, mark the irq unused and release the
 * vector in vector_table. The irq must currently have a vector.
 */
static void __clear_irq_vector(int irq)
{
	struct irq_cfg *entry = &irq_cfg[irq];
	cpumask_t online, old_domain;
	int old_vector, target;

	BUG_ON((unsigned)irq >= NR_IRQS);
	BUG_ON(entry->vector == IRQ_VECTOR_UNASSIGNED);

	/* Remember the binding; the entry is wiped before the last step. */
	old_vector = entry->vector;
	old_domain = entry->domain;

	cpumask_and(&online, &entry->domain, cpu_online_mask);
	for_each_cpu_mask(target, online)
		per_cpu(vector_irq, target)[old_vector] = -1;

	entry->vector = IRQ_VECTOR_UNASSIGNED;
	entry->domain = CPU_MASK_NONE;
	irq_status[irq] = IRQ_UNUSED;
	cpus_andnot(vector_table[old_vector], vector_table[old_vector], old_domain);
}
/* Locked wrapper: performs __clear_irq_vector() with vector_lock held. */
static void clear_irq_vector(int irq)
{
	unsigned long irqflags;

	spin_lock_irqsave(&vector_lock, irqflags);
	__clear_irq_vector(irq);
	spin_unlock_irqrestore(&vector_lock, irqflags);
}
/*
 * Allocate a free device vector and bind @irq to it. With AUTO_ASSIGN the
 * irq number is taken to be the vector number itself.
 *
 * Returns the vector, or the negative error of the last allocation attempt
 * (-ENOSPC if no online CPU was tried at all).
 */
int
ia64_native_assign_irq_vector (int irq)
{
	cpumask_t domain = CPU_MASK_NONE;
	unsigned long irqflags;
	int cpu;
	int vector = -ENOSPC;

	spin_lock_irqsave(&vector_lock, irqflags);

	/* Try the allocation domain of each online CPU until one has room. */
	for_each_online_cpu(cpu) {
		domain = vector_allocation_domain(cpu);
		vector = find_unassigned_vector(domain);
		if (vector >= 0)
			break;
	}

	if (vector >= 0) {
		if (irq == AUTO_ASSIGN)
			irq = vector;
		BUG_ON(__bind_irq_vector(irq, vector, domain));
	}

	spin_unlock_irqrestore(&vector_lock, irqflags);
	return vector;
}
/*
 * Release the binding of @vector (used as the irq number). Values outside
 * the device vector range are ignored.
 */
void
ia64_native_free_irq_vector (int vector)
{
	if (vector >= IA64_FIRST_DEVICE_VECTOR &&
	    vector <= IA64_LAST_DEVICE_VECTOR)
		clear_irq_vector(vector);
}
/*
 * Bind device vector @vector to the irq of the same number on all CPUs.
 *
 * Returns -EINVAL when @vector is not a device vector, 0 when the binding
 * succeeded and 1 when it failed.
 */
int
reserve_irq_vector (int vector)
{
	int is_device_vector = vector >= IA64_FIRST_DEVICE_VECTOR &&
			       vector <= IA64_LAST_DEVICE_VECTOR;

	if (!is_device_vector)
		return -EINVAL;

	return bind_irq_vector(vector, vector, CPU_MASK_ALL) != 0;
}
/*
* Initialize vector_irq on a new cpu. This function must be called
* with vector_lock held.
*/
void __setup_vector_irq(int cpu)
{
int irq, vector;
/* Clear vector_irq */
for (vector = 0; vector < IA64_NUM_VECTORS; ++vector)
per_cpu(vector_irq, cpu)[vector] = -1;
/* Mark the inuse vectors */
for (irq = 0; irq < NR_IRQS; ++irq) {
if (!cpu_isset(cpu, irq_cfg[irq].domain))
continue;
vector = irq_to_vector(irq);
per_cpu(vector_irq, cpu)[vector] = irq;
}
}
#if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG))
/* How vectors are shared between CPUs; selected by parse_vector_domain(). */
static enum vector_domain_type {
	VECTOR_DOMAIN_NONE,
	VECTOR_DOMAIN_PERCPU
} vector_domain_type = VECTOR_DOMAIN_NONE;

/*
 * Return the set of CPUs a vector allocated for @cpu is bound on: just @cpu
 * in per-CPU mode, all CPUs otherwise.
 */
static cpumask_t vector_allocation_domain(int cpu)
{
	switch (vector_domain_type) {
	case VECTOR_DOMAIN_PERCPU:
		return cpumask_of_cpu(cpu);
	default:
		return CPU_MASK_ALL;
	}
}
/*
 * Start moving @irq to the vector domain of @cpu.
 *
 * Returns 0 when @cpu already belongs to the irq's domain or the move was
 * started, -EBUSY while an earlier move has not finished, -EINVAL when the
 * irq has no vector or @cpu is offline, and -ENOSPC when the new domain has
 * no free vector.
 */
static int __irq_prepare_move(int irq, int cpu)
{
	struct irq_cfg *entry = &irq_cfg[irq];
	cpumask_t new_domain;
	int new_vector;

	if (entry->move_in_progress || entry->move_cleanup_count)
		return -EBUSY;
	if (entry->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu))
		return -EINVAL;
	if (cpu_isset(cpu, entry->domain))
		return 0;

	new_domain = vector_allocation_domain(cpu);
	new_vector = find_unassigned_vector(new_domain);
	if (new_vector < 0)
		return -ENOSPC;

	/* Keep the old domain for the cleanup, then rebind from scratch. */
	entry->move_in_progress = 1;
	entry->old_domain = entry->domain;
	entry->vector = IRQ_VECTOR_UNASSIGNED;
	entry->domain = CPU_MASK_NONE;
	BUG_ON(__bind_irq_vector(irq, new_vector, new_domain));
	return 0;
}
/* Locked wrapper: performs __irq_prepare_move() with vector_lock held. */
int irq_prepare_move(int irq, int cpu)
{
	unsigned long irqflags;
	int status;

	spin_lock_irqsave(&vector_lock, irqflags);
	status = __irq_prepare_move(irq, cpu);
	spin_unlock_irqrestore(&vector_lock, irqflags);

	return status;
}
/*
 * Finish a move started by irq_prepare_move(). Nothing happens unless a
 * move is in progress and the executing CPU is outside the old domain; in
 * that case every online CPU of the old domain is sent the move-cleanup IPI.
 */
void irq_complete_move(unsigned irq)
{
	struct irq_cfg *entry = &irq_cfg[irq];
	cpumask_t stale;
	int cpu;

	if (likely(!entry->move_in_progress))
		return;

	if (unlikely(cpu_isset(smp_processor_id(), entry->old_domain)))
		return;

	cpumask_and(&stale, &entry->old_domain, cpu_online_mask);
	/* One cleanup is expected from each CPU that gets the IPI. */
	entry->move_cleanup_count = cpus_weight(stale);
	for_each_cpu_mask(cpu, stale)
		platform_send_ipi(cpu, IA64_IRQ_MOVE_VECTOR, IA64_IPI_DM_INT, 0);
	entry->move_in_progress = 0;
}
/*
 * Handler of the move-cleanup IPI sent by irq_complete_move().
 *
 * Walks all device vectors of the executing CPU and, for every irq that is
 * waiting for cleanup and whose old domain contains this CPU, drops the
 * CPU's vector mapping and decrements the irq's move_cleanup_count.
 *
 * @irq and @dev_id are unused. Always returns IRQ_HANDLED.
 */
static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id)
{
	int me = smp_processor_id();
	ia64_vector vector;
	unsigned long flags;

	/*
	 * IA64_LAST_DEVICE_VECTOR is itself a valid device vector (the range
	 * checks in ia64_native_free_irq_vector() and reserve_irq_vector()
	 * accept it), so the scan has to be inclusive; otherwise an irq moved
	 * away from the last vector would never be cleaned up.
	 */
	for (vector = IA64_FIRST_DEVICE_VECTOR;
	     vector <= IA64_LAST_DEVICE_VECTOR; vector++) {
		int moved_irq;
		struct irq_desc *desc;
		struct irq_cfg *cfg;

		moved_irq = __this_cpu_read(vector_irq[vector]);
		if (moved_irq < 0)
			continue;

		desc = irq_to_desc(moved_irq);
		cfg = irq_cfg + moved_irq;
		raw_spin_lock(&desc->lock);
		if (!cfg->move_cleanup_count)
			goto unlock;

		if (!cpu_isset(me, cfg->old_domain))
			goto unlock;

		/* The vector tables are protected by vector_lock. */
		spin_lock_irqsave(&vector_lock, flags);
		__this_cpu_write(vector_irq[vector], -1);
		cpu_clear(me, vector_table[vector]);
		spin_unlock_irqrestore(&vector_lock, flags);
		cfg->move_cleanup_count--;
	unlock:
		raw_spin_unlock(&desc->lock);
	}
	return IRQ_HANDLED;
}
/* Action registered for IA64_IRQ_MOVE_VECTOR by init_IRQ(). */
static struct irqaction irq_move_irqaction = {
.handler = smp_irq_move_cleanup_interrupt,
.name = "irq_move"
};
/*
 * Handler of the "vector" early parameter. The value "percpu" selects
 * per-CPU vector domains and sets no_int_routing; any other value is
 * accepted and ignored. Returns -EINVAL only when no value was given.
 */
static int __init parse_vector_domain(char *arg)
{
	if (arg == NULL)
		return -EINVAL;

	if (strcmp(arg, "percpu") == 0) {
		vector_domain_type = VECTOR_DOMAIN_PERCPU;
		no_int_routing = 1;
	}
	return 0;
}
early_param("vector", parse_vector_domain);
#else
/* Fallback variant: a vector is always bound on all CPUs, whatever @cpu is. */
static cpumask_t vector_allocation_domain(int cpu)
{
return CPU_MASK_ALL;
}
#endif
/*
 * Tear down @irq like destroy_irq() does, but leave its number marked
 * IRQ_RSVD so that find_unassigned_irq() will not hand it out again.
 */
void destroy_and_reserve_irq(unsigned int irq)
{
	unsigned long irqflags;

	irq_init_desc(irq);

	spin_lock_irqsave(&vector_lock, irqflags);
	__clear_irq_vector(irq);
	/* __clear_irq_vector() marked it unused; override that. */
	irq_status[irq] = IRQ_RSVD;
	spin_unlock_irqrestore(&vector_lock, irqflags);
}
/*
 * Dynamic irq allocation and deallocation for MSI.
 *
 * create_irq() picks a free vector and a free irq number, binds them and
 * initializes the irq descriptor. Returns the irq number, or a negative
 * errno when no vector or no irq number is available.
 */
int create_irq(void)
{
	cpumask_t domain = CPU_MASK_NONE;
	unsigned long irqflags;
	int cpu;
	int vector = -ENOSPC;
	int irq = -ENOSPC;

	spin_lock_irqsave(&vector_lock, irqflags);

	/* Try the allocation domain of each online CPU until one has room. */
	for_each_online_cpu(cpu) {
		domain = vector_allocation_domain(cpu);
		vector = find_unassigned_vector(domain);
		if (vector >= 0)
			break;
	}

	if (vector >= 0) {
		irq = find_unassigned_irq();
		if (irq >= 0)
			BUG_ON(__bind_irq_vector(irq, vector, domain));
	}

	spin_unlock_irqrestore(&vector_lock, irqflags);

	/* The descriptor is set up outside the lock. */
	if (irq >= 0)
		irq_init_desc(irq);
	return irq;
}
/* Counterpart of create_irq(): reset the descriptor and release the vector. */
void destroy_irq(unsigned int irq)
{
irq_init_desc(irq);
clear_irq_vector(irq);
}
#ifdef CONFIG_SMP
# define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE)
# define IS_LOCAL_TLB_FLUSH(vec) (vec == IA64_IPI_LOCAL_TLB_FLUSH)
#else
# define IS_RESCHEDULE(vec) (0)
# define IS_LOCAL_TLB_FLUSH(vec) (0)
#endif
/*
* That's where the IVT branches when we get an external
* interrupt. This branches to the correct hardware IRQ handler via
* function ptr.
*
* @vector is the first vector to service and @regs the interrupted register
* state. The function keeps servicing vectors until ia64_get_ivr() returns
* IA64_SPURIOUS_INT_VECTOR.
*/
void
ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
unsigned long saved_tpr;
#if IRQ_DEBUG
{
unsigned long bsp, sp;
/*
* Note: if the interrupt happened while executing in
* the context switch routine (ia64_switch_to), we may
* get a spurious stack overflow here. This is
* because the register and the memory stack are not
* switched atomically.
*/
bsp = ia64_getreg(_IA64_REG_AR_BSP);
sp = ia64_getreg(_IA64_REG_SP);
if ((sp - bsp) < 1024) {
static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5);
if (__ratelimit(&ratelimit)) {
printk("ia64_handle_irq: DANGER: less than "
"1KB of free stack space!!\n"
"(bsp=0x%lx, sp=%lx)\n", bsp, sp);
}
}
}
#endif /* IRQ_DEBUG */
/*
* Always set TPR to limit maximum interrupt nesting depth to
* 16 (without this, it would be ~240, which could easily lead
* to kernel stack overflows).
*/
irq_enter();
saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
ia64_srlz_d();
while (vector != IA64_SPURIOUS_INT_VECTOR) {
int irq = local_vector_to_irq(vector);
/* The two IPIs below are handled inline, without touching TPR. */
if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) {
smp_local_flush_tlb();
kstat_incr_irq_this_cpu(irq);
} else if (unlikely(IS_RESCHEDULE(vector))) {
scheduler_ipi();
kstat_incr_irq_this_cpu(irq);
} else {
/* TPR is set to the vector being serviced while its handler runs. */
ia64_setreg(_IA64_REG_CR_TPR, vector);
ia64_srlz_d();
if (unlikely(irq < 0)) {
printk(KERN_ERR "%s: Unexpected interrupt "
"vector %d on CPU %d is not mapped "
"to any IRQ!\n", __func__, vector,
smp_processor_id());
} else
generic_handle_irq(irq);
/*
* Disable interrupts and send EOI:
*/
local_irq_disable();
ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
}
ia64_eoi();
vector = ia64_get_ivr();
}
/*
* This must be done *after* the ia64_eoi(). For example, the keyboard softirq
* handler needs to be able to wait for further keyboard interrupts, which can't
* come through until ia64_eoi() has been done.
*/
irq_exit();
set_irq_regs(old_regs);
}
#ifdef CONFIG_HOTPLUG_CPU
/*
* This function emulates a interrupt processing when a cpu is about to be
* brought down.
*
* It services vectors until ia64_get_ivr() returns IA64_SPURIOUS_INT_VECTOR,
* much like ia64_handle_irq(), but handlers run with a NULL register frame
* and a reschedule IPI is only counted, not acted upon. Every irq handled
* here has its vectors_in_migration[] marker cleared.
*/
void ia64_process_pending_intr(void)
{
ia64_vector vector;
unsigned long saved_tpr;
extern unsigned int vectors_in_migration[NR_IRQS];
vector = ia64_get_ivr();
irq_enter();
saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
ia64_srlz_d();
/*
* Perform normal interrupt style processing
*/
while (vector != IA64_SPURIOUS_INT_VECTOR) {
int irq = local_vector_to_irq(vector);
if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) {
smp_local_flush_tlb();
kstat_incr_irq_this_cpu(irq);
} else if (unlikely(IS_RESCHEDULE(vector))) {
kstat_incr_irq_this_cpu(irq);
} else {
struct pt_regs *old_regs = set_irq_regs(NULL);
ia64_setreg(_IA64_REG_CR_TPR, vector);
ia64_srlz_d();
/*
* Now try calling normal ia64_handle_irq as it would have got called
* from a real intr handler. Try passing null for pt_regs, hopefully
* it will work. I hope it works!.
* Probably could shared code.
*/
if (unlikely(irq < 0)) {
printk(KERN_ERR "%s: Unexpected interrupt "
"vector %d on CPU %d not being mapped "
"to any IRQ!!\n", __func__, vector,
smp_processor_id());
} else {
/* Handled right here, so there is nothing left for later. */
vectors_in_migration[irq]=0;
generic_handle_irq(irq);
}
set_irq_regs(old_regs);
/*
* Disable interrupts and send EOI
*/
local_irq_disable();
ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
}
ia64_eoi();
vector = ia64_get_ivr();
}
irq_exit();
}
#endif
#ifdef CONFIG_SMP
/*
 * Placeholder handler for the reschedule and TLB-flush irqactions. Those
 * vectors are serviced inline by ia64_handle_irq(), so getting here is
 * treated as a bug.
 */
static irqreturn_t dummy_handler (int irq, void *dev_id)
{
BUG();
}
/* Actions registered for the IPI vectors by ia64_native_register_ipi(). */
static struct irqaction ipi_irqaction = {
.handler = handle_IPI,
.name = "IPI"
};
/*
* KVM uses this interrupt to force a cpu out of guest mode
*/
static struct irqaction resched_irqaction = {
.handler = dummy_handler,
.name = "resched"
};
static struct irqaction tlb_irqaction = {
.handler = dummy_handler,
.name = "tlb_flush"
};
#endif
/*
 * Set up vector @vec as a per-CPU interrupt: the irq of the same number is
 * bound to the vector on all CPUs, flagged IRQ_PER_CPU and attached to the
 * LSAPIC chip. @action, when given, is installed as its handler action.
 */
void
ia64_native_register_percpu_irq (ia64_vector vec, struct irqaction *action)
{
	unsigned int irq = vec;

	BUG_ON(bind_irq_vector(irq, vec, CPU_MASK_ALL));
	irq_set_status_flags(irq, IRQ_PER_CPU);
	irq_set_chip(irq, &irq_type_ia64_lsapic);
	if (action != NULL)
		setup_irq(irq, action);
	irq_set_handler(irq, handle_percpu_irq);
}
/* Register the three IPI vectors as per-CPU interrupts (SMP kernels only). */
void __init
ia64_native_register_ipi(void)
{
#ifdef CONFIG_SMP
register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction);
register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction);
#endif
}
/*
 * Boot-time interrupt setup: ACPI boot init, registration of the IPI and
 * spurious vectors (plus the irq-move vector when a vector domain type was
 * selected), optional perfmon per-CPU init, then the platform hook.
 */
void __init
init_IRQ (void)
{
#ifdef CONFIG_ACPI
acpi_boot_init();
#endif
ia64_register_ipi();
/* The spurious vector is registered without any action. */
register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);
#ifdef CONFIG_SMP
#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG)
if (vector_domain_type != VECTOR_DOMAIN_NONE)
register_percpu_irq(IA64_IRQ_MOVE_VECTOR, &irq_move_irqaction);
#endif
#endif
#ifdef CONFIG_PERFMON
pfm_init_percpu();
#endif
platform_irq_init();
}
/*
 * Send an inter-processor interrupt to @cpu by writing to its slot in the
 * IPI table at ipi_base_addr.
 *
 * The data word carries @delivery_mode above the low 8 bits of @vector; the
 * slot address is formed from the CPU's physical id (8bit ID and 8bit EID)
 * shifted left by 4, with bit 3 taken from the low bit of @redirect.
 */
void
ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect)
{
	unsigned long phys_cpu_id = cpu_physical_id(cpu);
	unsigned long ipi_data = (delivery_mode << 8) | (vector & 0xff);
	void __iomem *ipi_addr = ipi_base_addr +
				 ((phys_cpu_id << 4) | ((redirect & 1) << 3));

	writeq(ipi_data, ipi_addr);
}

View file

@ -0,0 +1,44 @@
/*
* LSAPIC Interrupt Controller
*
* This takes care of interrupts that are generated by the CPU's
* internal Streamlined Advanced Programmable Interrupt Controller
* (LSAPIC), such as the ITC and IPI interrupts.
*
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
* Copyright (C) 2000 Hewlett-Packard Co
* Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <linux/sched.h>
#include <linux/irq.h>
/* irq_startup callback of the LSAPIC chip: does nothing and returns 0. */
static unsigned int
lsapic_noop_startup (struct irq_data *data)
{
return 0;
}
/* Shared no-op used for the shutdown, enable, disable and ack callbacks. */
static void
lsapic_noop (struct irq_data *data)
{
/* nothing to do... */
}
/* irq_retrigger callback: resend the interrupt and report success (1). */
static int lsapic_retrigger(struct irq_data *data)
{
ia64_resend_irq(data->irq);
return 1;
}
/*
 * irq_chip for LSAPIC interrupts: every callback except retrigger is a
 * no-op.
 */
struct irq_chip irq_type_ia64_lsapic = {
.name = "LSAPIC",
.irq_startup = lsapic_noop_startup,
.irq_shutdown = lsapic_noop,
.irq_enable = lsapic_noop,
.irq_disable = lsapic_noop,
.irq_ack = lsapic_noop,
.irq_retrigger = lsapic_retrigger,
};

1688
arch/ia64/kernel/ivt.S Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,90 @@
/*
* Jprobe specific operations
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) Intel Corporation, 2005
*
* 2005-May Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy
* <anil.s.keshavamurthy@intel.com> initial implementation
*
* Jprobes (a.k.a. "jump probes" which is built on-top of kprobes) allow a
* probe to be inserted into the beginning of a function call. The fundamental
* difference between a jprobe and a kprobe is the jprobe handler is executed
* in the same context as the target function, while the kprobe handlers
* are executed in interrupt context.
*
* For jprobes we initially gain control by placing a break point in the
* first instruction of the targeted function. When we catch that specific
* break, we:
* * set the return address to our jprobe_inst_return() function
* * jump to the jprobe handler function
*
* Since we fixed up the return address, the jprobe handler will return to our
* jprobe_inst_return() function, giving us control again. At this point we
* are back in the parents frame marker, so we do yet another call to our
* jprobe_break() function to fix up the frame marker as it would normally
* exist in the target function.
*
* Our jprobe_return function then transfers control back to kprobes.c by
* executing a break instruction using one of our reserved numbers. When we
* catch that break in kprobes.c, we continue like we do for a normal kprobe
* by single stepping the emulated instruction, and then returning execution
* to the correct location.
*/
#include <asm/asmmacro.h>
#include <asm/break.h>
/*
 * void jprobe_break(void)
 *
 * Consists of a single break.m with the immediate __IA64_BREAK_JPROBE.
 * Per the file header, that reserved break number is what kprobes.c
 * catches to regain control.  The code is placed in .kprobes.text.
 */
.section .kprobes.text, "ax"
ENTRY(jprobe_break)
break.m __IA64_BREAK_JPROBE
END(jprobe_break)
/*
 * void jprobe_inst_return(void)
 *
 * Per the file header, the jprobe handler returns here because its return
 * address was redirected.  The only instruction is a call to jprobe_break
 * with the return link placed in b0.
 */
GLOBAL_ENTRY(jprobe_inst_return)
br.call.sptk.many b0=jprobe_break
END(jprobe_inst_return)
//
// invalidate_stacked_regs
//
// Step 1: load the address of invalidate_restore_cfm into b6 and br.ret
//         through it, so execution continues at that label.
//         NOTE(review): presumably the br.ret is used (rather than a plain
//         branch) for its effect on the current frame marker, as the label
//         name suggests -- confirm against the Itanium manual.
// Step 2: save ar.rsc in r16, write zero to ar.rsc, issue loadrs, then put
//         the saved ar.rsc back.
//         NOTE(review): presumably loadrs with ar.rsc zeroed is what
//         discards the stacked registers -- confirm.
// Step 3: branch back to the caller through rp.
//
// r16 and b6 are overwritten.
//
GLOBAL_ENTRY(invalidate_stacked_regs)
movl r16=invalidate_restore_cfm
;;
mov b6=r16
;;
br.ret.sptk.many b6
;;
invalidate_restore_cfm:
mov r16=ar.rsc
;;
mov ar.rsc=r0
;;
loadrs
;;
mov ar.rsc=r16
;;
br.cond.sptk.many rp
END(invalidate_stacked_regs)
//
// flush_register_stack
//
// Issues flushrs and returns to the caller through rp.
//
GLOBAL_ENTRY(flush_register_stack)
// flush dirty regs to backing store (must be first in insn group)
flushrs
;;
br.ret.sptk.many rp
END(flush_register_stack)

1129
arch/ia64/kernel/kprobes.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,170 @@
/*
* arch/ia64/kernel/machine_kexec.c
*
* Handle transition of Linux booting another kernel
* Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
* Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
* Copyright (C) 2006 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
*
* This source code is licensed under the GNU General Public License,
* Version 2. See the file COPYING for more details.
*/
#include <linux/mm.h>
#include <linux/kexec.h>
#include <linux/cpu.h>
#include <linux/irq.h>
#include <linux/efi.h>
#include <linux/numa.h>
#include <linux/mmzone.h>
#include <asm/numa.h>
#include <asm/mmu_context.h>
#include <asm/setup.h>
#include <asm/delay.h>
#include <asm/meminit.h>
#include <asm/processor.h>
#include <asm/sal.h>
#include <asm/mca.h>
/*
 * Signature of the relocation stub.  machine_kexec_prepare() copies the
 * stub into the image's control code page and ia64_machine_kexec() calls
 * it from there with these four arguments.  It is declared __noreturn.
 */
typedef void (*relocate_new_kernel_t)(
unsigned long indirection_page,
unsigned long start_address,
struct ia64_boot_param *boot_param,
unsigned long pal_addr) __noreturn;
/* Image most recently passed to machine_kexec_prepare(). */
struct kimage *ia64_kimage;
/*
 * Busy memory resource named "EFI Memory Map".  start/end are zero here;
 * presumably they are filled in elsewhere before use -- TODO confirm.
 */
struct resource efi_memmap_res = {
.name = "EFI Memory Map",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
/*
 * Busy memory resource named "Boot parameter".  start/end are zero here;
 * presumably they are filled in elsewhere before use -- TODO confirm.
 */
struct resource boot_param_res = {
.name = "Boot parameter",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
/*
 * Do whatever setup is needed on the image and the
 * reboot code buffer to allow us to avoid allocations
 * later.
 *
 * Copies relocate_new_kernel_size bytes of the relocation stub into the
 * image's control code page, flushes the instruction cache over that
 * range and records the image in ia64_kimage.  Always returns 0.
 */
int machine_kexec_prepare(struct kimage *image)
{
void *control_code_buffer;
const unsigned long *func;
/*
 * func[0] is used below as the address of the stub's code.
 * NOTE(review): this relies on &relocate_new_kernel pointing at a
 * function descriptor whose first word is the entry address (the same
 * layout ia64_fptr_t is used for elsewhere in this file) -- confirm.
 */
func = (unsigned long *)&relocate_new_kernel;
/* Pre-load control code buffer to minimize work in kexec path */
control_code_buffer = page_address(image->control_code_page);
memcpy((void *)control_code_buffer, (const void *)func[0],
relocate_new_kernel_size);
flush_icache_range((unsigned long)control_code_buffer,
(unsigned long)control_code_buffer + relocate_new_kernel_size);
ia64_kimage = image;
return 0;
}
/* Nothing to undo: machine_kexec_prepare() allocates nothing. */
void machine_kexec_cleanup(struct kimage *image)
{
}
/*
 * Do not allocate memory (or fail in any way) in machine_kexec().
 * We are past the point of no return, committed to rebooting now.
 *
 * ia64_machine_kexec - callback run by unw_init_running() from machine_kexec()
 * @info: unwind frame info; info->sw is used on the crash path
 * @arg:  the struct kimage to boot
 *
 * Re-points or clears the SAL OS_INIT/OS_MCA vectors, disables and masks
 * interrupt sources, drains in-service and pending interrupts, then calls
 * the relocation stub that machine_kexec_prepare() placed in the control
 * code page.  The stub is declared __noreturn; BUG() follows the call.
 */
static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
{
struct kimage *image = arg;
relocate_new_kernel_t rnk;
void *pal_addr = efi_get_pal_addr();
unsigned long code_addr;
int ii;
u64 fp, gp;
ia64_fptr_t *init_handler = (ia64_fptr_t *)ia64_os_init_on_kdump;
BUG_ON(!image);
code_addr = (unsigned long)page_address(image->control_code_page);
if (image->type == KEXEC_TYPE_CRASH) {
crash_save_this_cpu();
current->thread.ksp = (__u64)info->sw - 16;
/* Register noop init handler */
/* SAL is given translated (ia64_tpa) entry and gp addresses. */
fp = ia64_tpa(init_handler->fp);
gp = ia64_tpa(ia64_getreg(_IA64_REG_GP));
ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, fp, gp, 0, fp, gp, 0);
} else {
/* Unregister init handlers of current kernel */
ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, 0, 0, 0, 0, 0, 0);
}
/* Unregister mca handler - No more recovery on current kernel */
ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, 0, 0, 0, 0, 0, 0);
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
/* Mask CMC and Performance Monitor interrupts */
ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
/* Mask ITV and Local Redirect Registers */
ia64_set_itv(1 << 16);
ia64_set_lrr0(1 << 16);
ia64_set_lrr1(1 << 16);
/* terminate possible nested in-service interrupts */
for (ii = 0; ii < 16; ii++)
ia64_eoi();
/* unmask TPR and clear any pending interrupts */
ia64_setreg(_IA64_REG_CR_TPR, 0);
ia64_srlz_d();
/* Keep reading vectors until only the spurious vector is reported. */
while (ia64_get_ivr() != IA64_SPURIOUS_INT_VECTOR)
ia64_eoi();
platform_kernel_launch_event();
/*
 * The function pointer is the address of code_addr itself, not its value.
 * NOTE(review): this appears intentional -- the local variable stands in
 * for a function descriptor whose first word is the entry address (the
 * copied stub), mirroring how machine_kexec_prepare() reads func[0].
 * Confirm before "fixing" this to (relocate_new_kernel_t)code_addr.
 */
rnk = (relocate_new_kernel_t)&code_addr;
(*rnk)(image->head, image->start, ia64_boot_param,
GRANULEROUNDDOWN((unsigned long) pal_addr));
BUG();
}
/*
 * machine_kexec - boot @image.
 *
 * Runs ia64_machine_kexec() through unw_init_running() so the callback
 * receives unwind frame info, then spins forever if that call returns.
 */
void machine_kexec(struct kimage *image)
{
	BUG_ON(!image);

	unw_init_running(ia64_machine_kexec, image);

	while (1)
		continue;
}
/*
 * arch_crash_save_vmcoreinfo - record ia64-specific items in vmcoreinfo.
 *
 * Depending on the configuration this records the pgdat_list symbol and
 * length, the node_memblk symbol, length, struct size and two field
 * offsets, and which of the PGTABLE_3 / PGTABLE_4 options is set.
 */
void arch_crash_save_vmcoreinfo(void)
{
#if defined(CONFIG_DISCONTIGMEM) || defined(CONFIG_SPARSEMEM)
VMCOREINFO_SYMBOL(pgdat_list);
VMCOREINFO_LENGTH(pgdat_list, MAX_NUMNODES);
#endif
#ifdef CONFIG_NUMA
VMCOREINFO_SYMBOL(node_memblk);
VMCOREINFO_LENGTH(node_memblk, NR_NODE_MEMBLKS);
VMCOREINFO_STRUCT_SIZE(node_memblk_s);
VMCOREINFO_OFFSET(node_memblk_s, start_paddr);
VMCOREINFO_OFFSET(node_memblk_s, size);
#endif
#ifdef CONFIG_PGTABLE_3
VMCOREINFO_CONFIG(PGTABLE_3);
#elif defined(CONFIG_PGTABLE_4)
VMCOREINFO_CONFIG(PGTABLE_4);
#endif
}
/* Returns the ia64_tpa() translation of the address of vmcoreinfo_note. */
unsigned long paddr_vmcoreinfo_note(void)
{
	char *note = (char *)&vmcoreinfo_note;

	return ia64_tpa((unsigned long)note);
}

View file

@ -0,0 +1,90 @@
#include <linux/module.h>
#include <linux/dma-mapping.h>
#include <asm/machvec.h>
#ifdef CONFIG_IA64_GENERIC
#include <linux/kernel.h>
#include <linux/string.h>
#include <asm/page.h>
/*
 * Active machine vector of a generic kernel; machvec_init() copies the
 * matching entry into it.
 */
struct ia64_machine_vector ia64_mv;
EXPORT_SYMBOL(ia64_mv);
/*
 * lookup_machvec - find a machine vector by name.
 * @name: platform name to look for
 *
 * Walks the entries between machvec_start and machvec_end and returns the
 * first one whose ->name equals @name, or NULL when none matches.
 */
static struct ia64_machine_vector * __init
lookup_machvec (const char *name)
{
	extern struct ia64_machine_vector machvec_start[];
	extern struct ia64_machine_vector machvec_end[];
	struct ia64_machine_vector *cur = machvec_start;

	while (cur < machvec_end) {
		if (!strcmp(cur->name, name))
			return cur;
		++cur;
	}
	return NULL;
}
/*
 * machvec_init - select the machine vector of a generic kernel.
 * @name: platform name, or NULL to ask acpi_get_sysname()
 *
 * Copies the matching entry into ia64_mv and logs the choice; panics when
 * no entry carries that name.
 */
void __init
machvec_init (const char *name)
{
	struct ia64_machine_vector *found;

	if (!name)
		name = acpi_get_sysname();

	found = lookup_machvec(name);
	if (!found) {
		panic("generic kernel failed to find machine vector for"
		      " platform %s!", name);
	}

	ia64_mv = *found;
	printk(KERN_INFO "booting generic kernel on platform %s\n", name);
}
/*
 * machvec_init_from_cmdline - pick the machine vector named on the command line.
 * @cmdline: kernel command line
 *
 * Looks for the first "machvec=" in @cmdline and passes the text that
 * follows it, up to the first space and limited to 63 characters, to
 * machvec_init().  Without a "machvec=" option machvec_init(NULL) is
 * called, which falls back to the ACPI system name.
 */
void __init
machvec_init_from_cmdline(const char *cmdline)
{
	char str[64];
	const char *start;
	char *end;

	start = strstr(cmdline, "machvec=");
	if (!start) {
		/* A void function must not return an expression (ISO C). */
		machvec_init(NULL);
		return;
	}

	strlcpy(str, start + strlen("machvec="), sizeof(str));

	/* The option value ends at the first space, if there is one. */
	end = strchr(str, ' ');
	if (end)
		*end = '\0';

	machvec_init(str);
}
#endif /* CONFIG_IA64_GENERIC */
/* Exported machine-vector setup hook that does nothing. */
void machvec_setup(char **arg)
{
}
EXPORT_SYMBOL(machvec_setup);
/* Exported machine-vector timer-interrupt hook that does nothing. */
void machvec_timer_interrupt(int irq, void *dev_id)
{
}
EXPORT_SYMBOL(machvec_timer_interrupt);
/*
 * Exported DMA sync hook for a single mapping: issues a full memory
 * barrier and ignores all of its arguments.
 */
void machvec_dma_sync_single(struct device *hwdev, dma_addr_t dma_handle,
			     size_t size, enum dma_data_direction dir)
{
	mb();
}
EXPORT_SYMBOL(machvec_dma_sync_single);
/*
 * Exported DMA sync hook for a scatterlist: issues a full memory barrier
 * and ignores all of its arguments.
 */
void machvec_dma_sync_sg(struct device *hwdev, struct scatterlist *sg,
			 int n, enum dma_data_direction dir)
{
	mb();
}
EXPORT_SYMBOL(machvec_dma_sync_sg);

2166
arch/ia64/kernel/mca.c Normal file

File diff suppressed because it is too large Load diff

1122
arch/ia64/kernel/mca_asm.S Normal file

File diff suppressed because it is too large Load diff

795
arch/ia64/kernel/mca_drv.c Normal file
View file

@ -0,0 +1,795 @@
/*
* File: mca_drv.c
* Purpose: Generic MCA handling layer
*
* Copyright (C) 2004 FUJITSU LIMITED
* Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
* Copyright (C) 2005 Silicon Graphics, Inc
* Copyright (C) 2005 Keith Owens <kaos@sgi.com>
* Copyright (C) 2006 Russ Anderson <rja@sgi.com>
*/
#include <linux/types.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kallsyms.h>
#include <linux/bootmem.h>
#include <linux/acpi.h>
#include <linux/timer.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/workqueue.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <asm/delay.h>
#include <asm/machvec.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/sal.h>
#include <asm/mca.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include "mca_drv.h"
/* max size of SAL error record (default) */
/* Exposed as a module parameter; read once by init_record_index_pools(). */
static int sal_rec_max = 10000;
/* from mca_drv_asm.S */
extern void *mca_handler_bhhook(void);
/* Held by mca_handler_bh() around its call to mca_page_isolate(). */
static DEFINE_SPINLOCK(mca_bh_lock);
/* Result of is_mca_global(). */
typedef enum {
MCA_IS_LOCAL = 0,
MCA_IS_GLOBAL = 1
} mca_type_t;
/* Pages registered by mca_page_isolate(), and how many slots are in use. */
#define MAX_PAGE_ISOLATE 1024
static struct page *page_isolate[MAX_PAGE_ISOLATE];
static int num_page_isolate = 0;
/*
 * Result of mca_page_isolate():
 * ISOLATE_NG   - the page could not be isolated
 * ISOLATE_OK   - the page is (now or already) on the isolate list
 * ISOLATE_NONE - the address does not map to a valid page
 */
typedef enum {
ISOLATE_NG,
ISOLATE_OK,
ISOLATE_NONE
} isolate_status_t;
/* Values returned by fatal_mca() and mca_recovered() respectively. */
typedef enum {
MCA_NOT_RECOVERED = 0,
MCA_RECOVERED = 1
} recovery_status_t;
/*
 * This pool keeps pointers to the section part of SAL error record
 *
 * The buffer is allocated by init_record_index_pools(); slots are handed
 * out by LOG_INDEX_ADD_SECT_PTR(), which wraps cur_idx at max_idx.
 */
static struct {
slidx_list_t *buffer; /* section pointer list pool */
int cur_idx; /* Current index of section pointer list pool */
int max_idx; /* Maximum index of section pointer list pool */
} slidx_pool;
/*
 * fatal_mca - report an MCA that cannot be recovered.
 * @fmt: printf-style format, followed by its arguments
 *
 * The message is formatted into a 256-byte stack buffer and logged at
 * KERN_ALERT with an "MCA: " prefix and a trailing newline.
 * Always returns MCA_NOT_RECOVERED.
 */
static int
fatal_mca(const char *fmt, ...)
{
	char msg[256];
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(msg, sizeof(msg), fmt, ap);
	va_end(ap);

	ia64_mca_printk(KERN_ALERT "MCA: %s\n", msg);

	return MCA_NOT_RECOVERED;
}
/*
 * mca_recovered - report an MCA that was recovered.
 * @fmt: printf-style format, followed by its arguments
 *
 * The message is formatted into a 256-byte stack buffer and logged at
 * KERN_INFO with an "MCA: " prefix and a trailing newline.
 * Always returns MCA_RECOVERED.
 */
static int
mca_recovered(const char *fmt, ...)
{
	char msg[256];
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(msg, sizeof(msg), fmt, ap);
	va_end(ap);

	ia64_mca_printk(KERN_INFO "MCA: %s\n", msg);

	return MCA_RECOVERED;
}
/**
 * mca_page_isolate - isolate a poisoned page in order not to use it later
 * @paddr:	poisoned memory location
 *
 * A page that is not yet listed gets an extra reference, is marked
 * Reserved and is appended to page_isolate[].
 *
 * Return value:
 *	ISOLATE_NONE when @paddr does not map to a valid page,
 *	ISOLATE_OK when the page is (already or newly) listed,
 *	ISOLATE_NG when the list is full or the page is Slab/Reserved.
 */
static isolate_status_t
mca_page_isolate(unsigned long paddr)
{
	struct page *victim;
	int slot;

	/* the address must be a valid physical address backed by a page */
	if (!ia64_phys_addr_valid(paddr) || !pfn_valid(paddr >> PAGE_SHIFT))
		return ISOLATE_NONE;

	victim = pfn_to_page(paddr >> PAGE_SHIFT);

	/* nothing more to do when the page is already listed */
	for (slot = 0; slot < num_page_isolate; slot++) {
		if (page_isolate[slot] == victim)
			return ISOLATE_OK;
	}

	/* no free slot left */
	if (num_page_isolate == MAX_PAGE_ISOLATE)
		return ISOLATE_NG;

	/* pages having attribute 'SLAB' or 'Reserved' are refused */
	if (PageSlab(victim) || PageReserved(victim))
		return ISOLATE_NG;

	/* pin the page, mark it 'Reserved' and remember it */
	get_page(victim);
	SetPageReserved(victim);
	page_isolate[num_page_isolate] = victim;
	num_page_isolate++;

	return ISOLATE_OK;
}
/**
 * mca_handler_bh - Kill the process which occurred memory read error
 * @paddr: poisoned address received from MCA Handler
 * @iip: instruction pointer value, only printed here
 * @ipsr: processor status value, only printed here
 *
 * Dumps the MCA log buffer, logs the affected process, tries to isolate
 * the page containing @paddr under mca_bh_lock, and then terminates the
 * current process with do_exit(SIGKILL).
 */
void
mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
{
ia64_mlogbuf_dump();
printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
"iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
raw_smp_processor_id(), current->pid,
from_kuid(&init_user_ns, current_uid()),
iip, ipsr, paddr, current->comm);
spin_lock(&mca_bh_lock);
switch (mca_page_isolate(paddr)) {
case ISOLATE_OK:
printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr);
break;
case ISOLATE_NG:
printk(KERN_CRIT "Page isolation: ( %lx ) failure.\n", paddr);
break;
default:
/* ISOLATE_NONE: nothing is logged */
break;
}
spin_unlock(&mca_bh_lock);
/* This process is about to be killed itself */
do_exit(SIGKILL);
}
/**
 * mca_make_peidx - Make index of processor error section
 * @slpi: pointer to record of processor error section
 * @peidx: pointer to index of processor error section
 *
 * Fills @peidx with three pointers: the section itself, the cpuid info
 * located after all check entries, and the static info located after the
 * cpuid info.  The latter two are NULL when the matching valid bit is
 * clear.
 */
static void
mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx)
{
/*
 * calculate the start address of
 * "struct cpuid_info" and "sal_processor_static_info_t".
 */
u64 total_check_num = slpi->valid.num_cache_check
+ slpi->valid.num_tlb_check
+ slpi->valid.num_bus_check
+ slpi->valid.num_reg_file_check
+ slpi->valid.num_ms_check;
/* head: the fixed section part plus one mod-error entry per check */
u64 head_size = sizeof(sal_log_mod_error_info_t) * total_check_num
+ sizeof(sal_log_processor_info_t);
/* mid: size of the cpuid info, or zero when valid.cpuid_info is clear */
u64 mid_size = slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info);
peidx_head(peidx) = slpi;
peidx_mid(peidx) = (struct sal_cpuid_info *)
(slpi->valid.cpuid_info ? ((char*)slpi + head_size) : NULL);
peidx_bottom(peidx) = (sal_processor_static_info_t *)
(slpi->valid.psi_static_struct ?
((char*)slpi + head_size + mid_size) : NULL);
}
/*
 * LOG_INDEX_ADD_SECT_PTR - index one section header.
 *
 * Takes the next slot of slidx_pool, points it at section header @ptr and
 * links it onto list head @sect.  The pool index wraps at max_idx, so
 * slots are reused once the pool has been walked through.
 */
#define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \
	do { \
		slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \
		hl->hdr = ptr; \
		list_add(&hl->list, &(sect)); \
		slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; \
	} while (0)

/**
 * mca_make_slidx -  Make index of SAL error record
 * @buffer: pointer to SAL error record
 * @slidx: pointer to index of SAL error record
 *
 * Walks the sections that follow the record header and links each one
 * onto the list of @slidx that matches its GUID; sections with an
 * unknown GUID go onto the "unsupported" list.  The walk stops at the
 * record length or at the first section that claims a length of zero.
 *
 * Return value:
 *	1 if record has platform error / 0 if not
 */
static int
mca_make_slidx(void *buffer, slidx_table_t *slidx)
{
	int platform_err = 0;
	int record_len = ((sal_log_record_header_t*)buffer)->len;
	u32 ercd_pos;
	int sects;
	sal_log_section_hdr_t *sp;

	/*
	 * Initialize index referring current record
	 */
	INIT_LIST_HEAD(&(slidx->proc_err));
	INIT_LIST_HEAD(&(slidx->mem_dev_err));
	INIT_LIST_HEAD(&(slidx->sel_dev_err));
	INIT_LIST_HEAD(&(slidx->pci_bus_err));
	INIT_LIST_HEAD(&(slidx->smbios_dev_err));
	INIT_LIST_HEAD(&(slidx->pci_comp_err));
	INIT_LIST_HEAD(&(slidx->plat_specific_err));
	INIT_LIST_HEAD(&(slidx->host_ctlr_err));
	INIT_LIST_HEAD(&(slidx->plat_bus_err));
	INIT_LIST_HEAD(&(slidx->unsupported));

	/*
	 * Extract a Record Header
	 */
	slidx->header = buffer;

	/*
	 * Extract each section records
	 * (arranged from "int ia64_log_platform_info_print()")
	 */
	for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0;
		ercd_pos < record_len; ercd_pos += sp->len, sects++) {
		sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos);

		/*
		 * A section that claims a length of zero would never advance
		 * ercd_pos and this loop would spin forever inside the MCA
		 * handler.  Treat it as the end of the usable record.
		 */
		if (sp->len == 0)
			break;

		if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) {
			LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp);
		} else if (!efi_guidcmp(sp->guid,
				SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) {
			platform_err = 1;
			LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp);
		} else if (!efi_guidcmp(sp->guid,
				SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) {
			platform_err = 1;
			LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp);
		} else if (!efi_guidcmp(sp->guid,
				SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) {
			platform_err = 1;
			LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp);
		} else if (!efi_guidcmp(sp->guid,
				SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) {
			platform_err = 1;
			LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp);
		} else if (!efi_guidcmp(sp->guid,
				SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) {
			platform_err = 1;
			LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp);
		} else if (!efi_guidcmp(sp->guid,
				SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) {
			platform_err = 1;
			LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp);
		} else if (!efi_guidcmp(sp->guid,
				SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) {
			platform_err = 1;
			LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp);
		} else if (!efi_guidcmp(sp->guid,
				SAL_PLAT_BUS_ERR_SECT_GUID)) {
			platform_err = 1;
			LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp);
		} else {
			LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp);
		}
	}
	slidx->n_sections = sects;

	return platform_err;
}
/**
 * init_record_index_pools - Initialize pool of lists for SAL record index
 *
 * Sizes slidx_pool from the sal_rec_max module parameter and the smallest
 * entry of the per-section minimum-size table, then allocates the pool.
 *
 * Return value:
 *	0 on Success / -ENOMEM on Failure
 */
static int
init_record_index_pools(void)
{
	int i;
	int rec_max_size;  /* Maximum size of SAL error records */
	int sect_min_size; /* Minimum size of SAL error sections */
	/* minimum size table of each section */
	static int sal_log_sect_min_sizes[] = {
		sizeof(sal_log_processor_info_t)
		+ sizeof(sal_processor_static_info_t),
		sizeof(sal_log_mem_dev_err_info_t),
		sizeof(sal_log_sel_dev_err_info_t),
		sizeof(sal_log_pci_bus_err_info_t),
		sizeof(sal_log_smbios_dev_err_info_t),
		sizeof(sal_log_pci_comp_err_info_t),
		sizeof(sal_log_plat_specific_err_info_t),
		sizeof(sal_log_host_ctlr_err_info_t),
		sizeof(sal_log_plat_bus_err_info_t),
	};
	/*
	 * Number of table entries.  The divisor must be the element size:
	 * dividing by sizeof(size_t) while the elements are int made the
	 * scan below stop half way through the table on LP64.
	 */
	const int n_sizes = sizeof(sal_log_sect_min_sizes)
			    / sizeof(sal_log_sect_min_sizes[0]);

	/*
	 * MCA handler cannot allocate new memory on flight,
	 * so we preallocate enough memory to handle a SAL record.
	 *
	 * Initialize a handling set of slidx_pool:
	 *   1. Pick up the max size of SAL error records
	 *   2. Pick up the min size of SAL error sections
	 *   3. Allocate the pool as enough to 2 SAL records
	 *     (now we can estimate the maximum of section in a record.)
	 */

	/* - 1 - */
	rec_max_size = sal_rec_max;

	/* - 2 - */
	sect_min_size = sal_log_sect_min_sizes[0];
	for (i = 1; i < n_sizes; i++)
		if (sect_min_size > sal_log_sect_min_sizes[i])
			sect_min_size = sal_log_sect_min_sizes[i];

	/* - 3 - */
	slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1;
	slidx_pool.buffer =
		kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL);

	return slidx_pool.buffer ? 0 : -ENOMEM;
}
/*****************************************************************************
* Recovery functions *
*****************************************************************************/
/**
 * is_mca_global - Check whether this MCA is global or not
 * @peidx: pointer of index of processor error section
 * @pbci: pointer to pal_bus_check_info_t
 * @sos: pointer to hand off struct between SAL and OS
 *
 * Return value:
 *	MCA_IS_LOCAL / MCA_IS_GLOBAL
 */
static mca_type_t
is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci,
	      struct ia64_sal_os_state *sos)
{
	pal_processor_state_info_t *state =
		(pal_processor_state_info_t*)peidx_psp(peidx);

	/*
	 * PAL can request a rendezvous, if the MCA has a global scope.
	 * If "rz_always" flag is set, SAL requests MCA rendezvous
	 * in spite of global MCA.
	 * Therefore it is local MCA when rendezvous has not been requested.
	 * Failed to rendezvous, the system must be down.
	 */
	switch (sos->rv_rc) {
	case -1:
		/* SAL rendezvous unsuccessful */
		return MCA_IS_GLOBAL;
	case  0:
		/* SAL rendezvous not required */
		return MCA_IS_LOCAL;
	default:
		/*
		 * 1 (rendezvous successful int), 2 (successful int with
		 * init) and anything else: decide from the check bits.
		 */
		break;
	}

	/*
	 * Any Cache/TLB/Reg_File/Uarch_Check means a processor internal
	 * error, which is a local MCA.
	 */
	if (state->tc || state->cc || state->rc || state->uc)
		return MCA_IS_LOCAL;

	/*
	 * No Bus_Check structure, or Bus_Check.ib (internal bus error) set:
	 * a global MCA (e.g. a system bus address parity error).
	 */
	if (!pbci)
		return MCA_IS_GLOBAL;
	if (pbci->ib)
		return MCA_IS_GLOBAL;

	/*
	 * Bus_Check.eb (external bus error) can be either local or global,
	 * depending on Bus_Check.bsi:
	 *   0: Unknown/unclassified (e.g. a load from poisoned memory)
	 *   1: BERR#
	 *   2: BINIT#
	 *   3: Hard Fail
	 * (FIXME: Are these SGI specific or generic bsi values?)
	 * Only bsi 0 is treated as local.
	 */
	if (pbci->eb && pbci->bsi == 0)
		return MCA_IS_LOCAL;

	return MCA_IS_GLOBAL;
}
/**
 * get_target_identifier - Get the valid Cache or Bus check target identifier.
 * @peidx: pointer of index of processor error section
 *
 * Return value:
 *	target address on Success / 0 on Failure
 */
static u64
get_target_identifier(peidx_table_t *peidx)
{
	sal_log_mod_error_info_t *entry;
	pal_cache_check_info_t *cache;
	u64 best = 0;
	int best_level = 9;
	int pos;

	/*
	 * Scan the cache checks; among those carrying a non-zero, valid
	 * target identifier keep the one with the lowest cache level.
	 */
	for (pos = 0; pos < peidx_cache_check_num(peidx); pos++) {
		entry = (sal_log_mod_error_info_t *)peidx_cache_check(peidx, pos);
		if (!entry->valid.target_identifier || !entry->target_identifier)
			continue;

		cache = (pal_cache_check_info_t *)&(entry->check_info);
		if (best && cache->level >= best_level)
			continue;

		best = entry->target_identifier;
		best_level = cache->level;
	}
	if (best)
		return best;

	/*
	 * No cache check qualified: fall back to the first bus check.
	 */
	entry = peidx_bus_check(peidx, 0);
	if (!entry || !entry->valid.target_identifier)
		return 0;

	return entry->target_identifier;
}
/**
 * recover_from_read_error - Try to recover the errors which type are "read"s.
 * @slidx: pointer of index of SAL error record (not used by this function)
 * @peidx: pointer of index of processor error section
 * @pbci: pointer of pal_bus_check_info (not used by this function)
 * @sos: pointer to hand off struct between SAL and OS
 *
 * On success the saved min-state in @sos is rewritten so that execution
 * resumes in mca_handler_bhhook, in kernel mode, with the target address,
 * the interrupted iip and the interrupted ipsr as its arguments.
 *
 * Return value:
 *	MCA_RECOVERED (1) on Success / MCA_NOT_RECOVERED (0) on Failure
 */
static int
recover_from_read_error(slidx_table_t *slidx,
			peidx_table_t *peidx, pal_bus_check_info_t *pbci,
			struct ia64_sal_os_state *sos)
{
	u64 target_identifier;
	pal_min_state_area_t *pmsa;
	struct ia64_psr *psr1, *psr2;
	ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook;

	/* Is target address valid? */
	target_identifier = get_target_identifier(peidx);
	if (!target_identifier)
		return fatal_mca("target address not valid");

	/*
	 * cpu read or memory-mapped io read
	 *
	 *    offending process  affected process  OS MCA do
	 *     kernel mode        kernel mode       down system
	 *     kernel mode        user   mode       kill the process
	 *     user   mode        kernel mode       down system (*)
	 *     user   mode        user   mode       kill the process
	 *
	 * (*) You could terminate offending user-mode process
	 *    if (pbci->pv && pbci->pl != 0) *and* if you sure
	 *    the process not have any locks of kernel.
	 */

	/* Is minstate valid? */
	if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
		return fatal_mca("minstate not valid");
	psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
	psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);

	/*
	 *  Check the privilege level of interrupted context.
	 *   If it is user-mode, then terminate affected process.
	 */

	pmsa = sos->pal_min_state;
	if (psr1->cpl != 0 ||
	   ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) {
		/*
		 *  setup for resume to bottom half of MCA,
		 * "mca_handler_bhhook"
		 */
		/* pass to bhhook as argument (gr8, ...) */
		pmsa->pmsa_gr[8-1] = target_identifier;
		pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip;
		pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr;
		/* set interrupted return address (but no use) */
		pmsa->pmsa_br0 = pmsa->pmsa_iip;
		/* change resume address to bottom half */
		pmsa->pmsa_iip = mca_hdlr_bh->fp;
		pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp;
		/* set cpl with kernel mode */
		psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
		psr2->cpl = 0;
		psr2->ri  = 0;
		psr2->bn  = 1;
		psr2->i  = 0;

		return mca_recovered("user memory corruption. "
				"kill affected process - recovered.");
	}

	/* fatal_mca() appends the newline itself, so none is given here */
	return fatal_mca("kernel context not recovered, iip 0x%lx",
			 pmsa->pmsa_iip);
}
/**
 * recover_from_platform_error - Recover from platform error.
 * @slidx: pointer of index of SAL error record
 * @peidx: pointer of index of processor error section
 * @pbci: pointer of pal_bus_check_info
 * @sos: pointer to hand off struct between SAL and OS
 *
 * Only read-type external bus errors with unclassified status and pure
 * cache errors are handed to recover_from_read_error(); everything else
 * is reported as not recovered.
 *
 * Return value:
 *	1 on Success / 0 on Failure
 */
static int
recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx,
			    pal_bus_check_info_t *pbci,
			    struct ia64_sal_os_state *sos)
{
	pal_processor_state_info_t *state =
		(pal_processor_state_info_t*)peidx_psp(peidx);

	if (state->bc && pbci->eb && pbci->bsi == 0) {
		switch (pbci->type) {
		case 1: /* partial read */
		case 3: /* full line(cpu) read */
		case 9: /* I/O space read */
			return recover_from_read_error(slidx, peidx, pbci,
						       sos);
		default:
			/*
			 * Not attempted:
			 *  0 unknown, 2 partial write, 4 full line write,
			 *  5 implicit or explicit write-back operation,
			 *  6 snoop probe, 7 incoming or outgoing ptc.g,
			 *  8 write coalescing transactions,
			 *  10 I/O space write,
			 *  11 inter-processor interrupt message(IPI),
			 *  12 interrupt acknowledge or external task
			 *     priority cycle, and any other value.
			 */
			return 0;
		}
	}

	/* Cache error */
	if (state->cc && !state->bc)
		return recover_from_read_error(slidx, peidx, pbci, sos);

	return 0;
}
/*
 * recover_from_tlb_check
 * @peidx: pointer of index of processor error section
 *
 * Inspects the first TLB check entry of the processor error section.
 *
 * Return value:
 *	1 on Success / 0 on Failure
 */
static int
recover_from_tlb_check(peidx_table_t *peidx)
{
	sal_log_mod_error_info_t *smei;
	pal_tlb_check_info_t *ptci;

	/*
	 * peidx_tlb_check() yields NULL when the record carries no TLB
	 * check entry.  Without an entry there is nothing to base a
	 * recovery decision on, so fail instead of dereferencing it.
	 */
	smei = (sal_log_mod_error_info_t *)peidx_tlb_check(peidx, 0);
	if (!smei)
		return fatal_mca("TLB check info not found");
	ptci = (pal_tlb_check_info_t *)&(smei->check_info);

	/*
	 * Look for signature of a duplicate TLB DTC entry, which is
	 * a SW bug and always fatal.
	 */
	if (ptci->op == PAL_TLB_CHECK_OP_PURGE
	    && !(ptci->itr || ptci->dtc || ptci->itc))
		return fatal_mca("Duplicate TLB entry");

	return mca_recovered("TLB check recovered");
}
/**
 * recover_from_processor_error
 * @platform: whether there are some platform error section or not
 * @slidx: pointer of index of SAL error record
 * @peidx: pointer of index of processor error section
 * @pbci: pointer of pal_bus_check_info
 * @sos: pointer to hand off struct between SAL and OS
 *
 * Applies the checks below in order; the first one that matches decides
 * the outcome.
 *
 * Return value:
 * 1 on Success / 0 on Failure
 */
static int
recover_from_processor_error(int platform, slidx_table_t *slidx,
peidx_table_t *peidx, pal_bus_check_info_t *pbci,
struct ia64_sal_os_state *sos)
{
pal_processor_state_info_t *psp =
(pal_processor_state_info_t*)peidx_psp(peidx);
/*
 * Processor recovery status must key off of the PAL recovery
 * status in the Processor State Parameter.
 */
/*
 * The machine check is corrected.
 */
if (psp->cm == 1)
return mca_recovered("machine check is already corrected.");
/*
 * The error was not contained. Software must be reset.
 */
if (psp->us || psp->ci == 0)
return fatal_mca("error not contained");
/*
 * Look for recoverable TLB check
 */
if (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
return recover_from_tlb_check(peidx);
/*
 * The cache check and bus check bits have four possible states
 * cc bc
 * 1 1 Memory error, attempt recovery
 * 1 0 Cache error, attempt recovery
 * 0 1 I/O error, attempt recovery
 * 0 0 Other error type, not recovered
 */
/*
 * NOTE(review): pbci is tested for NULL only when cc is clear, yet it is
 * dereferenced unconditionally further down.  The one caller visible in
 * this file passes the address of a local, so it is never NULL there.
 */
if (psp->cc == 0 && (psp->bc == 0 || pbci == NULL))
return fatal_mca("No cache or bus check");
/*
 * Cannot handle more than one bus check.
 */
if (peidx_bus_check_num(peidx) > 1)
return fatal_mca("Too many bus checks");
if (pbci->ib)
return fatal_mca("Internal Bus error");
if (pbci->eb && pbci->bsi > 0)
return fatal_mca("External bus check fatal status");
/*
 * This is a local MCA and estimated as a recoverable error.
 */
if (platform)
return recover_from_platform_error(slidx, peidx, pbci, sos);
/*
 * On account of strange SAL error record, we cannot recover.
 */
return fatal_mca("Strange SAL record");
}
/**
 * mca_try_to_recover - Try to recover from MCA
 * @rec: pointer to a SAL error record
 * @sos: pointer to hand off struct between SAL and OS
 *
 * Registered as the MCA extension by mca_external_handler_init().
 * Recovery is attempted only for a record with exactly one processor
 * error section and only when the MCA is judged to be local.
 *
 * Return value:
 * 1 on Success / 0 on Failure
 */
static int
mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
{
int platform_err;
int n_proc_err;
slidx_table_t slidx;
peidx_table_t peidx;
pal_bus_check_info_t pbci;
/* Make index of SAL error record */
platform_err = mca_make_slidx(rec, &slidx);
/* Count processor error sections */
n_proc_err = slidx_count(&slidx, proc_err);
/* Now, OS can recover when there is one processor error section */
if (n_proc_err > 1)
return fatal_mca("Too Many Errors");
else if (n_proc_err == 0)
/* Weird SAL record ... We can't do anything */
return fatal_mca("Weird SAL record");
/* Make index of processor error section */
/* exactly one entry is on the list here, so the first entry exists */
mca_make_peidx((sal_log_processor_info_t*)
slidx_first_entry(&slidx.proc_err)->hdr, &peidx);
/* Extract Processor BUS_CHECK[0] */
/*
 * The raw 64-bit check_info is stored over pbci; peidx_check_info()
 * yields 0 when there is no bus check entry or its check_info is not
 * marked valid.
 */
*((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0);
/* Check whether MCA is global or not */
/* MCA_IS_GLOBAL is non-zero, MCA_IS_LOCAL is zero */
if (is_mca_global(&peidx, &pbci, sos))
return fatal_mca("global MCA");
/* Try to recover a processor error */
return recover_from_processor_error(platform_err, &slidx, &peidx,
&pbci, sos);
}
/*
* =============================================================================
*/
/*
 * Module init: allocate the section-index pool, then register
 * mca_try_to_recover() as the MCA extension.  The pool is freed again
 * when registration fails.
 *
 * Returns 0 on success, -ENOMEM when the pool cannot be allocated and
 * -EFAULT when registration fails.
 */
int __init mca_external_handler_init(void)
{
	if (init_record_index_pools() != 0)
		return -ENOMEM;

	/* register external mca handlers */
	if (!ia64_reg_MCA_extension(mca_try_to_recover))
		return 0;

	printk(KERN_ERR "ia64_reg_MCA_extension failed.\n");
	kfree(slidx_pool.buffer);
	return -EFAULT;
}
/*
 * Module exit: unregister the MCA extension first, then release the
 * section-index pool it was using.
 */
void __exit mca_external_handler_exit(void)
{
	ia64_unreg_MCA_extension();	/* unregister external mca handlers */

	kfree(slidx_pool.buffer);
}
module_init(mca_external_handler_init);
module_exit(mca_external_handler_exit);
module_param(sal_rec_max, int, 0644);
MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record");
MODULE_DESCRIPTION("ia64 platform dependent mca handler driver");
MODULE_LICENSE("GPL");

122
arch/ia64/kernel/mca_drv.h Normal file
View file

@ -0,0 +1,122 @@
/*
* File: mca_drv.h
* Purpose: Define helpers for Generic MCA handling
*
* Copyright (C) 2004 FUJITSU LIMITED
* Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
*/
/*
* Processor error section:
*
* +-sal_log_processor_info_t *info-------------+
* | sal_log_section_hdr_t header; |
* | ... |
* | sal_log_mod_error_info_t info[0]; |
* +-+----------------+-------------------------+
* | CACHE_CHECK | ^ num_cache_check v
* +----------------+
* | TLB_CHECK | ^ num_tlb_check v
* +----------------+
* | BUS_CHECK | ^ num_bus_check v
* +----------------+
* | REG_FILE_CHECK | ^ num_reg_file_check v
* +----------------+
* | MS_CHECK | ^ num_ms_check v
* +-struct cpuid_info *id----------------------+
* | regs[5]; |
* | reserved; |
* +-sal_processor_static_info_t *regs----------+
* | valid; |
* | ... |
* | fr[128]; |
* +--------------------------------------------+
*/
/* peidx: index of processor error section */
/*
 * info - the processor error section itself (see peidx_head())
 * id   - cpuid info inside the section (see peidx_mid())
 * regs - static processor info inside the section (see peidx_bottom())
 */
typedef struct peidx_table {
sal_log_processor_info_t *info;
struct sal_cpuid_info *id;
sal_processor_static_info_t *regs;
} peidx_table_t;
/* Accessors for the three pointers of a peidx_table_t (usable as lvalues). */
#define peidx_head(p) (((p)->info))
#define peidx_mid(p) (((p)->id))
#define peidx_bottom(p) (((p)->regs))
/* Addresses of fields inside the indexed section. */
#define peidx_psp(p) (&(peidx_head(p)->proc_state_parameter))
#define peidx_field_valid(p) (&(peidx_head(p)->valid))
#define peidx_minstate_area(p) (&(peidx_bottom(p)->min_state_area))
/* Number of check entries of each kind, taken from the valid bits. */
#define peidx_cache_check_num(p) (peidx_head(p)->valid.num_cache_check)
#define peidx_tlb_check_num(p) (peidx_head(p)->valid.num_tlb_check)
#define peidx_bus_check_num(p) (peidx_head(p)->valid.num_bus_check)
#define peidx_reg_file_check_num(p) (peidx_head(p)->valid.num_reg_file_check)
#define peidx_ms_check_num(p) (peidx_head(p)->valid.num_ms_check)
/*
 * Position of the n-th check of each kind inside info[].  The kinds are
 * laid out back to back (cache, tlb, bus, reg_file, ms), so each index is
 * the end of the previous kind plus n.  The argument n is parenthesized
 * so that an expression can be passed safely.
 */
#define peidx_cache_check_idx(p, n)	(n)
#define peidx_tlb_check_idx(p, n)	(peidx_cache_check_idx(p, peidx_cache_check_num(p)) + (n))
#define peidx_bus_check_idx(p, n)	(peidx_tlb_check_idx(p, peidx_tlb_check_num(p)) + (n))
#define peidx_reg_file_check_idx(p, n)	(peidx_bus_check_idx(p, peidx_bus_check_num(p)) + (n))
#define peidx_ms_check_idx(p, n)	(peidx_reg_file_check_idx(p, peidx_reg_file_check_num(p)) + (n))
/*
 * peidx_mod_error_info(p, name, n) - fetch the n-th entry of check type
 * "name" (cache_check, tlb_check, bus_check, reg_file_check or ms_check).
 *
 * Evaluates to a sal_log_mod_error_info_t pointer into peidx_head(p)->info[],
 * or to NULL when n is not smaller than the number of entries of that type.
 * "n" is parenthesized wherever it is expanded so that expression arguments
 * keep their meaning; it is still expanded more than once, so it must not
 * have side effects.
 *
 * NOTE(review): the bounds check carries a bare BUG marker from the original
 * author; what it refers to is not explained in this header.
 */
#define peidx_mod_error_info(p, name, n) \
({	int __idx = peidx_##name##_idx(p, (n)); \
	sal_log_mod_error_info_t *__ret = NULL; \
	if (peidx_##name##_num(p) > (n)) /*BUG*/ \
		__ret = &(peidx_head(p)->info[__idx]); \
	__ret; })

/* Per-type shorthands for peidx_mod_error_info(). */
#define peidx_cache_check(p, n)		peidx_mod_error_info(p, cache_check, n)
#define peidx_tlb_check(p, n)		peidx_mod_error_info(p, tlb_check, n)
#define peidx_bus_check(p, n)		peidx_mod_error_info(p, bus_check, n)
#define peidx_reg_file_check(p, n)	peidx_mod_error_info(p, reg_file_check, n)
#define peidx_ms_check(p, n)		peidx_mod_error_info(p, ms_check, n)
/*
 * peidx_check_info(proc, name, n) - check_info word of the n-th entry of
 * check type "name", or 0 when the entry does not exist or its
 * valid.check_info flag is clear.
 */
#define peidx_check_info(proc, name, n) \
({ \
	sal_log_mod_error_info_t *__mod = peidx_mod_error_info(proc, name, n); \
	u64 __bits = 0; \
	if (__mod && __mod->valid.check_info) \
		__bits = __mod->check_info; \
	__bits; })
/*
 * slidx: index of SAL log error record.
 *
 * One list node per section: "list" links the node into one of the
 * per-type lists of slidx_table_t, "hdr" points at the section header.
 */
typedef struct slidx_list {
struct list_head list;
sal_log_section_hdr_t *hdr;
} slidx_list_t;
/*
 * Index of one SAL log record: the record header, the number of section
 * headers, and one list head per section type.  The list members are meant
 * to be walked with slidx_foreach_entry()/slidx_count(), which expect nodes
 * of type slidx_list_t.
 */
typedef struct slidx_table {
sal_log_record_header_t *header;
int n_sections; /* # of section headers */
struct list_head proc_err;
struct list_head mem_dev_err;
struct list_head sel_dev_err;
struct list_head pci_bus_err;
struct list_head smbios_dev_err;
struct list_head pci_comp_err;
struct list_head plat_specific_err;
struct list_head host_ctlr_err;
struct list_head plat_bus_err;
struct list_head unsupported; /* list of unsupported sections */
} slidx_table_t;
/* Walk every slidx_list_t node linked into the list at "head". */
#define slidx_foreach_entry(pos, head) \
	list_for_each_entry(pos, head, list)

/* First slidx_list_t node on the list at "head", or NULL if it is empty. */
#define slidx_first_entry(head) \
	(((head)->next == (head)) ? NULL : list_entry((head)->next, slidx_list_t, list))

/* Number of nodes on the list member "sec" of the table "slidx". */
#define slidx_count(slidx, sec) \
({	slidx_list_t *__cur; \
	int __n = 0; \
	slidx_foreach_entry(__cur, &((slidx)->sec)) \
		++__n; \
	__n; })
/*
 * One MCA-recoverable address range.  Both members are stored as
 * location-relative values rather than absolute addresses.
 */
struct mca_table_entry {
int start_addr; /* location-relative starting address of MCA recoverable range */
int end_addr; /* location-relative ending address of MCA recoverable range */
};
/*
 * Helpers implemented outside this header.
 * NOTE(review): presumably search_mca_tables() returns the entry covering
 * addr (or NULL) and mca_recover_range() reports whether the address lies in
 * a recoverable range -- confirm against their definitions.
 */
extern const struct mca_table_entry *search_mca_tables (unsigned long addr);
extern int mca_recover_range(unsigned long);
extern void ia64_mlogbuf_dump(void);

View file

@ -0,0 +1,55 @@
/*
* File: mca_drv_asm.S
* Purpose: Assembly portion of Generic MCA handling
*
* Copyright (C) 2004 FUJITSU LIMITED
* Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
*/
#include <linux/threads.h>
#include <asm/asmmacro.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
/*
 * mca_handler_bhhook - assembly trampoline into the C function mca_handler_bh.
 *
 * Inputs, as labelled by the register moves below:
 *   r8  = poisoned address
 *   r9  = iip
 *   r10 = psr
 *
 * The code allocates a fresh register frame, takes the current task pointer
 * from IA64_KR(CURRENT), points ar.bspstore at task + IA64_RBS_OFFSET and sp
 * at task + IA64_STK_OFFSET - IA64_PT_REGS_SIZE, clears
 * current->thread.on_ustack, sets psr.ic and then psr.i, and calls
 * mca_handler_bh with the three inputs in out0-out2.  The call is annotated
 * as not returning; the instructions after it restore ar.pfs and rp and
 * return 0 in r8 in case it does.
 */
GLOBAL_ENTRY(mca_handler_bhhook)
invala // clear RSE ?
cover
;;
clrrrb
;;
alloc r16=ar.pfs,0,2,3,0 // make a new frame
mov ar.rsc=0
mov r13=IA64_KR(CURRENT) // current task pointer
;;
mov r2=r13
;;
addl r22=IA64_RBS_OFFSET,r2
;;
mov ar.bspstore=r22
addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2
;;
adds r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
;;
st1 [r2]=r0 // clear current->thread.on_ustack flag
mov loc0=r16
movl loc1=mca_handler_bh // recovery C function
;;
mov out0=r8 // poisoned address
mov out1=r9 // iip
mov out2=r10 // psr
mov b6=loc1
;;
// loc1 is free again once b6 holds the call target; reuse it to keep rp
mov loc1=rp
ssm psr.ic
;;
srlz.i
;;
ssm psr.i
br.call.sptk.many rp=b6 // does not return ...
;;
mov ar.pfs=loc0
mov rp=loc1
;;
mov r8=r0
br.ret.sptk.many rp
END(mca_handler_bhhook)

250
arch/ia64/kernel/minstate.h Normal file
View file

@ -0,0 +1,250 @@
#include <asm/cache.h>
#include "entry.h"
#include "paravirt_inst.h"
/*
 * Time-accounting hooks expanded near the end of the save-min macro in this
 * file.  With CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, ACCOUNT_GET_STAMP reads
 * ar.itc into r20 and ACCOUNT_SYS_ENTER calls account_sys_enter; both are
 * predicated on pUStk.  Without the option both expand to nothing.
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/* read ar.itc in advance, and use it before leaving bank 0 */
#define ACCOUNT_GET_STAMP \
(pUStk) mov.m r20=ar.itc;
#define ACCOUNT_SYS_ENTER \
(pUStk) br.call.spnt rp=account_sys_enter \
;;
#else
#define ACCOUNT_GET_STAMP
#define ACCOUNT_SYS_ENTER
#endif
/*
 * Declare the allocatable section ".data..patch.rse" and switch straight
 * back to the previous section.  RSE_WORKAROUND in this file later emits
 * .xdata4 entries into this section.
 */
.section ".data..patch.rse", "a"
.previous
/*
* DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
* the minimum state necessary that allows us to turn psr.ic back
* on.
*
* Assumed state upon entry:
* psr.ic: off
* r31: contains saved predicates (pr)
*
* Upon exit, the state is as follows:
* psr.ic: off
* r2 = points to &pt_regs.r16
* r8 = contents of ar.ccv
* r9 = contents of ar.csd
* r10 = contents of ar.ssd
* r11 = FPSR_DEFAULT
* r12 = kernel sp (kernel virtual address)
* r13 = points to current task_struct (kernel virtual address)
* p15 = TRUE if psr.i is set in cr.ipsr
* predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
* preserved
*
* Note that psr.ic is NOT turned on by this macro. This is so that
* we can pass interruption state as arguments to a handler.
*/
/*
 * Macro parameters of IA64_NATIVE_DO_SAVE_MIN:
 *   __COVER    - expanded right after the initial register reads; may be
 *                empty.
 *   SAVE_IFS   - instruction that loads r30; r30 is what gets stored in the
 *                cr.ifs slot of pt_regs.
 *   EXTRA      - optional instruction expanded once r2 points at
 *                pt_regs.r16, just before the kernel gp is loaded.
 *   WORKAROUND - optional code expanded right after b0 has been copied to
 *                r29; may be empty.
 *
 * NOTE(review): the SAVE_MIN* wrappers at the end of this file invoke
 * DO_SAVE_MIN, which presumably maps to this macro on native builds --
 * confirm against paravirt_inst.h.
 */
#define IA64_NATIVE_DO_SAVE_MIN(__COVER,SAVE_IFS,EXTRA,WORKAROUND) \
mov r16=IA64_KR(CURRENT); /* M */ \
mov r27=ar.rsc; /* M */ \
mov r20=r1; /* A */ \
mov r25=ar.unat; /* M */ \
MOV_FROM_IPSR(p0,r29); /* M */ \
mov r26=ar.pfs; /* I */ \
MOV_FROM_IIP(r28); /* M */ \
mov r21=ar.fpsr; /* M */ \
__COVER; /* B;; (or nothing) */ \
;; \
adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \
;; \
ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \
st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \
adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \
/* switch from user to kernel RBS: */ \
;; \
invala; /* M */ \
SAVE_IFS; \
cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \
;; \
(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
;; \
(pUStk) mov.m r24=ar.rnat; \
(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \
(pKStk) mov r1=sp; /* get sp */ \
;; \
(pUStk) lfetch.fault.excl.nt1 [r22]; \
(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \
;; \
(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \
(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
;; \
(pUStk) mov r18=ar.bsp; \
(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \
adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \
adds r16=PT(CR_IPSR),r1; \
;; \
lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \
st8 [r16]=r29; /* save cr.ipsr */ \
;; \
lfetch.fault.excl.nt1 [r17]; \
tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \
mov r29=b0 \
;; \
WORKAROUND; \
adds r16=PT(R8),r1; /* initialize first base pointer */ \
adds r17=PT(R9),r1; /* initialize second base pointer */ \
(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \
;; \
.mem.offset 0,0; st8.spill [r16]=r8,16; \
.mem.offset 8,0; st8.spill [r17]=r9,16; \
;; \
.mem.offset 0,0; st8.spill [r16]=r10,24; \
.mem.offset 8,0; st8.spill [r17]=r11,24; \
;; \
st8 [r16]=r28,16; /* save cr.iip */ \
st8 [r17]=r30,16; /* save cr.ifs */ \
(pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \
mov r8=ar.ccv; \
mov r9=ar.csd; \
mov r10=ar.ssd; \
movl r11=FPSR_DEFAULT; /* L-unit */ \
;; \
st8 [r16]=r25,16; /* save ar.unat */ \
st8 [r17]=r26,16; /* save ar.pfs */ \
shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \
;; \
st8 [r16]=r27,16; /* save ar.rsc */ \
(pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \
(pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \
;; /* avoid RAW on r16 & r17 */ \
(pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \
st8 [r17]=r31,16; /* save predicates */ \
(pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \
;; \
st8 [r16]=r29,16; /* save b0 */ \
st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \
cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \
;; \
.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \
.mem.offset 8,0; st8.spill [r17]=r12,16; \
adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \
;; \
.mem.offset 0,0; st8.spill [r16]=r13,16; \
.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \
mov r13=IA64_KR(CURRENT); /* establish `current' */ \
;; \
.mem.offset 0,0; st8.spill [r16]=r15,16; \
.mem.offset 8,0; st8.spill [r17]=r14,16; \
;; \
.mem.offset 0,0; st8.spill [r16]=r2,16; \
.mem.offset 8,0; st8.spill [r17]=r3,16; \
ACCOUNT_GET_STAMP \
adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
;; \
EXTRA; \
movl r1=__gp; /* establish kernel global pointer */ \
;; \
ACCOUNT_SYS_ENTER \
bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
;;
/*
* SAVE_REST saves the remainder of pt_regs (with psr.ic on).
*
* Assumed state upon entry:
* psr.ic: on
* r2: points to &pt_regs.r16
* r3: points to &pt_regs.r17
* r8: contents of ar.ccv
* r9: contents of ar.csd
* r10: contents of ar.ssd
* r11: FPSR_DEFAULT
*
* Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
*/
/*
 * Store order used by SAVE_REST: r16-r31 are spilled in pairs through the
 * two post-incremented pointers r2 and r3, with b6 and b7 copied into r18
 * and r19 once those registers have been spilled.  Then r11 is installed as
 * ar.fpsr, r8 (ar.ccv) is stored, f6-f11 are spilled, and finally b6/b7 and
 * r9/r10 (ar.csd/ar.ssd) are stored through r24 and r25, which are computed
 * from r3 using pt_regs field offsets.
 */
#define SAVE_REST \
.mem.offset 0,0; st8.spill [r2]=r16,16; \
.mem.offset 8,0; st8.spill [r3]=r17,16; \
;; \
.mem.offset 0,0; st8.spill [r2]=r18,16; \
.mem.offset 8,0; st8.spill [r3]=r19,16; \
;; \
.mem.offset 0,0; st8.spill [r2]=r20,16; \
.mem.offset 8,0; st8.spill [r3]=r21,16; \
mov r18=b6; \
;; \
.mem.offset 0,0; st8.spill [r2]=r22,16; \
.mem.offset 8,0; st8.spill [r3]=r23,16; \
mov r19=b7; \
;; \
.mem.offset 0,0; st8.spill [r2]=r24,16; \
.mem.offset 8,0; st8.spill [r3]=r25,16; \
;; \
.mem.offset 0,0; st8.spill [r2]=r26,16; \
.mem.offset 8,0; st8.spill [r3]=r27,16; \
;; \
.mem.offset 0,0; st8.spill [r2]=r28,16; \
.mem.offset 8,0; st8.spill [r3]=r29,16; \
;; \
.mem.offset 0,0; st8.spill [r2]=r30,16; \
.mem.offset 8,0; st8.spill [r3]=r31,32; \
;; \
mov ar.fpsr=r11; /* M-unit */ \
st8 [r2]=r8,8; /* ar.ccv */ \
adds r24=PT(B6)-PT(F7),r3; \
;; \
stf.spill [r2]=f6,32; \
stf.spill [r3]=f7,32; \
;; \
stf.spill [r2]=f8,32; \
stf.spill [r3]=f9,32; \
;; \
stf.spill [r2]=f10; \
stf.spill [r3]=f11; \
adds r25=PT(B7)-PT(F11),r3; \
;; \
st8 [r24]=r18,16; /* b6 */ \
st8 [r25]=r19,16; /* b7 */ \
;; \
st8 [r24]=r9; /* ar.csd */ \
st8 [r25]=r10; /* ar.ssd */ \
;;
/*
 * RSE_WORKAROUND - code passed as the WORKAROUND argument of the save-min
 * macro by the SAVE_MIN_WITH_COVER* wrappers.
 *
 * What the code does, step by step:
 *  - On the pKStk path it branches straight to the final label 1.  The
 *    position of that branch (tag 1) is recorded via .xdata4 in section
 *    ".data..patch.rse".
 *    NOTE(review): presumably so the branch can be patched at boot -- confirm.
 *  - On the pUStk path it extracts 6 bits of r18 starting at bit 3, forms
 *    r16 = r18 - r22, and picks a limit of 0x310 (extracted value <= 33) or
 *    0x308 (otherwise).  If r16 is at or below the limit (unsigned compare)
 *    nothing more is done.
 *  - Otherwise it loads ar.pfs with the low 62 bits of r26, clears the
 *    14-bit field at bit 16 of r27, and uses br.ret to continue at label 2,
 *    where it sets ar.rsc to 0, executes flushrs, reloads ar.bspstore from
 *    r22 and re-reads ar.bsp into r18.
 *
 * In the save-min macro r18 holds ar.bsp, r22 the RBS base, r26 the saved
 * ar.pfs and r27 the saved ar.rsc at this point.
 */
#define RSE_WORKAROUND \
(pUStk) extr.u r17=r18,3,6; \
(pUStk) sub r16=r18,r22; \
[1:](pKStk) br.cond.sptk.many 1f; \
.xdata4 ".data..patch.rse",1b-. \
;; \
cmp.ge p6,p7 = 33,r17; \
;; \
(p6) mov r17=0x310; \
(p7) mov r17=0x308; \
;; \
cmp.leu p1,p0=r16,r17; \
(p1) br.cond.sptk.many 1f; \
dep.z r17=r26,0,62; \
movl r16=2f; \
;; \
mov ar.pfs=r17; \
dep r27=r0,r27,16,14; \
mov b0=r16; \
;; \
br.ret.sptk b0; \
;; \
2: \
mov ar.rsc=r0 \
;; \
flushrs; \
;; \
mov ar.bspstore=r22 \
;; \
mov r18=ar.bsp; \
;; \
1: \
.pred.rel "mutex", pKStk, pUStk
/*
 * Ready-made DO_SAVE_MIN variants:
 *   SAVE_MIN_WITH_COVER     - COVER, r30 loaded from cr.ifs, no extra
 *                             instruction, RSE_WORKAROUND applied.
 *   SAVE_MIN_WITH_COVER_R19 - same, plus "mov r15=r19" as the extra
 *                             instruction.
 *   SAVE_MIN                - no cover, r30 set to 0, no extra instruction,
 *                             no workaround.
 */
#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(COVER, mov r30=cr.ifs, , RSE_WORKAROUND)
#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(COVER, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND)
#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, , )

953
arch/ia64/kernel/module.c Normal file
View file

@ -0,0 +1,953 @@
/*
* IA-64-specific support for kernel module loader.
*
* Copyright (C) 2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* Loosely based on patch by Rusty Russell.
*/
/* relocs tested so far:
DIR64LSB
FPTR64LSB
GPREL22
LDXMOV
LTOFF22
LTOFF22X
LTOFF_FPTR22
PCREL21B (for br.call only; br.cond is not supported out of modules!)
PCREL60B (for brl.cond only; brl.call is not supported for modules!)
PCREL64LSB
SECREL32LSB
SEGREL64LSB
*/
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/elf.h>
#include <linux/moduleloader.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <asm/patch.h>
#include <asm/unaligned.h>
/*
 * Set ARCH_MODULE_DEBUG to 1 to route DEBUGP() to printk.  In that mode
 * "inline" is also defined away for the rest of this file.  With the default
 * of 0, DEBUGP() expands to nothing.
 */
#define ARCH_MODULE_DEBUG 0
#if ARCH_MODULE_DEBUG
# define DEBUGP printk
# define inline
#else
# define DEBUGP(fmt , a...)
#endif
/*
 * USE_BRL selects which PLT stub layout is compiled below: the two-bundle
 * movl/brl form when 1, the three-bundle movl/movl/br form when 0.
 * CONFIG_ITANIUM builds get 0, all others get 1.
 */
#ifdef CONFIG_ITANIUM
# define USE_BRL 0
#else
# define USE_BRL 1
#endif
#define MAX_LTOFF ((uint64_t) (1 << 22)) /* max. allowable linkage-table offset */
/*
 * Define some relocation helper macros/types:
 * do_reloc() splits the 8-bit relocation type into a target format (the low
 * FORMAT_BITS bits) and a value formula (the next VALUE_BITS bits).
 */
#define FORMAT_SHIFT 0
#define FORMAT_BITS 3
#define FORMAT_MASK ((1 << FORMAT_BITS) - 1)
#define VALUE_SHIFT 3
#define VALUE_BITS 5
#define VALUE_MASK ((1 << VALUE_BITS) - 1)
/*
 * How a relocated value is written to its target.  Values 0-7 are what the
 * low FORMAT_BITS bits of a relocation type decode to; the remaining ones
 * are assigned explicitly by do_reloc() for specific relocation types.
 */
enum reloc_target_format {
/* direct encoded formats: */
RF_NONE = 0,
RF_INSN14 = 1,
RF_INSN22 = 2,
RF_INSN64 = 3,
RF_32MSB = 4,
RF_32LSB = 5,
RF_64MSB = 6,
RF_64LSB = 7,
/* formats that cannot be directly decoded: */
RF_INSN60,
RF_INSN21B, /* imm21 form 1 */
RF_INSN21M, /* imm21 form 2 */
RF_INSN21F /* imm21 form 3 */
};
/*
 * How the value of a relocation is computed.  do_reloc() obtains this from
 * the relocation type as (r_type >> VALUE_SHIFT) & VALUE_MASK.  In the
 * per-entry comments S is the symbol value, A the addend and P the place
 * being relocated.
 */
enum reloc_value_formula {
RV_DIRECT = 4, /* S + A */
RV_GPREL = 5, /* @gprel(S + A) */
RV_LTREL = 6, /* @ltoff(S + A) */
RV_PLTREL = 7, /* @pltoff(S + A) */
RV_FPTR = 8, /* @fptr(S + A) */
RV_PCREL = 9, /* S + A - P */
RV_LTREL_FPTR = 10, /* @ltoff(@fptr(S + A)) */
RV_SEGREL = 11, /* @segrel(S + A) */
RV_SECREL = 12, /* @secrel(S + A) */
RV_BDREL = 13, /* BD + A */
RV_LTV = 14, /* S + A (like RV_DIRECT, except frozen at static link-time) */
RV_PCREL2 = 15, /* S + A - P */
RV_SPECIAL = 16, /* various (see below) */
RV_RSVD17 = 17,
RV_TPREL = 18, /* @tprel(S + A) */
RV_LTREL_TPREL = 19, /* @ltoff(@tprel(S + A)) */
RV_DTPMOD = 20, /* @dtpmod(S + A) */
RV_LTREL_DTPMOD = 21, /* @ltoff(@dtpmod(S + A)) */
RV_DTPREL = 22, /* @dtprel(S + A) */
RV_LTREL_DTPREL = 23, /* @ltoff(@dtprel(S + A)) */
RV_RSVD24 = 24,
RV_RSVD25 = 25,
RV_RSVD26 = 26,
RV_RSVD27 = 27
/* 28-31 reserved for implementation-specific purposes. */
};
/*
 * Printable names of relocation types, indexed by R_IA64_* value.  N(x)
 * expands to a designated initializer storing the string "x" at index
 * R_IA64_x.  Slots without an initializer stay NULL, which is why users of
 * the table test the entry before printing it.
 */
#define N(reloc) [R_IA64_##reloc] = #reloc
static const char *reloc_name[256] = {
N(NONE), N(IMM14), N(IMM22), N(IMM64),
N(DIR32MSB), N(DIR32LSB), N(DIR64MSB), N(DIR64LSB),
N(GPREL22), N(GPREL64I), N(GPREL32MSB), N(GPREL32LSB),
N(GPREL64MSB), N(GPREL64LSB), N(LTOFF22), N(LTOFF64I),
N(PLTOFF22), N(PLTOFF64I), N(PLTOFF64MSB), N(PLTOFF64LSB),
N(FPTR64I), N(FPTR32MSB), N(FPTR32LSB), N(FPTR64MSB),
N(FPTR64LSB), N(PCREL60B), N(PCREL21B), N(PCREL21M),
N(PCREL21F), N(PCREL32MSB), N(PCREL32LSB), N(PCREL64MSB),
N(PCREL64LSB), N(LTOFF_FPTR22), N(LTOFF_FPTR64I), N(LTOFF_FPTR32MSB),
N(LTOFF_FPTR32LSB), N(LTOFF_FPTR64MSB), N(LTOFF_FPTR64LSB), N(SEGREL32MSB),
N(SEGREL32LSB), N(SEGREL64MSB), N(SEGREL64LSB), N(SECREL32MSB),
N(SECREL32LSB), N(SECREL64MSB), N(SECREL64LSB), N(REL32MSB),
N(REL32LSB), N(REL64MSB), N(REL64LSB), N(LTV32MSB),
N(LTV32LSB), N(LTV64MSB), N(LTV64LSB), N(PCREL21BI),
N(PCREL22), N(PCREL64I), N(IPLTMSB), N(IPLTLSB),
N(COPY), N(LTOFF22X), N(LDXMOV), N(TPREL14),
N(TPREL22), N(TPREL64I), N(TPREL64MSB), N(TPREL64LSB),
N(LTOFF_TPREL22), N(DTPMOD64MSB), N(DTPMOD64LSB), N(LTOFF_DTPMOD22),
N(DTPREL14), N(DTPREL22), N(DTPREL64I), N(DTPREL32MSB),
N(DTPREL32LSB), N(DTPREL64MSB), N(DTPREL64LSB), N(LTOFF_DTPREL22)
};
#undef N
/*
 * Instruction "pointers" are addresses whose low bits carry a slot number.
 * The type is left incomplete on purpose so that nobody dereferences one.
 */
struct insn;

/* Address of INSN with its low four bits cleared. */
static inline uint64_t
bundle (const struct insn *insn)
{
	const uint64_t addr = (uint64_t) insn;

	return addr & ~0xfUL;
}

/* Low two bits of the address of INSN. */
static inline int
slot (const struct insn *insn)
{
	const uint64_t addr = (uint64_t) insn;

	return addr & 0x3;
}
/*
 * Patch a 64-bit immediate into the instruction at INSN, which must sit in
 * slot 2.  Returns 1 on success, 0 (after logging) on a bad slot.
 */
static int
apply_imm64 (struct module *mod, struct insn *insn, uint64_t val)
{
	const int where = slot(insn);

	if (where == 2) {
		ia64_patch_imm64((u64) insn, val);
		return 1;
	}

	printk(KERN_ERR "%s: invalid slot number %d for IMM64\n",
	       mod->name, where);
	return 0;
}
/*
 * Patch a 60-bit immediate into the instruction at INSN, which must sit in
 * slot 2.  VAL has to fit into a signed 60-bit field.  Returns 1 on success,
 * 0 (after logging) otherwise.
 */
static int
apply_imm60 (struct module *mod, struct insn *insn, uint64_t val)
{
	const int where = slot(insn);
	int fits;

	if (where != 2) {
		printk(KERN_ERR "%s: invalid slot number %d for IMM60\n",
		       mod->name, where);
		return 0;
	}

	/* biasing by 2^59 maps the signed range onto [0, 2^60) */
	fits = val + ((uint64_t) 1 << 59) < (1UL << 60);
	if (!fits) {
		printk(KERN_ERR "%s: value %ld out of IMM60 range\n",
		       mod->name, (long) val);
		return 0;
	}

	ia64_patch_imm60((u64) insn, val);
	return 1;
}
/*
 * Patch a 22-bit immediate into the instruction at INSN.  VAL has to fit
 * into a signed 22-bit field.  Returns 1 on success, 0 (after logging) when
 * VAL is out of range.
 */
static int
apply_imm22 (struct module *mod, struct insn *insn, uint64_t val)
{
	uint64_t field;

	if (val + (1 << 21) >= (1 << 22)) {
		printk(KERN_ERR "%s: value %li out of IMM22 range\n",
		       mod->name, (long)val);
		return 0;
	}

	/* scatter the immediate over its four instruction fields */
	field  = (val & 0x200000UL) << 15;	/* bit 21 -> 36 */
	field |= (val & 0x1f0000UL) <<  6;	/* bit 16 -> 22 */
	field |= (val & 0x00ff80UL) << 20;	/* bit  7 -> 27 */
	field |= (val & 0x00007fUL) << 13;	/* bit  0 -> 13 */

	ia64_patch((u64) insn, 0x01fffcfe000UL, field);
	return 1;
}
/*
 * Patch a 21-bit branch immediate into the instruction at INSN.  VAL has to
 * fit into a signed 21-bit field.  Returns 1 on success, 0 (after logging)
 * when VAL is out of range.
 */
static int
apply_imm21b (struct module *mod, struct insn *insn, uint64_t val)
{
	uint64_t field;

	if (val + (1 << 20) >= (1 << 21)) {
		printk(KERN_ERR "%s: value %li out of IMM21b range\n",
		       mod->name, (long)val);
		return 0;
	}

	field  = (val & 0x100000UL) << 16;	/* bit 20 -> 36 */
	field |= (val & 0x0fffffUL) << 13;	/* bit  0 -> 13 */

	ia64_patch((u64) insn, 0x11ffffe000UL, field);
	return 1;
}
#if USE_BRL
/* PLT stub for the USE_BRL case. */
struct plt_entry {
/* Two 16-byte instruction bundles in PLT. */
unsigned char bundle[2][16];
};
/*
 * Unpatched PLT stub: patch_plt() fills the target gp into the movl of
 * bundle 0 and the bundle-relative target offset into the brl of bundle 1.
 */
static const struct plt_entry ia64_plt_template = {
{
{
0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /* movl gp=TARGET_GP */
0x00, 0x00, 0x00, 0x60
},
{
0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* brl.many TARGET_IP */
0x08, 0x00, 0x00, 0xc0
}
}
};
/*
 * Fill a PLT stub: the target gp goes into the movl of bundle 0, the
 * distance to TARGET_IP (in 16-byte units, relative to bundle 1) into the
 * brl of bundle 1.  Returns 1 on success, 0 if either patch failed.
 */
static int
patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp)
{
	struct insn *movl_insn = (struct insn *) (plt->bundle[0] + 2);
	struct insn *brl_insn = (struct insn *) (plt->bundle[1] + 2);

	if (!apply_imm64(mod, movl_insn, target_gp))
		return 0;
	if (!apply_imm60(mod, brl_insn,
			 (target_ip - (int64_t) plt->bundle[1]) / 16))
		return 0;
	return 1;
}
/*
 * Recover the branch target of a PLT stub by decoding the immediate of the
 * brl in bundle 1 and adding it (scaled by 16) to the bundle address.
 */
unsigned long
plt_target (struct plt_entry *plt)
{
	uint64_t *words = (uint64_t *) plt->bundle[1];
	uint64_t lo = words[0], hi = words[1];
	uint64_t imm20b, imm39, sign;
	long off;

	imm20b = (hi & 0x00fffff000000000UL) >> 36;			/* imm20b -> bit 0 */
	imm39 = ((lo >> 48) << 20) | ((hi & 0x7fffffUL) << 36);		/* imm39 -> bit 20 */
	sign = (hi & 0x0800000000000000UL) << 0;			/* i -> bit 59 */

	off = imm20b | imm39 | sign;
	return (long) plt->bundle[1] + 16*off;
}
#else /* !USE_BRL */
/* PLT stub for the !USE_BRL case. */
struct plt_entry {
/* Three instruction bundles in PLT. */
unsigned char bundle[3][16];
};
/*
 * Unpatched PLT stub: patch_plt() fills the target ip into the movl of
 * bundle 0 and the target gp into the movl of bundle 1; bundle 2 branches
 * through b6.
 */
static const struct plt_entry ia64_plt_template = {
{
{
0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* movl r16=TARGET_IP */
0x02, 0x00, 0x00, 0x60
},
{
0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /* movl gp=TARGET_GP */
0x00, 0x00, 0x00, 0x60
},
{
0x11, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MIB] nop.m 0 */
0x60, 0x80, 0x04, 0x80, 0x03, 0x00, /* mov b6=r16 */
0x60, 0x00, 0x80, 0x00 /* br.few b6 */
}
}
};
/*
 * Fill a PLT stub: TARGET_IP goes into the movl of bundle 0, TARGET_GP into
 * the movl of bundle 1.  Returns 1 on success, 0 if either patch failed.
 */
static int
patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp)
{
	struct insn *ip_insn = (struct insn *) (plt->bundle[0] + 2);
	struct insn *gp_insn = (struct insn *) (plt->bundle[1] + 2);

	if (!apply_imm64(mod, ip_insn, target_ip))
		return 0;
	if (!apply_imm64(mod, gp_insn, target_gp))
		return 0;
	return 1;
}
/*
 * Recover the branch target of a PLT stub by reassembling the 64-bit
 * immediate of the movl in bundle 0 from its instruction fields.
 */
unsigned long
plt_target (struct plt_entry *plt)
{
	uint64_t *words = (uint64_t *) plt->bundle[0];
	uint64_t lo = words[0], hi = words[1];
	uint64_t ip;

	ip  = (hi & 0x000007f000000000) >> 36;			/* imm7b -> bit 0 */
	ip |= (hi & 0x07fc000000000000) >> 43;			/* imm9d -> bit 7 */
	ip |= (hi & 0x0003e00000000000) >> 29;			/* imm5c -> bit 16 */
	ip |= (hi & 0x0000100000000000) >> 23;			/* ic -> bit 21 */
	ip |= ((lo >> 46) << 22) | ((hi & 0x7fffff) << 40);	/* imm41 -> bit 22 */
	ip |= (hi & 0x0800000000000000) << 4;			/* i -> bit 63 */

	return ip;
}
#endif /* !USE_BRL */
/*
 * Release a module memory region.  When the region being freed is the
 * module's init region and an init unwind table is registered, that table is
 * removed first.
 */
void
module_free (struct module *mod, void *module_region)
{
	if (mod && mod->arch.init_unw_table) {
		if (module_region == mod->module_init) {
			unw_remove_unwind_table(mod->arch.init_unw_table);
			mod->arch.init_unw_table = NULL;
		}
	}

	vfree(module_region);
}
/*
 * Return 1 if an entry before index NUM has the same r_info and r_addend as
 * entry NUM, 0 otherwise.
 * FIXME: we could look in other sections, too --RR
 */
static int
duplicate_reloc (const Elf64_Rela *rela, unsigned int num)
{
	const Elf64_Rela *cur = &rela[num];
	const Elf64_Rela *prev;

	for (prev = rela; prev < cur; ++prev) {
		if (prev->r_info != cur->r_info)
			continue;
		if (prev->r_addend == cur->r_addend)
			return 1;
	}
	return 0;
}
/*
 * Count how many GOT entries the first NUM relocations may need: one per
 * distinct linkage-table relocation.
 */
static unsigned int
count_gots (const Elf64_Rela *rela, unsigned int num)
{
	unsigned int idx = 0, needed = 0;

	/* Quadratic because of duplicate_reloc(), but the tables are usually
	   short and this is not time critical. */
	while (idx < num) {
		switch (ELF64_R_TYPE(rela[idx].r_info)) {
		case R_IA64_LTOFF22:
		case R_IA64_LTOFF22X:
		case R_IA64_LTOFF64I:
		case R_IA64_LTOFF_FPTR22:
		case R_IA64_LTOFF_FPTR64I:
		case R_IA64_LTOFF_FPTR32MSB:
		case R_IA64_LTOFF_FPTR32LSB:
		case R_IA64_LTOFF_FPTR64MSB:
		case R_IA64_LTOFF_FPTR64LSB:
			needed += !duplicate_reloc(rela, idx);
			break;
		}
		++idx;
	}
	return needed;
}
/*
 * Count how many PLT entries the first NUM relocations may need: one per
 * distinct relocation that can require a PLT stub.
 */
static unsigned int
count_plts (const Elf64_Rela *rela, unsigned int num)
{
	unsigned int idx = 0, needed = 0;

	/* Quadratic because of duplicate_reloc(), but the tables are usually
	   short and this is not time critical. */
	while (idx < num) {
		switch (ELF64_R_TYPE(rela[idx].r_info)) {
		case R_IA64_PCREL21B:
		case R_IA64_PLTOFF22:
		case R_IA64_PLTOFF64I:
		case R_IA64_PLTOFF64MSB:
		case R_IA64_PLTOFF64LSB:
		case R_IA64_IPLTMSB:
		case R_IA64_IPLTLSB:
			needed += !duplicate_reloc(rela, idx);
			break;
		}
		++idx;
	}
	return needed;
}
/*
 * Count how many function descriptors the first NUM relocations may need.
 * A function descriptor has to be created for any internal function which
 * is referenced.
 */
static unsigned int
count_fdescs (const Elf64_Rela *rela, unsigned int num)
{
	unsigned int idx = 0, needed = 0;

	/* Quadratic because of duplicate_reloc(), but the tables are usually
	   short and this is not time critical. */
	while (idx < num) {
		switch (ELF64_R_TYPE(rela[idx].r_info)) {
		case R_IA64_FPTR64I:
		case R_IA64_FPTR32LSB:
		case R_IA64_FPTR32MSB:
		case R_IA64_FPTR64LSB:
		case R_IA64_FPTR64MSB:
		case R_IA64_LTOFF_FPTR22:
		case R_IA64_LTOFF_FPTR32LSB:
		case R_IA64_LTOFF_FPTR32MSB:
		case R_IA64_LTOFF_FPTR64I:
		case R_IA64_LTOFF_FPTR64LSB:
		case R_IA64_LTOFF_FPTR64MSB:
		case R_IA64_IPLTMSB:
		case R_IA64_IPLTLSB:
		/*
		 * Jumps to static functions sometimes go straight to their
		 * offset.  Of course, that may not be possible if the jump is
		 * from init -> core or vice versa, so we need to generate an
		 * FDESC (and PLT etc) for that.
		 */
		case R_IA64_PCREL21B:
			needed += !duplicate_reloc(rela, idx);
			break;
		}
		++idx;
	}
	return needed;
}
/*
 * Locate the arch-specific sections of a module (.core.plt, .init.plt, .got,
 * .opd, .IA_64.unwind and, with CONFIG_PARAVIRT, the paravirt sections) and
 * size the first four from the relocation tables.
 *
 * Returns 0 on success or -ENOEXEC when one of .core.plt, .init.plt, .got
 * or .opd is missing.
 */
int
module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings,
			   struct module *mod)
{
	unsigned long core_plts = 0, init_plts = 0, gots = 0, fdescs = 0;
	Elf64_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;

	/* Remember the section headers that hold PLTs, GOT, function
	   descriptors and unwind info. */
	for (s = sechdrs; s < sechdrs_end; ++s) {
		const char *name = secstrings + s->sh_name;

		if (strcmp(".core.plt", name) == 0)
			mod->arch.core_plt = s;
		else if (strcmp(".init.plt", name) == 0)
			mod->arch.init_plt = s;
		else if (strcmp(".got", name) == 0)
			mod->arch.got = s;
		else if (strcmp(".opd", name) == 0)
			mod->arch.opd = s;
		else if (strcmp(".IA_64.unwind", name) == 0)
			mod->arch.unwind = s;
#ifdef CONFIG_PARAVIRT
		else if (strcmp(".paravirt_bundles", name) == 0)
			mod->arch.paravirt_bundles = s;
		else if (strcmp(".paravirt_insts", name) == 0)
			mod->arch.paravirt_insts = s;
#endif
	}

	if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) {
		printk(KERN_ERR "%s: sections missing\n", mod->name);
		return -ENOEXEC;
	}

	/* GOT and PLTs can occur in any relocated section... */
	for (s = sechdrs + 1; s < sechdrs_end; ++s) {
		const Elf64_Rela *rels;
		unsigned long numrels;
		unsigned long *plts;

		if (s->sh_type != SHT_RELA)
			continue;

		rels = (void *)ehdr + s->sh_offset;
		numrels = s->sh_size/sizeof(Elf64_Rela);

		gots += count_gots(rels, numrels);
		fdescs += count_fdescs(rels, numrels);

		/* relocation sections with ".init" in their name feed the init PLT */
		plts = strstr(secstrings + s->sh_name, ".init") ? &init_plts : &core_plts;
		*plts += count_plts(rels, numrels);
	}

	mod->arch.core_plt->sh_type = SHT_NOBITS;
	mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
	mod->arch.core_plt->sh_addralign = 16;
	mod->arch.core_plt->sh_size = core_plts * sizeof(struct plt_entry);

	mod->arch.init_plt->sh_type = SHT_NOBITS;
	mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
	mod->arch.init_plt->sh_addralign = 16;
	mod->arch.init_plt->sh_size = init_plts * sizeof(struct plt_entry);

	mod->arch.got->sh_type = SHT_NOBITS;
	mod->arch.got->sh_flags = ARCH_SHF_SMALL | SHF_ALLOC;
	mod->arch.got->sh_addralign = 8;
	mod->arch.got->sh_size = gots * sizeof(struct got_entry);

	mod->arch.opd->sh_type = SHT_NOBITS;
	mod->arch.opd->sh_flags = SHF_ALLOC;
	mod->arch.opd->sh_addralign = 8;
	mod->arch.opd->sh_size = fdescs * sizeof(struct fdesc);

	DEBUGP("%s: core.plt=%lx, init.plt=%lx, got=%lx, fdesc=%lx\n",
	       __func__, mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size,
	       mod->arch.got->sh_size, mod->arch.opd->sh_size);
	return 0;
}
/* Non-zero if ADDR lies inside the module's init region. */
static inline int
in_init (const struct module *mod, uint64_t addr)
{
	const uint64_t offset = addr - (uint64_t) mod->module_init;

	/* unsigned wrap-around makes addresses below the base fail too */
	return offset < mod->init_size;
}
/* Non-zero if ADDR lies inside the module's core region. */
static inline int
in_core (const struct module *mod, uint64_t addr)
{
	const uint64_t offset = addr - (uint64_t) mod->module_core;

	/* unsigned wrap-around makes addresses below the base fail too */
	return offset < mod->core_size;
}
/* Non-zero if VALUE lies inside the module's init or core region. */
static inline int
is_internal (const struct module *mod, uint64_t value)
{
	if (in_init(mod, value))
		return 1;
	return in_core(mod, value);
}
/*
 * Return the gp-relative offset of the linkage-table (GOT) entry holding
 * VALUE, creating the entry if no existing one matches.  Returns 0 without
 * doing anything when *OKP is already 0.
 */
static uint64_t
get_ltoff (struct module *mod, uint64_t value, int *okp)
{
	struct got_entry *got, *used_end, *e;

	if (!*okp)
		return 0;

	got = (void *) mod->arch.got->sh_addr;
	used_end = got + mod->arch.next_got_entry;

	/* reuse an existing entry if one already holds this value */
	for (e = got; e < used_end; ++e) {
		if (e->val == value)
			return (uint64_t) e - mod->arch.gp;
	}

	/* Not enough GOT entries? */
	BUG_ON(e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size));

	e->val = value;
	++mod->arch.next_got_entry;
	return (uint64_t) e - mod->arch.gp;
}
/* Non-zero if VALUE is within MAX_LTOFF/2 below or above the module's gp. */
static inline int
gp_addressable (struct module *mod, uint64_t value)
{
	const uint64_t biased = value - mod->arch.gp + MAX_LTOFF/2;

	return biased < MAX_LTOFF;
}
/*
 * Return the address of a PLT stub that branches to the function whose
 * descriptor VALUE points at, creating the stub if none exists yet.  The
 * stub is taken from the init PLT when INSN lies in the init region and from
 * the core PLT otherwise.  Returns 0 on failure (and clears *OKP), or
 * immediately when *OKP is already 0.
 */
static uint64_t
get_plt (struct module *mod, const struct insn *insn, uint64_t value, int *okp)
{
	struct plt_entry *plt, *plt_end;
	uint64_t target_ip, target_gp;
	Elf64_Shdr *sec;

	if (!*okp)
		return 0;

	sec = in_init(mod, (uint64_t) insn) ? mod->arch.init_plt : mod->arch.core_plt;
	plt = (void *) sec->sh_addr;
	plt_end = (void *) plt + sec->sh_size;

	/* "value" is a pointer to a function-descriptor; fetch the target ip/gp from it: */
	target_ip = ((uint64_t *) value)[0];
	target_gp = ((uint64_t *) value)[1];

	/* Look for existing PLT entry; a zero first byte marks an unused slot. */
	for (; plt->bundle[0][0]; ) {
		if (plt_target(plt) == target_ip)
			return (uint64_t) plt;
		if (++plt >= plt_end)
			BUG();
	}

	*plt = ia64_plt_template;
	if (!patch_plt(mod, plt, target_ip, target_gp)) {
		*okp = 0;
		return 0;
	}
#if ARCH_MODULE_DEBUG
	if (plt_target(plt) != target_ip) {
		printk("%s: mistargeted PLT: wanted %lx, got %lx\n",
		       __func__, target_ip, plt_target(plt));
		*okp = 0;
		return 0;
	}
#endif
	return (uint64_t) plt;
}
/*
 * Get function descriptor for VALUE.  For addresses outside the module the
 * value is returned unchanged; for module-local entry points a descriptor is
 * looked up in, or added to, the .opd section.  Returns 0 when *OKP is
 * already 0 or when VALUE is 0 (the latter is logged).
 */
static uint64_t
get_fdesc (struct module *mod, uint64_t value, int *okp)
{
	struct fdesc *fdesc = (void *) mod->arch.opd->sh_addr;
	uint64_t opd_end;

	if (!*okp)
		return 0;

	if (!value) {
		printk(KERN_ERR "%s: fdesc for zero requested!\n", mod->name);
		return 0;
	}

	/*
	 * If it's not a module-local entry-point, "value" already points to a
	 * function-descriptor.
	 */
	if (!is_internal(mod, value))
		return value;

	/* Look for existing function descriptor; ip == 0 marks a free slot. */
	opd_end = mod->arch.opd->sh_addr + mod->arch.opd->sh_size;
	for (; fdesc->ip; ) {
		if (fdesc->ip == value)
			return (uint64_t)fdesc;
		if ((uint64_t) ++fdesc >= opd_end)
			BUG();
	}

	/* Create new one */
	fdesc->ip = value;
	fdesc->gp = mod->arch.gp;
	return (uint64_t) fdesc;
}
static inline int
do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend,
Elf64_Shdr *sec, void *location)
{
enum reloc_target_format format = (r_type >> FORMAT_SHIFT) & FORMAT_MASK;
enum reloc_value_formula formula = (r_type >> VALUE_SHIFT) & VALUE_MASK;
uint64_t val;
int ok = 1;
val = sym->st_value + addend;
switch (formula) {
case RV_SEGREL: /* segment base is arbitrarily chosen to be 0 for kernel modules */
case RV_DIRECT:
break;
case RV_GPREL: val -= mod->arch.gp; break;
case RV_LTREL: val = get_ltoff(mod, val, &ok); break;
case RV_PLTREL: val = get_plt(mod, location, val, &ok); break;
case RV_FPTR: val = get_fdesc(mod, val, &ok); break;
case RV_SECREL: val -= sec->sh_addr; break;
case RV_LTREL_FPTR: val = get_ltoff(mod, get_fdesc(mod, val, &ok), &ok); break;
case RV_PCREL:
switch (r_type) {
case R_IA64_PCREL21B:
if ((in_init(mod, val) && in_core(mod, (uint64_t)location)) ||
(in_core(mod, val) && in_init(mod, (uint64_t)location))) {
/*
* Init section may have been allocated far away from core,
* if the branch won't reach, then allocate a plt for it.
*/
uint64_t delta = ((int64_t)val - (int64_t)location) / 16;
if (delta + (1 << 20) >= (1 << 21)) {
val = get_fdesc(mod, val, &ok);
val = get_plt(mod, location, val, &ok);
}
} else if (!is_internal(mod, val))
val = get_plt(mod, location, val, &ok);
/* FALL THROUGH */
default:
val -= bundle(location);
break;
case R_IA64_PCREL32MSB:
case R_IA64_PCREL32LSB:
case R_IA64_PCREL64MSB:
case R_IA64_PCREL64LSB:
val -= (uint64_t) location;
break;
}
switch (r_type) {
case R_IA64_PCREL60B: format = RF_INSN60; break;
case R_IA64_PCREL21B: format = RF_INSN21B; break;
case R_IA64_PCREL21M: format = RF_INSN21M; break;
case R_IA64_PCREL21F: format = RF_INSN21F; break;
default: break;
}
break;
case RV_BDREL:
val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core);
break;
case RV_LTV:
/* can link-time value relocs happen here? */
BUG();
break;
case RV_PCREL2:
if (r_type == R_IA64_PCREL21BI) {
if (!is_internal(mod, val)) {
printk(KERN_ERR "%s: %s reloc against "
"non-local symbol (%lx)\n", __func__,
reloc_name[r_type], (unsigned long)val);
return -ENOEXEC;
}
format = RF_INSN21B;
}
val -= bundle(location);
break;
case RV_SPECIAL:
switch (r_type) {
case R_IA64_IPLTMSB:
case R_IA64_IPLTLSB:
val = get_fdesc(mod, get_plt(mod, location, val, &ok), &ok);
format = RF_64LSB;
if (r_type == R_IA64_IPLTMSB)
format = RF_64MSB;
break;
case R_IA64_SUB:
val = addend - sym->st_value;
format = RF_INSN64;
break;
case R_IA64_LTOFF22X:
if (gp_addressable(mod, val))
val -= mod->arch.gp;
else
val = get_ltoff(mod, val, &ok);
format = RF_INSN22;
break;
case R_IA64_LDXMOV:
if (gp_addressable(mod, val)) {
/* turn "ld8" into "mov": */
DEBUGP("%s: patching ld8 at %p to mov\n", __func__, location);
ia64_patch((u64) location, 0x1fff80fe000UL, 0x10000000000UL);
}
return 0;
default:
if (reloc_name[r_type])
printk(KERN_ERR "%s: special reloc %s not supported",
mod->name, reloc_name[r_type]);
else
printk(KERN_ERR "%s: unknown special reloc %x\n",
mod->name, r_type);
return -ENOEXEC;
}
break;
case RV_TPREL:
case RV_LTREL_TPREL:
case RV_DTPMOD:
case RV_LTREL_DTPMOD:
case RV_DTPREL:
case RV_LTREL_DTPREL:
printk(KERN_ERR "%s: %s reloc not supported\n",
mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?");
return -ENOEXEC;
default:
printk(KERN_ERR "%s: unknown reloc %x\n", mod->name, r_type);
return -ENOEXEC;
}
if (!ok)
return -ENOEXEC;
DEBUGP("%s: [%p]<-%016lx = %s(%lx)\n", __func__, location, val,
reloc_name[r_type] ? reloc_name[r_type] : "?", sym->st_value + addend);
switch (format) {
case RF_INSN21B: ok = apply_imm21b(mod, location, (int64_t) val / 16); break;
case RF_INSN22: ok = apply_imm22(mod, location, val); break;
case RF_INSN64: ok = apply_imm64(mod, location, val); break;
case RF_INSN60: ok = apply_imm60(mod, location, (int64_t) val / 16); break;
case RF_32LSB: put_unaligned(val, (uint32_t *) location); break;
case RF_64LSB: put_unaligned(val, (uint64_t *) location); break;
case RF_32MSB: /* ia64 Linux is little-endian... */
case RF_64MSB: /* ia64 Linux is little-endian... */
case RF_INSN14: /* must be within-module, i.e., resolved by "ld -r" */
case RF_INSN21M: /* must be within-module, i.e., resolved by "ld -r" */
case RF_INSN21F: /* must be within-module, i.e., resolved by "ld -r" */
printk(KERN_ERR "%s: format %u needed by %s reloc is not supported\n",
mod->name, format, reloc_name[r_type] ? reloc_name[r_type] : "?");
return -ENOEXEC;
default:
printk(KERN_ERR "%s: relocation %s resulted in unknown format %u\n",
mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?", format);
return -ENOEXEC;
}
return ok ? 0 : -ENOEXEC;
}
/*
 * Apply every relocation of section RELSEC to the section it targets.
 *
 * On the first call for a module (mod->arch.gp still 0) the module's global
 * pointer is chosen as well.  Returns 0 on success or the first negative
 * error reported by do_reloc().
 */
int
apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex,
		    unsigned int relsec, struct module *mod)
{
	Elf64_Shdr *relhdr = &sechdrs[relsec];
	Elf64_Shdr *target_sec = sechdrs + relhdr->sh_info;
	Elf64_Rela *rela = (void *) relhdr->sh_addr;
	unsigned int n = relhdr->sh_size / sizeof(Elf64_Rela);
	Elf64_Sym *symtab;
	unsigned int i;
	int ret;

	DEBUGP("%s: applying section %u (%u relocs) to %u\n", __func__,
	       relsec, n, sechdrs[relsec].sh_info);

	/*
	 * If target section wasn't allocated, we don't need to relocate it.
	 * Happens, e.g., for debug sections.
	 */
	if (target_sec->sh_entsize == ~0UL)
		return 0;

	if (!mod->arch.gp) {
		/*
		 * XXX Should have an arch-hook for running this after final section
		 *     addresses have been selected...
		 */
		uint64_t gp = mod->core_size / 2;

		/*
		 * For large modules this takes advantage of the fact that
		 * ARCH_SHF_SMALL sections get allocated at the end of the module.
		 */
		if (mod->core_size > MAX_LTOFF)
			gp = mod->core_size - MAX_LTOFF / 2;

		/* round the offset up to a multiple of 8 */
		gp = (uint64_t) mod->module_core + ((gp + 7) & -8);
		mod->arch.gp = gp;
		DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp);
	}

	symtab = (Elf64_Sym *) sechdrs[symindex].sh_addr;
	for (i = 0; i < n; i++) {
		const Elf64_Rela *r = &rela[i];

		ret = do_reloc(mod, ELF64_R_TYPE(r->r_info),
			       symtab + ELF64_R_SYM(r->r_info),
			       r->r_addend, target_sec,
			       (void *) target_sec->sh_addr + r->r_offset);
		if (ret < 0)
			return ret;
	}
	return 0;
}
/*
* Modules contain a single unwind table which covers both the core and the init text
* sections but since the two are not contiguous, we need to split this table up such that
* we can register (and unregister) each "segment" separately. Fortunately, this sounds
* more complicated than it really is.
*/
static void
register_unwind_table (struct module *mod)
{
	struct unw_table_entry *start = (void *) mod->arch.unwind->sh_addr;
	struct unw_table_entry *end = start + mod->arch.unwind->sh_size / sizeof (*start);
	struct unw_table_entry tmp, *e1, *e2, *core, *init;
	unsigned long num_init = 0, num_core = 0;

	/*
	 * An empty table has nothing to sort or register, and start[0] must not
	 * be inspected in that case.
	 */
	if (start >= end)
		return;

	/* First, count how many init and core unwind-table entries there are.  */
	for (e1 = start; e1 < end; ++e1)
		if (in_init(mod, e1->start_offset))
			++num_init;
		else
			++num_core;
	/*
	 * Second, sort the table by ascending start_offset so that all unwind-table
	 * entries for the init and core text sections are nicely separated.  We do
	 * this with a simple quadratic exchange sort (unwind tables don't get
	 * ridiculously huge).
	 */
	for (e1 = start; e1 < end; ++e1) {
		for (e2 = e1 + 1; e2 < end; ++e2) {
			if (e2->start_offset < e1->start_offset) {
				tmp = *e1;
				*e1 = *e2;
				*e2 = tmp;
			}
		}
	}
	/*
	 * Third, locate the init and core segments in the unwind table: whichever
	 * kind the first (lowest) entry belongs to comes first, the other kind
	 * follows immediately after it.
	 */
	if (in_init(mod, start->start_offset)) {
		init = start;
		core = start + num_init;
	} else {
		core = start;
		init = start + num_core;
	}

	DEBUGP("%s: name=%s, gp=%lx, num_init=%lu, num_core=%lu\n", __func__,
	       mod->name, mod->arch.gp, num_init, num_core);

	/*
	 * Fourth, register both tables (if not empty).
	 */
	if (num_core > 0) {
		mod->arch.core_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp,
								core, core + num_core);
		DEBUGP("%s:  core: handle=%p [%p-%p)\n", __func__,
		       mod->arch.core_unw_table, core, core + num_core);
	}
	if (num_init > 0) {
		mod->arch.init_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp,
								init, init + num_init);
		DEBUGP("%s:  init: handle=%p [%p-%p)\n", __func__,
		       mod->arch.init_unw_table, init, init + num_init);
	}
}
/*
 * Final arch-specific step of module loading: register the module's unwind
 * table (if it has one) and, with CONFIG_PARAVIRT, apply the paravirt patch
 * sites recorded in the module's bundle and instruction sections.
 * Always returns 0.
 */
int
module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod)
{
	DEBUGP("%s: init: entry=%p\n", __func__, mod->init);
	if (mod->arch.unwind)
		register_unwind_table(mod);
#ifdef CONFIG_PARAVIRT
	if (mod->arch.paravirt_bundles) {
		/* [first, limit) spans the whole patch-site section. */
		unsigned long first = mod->arch.paravirt_bundles->sh_addr;
		unsigned long limit = first + mod->arch.paravirt_bundles->sh_size;

		paravirt_patch_apply_bundle((struct paravirt_patch_site_bundle *) first,
					    (struct paravirt_patch_site_bundle *) limit);
	}
	if (mod->arch.paravirt_insts) {
		unsigned long first = mod->arch.paravirt_insts->sh_addr;
		unsigned long limit = first + mod->arch.paravirt_insts->sh_size;

		paravirt_patch_apply_inst((struct paravirt_patch_site_inst *) first,
					  (struct paravirt_patch_site_inst *) limit);
	}
#endif
	return 0;
}
void
module_arch_cleanup (struct module *mod)
{
if (mod->arch.init_unw_table)
unw_remove_unwind_table(mod->arch.init_unw_table);
if (mod->arch.core_unw_table)
unw_remove_unwind_table(mod->arch.core_unw_table);
}

208
arch/ia64/kernel/msi_ia64.c Normal file
View file

@ -0,0 +1,208 @@
/*
* MSI hooks for standard x86 apic
*/
#include <linux/pci.h>
#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/dmar.h>
#include <asm/smp.h>
#include <asm/msidef.h>
static struct irq_chip ia64_msi_chip;
#ifdef CONFIG_SMP
/*
 * irq_chip affinity callback for PCI MSI: retarget the interrupt at the first
 * online CPU contained in cpu_mask by rewriting the destination id and vector
 * fields of the cached MSI message.  Returns 0 on success, -1 when
 * irq_prepare_move() refuses the move.
 */
static int ia64_set_msi_irq_affinity(struct irq_data *idata,
				     const cpumask_t *cpu_mask, bool force)
{
	unsigned int irq = idata->irq;
	int target = cpumask_first_and(cpu_mask, cpu_online_mask);
	struct msi_msg msg;

	if (irq_prepare_move(irq, target))
		return -1;

	__get_cached_msi_msg(idata->msi_desc, &msg);

	/* Keep the bits selected by the mask, then insert the new destination. */
	msg.address_lo = (msg.address_lo & MSI_ADDR_DEST_ID_MASK) |
			 MSI_ADDR_DEST_ID_CPU(cpu_physical_id(target));

	/* Same treatment for the vector field of the data word. */
	msg.data = (msg.data & MSI_DATA_VECTOR_MASK) |
		   MSI_DATA_VECTOR(irq_to_vector(irq));

	write_msi_msg(irq, &msg);
	cpumask_copy(idata->affinity, cpumask_of(target));

	return 0;
}
#endif /* CONFIG_SMP */
/*
 * Default MSI setup: allocate an irq, bind it to the MSI descriptor, compose a
 * message aimed at the first online CPU of the irq's domain, write it to the
 * device and install ia64_msi_chip with the edge flow handler.
 * Returns 0 on success or the negative value returned by create_irq().
 */
int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
{
	struct msi_msg msg;
	cpumask_t candidates;
	unsigned long target_id;
	int vector;
	int irq = create_irq();

	if (irq < 0)
		return irq;

	irq_set_msi_desc(irq, desc);

	cpumask_and(&candidates, &(irq_to_domain(irq)), cpu_online_mask);
	target_id = cpu_physical_id(first_cpu(candidates));
	vector = irq_to_vector(irq);

	msg.address_hi = 0;
	msg.address_lo = MSI_ADDR_HEADER
			 | MSI_ADDR_DEST_MODE_PHYS
			 | MSI_ADDR_REDIRECTION_CPU
			 | MSI_ADDR_DEST_ID_CPU(target_id);
	msg.data = MSI_DATA_TRIGGER_EDGE
		   | MSI_DATA_LEVEL_ASSERT
		   | MSI_DATA_DELIVERY_FIXED
		   | MSI_DATA_VECTOR(vector);

	write_msi_msg(irq, &msg);
	irq_set_chip_and_handler(irq, &ia64_msi_chip, handle_edge_irq);

	return 0;
}
/* Default MSI teardown: hand the irq back via destroy_irq(). */
void ia64_teardown_msi_irq(unsigned int irq)
{
destroy_irq(irq);
}
/*
 * irq_ack callback shared by both MSI irq_chips in this file: completes any
 * pending irq move, lets the core move the irq, then issues the EOI.
 */
static void ia64_ack_msi_irq(struct irq_data *data)
{
irq_complete_move(data->irq);
irq_move_irq(data);
ia64_eoi();
}
/* irq_retrigger callback: resend the irq's vector; always returns 1. */
static int ia64_msi_retrigger_irq(struct irq_data *data)
{
	ia64_resend_irq(irq_to_vector(data->irq));
	return 1;
}
/*
* Generic ops used on most IA64 platforms.
*/
/* irq_chip installed by ia64_setup_msi_irq() for every irq it creates. */
static struct irq_chip ia64_msi_chip = {
.name = "PCI-MSI",
.irq_mask = mask_msi_irq,
.irq_unmask = unmask_msi_irq,
.irq_ack = ia64_ack_msi_irq,
#ifdef CONFIG_SMP
/* Affinity changes are only compiled in on SMP builds. */
.irq_set_affinity = ia64_set_msi_irq_affinity,
#endif
.irq_retrigger = ia64_msi_retrigger_irq,
};
/*
 * Arch entry point for MSI setup: use the platform hook when one is
 * provided, otherwise fall back to the generic ia64 implementation.
 */
int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
{
	if (!platform_setup_msi_irq)
		return ia64_setup_msi_irq(pdev, desc);

	return platform_setup_msi_irq(pdev, desc);
}
/*
 * Arch entry point for MSI teardown: use the platform hook when one is
 * provided, otherwise fall back to the generic ia64 implementation.
 *
 * Written without "return <void expression>;", which ISO C does not permit in
 * a function returning void.
 */
void arch_teardown_msi_irq(unsigned int irq)
{
	if (platform_teardown_msi_irq) {
		platform_teardown_msi_irq(irq);
		return;
	}

	ia64_teardown_msi_irq(irq);
}
#ifdef CONFIG_INTEL_IOMMU
#ifdef CONFIG_SMP
/*
 * irq_chip affinity callback for the DMAR MSI: retarget the interrupt at the
 * first online CPU contained in mask by rewriting the vector and destination
 * id fields of the message read back from the DMAR unit.  Returns 0 on
 * success, -1 when irq_prepare_move() refuses the move.
 */
static int dmar_msi_set_affinity(struct irq_data *data,
const struct cpumask *mask, bool force)
{
unsigned int irq = data->irq;
struct irq_cfg *cfg = irq_cfg + irq;
struct msi_msg msg;
int cpu = cpumask_first_and(mask, cpu_online_mask);
if (irq_prepare_move(irq, cpu))
return -1;
dmar_msi_read(irq, &msg);
/*
 * NOTE(review): the two masks are inverted (~) here, whereas
 * ia64_set_msi_irq_affinity() in this file applies the very same macros
 * without inversion.  Only one polarity can match the macro definitions in
 * <asm/msidef.h>; verify against that header.
 */
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
dmar_msi_write(irq, &msg);
/*
 * NOTE(review): records the whole requested mask, while the PCI MSI variant
 * records only cpumask_of(cpu) -- confirm which is intended.
 */
cpumask_copy(data->affinity, mask);
return 0;
}
#endif /* CONFIG_SMP */
/* irq_chip installed by arch_setup_dmar_msi() for the DMAR interrupt. */
static struct irq_chip dmar_msi_type = {
.name = "DMAR_MSI",
.irq_unmask = dmar_msi_unmask,
.irq_mask = dmar_msi_mask,
/* ack and retrigger are shared with the PCI MSI chip above */
.irq_ack = ia64_ack_msi_irq,
#ifdef CONFIG_SMP
.irq_set_affinity = dmar_msi_set_affinity,
#endif
.irq_retrigger = ia64_msi_retrigger_irq,
};
/*
 * Fill *msg with an MSI message that delivers the vector configured for irq
 * to the first online CPU of the irq's domain.  pdev is not used.
 * Always returns 0.
 */
static int
msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
{
	struct irq_cfg *cfg = &irq_cfg[irq];
	cpumask_t candidates;
	unsigned dest;

	cpumask_and(&candidates, &(irq_to_domain(irq)), cpu_online_mask);
	dest = cpu_physical_id(first_cpu(candidates));

	msg->address_hi = 0;
	msg->address_lo = MSI_ADDR_HEADER
			  | MSI_ADDR_DEST_MODE_PHYS
			  | MSI_ADDR_REDIRECTION_CPU
			  | MSI_ADDR_DEST_ID_CPU(dest);
	msg->data = MSI_DATA_TRIGGER_EDGE
		    | MSI_DATA_LEVEL_ASSERT
		    | MSI_DATA_DELIVERY_FIXED
		    | MSI_DATA_VECTOR(cfg->vector);

	return 0;
}
/*
 * Compose the MSI message for the DMAR irq, write it to the DMAR unit and
 * install dmar_msi_type with the edge flow handler.  Returns 0 on success or
 * the negative value returned by msi_compose_msg().
 */
int arch_setup_dmar_msi(unsigned int irq)
{
	struct msi_msg msg;
	int err = msi_compose_msg(NULL, irq, &msg);

	if (err < 0)
		return err;

	dmar_msi_write(irq, &msg);
	irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
				      "edge");
	return 0;
}
#endif /* CONFIG_INTEL_IOMMU */

View file

@ -0,0 +1,21 @@
/*
* calculate
* NR_IRQS = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, FOO_NR_IRQS...)
* depending on config.
* This must be calculated before processing asm-offset.c.
*/
#define ASM_OFFSETS_C 1
#include <linux/kbuild.h>
#include <linux/threads.h>
#include <asm/native/irq.h>
/*
 * Container for the DEFINE() below.
 * NOTE(review): presumably never called and only compiled so the build can
 * extract NR_IRQS from the generated output -- confirm against the kbuild rules.
 */
void foo(void)
{
/*
 * A union is as large as its largest member, so adding one char array per
 * platform (sized by that platform's irq count) makes sizeof() yield the
 * maximum.  Only the native platform is listed here.
 */
union paravirt_nr_irqs_max {
char ia64_native_nr_irqs[IA64_NATIVE_NR_IRQS];
};
DEFINE(NR_IRQS, sizeof (union paravirt_nr_irqs_max));
}

85
arch/ia64/kernel/numa.c Normal file
View file

@ -0,0 +1,85 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* ia64 kernel NUMA specific stuff
*
* Copyright (C) 2002 Erich Focht <efocht@ess.nec.de>
* Copyright (C) 2004 Silicon Graphics, Inc.
* Jesse Barnes <jbarnes@sgi.com>
*/
#include <linux/topology.h>
#include <linux/module.h>
#include <asm/processor.h>
#include <asm/smp.h>
/* Node id of each cpu, indexed by cpu number; maintained by map_cpu_to_node(). */
u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_to_node_map);
/* Set of cpus belonging to each node, indexed by node id. */
cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
EXPORT_SYMBOL(node_to_cpu_mask);
/*
 * Record that cpu belongs to node nid in both lookup tables.
 *
 * A negative nid only resets the cpu's entry to node 0.  If the cpu is already
 * present in the mask of the node it is currently mapped to, nothing changes.
 * A nid that is not online is replaced by the first online node.
 */
void map_cpu_to_node(int cpu, int nid)
{
	int target = nid;

	/* just initialize by zero */
	if (target < 0) {
		cpu_to_node_map[cpu] = 0;
		return;
	}

	/* sanity check first: already mapped means nothing to do */
	if (cpu_isset(cpu, node_to_cpu_mask[cpu_to_node_map[cpu]]))
		return;

	/*
	 * we don't have cpu-driven node hot add yet...
	 * In usual case, node is created from SRAT at boot time.
	 */
	if (!node_online(target))
		target = first_online_node;

	cpu_to_node_map[cpu] = target;
	cpu_set(cpu, node_to_cpu_mask[target]);
}
/*
 * Undo map_cpu_to_node(): reset the cpu's node entry to 0 and drop the cpu
 * from nid's mask.  Warns (but still proceeds) when the tables do not
 * currently say that cpu belongs to nid.
 */
void unmap_cpu_from_node(int cpu, int nid)
{
WARN_ON(!cpu_isset(cpu, node_to_cpu_mask[nid]));
WARN_ON(cpu_to_node_map[cpu] != nid);
cpu_to_node_map[cpu] = 0;
cpu_clear(cpu, node_to_cpu_mask[nid]);
}
/**
* build_cpu_to_node_map - setup cpu to node and node to cpumask arrays
*
* Build cpu to node mapping and initialize the per node cpu masks using
* info from the node_cpuid array handed to us by ACPI.
*/
void __init build_cpu_to_node_map(void)
{
	int cpu, slot, nid;

	/* Start from empty per-node masks. */
	for (nid = 0; nid < MAX_NUMNODES; nid++)
		cpus_clear(node_to_cpu_mask[nid]);

	for_each_possible_early_cpu(cpu) {
		/* Find the node_cpuid slot carrying this cpu's physical id. */
		for (slot = 0; slot < NR_CPUS; ++slot)
			if (node_cpuid[slot].phys_id == cpu_physical_id(cpu))
				break;

		/* No matching slot: pass -1 so the cpu is just reset to node 0. */
		nid = (slot < NR_CPUS) ? node_cpuid[slot].nid : -1;
		map_cpu_to_node(cpu, nid);
	}
}

298
arch/ia64/kernel/pal.S Normal file
View file

@ -0,0 +1,298 @@
/*
* PAL Firmware support
* IA-64 Processor Programmers Reference Vol 2
*
* Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
* Copyright (C) 1999-2001, 2003 Hewlett-Packard Co
* David Mosberger <davidm@hpl.hp.com>
* Stephane Eranian <eranian@hpl.hp.com>
*
* 05/22/2000 eranian Added support for stacked register calls
* 05/24/2000 eranian Added support for physical mode static calls
*/
#include <asm/asmmacro.h>
#include <asm/processor.h>
.data
// Address that the PAL call stubs below branch to.  Starts out pointing at
// ia64_pal_default_handler and is overwritten by ia64_pal_handler_init.
pal_entry_point:
data8 ia64_pal_default_handler
.text
/*
* Set the PAL entry point address. This could be written in C code, but we
* do it here to keep it all in one module (besides, it's so trivial that it's
* not a big deal).
*
* in0 Address of the PAL entry point (text address, NOT a function
* descriptor).
*/
GLOBAL_ENTRY(ia64_pal_handler_init)
alloc r3=ar.pfs,1,0,0,0
movl r2=pal_entry_point // r2 <- address of the entry-point variable
;;
st8 [r2]=in0 // pal_entry_point <- in0
br.ret.sptk.many rp
END(ia64_pal_handler_init)
/*
* Default PAL call handler. This needs to be coded in assembly because it
* uses the static calling convention, i.e., the RSE may not be used and
* calls are done via "br.cond" (not "br.call").
*/
GLOBAL_ENTRY(ia64_pal_default_handler)
mov r8=-1 // r8 <- -1 (presumably read by the caller as a failure status)
br.cond.sptk.many rp // static convention: go back with a plain branch to rp
END(ia64_pal_default_handler)
/*
* Make a PAL call using the static calling convention.
*
* in0 Index of PAL service
* in1 - in3 Remaining PAL arguments
*/
GLOBAL_ENTRY(ia64_pal_call_static)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
alloc loc1 = ar.pfs,4,5,0,0
movl loc2 = pal_entry_point
1: {
mov r28 = in0 // r28 <- index of PAL service
mov r29 = in1 // r29 <- first PAL argument
mov r8 = ip // r8 <- address of this bundle (label 1b)
}
;;
ld8 loc2 = [loc2] // loc2 <- entry point
adds r8 = 1f-1b,r8 // r8 <- address of the return label 1f below
mov loc4=ar.rsc // save RSE configuration
;;
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov loc3 = psr // save psr
mov loc0 = rp // save rp
.body
mov r30 = in2 // r30 <- second PAL argument
mov r31 = in3 // r31 <- third PAL argument
mov b7 = loc2 // install target to branch reg
rsm psr.i // clear psr.i for the duration of the call
;;
mov rp = r8 // the branch target comes back through rp, i.e. to 1f
br.cond.sptk.many b7 // static convention: branch, not br.call
1: mov psr.l = loc3 // restore the saved psr
mov ar.rsc = loc4 // restore RSE configuration
mov ar.pfs = loc1
mov rp = loc0
;;
srlz.d // serialize restoration of psr.l
br.ret.sptk.many b0
END(ia64_pal_call_static)
/*
* Make a PAL call using the stacked registers calling convention.
*
* Inputs:
* in0 Index of PAL service
* in1 - in3 Remaining PAL arguments
*/
GLOBAL_ENTRY(ia64_pal_call_stacked)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
alloc loc1 = ar.pfs,4,4,4,0
movl loc2 = pal_entry_point
mov r28 = in0 // Index MUST be copied to r28
mov out0 = in0 // AND in0 of PAL function
mov loc0 = rp // save rp
.body
;;
ld8 loc2 = [loc2] // loc2 <- entry point
mov out1 = in1 // forward the three remaining arguments
mov out2 = in2
mov out3 = in3
mov loc3 = psr // save psr
;;
rsm psr.i // clear psr.i for the duration of the call
mov b7 = loc2 // install target to branch reg
;;
br.call.sptk.many rp=b7 // now make the call
.ret0: mov psr.l = loc3 // restore the saved psr
mov ar.pfs = loc1
mov rp = loc0
;;
srlz.d // serialize restoration of psr.l
br.ret.sptk.many b0
END(ia64_pal_call_stacked)
/*
* Make a physical mode PAL call using the static registers calling convention.
*
* Inputs:
* in0 Index of PAL service
* in1 - in3 Remaining PAL arguments
*
* PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel.
* So we don't need to clear them.
*/
/*
 * psr bits removed from (TO_CLEAR) and added to (TO_SET) the saved psr to
 * build the value the physical-mode call stubs pass in r16 to
 * ia64_switch_mode_phys.
 */
#define PAL_PSR_BITS_TO_CLEAR \
(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_DB | IA64_PSR_RT |\
IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
IA64_PSR_DFL | IA64_PSR_DFH)
#define PAL_PSR_BITS_TO_SET \
(IA64_PSR_BN)
GLOBAL_ENTRY(ia64_pal_call_phys_static)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
alloc loc1 = ar.pfs,4,7,0,0
movl loc2 = pal_entry_point
1: {
mov r28 = in0 // copy procedure index
mov r8 = ip // save ip to compute branch
mov loc0 = rp // save rp
}
.body
;;
ld8 loc2 = [loc2] // loc2 <- entry point
mov r29 = in1 // first argument
mov r30 = in2 // copy arg2
mov r31 = in3 // copy arg3
;;
mov loc3 = psr // save psr
adds r8 = 1f-1b,r8 // calculate return address for call
;;
mov loc4=ar.rsc // save RSE configuration
dep.z loc2=loc2,0,61 // convert pal entry point to physical
tpa r8=r8 // convert rp to physical
;;
mov b7 = loc2 // install target to branch reg
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
movl r16=PAL_PSR_BITS_TO_CLEAR
movl r17=PAL_PSR_BITS_TO_SET
;;
or loc3=loc3,r17 // add in psr the bits to set
;;
andcm r16=loc3,r16 // removes bits to clear from psr
br.call.sptk.many rp=ia64_switch_mode_phys
mov rp = r8 // install return address (physical)
mov loc5 = r19 // keep r19/r20 as left by ia64_switch_mode_phys;
mov loc6 = r20 // they are put back before ia64_switch_mode_virt
br.cond.sptk.many b7 // static convention: branch, not br.call
1:
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov r16=loc3 // r16= original psr
mov r19=loc5
mov r20=loc6
br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
mov psr.l = loc3 // restore init PSR
mov ar.pfs = loc1
mov rp = loc0
;;
mov ar.rsc=loc4 // restore RSE configuration
srlz.d // serialize restoration of psr.l
br.ret.sptk.many b0
END(ia64_pal_call_phys_static)
/*
* Make a PAL call using the stacked registers in physical mode.
*
* Inputs:
* in0 Index of PAL service
* in1 - in3 Remaining PAL arguments
*/
GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
alloc loc1 = ar.pfs,5,7,4,0
movl loc2 = pal_entry_point
1: {
mov r28 = in0 // copy procedure index
mov loc0 = rp // save rp
}
.body
;;
ld8 loc2 = [loc2] // loc2 <- entry point
mov loc3 = psr // save psr
;;
mov loc4=ar.rsc // save RSE configuration
dep.z loc2=loc2,0,61 // convert pal entry point to physical
;;
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
movl r16=PAL_PSR_BITS_TO_CLEAR
movl r17=PAL_PSR_BITS_TO_SET
;;
or loc3=loc3,r17 // add in psr the bits to set
mov b7 = loc2 // install target to branch reg
;;
andcm r16=loc3,r16 // removes bits to clear from psr
br.call.sptk.many rp=ia64_switch_mode_phys
mov out0 = in0 // copy procedure index
mov out1 = in1 // first argument
mov out2 = in2 // copy arg2
mov out3 = in3 // copy arg3
mov loc5 = r19 // keep r19/r20 as left by ia64_switch_mode_phys;
mov loc6 = r20 // they are put back before ia64_switch_mode_virt
br.call.sptk.many rp=b7 // now make the call
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov r16=loc3 // r16= original psr
mov r19=loc5
mov r20=loc6
br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
mov psr.l = loc3 // restore init PSR
mov ar.pfs = loc1
mov rp = loc0
;;
mov ar.rsc=loc4 // restore RSE configuration
srlz.d // serialize restoration of psr.l
br.ret.sptk.many b0
END(ia64_pal_call_phys_stacked)
/*
* Save the scratch fp regs which aren't saved in pt_regs already
* (f10-f15).
*
* NOTE: We need to do this since firmware (SAL and PAL) may use any of the
* scratch regs fp-low partition.
*
* Inputs:
* in0 Address of stack storage for fp regs
*/
GLOBAL_ENTRY(ia64_save_scratch_fpregs)
alloc r3=ar.pfs,1,0,0,0
add r2=16,in0 // r2 <- in0 + 16: second store pointer
;;
// Both pointers post-increment by 32, so the two streams interleave and
// f10..f15 end up in consecutive 16-byte slots starting at in0.
stf.spill [in0] = f10,32
stf.spill [r2] = f11,32
;;
stf.spill [in0] = f12,32
stf.spill [r2] = f13,32
;;
stf.spill [in0] = f14,32
stf.spill [r2] = f15,32
br.ret.sptk.many rp
END(ia64_save_scratch_fpregs)
/*
* Load the scratch fp regs (f10-f15)
*
* Inputs:
* in0 Address of stack storage for fp regs
*/
GLOBAL_ENTRY(ia64_load_scratch_fpregs)
alloc r3=ar.pfs,1,0,0,0
add r2=16,in0 // r2 <- in0 + 16: second load pointer
;;
// Mirror of ia64_save_scratch_fpregs: two pointers, each post-incremented by
// 32, read f10..f15 back from consecutive 16-byte slots starting at in0.
ldf.fill f10 = [in0],32
ldf.fill f11 = [r2],32
;;
ldf.fill f12 = [in0],32
ldf.fill f13 = [r2],32
;;
ldf.fill f14 = [in0],32
ldf.fill f15 = [r2],32
br.ret.sptk.many rp
END(ia64_load_scratch_fpregs)

1022
arch/ia64/kernel/palinfo.c Normal file

File diff suppressed because it is too large Load diff

902
arch/ia64/kernel/paravirt.c Normal file
View file

@ -0,0 +1,902 @@
/******************************************************************************
* arch/ia64/kernel/paravirt.c
*
* Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
* VA Linux Systems Japan K.K.
* Yaozu (Eddie) Dong <eddie.dong@intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/types.h>
#include <asm/iosapic.h>
#include <asm/paravirt.h>
/***************************************************************************
* general info
*/
/* Defaults describing the non-virtualized case (see .name). */
struct pv_info pv_info = {
.kernel_rpl = 0,
.paravirt_enabled = 0,
.name = "bare hardware"
};
/***************************************************************************
* pv_init_ops
* initialization hooks.
*/
/* Forward declaration; the definition is not in this part of the file. */
static void __init
ia64_native_patch_branch(unsigned long tag, unsigned long type);
/* Initialization hooks filled with the native implementations. */
struct pv_init_ops pv_init_ops =
{
#ifdef ASM_SUPPORTED
/* The bundle patcher is only hooked up when ASM_SUPPORTED is defined. */
.patch_bundle = ia64_native_patch_bundle,
#endif
.patch_branch = ia64_native_patch_branch,
};
/***************************************************************************
* pv_cpu_ops
* intrinsics hooks.
*/
#ifndef ASM_SUPPORTED
/* ia64_native_xxx are macros so that we have to make them real functions */
/*
 * Each macro below defines a static function ia64_native_<name>_func() that
 * simply forwards to the ia64_native_<name>() macro, so that its address can
 * be stored in pv_cpu_ops.
 */

/* void f(unsigned long) */
#define DEFINE_VOID_FUNC1(name)					\
	static void						\
	ia64_native_ ## name ## _func(unsigned long arg)	\
	{							\
		ia64_native_ ## name(arg);			\
	}

/* void f(void *) */
#define DEFINE_VOID_FUNC1_VOID(name)				\
	static void						\
	ia64_native_ ## name ## _func(void *arg)		\
	{							\
		ia64_native_ ## name(arg);			\
	}

/* void f(unsigned long, unsigned long) */
#define DEFINE_VOID_FUNC2(name)					\
	static void						\
	ia64_native_ ## name ## _func(unsigned long arg0,	\
				      unsigned long arg1)	\
	{							\
		ia64_native_ ## name(arg0, arg1);		\
	}

/* unsigned long f(void) */
#define DEFINE_FUNC0(name)					\
	static unsigned long					\
	ia64_native_ ## name ## _func(void)			\
	{							\
		return ia64_native_ ## name();			\
	}

/*
 * unsigned long f(type)
 *
 * The closing brace must NOT be followed by a line continuation: a trailing
 * backslash would splice whatever line comes next into this macro's body.
 */
#define DEFINE_FUNC1(name, type)				\
	static unsigned long					\
	ia64_native_ ## name ## _func(type arg)			\
	{							\
		return ia64_native_ ## name(arg);		\
	}
/* Real-function wrappers for the intrinsics referenced from pv_cpu_ops. */
DEFINE_VOID_FUNC1_VOID(fc);
DEFINE_VOID_FUNC1(intrin_local_irq_restore);
DEFINE_VOID_FUNC2(ptcga);
DEFINE_VOID_FUNC2(set_rr);
DEFINE_FUNC0(get_psr_i);
DEFINE_FUNC1(thash, unsigned long);
DEFINE_FUNC1(get_cpuid, int);
DEFINE_FUNC1(get_pmd, int);
DEFINE_FUNC1(get_rr, unsigned long);
/* pv_cpu_ops.ssm_i: ssm with the fixed operand IA64_PSR_I. */
static void
ia64_native_ssm_i_func(void)
{
ia64_native_ssm(IA64_PSR_I);
}
/* pv_cpu_ops.rsm_i: rsm with the fixed operand IA64_PSR_I. */
static void
ia64_native_rsm_i_func(void)
{
ia64_native_rsm(IA64_PSR_I);
}
/* pv_cpu_ops.set_rr0_to_rr4: forwards all five values to the native macro. */
static void
ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1,
unsigned long val2, unsigned long val3,
unsigned long val4)
{
ia64_native_set_rr0_to_rr4(val0, val1, val2, val3, val4);
}
/*
 * CASE_GET_REG(id) expands to one complete switch case (label, read, break)
 * for register _IA64_REG_<id>; the _AR/_CR variants only add the name prefix.
 */
#define CASE_GET_REG(id) \
case _IA64_REG_ ## id: \
res = ia64_native_getreg(_IA64_REG_ ## id); \
break;
#define CASE_GET_AR(id) CASE_GET_REG(AR_ ## id)
#define CASE_GET_CR(id) CASE_GET_REG(CR_ ## id)
/*
 * Read the register selected at run time by regnum.  Each case passes a
 * constant register id to ia64_native_getreg().  An unlisted regnum logs a
 * KERN_CRIT message and yields (unsigned long) -1.
 */
unsigned long
ia64_native_getreg_func(int regnum)
{
unsigned long res = -1;
switch (regnum) {
CASE_GET_REG(GP);
/*CASE_GET_REG(IP);*/ /* returned ip value shouldn't be constant */
CASE_GET_REG(PSR);
CASE_GET_REG(TP);
CASE_GET_REG(SP);
CASE_GET_AR(KR0);
CASE_GET_AR(KR1);
CASE_GET_AR(KR2);
CASE_GET_AR(KR3);
CASE_GET_AR(KR4);
CASE_GET_AR(KR5);
CASE_GET_AR(KR6);
CASE_GET_AR(KR7);
CASE_GET_AR(RSC);
CASE_GET_AR(BSP);
CASE_GET_AR(BSPSTORE);
CASE_GET_AR(RNAT);
CASE_GET_AR(FCR);
CASE_GET_AR(EFLAG);
CASE_GET_AR(CSD);
CASE_GET_AR(SSD);
CASE_GET_AR(CFLAG);
CASE_GET_AR(FSR);
CASE_GET_AR(FIR);
CASE_GET_AR(FDR);
CASE_GET_AR(CCV);
CASE_GET_AR(UNAT);
CASE_GET_AR(FPSR);
CASE_GET_AR(ITC);
CASE_GET_AR(PFS);
CASE_GET_AR(LC);
CASE_GET_AR(EC);
CASE_GET_CR(DCR);
CASE_GET_CR(ITM);
CASE_GET_CR(IVA);
CASE_GET_CR(PTA);
CASE_GET_CR(IPSR);
CASE_GET_CR(ISR);
CASE_GET_CR(IIP);
CASE_GET_CR(IFA);
CASE_GET_CR(ITIR);
CASE_GET_CR(IIPA);
CASE_GET_CR(IFS);
CASE_GET_CR(IIM);
CASE_GET_CR(IHA);
CASE_GET_CR(LID);
CASE_GET_CR(IVR);
CASE_GET_CR(TPR);
CASE_GET_CR(EOI);
CASE_GET_CR(IRR0);
CASE_GET_CR(IRR1);
CASE_GET_CR(IRR2);
CASE_GET_CR(IRR3);
CASE_GET_CR(ITV);
CASE_GET_CR(PMV);
CASE_GET_CR(CMCV);
CASE_GET_CR(LRR0);
CASE_GET_CR(LRR1);
default:
printk(KERN_CRIT "wrong_getreg %d\n", regnum);
break;
}
return res;
}
/*
 * CASE_SET_REG(id) expands to one complete switch case (label, write, break)
 * for register _IA64_REG_<id>; the _AR/_CR variants only add the name prefix.
 */
#define CASE_SET_REG(id) \
case _IA64_REG_ ## id: \
ia64_native_setreg(_IA64_REG_ ## id, val); \
break;
#define CASE_SET_AR(id) CASE_SET_REG(AR_ ## id)
#define CASE_SET_CR(id) CASE_SET_REG(CR_ ## id)
/*
 * Write val to the register selected at run time by regnum.  Each case passes
 * a constant register id to ia64_native_setreg().  An unlisted regnum only
 * logs a KERN_CRIT message.
 */
void
ia64_native_setreg_func(int regnum, unsigned long val)
{
switch (regnum) {
/* PSR_L is the one case followed by an explicit ia64_dv_serialize_data(). */
case _IA64_REG_PSR_L:
ia64_native_setreg(_IA64_REG_PSR_L, val);
ia64_dv_serialize_data();
break;
CASE_SET_REG(SP);
CASE_SET_REG(GP);
CASE_SET_AR(KR0);
CASE_SET_AR(KR1);
CASE_SET_AR(KR2);
CASE_SET_AR(KR3);
CASE_SET_AR(KR4);
CASE_SET_AR(KR5);
CASE_SET_AR(KR6);
CASE_SET_AR(KR7);
CASE_SET_AR(RSC);
CASE_SET_AR(BSP);
CASE_SET_AR(BSPSTORE);
CASE_SET_AR(RNAT);
CASE_SET_AR(FCR);
CASE_SET_AR(EFLAG);
CASE_SET_AR(CSD);
CASE_SET_AR(SSD);
CASE_SET_AR(CFLAG);
CASE_SET_AR(FSR);
CASE_SET_AR(FIR);
CASE_SET_AR(FDR);
CASE_SET_AR(CCV);
CASE_SET_AR(UNAT);
CASE_SET_AR(FPSR);
CASE_SET_AR(ITC);
CASE_SET_AR(PFS);
CASE_SET_AR(LC);
CASE_SET_AR(EC);
CASE_SET_CR(DCR);
CASE_SET_CR(ITM);
CASE_SET_CR(IVA);
CASE_SET_CR(PTA);
CASE_SET_CR(IPSR);
CASE_SET_CR(ISR);
CASE_SET_CR(IIP);
CASE_SET_CR(IFA);
CASE_SET_CR(ITIR);
CASE_SET_CR(IIPA);
CASE_SET_CR(IFS);
CASE_SET_CR(IIM);
CASE_SET_CR(IHA);
CASE_SET_CR(LID);
CASE_SET_CR(IVR);
CASE_SET_CR(TPR);
CASE_SET_CR(EOI);
CASE_SET_CR(IRR0);
CASE_SET_CR(IRR1);
CASE_SET_CR(IRR2);
CASE_SET_CR(IRR3);
CASE_SET_CR(ITV);
CASE_SET_CR(PMV);
CASE_SET_CR(CMCV);
CASE_SET_CR(LRR0);
CASE_SET_CR(LRR1);
default:
printk(KERN_CRIT "wrong setreg %d\n", regnum);
break;
}
}
#else
/*
 * __DEFINE_FUNC(name, code) emits a top-level asm procedure
 * ia64_native_<name>_func whose body is `code` followed by a branch through
 * b6.  The labels ia64_native_<name>_direct_start/_direct_end bracket `code`
 * alone (the trailing branch is outside the bracket) and are declared as
 * extern char arrays so C code can refer to them.
 */
#define __DEFINE_FUNC(name, code) \
extern const char ia64_native_ ## name ## _direct_start[]; \
extern const char ia64_native_ ## name ## _direct_end[]; \
asm (".align 32\n" \
".proc ia64_native_" #name "_func\n" \
"ia64_native_" #name "_func:\n" \
"ia64_native_" #name "_direct_start:\n" \
code \
"ia64_native_" #name "_direct_end:\n" \
"br.cond.sptk.many b6\n" \
".endp ia64_native_" #name "_func\n")
/*
 * The DEFINE_* wrappers below add a C prototype for the asm procedure; they
 * differ only in the prototype's return type and parameter list.
 */
#define DEFINE_VOID_FUNC0(name, code) \
extern void \
ia64_native_ ## name ## _func(void); \
__DEFINE_FUNC(name, code)
#define DEFINE_VOID_FUNC1(name, code) \
extern void \
ia64_native_ ## name ## _func(unsigned long arg); \
__DEFINE_FUNC(name, code)
#define DEFINE_VOID_FUNC1_VOID(name, code) \
extern void \
ia64_native_ ## name ## _func(void *arg); \
__DEFINE_FUNC(name, code)
#define DEFINE_VOID_FUNC2(name, code) \
extern void \
ia64_native_ ## name ## _func(unsigned long arg0, \
unsigned long arg1); \
__DEFINE_FUNC(name, code)
#define DEFINE_FUNC0(name, code) \
extern unsigned long \
ia64_native_ ## name ## _func(void); \
__DEFINE_FUNC(name, code)
#define DEFINE_FUNC1(name, type, code) \
extern unsigned long \
ia64_native_ ## name ## _func(type arg); \
__DEFINE_FUNC(name, code)
/*
 * Asm implementations of the pv_cpu_ops intrinsics.  As the bodies show,
 * operands are taken from r8 (and r9 for the two-operand ones) and results
 * are left in r8.
 */
DEFINE_VOID_FUNC1_VOID(fc,
"fc r8\n");
/* ssm psr.i when r8 is non-zero (then srlz.d), rsm psr.i when r8 is zero */
DEFINE_VOID_FUNC1(intrin_local_irq_restore,
";;\n"
" cmp.ne p6, p7 = r8, r0\n"
";;\n"
"(p6) ssm psr.i\n"
"(p7) rsm psr.i\n"
";;\n"
"(p6) srlz.d\n");
DEFINE_VOID_FUNC2(ptcga,
"ptc.ga r8, r9\n");
DEFINE_VOID_FUNC2(set_rr,
"mov rr[r8] = r9\n");
/* ia64_native_getreg(_IA64_REG_PSR) & IA64_PSR_I */
DEFINE_FUNC0(get_psr_i,
"mov r2 = " __stringify(1 << IA64_PSR_I_BIT) "\n"
"mov r8 = psr\n"
";;\n"
"and r8 = r2, r8\n");
DEFINE_FUNC1(thash, unsigned long,
"thash r8 = r8\n");
DEFINE_FUNC1(get_cpuid, int,
"mov r8 = cpuid[r8]\n");
DEFINE_FUNC1(get_pmd, int,
"mov r8 = pmd[r8]\n");
DEFINE_FUNC1(get_rr, unsigned long,
"mov r8 = rr[r8]\n");
DEFINE_VOID_FUNC0(ssm_i,
"ssm psr.i\n");
DEFINE_VOID_FUNC0(rsm_i,
"rsm psr.i\n");
/*
 * Asm implementation of set_rr0_to_rr4: writes r8, r9, r10, r11 and r14 to the
 * region registers indexed by 0, 0x2000000000000000, 0x4000000000000000,
 * 0x6000000000000000 and 0x8000000000000000 respectively.  The index values
 * after the first movl are derived with shl/add.
 */
extern void
ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1,
unsigned long val2, unsigned long val3,
unsigned long val4);
__DEFINE_FUNC(set_rr0_to_rr4,
"mov rr[r0] = r8\n"
"movl r2 = 0x2000000000000000\n"
";;\n"
"mov rr[r2] = r9\n"
"shl r3 = r2, 1\n" /* movl r3 = 0x4000000000000000 */
";;\n"
"add r2 = r2, r3\n" /* movl r2 = 0x6000000000000000 */
"mov rr[r3] = r10\n"
";;\n"
"mov rr[r2] = r11\n"
"shl r3 = r3, 1\n" /* movl r3 = 0x8000000000000000 */
";;\n"
"mov rr[r3] = r14\n");
/*
 * Asm implementation of getreg.  The body is a linear chain of tests: each
 * __DEFINE_GET_REG step loads one register id into r2, compares it with r8
 * and, on a match, moves the named register into r8 and branches out through
 * b6.  When no id matches, r8 is set to -1.
 */
extern unsigned long ia64_native_getreg_func(int regnum);
asm(".global ia64_native_getreg_func\n");
#define __DEFINE_GET_REG(id, reg) \
"mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \
";;\n" \
"cmp.eq p6, p0 = r2, r8\n" \
";;\n" \
"(p6) mov r8 = " #reg "\n" \
"(p6) br.cond.sptk.many b6\n" \
";;\n"
#define __DEFINE_GET_AR(id, reg) __DEFINE_GET_REG(AR_ ## id, ar.reg)
#define __DEFINE_GET_CR(id, reg) __DEFINE_GET_REG(CR_ ## id, cr.reg)
__DEFINE_FUNC(getreg,
__DEFINE_GET_REG(GP, gp)
/*__DEFINE_GET_REG(IP, ip)*/ /* returned ip value shouldn't be constant */
__DEFINE_GET_REG(PSR, psr)
__DEFINE_GET_REG(TP, tp)
__DEFINE_GET_REG(SP, sp)
__DEFINE_GET_REG(AR_KR0, ar0)
__DEFINE_GET_REG(AR_KR1, ar1)
__DEFINE_GET_REG(AR_KR2, ar2)
__DEFINE_GET_REG(AR_KR3, ar3)
__DEFINE_GET_REG(AR_KR4, ar4)
__DEFINE_GET_REG(AR_KR5, ar5)
__DEFINE_GET_REG(AR_KR6, ar6)
__DEFINE_GET_REG(AR_KR7, ar7)
__DEFINE_GET_AR(RSC, rsc)
__DEFINE_GET_AR(BSP, bsp)
__DEFINE_GET_AR(BSPSTORE, bspstore)
__DEFINE_GET_AR(RNAT, rnat)
__DEFINE_GET_AR(FCR, fcr)
__DEFINE_GET_AR(EFLAG, eflag)
__DEFINE_GET_AR(CSD, csd)
__DEFINE_GET_AR(SSD, ssd)
__DEFINE_GET_REG(AR_CFLAG, ar27)
__DEFINE_GET_AR(FSR, fsr)
__DEFINE_GET_AR(FIR, fir)
__DEFINE_GET_AR(FDR, fdr)
__DEFINE_GET_AR(CCV, ccv)
__DEFINE_GET_AR(UNAT, unat)
__DEFINE_GET_AR(FPSR, fpsr)
__DEFINE_GET_AR(ITC, itc)
__DEFINE_GET_AR(PFS, pfs)
__DEFINE_GET_AR(LC, lc)
__DEFINE_GET_AR(EC, ec)
__DEFINE_GET_CR(DCR, dcr)
__DEFINE_GET_CR(ITM, itm)
__DEFINE_GET_CR(IVA, iva)
__DEFINE_GET_CR(PTA, pta)
__DEFINE_GET_CR(IPSR, ipsr)
__DEFINE_GET_CR(ISR, isr)
__DEFINE_GET_CR(IIP, iip)
__DEFINE_GET_CR(IFA, ifa)
__DEFINE_GET_CR(ITIR, itir)
__DEFINE_GET_CR(IIPA, iipa)
__DEFINE_GET_CR(IFS, ifs)
__DEFINE_GET_CR(IIM, iim)
__DEFINE_GET_CR(IHA, iha)
__DEFINE_GET_CR(LID, lid)
__DEFINE_GET_CR(IVR, ivr)
__DEFINE_GET_CR(TPR, tpr)
__DEFINE_GET_CR(EOI, eoi)
__DEFINE_GET_CR(IRR0, irr0)
__DEFINE_GET_CR(IRR1, irr1)
__DEFINE_GET_CR(IRR2, irr2)
__DEFINE_GET_CR(IRR3, irr3)
__DEFINE_GET_CR(ITV, itv)
__DEFINE_GET_CR(PMV, pmv)
__DEFINE_GET_CR(CMCV, cmcv)
__DEFINE_GET_CR(LRR0, lrr0)
__DEFINE_GET_CR(LRR1, lrr1)
"mov r8 = -1\n" /* unsupported case */
);
/*
 * Asm implementation of setreg.  The body is a linear chain of tests: each
 * __DEFINE_SET_REG step loads one register id into r2, compares it with r9
 * and, on a match, moves r8 into the named register and branches out through
 * b6.  Note that here the register id is compared against r9 and the value is
 * taken from r8.  An id that matches nothing falls through without writing
 * any register.
 */
extern void ia64_native_setreg_func(int regnum, unsigned long val);
asm(".global ia64_native_setreg_func\n");
#define __DEFINE_SET_REG(id, reg) \
"mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \
";;\n" \
"cmp.eq p6, p0 = r2, r9\n" \
";;\n" \
"(p6) mov " #reg " = r8\n" \
"(p6) br.cond.sptk.many b6\n" \
";;\n"
#define __DEFINE_SET_AR(id, reg) __DEFINE_SET_REG(AR_ ## id, ar.reg)
#define __DEFINE_SET_CR(id, reg) __DEFINE_SET_REG(CR_ ## id, cr.reg)
/* The PSR_L step is written out by hand because of the optional directive. */
__DEFINE_FUNC(setreg,
"mov r2 = " __stringify(_IA64_REG_PSR_L) "\n"
";;\n"
"cmp.eq p6, p0 = r2, r9\n"
";;\n"
"(p6) mov psr.l = r8\n"
#ifdef HAVE_SERIALIZE_DIRECTIVE
".serialize.data\n"
#endif
"(p6) br.cond.sptk.many b6\n"
__DEFINE_SET_REG(GP, gp)
__DEFINE_SET_REG(SP, sp)
__DEFINE_SET_REG(AR_KR0, ar0)
__DEFINE_SET_REG(AR_KR1, ar1)
__DEFINE_SET_REG(AR_KR2, ar2)
__DEFINE_SET_REG(AR_KR3, ar3)
__DEFINE_SET_REG(AR_KR4, ar4)
__DEFINE_SET_REG(AR_KR5, ar5)
__DEFINE_SET_REG(AR_KR6, ar6)
__DEFINE_SET_REG(AR_KR7, ar7)
__DEFINE_SET_AR(RSC, rsc)
__DEFINE_SET_AR(BSP, bsp)
__DEFINE_SET_AR(BSPSTORE, bspstore)
__DEFINE_SET_AR(RNAT, rnat)
__DEFINE_SET_AR(FCR, fcr)
__DEFINE_SET_AR(EFLAG, eflag)
__DEFINE_SET_AR(CSD, csd)
__DEFINE_SET_AR(SSD, ssd)
__DEFINE_SET_REG(AR_CFLAG, ar27)
__DEFINE_SET_AR(FSR, fsr)
__DEFINE_SET_AR(FIR, fir)
__DEFINE_SET_AR(FDR, fdr)
__DEFINE_SET_AR(CCV, ccv)
__DEFINE_SET_AR(UNAT, unat)
__DEFINE_SET_AR(FPSR, fpsr)
__DEFINE_SET_AR(ITC, itc)
__DEFINE_SET_AR(PFS, pfs)
__DEFINE_SET_AR(LC, lc)
__DEFINE_SET_AR(EC, ec)
__DEFINE_SET_CR(DCR, dcr)
__DEFINE_SET_CR(ITM, itm)
__DEFINE_SET_CR(IVA, iva)
__DEFINE_SET_CR(PTA, pta)
__DEFINE_SET_CR(IPSR, ipsr)
__DEFINE_SET_CR(ISR, isr)
__DEFINE_SET_CR(IIP, iip)
__DEFINE_SET_CR(IFA, ifa)
__DEFINE_SET_CR(ITIR, itir)
__DEFINE_SET_CR(IIPA, iipa)
__DEFINE_SET_CR(IFS, ifs)
__DEFINE_SET_CR(IIM, iim)
__DEFINE_SET_CR(IHA, iha)
__DEFINE_SET_CR(LID, lid)
__DEFINE_SET_CR(IVR, ivr)
__DEFINE_SET_CR(TPR, tpr)
__DEFINE_SET_CR(EOI, eoi)
__DEFINE_SET_CR(IRR0, irr0)
__DEFINE_SET_CR(IRR1, irr1)
__DEFINE_SET_CR(IRR2, irr2)
__DEFINE_SET_CR(IRR3, irr3)
__DEFINE_SET_CR(ITV, itv)
__DEFINE_SET_CR(PMV, pmv)
__DEFINE_SET_CR(CMCV, cmcv)
__DEFINE_SET_CR(LRR0, lrr0)
__DEFINE_SET_CR(LRR1, lrr1)
);
#endif
struct pv_cpu_ops pv_cpu_ops = {
.fc = ia64_native_fc_func,
.thash = ia64_native_thash_func,
.get_cpuid = ia64_native_get_cpuid_func,
.get_pmd = ia64_native_get_pmd_func,
.ptcga = ia64_native_ptcga_func,
.get_rr = ia64_native_get_rr_func,
.set_rr = ia64_native_set_rr_func,
.set_rr0_to_rr4 = ia64_native_set_rr0_to_rr4_func,
.ssm_i = ia64_native_ssm_i_func,
.getreg = ia64_native_getreg_func,
.setreg = ia64_native_setreg_func,
.rsm_i = ia64_native_rsm_i_func,
.get_psr_i = ia64_native_get_psr_i_func,
.intrin_local_irq_restore
= ia64_native_intrin_local_irq_restore_func,
};
EXPORT_SYMBOL(pv_cpu_ops);
/******************************************************************************
 * Replacement of hand-written assembly code.
 *
 * Copies the four entry addresses from @cpu_asm_switch into the
 * paravirt_*_targ variables declared below.
 */
void
paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch)
{
	extern unsigned long paravirt_switch_to_targ;
	extern unsigned long paravirt_leave_syscall_targ;
	extern unsigned long paravirt_work_processed_syscall_targ;
	extern unsigned long paravirt_leave_kernel_targ;
	const struct pv_cpu_asm_switch *sw = cpu_asm_switch;

	paravirt_switch_to_targ              = sw->switch_to;
	paravirt_leave_syscall_targ          = sw->leave_syscall;
	paravirt_work_processed_syscall_targ = sw->work_processed_syscall;
	paravirt_leave_kernel_targ           = sw->leave_kernel;
}
/***************************************************************************
 * pv_iosapic_ops
 * iosapic read/write hooks.
 */

/* Native read hook: forwards to __ia64_native_iosapic_read(). */
static unsigned int
ia64_native_iosapic_read(char __iomem *iosapic, unsigned int reg)
{
	unsigned int value = __ia64_native_iosapic_read(iosapic, reg);

	return value;
}
/* Native write hook: forwards to __ia64_native_iosapic_write(). */
static void
ia64_native_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
{
	__ia64_native_iosapic_write(iosapic, reg, val);
	return;
}
/* Default IOSAPIC operation table, wired to the native implementations. */
struct pv_iosapic_ops pv_iosapic_ops = {
	.pcat_compat_init	= ia64_native_iosapic_pcat_compat_init,
	.__get_irq_chip		= ia64_native_iosapic_get_irq_chip,
	.__read			= ia64_native_iosapic_read,
	.__write		= ia64_native_iosapic_write,
};
/***************************************************************************
 * pv_irq_ops
 * irq operations: default table wired to the native implementations.
 */
struct pv_irq_ops pv_irq_ops = {
	.register_ipi		= ia64_native_register_ipi,
	.assign_irq_vector	= ia64_native_assign_irq_vector,
	.free_irq_vector	= ia64_native_free_irq_vector,
	.register_percpu_irq	= ia64_native_register_percpu_irq,
	.resend_irq		= ia64_native_resend_irq,
};
/***************************************************************************
 * pv_time_ops
 * time operations
 */
/*
 * Static keys defined here with default (zero) initialisation.
 * NOTE(review): presumably these gate steal-time accounting elsewhere in the
 * kernel; nothing in this file enables or tests them -- confirm against users.
 */
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
/*
 * Native steal-accounting hook: does nothing, leaves @new_itm untouched and
 * always reports 0.
 */
static int
ia64_native_do_steal_accounting(unsigned long *new_itm)
{
	const int nothing_accounted = 0;

	return nothing_accounted;
}
/* Default time operation table, wired to the native implementations. */
struct pv_time_ops pv_time_ops = {
	.do_steal_accounting	= ia64_native_do_steal_accounting,
	.sched_clock		= ia64_native_sched_clock,
};
/***************************************************************************
* binary patching
* pv_init_ops.patch_bundle
*/
#ifdef ASM_SUPPORTED
/*
 * Define the patch routine "get_<name>" via __DEFINE_FUNC; its assembly body
 * is a single "mov r8 = <reg>" bracketed by stop bits (";;").
 */
#define IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg) \
__DEFINE_FUNC(get_ ## name, \
";;\n" \
"mov r8 = " #reg "\n" \
";;\n")
/*
 * Define the patch routine "set_<name>" via __DEFINE_FUNC; its assembly body
 * is a single "mov <reg> = r8" bracketed by stop bits (";;").
 */
#define IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg) \
__DEFINE_FUNC(set_ ## name, \
";;\n" \
"mov " #reg " = r8\n" \
";;\n")
/*
 * Define both the getter and the setter patch routine for one register.
 *
 * The last line of the body deliberately has NO trailing backslash: a
 * dangling continuation would splice whatever line follows (here the next
 * #define) into this macro's replacement list and break preprocessing.
 */
#define IA64_NATIVE_PATCH_DEFINE_REG(name, reg)		\
	IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg);	\
	IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg)

/* Application-register variant: routine name "ar_<name>", operand "ar.<reg>". */
#define IA64_NATIVE_PATCH_DEFINE_AR(name, reg)			\
	IA64_NATIVE_PATCH_DEFINE_REG(ar_ ## name, ar.reg)
#define IA64_NATIVE_PATCH_DEFINE_CR(name, reg) \
IA64_NATIVE_PATCH_DEFINE_REG(cr_ ## name, cr.reg)
/*
 * Instantiate the get_/set_ patch routines for every register handled by the
 * binary patcher.  psr and tp get only a getter; psr.l gets only a setter.
 * These are the routines referenced by name in
 * ia64_native_patch_bundle_elems[].
 */
IA64_NATIVE_PATCH_DEFINE_GET_REG(psr, psr);
IA64_NATIVE_PATCH_DEFINE_GET_REG(tp, tp);
/*
 * set_psr_l is open-coded instead of using
 * IA64_NATIVE_PATCH_DEFINE_SET_REG(psr_l, psr.l) so that .serialize.data can
 * follow the mov when HAVE_SERIALIZE_DIRECTIVE is defined.
 */
__DEFINE_FUNC(set_psr_l,
";;\n"
"mov psr.l = r8\n"
#ifdef HAVE_SERIALIZE_DIRECTIVE
".serialize.data\n"
#endif
";;\n");
IA64_NATIVE_PATCH_DEFINE_REG(gp, gp);
IA64_NATIVE_PATCH_DEFINE_REG(sp, sp);
IA64_NATIVE_PATCH_DEFINE_REG(kr0, ar0);
IA64_NATIVE_PATCH_DEFINE_REG(kr1, ar1);
IA64_NATIVE_PATCH_DEFINE_REG(kr2, ar2);
IA64_NATIVE_PATCH_DEFINE_REG(kr3, ar3);
IA64_NATIVE_PATCH_DEFINE_REG(kr4, ar4);
IA64_NATIVE_PATCH_DEFINE_REG(kr5, ar5);
IA64_NATIVE_PATCH_DEFINE_REG(kr6, ar6);
IA64_NATIVE_PATCH_DEFINE_REG(kr7, ar7);
IA64_NATIVE_PATCH_DEFINE_AR(rsc, rsc);
IA64_NATIVE_PATCH_DEFINE_AR(bsp, bsp);
IA64_NATIVE_PATCH_DEFINE_AR(bspstore, bspstore);
IA64_NATIVE_PATCH_DEFINE_AR(rnat, rnat);
IA64_NATIVE_PATCH_DEFINE_AR(fcr, fcr);
IA64_NATIVE_PATCH_DEFINE_AR(eflag, eflag);
IA64_NATIVE_PATCH_DEFINE_AR(csd, csd);
IA64_NATIVE_PATCH_DEFINE_AR(ssd, ssd);
IA64_NATIVE_PATCH_DEFINE_REG(ar27, ar27);
IA64_NATIVE_PATCH_DEFINE_AR(fsr, fsr);
IA64_NATIVE_PATCH_DEFINE_AR(fir, fir);
IA64_NATIVE_PATCH_DEFINE_AR(fdr, fdr);
IA64_NATIVE_PATCH_DEFINE_AR(ccv, ccv);
IA64_NATIVE_PATCH_DEFINE_AR(unat, unat);
IA64_NATIVE_PATCH_DEFINE_AR(fpsr, fpsr);
IA64_NATIVE_PATCH_DEFINE_AR(itc, itc);
IA64_NATIVE_PATCH_DEFINE_AR(pfs, pfs);
IA64_NATIVE_PATCH_DEFINE_AR(lc, lc);
IA64_NATIVE_PATCH_DEFINE_AR(ec, ec);
IA64_NATIVE_PATCH_DEFINE_CR(dcr, dcr);
IA64_NATIVE_PATCH_DEFINE_CR(itm, itm);
IA64_NATIVE_PATCH_DEFINE_CR(iva, iva);
IA64_NATIVE_PATCH_DEFINE_CR(pta, pta);
IA64_NATIVE_PATCH_DEFINE_CR(ipsr, ipsr);
IA64_NATIVE_PATCH_DEFINE_CR(isr, isr);
IA64_NATIVE_PATCH_DEFINE_CR(iip, iip);
IA64_NATIVE_PATCH_DEFINE_CR(ifa, ifa);
IA64_NATIVE_PATCH_DEFINE_CR(itir, itir);
IA64_NATIVE_PATCH_DEFINE_CR(iipa, iipa);
IA64_NATIVE_PATCH_DEFINE_CR(ifs, ifs);
IA64_NATIVE_PATCH_DEFINE_CR(iim, iim);
IA64_NATIVE_PATCH_DEFINE_CR(iha, iha);
IA64_NATIVE_PATCH_DEFINE_CR(lid, lid);
IA64_NATIVE_PATCH_DEFINE_CR(ivr, ivr);
IA64_NATIVE_PATCH_DEFINE_CR(tpr, tpr);
IA64_NATIVE_PATCH_DEFINE_CR(eoi, eoi);
IA64_NATIVE_PATCH_DEFINE_CR(irr0, irr0);
IA64_NATIVE_PATCH_DEFINE_CR(irr1, irr1);
IA64_NATIVE_PATCH_DEFINE_CR(irr2, irr2);
IA64_NATIVE_PATCH_DEFINE_CR(irr3, irr3);
IA64_NATIVE_PATCH_DEFINE_CR(itv, itv);
IA64_NATIVE_PATCH_DEFINE_CR(pmv, pmv);
IA64_NATIVE_PATCH_DEFINE_CR(cmcv, cmcv);
IA64_NATIVE_PATCH_DEFINE_CR(lrr0, lrr0);
IA64_NATIVE_PATCH_DEFINE_CR(lrr1, lrr1);
/*
 * Table of native replacement bundles, searched by patch type in
 * ia64_native_patch_bundle().  Each entry gives the start and end address of
 * a ia64_native_<name>_direct_{start,end} code range and the
 * PARAVIRT_PATCH_TYPE_* value it replaces.
 */
static const struct paravirt_patch_bundle_elem ia64_native_patch_bundle_elems[]
__initdata_or_module =
{
/* Entry for a plain operation: type is PARAVIRT_PATCH_TYPE_<type>. */
#define IA64_NATIVE_PATCH_BUNDLE_ELEM(name, type)		\
	{							\
		(void*)ia64_native_ ## name ## _direct_start,	\
		(void*)ia64_native_ ## name ## _direct_end,	\
		PARAVIRT_PATCH_TYPE_ ## type,			\
	}

	IA64_NATIVE_PATCH_BUNDLE_ELEM(fc, FC),
	IA64_NATIVE_PATCH_BUNDLE_ELEM(thash, THASH),
	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_cpuid, GET_CPUID),
	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_pmd, GET_PMD),
	IA64_NATIVE_PATCH_BUNDLE_ELEM(ptcga, PTCGA),
	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_rr, GET_RR),
	IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr, SET_RR),
	IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr0_to_rr4, SET_RR0_TO_RR4),
	IA64_NATIVE_PATCH_BUNDLE_ELEM(ssm_i, SSM_I),
	IA64_NATIVE_PATCH_BUNDLE_ELEM(rsm_i, RSM_I),
	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_psr_i, GET_PSR_I),
	IA64_NATIVE_PATCH_BUNDLE_ELEM(intrin_local_irq_restore,
				      INTRIN_LOCAL_IRQ_RESTORE),

/* Register getter: type is PARAVIRT_PATCH_TYPE_GETREG + _IA64_REG_<reg>. */
#define IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg)			\
	{								\
		(void*)ia64_native_get_ ## name ## _direct_start,	\
		(void*)ia64_native_get_ ## name ## _direct_end,		\
		PARAVIRT_PATCH_TYPE_GETREG + _IA64_REG_ ## reg,		\
	}

/* Register setter: type is PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_<reg>. */
#define IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg)			\
	{								\
		(void*)ia64_native_set_ ## name ## _direct_start,	\
		(void*)ia64_native_set_ ## name ## _direct_end,		\
		PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_ ## reg,		\
	}

/*
 * Getter + setter pair.  The last line must not end in a backslash, or the
 * #define that follows would be spliced into this macro's body.
 */
#define IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(name, reg)		\
	IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg),	\
	IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg)

#define IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(name, reg)			\
	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar_ ## name, AR_ ## reg)

#define IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(name, reg)			\
	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(cr_ ## name, CR_ ## reg)

	IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(psr, PSR),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(tp, TP),

	IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(psr_l, PSR_L),

	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(gp, GP),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(sp, SP),

	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr0, AR_KR0),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr1, AR_KR1),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr2, AR_KR2),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr3, AR_KR3),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr4, AR_KR4),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr5, AR_KR5),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr6, AR_KR6),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr7, AR_KR7),

	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rsc, RSC),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bsp, BSP),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bspstore, BSPSTORE),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rnat, RNAT),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fcr, FCR),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(eflag, EFLAG),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(csd, CSD),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ssd, SSD),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar27, AR_CFLAG),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fsr, FSR),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fir, FIR),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fdr, FDR),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ccv, CCV),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(unat, UNAT),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fpsr, FPSR),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(itc, ITC),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(pfs, PFS),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(lc, LC),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ec, EC),

	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(dcr, DCR),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itm, ITM),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iva, IVA),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pta, PTA),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ipsr, IPSR),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(isr, ISR),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iip, IIP),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifa, IFA),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itir, ITIR),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iipa, IIPA),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifs, IFS),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iim, IIM),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iha, IHA),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lid, LID),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ivr, IVR),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(tpr, TPR),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(eoi, EOI),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr0, IRR0),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr1, IRR1),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr2, IRR2),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr3, IRR3),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itv, ITV),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pmv, PMV),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(cmcv, CMCV),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr0, LRR0),
	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr1, LRR1),
};
/*
 * Native patch_bundle hook: look @type up in ia64_native_patch_bundle_elems[]
 * and copy the matching replacement over [@sbundle, @ebundle).  Returns
 * whatever __paravirt_patch_apply_bundle() returns (bytes used, 0 if none).
 */
unsigned long __init_or_module
ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type)
{
	const unsigned long count =
		sizeof(ia64_native_patch_bundle_elems) /
		sizeof(ia64_native_patch_bundle_elems[0]);
	unsigned long used;

	used = __paravirt_patch_apply_bundle(sbundle, ebundle, type,
					     ia64_native_patch_bundle_elems,
					     count, NULL);
	return used;
}
#endif /* ASM_SUPPORTED */
/* Native assembly entry points used as branch targets below. */
extern const char ia64_native_switch_to[];
extern const char ia64_native_leave_syscall[];
extern const char ia64_native_work_processed_syscall[];
extern const char ia64_native_leave_kernel[];

/*
 * Map each PARAVIRT_PATCH_TYPE_BR_* type to the native entry point a patched
 * branch of that type should jump to.
 */
const struct paravirt_patch_branch_target ia64_native_branch_target[]
__initconst = {
#define PARAVIRT_BR_TARGET(name, type)			\
	{						\
		ia64_native_ ## name,			\
		PARAVIRT_PATCH_TYPE_BR_ ## type,	\
	}

	PARAVIRT_BR_TARGET(switch_to, SWITCH_TO),
	PARAVIRT_BR_TARGET(leave_syscall, LEAVE_SYSCALL),
	PARAVIRT_BR_TARGET(work_processed_syscall, WORK_PROCESSED_SYSCALL),
	PARAVIRT_BR_TARGET(leave_kernel, LEAVE_KERNEL),
};
/*
 * Native patch_branch hook: retarget the branch at @tag to the native entry
 * registered for @type in ia64_native_branch_target[].
 */
static void __init
ia64_native_patch_branch(unsigned long tag, unsigned long type)
{
	const unsigned long count = sizeof(ia64_native_branch_target) /
				    sizeof(ia64_native_branch_target[0]);

	__paravirt_patch_apply_branch(tag, type,
				      ia64_native_branch_target, count);
}

View file

@ -0,0 +1,28 @@
/******************************************************************************
* linux/arch/ia64/xen/paravirt_inst.h
*
* Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
* VA Linux Systems Japan K.K.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#ifdef __IA64_ASM_PARAVIRTUALIZED_PVCHECK
#include <asm/native/pvchk_inst.h>
#else
#include <asm/native/inst.h>
#endif

View file

@ -0,0 +1,514 @@
/******************************************************************************
* linux/arch/ia64/xen/paravirt_patch.c
*
* Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
* VA Linux Systems Japan K.K.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include <linux/init.h>
#include <asm/intrinsics.h>
#include <asm/kprobes.h>
#include <asm/paravirt.h>
#include <asm/paravirt_patch.h>
/*
 * One instruction slot held in a 64-bit word.  The bit-field view names only
 * the fields common to the encodings used in this file; the widths add up to
 * 64 bits (6 + 31 + 4 + 23).
 * NOTE(review): the field positions assume the compiler allocates bit-fields
 * starting from the least significant bit -- confirm for the target ABI.
 */
typedef union ia64_inst {
struct {
unsigned long long qp : 6;		/* qualifying predicate */
unsigned long long : 31;		/* format-specific bits, unnamed here */
unsigned long long opcode : 4;		/* major opcode */
unsigned long long reserved : 23;	/* padding up to 64 bits */
} generic;
unsigned long long l;			/* raw value */
} ia64_inst_t;
/*
 * Flush the instruction cache for [instr, instr + size), one bundle-sized
 * step at a time, via paravirt_fc_i().
 *
 * flush_icache_range() cannot be used: this code runs before cpu_init() has
 * set up ia64_i_cache_stride_shift, which flush_icache_range() relies on.
 */
void __init_or_module
paravirt_flush_i_cache_range(const void *instr, unsigned long size)
{
	extern void paravirt_fc_i(const void *addr);
	unsigned long offset = 0;

	while (offset < size) {
		paravirt_fc_i(instr + offset);
		offset += sizeof(bundle_t);
	}
}
/* Strip the slot number (low two bits) from @tag to get the bundle address. */
bundle_t* __init_or_module
paravirt_get_bundle(unsigned long tag)
{
	unsigned long bundle_addr = tag & ~3UL;

	return (bundle_t *)bundle_addr;
}
/* Extract the slot number, stored in the low two bits of @tag. */
unsigned long __init_or_module
paravirt_get_slot(unsigned long tag)
{
	const unsigned long slot_mask = 3UL;

	return tag & slot_mask;
}
/*
 * Number of instruction slots from @stag through @etag inclusive, counting
 * three slots per bundle.
 */
unsigned long __init_or_module
paravirt_get_num_inst(unsigned long stag, unsigned long etag)
{
	bundle_t *first = paravirt_get_bundle(stag);
	bundle_t *last = paravirt_get_bundle(etag);
	unsigned long first_slot = paravirt_get_slot(stag);
	unsigned long last_slot = paravirt_get_slot(etag);

	return (last - first) * 3 + last_slot - first_slot + 1;
}
/*
 * Tag of the instruction slot following @tag: the next slot in the same
 * bundle for slots 0 and 1, slot 0 of the next bundle for slot 2.
 * Any other slot value is a bug.
 */
unsigned long __init_or_module
paravirt_get_next_tag(unsigned long tag)
{
	unsigned long slot = paravirt_get_slot(tag);

	if (slot == 0 || slot == 1)
		return tag + 1;
	if (slot == 2)
		return (unsigned long)(paravirt_get_bundle(tag) + 1);

	BUG();
	/* NOTREACHED */
}
/* Return slot 0 of @bundle, which lives entirely in quad0. */
ia64_inst_t __init_or_module
paravirt_read_slot0(const bundle_t *bundle)
{
	ia64_inst_t slot = { .l = bundle->quad0.slot0 };

	return slot;
}
/*
 * Return slot 1 of @bundle.  The slot straddles the two quads: its low part
 * is quad0.slot1_p0, the rest is quad1.slot1_p1 shifted up by 18 bits.
 */
ia64_inst_t __init_or_module
paravirt_read_slot1(const bundle_t *bundle)
{
	unsigned long long low = bundle->quad0.slot1_p0;
	unsigned long long high = bundle->quad1.slot1_p1;
	ia64_inst_t slot = { .l = low | (high << 18UL) };

	return slot;
}
/* Return slot 2 of @bundle, which lives entirely in quad1. */
ia64_inst_t __init_or_module
paravirt_read_slot2(const bundle_t *bundle)
{
	ia64_inst_t slot = { .l = bundle->quad1.slot2 };

	return slot;
}
/*
 * Read the instruction addressed by @tag (bundle address plus slot number).
 * A slot number outside 0..2 is a bug.
 */
ia64_inst_t __init_or_module
paravirt_read_inst(unsigned long tag)
{
	bundle_t *bundle = paravirt_get_bundle(tag);
	unsigned long slot = paravirt_get_slot(tag);

	if (slot == 0)
		return paravirt_read_slot0(bundle);
	if (slot == 1)
		return paravirt_read_slot1(bundle);
	if (slot == 2)
		return paravirt_read_slot2(bundle);

	BUG();
	/* NOTREACHED */
}
/* Store @inst into slot 0 of @bundle (quad0 only). */
void __init_or_module
paravirt_write_slot0(bundle_t *bundle, ia64_inst_t inst)
{
	unsigned long long raw = inst.l;

	bundle->quad0.slot0 = raw;
}
/*
 * Store @inst into slot 1 of @bundle: the low part goes to quad0.slot1_p0,
 * the bits from position 18 upwards go to quad1.slot1_p1.
 */
void __init_or_module
paravirt_write_slot1(bundle_t *bundle, ia64_inst_t inst)
{
	unsigned long long raw = inst.l;

	bundle->quad0.slot1_p0 = raw;
	bundle->quad1.slot1_p1 = raw >> 18UL;
}
/* Store @inst into slot 2 of @bundle (quad1 only). */
void __init_or_module
paravirt_write_slot2(bundle_t *bundle, ia64_inst_t inst)
{
	unsigned long long raw = inst.l;

	bundle->quad1.slot2 = raw;
}
/*
 * Write @inst into the slot addressed by @tag, then flush the i-cache for
 * the containing bundle.  A slot number outside 0..2 is a bug.
 */
void __init_or_module
paravirt_write_inst(unsigned long tag, ia64_inst_t inst)
{
	bundle_t *bundle = paravirt_get_bundle(tag);
	unsigned long slot = paravirt_get_slot(tag);

	if (slot == 0)
		paravirt_write_slot0(bundle, inst);
	else if (slot == 1)
		paravirt_write_slot1(bundle, inst);
	else if (slot == 2)
		paravirt_write_slot2(bundle, inst);
	else
		BUG();

	paravirt_flush_i_cache_range(bundle, sizeof(*bundle));
}
/*
 * Debug aid: print the two raw 64-bit words of @bundle, its template, the
 * raw slot bit-fields and the three reassembled slot values.
 */
void
paravirt_print_bundle(const bundle_t *bundle)
{
	const unsigned long *words = (const unsigned long *)bundle;
	ia64_inst_t s0 = paravirt_read_slot0(bundle);
	ia64_inst_t s1 = paravirt_read_slot1(bundle);
	ia64_inst_t s2 = paravirt_read_slot2(bundle);

	printk(KERN_DEBUG "bundle 0x%p 0x%016lx 0x%016lx\n",
	       bundle, words[0], words[1]);
	printk(KERN_DEBUG "bundle template 0x%x\n", bundle->quad0.template);
	printk(KERN_DEBUG
	       "slot0 0x%lx slot1_p0 0x%lx slot1_p1 0x%lx slot2 0x%lx\n",
	       (unsigned long)bundle->quad0.slot0,
	       (unsigned long)bundle->quad0.slot1_p0,
	       (unsigned long)bundle->quad1.slot1_p1,
	       (unsigned long)bundle->quad1.slot2);
	printk(KERN_DEBUG
	       "slot0 0x%016llx slot1 0x%016llx slot2 0x%016llx\n",
	       s0.l, s1.l, s2.l);
}
/*
 * Set to 1 by the "noreplace-paravirt" boot parameter.  When non-zero, the
 * paravirt_patch_apply_*() functions in this file return without patching.
 */
static int noreplace_paravirt __init_or_module = 0;
/* Handler for "noreplace-paravirt": @str is ignored; returns 1. */
static int __init setup_noreplace_paravirt(char *str)
{
noreplace_paravirt = 1;
return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#ifdef ASM_SUPPORTED
/*
 * Overwrite [sbundle, ebundle) with copies of paravirt_nop_bundle.
 * Both ends must be bundle-aligned.
 */
static void __init_or_module
fill_nop_bundle(void *sbundle, void *ebundle)
{
	extern const char paravirt_nop_bundle[];
	extern const unsigned long paravirt_nop_bundle_size;
	void *cur;

	BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0);
	BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0);

	for (cur = sbundle; cur < ebundle; cur += paravirt_nop_bundle_size)
		memcpy(cur, paravirt_nop_bundle, paravirt_nop_bundle_size);
}
/*
 * Helper: find the replacement for patch @type in @elems and copy it over
 * the patch site [sbundle, ebundle).
 *
 * @sbundle, @ebundle: bundle-aligned start/end of the site to patch.
 * @type:    patch type to look up.
 * @elems:   table of candidate replacements, @nelems entries.
 * @found:   optional out-parameter (may be NULL).  On return *found is the
 *           first table entry whose type matches, or NULL if there is none.
 *           It is set even when the entry does not fit and nothing is copied.
 *
 * Returns the number of bytes copied, or 0 if no entry matched or the
 * matching entry is larger than the site.
 */
unsigned long __init_or_module
__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type,
			      const struct paravirt_patch_bundle_elem *elems,
			      unsigned long nelems,
			      const struct paravirt_patch_bundle_elem **found)
{
	unsigned long used = 0;
	unsigned long i;

	BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0);
	BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0);

	/*
	 * Reset the caller's result, not the local pointer: assigning NULL to
	 * 'found' itself would disable the '*found = p' store below and leave
	 * the caller's variable uninitialized.
	 */
	if (found != NULL)
		*found = NULL;

	for (i = 0; i < nelems; i++) {
		const struct paravirt_patch_bundle_elem *p = &elems[i];
		unsigned long need;
		unsigned long room;

		if (p->type != type)
			continue;

		need = p->ebundle - p->sbundle;
		room = ebundle - sbundle;

		if (found != NULL)
			*found = p;

		if (room < need) {
			/* no room to replace. skip it */
			printk(KERN_DEBUG
			       "the space is too small to put "
			       "bundles. type %ld need %ld room %ld\n",
			       type, need, room);
			break;
		}

		used = need;
		memcpy(sbundle, p->sbundle, used);
		break;
	}

	return used;
}
/*
 * Apply bundle patches to every site in [start, end).  For each site the
 * pv_init_ops.patch_bundle hook writes the replacement; the rest of the site
 * is filled with nop bundles and the site is flushed from the i-cache.
 * Does nothing when "noreplace-paravirt" was given or no hook is installed.
 */
void __init_or_module
paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start,
			    const struct paravirt_patch_site_bundle *end)
{
	const struct paravirt_patch_site_bundle *site;

	if (noreplace_paravirt || pv_init_ops.patch_bundle == NULL)
		return;

	for (site = start; site < end; site++) {
		unsigned long used =
			(*pv_init_ops.patch_bundle)(site->sbundle,
						    site->ebundle,
						    site->type);

		if (used != 0) {
			fill_nop_bundle(site->sbundle + used, site->ebundle);
			paravirt_flush_i_cache_range(site->sbundle,
						     site->ebundle -
						     site->sbundle);
		}
	}
	ia64_sync_i();
	ia64_srlz_i();
}
/*
 * Fill the slots from @stag up to (not including) @etag with a nop taken
 * from slot 0 of paravirt_nop_mfi_inst_bundle.
 *
 * nop.i, nop.m and nop.f share one format, so the same encoding serves all
 * three.  nop.b has a different format and is not supported for now.
 */
static void __init_or_module
fill_nop_inst(unsigned long stag, unsigned long etag)
{
	extern const bundle_t paravirt_nop_mfi_inst_bundle[];
	const ia64_inst_t nop_inst =
		paravirt_read_slot0(paravirt_nop_mfi_inst_bundle);
	unsigned long cur = stag;

	while (cur < etag) {
		paravirt_write_inst(cur, nop_inst);
		cur = paravirt_get_next_tag(cur);
	}
}
/*
 * Apply instruction patches to every site in [start, end).  The
 * pv_init_ops.patch_inst hook returns the tag just past what it wrote; if
 * that differs from the site's start tag, the slots from there up to the
 * site's end tag are filled with nops and the touched bundles are flushed.
 * Does nothing when "noreplace-paravirt" was given or no hook is installed.
 */
void __init_or_module
paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start,
			  const struct paravirt_patch_site_inst *end)
{
	const struct paravirt_patch_site_inst *site;

	if (noreplace_paravirt || pv_init_ops.patch_inst == NULL)
		return;

	for (site = start; site < end; site++) {
		bundle_t *first;
		bundle_t *past_last;
		unsigned long next =
			(*pv_init_ops.patch_inst)(site->stag, site->etag,
						  site->type);

		if (next != site->stag) {
			fill_nop_inst(next, site->etag);
			first = paravirt_get_bundle(site->stag);
			past_last = paravirt_get_bundle(site->etag) + 1;
			paravirt_flush_i_cache_range(first,
						     (past_last - first) *
						     sizeof(bundle_t));
		}
	}
	ia64_sync_i();
	ia64_srlz_i();
}
#endif /* ASM_SUPPORTED */
/* brl.cond.sptk.many <target64> X3 */
/*
 * Opcode half of an X3 long branch; the named bit-fields total 41 bits
 * (6+3+3+1+20+2+1+1+4).  paravirt_patch_reloc_brl() stores bits [19:0] of the
 * bundle displacement in imm20b and bit 59 in i.
 */
typedef union inst_x3_op {
ia64_inst_t inst;
struct {
unsigned long qp: 6;
unsigned long btyp: 3;
unsigned long unused: 3;
unsigned long p: 1;
unsigned long imm20b: 20;
unsigned long wh: 2;
unsigned long d: 1;
unsigned long i: 1;
unsigned long opcode: 4;
};
unsigned long l;	/* raw value */
} inst_x3_op_t;
/*
 * Immediate half of an X3 long branch: a 39-bit field above two unused bits.
 * paravirt_patch_reloc_brl() stores bits [58:20] of the bundle displacement
 * in imm39.
 */
typedef union inst_x3_imm {
ia64_inst_t inst;
struct {
unsigned long unused: 2;
unsigned long imm39: 39;
};
unsigned long l;	/* raw value */
} inst_x3_imm_t;
/*
 * Retarget the long branch (brl) at @tag to @target.
 *
 * @tag must address slot 1 of its bundle and @target must be bundle-aligned;
 * both are enforced with BUG_ON.  The displacement from the branch's bundle
 * to @target, in 16-byte units, is split across the two instruction slots:
 * the slot at @tag receives imm39, the following slot receives imm20b and
 * the i bit.
 */
void __init_or_module
paravirt_patch_reloc_brl(unsigned long tag, const void *target)
{
/* the opcode half sits in the slot after @tag, the immediate half at @tag */
unsigned long tag_op = paravirt_get_next_tag(tag);
unsigned long tag_imm = tag;
bundle_t *bundle = paravirt_get_bundle(tag);
ia64_inst_t inst_op = paravirt_read_inst(tag_op);
ia64_inst_t inst_imm = paravirt_read_inst(tag_imm);
inst_x3_op_t inst_x3_op = { .l = inst_op.l };
inst_x3_imm_t inst_x3_imm = { .l = inst_imm.l };
/* byte displacement divided by the 16-byte bundle size */
unsigned long imm60 =
((unsigned long)target - (unsigned long)bundle) >> 4;
BUG_ON(paravirt_get_slot(tag) != 1); /* MLX */
BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0);
/* imm60[59] 1bit */
inst_x3_op.i = (imm60 >> 59) & 1;
/* imm60[19:0] 20bit */
inst_x3_op.imm20b = imm60 & ((1UL << 20) - 1);
/* imm60[58:20] 39bit */
inst_x3_imm.imm39 = (imm60 >> 20) & ((1UL << 39) - 1);
inst_op.l = inst_x3_op.l;
inst_imm.l = inst_x3_imm.l;
/* each write also flushes the containing bundle from the i-cache */
paravirt_write_inst(tag_op, inst_op);
paravirt_write_inst(tag_imm, inst_imm);
}
/* br.cond.sptk.many <target25> B1 */
/*
 * IP-relative branch, format B1; the named bit-fields total 41 bits.
 * paravirt_patch_reloc_br() stores the displacement in 16-byte units in
 * imm20b and its sign in s.
 */
typedef union inst_b1 {
ia64_inst_t inst;
struct {
unsigned long qp: 6;
unsigned long btype: 3;
unsigned long unused: 3;
unsigned long p: 1;
unsigned long imm20b: 20;
unsigned long wh: 2;
unsigned long d: 1;
unsigned long s: 1;
unsigned long opcode: 4;
};
unsigned long l;	/* raw value */
} inst_b1_t;
/*
 * Retarget the IP-relative branch (format B1) at @tag to @target, which must
 * be bundle-aligned.  The sign of the displacement goes into the s bit and
 * the displacement in 16-byte units into imm20b (truncated to the field).
 */
void __init
paravirt_patch_reloc_br(unsigned long tag, const void *target)
{
	bundle_t *from = paravirt_get_bundle(tag);
	ia64_inst_t inst = paravirt_read_inst(tag);
	unsigned long target25 = (unsigned long)target - (unsigned long)from;
	inst_b1_t inst_b1;

	BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0);

	inst_b1.l = inst.l;
	inst_b1.s = (target25 & (1UL << 63)) ? 1 : 0;
	inst_b1.imm20b = target25 >> 4;

	inst.l = inst_b1.l;
	paravirt_write_inst(tag, inst);
}
/*
 * Helper: retarget the branch at @tag to the entry of the first element of
 * @entries whose type equals @type.  Does nothing if no element matches.
 */
void __init
__paravirt_patch_apply_branch(
	unsigned long tag, unsigned long type,
	const struct paravirt_patch_branch_target *entries,
	unsigned int nr_entries)
{
	unsigned int idx;

	for (idx = 0; idx < nr_entries; idx++) {
		if (entries[idx].type != type)
			continue;
		paravirt_patch_reloc_br(tag, entries[idx].entry);
		return;
	}
}
/*
 * Run the pv_init_ops.patch_branch hook on every branch site in
 * [start, end), then synchronize the instruction stream.  Does nothing when
 * "noreplace-paravirt" was given or no hook is installed.
 */
static void __init
paravirt_patch_apply_branch(const struct paravirt_patch_site_branch *start,
			    const struct paravirt_patch_site_branch *end)
{
	const struct paravirt_patch_site_branch *site = start;

	if (noreplace_paravirt || pv_init_ops.patch_branch == NULL)
		return;

	while (site < end) {
		(*pv_init_ops.patch_branch)(site->tag, site->type);
		site++;
	}
	ia64_sync_i();
	ia64_srlz_i();
}
/*
 * Apply all recorded patch sites, in order: bundles, then single
 * instructions, then branches.  The site tables are delimited by the
 * __start_ and __stop_ paravirt_* symbols declared below.
 */
void __init
paravirt_patch_apply(void)
{
	extern const char __start_paravirt_bundles[];
	extern const char __stop_paravirt_bundles[];
	extern const char __start_paravirt_insts[];
	extern const char __stop_paravirt_insts[];
	extern const char __start_paravirt_branches[];
	extern const char __stop_paravirt_branches[];

	const struct paravirt_patch_site_bundle *bundles_begin =
		(const struct paravirt_patch_site_bundle *)
		__start_paravirt_bundles;
	const struct paravirt_patch_site_bundle *bundles_end =
		(const struct paravirt_patch_site_bundle *)
		__stop_paravirt_bundles;
	const struct paravirt_patch_site_inst *insts_begin =
		(const struct paravirt_patch_site_inst *)
		__start_paravirt_insts;
	const struct paravirt_patch_site_inst *insts_end =
		(const struct paravirt_patch_site_inst *)
		__stop_paravirt_insts;
	const struct paravirt_patch_site_branch *branches_begin =
		(const struct paravirt_patch_site_branch *)
		__start_paravirt_branches;
	const struct paravirt_patch_site_branch *branches_end =
		(const struct paravirt_patch_site_branch *)
		__stop_paravirt_branches;

	paravirt_patch_apply_bundle(bundles_begin, bundles_end);
	paravirt_patch_apply_inst(insts_begin, insts_end);
	paravirt_patch_apply_branch(branches_begin, branches_end);
}
/*
* Local variables:
* mode: C
* c-set-style: "linux"
* c-basic-offset: 8
* tab-width: 8
* indent-tabs-mode: t
* End:
*/

View file

@ -0,0 +1,81 @@
/******************************************************************************
* Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
* VA Linux Systems Japan K.K.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include <linux/bug.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <asm/paravirt.h>
/*
 * Declare the pair of symbols
 * __ia64_native_{start,end}_gate_<name>_patchlist for one patch list.
 */
#define DECLARE(name) \
extern unsigned long \
__ia64_native_start_gate_##name##_patchlist[]; \
extern unsigned long \
__ia64_native_end_gate_##name##_patchlist[]
DECLARE(fsyscall);
DECLARE(brl_fsys_bubble_down);
DECLARE(vtop);
DECLARE(mckinley_e9);
/* Address stored in pv_patchdata.gate_section below. */
extern unsigned long __start_gate_section[];
/*
 * Initialise the start_<name>_patchlist / end_<name>_patchlist members with
 * the addresses of the corresponding native symbols.
 */
#define ASSIGN(name) \
.start_##name##_patchlist = \
(unsigned long)__ia64_native_start_gate_##name##_patchlist, \
.end_##name##_patchlist = \
(unsigned long)__ia64_native_end_gate_##name##_patchlist
/*
 * Patch-list boundaries and gate section address; read through
 * paravirt_get_gate_patchlist() and paravirt_get_gate_section().
 */
struct pv_patchdata pv_patchdata __initdata = {
ASSIGN(fsyscall),
ASSIGN(brl_fsys_bubble_down),
ASSIGN(vtop),
ASSIGN(mckinley_e9),
.gate_section = (void*)__start_gate_section,
};
/*
 * Return the start or end address of the gate patch list selected by @type,
 * as recorded in pv_patchdata.  An unknown @type is a bug.
 */
unsigned long __init
paravirt_get_gate_patchlist(enum pv_gate_patchlist type)
{
/*
 * Expand to the START and END cases of one patch list.  The last line must
 * not end in a backslash, or the statement that follows the definition would
 * be spliced into the macro body.
 */
#define CASE(NAME, name)					\
	case PV_GATE_START_##NAME:				\
		return pv_patchdata.start_##name##_patchlist;	\
	case PV_GATE_END_##NAME:				\
		return pv_patchdata.end_##name##_patchlist;

	switch (type) {
		CASE(FSYSCALL, fsyscall);
		CASE(BRL_FSYS_BUBBLE_DOWN, brl_fsys_bubble_down);
		CASE(VTOP, vtop);
		CASE(MCKINLEY_E9, mckinley_e9);
	default:
		BUG();
		break;
	}
	return 0;
}
/* Return the gate section address recorded in pv_patchdata. */
void * __init
paravirt_get_gate_section(void)
{
	void *section = pv_patchdata.gate_section;

	return section;
}

View file

@ -0,0 +1,24 @@
/******************************************************************************
* linux/arch/ia64/xen/paravirt_patchlist.h
*
* Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
* VA Linux Systems Japan K.K.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include <asm/native/patchlist.h>

View file

@ -0,0 +1,121 @@
/******************************************************************************
* linux/arch/ia64/xen/paravirtentry.S
*
* Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
* VA Linux Systems Japan K.K.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include <linux/init.h>
#include <asm/asmmacro.h>
#include <asm/asm-offsets.h>
#include <asm/paravirt_privop.h>
#include <asm/paravirt_patch.h>
#include "entry.h"
/*
 * Define the global, 8-byte-aligned symbol `sym` in .data..read_mostly,
 * holding one data8 initialised to init_value.
 */
#define DATA8(sym, init_value) \
.pushsection .data..read_mostly ; \
.align 8 ; \
.global sym ; \
sym: ; \
data8 init_value ; \
.popsection
/*
 * Indirect branch through a variable: mark a branch patch site of the given
 * type, load the address of `targ` into `reg`, load the 8-byte value stored
 * there, move it to branch register `breg` and branch through it.
 */
#define BRANCH(targ, reg, breg, type) \
PARAVIRT_PATCH_SITE_BR(PARAVIRT_PATCH_TYPE_BR_ ## type) ; \
;; \
movl reg=targ ; \
;; \
ld8 reg=[reg] ; \
;; \
mov breg=reg ; \
br.cond.sptk.many breg
/*
 * Define the variable paravirt_<sym>_targ (initialised to ia64_native_<sym>)
 * and the global entry paravirt_<sym>, which branches through that variable.
 */
#define BRANCH_PROC(sym, reg, breg, type) \
DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \
GLOBAL_ENTRY(paravirt_ ## sym) ; \
BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ; \
END(paravirt_ ## sym)
/* Same as BRANCH_PROC, with PT_REGS_UNWIND_INFO(0) emitted at the entry. */
#define BRANCH_PROC_UNWINFO(sym, reg, breg, type) \
DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \
GLOBAL_ENTRY(paravirt_ ## sym) ; \
PT_REGS_UNWIND_INFO(0) ; \
BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ; \
END(paravirt_ ## sym)
/*
 * The four indirect entry points; their paravirt_*_targ variables are the
 * ones assigned by paravirt_cpu_asm_init().
 */
BRANCH_PROC(switch_to, r22, b7, SWITCH_TO)
BRANCH_PROC_UNWINFO(leave_syscall, r22, b7, LEAVE_SYSCALL)
BRANCH_PROC(work_processed_syscall, r2, b7, WORK_PROCESSED_SYSCALL)
BRANCH_PROC_UNWINFO(leave_kernel, r22, b7, LEAVE_KERNEL)
/*
 * Section selectors for the helpers below: plain .text/.data when
 * CONFIG_MODULES is set, the init sections (__INIT/__INITDATA) otherwise.
 */
#ifdef CONFIG_MODULES
#define __INIT_OR_MODULE .text
#define __INITDATA_OR_MODULE .data
#else
#define __INIT_OR_MODULE __INIT
#define __INITDATA_OR_MODULE __INITDATA
#endif /* CONFIG_MODULES */
/*
 * paravirt_fc_i: issue fc.i on the address held in r32, then return.
 * NOTE(review): r32 is presumably the routine's single address argument --
 * confirm against the C declaration used by its callers.
 */
__INIT_OR_MODULE
GLOBAL_ENTRY(paravirt_fc_i)
fc.i r32
br.ret.sptk.many rp
END(paravirt_fc_i)
__FINIT
/* A 32-byte-aligned bundle holding three nop.b instructions. */
__INIT_OR_MODULE
.align 32
GLOBAL_ENTRY(paravirt_nop_b_inst_bundle)
{
nop.b 0
nop.b 0
nop.b 0
}
END(paravirt_nop_b_inst_bundle)
__FINIT
/* NOTE: nop.[mfi] has same format */
/* A bundle holding nop.m, nop.f and nop.i, in that slot order. */
__INIT_OR_MODULE
GLOBAL_ENTRY(paravirt_nop_mfi_inst_bundle)
{
nop.m 0
nop.f 0
nop.i 0
}
END(paravirt_nop_mfi_inst_bundle)
__FINIT
/*
 * A bundle of three generic nops.  The local start/end labels bracket it so
 * that paravirt_nop_bundle_size can be computed from them.
 */
__INIT_OR_MODULE
GLOBAL_ENTRY(paravirt_nop_bundle)
paravirt_nop_bundle_start:
{
nop 0
nop 0
nop 0
}
paravirt_nop_bundle_end:
END(paravirt_nop_bundle)
__FINIT
/*
 * 8-byte global holding the size of paravirt_nop_bundle, computed as
 * paravirt_nop_bundle_end - paravirt_nop_bundle_start.
 */
__INITDATA_OR_MODULE
.align 8
.global paravirt_nop_bundle_size
paravirt_nop_bundle_size:
data8 paravirt_nop_bundle_end - paravirt_nop_bundle_start

256
arch/ia64/kernel/patch.c Normal file
View file

@ -0,0 +1,256 @@
/*
* Instruction-patching support.
*
* Copyright (C) 2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <linux/init.h>
#include <linux/string.h>
#include <asm/paravirt.h>
#include <asm/patch.h>
#include <asm/processor.h>
#include <asm/sections.h>
#include <asm/unistd.h>
/*
 * This was adapted from code written by Tony Luck:
 *
 * The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle
 * like this:
 *
 * 6  6         5         4         3         2         1
 * 3210987654321098765432109876543210987654321098765432109876543210
 * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG
 *
 * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB
 *
 * The first row is the assembled 64-bit value; the last two rows are the
 * first and second 64-bit words of the bundle (p[0] and p[1] below).
 */
/*
 * Reassemble the 64-bit immediate of the "movl" in the bundle containing
 * insn_addr.  The slot number in the low four address bits is ignored.
 */
static u64
get_imm64 (u64 insn_addr)
{
u64 *p = (u64 *) (insn_addr & -16); /* mask out slot number */
return ( (p[1] & 0x0800000000000000UL) << 4) | /*A*/
((p[1] & 0x00000000007fffffUL) << 40) | /*B*/
((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/
((p[1] & 0x0000100000000000UL) >> 23) | /*D*/
((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/
((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/
((p[1] & 0x000007f000000000UL) >> 36); /*G*/
}
/*
 * Patch instruction with "val" where "mask" has 1 bits.
 *
 * insn_addr is the bundle address plus the slot number in its low four bits.
 * mask and val are relative to the start of the 41-bit instruction; they are
 * shifted to the slot's bit position within the 128-bit bundle and merged
 * into the bundle's two 64-bit words.
 */
void
ia64_patch (u64 insn_addr, u64 mask, u64 val)
{
u64 m0, m1, v0, v1, b0, b1, *b = (u64 *) (insn_addr & -16);
/* 41-bit instruction mask; not referenced in this function body */
# define insn_mask ((1UL << 41) - 1)
unsigned long shift;
b0 = b[0]; b1 = b[1];
shift = 5 + 41 * (insn_addr % 16); /* 5 bits of template, then 3 x 41-bit instructions */
if (shift >= 64) {
/* slot lies entirely in the second word: only b[1] changes */
m1 = mask << (shift - 64);
v1 = val << (shift - 64);
} else {
/* slot starts in the first word and may spill into the second */
m0 = mask << shift; m1 = mask >> (64 - shift);
v0 = val << shift; v1 = val >> (64 - shift);
b[0] = (b0 & ~m0) | (v0 & m0);
}
b[1] = (b1 & ~m1) | (v1 & m1);
}
/*
 * Store the 64-bit immediate "val" into the long instruction of the bundle
 * containing insn_addr.  Bits 63, 21, 20..16, 15..7 and 6..0 of val are
 * scattered into slot 2 as annotated below; val >> 22 is written into the
 * low 41 bits of slot 1.
 */
void
ia64_patch_imm64 (u64 insn_addr, u64 val)
{
/* The assembler may generate offset pointing to either slot 1
or slot 2 for a long (2-slot) instruction, occupying slots 1
and 2. */
insn_addr &= -16UL;
ia64_patch(insn_addr + 2,
0x01fffefe000UL, ( ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */
| ((val & 0x0000000000200000UL) << 0) /* bit 21 -> 21 */
| ((val & 0x00000000001f0000UL) << 6) /* bit 16 -> 22 */
| ((val & 0x000000000000ff80UL) << 20) /* bit 7 -> 27 */
| ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */));
ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22);
}
/*
 * Store the 60-bit immediate "val" into the long instruction of the bundle
 * containing insn_addr.  Bit 59 and bits 19..0 of val are placed into
 * slot 2 as annotated below; val >> 18 is written into slot 1 under a mask
 * that leaves the two lowest bits of that slot untouched.
 */
void
ia64_patch_imm60 (u64 insn_addr, u64 val)
{
/* The assembler may generate offset pointing to either slot 1
or slot 2 for a long (2-slot) instruction, occupying slots 1
and 2. */
insn_addr &= -16UL;
ia64_patch(insn_addr + 2,
0x011ffffe000UL, ( ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */
| ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */));
ia64_patch(insn_addr + 1, 0x1fffffffffcUL, val >> 18);
}
/*
 * Some code needs the physical address of a kernel object.  Usually the
 * virtual address can be translated at execution time, but in a few
 * places (for performance reasons, or during error recovery) that is not
 * possible.  Each entry of the patch list [start, end) is a signed 32-bit
 * offset, relative to the entry itself, of a bundle whose 64-bit immediate
 * holds a virtual address; that immediate is rewritten in place with the
 * corresponding physical address.
 */
void __init
ia64_patch_vtop (unsigned long start, unsigned long end)
{
	s32 *entry;
	s32 *limit = (s32 *) end;

	for (entry = (s32 *) start; entry < limit; entry++) {
		/* entries are self-relative offsets to the instruction */
		u64 insn = (u64) entry + *entry;
		u64 vaddr = get_imm64(insn);

		/* replace virtual address with corresponding physical address: */
		ia64_patch_imm64(insn, ia64_tpa(vaddr));
		ia64_fc((void *) insn);
	}
	ia64_sync_i();
	ia64_srlz_i();
}
/*
 * Disable the RSE workaround: every place that uses the workaround was
 * tagged with a conditional branch, and each tagged branch is turned into
 * an unconditional one by clearing bits 23..27 of the second word of its
 * bundle.  The patch list [start, end) holds self-relative signed 32-bit
 * offsets to the tagged instructions.
 */
void __init
ia64_patch_rse (unsigned long start, unsigned long end)
{
	s32 *entry;
	s32 *limit = (s32 *) end;

	for (entry = (s32 *) start; entry < limit; entry++) {
		u64 insn = (u64) entry + *entry;
		u64 *bundle = (u64 *)(insn & -16);

		bundle[1] &= ~0xf800000L;
		ia64_fc((void *) insn);
	}
	ia64_sync_i();
	ia64_srlz_i();
}
/*
 * Remove the McKinley Errata 9 workaround on CPUs that do not need it.
 *
 * The workaround is kept (and nothing is patched) when the local CPU
 * reports family 0x1f, model 0; this is announced once, on the first call.
 * Otherwise each location named by the patch list [start, end) (entries
 * are self-relative signed 32-bit offsets) has its two bundles overwritten
 * with the fixed encodings below.
 */
void __init
ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
{
	static int first_time = 1;
	int keep_workaround;
	s32 *entry;
	s32 *limit = (s32 *) end;
	u64 *bundles;

	keep_workaround = (local_cpu_data->family == 0x1f && local_cpu_data->model == 0);

	if (first_time) {
		first_time = 0;
		if (keep_workaround)
			printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n");
	}

	if (keep_workaround)
		return;

	for (entry = (s32 *) start; entry < limit; entry++) {
		bundles = (u64 *) ia64_imva((char *) entry + *entry);

		/* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
		bundles[0] = 0x0000000100000011UL;
		bundles[1] = 0x0084006880000200UL;
		/* nop.m 0; nop.i 0; nop.i 0 */
		bundles[2] = 0x0000000100000000UL;
		bundles[3] = 0x0004000000000200UL;

		/* flush both rewritten bundles */
		ia64_fc(bundles);
		ia64_fc(bundles + 2);
	}
	ia64_sync_i();
	ia64_srlz_i();
}
/* Native fsyscall table and fsys_bubble_down entry, defined elsewhere. */
extern unsigned long ia64_native_fsyscall_table[NR_syscalls];
extern char ia64_native_fsys_bubble_down[];
/*
 * Init-time record of the fsyscall table and fsys_bubble_down address used
 * when patching the gate page; it defaults to the native implementations.
 * NOTE(review): presumably a paravirt backend may overwrite these before
 * ia64_patch_gate() runs -- confirm against the paravirt code.
 */
struct pv_fsys_data pv_fsys_data __initdata = {
.fsyscall_table = (unsigned long *)ia64_native_fsyscall_table,
.fsys_bubble_down = (void *)ia64_native_fsys_bubble_down,
};
/* Return the fsyscall table currently recorded in pv_fsys_data. */
unsigned long * __init
paravirt_get_fsyscall_table(void)
{
	unsigned long *table = pv_fsys_data.fsyscall_table;

	return table;
}
/* Return the fsys_bubble_down address currently recorded in pv_fsys_data. */
char * __init
paravirt_get_fsys_bubble_down(void)
{
	char *entry_point = pv_fsys_data.fsys_bubble_down;

	return entry_point;
}
/*
 * Write the address of the fsyscall table into the 64-bit immediate of
 * every instruction named by the patch list [start, end).  Entries are
 * self-relative signed 32-bit offsets; the resulting address is passed
 * through ia64_imva() before it is patched and flushed.
 */
static void __init
patch_fsyscall_table (unsigned long start, unsigned long end)
{
	u64 table_addr = (u64)paravirt_get_fsyscall_table();
	s32 *entry;
	s32 *limit = (s32 *) end;

	for (entry = (s32 *) start; entry < limit; entry++) {
		u64 insn = (u64) ia64_imva((char *) entry + *entry);

		ia64_patch_imm64(insn, table_addr);
		ia64_fc((void *) insn);
	}
	ia64_sync_i();
	ia64_srlz_i();
}
/*
 * Point every long branch named by the patch list [start, end) at
 * fsys_bubble_down.  Entries are self-relative signed 32-bit offsets.
 * The 60-bit immediate written is the distance from the branch's bundle
 * to fsys_bubble_down, divided by 16.
 */
static void __init
patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
{
	u64 target = (u64)paravirt_get_fsys_bubble_down();
	s32 *entry;
	s32 *limit = (s32 *) end;

	for (entry = (s32 *) start; entry < limit; entry++) {
		u64 insn = (u64) entry + *entry;
		/* displacement from the bundle start (slot bits masked off) */
		u64 disp = (u64) (target - (insn & -16)) / 16;

		ia64_patch_imm60((u64) ia64_imva((void *) insn), disp);
		ia64_fc((void *) insn);
	}
	ia64_sync_i();
	ia64_srlz_i();
}
/*
 * Apply all gate patch lists, in this order: fsyscall table, brl to
 * fsys_bubble_down, virtual-to-physical, McKinley Errata 9.  The bounds of
 * each list come from paravirt_get_gate_patchlist().
 */
void __init
ia64_patch_gate (void)
{
# define START(name) paravirt_get_gate_patchlist(PV_GATE_START_##name)
# define END(name) paravirt_get_gate_patchlist(PV_GATE_END_##name)
	unsigned long lo, hi;

	lo = START(FSYSCALL);
	hi = END(FSYSCALL);
	patch_fsyscall_table(lo, hi);

	lo = START(BRL_FSYS_BUBBLE_DOWN);
	hi = END(BRL_FSYS_BUBBLE_DOWN);
	patch_brl_fsys_bubble_down(lo, hi);

	lo = START(VTOP);
	hi = END(VTOP);
	ia64_patch_vtop(lo, hi);

	lo = START(MCKINLEY_E9);
	hi = END(MCKINLEY_E9);
	ia64_patch_mckinley_e9(lo, hi);
}
void ia64_patch_phys_stack_reg(unsigned long val)
{
s32 * offp = (s32 *) __start___phys_stack_reg_patchlist;
s32 * end = (s32 *) __end___phys_stack_reg_patchlist;
u64 ip, mask, imm;
/* see instruction format A4: adds r1 = imm13, r3 */
mask = (0x3fUL << 27) | (0x7f << 13);
imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13;
while (offp < end) {
ip = (u64) offp + *offp;
ia64_patch(ip, mask, imm);
ia64_fc((void *)ip);
++offp;
}
ia64_sync_i();
ia64_srlz_i();
}

110
arch/ia64/kernel/pci-dma.c Normal file
View file

@ -0,0 +1,110 @@
/*
* Dynamic DMA mapping support.
*/
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/dmar.h>
#include <asm/iommu.h>
#include <asm/machvec.h>
#include <linux/dma-mapping.h>
#ifdef CONFIG_INTEL_IOMMU
#include <linux/kernel.h>
#include <asm/page.h>
/* Exported DMA address value; NOTE(review): presumably the "mapping failed" marker -- confirm with users. */
dma_addr_t bad_dma_address __read_mostly;
EXPORT_SYMBOL(bad_dma_address);
/* When non-zero, iommu_dma_supported() rejects masks of 40 bits or more. */
static int iommu_sac_force __read_mostly;
/* NOTE(review): not referenced in this file; presumably set by boot-option parsing elsewhere. */
int no_iommu __read_mostly;
/* force_iommu defaults to 1 only when CONFIG_IOMMU_DEBUG is set. */
#ifdef CONFIG_IOMMU_DEBUG
int force_iommu __read_mostly = 1;
#else
int force_iommu __read_mostly;
#endif
/* NOTE(review): not referenced in this file; presumably consumed by the IOMMU driver. */
int iommu_pass_through;
/* DMA operations of the Intel IOMMU driver, installed by pci_iommu_alloc(). */
extern struct dma_map_ops intel_dma_ops;
/*
 * Initialize the Intel IOMMU if one was detected.  Always returns 0; the
 * result of intel_iommu_init() is not propagated.
 */
static int __init pci_iommu_init(void)
{
	if (!iommu_detected)
		return 0;

	intel_iommu_init();
	return 0;
}
/* Must execute after PCI subsystem */
fs_initcall(pci_iommu_init);
/* IOMMU shutdown hook; intentionally does nothing here. */
void pci_iommu_shutdown(void)
{
}
/* IOMMU DMA initialization hook; intentionally does nothing here. */
void __init
iommu_dma_init(void)
{
}
/*
 * Report whether DMA with the given address mask is supported for dev.
 *
 * Returns 0 when the mask is narrower than 24 bits, or when SAC is being
 * forced (iommu_sac_force) and the mask is 40 bits or wider; returns 1
 * otherwise.
 */
int iommu_dma_supported(struct device *dev, u64 mask)
{
	/*
	 * Copied from i386.  Doesn't make much sense, because it will
	 * only work for pci_alloc_coherent.  The caller just has to use
	 * GFP_DMA in this case.
	 */
	if (mask < DMA_BIT_MASK(24))
		return 0;

	/*
	 * Tell the device to use SAC when IOMMU force is on.  This allows
	 * the driver to use cheaper accesses in some cases.
	 *
	 * The problem with this is that if we overflow the IOMMU area and
	 * return DAC as fallback address, the device may not handle it
	 * correctly.
	 *
	 * As a special case, some controllers have a 39bit address mode
	 * that is as efficient as 32bit (aic79xx).  Don't force SAC for
	 * these.  Assume all masks <= 40 bits are of this type.  Normally
	 * this doesn't make any difference, but gives more gentle handling
	 * of IOMMU overflow.
	 */
	if (!iommu_sac_force || mask < DMA_BIT_MASK(40))
		return 1;

	dev_info(dev, "Force SAC with mask %llx\n", mask);
	return 0;
}
EXPORT_SYMBOL(iommu_dma_supported);
/*
 * Install intel_dma_ops as the active DMA operations, override its sync
 * callbacks and its dma_supported hook, then probe for the Intel IOMMU
 * and (when configured) fall back to swiotlb.
 */
void __init pci_iommu_alloc(void)
{
	struct dma_map_ops *ops = &intel_dma_ops;

	dma_ops = ops;

	/* CPU- and device-direction syncs share the machvec helpers */
	ops->sync_single_for_cpu = machvec_dma_sync_single;
	ops->sync_sg_for_cpu = machvec_dma_sync_sg;
	ops->sync_single_for_device = machvec_dma_sync_single;
	ops->sync_sg_for_device = machvec_dma_sync_sg;
	ops->dma_supported = iommu_dma_supported;

	/*
	 * The order of these functions is important for
	 * fall-back/fail-over reasons
	 */
	detect_intel_iommu();

#ifdef CONFIG_SWIOTLB
	pci_swiotlb_init();
#endif
}
#endif

View file

@ -0,0 +1,67 @@
/* Glue code to lib/swiotlb.c */
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/dma-mapping.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
#include <asm/iommu.h>
#include <asm/machvec.h>
/* Exported flag; set to 1 by pci_swiotlb_init() when it falls back to swiotlb. */
int swiotlb __read_mostly;
EXPORT_SYMBOL(swiotlb);
/*
 * Coherent allocation through swiotlb.  GFP_DMA is added to the allocation
 * flags unless the device's coherent DMA mask is the full 64 bits.  The
 * attrs argument is not used.
 */
static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size,
					 dma_addr_t *dma_handle, gfp_t gfp,
					 struct dma_attrs *attrs)
{
	gfp_t flags = gfp;

	if (dev->coherent_dma_mask != DMA_BIT_MASK(64))
		flags |= GFP_DMA;

	return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
}
/*
 * Release a coherent buffer by forwarding to swiotlb_free_coherent().
 * The attrs argument is not used.
 */
static void ia64_swiotlb_free_coherent(struct device *dev, size_t size,
				       void *vaddr, dma_addr_t dma_addr,
				       struct dma_attrs *attrs)
{
	swiotlb_free_coherent(dev, size, vaddr, dma_addr);
}
/*
 * DMA operations backed by swiotlb.  Coherent alloc/free go through the
 * ia64 wrappers in this file; every other operation uses the generic
 * swiotlb_* implementation directly.
 */
struct dma_map_ops swiotlb_dma_ops = {
.alloc = ia64_swiotlb_alloc_coherent,
.free = ia64_swiotlb_free_coherent,
.map_page = swiotlb_map_page,
.unmap_page = swiotlb_unmap_page,
.map_sg = swiotlb_map_sg_attrs,
.unmap_sg = swiotlb_unmap_sg_attrs,
.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
.sync_single_for_device = swiotlb_sync_single_for_device,
.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
.sync_sg_for_device = swiotlb_sync_sg_for_device,
.dma_supported = swiotlb_dma_supported,
.mapping_error = swiotlb_dma_mapping_error,
};
/* Select the swiotlb DMA operations, then initialize swiotlb itself. */
void __init swiotlb_dma_init(void)
{
	dma_ops = &swiotlb_dma_ops;

	swiotlb_init(1);
}
/*
 * Fall back to swiotlb when no IOMMU was detected.
 *
 * Nothing is done if an IOMMU is present.  Otherwise, on
 * CONFIG_IA64_GENERIC kernels the machine vector is re-initialized to
 * "dig" and swiotlb becomes the DMA implementation; on other
 * configurations the kernel panics.
 */
void __init pci_swiotlb_init(void)
{
	if (iommu_detected)
		return;

#ifdef CONFIG_IA64_GENERIC
	swiotlb = 1;
	printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
	machvec_init("dig");
	swiotlb_init(1);
	dma_ops = &swiotlb_dma_ops;
#else
	panic("Unable to find Intel IOMMU");
#endif
}

6792
arch/ia64/kernel/perfmon.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,296 @@
/*
* Copyright (C) 2002-2003 Hewlett-Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
*
* This file implements the default sampling buffer format
* for the Linux/ia64 perfmon-2 subsystem.
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/init.h>
#include <asm/delay.h>
#include <linux/smp.h>
#include <asm/perfmon.h>
#include <asm/perfmon_default_smpl.h>
/* Module metadata for the default sampling format. */
MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
MODULE_DESCRIPTION("perfmon default sampling format");
MODULE_LICENSE("GPL");
/*
 * Debug printing helpers.  DEFAULT_DEBUG is defined unconditionally just
 * below, so the printing variants are always the ones compiled in; actual
 * output is gated at run time by pfm_sysctl.debug (and, for DPRINT_ovfl,
 * additionally by pfm_sysctl.debug_ovfl).
 *
 * The argument must be a parenthesized printk argument list, e.g.
 * DPRINT(("value=%d\n", v)).  Each message is prefixed with the calling
 * function, line number and CPU.
 */
#define DEFAULT_DEBUG 1
#ifdef DEFAULT_DEBUG
#define DPRINT(a) \
do { \
if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \
} while (0)
#define DPRINT_ovfl(a) \
do { \
if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \
} while (0)
#else
/* no-op variants, used only if DEFAULT_DEBUG is not defined */
#define DPRINT(a)
#define DPRINT_ovfl(a)
#endif
/*
 * Validate the format argument supplied for a new sampling buffer.
 *
 * Returns -EINVAL when no argument was passed or when the requested
 * buffer size is below PFM_DEFAULT_SMPL_MIN_BUF_SIZE, 0 otherwise.
 */
static int
default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data)
{
	pfm_default_smpl_arg_t *smpl_arg;

	if (data == NULL) {
		DPRINT(("[%d] no argument passed\n", task_pid_nr(task)));
		return -EINVAL;
	}
	smpl_arg = (pfm_default_smpl_arg_t*)data;

	DPRINT(("[%d] validate flags=0x%x CPU%d\n", task_pid_nr(task), flags, cpu));

	/*
	 * must hold at least the buffer header + one minimally sized entry
	 */
	if (smpl_arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE)
		return -EINVAL;

	DPRINT(("buf_size=%lu\n", smpl_arg->buf_size));

	return 0;
}
/*
 * Report the sampling buffer size requested in the format argument.
 * Stores arg->buf_size into *size and returns 0.
 */
static int
default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size)
{
	pfm_default_smpl_arg_t *smpl_arg = data;

	/*
	 * size has been validated in default_validate
	 */
	*size = smpl_arg->buf_size;

	return 0;
}
/*
 * Initialize the header at the start of a freshly allocated sampling
 * buffer: record the format version and buffer size, place the write
 * offset just past the header, and zero the entry and overflow counters.
 * Always returns 0.
 */
static int
default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data)
{
	pfm_default_smpl_arg_t *smpl_arg = data;
	pfm_default_smpl_hdr_t *header = buf;

	header->hdr_version = PFM_DEFAULT_SMPL_VERSION;
	header->hdr_buf_size = smpl_arg->buf_size;
	/* the first entry goes right after the header */
	header->hdr_cur_offs = sizeof(*header);
	header->hdr_overflows = 0UL;
	header->hdr_count = 0UL;

	DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n",
		task_pid_nr(task),
		buf,
		header->hdr_buf_size,
		sizeof(*header),
		header->hdr_version,
		header->hdr_cur_offs));

	return 0;
}
/*
 * Overflow handler of the default sampling format: append one sample to
 * the sampling buffer.
 *
 * task:  task passed by the caller (only used for debug output here)
 * buf:   sampling buffer, starting with a pfm_default_smpl_hdr_t
 * arg:   overflow description; its ovfl_ctrl bits are filled in on return
 * regs:  interrupted register state, source of the sample's ip
 * stamp: timestamp recorded in the sample
 *
 * Returns 0 when the sample was stored and there is room for another one,
 * -1 when the buffer is full (before or after storing this sample), and
 * -EINVAL when any pointer argument is NULL.
 */
static int
default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp)
{
	pfm_default_smpl_hdr_t *hdr;
	pfm_default_smpl_entry_t *ent;
	void *cur, *last;
	unsigned long *e, entry_size;
	unsigned int npmds, i;
	unsigned char ovfl_pmd;
	unsigned char ovfl_notify;

	if (unlikely(buf == NULL || arg == NULL|| regs == NULL || task == NULL)) {
		/*
		 * task itself may be the NULL argument that brought us here,
		 * so it must not be dereferenced unconditionally.
		 */
		DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task ? task->pid : -1, buf, arg));
		return -EINVAL;
	}

	hdr = (pfm_default_smpl_hdr_t *)buf;
	cur = buf+hdr->hdr_cur_offs;
	last = buf+hdr->hdr_buf_size;
	ovfl_pmd = arg->ovfl_pmd;
	ovfl_notify = arg->ovfl_notify;

	/*
	 * precheck for sanity
	 */
	if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;

	npmds = hweight64(arg->smpl_pmds[0]);

	ent = (pfm_default_smpl_entry_t *)cur;

	prefetch(arg->smpl_pmds_values);

	/* fixed entry header followed by one 8-byte value per sampled pmd */
	entry_size = sizeof(*ent) + (npmds << 3);

	/* position for first pmd */
	e = (unsigned long *)(ent+1);

	hdr->hdr_count++;

	DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n",
		task->pid,
		hdr->hdr_count,
		cur, last,
		last-cur,
		ovfl_pmd,
		ovfl_notify, npmds));

	/*
	 * current = task running at the time of the overflow.
	 *
	 * per-task mode:
	 * - this is usually the task being monitored.
	 * Under certain conditions, it might be a different task
	 *
	 * system-wide:
	 * - this is not necessarily the task controlling the session
	 */
	ent->pid = current->pid;
	ent->ovfl_pmd = ovfl_pmd;
	ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val;

	/*
	 * where did the fault happen (includes slot number)
	 */
	ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3);

	ent->tstamp = stamp;
	ent->cpu = smp_processor_id();
	ent->set = arg->active_set;
	ent->tgid = current->tgid;

	/*
	 * selectively store PMDs in increasing index number
	 */
	if (npmds) {
		unsigned long *val = arg->smpl_pmds_values;
		for(i=0; i < npmds; i++) {
			*e++ = *val++;
		}
	}

	/*
	 * update position for next entry
	 */
	hdr->hdr_cur_offs += entry_size;
	cur += entry_size;

	/*
	 * post check to avoid losing the last sample
	 */
	if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;

	/*
	 * keep same ovfl_pmds, ovfl_notify
	 */
	arg->ovfl_ctrl.bits.notify_user = 0;
	arg->ovfl_ctrl.bits.block_task = 0;
	arg->ovfl_ctrl.bits.mask_monitoring = 0;
	arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */

	return 0;
full:
	DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify));

	/*
	 * increment number of buffer overflow.
	 * important to detect duplicate set of samples.
	 */
	hdr->hdr_overflows++;

	/*
	 * if no notification requested, then we saturate the buffer
	 */
	if (ovfl_notify == 0) {
		arg->ovfl_ctrl.bits.notify_user = 0;
		arg->ovfl_ctrl.bits.block_task = 0;
		arg->ovfl_ctrl.bits.mask_monitoring = 1;
		arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0;
	} else {
		arg->ovfl_ctrl.bits.notify_user = 1;
		arg->ovfl_ctrl.bits.block_task = 1; /* ignored for non-blocking context */
		arg->ovfl_ctrl.bits.mask_monitoring = 1;
		arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */
	}
	return -1; /* we are full, sorry */
}
/*
 * Restart sampling after the buffer has been consumed: rewind the buffer
 * to empty (entry count 0, write offset just past the header) and request
 * that monitoring be unmasked and the overflowed PMDs reset.
 * Always returns 0.
 */
static int
default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
{
	pfm_default_smpl_hdr_t *header = buf;

	header->hdr_cur_offs = sizeof(*header);
	header->hdr_count = 0UL;

	ctrl->bits.mask_monitoring = 0;
	ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */

	return 0;
}
/*
 * Exit callback of the sampling format.  Nothing needs to be torn down;
 * the call is only traced.  Always returns 0.
 */
static int
default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
{
	DPRINT(("[%d] exit(%p)\n", task_pid_nr(task), buf));

	return 0;
}
/*
 * Descriptor of the default sampling format, registered with perfmon by
 * pfm_default_smpl_init_module().  Note that default_restart serves as
 * both the fmt_restart and the fmt_restart_active callback.
 */
static pfm_buffer_fmt_t default_fmt={
.fmt_name = "default_format",
.fmt_uuid = PFM_DEFAULT_SMPL_UUID,
.fmt_arg_size = sizeof(pfm_default_smpl_arg_t),
.fmt_validate = default_validate,
.fmt_getsize = default_get_size,
.fmt_init = default_init,
.fmt_handler = default_handler,
.fmt_restart = default_restart,
.fmt_restart_active = default_restart,
.fmt_exit = default_exit,
};
/*
 * Module init: register the default sampling format with perfmon and log
 * the outcome.  Returns the value of pfm_register_buffer_fmt().
 */
static int __init
pfm_default_smpl_init_module(void)
{
	int err = pfm_register_buffer_fmt(&default_fmt);

	if (err != 0) {
		printk("perfmon_default_smpl: %s cannot register ret=%d\n",
		       default_fmt.fmt_name,
		       err);
		return err;
	}

	printk("perfmon_default_smpl: %s v%u.%u registered\n",
	       default_fmt.fmt_name,
	       PFM_DEFAULT_SMPL_VERSION_MAJ,
	       PFM_DEFAULT_SMPL_VERSION_MIN);

	return err;
}
/*
 * Module exit: unregister the default sampling format (by UUID) and log
 * the result of the unregistration.
 */
static void __exit
pfm_default_smpl_cleanup_module(void)
{
	int status = pfm_unregister_buffer_fmt(default_fmt.fmt_uuid);

	printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, status);
}
/* Hook the registration/unregistration routines into module load/unload. */
module_init(pfm_default_smpl_init_module);
module_exit(pfm_default_smpl_cleanup_module);

View file

@ -0,0 +1,45 @@
/*
* This file contains the generic PMU register description tables
* and pmc checker used by perfmon.c.
*
* Copyright (C) 2002-2003 Hewlett Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
*/
/*
 * PMC register descriptions for the generic PMU: pmc0-pmc3 are control
 * registers, pmc4-pmc7 are counting registers, and the table ends with a
 * PFM_REG_END marker.  Entries are positional pfm_reg_desc_t initializers;
 * the field layout is declared outside this file.
 * NOTE(review): the first bit-vector of each counting entry is RDEP(n)
 * for the same index n -- presumably the dependent PMD; confirm against
 * the pfm_reg_desc_t definition.
 */
static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={
/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
{ PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
};
/*
 * PMD register descriptions for the generic PMU: pmd0-pmd3 are marked not
 * implemented, pmd4-pmd7 are counting registers, and the table ends with a
 * PFM_REG_END marker.  Entries are positional pfm_reg_desc_t initializers;
 * the field layout is declared outside this file.
 */
static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={
/* pmd0 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
/* pmd1 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
/* pmd2 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
/* pmd3 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
{ PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
};
/*
 * Configuration of the generic PMU: matches any family (0xff), uses a
 * 32-bit overflow value ((1UL << 32) - 1), no instruction or data debug
 * registers, and the generic register tables defined above.
 *
 * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
 */
static pmu_config_t pmu_conf_gen={
.pmu_name = "Generic",
.pmu_family = 0xff, /* any */
.ovfl_val = (1UL << 32) - 1,
.num_ibrs = 0, /* does not use */
.num_dbrs = 0, /* does not use */
.pmd_desc = pfm_gen_pmd_desc,
.pmc_desc = pfm_gen_pmc_desc
};

View file

@ -0,0 +1,115 @@
/*
* This file contains the Itanium PMU register description tables
* and pmc checker used by perfmon.c.
*
* Copyright (C) 2002-2003 Hewlett Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
*/
/* Forward declaration: the checker is referenced by the table below and defined after it. */
static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
/*
 * PMC register descriptions for the Itanium PMU (pmc0-pmc13), terminated
 * by a PFM_REG_END marker.  Entries are positional pfm_reg_desc_t
 * initializers; the field layout is declared outside this file.  Only
 * pmc11 and pmc13 install pfm_ita_pmc_check as a checker callback.
 */
static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={
/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc8 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc9 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc13 */ { PFM_REG_CONFIG , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
{ PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
};
/*
 * PMD register descriptions for the Itanium PMU (pmd0-pmd17), terminated
 * by a PFM_REG_END marker.  pmd4-pmd7 are counting registers; all others
 * are buffer registers.  Entries are positional pfm_reg_desc_t
 * initializers; the field layout is declared outside this file.
 * NOTE(review): the two trailing bit-vectors of each entry look like
 * dependency sets (other PMDs, then the controlling PMC) -- confirm
 * against the pfm_reg_desc_t definition.
 */
static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={
/* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
/* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
/* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
/* pmd3 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
/* pmd4 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
/* pmd5 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
/* pmd6 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
/* pmd7 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
/* pmd8 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd9 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd10 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd11 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd12 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd13 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd14 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd15 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd16 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd17 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
{ PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
};
/*
 * Checker invoked when pmc11 or pmc13 is written on Itanium.
 *
 * If the context is loaded (or masked), the debug registers have not been
 * claimed by perfmon yet (ctx_fl_using_dbreg == 0) and the new value has
 * the relevant enable bit cleared (pmc13 bit 0, pmc11 bit 28), the
 * matching debug registers are claimed and cleared through
 * pfm_write_ibr_dbr() so that no stale contents are picked up.
 *
 * Returns -EINVAL when ctx is NULL or when the task already uses the
 * debug registers for debugging, the error from pfm_write_ibr_dbr() if it
 * fails, and 0 otherwise.
 */
static int
pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
{
	int loaded;
	int err;

	/* sanity check */
	if (ctx == NULL)
		return -EINVAL;

	loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;

	switch (cnum) {
	case 13:
		/*
		 * we must clear the (instruction) debug registers if pmc13.ta bit is cleared
		 * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
		 */
		if (!loaded || (*val & 0x1) != 0UL || ctx->ctx_fl_using_dbreg != 0)
			break;

		DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val));

		/* don't mix debug with perfmon */
		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
			return -EINVAL;

		/*
		 * a count of 0 will mark the debug registers as in use and also
		 * ensure that they are properly cleared.
		 */
		err = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs);
		if (err)
			return err;
		break;

	case 11:
		/*
		 * we must clear the (data) debug registers if pmc11.pt bit is cleared
		 * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
		 */
		if (!loaded || ((*val >> 28)& 0x1) != 0 || ctx->ctx_fl_using_dbreg != 0)
			break;

		DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val));

		/* don't mix debug with perfmon */
		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
			return -EINVAL;

		/*
		 * a count of 0 will mark the debug registers as in use and also
		 * ensure that they are properly cleared.
		 */
		err = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs);
		if (err)
			return err;
		break;

	default:
		break;
	}

	return 0;
}
/*
 * Configuration of the Itanium PMU (family 0x7): 32-bit overflow value,
 * 8 instruction and 8 data debug registers, and the Itanium register
 * tables defined above.
 *
 * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
 */
static pmu_config_t pmu_conf_ita={
.pmu_name = "Itanium",
.pmu_family = 0x7,
.ovfl_val = (1UL << 32) - 1,
.pmd_desc = pfm_ita_pmd_desc,
.pmc_desc = pfm_ita_pmc_desc,
.num_ibrs = 8,
.num_dbrs = 8,
.use_rr_dbregs = 1, /* debug registers are used for range restrictions */
};

View file

@ -0,0 +1,187 @@
/*
* This file contains the McKinley PMU register description tables
* and pmc checker used by perfmon.c.
*
* Copyright (C) 2002-2003 Hewlett Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
*/
/* Forward declaration: the checker is referenced by the table below and defined after it. */
static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
/*
 * PMC register descriptions for the McKinley PMU (pmc0-pmc15), terminated
 * by a PFM_REG_END marker.  Entries are positional pfm_reg_desc_t
 * initializers; the field layout is declared outside this file.  pmc4 and
 * above install pfm_mck_pmc_check as their checker callback.
 * NOTE(review): the third and fourth values of each entry appear to be
 * what PMC_DFL_VAL() and PMC_RSVD_MASK() return for that register (see
 * pfm_mck_reserved) -- confirm against those macro definitions.
 */
static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={
/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc8 */ { PFM_REG_CONFIG , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc9 */ { PFM_REG_CONFIG , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL, pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc13 */ { PFM_REG_CONFIG , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc14 */ { PFM_REG_CONFIG , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc15 */ { PFM_REG_CONFIG , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
{ PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
};
/*
 * PMD register descriptions for the McKinley PMU (pmd0-pmd17), terminated
 * by a PFM_REG_END marker.  pmd4-pmd7 are counting registers; all others
 * are buffer registers.  Entries are positional pfm_reg_desc_t
 * initializers; the field layout is declared outside this file.
 */
static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={
/* pmd0 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
/* pmd1 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
/* pmd2 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
/* pmd3 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
/* pmd8 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd9 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd10 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd11 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd12 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd13 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd14 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd15 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd16 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
/* pmd17 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
{ PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
};
/*
 * PMC reserved fields must have their power-up values preserved.
 *
 * Rewrites *val so that the bits selected by PMC_RSVD_MASK(cnum) come from
 * the caller's value and all remaining bits come from PMC_DFL_VAL(cnum).
 * The regs argument is not used.  Always returns 0.
 */
static int
pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
{
	unsigned long user_val = *val;
	unsigned long kept_bits, dfl_bits;

	/* remove reserved areas from user value */
	kept_bits = user_val & PMC_RSVD_MASK(cnum);

	/* get reserved fields values */
	dfl_bits = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);

	*val = kept_bits | dfl_bits;

	DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
		cnum, user_val, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));

	return 0;
}
/*
 * Validate and fix up a value about to be written to PMC register cnum.
 *
 * The value is first merged with the reserved-field defaults through
 * pfm_mck_reserved().  A write to pmc13 or pmc14 that activates debug
 * register based constraints on a loaded (or masked) context makes the
 * debug registers be marked in use and cleared via pfm_write_ibr_dbr().
 * Writes to pmc8, pmc13 and pmc14 are then cross-checked against the other
 * two registers as currently stored in ctx->ctx_pmcs[].
 *
 * Returns 0 on success; -EINVAL when ctx is NULL, when task already has
 * IA64_THREAD_DBG_VALID set, or when the pmc8/pmc13/pmc14 combination is
 * illegal; or the non-zero result of pfm_write_ibr_dbr().
 * *val may be modified (reserved fields, and bit 23 for pmc4).
 *
 * task can be NULL if the context is unloaded
 */
static int
pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
{
int ret = 0, check_case1 = 0;
unsigned long val8 = 0, val14 = 0, val13 = 0;
int is_loaded;
/* first preserve the reserved fields */
pfm_mck_reserved(cnum, val, regs);
/* sanity check */
if (ctx == NULL) return -EINVAL;
is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
/*
* we must clear the debug registers if pmc13 has a value which enable
* memory pipeline event constraints. In this case we need to clear the
* debug registers if they have not yet been accessed. This is required
* to avoid picking stale state.
* PMC13 is "active" if:
* one of the pmc13.cfg_dbrpXX field is different from 0x3
* AND
* at the corresponding pmc13.ena_dbrpXX is set.
*
* NOTE(review): the test below only requires that some bit in 45-48 is set
* and that some 2-bit field under 0x18181818 differs from 0x3; it does not
* pair each cfg field with its own enable bit.
*/
DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, *val, ctx->ctx_fl_using_dbreg, is_loaded));
if (cnum == 13 && is_loaded
&& (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val));
/* don't mix debug with perfmon */
if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
/*
* a count of 0 will mark the debug registers as in use and also
* ensure that they are properly cleared.
*/
ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
if (ret) return ret;
}
/*
* we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled
* before they are used (fl_using_dbreg==0) to avoid picking up stale information.
*/
if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) {
DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val));
/* don't mix debug with perfmon */
if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
/*
* a count of 0 will mark the debug registers as in use and also
* ensure that they are properly cleared.
*/
ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
if (ret) return ret;
}
/*
* Gather pmc8, pmc13 and pmc14 for the cross-check: the register being
* written comes from *val, the other two from the values saved in ctx.
*/
switch(cnum) {
case 4: *val |= 1UL << 23; /* force power enable bit */
break;
case 8: val8 = *val;
val13 = ctx->ctx_pmcs[13];
val14 = ctx->ctx_pmcs[14];
check_case1 = 1;
break;
case 13: val8 = ctx->ctx_pmcs[8];
val13 = *val;
val14 = ctx->ctx_pmcs[14];
check_case1 = 1;
break;
case 14: val8 = ctx->ctx_pmcs[8];
val13 = ctx->ctx_pmcs[13];
val14 = *val;
check_case1 = 1;
break;
}
/* check illegal configuration which can produce inconsistencies in tagging
* i-side events in L1D and L2 caches
*/
if (check_case1) {
/* ret becomes non-zero when the combination is illegal */
ret = ((val13 >> 45) & 0xf) == 0
&& ((val8 & 0x1) == 0)
&& ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0)
||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0));
if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n"));
}
/* at this point ret is either 0 or the illegal-configuration flag */
return ret ? -EINVAL : 0;
}
/*
 * PMU description for the "Itanium 2" model (pmu_family 0x1f).  It ties the
 * pfm_mck_pmd_desc and pfm_mck_pmc_desc register tables to the model and
 * declares 8 instruction and 8 data debug registers.
 *
 * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
 */
static pmu_config_t pmu_conf_mck={
.pmu_name = "Itanium 2",
.pmu_family = 0x1f,
.flags = PFM_PMU_IRQ_RESEND,
/* all ones in the low 47 bits -- presumably the counter overflow mask, confirm in perfmon.c */
.ovfl_val = (1UL << 47) - 1,
.pmd_desc = pfm_mck_pmd_desc,
.pmc_desc = pfm_mck_pmc_desc,
.num_ibrs = 8,
.num_dbrs = 8,
.use_rr_dbregs = 1 /* debug registers are used for range restrictions */
};

View file

@ -0,0 +1,269 @@
/*
* This file contains the Montecito PMU register description tables
* and pmc checker used by perfmon.c.
*
* Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
* Contributed by Stephane Eranian <eranian@hpl.hp.com>
*/
/* Forward declaration: the PMC description table refers to the checker before its definition. */
static int pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
/*
 * Register dependency sets used in the description tables.  RDEP() is
 * defined elsewhere; presumably it maps a register index to its dependency
 * bit -- confirm in perfmon.c.
 *
 * RDEP_MONT_ETB: pmd38, pmd39 and pmd48-pmd63.
 */
#define RDEP_MONT_ETB (RDEP(38)|RDEP(39)|RDEP(48)|RDEP(49)|RDEP(50)|RDEP(51)|RDEP(52)|RDEP(53)|RDEP(54)|\
RDEP(55)|RDEP(56)|RDEP(57)|RDEP(58)|RDEP(59)|RDEP(60)|RDEP(61)|RDEP(62)|RDEP(63))
/* RDEP_MONT_DEAR: pmd32, pmd33 and pmd36. */
#define RDEP_MONT_DEAR (RDEP(32)|RDEP(33)|RDEP(36))
/* RDEP_MONT_IEAR: pmd34 and pmd35. */
#define RDEP_MONT_IEAR (RDEP(34)|RDEP(35))
/*
 * Montecito PMC description table.  Entries are positional, one per PMC
 * number, and the table ends with a PFM_REG_END marker.  pmc0-pmc3 are
 * control registers, pmc4-pmc15 are counter configuration registers,
 * pmc16-pmc31 are not implemented and pmc32-pmc42 are configuration or
 * monitor registers.  Every implemented register except pmc0-pmc3 is
 * validated by pfm_mont_pmc_check().
 *
 * NOTE(review): the initializer order appears to be: type, a small integer
 * (position of a control bit?), default value, reserved mask, read checker,
 * write checker, dependent PMDs, dependent PMCs -- confirm against the
 * pfm_reg_desc_t definition.
 */
static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={
/* pmc0 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
/* pmc4 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(4),0, 0, 0}, {0,0, 0, 0}},
/* pmc5 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(5),0, 0, 0}, {0,0, 0, 0}},
/* pmc6 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(6),0, 0, 0}, {0,0, 0, 0}},
/* pmc7 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(7),0, 0, 0}, {0,0, 0, 0}},
/* pmc8 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(8),0, 0, 0}, {0,0, 0, 0}},
/* pmc9 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(9),0, 0, 0}, {0,0, 0, 0}},
/* pmc10 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(10),0, 0, 0}, {0,0, 0, 0}},
/* pmc11 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(11),0, 0, 0}, {0,0, 0, 0}},
/* pmc12 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(12),0, 0, 0}, {0,0, 0, 0}},
/* pmc13 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(13),0, 0, 0}, {0,0, 0, 0}},
/* pmc14 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(14),0, 0, 0}, {0,0, 0, 0}},
/* pmc15 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(15),0, 0, 0}, {0,0, 0, 0}},
/* pmc16 */ { PFM_REG_NOTIMPL, },
/* pmc17 */ { PFM_REG_NOTIMPL, },
/* pmc18 */ { PFM_REG_NOTIMPL, },
/* pmc19 */ { PFM_REG_NOTIMPL, },
/* pmc20 */ { PFM_REG_NOTIMPL, },
/* pmc21 */ { PFM_REG_NOTIMPL, },
/* pmc22 */ { PFM_REG_NOTIMPL, },
/* pmc23 */ { PFM_REG_NOTIMPL, },
/* pmc24 */ { PFM_REG_NOTIMPL, },
/* pmc25 */ { PFM_REG_NOTIMPL, },
/* pmc26 */ { PFM_REG_NOTIMPL, },
/* pmc27 */ { PFM_REG_NOTIMPL, },
/* pmc28 */ { PFM_REG_NOTIMPL, },
/* pmc29 */ { PFM_REG_NOTIMPL, },
/* pmc30 */ { PFM_REG_NOTIMPL, },
/* pmc31 */ { PFM_REG_NOTIMPL, },
/* pmc32 */ { PFM_REG_CONFIG, 0, 0x30f01ffffffffffUL, 0x30f01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
/* pmc33 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
/* pmc34 */ { PFM_REG_CONFIG, 0, 0xf01ffffffffffUL, 0xf01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
/* pmc35 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
/* pmc36 */ { PFM_REG_CONFIG, 0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
/* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, {RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}},
/* pmc38 */ { PFM_REG_CONFIG, 0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
/* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
/* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}},
/* pmc41 */ { PFM_REG_CONFIG, 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
/* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
{ PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */
};
/*
 * Montecito PMD description table.  Entries are positional, one per PMD
 * number, and the table ends with a PFM_REG_END marker.  pmd4-pmd15 are
 * counting registers, each listing the same-numbered PMC in its last
 * initializer.  pmd32-pmd36, pmd38-pmd39 and pmd48-pmd63 are buffer
 * registers; all remaining entries are not implemented.
 *
 * NOTE(review): the two trailing brace groups appear to be the dependent
 * PMDs followed by the dependent PMCs -- confirm against the
 * pfm_reg_desc_t definition.
 */
static pfm_reg_desc_t pfm_mont_pmd_desc[PMU_MAX_PMDS]={
/* pmd0 */ { PFM_REG_NOTIMPL, },
/* pmd1 */ { PFM_REG_NOTIMPL, },
/* pmd2 */ { PFM_REG_NOTIMPL, },
/* pmd3 */ { PFM_REG_NOTIMPL, },
/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(4),0, 0, 0}},
/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(5),0, 0, 0}},
/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(6),0, 0, 0}},
/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(7),0, 0, 0}},
/* pmd8 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(8),0, 0, 0}},
/* pmd9 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(9),0, 0, 0}},
/* pmd10 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(10),0, 0, 0}},
/* pmd11 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(11),0, 0, 0}},
/* pmd12 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(12),0, 0, 0}},
/* pmd13 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(13),0, 0, 0}},
/* pmd14 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(14),0, 0, 0}},
/* pmd15 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(15),0, 0, 0}},
/* pmd16 */ { PFM_REG_NOTIMPL, },
/* pmd17 */ { PFM_REG_NOTIMPL, },
/* pmd18 */ { PFM_REG_NOTIMPL, },
/* pmd19 */ { PFM_REG_NOTIMPL, },
/* pmd20 */ { PFM_REG_NOTIMPL, },
/* pmd21 */ { PFM_REG_NOTIMPL, },
/* pmd22 */ { PFM_REG_NOTIMPL, },
/* pmd23 */ { PFM_REG_NOTIMPL, },
/* pmd24 */ { PFM_REG_NOTIMPL, },
/* pmd25 */ { PFM_REG_NOTIMPL, },
/* pmd26 */ { PFM_REG_NOTIMPL, },
/* pmd27 */ { PFM_REG_NOTIMPL, },
/* pmd28 */ { PFM_REG_NOTIMPL, },
/* pmd29 */ { PFM_REG_NOTIMPL, },
/* pmd30 */ { PFM_REG_NOTIMPL, },
/* pmd31 */ { PFM_REG_NOTIMPL, },
/* pmd32 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(33)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}},
/* pmd33 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}},
/* pmd34 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(35),0, 0, 0}, {RDEP(37),0, 0, 0}},
/* pmd35 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(34),0, 0, 0}, {RDEP(37),0, 0, 0}},
/* pmd36 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(33),0, 0, 0}, {RDEP(40),0, 0, 0}},
/* pmd37 */ { PFM_REG_NOTIMPL, },
/* pmd38 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd39 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd40 */ { PFM_REG_NOTIMPL, },
/* pmd41 */ { PFM_REG_NOTIMPL, },
/* pmd42 */ { PFM_REG_NOTIMPL, },
/* pmd43 */ { PFM_REG_NOTIMPL, },
/* pmd44 */ { PFM_REG_NOTIMPL, },
/* pmd45 */ { PFM_REG_NOTIMPL, },
/* pmd46 */ { PFM_REG_NOTIMPL, },
/* pmd47 */ { PFM_REG_NOTIMPL, },
/* pmd48 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd49 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd50 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd51 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd52 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd53 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd54 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd55 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd56 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd57 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd58 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd59 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd60 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd61 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd62 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
/* pmd63 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
{ PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */
};
/*
 * Merge a user-supplied PMC value with the register defaults so that PMC
 * reserved fields keep their power-up values.
 *
 * Bits set in PMC_RSVD_MASK(cnum) are taken from the caller's value; all
 * remaining bits are taken from PMC_DFL_VAL(cnum).  The merged result is
 * stored back through val.  regs is not used here.  Always returns 0.
 */
static int
pfm_mont_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
{
	const unsigned long requested = *val;

	/* user-controlled bits from the request, everything else from the default */
	*val = (requested & PMC_RSVD_MASK(cnum))
	     | (PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum));

	DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
		  cnum, requested, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));

	return 0;
}
/*
 * Validate and fix up a value about to be written to PMC register cnum.
 *
 * The value is first merged with the reserved-field defaults through
 * pfm_mont_reserved().  A write to pmc41 or pmc38 that activates debug
 * register based constraints on a loaded (or masked) context makes the
 * debug registers be marked in use and cleared via pfm_write_ibr_dbr().
 * Writes to pmc32, pmc38 and pmc41 are then cross-checked against the other
 * two registers as currently stored in ctx->ctx_pmcs[].
 *
 * Returns 0 on success; -EINVAL when ctx is NULL, when task already has
 * IA64_THREAD_DBG_VALID set, or when the pmc32/pmc38/pmc41 combination is
 * illegal; or the non-zero result of pfm_write_ibr_dbr().
 *
 * task can be NULL if the context is unloaded
 */
static int
pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
{
int ret = 0;
unsigned long val32 = 0, val38 = 0, val41 = 0;
unsigned long tmpval;
int check_case1 = 0;
int is_loaded;
/* first preserve the reserved fields */
pfm_mont_reserved(cnum, val, regs);
/* snapshot of the merged value; it is not modified again in this function */
tmpval = *val;
/* sanity check */
if (ctx == NULL) return -EINVAL;
is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
/*
* we must clear the debug registers if pmc41 has a value which enable
* memory pipeline event constraints. In this case we need to clear the
* debug registers if they have not yet been accessed. This is required
* to avoid picking stale state.
* PMC41 is "active" if:
* one of the pmc41.cfg_dtagXX field is different from 0x3
* AND
* at the corresponding pmc41.en_dbrpXX is set.
* AND
* ctx_fl_using_dbreg == 0 (i.e., dbr not yet used)
*
* NOTE(review): the test below only requires that some bit in 45-48 is set
* and that some 2-bit field under 0x18181818 differs from 0x3; it does not
* pair each cfg field with its own enable bit.
*/
DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded));
if (cnum == 41 && is_loaded
&& (tmpval & 0x1e00000000000UL) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval));
/* don't mix debug with perfmon */
if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
/*
* a count of 0 will mark the debug registers as in use and also
* ensure that they are properly cleared.
*/
ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
if (ret) return ret;
}
/*
* we must clear the (instruction) debug registers if:
* pmc38.ig_ibrpX is 0 (enabled)
* AND
* ctx_fl_using_dbreg == 0 (i.e., dbr not yet used)
*/
if (cnum == 38 && is_loaded && ((tmpval & 0x492UL) != 0x492UL) && ctx->ctx_fl_using_dbreg == 0) {
DPRINT(("pmc38=0x%lx has active pmc38 settings, clearing ibr\n", tmpval));
/* don't mix debug with perfmon */
if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
/*
* a count of 0 will mark the debug registers as in use and also
* ensure that they are properly cleared.
*/
ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
if (ret) return ret;
}
/*
* Gather pmc32, pmc38 and pmc41 for the cross-check: the register being
* written comes from *val, the other two from the values saved in ctx.
*/
switch(cnum) {
case 32: val32 = *val;
val38 = ctx->ctx_pmcs[38];
val41 = ctx->ctx_pmcs[41];
check_case1 = 1;
break;
case 38: val38 = *val;
val32 = ctx->ctx_pmcs[32];
val41 = ctx->ctx_pmcs[41];
check_case1 = 1;
break;
case 41: val41 = *val;
val32 = ctx->ctx_pmcs[32];
val38 = ctx->ctx_pmcs[38];
check_case1 = 1;
break;
}
/* check illegal configuration which can produce inconsistencies in tagging
* i-side events in L1D and L2 caches
*/
if (check_case1) {
/* ret becomes non-zero when the combination is illegal */
ret = (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0)
&& ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0)
|| (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0));
if (ret) {
DPRINT(("invalid config pmc38=0x%lx pmc41=0x%lx pmc32=0x%lx\n", val38, val41, val32));
return -EINVAL;
}
}
/* writes back the value read after the reserved-field merge */
*val = tmpval;
return 0;
}
/*
 * PMU description for the "Montecito" model (pmu_family 0x20).  It ties the
 * pfm_mont_pmd_desc and pfm_mont_pmc_desc register tables to the model and
 * declares 8 instruction and 8 data debug registers.
 *
 * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
 */
static pmu_config_t pmu_conf_mont={
.pmu_name = "Montecito",
.pmu_family = 0x20,
.flags = PFM_PMU_IRQ_RESEND,
/* all ones in the low 47 bits -- presumably the counter overflow mask, confirm in perfmon.c */
.ovfl_val = (1UL << 47) - 1,
.pmd_desc = pfm_mont_pmd_desc,
.pmc_desc = pfm_mont_pmc_desc,
.num_ibrs = 8,
.num_dbrs = 8,
.use_rr_dbregs = 1 /* debug registers are used for range restrictions */
};

682
arch/ia64/kernel/process.c Normal file
View file

@ -0,0 +1,682 @@
/*
* Architecture-specific setup.
*
* Copyright (C) 1998-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
* 04/11/17 Ashok Raj <ashok.raj@intel.com> Added CPU Hotplug Support
*
* 2005-10-07 Keith Owens <kaos@sgi.com>
* Add notify_die() hooks.
*/
#include <linux/cpu.h>
#include <linux/pm.h>
#include <linux/elf.h>
#include <linux/errno.h>
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/personality.h>
#include <linux/sched.h>
#include <linux/stddef.h>
#include <linux/thread_info.h>
#include <linux/unistd.h>
#include <linux/efi.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/kdebug.h>
#include <linux/utsname.h>
#include <linux/tracehook.h>
#include <linux/rcupdate.h>
#include <asm/cpu.h>
#include <asm/delay.h>
#include <asm/elf.h>
#include <asm/irq.h>
#include <asm/kexec.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
#include <asm/sal.h>
#include <asm/switch_to.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/unwind.h>
#include <asm/user.h>
#include "entry.h"
#ifdef CONFIG_PERFMON
# include <asm/perfmon.h>
#endif
#include "sigframe.h"
/* Optional idle hook: arch_cpu_idle() calls it with 1 before halting and 0 afterwards when it is non-NULL. */
void (*ia64_mark_idle)(int);
/* Idle override setting; starts out as IDLE_NO_OVERRIDE and is exported to modules. */
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);
/* Optional power-off handler: machine_power_off() calls it when it is non-NULL.  Exported to modules. */
void (*pm_power_off) (void);
EXPORT_SYMBOL(pm_power_off);
/*
 * Print a call trace by walking the unwind frames starting at info.
 *
 * For every frame the instruction pointer, memory stack pointer and
 * backing store pointer are formatted into a template which is then handed
 * to print_symbol() together with the ip.  The walk stops at the first
 * frame whose ip is 0 or when unw_unwind() reports a failure.  arg is not
 * used.
 */
void
ia64_do_show_stack (struct unw_frame_info *info, void *arg)
{
	char fmt[128];	/* don't make it so big that it overflows the stack! */
	unsigned long pc, mem_sp, backing_sp;

	printk("\nCall Trace:\n");
	for (;;) {
		unw_get_ip(info, &pc);
		if (!pc)
			break;

		unw_get_sp(info, &mem_sp);
		unw_get_bsp(info, &backing_sp);
		/* "%%s" survives as "%s" for print_symbol() to fill in */
		snprintf(fmt, sizeof(fmt),
			 " [<%016lx>] %%s\n"
			 " sp=%016lx bsp=%016lx\n",
			 pc, mem_sp, backing_sp);
		print_symbol(fmt, pc);

		if (unw_unwind(info) < 0)
			break;
	}
}
void
show_stack (struct task_struct *task, unsigned long *sp)
{
if (!task)
unw_init_running(ia64_do_show_stack, NULL);
else {
struct unw_frame_info info;
unw_init_from_blocked_task(&info, task);
ia64_do_show_stack(&info, NULL);
}
}
/*
 * Dump the register state in regs to the kernel log: loaded modules, the
 * control, application, branch, floating-point and static general registers
 * saved in pt_regs, then either the stacked registers (for a user-mode
 * frame) or a kernel call trace.
 */
void
show_regs (struct pt_regs *regs)
{
/* instruction address plus the slot number taken from psr.ri */
unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
print_modules();
printk("\n");
show_regs_print_info(KERN_DEFAULT);
printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s (%s)\n",
regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(),
init_utsname()->release);
print_symbol("ip is at %s\n", ip);
printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
printk("rnat: %016lx bsps: %016lx pr : %016lx\n",
regs->ar_rnat, regs->ar_bspstore, regs->pr);
printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, regs->b6, regs->b7);
printk("f6 : %05lx%016lx f7 : %05lx%016lx\n",
regs->f6.u.bits[1], regs->f6.u.bits[0],
regs->f7.u.bits[1], regs->f7.u.bits[0]);
printk("f8 : %05lx%016lx f9 : %05lx%016lx\n",
regs->f8.u.bits[1], regs->f8.u.bits[0],
regs->f9.u.bits[1], regs->f9.u.bits[0]);
printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
regs->f10.u.bits[1], regs->f10.u.bits[0],
regs->f11.u.bits[1], regs->f11.u.bits[0]);
printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, regs->r2, regs->r3);
printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10);
printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13);
printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16);
printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19);
printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22);
printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25);
printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, regs->r28);
printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31);
if (user_mode(regs)) {
/* print the stacked registers */
unsigned long val, *bsp, ndirty;
/* NOTE(review): is_nat is never updated, so the '*' NaT marker below is never printed */
int i, sof, is_nat = 0;
sof = regs->cr_ifs & 0x7f; /* size of frame */
ndirty = (regs->loadrs >> 19);
bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty);
for (i = 0; i < sof; ++i) {
/* NOTE(review): the get_user() result is ignored; a faulting read is not reported */
get_user(val, (unsigned long __user *) ia64_rse_skip_regs(bsp, i));
/* three registers per output line, with a newline after the last one */
printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val,
((i == sof - 1) || (i % 3) == 2) ? "\n" : " ");
}
} else
show_stack(NULL, NULL);
}
/*
 * local support for deprecated console_print
 *
 * Prints the string s through printk() at KERN_EMERG level.
 */
void
console_print(const char *s)
{
printk(KERN_EMERG "%s", s);
}
/*
 * Handle the pending work of the current task: perfmon work, signal
 * delivery, TIF_NOTIFY_RESUME notification and the user-to-kernel RBS sync.
 *
 * Interrupts are enabled before each of the signal, notify and RBS steps
 * and are disabled again before returning.  When the task is in fsys mode
 * nothing is handled; psr.lp is set in the saved state instead and the
 * function returns immediately.  The first argument is unused.
 */
void
do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
{
if (fsys_mode(current, &scr->pt)) {
/*
* defer signal-handling etc. until we return to
* privilege-level 0.
*/
if (!ia64_psr(&scr->pt)->lp)
ia64_psr(&scr->pt)->lp = 1;
return;
}
#ifdef CONFIG_PERFMON
if (current->thread.pfm_needs_checking)
/*
* Note: pfm_handle_work() allow us to call it with interrupts
* disabled, and may enable interrupts within the function.
*/
pfm_handle_work();
#endif
/* deal with pending signal delivery */
if (test_thread_flag(TIF_SIGPENDING)) {
local_irq_enable(); /* force interrupt enable */
ia64_do_signal(scr, in_syscall);
}
/* the notify flag is cleared here, before the tracehook runs */
if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME)) {
local_irq_enable(); /* force interrupt enable */
tracehook_notify_resume(&scr->pt);
}
/* copy user rbs to kernel rbs */
if (unlikely(test_thread_flag(TIF_RESTORE_RSE))) {
local_irq_enable(); /* force interrupt enable */
ia64_sync_krbs();
}
local_irq_disable(); /* force interrupt disable */
}
/*
 * Handler for the "nohalt" kernel command-line option: calls
 * cpu_idle_poll_ctrl(true).  The option string is ignored.  Returns 1.
 */
static int __init nohalt_setup(char * str)
{
cpu_idle_poll_ctrl(true);
return 1;
}
__setup("nohalt", nohalt_setup);
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Take the calling CPU offline.  The per-cpu cpu_state is set to CPU_DEAD,
 * interrupts are disabled and control is handed to SAL through the saved
 * rendezvous state of this CPU.  Not expected to return.
 */
/* We don't actually take CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
unsigned int this_cpu = smp_processor_id();
/* Ack it */
__this_cpu_write(cpu_state, CPU_DEAD);
max_xtp();
local_irq_disable();
idle_task_exit();
ia64_jump_to_sal(&sal_boot_rendez_state[this_cpu]);
/*
* The above is a point of no-return, the processor is
* expected to be in SAL loop now.
*/
BUG();
}
#else
/* Without CPU hotplug support this must never be reached. */
static inline void play_dead(void)
{
BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */
/* Architecture hook for an idle CPU that is going offline; delegates to play_dead(). */
void arch_cpu_idle_dead(void)
{
play_dead();
}
/*
 * Architecture idle routine: halt the CPU with safe_halt(), bracketed by
 * the optional ia64_mark_idle hook (called with 1 before and 0 after) and,
 * on SMP, by min_xtp()/normal_xtp().
 */
void arch_cpu_idle(void)
{
/* snapshot the hook once so the same pointer is used before and after the halt */
void (*mark_idle)(int) = ia64_mark_idle;
#ifdef CONFIG_SMP
min_xtp();
#endif
rmb();
if (mark_idle)
(*mark_idle)(1);
safe_halt();
if (mark_idle)
(*mark_idle)(0);
#ifdef CONFIG_SMP
normal_xtp();
#endif
}
/*
 * Save the optional per-task state of task: the debug registers when
 * IA64_THREAD_DBG_VALID is set and, with CONFIG_PERFMON, the perfmon
 * registers when IA64_THREAD_PM_VALID is set.  When this CPU is in
 * system-wide perfmon mode, pfm_syst_wide_update_task() is also called
 * with 0 as its last argument.
 */
void
ia64_save_extra (struct task_struct *task)
{
	if (task->thread.flags & IA64_THREAD_DBG_VALID)
		ia64_save_debug_regs(&task->thread.dbr[0]);

#ifdef CONFIG_PERFMON
	{
		unsigned long syst_info;

		if (task->thread.flags & IA64_THREAD_PM_VALID)
			pfm_save_regs(task);

		/* per-cpu perfmon state decides whether system-wide monitoring is on */
		syst_info = __this_cpu_read(pfm_syst_info);
		if (syst_info & PFM_CPUINFO_SYST_WIDE)
			pfm_syst_wide_update_task(task, syst_info, 0);
	}
#endif
}
/*
 * Load the optional per-task state of task: the debug registers when
 * IA64_THREAD_DBG_VALID is set and, with CONFIG_PERFMON, the perfmon
 * registers when IA64_THREAD_PM_VALID is set.  When this CPU is in
 * system-wide perfmon mode, pfm_syst_wide_update_task() is also called
 * with 1 as its last argument.
 */
void
ia64_load_extra (struct task_struct *task)
{
	if (task->thread.flags & IA64_THREAD_DBG_VALID)
		ia64_load_debug_regs(&task->thread.dbr[0]);

#ifdef CONFIG_PERFMON
	{
		unsigned long syst_info;

		if (task->thread.flags & IA64_THREAD_PM_VALID)
			pfm_load_regs(task);

		/* per-cpu perfmon state decides whether system-wide monitoring is on */
		syst_info = __this_cpu_read(pfm_syst_info);
		if (syst_info & PFM_CPUINFO_SYST_WIDE)
			pfm_syst_wide_update_task(task, syst_info, 1);
	}
#endif
}
/*
* Copy the state of an ia-64 thread.
*
* We get here through the following call chain:
*
* from user-level: from kernel:
*
* <clone syscall> <some kernel call frames>
* sys_clone :
* do_fork do_fork
* copy_thread copy_thread
*
* This means that the stack layout is as follows:
*
* +---------------------+ (highest addr)
* | struct pt_regs |
* +---------------------+
* | struct switch_stack |
* +---------------------+
* | |
* | memory stack |
* | | <-- sp (lowest addr)
* +---------------------+
*
* Observe that we copy the unat values that are in pt_regs and switch_stack. Spilling an
* integer to address X causes bit N in ar.unat to be set to the NaT bit of the register,
* with N=(X & 0x1ff)/8. Thus, copying the unat value preserves the NaT bits ONLY if the
* pt_regs structure in the parent is congruent to that of the child, modulo 512. Since
* the stack is page aligned and the page size is at least 4KB, this is always the case,
* so there is nothing to worry about.
*
* Parameters:
* clone_flags - only CLONE_SETTLS is examined here.
* user_stack_base, user_stack_size - for a user thread, the new user stack
* (0 base keeps the parent's); for a kernel thread they carry the payload
* and its argument, and a 0 base means fork_idle() called us.
* p - the child task being set up.
*
* Returns 0 on every path (retval is never changed from 0).
*/
int
copy_thread(unsigned long clone_flags,
unsigned long user_stack_base, unsigned long user_stack_size,
struct task_struct *p)
{
extern char ia64_ret_from_clone;
struct switch_stack *child_stack, *stack;
unsigned long rbs, child_rbs, rbs_size;
struct pt_regs *child_ptregs;
struct pt_regs *regs = current_pt_regs();
int retval = 0;
/* the child's pt_regs sits at the top of its stack area, switch_stack right below it */
child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1;
child_stack = (struct switch_stack *) child_ptregs - 1;
rbs = (unsigned long) current + IA64_RBS_OFFSET;
child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
/* copy parts of thread_struct: */
p->thread.ksp = (unsigned long) child_stack - 16;
/*
* NOTE: The calling convention considers all floating point
* registers in the high partition (fph) to be scratch. Since
* the only way to get to this point is through a system call,
* we know that the values in fph are all dead. Hence, there
* is no need to inherit the fph state from the parent to the
* child and all we have to do is to make sure that
* IA64_THREAD_FPH_VALID is cleared in the child.
*
* XXX We could push this optimization a bit further by
* clearing IA64_THREAD_FPH_VALID on ANY system call.
* However, it's not clear this is worth doing. Also, it
* would be a slight deviation from the normal Linux system
* call behavior where scratch registers are preserved across
* system calls (unless used by the system call itself).
*/
# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \
| IA64_THREAD_PM_VALID)
# define THREAD_FLAGS_TO_SET 0
p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
| THREAD_FLAGS_TO_SET);
ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */
/* kernel thread: build a fresh frame instead of copying the parent's */
if (unlikely(p->flags & PF_KTHREAD)) {
if (unlikely(!user_stack_base)) {
/* fork_idle() called us */
return 0;
}
/* one memset clears both the switch_stack and the pt_regs that follows it */
memset(child_stack, 0, sizeof(*child_ptregs) + sizeof(*child_stack));
child_stack->r4 = user_stack_base; /* payload */
child_stack->r5 = user_stack_size; /* argument */
/*
* Preserve PSR bits, except for bits 32-34 and 37-45,
* which we can't read.
*/
child_ptregs->cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
/* mark as valid, empty frame */
child_ptregs->cr_ifs = 1UL << 63;
child_stack->ar_fpsr = child_ptregs->ar_fpsr
= ia64_getreg(_IA64_REG_AR_FPSR);
child_stack->pr = (1 << PRED_KERNEL_STACK);
child_stack->ar_bspstore = child_rbs;
child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
/* stop some PSR bits from being inherited.
* the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
* therefore we must specify them explicitly here and not include them in
* IA64_PSR_BITS_TO_CLEAR.
*/
child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
& ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
return 0;
}
/* user thread: the parent's switch_stack lies directly below its pt_regs */
stack = ((struct switch_stack *) regs) - 1;
/* copy parent's switch_stack & pt_regs to child: */
memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
/* copy the parent's register backing store to the child: */
rbs_size = stack->ar_bspstore - rbs;
memcpy((void *) child_rbs, (void *) rbs, rbs_size);
if (clone_flags & CLONE_SETTLS)
child_ptregs->r13 = regs->r16; /* see sys_clone2() in entry.S */
if (user_stack_base) {
/* new user stack: sp 16 bytes below its top, backing store starting at its base */
child_ptregs->r12 = user_stack_base + user_stack_size - 16;
child_ptregs->ar_bspstore = user_stack_base;
child_ptregs->ar_rnat = 0;
child_ptregs->loadrs = 0;
}
child_stack->ar_bspstore = child_rbs + rbs_size;
child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
/* stop some PSR bits from being inherited.
* the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
* therefore we must specify them explicitly here and not include them in
* IA64_PSR_BITS_TO_CLEAR.
*/
child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
& ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
#ifdef CONFIG_PERFMON
if (current->thread.pfm_context)
pfm_inherit(p, child_ptregs);
#endif
return retval;
}
/*
 * Fill the ELF general register set at arg with the user-level register
 * state of task, obtained by unwinding info up to the user-level frame.
 *
 * The destination is zeroed first.  If unwinding to user level or syncing
 * the user register backing store fails, the function returns early and
 * leaves the destination zeroed.
 */
static void
do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg)
{
unsigned long mask, sp, nat_bits = 0, ar_rnat, urbs_end, cfm;
unsigned long uninitialized_var(ip); /* GCC be quiet */
elf_greg_t *dst = arg;
struct pt_regs *pt;
char nat;
int i;
memset(dst, 0, sizeof(elf_gregset_t)); /* don't leak any kernel bits to user-level */
if (unw_unwind_to_user(info) < 0)
return;
unw_get_sp(info, &sp);
/* pt_regs is taken to start 16 bytes above the unwound stack pointer */
pt = (struct pt_regs *) (sp + 16);
urbs_end = ia64_get_user_rbs_end(task, pt, &cfm);
if (ia64_sync_user_rbs(task, info->sw, pt->ar_bspstore, urbs_end) < 0)
return;
ia64_peek(task, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) urbs_end),
&ar_rnat);
/*
* coredump format:
* r0-r31
* NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
* predicate registers (p0-p63)
* b0-b7
* ip cfm user-mask
* ar.rsc ar.bsp ar.bspstore ar.rnat
* ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
*/
/* r0 is zero */
/* dst[0] stays 0 from the memset; mask tracks bit i of the NaT word */
for (i = 1, mask = (1UL << i); i < 32; ++i) {
unw_get_gr(info, i, &dst[i], &nat);
if (nat)
nat_bits |= mask;
mask <<= 1;
}
dst[32] = nat_bits;
unw_get_pr(info, &dst[33]);
for (i = 0; i < 8; ++i)
unw_get_br(info, i, &dst[34 + i]);
unw_get_rp(info, &ip);
/* return address plus the slot number taken from psr.ri */
dst[42] = ip + ia64_psr(pt)->ri;
dst[43] = cfm;
dst[44] = pt->cr_ipsr & IA64_PSR_UM;
unw_get_ar(info, UNW_AR_RSC, &dst[45]);
/*
* For bsp and bspstore, unw_get_ar() would return the kernel
* addresses, but we need the user-level addresses instead:
*/
dst[46] = urbs_end; /* note: by convention PT_AR_BSP points to the end of the urbs! */
dst[47] = pt->ar_bspstore;
dst[48] = ar_rnat;
unw_get_ar(info, UNW_AR_CCV, &dst[49]);
unw_get_ar(info, UNW_AR_UNAT, &dst[50]);
unw_get_ar(info, UNW_AR_FPSR, &dst[51]);
dst[52] = pt->ar_pfs; /* UNW_AR_PFS is == to pt->cr_ifs for interrupt frames */
unw_get_ar(info, UNW_AR_LC, &dst[53]);
unw_get_ar(info, UNW_AR_EC, &dst[54]);
unw_get_ar(info, UNW_AR_CSD, &dst[55]);
unw_get_ar(info, UNW_AR_SSD, &dst[56]);
}
/*
 * Fill the ELF floating-point register set at arg with the user-level
 * state of task, obtained by unwinding info up to the user-level frame.
 *
 * The destination is zeroed first and stays zeroed if the unwind fails.
 * Slots 2-31 are read through the unwinder; slots 32 and up are copied
 * from task->thread.fph only when IA64_THREAD_FPH_VALID is set.
 */
void
do_dump_task_fpu (struct task_struct *task, struct unw_frame_info *info, void *arg)
{
	elf_fpreg_t *fpregs = arg;
	int regnum;

	/* don't leak any "random" bits */
	memset(fpregs, 0, sizeof(elf_fpregset_t));

	if (unw_unwind_to_user(info) < 0)
		return;

	/* f0 is 0.0, f1 is 1.0 */
	for (regnum = 2; regnum < 32; regnum++)
		unw_get_fr(info, regnum, &fpregs[regnum]);

	ia64_flush_fph(task);
	if (task->thread.flags & IA64_THREAD_FPH_VALID)
		memcpy(&fpregs[32], task->thread.fph, 96*16);
}
/* Unwind callback: copy the general registers of the current task into arg. */
void
do_copy_regs (struct unw_frame_info *info, void *arg)
{
do_copy_task_regs(current, info, arg);
}
/* Unwind callback: copy the floating-point registers of the current task into arg. */
void
do_dump_fpu (struct unw_frame_info *info, void *arg)
{
do_dump_task_fpu(current, info, arg);
}
/*
 * Fill dst with the general registers of the running task by unwinding
 * from the current context.  The pt argument is not used.
 */
void
ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
{
unw_init_running(do_copy_regs, dst);
}
/*
 * Fill dst with the floating-point registers of the running task by
 * unwinding from the current context.  The pt argument is not used.
 * Always returns 1.
 */
int
dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
{
unw_init_running(do_dump_fpu, dst);
return 1; /* f0-f31 are always valid so we always return 1 */
}
/*
* Flush thread state. This is called when a thread does an execve().
*
* Clears the IA64_THREAD_FPH_VALID and IA64_THREAD_DBG_VALID flags of the
* current task and drops its FPU state.
*/
void
flush_thread (void)
{
/* drop floating-point and debug-register state if it exists: */
current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID);
ia64_drop_fpu(current);
}
/*
* Clean up state associated with current thread. This is called when
* the thread calls exit().
*
* The FPU state is always dropped.  The perfmon context and the debug
* register resources are only handled when CONFIG_PERFMON is enabled.
*/
void
exit_thread (void)
{
ia64_drop_fpu(current);
#ifdef CONFIG_PERFMON
/* if needed, stop monitoring and flush state to perfmon context */
if (current->thread.pfm_context)
pfm_exit_thread(current);
/* free debug register resources */
if (current->thread.flags & IA64_THREAD_DBG_VALID)
pfm_release_debug_registers(current);
#endif
}
unsigned long
get_wchan (struct task_struct *p)
{
struct unw_frame_info info;
unsigned long ip;
int count = 0;
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
/*
* Note: p may not be a blocked task (it could be current or
* another process running on some other CPU. Rather than
* trying to determine if p is really blocked, we just assume
* it's blocked and rely on the unwind routines to fail
* gracefully if the process wasn't really blocked after all.
* --davidm 99/12/15
*/
unw_init_from_blocked_task(&info, p);
do {
if (p->state == TASK_RUNNING)
return 0;
if (unw_unwind(&info) < 0)
return 0;
unw_get_ip(&info, &ip);
if (!in_sched_functions(ip))
return ip;
} while (count++ < 16);
return 0;
}
/*
 * Halt this CPU.  The halt information is obtained from PAL; among the
 * eight states reported, the one with the smallest power_consumption is
 * chosen (states 1-7 are only considered when their "im" flag is set,
 * state 0 is the starting candidate) and ia64_pal_halt() is then invoked
 * on it forever.  Returns without halting if the PAL query fails.
 */
void
cpu_halt (void)
{
	pal_power_mgmt_info_u_t states[8];
	unsigned long lowest;
	int idx, best = 0;

	if (ia64_pal_halt_info(states) != 0)
		return;

	lowest = states[0].pal_power_mgmt_info_s.power_consumption;
	for (idx = 1; idx < 8; idx++) {
		if (!states[idx].pal_power_mgmt_info_s.im)
			continue;
		if (states[idx].pal_power_mgmt_info_s.power_consumption >= lowest)
			continue;
		lowest = states[idx].pal_power_mgmt_info_s.power_consumption;
		best = idx;
	}

	for (;;)
		ia64_pal_halt(best);
}
/*
 * Prepare the machine for shutdown.  With CONFIG_HOTPLUG_CPU every online
 * CPU other than the calling one is taken down; with CONFIG_KEXEC the
 * IOSAPIC is disabled via kexec_disable_iosapic().
 */
void machine_shutdown(void)
{
#ifdef CONFIG_HOTPLUG_CPU
int cpu;
for_each_online_cpu(cpu) {
/* keep the CPU we are running on */
if (cpu != smp_processor_id())
cpu_down(cpu);
}
#endif
#ifdef CONFIG_KEXEC
kexec_disable_iosapic();
#endif
}
/*
 * Restart the machine: raise the DIE_MACHINE_RESTART die notification
 * (passing restart_cmd along, result ignored) and then request a warm
 * reboot from EFI.
 */
void
machine_restart (char *restart_cmd)
{
(void) notify_die(DIE_MACHINE_RESTART, restart_cmd, NULL, 0, 0, 0);
efi_reboot(REBOOT_WARM, NULL);
}
/*
 * Halt the machine: raise the DIE_MACHINE_HALT die notification (result
 * ignored) and then halt this CPU with cpu_halt().
 */
void
machine_halt (void)
{
(void) notify_die(DIE_MACHINE_HALT, "", NULL, 0, 0, 0);
cpu_halt();
}
/*
 * Power the machine off: call the pm_power_off hook when one is installed,
 * then fall back to machine_halt() (reached if there is no hook or the hook
 * returns).
 */
void
machine_power_off (void)
{
if (pm_power_off)
pm_power_off();
machine_halt();
}

2194
arch/ia64/kernel/ptrace.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,325 @@
/*
* arch/ia64/kernel/relocate_kernel.S
*
* Relocate kexec'able kernel and start it
*
* Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
* Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
* Copyright (C) 2005 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
*
* This source code is licensed under the GNU General Public License,
* Version 2. See the file COPYING for more details.
*/
#include <asm/asmmacro.h>
#include <asm/kregs.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mca_asm.h>
/* Must be relocatable PIC code callable as a C function
*/
/*
 * relocate_new_kernel: copy the pages of a new kernel into place and
 * branch to it.
 *
 * Arguments, as they are used below:
 *   in0 - page_list; walked as a list of tagged 8-byte entries
 *   in1 - address loaded into b6 and called once the copy is done
 *   in2 - address whose physical form (top three bits cleared) is put in r28
 *   in3 - address used to purge the instruction TR entry for the PAL code
 *
 * Steps: switch to physical mode on a private memory/register stack that
 * lives inside this code, purge the translation cache, purge the TR entries
 * for kernel text/data, PAL code and the current stack, copy the pages
 * described by the page list, then call through b6.
 *
 * relocate_new_kernel_size (defined after the code) holds the size of the
 * whole routine including both embedded stacks.
 */
GLOBAL_ENTRY(relocate_new_kernel)
.prologue
alloc r31=ar.pfs,4,0,0,0
.body
.reloc_entry:
{
rsm psr.i| psr.ic
mov r2=ip
}
;;
{
flushrs // must be first insn in group
srlz.i
}
;;
dep r2=0,r2,61,3 //to physical address
;;
//first switch to physical mode
add r3=1f-.reloc_entry, r2
movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC
mov ar.rsc=0 // put RSE in enforced lazy mode
;;
// sp and the RSE backing store are pointed at the stacks embedded below
add sp=(memory_stack_end - 16 - .reloc_entry),r2
add r8=(register_stack - .reloc_entry),r2
;;
mov r18=ar.rnat
mov ar.bspstore=r8
;;
mov cr.ipsr=r16
mov cr.iip=r3
mov cr.ifs=r0
srlz.i
;;
mov ar.rnat=r18
rfi // note: this unmasks MCA/INIT (psr.mc)
;;
1:
//physical mode code begin
mov b6=in1
dep r28=0,in2,61,3 //to physical address
// purge all TC entries
#define O(member) IA64_CPUINFO_##member##_OFFSET
GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2
;;
addl r17=O(PTCE_STRIDE),r2
addl r2=O(PTCE_BASE),r2
;;
ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base
ld4 r19=[r2],4 // r19=ptce_count[0]
ld4 r21=[r17],4 // r21=ptce_stride[0]
;;
ld4 r20=[r2] // r20=ptce_count[1]
ld4 r22=[r17] // r22=ptce_stride[1]
mov r24=r0
;;
adds r20=-1,r20
;;
#undef O
// outer loop (label 2): r24 counts up to ptce_count[0]
2:
cmp.ltu p6,p7=r24,r19
(p7) br.cond.dpnt.few 4f
mov ar.lc=r20
// inner loop (label 3): ptce_count[1] purges, stepping by ptce_stride[1]
3:
ptc.e r18
;;
add r18=r22,r18
br.cloop.sptk.few 3b
;;
add r18=r21,r18
add r24=1,r24
;;
br.sptk.few 2b
4:
srlz.i
;;
// purge TR entry for kernel text and data
movl r16=KERNEL_START
mov r18=KERNEL_TR_PAGE_SHIFT<<2
;;
ptr.i r16, r18
ptr.d r16, r18
;;
srlz.i
;;
// purge TR entry for pal code
mov r16=in3
mov r18=IA64_GRANULE_SHIFT<<2
;;
ptr.i r16,r18
;;
srlz.i
;;
// purge TR entry for stack
mov r16=IA64_KR(CURRENT_STACK)
;;
shl r16=r16,IA64_GRANULE_SHIFT
movl r19=PAGE_OFFSET
;;
add r16=r19,r16
mov r18=IA64_GRANULE_SHIFT<<2
;;
ptr.d r16,r18
;;
srlz.i
;;
//copy segments
// Each page-list entry is an address with flag bits in its low bits; the
// flags are tested in the order dest, indirect, end, source.
// r17 = current destination, r18 = current source, r16 = PAGE_MASK.
movl r16=PAGE_MASK
mov r30=in0 // in0 is page_list
br.sptk.few .dest_page
;;
.loop:
ld8 r30=[in0], 8;;
.dest_page:
tbit.z p0, p6=r30, 0;; // 0x1 dest page
(p6) and r17=r30, r16
(p6) br.cond.sptk.few .loop;;
tbit.z p0, p6=r30, 1;; // 0x2 indirect page
(p6) and in0=r30, r16
(p6) br.cond.sptk.few .loop;;
tbit.z p0, p6=r30, 2;; // 0x4 end flag
(p6) br.cond.sptk.few .end_loop;;
tbit.z p6, p0=r30, 3;; // 0x8 source page
(p6) br.cond.sptk.few .loop
and r18=r30, r16
// simple copy page, may optimize later
movl r14=PAGE_SIZE/8 - 1;;
mov ar.lc=r14;;
1:
ld8 r14=[r18], 8;;
st8 [r17]=r14;;
fc.i r17
add r17=8, r17
br.ctop.sptk.few 1b
br.sptk.few .loop
;;
.end_loop:
sync.i // for fc.i
;;
srlz.i
;;
srlz.d
;;
// enter the target whose address was passed in in1
br.call.sptk.many b0=b6;;
.align 32
// private stacks used while running in physical mode (8192 bytes each)
memory_stack:
.fill 8192, 1, 0
memory_stack_end:
register_stack:
.fill 8192, 1, 0
register_stack_end:
relocate_new_kernel_end:
END(relocate_new_kernel)
.global relocate_new_kernel_size
relocate_new_kernel_size:
data8 relocate_new_kernel_end - relocate_new_kernel
/*
 * ia64_dump_cpu_regs: store the register state of the calling CPU into the
 * buffer whose address is passed in in0.
 *
 * Buffer layout in 8-byte words, as written below:
 *   0-31   r0-r31 (r4 and r5 are saved first because they are then used
 *          as scratch registers)
 *   32...  rnat, pr, b0-b7, ip (taken from b0), cfm (low 38 bits of the
 *          saved ar.pfs), user mask (stored as 0), then the application
 *          registers in the order of the stores below
 *
 * loc0 holds the caller's ar.pfs, loc1 is the write pointer for the second
 * part of the buffer; in0 itself is advanced while r0-r31 are stored.
 *
 * NOTE(review): ar.unat and ar.fpsr are each stored twice in a row further
 * down, which places ar.pfs..ar.ssd two words later than the dst[52]..dst[56]
 * slots filled by the C register-copy code in process.c -- confirm whether
 * that is intended before relying on those offsets.
 */
GLOBAL_ENTRY(ia64_dump_cpu_regs)
.prologue
alloc loc0=ar.pfs,1,2,0,0
.body
mov ar.rsc=0 // put RSE in enforced lazy mode
add loc1=4*8, in0 // save r4 and r5 first
;;
{
flushrs // flush dirty regs to backing store
srlz.i
}
st8 [loc1]=r4, 8
;;
st8 [loc1]=r5, 8
;;
add loc1=32*8, in0
mov r4=ar.rnat
;;
st8 [in0]=r0, 8 // r0
st8 [loc1]=r4, 8 // rnat
mov r5=pr
;;
st8 [in0]=r1, 8 // r1
st8 [loc1]=r5, 8 // pr
mov r4=b0
;;
st8 [in0]=r2, 8 // r2
st8 [loc1]=r4, 8 // b0
mov r5=b1;
;;
st8 [in0]=r3, 24 // r3 (step over the r4/r5 slots saved above)
st8 [loc1]=r5, 8 // b1
mov r4=b2
;;
st8 [in0]=r6, 8 // r6
st8 [loc1]=r4, 8 // b2
mov r5=b3
;;
st8 [in0]=r7, 8 // r7
st8 [loc1]=r5, 8 // b3
mov r4=b4
;;
st8 [in0]=r8, 8 // r8
st8 [loc1]=r4, 8 // b4
mov r5=b5
;;
st8 [in0]=r9, 8 // r9
st8 [loc1]=r5, 8 // b5
mov r4=b6
;;
st8 [in0]=r10, 8 // r10
st8 [loc1]=r4, 8 // b6 (was stored from r5, which still held b5)
mov r5=b7
;;
st8 [in0]=r11, 8 // r11
st8 [loc1]=r5, 8 // b7
mov r4=b0
;;
st8 [in0]=r12, 8 // r12
st8 [loc1]=r4, 8 // ip
mov r5=loc0
;;
st8 [in0]=r13, 8 // r13
extr.u r5=r5, 0, 38 // ar.pfs.pfm
mov r4=r0 // user mask
;;
st8 [in0]=r14, 8 // r14
st8 [loc1]=r5, 8 // cfm
;;
st8 [in0]=r15, 8 // r15
st8 [loc1]=r4, 8 // user mask
mov r5=ar.rsc
;;
st8 [in0]=r16, 8 // r16
st8 [loc1]=r5, 8 // ar.rsc
mov r4=ar.bsp
;;
st8 [in0]=r17, 8 // r17
st8 [loc1]=r4, 8 // ar.bsp
mov r5=ar.bspstore
;;
st8 [in0]=r18, 8 // r18
st8 [loc1]=r5, 8 // ar.bspstore
mov r4=ar.rnat
;;
st8 [in0]=r19, 8 // r19
st8 [loc1]=r4, 8 // ar.rnat
mov r5=ar.ccv
;;
st8 [in0]=r20, 8 // r20
st8 [loc1]=r5, 8 // ar.ccv
mov r4=ar.unat
;;
st8 [in0]=r21, 8 // r21
st8 [loc1]=r4, 8 // ar.unat
mov r5 = ar.fpsr
;;
st8 [in0]=r22, 8 // r22
st8 [loc1]=r5, 8 // ar.fpsr
mov r4 = ar.unat
;;
st8 [in0]=r23, 8 // r23
st8 [loc1]=r4, 8 // unat
mov r5 = ar.fpsr
;;
st8 [in0]=r24, 8 // r24
st8 [loc1]=r5, 8 // fpsr
mov r4 = ar.pfs
;;
st8 [in0]=r25, 8 // r25
st8 [loc1]=r4, 8 // ar.pfs
mov r5 = ar.lc
;;
st8 [in0]=r26, 8 // r26
st8 [loc1]=r5, 8 // ar.lc
mov r4 = ar.ec
;;
st8 [in0]=r27, 8 // r27
st8 [loc1]=r4, 8 // ar.ec
mov r5 = ar.csd
;;
st8 [in0]=r28, 8 // r28
st8 [loc1]=r5, 8 // ar.csd
mov r4 = ar.ssd
;;
st8 [in0]=r29, 8 // r29
st8 [loc1]=r4, 8 // ar.ssd
;;
st8 [in0]=r30, 8 // r30
;;
st8 [in0]=r31, 8 // r31
mov ar.pfs=loc0
;;
br.ret.sptk.many rp
END(ia64_dump_cpu_regs)

405
arch/ia64/kernel/sal.c Normal file
View file

@ -0,0 +1,405 @@
/*
* System Abstraction Layer (SAL) interface routines.
*
* Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <asm/delay.h>
#include <asm/page.h>
#include <asm/sal.h>
#include <asm/pal.h>
/* Lock for SAL calls -- presumably taken by the SAL_CALL() wrappers; confirm in <asm/sal.h> */
__cacheline_aligned DEFINE_SPINLOCK(sal_lock);
/* feature mask copied from the SAL platform-feature descriptor */
unsigned long sal_platform_features;
/* (major << 8) | minor, built in check_versions() from the systab sal_rev_* fields */
unsigned short sal_revision;
/* (major << 8) | minor, built in check_versions() from the systab sal_b_rev_* fields */
unsigned short sal_version;
/* split a packed revision/version value into its two bytes */
#define SAL_MAJOR(x) ((x) >> 8)
#define SAL_MINOR(x) ((x) & 0xff)
/* procedure descriptor that ia64_sal is pointed at by ia64_sal_handler_init() */
static struct {
void *addr; /* function entry point */
void *gpval; /* gp value to use */
} pdesc;
/*
 * Placeholder SAL handler used until ia64_sal_handler_init() installs the
 * real one; it does nothing and returns -1.
 */
static long
default_handler (void)
{
return -1;
}
/* current SAL entry point; starts out as default_handler */
ia64_sal_handler ia64_sal = (ia64_sal_handler) default_handler;
/* set by ia64_sal_init() when the system table contains a SAL_DESC_PTC entry */
ia64_sal_desc_ptc_t *ia64_ptc_domain_info;
/*
 * Translate a SAL status code into a human-readable message.
 *
 * 0 and 1 have their own messages, -1 through -20 are looked up in a table
 * and every other value yields "Unknown SAL status code".  The returned
 * string is a constant and must not be modified or freed.
 */
const char *
ia64_sal_strerror (long status)
{
	/* failure_text[i] is the message for status -(i + 1) */
	static const char * const failure_text[] = {
		"Not implemented",
		"Invalid argument",
		"Call completed with error",
		"Virtual address not registered",
		"No information available",
		"Insufficient space to add the entry",
		"Invalid entry_addr value",
		"Invalid interrupt vector",
		"Requested memory not available",
		"Unable to write to the NVM device",
		"Invalid partition type specified",
		"Invalid NVM_Object id specified",
		"NVM_Object already has the maximum number "
			"of partitions",
		"Insufficient space in partition for the "
			"requested write sub-function",
		"Insufficient data buffer space for the "
			"requested read record sub-function",
		"Scratch buffer required for the write/delete "
			"sub-function",
		"Insufficient space in the NVM_Object for the "
			"requested create sub-function",
		"Invalid value specified in the partition_rec "
			"argument",
		"Record oriented I/O not supported for this "
			"partition",
		"Bad format of record to be written or "
			"required keyword variable not "
			"specified",
	};
	const long nr_failures = (long) (sizeof(failure_text) / sizeof(failure_text[0]));

	if (status == 0)
		return "Call completed without error";
	if (status == 1)
		return "Effect a warm boot of the system to complete "
			"the update";
	if (status < 0 && status >= -nr_failures)
		return failure_text[-status - 1];
	return "Unknown SAL status code";
}
/*
 * Install the SAL handler: record entry_point and gpval in the private
 * procedure descriptor and make ia64_sal refer to that descriptor.
 */
void __init
ia64_sal_handler_init (void *entry_point, void *gpval)
{
/* fill in the SAL procedure descriptor and point ia64_sal to it: */
pdesc.addr = entry_point;
pdesc.gpval = gpval;
ia64_sal = (ia64_sal_handler) &pdesc;
}
/*
 * Derive sal_revision and sal_version from the SAL system table and patch
 * up the values reported by firmware that is known to get them wrong.
 */
static void __init
check_versions (struct ia64_sal_systab *systab)
{
	sal_revision = (systab->sal_rev_major << 8) | systab->sal_rev_minor;
	sal_version = (systab->sal_b_rev_major << 8) | systab->sal_b_rev_minor;

	/* Check for broken firmware */
	if (sal_revision == SAL_VERSION_CODE(49, 29) &&
	    sal_version == SAL_VERSION_CODE(49, 29)) {
		/*
		 * Old firmware for zx2000 prototypes have this weird version
		 * number, reset it to something sane.
		 */
		sal_revision = SAL_VERSION_CODE(2, 8);
		sal_version = SAL_VERSION_CODE(0, 0);
	}

	if (!ia64_platform_is("sn2"))
		return;

	/*
	 * SGI Altix has hard-coded version 2.9 in their prom
	 * but they actually implement 3.2, so let's fix it here.
	 */
	if (sal_revision == SAL_VERSION_CODE(2, 9))
		sal_revision = SAL_VERSION_CODE(3, 2);
}
/*
 * Handle an entry-point descriptor from the SAL system table: install the
 * PAL handler and then the SAL handler, using the virtual (__va) form of
 * the addresses found in the descriptor.
 */
static void __init
sal_desc_entry_point (void *p)
{
struct ia64_sal_desc_entry_point *ep = p;
ia64_pal_handler_init(__va(ep->pal_proc));
ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp));
}
#ifdef CONFIG_SMP
/*
 * Set or clear the given flag in smp_int_redirect.
 *
 * Without CPU hotplug the flag is set unless no_int_routing is on, in which
 * case it is cleared.  With CPU hotplug no_int_routing is forced on and the
 * flag is always cleared.  On non-SMP builds this is an empty macro.
 */
static void __init
set_smp_redirect (int flag)
{
#ifndef CONFIG_HOTPLUG_CPU
if (no_int_routing)
smp_int_redirect &= ~flag;
else
smp_int_redirect |= flag;
#else
/*
 * For CPU Hotplug we don't want to do any chipset supported
 * interrupt redirection. The reason is that this would require that
 * all interrupts be stopped and the irq be hard bound to a cpu.
 * Later when the interrupt is fired we need to set the redir hint
 * on again in the vector. This is cumbersome for something that the
 * user mode irq balancer will solve anyway.
 */
no_int_routing=1;
smp_int_redirect &= ~flag;
#endif
}
#else
#define set_smp_redirect(flag) do { } while (0)
#endif
/*
 * Handle a platform-feature descriptor from the SAL system table: remember
 * the feature mask in sal_platform_features, log the features that are
 * present and configure interrupt redirection for the IRQ/IPI hints.
 */
static void __init
sal_desc_platform_feature (void *p)
{
	struct ia64_sal_desc_platform_feature *desc = p;
	unsigned long features;

	sal_platform_features = desc->feature_mask;
	features = sal_platform_features;

	printk(KERN_INFO "SAL Platform features:");
	if (features == 0) {
		printk(" None\n");
		return;
	}

	/* one word per feature, all on the line started above */
	if (features & IA64_SAL_PLATFORM_FEATURE_BUS_LOCK)
		printk(" BusLock");

	if (features & IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT) {
		printk(" IRQ_Redirection");
		set_smp_redirect(SMP_IRQ_REDIRECTION);
	}

	if (features & IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT) {
		printk(" IPI_Redirection");
		set_smp_redirect(SMP_IPI_REDIRECTION);
	}

	if (features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)
		printk(" ITC_Drift");

	printk("\n");
}
#ifdef CONFIG_SMP
/*
 * Handle an AP-wakeup descriptor from the SAL system table.  Only the
 * external-interrupt mechanism is supported: its vector is recorded in
 * ap_wakeup_vector.  Any other mechanism is reported as an error.
 */
static void __init
sal_desc_ap_wakeup (void *p)
{
	struct ia64_sal_desc_ap_wakeup *desc = p;

	if (desc->mechanism != IA64_SAL_AP_EXTERNAL_INT) {
		printk(KERN_ERR "SAL: AP wakeup mechanism unsupported!\n");
		return;
	}

	ap_wakeup_vector = desc->vector;
	printk(KERN_INFO "SAL: AP wakeup using external interrupt "
			"vector 0x%lx\n", ap_wakeup_vector);
}
/*
 * Scan the boot command line for a word starting with "nointroute" and
 * turn no_int_routing on when one is found.
 *
 * Words are separated by spaces.  strncmp() is used for the comparison so
 * that a word shorter than the keyword never causes bytes beyond the
 * terminating NUL of the command line to be examined.
 */
static void __init
chk_nointroute_opt(void)
{
	char *cp;

	for (cp = boot_command_line; *cp; ) {
		if (strncmp(cp, "nointroute", 10) == 0) {
			no_int_routing = 1;
			printk ("no_int_routing on\n");
			break;
		}

		/* skip the rest of this word, then the blanks that follow it */
		while (*cp != ' ' && *cp)
			++cp;
		while (*cp == ' ')
			++cp;
	}
}
#else
/* non-SMP build: the AP wakeup descriptor is ignored */
static void __init sal_desc_ap_wakeup(void *p) { }
#endif
/*
* HP rx5670 firmware polls for interrupts during SAL_CACHE_FLUSH by reading
* cr.ivr, but it never writes cr.eoi. This leaves any interrupt marked as
* "in-service" and masks other interrupts of equal or lower priority.
*
* HP internal defect reports: F1859, F2775, F3031.
*/
/* non-zero: ia64_sal_cache_flush() uses PAL_CACHE_FLUSH instead of SAL_CACHE_FLUSH */
static int sal_cache_flush_drops_interrupts;
/*
 * Handler for the "force_pal_cache_flush" early boot parameter: sets the
 * flag above unconditionally.  The parameter value in str is not looked at.
 * Always returns 0.
 */
static int __init
force_pal_cache_flush(char *str)
{
sal_cache_flush_drops_interrupts = 1;
return 0;
}
early_param("force_pal_cache_flush", force_pal_cache_flush);
/*
 * Find out whether SAL_CACHE_FLUSH loses pending interrupts on this
 * platform.  A timer interrupt is sent to the local CPU with interrupts
 * disabled; if it is no longer pending after the SAL call, the flag
 * sal_cache_flush_drops_interrupts is set so that later flushes go through
 * PAL instead.  Nothing is done when the flag is already set (e.g. by the
 * "force_pal_cache_flush" boot parameter).
 */
void __init
check_sal_cache_flush (void)
{
	unsigned long flags;
	int cpu;
	u64 vector, cache_type = 3;
	struct ia64_sal_retval isrv;

	if (sal_cache_flush_drops_interrupts)
		return;

	cpu = get_cpu();
	local_irq_save(flags);

	/*
	 * Send ourselves a timer interrupt, wait until it's reported, and see
	 * if SAL_CACHE_FLUSH drops it.
	 */
	platform_send_ipi(cpu, IA64_TIMER_VECTOR, IA64_IPI_DM_INT, 0);

	while (!ia64_get_irr(IA64_TIMER_VECTOR))
		cpu_relax();

	SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0);

	if (isrv.status)
		printk(KERN_ERR "SAL_CACHE_FLUSH failed with %ld\n", isrv.status);

	if (ia64_get_irr(IA64_TIMER_VECTOR)) {
		/* still pending: consume it and signal end-of-interrupt */
		vector = ia64_get_ivr();
		ia64_eoi();
		WARN_ON(vector != IA64_TIMER_VECTOR);
	} else {
		sal_cache_flush_drops_interrupts = 1;
		printk(KERN_ERR "SAL: SAL_CACHE_FLUSH drops interrupts; "
			"PAL_CACHE_FLUSH will be used instead\n");
		ia64_eoi();
	}

	local_irq_restore(flags);
	put_cpu();
}
/*
 * Flush the caches selected by @cache_type.
 *
 * Normally this is a SAL_CACHE_FLUSH call whose status is returned.  When
 * sal_cache_flush_drops_interrupts is set, ia64_pal_cache_flush() is called
 * instead (with interrupts disabled and PAL_CACHE_FLUSH_INVALIDATE) and its
 * result is returned.
 */
s64
ia64_sal_cache_flush (u64 cache_type)
{
	struct ia64_sal_retval isrv;
	unsigned long flags;
	u64 progress = 0;
	s64 rc;

	if (!sal_cache_flush_drops_interrupts) {
		SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0);
		return isrv.status;
	}

	local_irq_save(flags);
	rc = ia64_pal_cache_flush(cache_type,
		PAL_CACHE_FLUSH_INVALIDATE, &progress, NULL);
	local_irq_restore(flags);
	return rc;
}
EXPORT_SYMBOL_GPL(ia64_sal_cache_flush);
/*
 * Initialise the SAL interface from the SAL system table.
 *
 * Checks the table signature (a mismatch is only logged), works out the SAL
 * revision/version, logs an identification line and then walks the
 * descriptors that follow the table header, dispatching on the type byte at
 * the start of each one.  Descriptor types not listed in the switch are
 * skipped.  Does nothing but warn when @systab is NULL.
 */
void __init
ia64_sal_init (struct ia64_sal_systab *systab)
{
	char *p;
	int i;

	if (!systab) {
		printk(KERN_WARNING "Hmm, no SAL System Table.\n");
		return;
	}

	if (strncmp(systab->signature, "SST_", 4) != 0)
		printk(KERN_ERR "bad signature in system table!\n");

	check_versions(systab);
#ifdef CONFIG_SMP
	chk_nointroute_opt();
#endif

	/* revisions are coded in BCD, so %x does the job for us */
	printk(KERN_INFO "SAL %x.%x: %.32s %.32s%sversion %x.%x\n",
			SAL_MAJOR(sal_revision), SAL_MINOR(sal_revision),
			systab->oem_id, systab->product_id,
			systab->product_id[0] ? " " : "",
			SAL_MAJOR(sal_version), SAL_MINOR(sal_version));

	/* the descriptors start right behind the table header */
	p = (char *) (systab + 1);
	for (i = 0; i < systab->entry_count; i++) {
		/*
		 * The first byte of each entry type contains the type
		 * descriptor.
		 */
		switch (*p) {
		      case SAL_DESC_ENTRY_POINT:
			sal_desc_entry_point(p);
			break;
		      case SAL_DESC_PLATFORM_FEATURE:
			sal_desc_platform_feature(p);
			break;
		      case SAL_DESC_PTC:
			ia64_ptc_domain_info = (ia64_sal_desc_ptc_t *)p;
			break;
		      case SAL_DESC_AP_WAKEUP:
			sal_desc_ap_wakeup(p);
			break;
		}
		/* the size of a descriptor depends on its type */
		p += SAL_DESC_SIZE(*p);
	}
}
/*
 * Issue an OEM-specific SAL call through SAL_CALL().
 *
 * Returns -1 without calling SAL when @oemfunc lies outside
 * [IA64_SAL_OEMFUNC_MIN, IA64_SAL_OEMFUNC_MAX]; otherwise the SAL result is
 * stored in *@isrvp and 0 is returned.
 */
int
ia64_sal_oemcall(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1,
		 u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7)
{
	int in_oem_range = oemfunc >= IA64_SAL_OEMFUNC_MIN &&
			   oemfunc <= IA64_SAL_OEMFUNC_MAX;

	if (!in_oem_range)
		return -1;

	SAL_CALL(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
	return 0;
}
EXPORT_SYMBOL(ia64_sal_oemcall);
/*
 * Issue an OEM-specific SAL call through SAL_CALL_NOLOCK().
 *
 * Returns -1 without calling SAL when @oemfunc lies outside
 * [IA64_SAL_OEMFUNC_MIN, IA64_SAL_OEMFUNC_MAX]; otherwise the SAL result is
 * stored in *@isrvp and 0 is returned.
 */
int
ia64_sal_oemcall_nolock(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1,
			u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6,
			u64 arg7)
{
	int in_oem_range = oemfunc >= IA64_SAL_OEMFUNC_MIN &&
			   oemfunc <= IA64_SAL_OEMFUNC_MAX;

	if (!in_oem_range)
		return -1;

	SAL_CALL_NOLOCK(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6,
			arg7);
	return 0;
}
EXPORT_SYMBOL(ia64_sal_oemcall_nolock);
/*
 * Issue an OEM-specific SAL call through SAL_CALL_REENTRANT().
 *
 * Returns -1 without calling SAL when @oemfunc lies outside
 * [IA64_SAL_OEMFUNC_MIN, IA64_SAL_OEMFUNC_MAX]; otherwise the SAL result is
 * stored in *@isrvp and 0 is returned.
 */
int
ia64_sal_oemcall_reentrant(struct ia64_sal_retval *isrvp, u64 oemfunc,
			   u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5,
			   u64 arg6, u64 arg7)
{
	int in_oem_range = oemfunc >= IA64_SAL_OEMFUNC_MIN &&
			   oemfunc <= IA64_SAL_OEMFUNC_MAX;

	if (!in_oem_range)
		return -1;

	SAL_CALL_REENTRANT(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6,
			   arg7);
	return 0;
}
EXPORT_SYMBOL(ia64_sal_oemcall_reentrant);
/*
 * Issue the SAL_FREQ_BASE call for the clock selected by which.
 *
 * The first two return values of the call are stored through
 * ticks_per_second and drift_info (both are written even when the call
 * reports an error); the SAL status is returned.
 */
long
ia64_sal_freq_base (unsigned long which, unsigned long *ticks_per_second,
unsigned long *drift_info)
{
struct ia64_sal_retval isrv;
SAL_CALL(isrv, SAL_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
*ticks_per_second = isrv.v0;
*drift_info = isrv.v1;
return isrv.status;
}
EXPORT_SYMBOL_GPL(ia64_sal_freq_base);

704
arch/ia64/kernel/salinfo.c Normal file
View file

@ -0,0 +1,704 @@
/*
* salinfo.c
*
* Creates entries in /proc/sal for various system features.
*
* Copyright (c) 2003, 2006 Silicon Graphics, Inc. All rights reserved.
* Copyright (c) 2003 Hewlett-Packard Co
* Bjorn Helgaas <bjorn.helgaas@hp.com>
*
* 10/30/2001 jbarnes@sgi.com copied much of Stephane's palinfo
* code to create this file
* Oct 23 2003 kaos@sgi.com
* Replace IPI with set_cpus_allowed() to read a record from the required cpu.
* Redesign salinfo log processing to separate interrupt and user space
* contexts.
* Cache the record across multi-block reads from user space.
* Support > 64 cpus.
* Delete module_exit and MOD_INC/DEC_COUNT, salinfo cannot be a module.
*
* Jan 28 2004 kaos@sgi.com
* Periodically check for outstanding MCA or INIT records.
*
* Dec 5 2004 kaos@sgi.com
* Standardize which records are cleared automatically.
*
* Aug 18 2005 kaos@sgi.com
* mca.c may not pass a buffer, a NULL buffer just indicates that a new
* record is available in SAL.
* Replace some NR_CPUS by cpus_online, for hotplug cpu.
*
* Jan 5 2006 kaos@sgi.com
* Handle hotplug cpus coming online.
* Handle hotplug cpus going offline while they still have outstanding records.
* Use the cpu_* macros consistently.
* Replace the counting semaphore with a mutex and a test if the cpumask is non-empty.
* Modify the locking to make the test for "work to do" an atomic operation.
*/
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/semaphore.h>
#include <asm/sal.h>
#include <asm/uaccess.h>
MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>");
MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
MODULE_LICENSE("GPL");
static const struct file_operations proc_salinfo_fops;
/* Describes one feature file under /proc/sal. */
typedef struct {
const char *name; /* name of the proc entry */
unsigned long feature; /* feature bit */
struct proc_dir_entry *entry; /* registered entry (removal) */
} salinfo_entry_t;
/*
 * List {name,feature} pairs for every entry in /proc/sal/<feature>
 * that this module exports.  The entry member is not initialised here.
 */
static const salinfo_entry_t salinfo_entries[]={
{ "bus_lock", IA64_SAL_PLATFORM_FEATURE_BUS_LOCK, },
{ "irq_redirection", IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT, },
{ "ipi_redirection", IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT, },
{ "itc_drift", IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT, },
};
/* number of feature files described above */
#define NR_SALINFO_ENTRIES ARRAY_SIZE(salinfo_entries)
/*
 * Names of the log types; salinfo_data[] has one element per name here.
 */
static char *salinfo_log_name[] = {
"mca",
"init",
"cmc",
"cpe",
};
/*
 * Room for every proc entry this file creates: the feature files, one
 * directory per log type, two files per log type, and /proc/sal itself.
 */
static struct proc_dir_entry *salinfo_proc_entries[
ARRAY_SIZE(salinfo_entries) + /* /proc/sal/bus_lock */
ARRAY_SIZE(salinfo_log_name) + /* /proc/sal/{mca,...} */
(2 * ARRAY_SIZE(salinfo_log_name)) + /* /proc/sal/mca/{event,data} */
1]; /* /proc/sal */
/* Some records we get ourselves, some are accessed as saved data in buffers
 * that are owned by mca.c.
 *
 * One salinfo_data_saved describes such a saved record; a slot whose buffer
 * is NULL is unused.
 */
struct salinfo_data_saved {
u8* buffer; /* record buffer handed to salinfo_log_wakeup() */
u64 size; /* size passed along with the buffer */
u64 id; /* record id at the time of the wakeup, used to detect changes */
int cpu; /* cpu on which salinfo_log_wakeup() ran */
};
/* State transitions. Actions are :-
* Write "read <cpunum>" to the data file.
* Write "clear <cpunum>" to the data file.
* Write "oemdata <cpunum> <offset>" to the data file.
* Read from the data file.
* Close the data file.
*
* Start state is NO_DATA.
*
* NO_DATA
* write "read <cpunum>" -> NO_DATA or LOG_RECORD.
* write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
* write "oemdata <cpunum> <offset>" -> return -EINVAL.
* read data -> return EOF.
* close -> unchanged. Free record areas.
*
* LOG_RECORD
* write "read <cpunum>" -> NO_DATA or LOG_RECORD.
* write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
* write "oemdata <cpunum> <offset>" -> format the oem data, goto OEMDATA.
* read data -> return the INIT/MCA/CMC/CPE record.
* close -> unchanged. Keep record areas.
*
* OEMDATA
* write "read <cpunum>" -> NO_DATA or LOG_RECORD.
* write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
* write "oemdata <cpunum> <offset>" -> format the oem data, goto OEMDATA.
* read data -> return the formatted oemdata.
* close -> unchanged. Keep record areas.
*
* Closing the data file does not change the state. This allows shell scripts
* to manipulate salinfo data, each shell redirection opens the file, does one
* action then closes it again. The record areas are only freed at close when
* the state is NO_DATA.
*/
/* Processing state of one log type's data file. */
enum salinfo_state {
STATE_NO_DATA, /* nothing to read; reads return EOF */
STATE_LOG_RECORD, /* reads return the record in log_buffer */
STATE_OEMDATA, /* reads return the decoded oemdata */
};
/* Per-log-type state; one instance exists for each name in salinfo_log_name. */
struct salinfo_data {
cpumask_t cpu_event; /* which cpus have outstanding events */
struct semaphore mutex; /* released by salinfo_work_to_do() to wake readers of the event file */
u8 *log_buffer; /* vmalloc'ed in salinfo_log_open(), holds the current record */
u64 log_size; /* size of the record in log_buffer, 0 if none */
u8 *oemdata; /* decoded oem data */
u64 oemdata_size; /* size of the decoded oem data */
int open; /* single-open to prevent races */
u8 type; /* SAL record type passed to the ia64_sal_*_state_info() calls */
u8 saved_num; /* using a saved record? (index + 1 into data_saved, 0 if not) */
enum salinfo_state state :8; /* processing state */
u8 padding;
int cpu_check; /* next CPU to check */
struct salinfo_data_saved data_saved[5];/* save last 5 records from mca.c, must be < 255 */
};
/* one salinfo_data per log type, indexed by the type value */
static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)];
/* taken around the test-and-set / clear of salinfo_data.open */
static DEFINE_SPINLOCK(data_lock);
/* taken around updates of data_saved[] and around salinfo_work_to_do() */
static DEFINE_SPINLOCK(data_saved_lock);
/** salinfo_platform_oemdata - optional callback to decode oemdata from an error
* record.
* @sect_header: pointer to the start of the section to decode.
* @oemdata: returns vmalloc area containing the decoded output.
* @oemdata_size: returns length of decoded output (strlen).
*
* Description: If user space asks for oem data to be decoded by the kernel
* and/or prom and the platform has set salinfo_platform_oemdata to the address
* of a platform specific routine then call that routine. salinfo_platform_oemdata
* vmalloc's and formats its output area, returning the address of the text
* and its strlen. Returns 0 for success, -ve for error. The callback is
* invoked on the cpu that generated the error record.
*/
int (*salinfo_platform_oemdata)(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size);
/*
 * Argument bundle for salinfo_platform_oemdata_cpu(): the three arguments
 * to pass to the salinfo_platform_oemdata hook plus a slot for its result.
 */
struct salinfo_platform_oemdata_parms {
const u8 *efi_guid; /* passed as the hook's first argument */
u8 **oemdata; /* where the hook returns the decoded output */
u64 *oemdata_size; /* where the hook returns the output length */
int ret; /* return value of the hook */
};
/* Tell user space that there is work to do by leaving data->mutex released.
 * Rather than tracking the state of the mutex across multiple cpus, user
 * context, interrupt context, non-maskable interrupt context and hotplug
 * cpu, the mutex is simply grabbed if it happens to be free and then
 * released again.
 *
 * The caller must hold data_saved_lock so that the down/up pair is atomic.
 */
static void
salinfo_work_to_do(struct salinfo_data *data)
{
	/* whether the trylock succeeded is irrelevant, up() follows regardless */
	int already_held = down_trylock(&data->mutex);

	(void)already_held;
	up(&data->mutex);
}
/*
 * call_on_cpu() callback: invoke the salinfo_platform_oemdata hook with the
 * arguments bundled in context and store its return value in parms->ret.
 * The hook pointer is not checked here.
 */
static void
salinfo_platform_oemdata_cpu(void *context)
{
struct salinfo_platform_oemdata_parms *parms = context;
parms->ret = salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size);
}
/*
 * Remove entry @shift from data->data_saved by moving all later entries
 * down by one slot and clearing the last slot.
 *
 * Source and destination of the move overlap, so memmove() is required;
 * memcpy() is not defined for overlapping regions.
 */
static void
shift1_data_saved (struct salinfo_data *data, int shift)
{
	memmove(data->data_saved+shift, data->data_saved+shift+1,
		(ARRAY_SIZE(data->data_saved) - (shift+1)) * sizeof(data->data_saved[0]));
	memset(data->data_saved + ARRAY_SIZE(data->data_saved) - 1, 0,
	       sizeof(data->data_saved[0]));
}
/* This routine is invoked in interrupt context. Note: mca.c enables
 * interrupts before calling this code for CMC/CPE. MCA and INIT events are
 * not irq safe, do not call any routines that use spinlocks, they may deadlock.
 * MCA and INIT records are recorded, a timer event will look for any
 * outstanding events and wake up the user space code.
 *
 * The buffer passed from mca.c points to the output from ia64_log_get. This is
 * a persistent buffer but its contents can change between the interrupt and
 * when user space processes the record. Save the record id to identify
 * changes. If the buffer is NULL then just update the bitmap.
 *
 * type selects the salinfo_data element, buffer/size describe the record and
 * irqsafe says whether data_saved_lock may be taken.  When irqsafe is zero
 * no lock is taken and salinfo_work_to_do() is not called.
 */
void
salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
{
/* NOTE(review): data is computed from type before the BUG_ON below checks type */
struct salinfo_data *data = salinfo_data + type;
struct salinfo_data_saved *data_saved;
unsigned long flags = 0;
int i;
int saved_size = ARRAY_SIZE(data->data_saved);
BUG_ON(type >= ARRAY_SIZE(salinfo_log_name));
if (irqsafe)
spin_lock_irqsave(&data_saved_lock, flags);
if (buffer) {
/* look for the first unused slot */
for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
if (!data_saved->buffer)
break;
}
if (i == saved_size) {
/*
 * All slots are in use.  Drop the oldest one unless a saved
 * record is currently being used (saved_num != 0), in which
 * case the new record is not saved at all.
 */
if (!data->saved_num) {
shift1_data_saved(data, 0);
data_saved = data->data_saved + saved_size - 1;
} else
data_saved = NULL;
}
if (data_saved) {
data_saved->cpu = smp_processor_id();
data_saved->id = ((sal_log_record_header_t *)buffer)->id;
data_saved->size = size;
data_saved->buffer = buffer;
}
}
/* mark this cpu as having an outstanding event, with or without a buffer */
cpu_set(smp_processor_id(), data->cpu_event);
if (irqsafe) {
salinfo_work_to_do(data);
spin_unlock_irqrestore(&data_saved_lock, flags);
}
}
/* Check for outstanding MCA/INIT records every minute (arbitrary) */
#define SALINFO_TIMER_DELAY (60*HZ)
static struct timer_list salinfo_timer;
extern void ia64_mlogbuf_dump(void);
/*
 * Timer helper: when the data file of this log type is open and at least
 * one cpu has an outstanding event, signal user space that there is work
 * to do.
 */
static void
salinfo_timeout_check(struct salinfo_data *data)
{
	unsigned long irq_state;

	if (data->open && !cpus_empty(data->cpu_event)) {
		spin_lock_irqsave(&data_saved_lock, irq_state);
		salinfo_work_to_do(data);
		spin_unlock_irqrestore(&data_saved_lock, irq_state);
	}
}
/*
 * Timer function: dump the MCA log buffer, check the MCA and INIT log types
 * for outstanding events and re-arm the timer SALINFO_TIMER_DELAY jiffies
 * from now.  arg is not used.
 */
static void
salinfo_timeout (unsigned long arg)
{
ia64_mlogbuf_dump();
salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA);
salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT);
salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY;
add_timer(&salinfo_timer);
}
/*
 * open() for the event files: only callers with CAP_SYS_ADMIN may open
 * them, everybody else gets -EPERM.
 */
static int
salinfo_event_open(struct inode *inode, struct file *file)
{
	return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
}
/*
 * read() for the event files.
 *
 * Waits until some cpu has an outstanding event for this log type, then
 * returns the text "read <cpu>\n" (truncated to count bytes) naming one
 * such online cpu.  Cpus are examined round-robin starting at
 * data->cpu_check; events of cpus that are no longer online are discarded.
 *
 * Returns the number of bytes copied, -EAGAIN for a non-blocking file with
 * nothing to report, -EINTR when the wait is interrupted, or -EFAULT when
 * the user buffer cannot be written.  ppos is not used.
 */
static ssize_t
salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
{
struct salinfo_data *data = PDE_DATA(file_inode(file));
char cmd[32];
size_t size;
int i, n, cpu = -1;
retry:
/* only wait on the mutex when no event is currently recorded */
if (cpus_empty(data->cpu_event) && down_trylock(&data->mutex)) {
if (file->f_flags & O_NONBLOCK)
return -EAGAIN;
if (down_interruptible(&data->mutex))
return -EINTR;
}
n = data->cpu_check;
for (i = 0; i < nr_cpu_ids; i++) {
if (cpu_isset(n, data->cpu_event)) {
if (!cpu_online(n)) {
cpu_clear(n, data->cpu_event);
/*
 * NOTE(review): this continue skips the ++n below, so the
 * same index is examined once more on the next iteration
 * -- confirm this is intended.
 */
continue;
}
cpu = n;
break;
}
if (++n == nr_cpu_ids)
n = 0;
}
/* nothing usable was found, go back to waiting */
if (cpu == -1)
goto retry;
ia64_mlogbuf_dump();
/* for next read, start checking at next CPU */
data->cpu_check = cpu;
if (++data->cpu_check == nr_cpu_ids)
data->cpu_check = 0;
snprintf(cmd, sizeof(cmd), "read %d\n", cpu);
size = strlen(cmd);
if (size > count)
size = count;
if (copy_to_user(buffer, cmd, size))
return -EFAULT;
return size;
}
/* File operations of the event files: open and read only, seeking is a no-op. */
static const struct file_operations salinfo_event_fops = {
.open = salinfo_event_open,
.read = salinfo_event_read,
.llseek = noop_llseek,
};
/*
 * open() for the data files.
 *
 * Requires CAP_SYS_ADMIN (-EPERM otherwise) and allows a single opener at a
 * time (-EBUSY otherwise).  When the log type is in STATE_NO_DATA a record
 * buffer of the size reported by SAL is allocated; if that fails the file
 * is marked closed again and -ENOMEM is returned.
 */
static int
salinfo_log_open(struct inode *inode, struct file *file)
{
	struct salinfo_data *data = PDE_DATA(inode);
	int busy;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* claim the single-open slot under data_lock */
	spin_lock(&data_lock);
	busy = data->open;
	if (!busy)
		data->open = 1;
	spin_unlock(&data_lock);
	if (busy)
		return -EBUSY;

	/* in any other state the record areas are kept from an earlier open */
	if (data->state != STATE_NO_DATA)
		return 0;

	data->log_buffer = vmalloc(ia64_sal_get_state_info_size(data->type));
	if (data->log_buffer)
		return 0;

	data->open = 0;
	return -ENOMEM;
}
/*
 * release() for the data files.
 *
 * The record and oemdata areas are freed only when the log type is in
 * STATE_NO_DATA; in the other states they are kept for the next opener.
 * The single-open marker is then cleared under data_lock.  Always returns 0.
 */
static int
salinfo_log_release(struct inode *inode, struct file *file)
{
struct salinfo_data *data = PDE_DATA(inode);
if (data->state == STATE_NO_DATA) {
vfree(data->log_buffer);
vfree(data->oemdata);
data->log_buffer = NULL;
data->oemdata = NULL;
}
spin_lock(&data_lock);
data->open = 0;
spin_unlock(&data_lock);
return 0;
}
/*
 * Run fn(arg) with the current task's allowed-cpu mask temporarily set to
 * the single cpu given, then restore the previous mask.  The results of both
 * set_cpus_allowed_ptr() calls are ignored, so fn is called even if the
 * mask could not be changed.
 */
static void
call_on_cpu(int cpu, void (*fn)(void *), void *arg)
{
cpumask_t save_cpus_allowed = current->cpus_allowed;
set_cpus_allowed_ptr(current, cpumask_of(cpu));
(*fn)(arg);
set_cpus_allowed_ptr(current, &save_cpus_allowed);
}
/*
 * call_on_cpu() callback: fetch the record of data->type from SAL into
 * data->log_buffer and store the size returned by SAL in data->log_size.
 * A record whose severity is "corrected" is cleared from SAL right away.
 */
static void
salinfo_log_read_cpu(void *context)
{
struct salinfo_data *data = context;
sal_log_record_header_t *rh;
data->log_size = ia64_sal_get_state_info(data->type, (u64 *) data->log_buffer);
rh = (sal_log_record_header_t *)(data->log_buffer);
/* Clear corrected errors as they are read from SAL */
if (rh->severity == sal_log_severity_corrected)
ia64_sal_clear_state_info(data->type);
}
/*
 * Load the next record for the given cpu into data->log_buffer.
 *
 * A record saved by salinfo_log_wakeup() for that cpu is preferred; it is
 * only used if its id still matches the id remembered at wakeup time,
 * otherwise the saved entry is discarded and the search restarts.  When no
 * saved record is used the record is read from SAL on that cpu.  The state
 * becomes STATE_LOG_RECORD if a record was obtained; otherwise it becomes
 * STATE_NO_DATA and the cpu's event bit is cleared.
 */
static void
salinfo_log_new_read(int cpu, struct salinfo_data *data)
{
struct salinfo_data_saved *data_saved;
unsigned long flags;
int i;
int saved_size = ARRAY_SIZE(data->data_saved);
data->saved_num = 0;
spin_lock_irqsave(&data_saved_lock, flags);
retry:
for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
if (data_saved->buffer && data_saved->cpu == cpu) {
sal_log_record_header_t *rh = (sal_log_record_header_t *)(data_saved->buffer);
data->log_size = data_saved->size;
memcpy(data->log_buffer, rh, data->log_size);
barrier(); /* id check must not be moved */
if (rh->id == data_saved->id) {
/* remember which saved slot is in use (index + 1) */
data->saved_num = i+1;
break;
}
/* saved record changed by mca.c since interrupt, discard it */
shift1_data_saved(data, i);
goto retry;
}
}
spin_unlock_irqrestore(&data_saved_lock, flags);
/* no usable saved record: ask SAL on the cpu in question */
if (!data->saved_num)
call_on_cpu(cpu, salinfo_log_read_cpu, data);
if (!data->log_size) {
data->state = STATE_NO_DATA;
cpu_clear(cpu, data->cpu_event);
} else {
data->state = STATE_LOG_RECORD;
}
}
/*
 * read() for the data files: hand out the current record in
 * STATE_LOG_RECORD, the decoded oem data in STATE_OEMDATA and nothing
 * (an empty buffer) in any other state.
 */
static ssize_t
salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
{
	struct salinfo_data *data = PDE_DATA(file_inode(file));
	u8 *src = NULL;
	u64 src_size = 0;

	switch (data->state) {
	case STATE_LOG_RECORD:
		src = data->log_buffer;
		src_size = data->log_size;
		break;
	case STATE_OEMDATA:
		src = data->oemdata;
		src_size = data->oemdata_size;
		break;
	default:
		break;
	}
	return simple_read_from_buffer(buffer, count, ppos, src, src_size);
}
/* call_on_cpu() callback: clear the SAL state info of data->type. */
static void
salinfo_log_clear_cpu(void *context)
{
struct salinfo_data *data = context;
ia64_sal_clear_state_info(data->type);
}
/*
 * Handle a "clear <cpu>" request.
 *
 * The state is reset to STATE_NO_DATA.  If the cpu has no outstanding event
 * nothing else happens.  Otherwise its event bit is cleared, a saved record
 * that was in use is dropped, the record is cleared from SAL unless it was a
 * corrected error, and the next record (if any) is loaded; when one is found
 * the event bit is set again and user space is signalled.  Always returns 0.
 */
static int
salinfo_log_clear(struct salinfo_data *data, int cpu)
{
sal_log_record_header_t *rh;
unsigned long flags;
spin_lock_irqsave(&data_saved_lock, flags);
data->state = STATE_NO_DATA;
if (!cpu_isset(cpu, data->cpu_event)) {
spin_unlock_irqrestore(&data_saved_lock, flags);
return 0;
}
cpu_clear(cpu, data->cpu_event);
if (data->saved_num) {
/* saved_num is the slot index + 1 */
shift1_data_saved(data, data->saved_num - 1);
data->saved_num = 0;
}
spin_unlock_irqrestore(&data_saved_lock, flags);
rh = (sal_log_record_header_t *)(data->log_buffer);
/* Corrected errors have already been cleared from SAL */
if (rh->severity != sal_log_severity_corrected)
call_on_cpu(cpu, salinfo_log_clear_cpu, data);
/* clearing a record may make a new record visible */
salinfo_log_new_read(cpu, data);
if (data->state == STATE_LOG_RECORD) {
spin_lock_irqsave(&data_saved_lock, flags);
cpu_set(cpu, data->cpu_event);
salinfo_work_to_do(data);
spin_unlock_irqrestore(&data_saved_lock, flags);
}
return 0;
}
/*
 * write() handler for a log type's "data" file.  User space drives the
 * state machine by writing one of three text commands:
 *
 *   "read <cpu>"             - fetch the next record for <cpu>
 *   "clear <cpu>"            - clear the current record for <cpu>
 *   "oemdata <cpu> <offset>" - decode the OEM data whose GUID starts at
 *                              <offset> bytes into the current record
 *
 * Returns @count on success, -EFAULT if the command cannot be copied from
 * user space, -EINVAL for an unknown command or an oemdata request that is
 * not valid in the current state, or the error code reported by the
 * command itself.
 */
static ssize_t
salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
{
	struct salinfo_data *data = PDE_DATA(file_inode(file));
	char cmd[32];
	size_t size;
	u32 offset;
	int cpu;

	/*
	 * Copy at most sizeof(cmd) - 1 bytes and terminate the string
	 * ourselves: sscanf() needs a NUL-terminated buffer and user space
	 * is not required to supply one.
	 */
	size = sizeof(cmd) - 1;
	if (count < size)
		size = count;
	if (copy_from_user(cmd, buffer, size))
		return -EFAULT;
	cmd[size] = '\0';

	/* NOTE(review): cpu comes straight from user space and is not range-checked here. */
	if (sscanf(cmd, "read %d", &cpu) == 1) {
		salinfo_log_new_read(cpu, data);
	} else if (sscanf(cmd, "clear %d", &cpu) == 1) {
		int ret;

		if ((ret = salinfo_log_clear(data, cpu)))
			count = ret;
	} else if (sscanf(cmd, "oemdata %d %d", &cpu, &offset) == 2) {
		if (data->state != STATE_LOG_RECORD && data->state != STATE_OEMDATA)
			return -EINVAL;
		/*
		 * The GUID must lie completely inside the record.  Test the
		 * record size first so the subtraction below cannot wrap.
		 */
		if (data->log_size < sizeof(efi_guid_t) ||
		    offset > data->log_size - sizeof(efi_guid_t))
			return -EINVAL;
		data->state = STATE_OEMDATA;
		if (salinfo_platform_oemdata) {
			struct salinfo_platform_oemdata_parms parms = {
				.efi_guid = data->log_buffer + offset,
				.oemdata = &data->oemdata,
				.oemdata_size = &data->oemdata_size
			};

			call_on_cpu(cpu, salinfo_platform_oemdata_cpu, &parms);
			if (parms.ret)
				count = parms.ret;
		} else
			data->oemdata_size = 0;	/* no platform decoder: empty OEM data */
	} else
		return -EINVAL;

	return count;
}
/* File operations installed on each log type's "data" proc entry by salinfo_init(). */
static const struct file_operations salinfo_data_fops = {
.open = salinfo_log_open,
.release = salinfo_log_release,
.read = salinfo_log_read,
.write = salinfo_log_write,
.llseek = default_llseek,
};
/*
 * CPU hotplug notifier callback.  When a cpu comes online it is marked in
 * the cpu_event mask of every log type and salinfo_work_to_do() is called;
 * when a cpu dies, saved records belonging to it are dropped and its
 * cpu_event bit is cleared.  Other actions are ignored.  Always returns
 * NOTIFY_OK.
 */
static int
salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
{
/* hcpu carries the cpu number encoded as a pointer value */
unsigned int i, cpu = (unsigned long)hcpu;
unsigned long flags;
struct salinfo_data *data;
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
spin_lock_irqsave(&data_saved_lock, flags);
for (i = 0, data = salinfo_data;
i < ARRAY_SIZE(salinfo_data);
++i, ++data) {
cpu_set(cpu, data->cpu_event);
salinfo_work_to_do(data);
}
spin_unlock_irqrestore(&data_saved_lock, flags);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
spin_lock_irqsave(&data_saved_lock, flags);
for (i = 0, data = salinfo_data;
i < ARRAY_SIZE(salinfo_data);
++i, ++data) {
struct salinfo_data_saved *data_saved;
int j;
/*
 * Walk the saved records from the last slot to the first.
 * NOTE(review): presumably so that shift1_data_saved() moving later
 * entries cannot make the scan skip a slot - confirm against
 * shift1_data_saved().
 */
for (j = ARRAY_SIZE(data->data_saved) - 1, data_saved = data->data_saved + j;
j >= 0;
--j, --data_saved) {
if (data_saved->buffer && data_saved->cpu == cpu) {
shift1_data_saved(data, j);
}
}
cpu_clear(cpu, data->cpu_event);
}
spin_unlock_irqrestore(&data_saved_lock, flags);
break;
}
return NOTIFY_OK;
}
/* Hotplug notifier block; registered in salinfo_init(). */
static struct notifier_block salinfo_cpu_notifier =
{
.notifier_call = salinfo_cpu_callback,
.priority = 0,
};
/*
 * Create the /proc/sal hierarchy: one file per entry in salinfo_entries[]
 * and, for each name in salinfo_log_name[], a directory holding an "event"
 * and a "data" file.  Also starts salinfo_timer and registers the cpu
 * hotplug notifier.  Always returns 0, even when /proc/sal itself cannot
 * be created.
 */
static int __init
salinfo_init(void)
{
struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */
struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */
struct proc_dir_entry *dir, *entry;
struct salinfo_data *data;
int i, j;
salinfo_dir = proc_mkdir("sal", NULL);
if (!salinfo_dir)
return 0;
for (i=0; i < NR_SALINFO_ENTRIES; i++) {
/* pass the feature bit in question as misc data */
*sdir++ = proc_create_data(salinfo_entries[i].name, 0, salinfo_dir,
&proc_salinfo_fops,
(void *)salinfo_entries[i].feature);
}
/* the notifier registration below is completed by cpu_notifier_register_done() */
cpu_notifier_register_begin();
for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
data = salinfo_data + i;
data->type = i;
sema_init(&data->mutex, 1);
dir = proc_mkdir(salinfo_log_name[i], salinfo_dir);
/* on any creation failure, skip the rest of the setup for this log type */
if (!dir)
continue;
entry = proc_create_data("event", S_IRUSR, dir,
&salinfo_event_fops, data);
if (!entry)
continue;
*sdir++ = entry;
entry = proc_create_data("data", S_IRUSR | S_IWUSR, dir,
&salinfo_data_fops, data);
if (!entry)
continue;
*sdir++ = entry;
/* we missed any events before now */
for_each_online_cpu(j)
cpu_set(j, data->cpu_event);
/* the directory is recorded after the files it contains */
*sdir++ = dir;
}
*sdir++ = salinfo_dir;
/* arm the timer: first expiry is SALINFO_TIMER_DELAY jiffies from now */
init_timer(&salinfo_timer);
salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY;
salinfo_timer.function = &salinfo_timeout;
add_timer(&salinfo_timer);
__register_hotcpu_notifier(&salinfo_cpu_notifier);
cpu_notifier_register_done();
return 0;
}
/*
* 'data' contains an integer that corresponds to the feature we're
* testing
*/
static int proc_salinfo_show(struct seq_file *m, void *v)
{
unsigned long data = (unsigned long)v;
seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n");
return 0;
}
/*
 * open() handler for the feature files: hands the data attached to the
 * proc entry to single_open() together with proc_salinfo_show().
 */
static int proc_salinfo_open(struct inode *inode, struct file *file)
{
	void *feature = PDE_DATA(inode);

	return single_open(file, proc_salinfo_show, feature);
}
/* File operations for the per-feature files created in salinfo_init() (seq_file based). */
static const struct file_operations proc_salinfo_fops = {
.open = proc_salinfo_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
module_init(salinfo_init);

1070
arch/ia64/kernel/setup.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,25 @@
/*
 * Scratch state handed to the signal code: the saved pt_regs plus two
 * values that are kept alongside them.
 */
struct sigscratch {
unsigned long scratch_unat; /* ar.unat for the general registers saved in pt */
unsigned long ar_pfs; /* for syscalls, the user-level function-state */
struct pt_regs pt;
};
/*
 * Layout of the frame that is written to the user stack when a signal is
 * delivered.
 */
struct sigframe {
/*
* Place signal handler args where user-level unwinder can find them easily.
* DO NOT MOVE THESE. They are part of the IA-64 Linux ABI and there is
* user-level code that depends on their presence!
*/
unsigned long arg0; /* signum */
unsigned long arg1; /* siginfo pointer */
unsigned long arg2; /* sigcontext pointer */
/*
* End of architected state.
*/
void __user *handler; /* pointer to the plabel of the signal handler */
struct siginfo info; /* siginfo that arg1 points to */
struct sigcontext sc; /* sigcontext that arg2 points to */
};
extern void ia64_do_signal (struct sigscratch *, long);

496
arch/ia64/kernel/signal.c Normal file
View file

@ -0,0 +1,496 @@
/*
* Architecture-specific signal handling support.
*
* Copyright (C) 1999-2004 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* Derived from i386 and Alpha versions.
*/
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
#include <linux/unistd.h>
#include <linux/wait.h>
#include <asm/intrinsics.h>
#include <asm/uaccess.h>
#include <asm/rse.h>
#include <asm/sigcontext.h>
#include "sigframe.h"
/* set to non-zero to printk() a line on every signal delivery and sigreturn */
#define DEBUG_SIG 0
#define STACK_ALIGN 16 /* minimal alignment for stack pointer */
/*
 * Copy a kernel sigset_t (k) to/from a user sigset_t (u).  A single-word
 * set is transferred with __put_user()/__get_user(); larger sets are
 * copied as a whole.  Both forms evaluate to non-zero on failure.
 */
#if _NSIG_WORDS > 1
# define PUT_SIGSET(k,u) __copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t))
# define GET_SIGSET(k,u) __copy_from_user((k)->sig, (u)->sig, sizeof(sigset_t))
#else
# define PUT_SIGSET(k,u) __put_user((k)->sig[0], &(u)->sig[0])
# define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0])
#endif
/*
 * Reload the register state in @scr from the user-level sigcontext @sc.
 * The individual user-access results are OR-ed together, so the return
 * value is zero only if every access succeeded.  The caller is expected to
 * have checked @sc with access_ok(), since only the unchecked "__" user
 * accessors are used here.
 */
static long
restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
{
unsigned long ip, flags, nat, um, cfm, rsc;
long err;
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
/* restore the scratch state that always gets updated during signal delivery: */
err = __get_user(flags, &sc->sc_flags);
err |= __get_user(nat, &sc->sc_nat);
err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */
err |= __get_user(cfm, &sc->sc_cfm);
err |= __get_user(um, &sc->sc_um); /* user mask */
err |= __get_user(rsc, &sc->sc_ar_rsc);
err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat);
err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr);
err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
err |= __get_user(scr->pt.pr, &sc->sc_pr); /* predicates */
err |= __get_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */
err |= __get_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */
err |= __copy_from_user(&scr->pt.r1, &sc->sc_gr[1], 8); /* r1 */
err |= __copy_from_user(&scr->pt.r8, &sc->sc_gr[8], 4*8); /* r8-r11 */
err |= __copy_from_user(&scr->pt.r12, &sc->sc_gr[12], 2*8); /* r12-r13 */
err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8); /* r15 */
/* bit 63 of cr_ifs is the valid bit (see setup_sigcontext) */
scr->pt.cr_ifs = cfm | (1UL << 63);
scr->pt.ar_rsc = rsc | (3 << 2); /* force PL3 */
/* establish new instruction pointer: */
/* the low two bits of the saved ip select the slot (psr.ri), the rest is the bundle address */
scr->pt.cr_iip = ip & ~0x3UL;
ia64_psr(&scr->pt)->ri = ip & 0x3;
/* only the user-mask bits of psr are taken from user space */
scr->pt.cr_ipsr = (scr->pt.cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM);
scr->scratch_unat = ia64_put_scratch_nat_bits(&scr->pt, nat);
if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) {
/* Restore most scratch-state only when not in syscall. */
err |= __get_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */
err |= __get_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */
err |= __get_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */
err |= __copy_from_user(&scr->pt.ar_csd, &sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */
err |= __copy_from_user(&scr->pt.r2, &sc->sc_gr[2], 2*8); /* r2-r3 */
err |= __copy_from_user(&scr->pt.r16, &sc->sc_gr[16], 16*8); /* r16-r31 */
}
if ((flags & IA64_SC_FLAG_FPH_VALID) != 0) {
struct ia64_psr *psr = ia64_psr(&scr->pt);
/* 96 registers of 16 bytes each, starting at sc_fr[32] */
err |= __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
psr->mfh = 0; /* drop signal handler's fph contents... */
/* the fpu ownership update below must not be preempted */
preempt_disable();
if (psr->dfh)
ia64_drop_fpu(current);
else {
/* We already own the local fph, otherwise psr->dfh wouldn't be 0. */
__ia64_load_fpu(current->thread.fph);
ia64_set_local_fpu_owner(current);
}
preempt_enable();
}
return err;
}
/*
 * Copy the kernel siginfo @from to the user buffer @to.
 *
 * A negative si_code causes the whole structure to be copied verbatim.
 * Otherwise only the three generic ints and the union members selected by
 * the upper 16 bits of si_code are written.
 *
 * Returns -EFAULT if @to is not writable or the verbatim copy fails;
 * otherwise the OR of the individual __put_user() results (zero on
 * success).
 */
int
copy_siginfo_to_user (siginfo_t __user *to, const siginfo_t *from)
{
if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t)))
return -EFAULT;
if (from->si_code < 0) {
if (__copy_to_user(to, from, sizeof(siginfo_t)))
return -EFAULT;
return 0;
} else {
int err;
/*
* If you change siginfo_t structure, please be sure this code is fixed
* accordingly. It should never copy any pad contained in the structure
* to avoid security leaks, but must copy the generic 3 ints plus the
* relevant union member.
*/
err = __put_user(from->si_signo, &to->si_signo);
err |= __put_user(from->si_errno, &to->si_errno);
/* only the low 16 bits of si_code are passed on to user space */
err |= __put_user((short)from->si_code, &to->si_code);
switch (from->si_code >> 16) {
case __SI_FAULT >> 16:
err |= __put_user(from->si_flags, &to->si_flags);
err |= __put_user(from->si_isr, &to->si_isr);
/* fall through: si_addr and si_imm are written as well */
case __SI_POLL >> 16:
err |= __put_user(from->si_addr, &to->si_addr);
err |= __put_user(from->si_imm, &to->si_imm);
break;
case __SI_TIMER >> 16:
err |= __put_user(from->si_tid, &to->si_tid);
err |= __put_user(from->si_overrun, &to->si_overrun);
err |= __put_user(from->si_ptr, &to->si_ptr);
break;
case __SI_RT >> 16: /* Not generated by the kernel as of now. */
case __SI_MESGQ >> 16:
err |= __put_user(from->si_uid, &to->si_uid);
err |= __put_user(from->si_pid, &to->si_pid);
err |= __put_user(from->si_ptr, &to->si_ptr);
break;
case __SI_CHLD >> 16:
err |= __put_user(from->si_utime, &to->si_utime);
err |= __put_user(from->si_stime, &to->si_stime);
err |= __put_user(from->si_status, &to->si_status);
/* fall through: si_uid and si_pid are written as well */
default:
err |= __put_user(from->si_uid, &to->si_uid);
err |= __put_user(from->si_pid, &to->si_pid);
break;
}
return err;
}
}
/*
 * Implementation of rt_sigreturn: restore the blocked-signal mask, the
 * register state and the alternate-stack settings from the sigcontext in
 * the signal frame on the user stack.  If any of that fails, a SIGSEGV is
 * forced on the current task.
 *
 * The return value is the address of the kernel exit path to use:
 * ia64_strace_leave_kernel when syscall tracing or auditing is active,
 * ia64_leave_kernel otherwise.
 */
long
ia64_rt_sigreturn (struct sigscratch *scr)
{
extern char ia64_strace_leave_kernel, ia64_leave_kernel;
struct sigcontext __user *sc;
struct siginfo si;
sigset_t set;
long retval;
/* the frame sits 16 bytes above the stack pointer (setup_frame set r12 = frame - 16) */
sc = &((struct sigframe __user *) (scr->pt.r12 + 16))->sc;
/*
* When we return to the previously executing context, r8 and r10 have already
* been setup the way we want them. Indeed, if the signal wasn't delivered while
* in a system call, we must not touch r8 or r10 as otherwise user-level state
* could be corrupted.
*/
retval = (long) &ia64_leave_kernel;
if (test_thread_flag(TIF_SYSCALL_TRACE)
|| test_thread_flag(TIF_SYSCALL_AUDIT))
/*
* strace expects to be notified after sigreturn returns even though the
* context to which we return may not be in the middle of a syscall.
* Thus, the return-value that strace displays for sigreturn is
* meaningless.
*/
retval = (long) &ia64_strace_leave_kernel;
/* one access check covers all the unchecked user accesses that follow */
if (!access_ok(VERIFY_READ, sc, sizeof(*sc)))
goto give_sigsegv;
if (GET_SIGSET(&set, &sc->sc_mask))
goto give_sigsegv;
set_current_blocked(&set);
if (restore_sigcontext(sc, scr))
goto give_sigsegv;
#if DEBUG_SIG
printk("SIG return (%s:%d): sp=%lx ip=%lx\n",
current->comm, current->pid, scr->pt.r12, scr->pt.cr_iip);
#endif
if (restore_altstack(&sc->sc_stack))
goto give_sigsegv;
return retval;
give_sigsegv:
/* the frame was unusable: report the sigcontext address as the faulting address */
si.si_signo = SIGSEGV;
si.si_errno = 0;
si.si_code = SI_KERNEL;
si.si_pid = task_pid_vnr(current);
si.si_uid = from_kuid_munged(current_user_ns(), current_uid());
si.si_addr = sc;
force_sig_info(SIGSEGV, &si, current);
return retval;
}
/*
* This does just the minimum required setup of sigcontext.
* Specifically, it only installs data that is either not knowable at
* the user-level or that gets modified before execution in the
* trampoline starts. Everything else is done at the user-level.
*
* Returns the OR of all user-access results, i.e. zero only if every
* write to @sc succeeded.  Only unchecked "__" accessors are used, so the
* caller is expected to have validated the destination.
*/
static long
setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratch *scr)
{
unsigned long flags = 0, ifs, cfm, nat;
long err = 0;
ifs = scr->pt.cr_ifs;
if (on_sig_stack((unsigned long) sc))
flags |= IA64_SC_FLAG_ONSTACK;
if ((ifs & (1UL << 63)) == 0)
/* if cr_ifs doesn't have the valid bit set, we got here through a syscall */
flags |= IA64_SC_FLAG_IN_SYSCALL;
/* the low 38 bits of cr_ifs are saved as the frame marker */
cfm = ifs & ((1UL << 38) - 1);
ia64_flush_fph(current);
if ((current->thread.flags & IA64_THREAD_FPH_VALID)) {
flags |= IA64_SC_FLAG_FPH_VALID;
/* 96 registers of 16 bytes each, starting at sc_fr[32] */
err = __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16);
}
nat = ia64_get_scratch_nat_bits(&scr->pt, scr->scratch_unat);
err |= __put_user(flags, &sc->sc_flags);
err |= __put_user(nat, &sc->sc_nat);
err |= PUT_SIGSET(mask, &sc->sc_mask);
err |= __put_user(cfm, &sc->sc_cfm);
err |= __put_user(scr->pt.cr_ipsr & IA64_PSR_UM, &sc->sc_um);
err |= __put_user(scr->pt.ar_rsc, &sc->sc_ar_rsc);
err |= __put_user(scr->pt.ar_unat, &sc->sc_ar_unat); /* ar.unat */
err |= __put_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr); /* ar.fpsr */
err |= __put_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
err |= __put_user(scr->pt.pr, &sc->sc_pr); /* predicates */
err |= __put_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */
err |= __put_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */
err |= __copy_to_user(&sc->sc_gr[1], &scr->pt.r1, 8); /* r1 */
err |= __copy_to_user(&sc->sc_gr[8], &scr->pt.r8, 4*8); /* r8-r11 */
err |= __copy_to_user(&sc->sc_gr[12], &scr->pt.r12, 2*8); /* r12-r13 */
err |= __copy_to_user(&sc->sc_gr[15], &scr->pt.r15, 8); /* r15 */
/* sc_ip holds the bundle address plus the slot number (psr.ri) */
err |= __put_user(scr->pt.cr_iip + ia64_psr(&scr->pt)->ri, &sc->sc_ip);
if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) {
/* Copy scratch regs to sigcontext if the signal didn't interrupt a syscall. */
err |= __put_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */
err |= __put_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */
err |= __put_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */
err |= __copy_to_user(&sc->sc_ar25, &scr->pt.ar_csd, 2*8); /* ar.csd & ar.ssd */
err |= __copy_to_user(&sc->sc_gr[2], &scr->pt.r2, 2*8); /* r2-r3 */
err |= __copy_to_user(&sc->sc_gr[16], &scr->pt.r16, 16*8); /* r16-r31 */
}
return err;
}
/*
 * Report whether @bsp (a register-backing-store address) lies inside the
 * current task's alternate signal stack.
 */
static inline int
rbs_on_sig_stack (unsigned long bsp)
{
	/* unsigned subtraction: an address below sas_ss_sp wraps to a huge offset */
	unsigned long offset = bsp - current->sas_ss_sp;

	return offset < current->sas_ss_size;
}
/*
 * Force a SIGSEGV on the current task after delivery of signal @sig has
 * failed; @addr is reported as the faulting address.  If the failed signal
 * was SIGSEGV itself, its handler is first reset to SIG_DFL.  Always
 * returns 1.
 */
static long
force_sigsegv_info (int sig, void __user *addr)
{
	struct siginfo info;
	unsigned long irqflags;

	if (sig == SIGSEGV) {
		/*
		 * Taking siglock just to update sa_handler is most likely more
		 * than is needed, but this path is not performance critical and
		 * playing it safe beats debugging a nasty race should
		 * kernel/signal.c ever change so that modifying sa_handler
		 * without the lock is no longer safe.
		 */
		spin_lock_irqsave(&current->sighand->siglock, irqflags);
		current->sighand->action[SIGSEGV - 1].sa.sa_handler = SIG_DFL;
		spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
	}

	info.si_signo = SIGSEGV;
	info.si_errno = 0;
	info.si_code = SI_KERNEL;
	info.si_pid = task_pid_vnr(current);
	info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
	info.si_addr = addr;
	force_sig_info(SIGSEGV, &info, current);

	return 1;
}
/*
 * Build the signal frame for @ksig on the user stack (or on the alternate
 * signal stack when SA_ONSTACK applies) and redirect the saved register
 * state in @scr so that user mode resumes in the signal trampoline.
 *
 * Returns 0 on success.  On failure a SIGSEGV is forced through
 * force_sigsegv_info() and its return value (1) is passed back.
 */
static long
setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr)
{
extern char __kernel_sigtramp[];
unsigned long tramp_addr, new_rbs = 0, new_sp;
struct sigframe __user *frame;
long err;
new_sp = scr->pt.r12;
tramp_addr = (unsigned long) __kernel_sigtramp;
if (ksig->ka.sa.sa_flags & SA_ONSTACK) {
int onstack = sas_ss_flags(new_sp);
if (onstack == 0) {
/* not yet on the alternate stack: start at its top */
new_sp = current->sas_ss_sp + current->sas_ss_size;
/*
* We need to check for the register stack being on the
* signal stack separately, because it's switched
* separately (memory stack is switched in the kernel,
* register stack is switched in the signal trampoline).
*/
if (!rbs_on_sig_stack(scr->pt.ar_bspstore))
new_rbs = ALIGN(current->sas_ss_sp,
sizeof(long));
} else if (onstack == SS_ONSTACK) {
unsigned long check_sp;
/*
* If we are on the alternate signal stack and would
* overflow it, don't. Return an always-bogus address
* instead so we will die with SIGSEGV.
*/
check_sp = (new_sp - sizeof(*frame)) & -STACK_ALIGN;
if (!likely(on_sig_stack(check_sp)))
return force_sigsegv_info(ksig->sig, (void __user *)
check_sp);
}
}
/* carve the frame out below new_sp, rounded down to STACK_ALIGN */
frame = (void __user *) ((new_sp - sizeof(*frame)) & -STACK_ALIGN);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return force_sigsegv_info(ksig->sig, frame);
err = __put_user(ksig->sig, &frame->arg0);
err |= __put_user(&frame->info, &frame->arg1);
err |= __put_user(&frame->sc, &frame->arg2);
/* new_rbs stays 0 unless the register stack has to move to the signal stack */
err |= __put_user(new_rbs, &frame->sc.sc_rbs_base);
err |= __put_user(0, &frame->sc.sc_loadrs); /* initialize to zero */
err |= __put_user(ksig->ka.sa.sa_handler, &frame->handler);
err |= copy_siginfo_to_user(&frame->info, &ksig->info);
err |= __save_altstack(&frame->sc.sc_stack, scr->pt.r12);
err |= setup_sigcontext(&frame->sc, set, scr);
if (unlikely(err))
return force_sigsegv_info(ksig->sig, frame);
scr->pt.r12 = (unsigned long) frame - 16; /* new stack pointer */
scr->pt.ar_fpsr = FPSR_DEFAULT; /* reset fpsr for signal handler */
scr->pt.cr_iip = tramp_addr;
ia64_psr(&scr->pt)->ri = 0; /* start executing in first slot */
ia64_psr(&scr->pt)->be = 0; /* force little-endian byte-order */
/*
* Force the interruption function mask to zero. This has no effect when a
* system-call got interrupted by a signal (since, in that case, scr->pt.cr_ifs is
* ignored), but it has the desirable effect of making it possible to deliver a
* signal with an incomplete register frame (which happens when a mandatory RSE
* load faults). Furthermore, it has no negative effect on getting the user's
* dirty partition preserved, because that's governed by scr->pt.loadrs.
*/
scr->pt.cr_ifs = (1UL << 63);
/*
* Note: this affects only the NaT bits of the scratch regs (the ones saved in
* pt_regs), which is exactly what we want.
*/
scr->scratch_unat = 0; /* ensure the NaT bit of r12 is clear */
#if DEBUG_SIG
printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%p\n",
current->comm, current->pid, ksig->sig, scr->pt.r12, frame->sc.sc_ip, frame->handler);
#endif
return 0;
}
/*
 * Set up the user-level frame for @ksig and, if that worked, complete the
 * delivery through signal_setup_done().  Returns setup_frame()'s result:
 * 0 on success, non-zero if the frame could not be set up.
 */
static long
handle_signal (struct ksignal *ksig, struct sigscratch *scr)
{
	int failed = setup_frame(ksig, sigmask_to_save(), scr);

	if (failed)
		return failed;

	signal_setup_done(0, ksig, test_thread_flag(TIF_SINGLESTEP));
	return 0;
}
/*
* Note that `init' is a special process: it doesn't get signals it doesn't want to
* handle. Thus you cannot kill init even with a SIGKILL even by mistake.
*
* Deliver pending signals for the current task.  @in_syscall is non-zero
* when the task was interrupted inside a system call, in which case the
* syscall may have to be restarted or made to fail with EINTR depending on
* the error code found in r8.
*/
void
ia64_do_signal (struct sigscratch *scr, long in_syscall)
{
long restart = in_syscall;
long errno = scr->pt.r8; /* syscall result as it was on entry */
struct ksignal ksig;
/*
* This only loops in the rare cases of handle_signal() failing, in which case we
* need to push through a forced SIGSEGV.
*/
while (1) {
get_signal(&ksig);
/*
* get_signal() may have run a debugger (via notify_parent())
* and the debugger may have modified the state (e.g., to arrange for an
* inferior call), thus it's important to check for restarting _after_
* get_signal().
*/
if ((long) scr->pt.r10 != -1)
/*
* A system call has to be restarted only if one of the error codes
* ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned. If r10
* isn't -1 then r8 doesn't hold an error code and we don't need to
* restart the syscall, so we can clear the "restart" flag here.
*/
restart = 0;
/* no signal to deliver */
if (ksig.sig <= 0)
break;
if (unlikely(restart)) {
switch (errno) {
case ERESTART_RESTARTBLOCK:
case ERESTARTNOHAND:
scr->pt.r8 = EINTR;
/* note: scr->pt.r10 is already -1 */
break;
case ERESTARTSYS:
if ((ksig.ka.sa.sa_flags & SA_RESTART) == 0) {
scr->pt.r8 = EINTR;
/* note: scr->pt.r10 is already -1 */
break;
}
/* fall through: the handler has SA_RESTART, so restart the syscall */
case ERESTARTNOINTR:
ia64_decrement_ip(&scr->pt);
restart = 0; /* don't restart twice if handle_signal() fails... */
}
}
/*
* Whee! Actually deliver the signal. If the delivery failed, we need to
* continue to iterate in this loop so we can deliver the SIGSEGV...
*
* NOTE(review): handle_signal() returns setup_frame()'s result, which is 0
* on success and non-zero on failure, so as written this returns on
* failure and loops on success - the opposite of what the comment above
* describes.  Confirm the intended polarity.
*/
if (handle_signal(&ksig, scr))
return;
}
/* Did we come from a system call? */
if (restart) {
/* Restart the system call - no handlers present */
if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR
|| errno == ERESTART_RESTARTBLOCK)
{
/*
* Note: the syscall number is in r15 which is saved in
* pt_regs so all we need to do here is adjust ip so that
* the "break" instruction gets re-executed.
*/
ia64_decrement_ip(&scr->pt);
if (errno == ERESTART_RESTARTBLOCK)
scr->pt.r15 = __NR_restart_syscall;
}
}
/* if there's no signal to deliver, we just put the saved sigmask
* back */
restore_saved_sigmask();
}

342
arch/ia64/kernel/smp.c Normal file
View file

@ -0,0 +1,342 @@
/*
* SMP Support
*
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
* Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com>
*
* Lots of stuff stolen from arch/alpha/kernel/smp.c
*
* 01/05/16 Rohit Seth <rohit.seth@intel.com> IA64-SMP functions. Reorganized
* the existing code (on the lines of x86 port).
* 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_jiffy
* calibration on each CPU.
* 00/08/23 Asit Mallick <asit.k.mallick@intel.com> fixed logical processor id
* 00/03/31 Rohit Seth <rohit.seth@intel.com> Fixes for Bootstrap Processor
* & cpu_online_map now gets done here (instead of setup.c)
* 99/10/05 davidm Update to bring it in sync with new command-line processing
* scheme.
* 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and
* smp_call_function_single to resend IPI on timeouts
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/kernel_stat.h>
#include <linux/mm.h>
#include <linux/cache.h>
#include <linux/delay.h>
#include <linux/efi.h>
#include <linux/bitops.h>
#include <linux/kexec.h>
#include <linux/atomic.h>
#include <asm/current.h>
#include <asm/delay.h>
#include <asm/machvec.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/sal.h>
#include <asm/tlbflush.h>
#include <asm/unistd.h>
#include <asm/mca.h>
/*
* Note: alignment of 4 entries/cacheline was empirically determined
* to be a good tradeoff between hot cachelines & spreading the array
* across too many cachelines.
*/
/* one counter per cpu, incremented by smp_local_flush_tlb() */
static struct local_tlb_flush_counts {
unsigned int count;
} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS];
/* per-cpu snapshot of the low 16 bits of the counters above; see smp_flush_tlb_cpumask() */
static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned short [NR_CPUS],
shadow_flush_counts);
/* bit numbers within the per-cpu ipi_operation word; decoded by handle_IPI() */
#define IPI_CALL_FUNC 0
#define IPI_CPU_STOP 1
#define IPI_CALL_FUNC_SINGLE 2
#define IPI_KDUMP_CPU_STOP 3
/* This needs to be cacheline aligned because it is written to by *other* CPUs. */
static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, ipi_operation);
extern void cpu_halt (void);
/*
 * IPI_CPU_STOP handler: mark the calling cpu offline, then halt it with
 * interrupts disabled.
 */
static void
stop_this_cpu(void)
{
	const int self = smp_processor_id();

	/* Remove this CPU: */
	set_cpu_online(self, false);
	max_xtp();
	local_irq_disable();
	cpu_halt();
}
/*
 * Halt the calling cpu with interrupts disabled.  cpu_halt() is not
 * expected to return; if it does, that is treated as a kernel bug.
 */
void
cpu_die(void)
{
	max_xtp();
	local_irq_disable();
	cpu_halt();

	/* Should never be here */
	BUG();
	while (1)
		;
}
/*
 * Interrupt handler for the IPI vector.  Atomically fetches and clears the
 * calling cpu's ipi_operation word and dispatches every operation whose
 * bit was set, repeating until no new bits have been posted.  Always
 * returns IRQ_HANDLED.
 */
irqreturn_t
handle_IPI (int irq, void *dev_id)
{
	int this_cpu = get_cpu();
	unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation);
	unsigned long ops;

	mb();	/* Order interrupt and bit testing. */
	while ((ops = xchg(pending_ipis, 0)) != 0) {
		mb();	/* Order bit clearing and data access. */
		do {
			unsigned long which;

			/* lowest set bit of ops */
			which = ffz(~ops);
			/*
			 * The mask must be built as an unsigned long: an int
			 * "1 << which" is wrong (or undefined) for bit 31 and
			 * above and could leave the bit set forever.
			 */
			ops &= ~(1UL << which);

			switch (which) {
			      case IPI_CPU_STOP:
				stop_this_cpu();
				break;

			      case IPI_CALL_FUNC:
				generic_smp_call_function_interrupt();
				break;

			      case IPI_CALL_FUNC_SINGLE:
				generic_smp_call_function_single_interrupt();
				break;

#ifdef CONFIG_KEXEC
			      case IPI_KDUMP_CPU_STOP:
				unw_init_running(kdump_cpu_freeze, NULL);
				break;
#endif
			      default:
				printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n",
				       this_cpu, which);
				break;
			}
		} while (ops);
		mb();	/* Order data access and bit testing. */
	}
	put_cpu();
	return IRQ_HANDLED;
}
/*
 * Post operation @op in @dest_cpu's ipi_operation word and raise the IPI
 * vector on that cpu.
 *
 * Called with preemption disabled.
 */
static inline void
send_IPI_single (int dest_cpu, int op)
{
	unsigned long *pending = &per_cpu(ipi_operation, dest_cpu);

	set_bit(op, pending);
	platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0);
}
/*
 * Send operation @op to every online cpu except the calling one.
 *
 * Called with preemption disabled.
 */
static inline void
send_IPI_allbutself (int op)
{
	unsigned int cpu;

	for_each_online_cpu(cpu) {
		if (cpu == smp_processor_id())
			continue;
		send_IPI_single(cpu, op);
	}
}
/*
* Called with preemption disabled.
*/
static inline void
send_IPI_mask(const struct cpumask *mask, int op)
{
unsigned int cpu;
for_each_cpu(cpu, mask) {
send_IPI_single(cpu, op);
}
}
/*
* Called with preemption disabled.
*/
static inline void
send_IPI_all (int op)
{
int i;
for_each_online_cpu(i) {
send_IPI_single(i, op);
}
}
/*
 * Send operation @op to the calling cpu itself.
 *
 * Called with preemption disabled.
 */
static inline void
send_IPI_self (int op)
{
	int self = smp_processor_id();

	send_IPI_single(self, op);
}
#ifdef CONFIG_KEXEC
/* Send IPI_KDUMP_CPU_STOP to every online cpu except the calling one. */
void
kdump_smp_send_stop(void)
{
send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
}
/*
 * Send an INIT (IA64_IPI_DM_INIT) to every other online cpu whose
 * kdump_status[] entry is still 0.
 */
void
kdump_smp_send_init(void)
{
	unsigned int self_cpu = smp_processor_id();
	unsigned int cpu;

	for_each_online_cpu(cpu) {
		if (cpu == self_cpu || kdump_status[cpu] != 0)
			continue;
		platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0);
	}
}
#endif
/*
* Raise the reschedule vector (IA64_IPI_RESCHEDULE) on @cpu.
*
* Called with preemption disabled.
*/
void
smp_send_reschedule (int cpu)
{
platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
}
EXPORT_SYMBOL_GPL(smp_send_reschedule);
/*
* Raise the local-TLB-flush vector (IA64_IPI_LOCAL_TLB_FLUSH) on @cpu.
*
* Called with preemption disabled.
*/
static void
smp_send_local_flush_tlb (int cpu)
{
platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0);
}
/*
 * Flush the whole TLB of the calling cpu and bump its entry in
 * local_tlb_flush_counts[] so that waiters can see the flush happened.
 */
void
smp_local_flush_tlb(void)
{
	unsigned int *counter = &local_tlb_flush_counts[smp_processor_id()].count;

	/*
	 * The increment is done atomically.  With a plain "++" the cache line
	 * could be stolen between the load and the store.  The cost of the
	 * atomic op is negligible here and it pays off during the brief
	 * periods in which many cpus flush their TLBs at the same time.
	 */
	ia64_fetchadd(1, counter, acq);
	local_flush_tlb_all();
}
#define FLUSH_DELAY 5 /* Usec backoff to eliminate excessive cacheline bouncing */
/*
 * Flush the TLB on every cpu in @xcpumask and wait until each of them has
 * done so.  Completion is detected by snapshotting each target's flush
 * counter before the IPIs are sent and then polling until the counter has
 * changed.
 */
void
smp_flush_tlb_cpumask(cpumask_t xcpumask)
{
unsigned short *counts = __ia64_per_cpu_var(shadow_flush_counts);
cpumask_t cpumask = xcpumask;
int mycpu, cpu, flush_mycpu = 0;
preempt_disable();
mycpu = smp_processor_id();
/* record the low 16 bits of every target's counter before asking for a flush */
for_each_cpu_mask(cpu, cpumask)
counts[cpu] = local_tlb_flush_counts[cpu].count & 0xffff;
mb();
for_each_cpu_mask(cpu, cpumask) {
if (cpu == mycpu)
flush_mycpu = 1;
else
smp_send_local_flush_tlb(cpu);
}
/* our own flush is done directly, after the IPIs have gone out */
if (flush_mycpu)
smp_local_flush_tlb();
/* spin, with a short backoff, until every target's counter has moved on */
for_each_cpu_mask(cpu, cpumask)
while(counts[cpu] == (local_tlb_flush_counts[cpu].count & 0xffff))
udelay(FLUSH_DELAY);
preempt_enable();
}
/* Run local_flush_tlb_all() on every cpu and wait for completion (wait argument is 1). */
void
smp_flush_tlb_all (void)
{
/* the cast adapts local_flush_tlb_all() to the callback type on_each_cpu() expects */
on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1);
}
/*
 * Finish a TLB flush for @mm on all cpus that may need it.  A
 * single-user @mm that is the current active mm is handled locally;
 * otherwise local_finish_flush_tlb_mm() is run on the other cpus via
 * smp_call_function*() and then on the calling cpu.
 */
void
smp_flush_tlb_mm (struct mm_struct *mm)
{
cpumask_var_t cpus;
preempt_disable();
/* this happens for the common case of a single-threaded fork(): */
if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
{
local_finish_flush_tlb_mm(mm);
preempt_enable();
return;
}
/* without memory for a private mask, fall back to calling all other cpus */
if (!alloc_cpumask_var(&cpus, GFP_ATOMIC)) {
smp_call_function((void (*)(void *))local_finish_flush_tlb_mm,
mm, 1);
} else {
/* work on a copy of the mm's cpumask */
cpumask_copy(cpus, mm_cpumask(mm));
smp_call_function_many(cpus,
(void (*)(void *))local_finish_flush_tlb_mm, mm, 1);
free_cpumask_var(cpus);
}
/* finally do the local part with interrupts off */
local_irq_disable();
local_finish_flush_tlb_mm(mm);
local_irq_enable();
preempt_enable();
}
/* Send IPI_CALL_FUNC_SINGLE to @cpu. */
void arch_send_call_function_single_ipi(int cpu)
{
send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE);
}
/* Send IPI_CALL_FUNC to every cpu in @mask. */
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
send_IPI_mask(mask, IPI_CALL_FUNC);
}
/*
* This function sends IPI_CPU_STOP to all other online CPUs in the system;
* each of them handles it by calling stop_this_cpu() from handle_IPI().
*/
void
smp_send_stop (void)
{
send_IPI_allbutself(IPI_CPU_STOP);
}
/*
 * Changing the profiling timer multiplier is not supported here: the
 * request is rejected with -EINVAL regardless of @multiplier.
 */
int
setup_profiling_timer (unsigned int multiplier)
{
	(void) multiplier;	/* intentionally unused */

	return -EINVAL;
}

858
arch/ia64/kernel/smpboot.c Normal file
View file

@ -0,0 +1,858 @@
/*
* SMP boot-related support
*
* Copyright (C) 1998-2003, 2005 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 2001, 2004-2005 Intel Corp
* Rohit Seth <rohit.seth@intel.com>
* Suresh Siddha <suresh.b.siddha@intel.com>
* Gordon Jin <gordon.jin@intel.com>
* Ashok Raj <ashok.raj@intel.com>
*
* 01/05/16 Rohit Seth <rohit.seth@intel.com> Moved SMP booting functions from smp.c to here.
* 01/04/27 David Mosberger <davidm@hpl.hp.com> Added ITC synching code.
* 02/07/31 David Mosberger <davidm@hpl.hp.com> Switch over to hotplug-CPU boot-sequence.
* smp_boot_cpus()/smp_commence() is replaced by
* smp_prepare_cpus()/__cpu_up()/smp_cpus_done().
* 04/06/21 Ashok Raj <ashok.raj@intel.com> Added CPU Hotplug Support
* 04/12/26 Jin Gordon <gordon.jin@intel.com>
* 04/12/26 Rohit Seth <rohit.seth@intel.com>
* Add multi-threading and multi-core detection
* 05/01/30 Suresh Siddha <suresh.b.siddha@intel.com>
* Setup cpu_sibling_map and cpu_core_map
*/
#include <linux/module.h>
#include <linux/acpi.h>
#include <linux/bootmem.h>
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/efi.h>
#include <linux/percpu.h>
#include <linux/bitops.h>
#include <linux/atomic.h>
#include <asm/cache.h>
#include <asm/current.h>
#include <asm/delay.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/machvec.h>
#include <asm/mca.h>
#include <asm/page.h>
#include <asm/paravirt.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/sal.h>
#include <asm/tlbflush.h>
#include <asm/unistd.h>
#include <asm/sn/arch.h>
/* set SMP_DEBUG to non-zero to turn Dprintk() into printk(); otherwise it expands to nothing */
#define SMP_DEBUG 0
#if SMP_DEBUG
#define Dprintk(x...) printk(x)
#else
#define Dprintk(x...)
#endif
#ifdef CONFIG_HOTPLUG_CPU
/* bsp_remove_ok: 1 if the configuration permits removing the boot processor, else 0 */
#ifdef CONFIG_PERMIT_BSP_REMOVE
#define bsp_remove_ok 1
#else
#define bsp_remove_ok 0
#endif
/*
* Global array allocated for NR_CPUS at boot time
*/
struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS];
/*
* start_ap in head.S uses this to store current booting cpu
* info.
*/
struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0];
/*
 * Point sal_state_for_booting_cpu at cpu x's slot.
 * NOTE(review): the expansion already ends in ';', so "set_brendez_area(x);"
 * yields an extra empty statement.
 */
#define set_brendez_area(x) (sal_state_for_booting_cpu = &sal_boot_rendez_state[(x)]);
#else
#define set_brendez_area(x)
#endif
/*
* ITC synchronization related stuff:
*/
/* indices into go[]: the two mailbox slots are SMP_CACHE_BYTES apart */
#define MASTER (0)
#define SLAVE (SMP_CACHE_BYTES/8)
#define NUM_ROUNDS 64 /* magic value */
#define NUM_ITERS 5 /* likewise */
static DEFINE_SPINLOCK(itc_sync_lock);
/* handshake mailbox shared by sync_master() and get_delta() */
static volatile unsigned long go[SLAVE + 1];
#define DEBUG_ITC_SYNC 0
extern void start_ap (void);
extern unsigned long ia64_iobase;
struct task_struct *task_for_booting_cpu;
/*
* State for each CPU
*/
DEFINE_PER_CPU(int, cpu_state);
cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_core_map);
DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
int smp_num_siblings = 1;
/* which logical CPU number maps to which CPU (physical APIC ID) */
volatile int ia64_cpu_to_sapicid[NR_CPUS];
EXPORT_SYMBOL(ia64_cpu_to_sapicid);
static volatile cpumask_t cpu_callin_map;
struct smp_boot_data smp_boot_data __initdata;
unsigned long ap_wakeup_vector = -1; /* External Int use to wakeup APs */
/* set to 1 by the "nointroute" boot option */
char __initdata no_int_routing;
unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
/* default for force_cpei_retarget; can be overridden with the "force_cpei=" boot option */
#ifdef CONFIG_FORCE_CPEI_RETARGET
#define CPEI_OVERRIDE_DEFAULT (1)
#else
#define CPEI_OVERRIDE_DEFAULT (0)
#endif
unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT;
/*
 * Handler for the "force_cpei=" boot option: the parsed integer (0 when
 * nothing could be parsed) becomes the new value of force_cpei_retarget.
 * Always returns 1.
 */
static int __init
cmdl_force_cpei(char *str)
{
	int parsed = 0;

	get_option(&str, &parsed);
	force_cpei_retarget = parsed;

	return 1;
}
__setup("force_cpei=", cmdl_force_cpei);
/*
 * Handler for the "nointroute" boot parameter: sets no_int_routing and
 * logs the fact. The option string is ignored. Always returns 1.
 */
static int __init nointroute(char *str)
{
	no_int_routing = 1;
	printk ("no_int_routing on\n");

	return 1;
}
__setup("nointroute", nointroute);
/*
 * With CONFIG_HOTPLUG_CPU: the first time this runs on a CPU other than
 * CPU 0, copy that CPU's saved br[0] from its rendezvous slot into slot 0
 * and never do it again. Without CONFIG_HOTPLUG_CPU this is a no-op.
 */
static void fix_b0_for_bsp(void)
{
#ifdef CONFIG_HOTPLUG_CPU
	static int b0_fixup_pending = 1;
	int this_cpu = smp_processor_id();

	/* Nothing to do once fixed, or when running on CPU 0 itself. */
	if (!b0_fixup_pending || this_cpu == 0)
		return;

	sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[this_cpu].br[0];
	printk ("Fixed BSP b0 value from CPU %d\n", this_cpu);
	b0_fixup_pending = 0;
#endif
}
/*
* Master side of the ITC handshake; ia64_sync_itc() runs this on the master
* CPU through smp_call_function_single(). With local interrupts disabled it
* serves NUM_ROUNDS*NUM_ITERS requests: wait until go[MASTER] is non-zero,
* clear it, then publish the current ITC value in go[SLAVE].
* The arg parameter is unused.
*/
void
sync_master (void *arg)
{
unsigned long flags, i;
/* ia64_sync_itc() set go[MASTER] to 1 and spins until it sees this 0 */
go[MASTER] = 0;
local_irq_save(flags);
{
for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
/* wait for the next request from get_delta() */
while (!go[MASTER])
cpu_relax();
go[MASTER] = 0;
/* reply with the master's timestamp */
go[SLAVE] = ia64_get_itc();
}
}
local_irq_restore(flags);
}
/*
* Return the number of cycles by which our itc differs from the itc on the master
* (time-keeper) CPU. A positive number indicates our itc is ahead of the master,
* negative that it is behind.
*
* Performs NUM_ITERS request/reply exchanges with sync_master() and keeps the
* one with the shortest round trip (t1 - t0).
*
* *rt receives that shortest round-trip time in cycles.
* *master receives the master's timestamp minus our t0 for that exchange.
*/
static inline long
get_delta (long *rt, long *master)
{
/* initial values make best_t1 - best_t0 the maximum, so the first sample always wins */
unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
unsigned long tcenter, t0, t1, tm;
long i;
for (i = 0; i < NUM_ITERS; ++i) {
t0 = ia64_get_itc();
/* ask the master for its timestamp */
go[MASTER] = 1;
while (!(tm = go[SLAVE]))
cpu_relax();
/* consume the reply so the next iteration waits for a fresh one */
go[SLAVE] = 0;
t1 = ia64_get_itc();
if (t1 - t0 < best_t1 - best_t0)
best_t0 = t0, best_t1 = t1, best_tm = tm;
}
*rt = best_t1 - best_t0;
*master = best_tm - best_t0;
/* average best_t0 and best_t1 without overflow: */
tcenter = (best_t0/2 + best_t1/2);
/* both halves were rounded down; add back the lost cycle when both were odd */
if (best_t0 % 2 + best_t1 % 2 == 2)
++tcenter;
return tcenter - best_tm;
}
/*
* Synchronize ar.itc of the current (slave) CPU with the ar.itc of the MASTER CPU
* (normally the time-keeper CPU). We use a closed loop to eliminate the possibility of
* unaccounted-for errors (such as getting a machine check in the middle of a calibration
* step). The basic idea is for the slave to ask the master what itc value it has and to
* read its own itc before and after the master responds. Each iteration gives us three
* timestamps:
*
* slave master
*
* t0 ---\
* ---\
* --->
* tm
* /---
* /---
* t1 <---
*
*
* The goal is to adjust the slave's ar.itc such that tm falls exactly half-way between t0
* and t1. If we achieve this, the clocks are synchronized provided the interconnect
* between the slave and the master is symmetric. Even if the interconnect were
* asymmetric, we would still know that the synchronization error is smaller than the
* roundtrip latency (t1 - t0).
*
* When the interconnect is quiet and symmetric, this lets us synchronize the itc to
* within one or two cycles. However, we can only *guarantee* that the synchronization is
* accurate to within a round-trip time, which is typically in the range of several
* hundred cycles (e.g., ~500 cycles). In practice, this means that the itc's are usually
* almost perfectly synchronized, but we shouldn't assume that the accuracy is much better
* than half a micro second or so.
*/
/*
* Slave side of the ITC synchronization: runs on the CPU whose ITC is being
* adjusted; master is the CPU number asked to run sync_master().
* Returns early (after logging) if the master cannot be reached.
*/
void
ia64_sync_itc (unsigned int master)
{
long i, delta, adj, adjust_latency = 0, done = 0;
unsigned long flags, rt, master_time_stamp, bound;
#if DEBUG_ITC_SYNC
struct {
long rt; /* roundtrip time */
long master; /* master's timestamp */
long diff; /* difference between midpoint and master's timestamp */
long lat; /* estimate of itc adjustment latency */
} t[NUM_ROUNDS];
#endif
/*
* Make sure local timer ticks are disabled while we sync. If
* they were enabled, we'd have to worry about nasty issues
* like setting the ITC ahead of (or a long time before) the
* next scheduled tick.
*/
BUG_ON((ia64_get_itv() & (1 << 16)) == 0);
/* sync_master() clears this flag when it starts; see the wait loop below */
go[MASTER] = 1;
if (smp_call_function_single(master, sync_master, NULL, 0) < 0) {
printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master);
return;
}
while (go[MASTER])
cpu_relax(); /* wait for master to be ready */
spin_lock_irqsave(&itc_sync_lock, flags);
{
/* all NUM_ROUNDS rounds are always run, matching the request count sync_master() serves */
for (i = 0; i < NUM_ROUNDS; ++i) {
delta = get_delta(&rt, &master_time_stamp);
if (delta == 0) {
done = 1; /* let's lock on to this... */
/* NOTE(review): bound is assigned here but not read anywhere in this function */
bound = rt;
}
if (!done) {
if (i > 0) {
/* accumulate the residual error and apply a quarter of it as extra correction */
adjust_latency += -delta;
adj = -delta + adjust_latency/4;
} else
adj = -delta;
ia64_set_itc(ia64_get_itc() + adj);
}
#if DEBUG_ITC_SYNC
t[i].rt = rt;
t[i].master = master_time_stamp;
t[i].diff = delta;
t[i].lat = adjust_latency/4;
#endif
}
}
spin_unlock_irqrestore(&itc_sync_lock, flags);
#if DEBUG_ITC_SYNC
for (i = 0; i < NUM_ROUNDS; ++i)
printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif
/* delta and rt are the values from the last round */
printk(KERN_INFO "CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, "
"maxerr %lu cycles)\n", smp_processor_id(), master, delta, rt);
}
/*
 * Placeholder for per-cpu profiling hook setup; currently does nothing.
 */
static inline void smp_setup_percpu_timer(void)
{
	/* intentionally empty */
}
/*
* Bring-up work done by a secondary CPU on itself (called from
* start_secondary()): mark the CPU online, set up its vectors, optionally
* synchronize its ITC with the time-keeper, initialize its timer, calibrate
* the delay loop if needed, and finally set its bit in cpu_callin_map so
* that do_boot_cpu() on the booting CPU stops waiting.
*/
static void
smp_callin (void)
{
int cpuid, phys_id, itc_master;
struct cpuinfo_ia64 *last_cpuinfo, *this_cpuinfo;
extern void ia64_init_itm(void);
extern volatile int time_keeper_id;
#ifdef CONFIG_PERFMON
extern void pfm_init_percpu(void);
#endif
cpuid = smp_processor_id();
phys_id = hard_smp_processor_id();
itc_master = time_keeper_id;
/* a CPU that is already online must not come through here again */
if (cpu_online(cpuid)) {
printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
phys_id, cpuid);
BUG();
}
fix_b0_for_bsp();
/*
* numa_node_id() works after this.
*/
set_numa_node(cpu_to_node_map[cpuid]);
set_numa_mem(local_memory_node(cpu_to_node_map[cpuid]));
spin_lock(&vector_lock);
/* Setup the per cpu irq handling data structures */
__setup_vector_irq(cpuid);
notify_cpu_starting(cpuid);
set_cpu_online(cpuid, true);
per_cpu(cpu_state, cpuid) = CPU_ONLINE;
spin_unlock(&vector_lock);
smp_setup_percpu_timer();
ia64_mca_cmc_vector_setup(); /* Setup vector on AP */
#ifdef CONFIG_PERFMON
pfm_init_percpu();
#endif
local_irq_enable();
if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
/*
* Synchronize the ITC with the BP. Need to do this after irqs are
* enabled because ia64_sync_itc() calls smp_call_function_single(), which
* calls spin_unlock_bh(), which calls
* local_bh_enable(), which bugs out if irqs are not enabled...
*/
Dprintk("Going to syncup ITC with ITC Master.\n");
ia64_sync_itc(itc_master);
}
/*
* Get our bogomips.
*/
ia64_init_itm();
/*
* Delay calibration can be skipped if new processor is identical to the
* previous processor.
*/
/* "previous" here is the CPU with the next lower logical number */
last_cpuinfo = cpu_data(cpuid - 1);
this_cpuinfo = local_cpu_data;
if (last_cpuinfo->itc_freq != this_cpuinfo->itc_freq ||
last_cpuinfo->proc_freq != this_cpuinfo->proc_freq ||
last_cpuinfo->features != this_cpuinfo->features ||
last_cpuinfo->revision != this_cpuinfo->revision ||
last_cpuinfo->family != this_cpuinfo->family ||
last_cpuinfo->archrev != this_cpuinfo->archrev ||
last_cpuinfo->model != this_cpuinfo->model)
calibrate_delay();
local_cpu_data->loops_per_jiffy = loops_per_jiffy;
/*
* Allow the master to continue.
*/
cpu_set(cpuid, cpu_callin_map);
Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid);
}
/*
* Activate a secondary processor. head.S calls this.
*
* Performs per-CPU setup, calls smp_callin() with preemption disabled and
* then enters cpu_startup_entry(CPUHP_ONLINE). The argument is unused.
*/
int
start_secondary (void *unused)
{
/* Early console may use I/O ports */
ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase));
#ifndef CONFIG_PRINTK_TIME
Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id());
#endif
efi_map_pal_code();
cpu_init();
preempt_disable();
smp_callin();
cpu_startup_entry(CPUHP_ONLINE);
/* NOTE(review): presumably never reached because cpu_startup_entry() does not return -- confirm */
return 0;
}
/*
 * Wake the application processor with logical number cpu (SAPIC id sapicid)
 * and wait for it to check in through cpu_callin_map.
 *
 * idle is recorded in task_for_booting_cpu before the wakeup IPI is sent.
 * Returns 0 once the CPU has called in, or -EINVAL if it has not done so
 * after the wait; in that case its SAPIC mapping is invalidated and it is
 * marked offline again.
 */
static int do_boot_cpu(int sapicid, int cpu, struct task_struct *idle)
{
	int polls_left = 100000;

	task_for_booting_cpu = idle;
	Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid);

	set_brendez_area(cpu);
	platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0);

	/*
	 * Wait 10s total for the AP to start: up to 100000 polls, 100us apart.
	 */
	Dprintk("Waiting on callin_map ...");
	while (polls_left-- > 0 && !cpu_isset(cpu, cpu_callin_map))
		udelay(100);
	Dprintk("\n");

	if (cpu_isset(cpu, cpu_callin_map))
		return 0;	/* It has booted */

	printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid);
	ia64_cpu_to_sapicid[cpu] = -1;
	set_cpu_online(cpu, false);  /* was set in smp_callin() */
	return -EINVAL;
}
/*
 * Handler for the "decay=" boot parameter. The value is parsed and then
 * discarded; nothing else is done with it here. Always returns 1.
 */
static int __init decay(char *str)
{
	int ignored;

	get_option (&str, &ignored);

	return 1;
}
__setup("decay=", decay);
/*
 * Build the logical-CPU -> SAPIC id table.
 *
 * Every slot starts out as -1. Logical CPU 0 is the CPU executing this
 * function. Each other id listed in smp_boot_data is given the next
 * logical number (starting at 1) and marked present and possible.
 */
void __init smp_build_cpu_map(void)
{
	int boot_sapicid = hard_smp_processor_id();
	int next_cpu = 1;
	int idx, id;

	for (idx = 0; idx < NR_CPUS; idx++)
		ia64_cpu_to_sapicid[idx] = -1;

	ia64_cpu_to_sapicid[0] = boot_sapicid;
	init_cpu_present(cpumask_of(0));
	set_cpu_possible(0, true);

	for (idx = 0; idx < smp_boot_data.cpu_count; idx++) {
		id = smp_boot_data.cpu_phys_id[idx];

		/* the boot CPU already owns logical number 0 */
		if (id != boot_sapicid) {
			set_cpu_present(next_cpu, true);
			set_cpu_possible(next_cpu, true);
			ia64_cpu_to_sapicid[next_cpu] = id;
			next_cpu++;
		}
	}
}
/*
 * Boot-CPU preparation before the secondary CPUs are started: record the
 * boot CPU in cpu_callin_map and the SAPIC table, store its
 * loops_per_jiffy, and -- when max_cpus is 0 -- restrict the online,
 * present and possible masks to CPU 0 only.
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	int boot_sapicid = hard_smp_processor_id();

	/*
	 * Initialize the per-CPU profiling counter/multiplier
	 */
	smp_setup_percpu_timer();

	cpu_set(0, cpu_callin_map);

	local_cpu_data->loops_per_jiffy = loops_per_jiffy;
	ia64_cpu_to_sapicid[0] = boot_sapicid;

	printk(KERN_INFO "Boot processor id 0x%x/0x%x\n", 0, boot_sapicid);

	current_thread_info()->cpu = 0;

	if (max_cpus)
		return;

	/*
	 * If SMP should be disabled, then really disable it!
	 */
	printk(KERN_INFO "SMP mode deactivated.\n");
	init_cpu_online(cpumask_of(0));
	init_cpu_present(cpumask_of(0));
	init_cpu_possible(cpumask_of(0));
}
/*
 * Mark the CPU executing this function as online and called-in, set its
 * NUMA node and per-cpu state, then run the paravirt post-prepare hook.
 */
void smp_prepare_boot_cpu(void)
{
	int me = smp_processor_id();

	set_cpu_online(me, true);
	cpu_set(me, cpu_callin_map);
	set_numa_node(cpu_to_node_map[me]);
	per_cpu(cpu_state, me) = CPU_ONLINE;
	paravirt_post_smp_prepare_boot_cpu();
}
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Remove cpu from the sibling map and core map of every CPU that lists it,
 * then empty cpu's own two maps.
 */
static inline void clear_cpu_sibling_map(int cpu)
{
	int peer;

	for_each_cpu_mask(peer, per_cpu(cpu_sibling_map, cpu))
		cpu_clear(cpu, per_cpu(cpu_sibling_map, peer));

	for_each_cpu_mask(peer, cpu_core_map[cpu])
		cpu_clear(cpu, cpu_core_map[peer]);

	cpu_core_map[cpu] = CPU_MASK_NONE;
	per_cpu(cpu_sibling_map, cpu) = cpu_core_map[cpu];
}
/*
 * Drop cpu from the core/sibling topology maps when it goes offline.
 *
 * A CPU that reports one thread per core and one core per socket only ever
 * appears in its own maps (see __cpu_up()), so clearing its own bit is
 * enough. Otherwise it has to be removed from every peer's maps as well.
 *
 * The former local "last" was computed but never read; it has been removed.
 */
static void
remove_siblinginfo(int cpu)
{
	if (cpu_data(cpu)->threads_per_core == 1 &&
	    cpu_data(cpu)->cores_per_socket == 1) {
		cpu_clear(cpu, cpu_core_map[cpu]);
		cpu_clear(cpu, per_cpu(cpu_sibling_map, cpu));
		return;
	}

	/* remove it from all sibling map's */
	clear_cpu_sibling_map(cpu);
}
extern void fixup_irqs(void);
/*
* If cpu is the current CPEI target, try to move the CPEI interrupt to
* another online CPU so that cpu can be taken offline.
*
* Returns 0 when cpu is not the CPEI target or when irq data for the CPEI
* irq was obtained; returns -EBUSY when retargeting is not allowed or no
* irq data was found.
*/
int migrate_platform_irqs(unsigned int cpu)
{
int new_cpei_cpu;
struct irq_data *data = NULL;
const struct cpumask *mask;
int retval = 0;
/*
* Don't permit the CPEI target to be removed.
*/
if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) {
printk ("CPU (%d) is CPEI Target\n", cpu);
if (can_cpei_retarget()) {
/*
* Now re-target the CPEI to a different processor
*/
/* __cpu_disable() has already cleared cpu from the online mask before calling us */
new_cpei_cpu = cpumask_any(cpu_online_mask);
mask = cpumask_of(new_cpei_cpu);
set_cpei_target_cpu(new_cpei_cpu);
data = irq_get_irq_data(ia64_cpe_irq);
/*
* Switch for now, immediately, we need to do fake intr
* as other interrupts, but need to study CPEI behaviour with
* polling before making changes.
*/
if (data && data->chip) {
data->chip->irq_disable(data);
data->chip->irq_set_affinity(data, mask, false);
data->chip->irq_enable(data);
printk ("Re-targeting CPEI to cpu %d\n", new_cpei_cpu);
}
}
/* data is still NULL if retargeting was not permitted or no irq data exists */
if (!data) {
printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu);
retval = -EBUSY;
}
}
return retval;
}
/* must be called with cpucontrol mutex held */
/*
* Take the calling CPU out of service. Returns 0 on success, or -EBUSY when
* the CPU is the boot processor and its removal is not permitted, when the
* sn2 platform refuses, or when platform interrupts cannot be migrated.
*/
int __cpu_disable(void)
{
int cpu = smp_processor_id();
/*
* dont permit boot processor for now
*/
if (cpu == 0 && !bsp_remove_ok) {
printk ("Your platform does not support removal of BSP\n");
return (-EBUSY);
}
if (ia64_platform_is("sn2")) {
if (!sn_cpu_disable_allowed(cpu))
return -EBUSY;
}
/* go offline first so migrate_platform_irqs() does not pick this CPU as the new target */
set_cpu_online(cpu, false);
if (migrate_platform_irqs(cpu)) {
/* migration failed: undo the offline marking */
set_cpu_online(cpu, true);
return -EBUSY;
}
remove_siblinginfo(cpu);
fixup_irqs();
local_flush_tlb_all();
/* clearing the callin bit lets a later __cpu_up() accept this CPU again */
cpu_clear(cpu, cpu_callin_map);
return 0;
}
/*
 * Wait for cpu to report CPU_DEAD in its per-cpu cpu_state, checking up to
 * 100 times with a 100ms sleep after each unsuccessful check. Logs the
 * outcome either way.
 */
void __cpu_die(unsigned int cpu)
{
	unsigned int attempt = 0;

	while (attempt < 100) {
		/* They ack this in play_dead by setting CPU_DEAD */
		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
			printk ("CPU %d is now offline\n", cpu);
			return;
		}
		msleep(100);
		attempt++;
	}

	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
#endif /* CONFIG_HOTPLUG_CPU */
void
smp_cpus_done (unsigned int dummy)
{
int cpu;
unsigned long bogosum = 0;
/*
* Allow the user to impress friends.
*/
for_each_online_cpu(cpu) {
bogosum += cpu_data(cpu)->loops_per_jiffy;
}
printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
(int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100);
}
/*
 * Link cpu with every online CPU in the same socket (core map, both
 * directions) and, when the core id matches as well, in the sibling map
 * too.
 */
static inline void set_cpu_sibling_map(int cpu)
{
	int other;

	for_each_online_cpu(other) {
		if (cpu_data(cpu)->socket_id != cpu_data(other)->socket_id)
			continue;

		cpu_set(other, cpu_core_map[cpu]);
		cpu_set(cpu, cpu_core_map[other]);

		if (cpu_data(cpu)->core_id != cpu_data(other)->core_id)
			continue;

		cpu_set(other, per_cpu(cpu_sibling_map, cpu));
		cpu_set(cpu, per_cpu(cpu_sibling_map, other));
	}
}
/*
 * Start logical CPU cpu with tidle as its idle task.
 *
 * Returns -EINVAL if the CPU has no SAPIC id or has already called in,
 * the error from do_boot_cpu() if the wakeup fails, and 0 on success after
 * the topology maps have been updated.
 */
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int sapicid = ia64_cpu_to_sapicid[cpu];
	int err;

	if (sapicid == -1)
		return -EINVAL;

	/*
	 * Already booted cpu? not valid anymore since we dont
	 * do idle loop tightspin anymore.
	 */
	if (cpu_isset(cpu, cpu_callin_map))
		return -EINVAL;

	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;

	/* Processor goes to start_secondary(), sets online flag */
	err = do_boot_cpu(sapicid, cpu, tidle);
	if (err < 0)
		return err;

	if (cpu_data(cpu)->threads_per_core == 1 &&
	    cpu_data(cpu)->cores_per_socket == 1) {
		/* single-thread, single-core package: the CPU is its own only peer */
		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
		cpu_set(cpu, cpu_core_map[cpu]);
	} else {
		set_cpu_sibling_map(cpu);
	}

	return 0;
}
/*
* Assume that CPUs have been discovered by some platform-dependent interface. For
* SoftSDV/Lion, that would be ACPI.
*
* Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP().
*
* Registers start_ap with SAL as the OS boot rendezvous entry point; a
* failure is logged but otherwise ignored.
*/
void __init
init_smp_config(void)
{
/* start_ap is read as a descriptor holding two words, fp and gp; both are converted with ia64_tpa() before being passed to SAL */
struct fptr {
unsigned long fp;
unsigned long gp;
} *ap_startup;
long sal_ret;
/* Tell SAL where to drop the APs. */
ap_startup = (struct fptr *) start_ap;
sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
ia64_tpa(ap_startup->fp), ia64_tpa(ap_startup->gp), 0, 0, 0, 0);
if (sal_ret < 0)
printk(KERN_ERR "SMP: Can't set SAL AP Boot Rendezvous: %s\n",
ia64_sal_strerror(sal_ret));
}
/*
* identify_siblings(cpu) gets called from identify_cpu. This populates the
* information related to logical execution units in per_cpu_data structure.
*
* c->socket_id is set whenever both firmware queries succeed (or the PAL
* call is merely unimplemented). The remaining topology fields are only
* filled in when the package reports more than one core or thread.
*/
void identify_siblings(struct cpuinfo_ia64 *c)
{
long status;
u16 pltid;
pal_logical_to_physical_t info;
status = ia64_pal_logical_to_phys(-1, &info);
if (status != PAL_STATUS_SUCCESS) {
if (status != PAL_STATUS_UNIMPLEMENTED) {
printk(KERN_ERR
"ia64_pal_logical_to_phys failed with %ld\n",
status);
return;
}
/* call not implemented: fall back to one core and one thread, package id 0 */
info.overview_ppid = 0;
info.overview_cpp = 1;
info.overview_tpc = 1;
}
status = ia64_sal_physical_id_info(&pltid);
if (status != PAL_STATUS_SUCCESS) {
/* any failure ends here; only failures other than "unimplemented" are logged */
if (status != PAL_STATUS_UNIMPLEMENTED)
printk(KERN_ERR
"ia64_sal_pltid failed with %ld\n",
status);
return;
}
/* combine the SAL-provided id (upper bits) with the PAL package id (low 8 bits) */
c->socket_id = (pltid << 8) | info.overview_ppid;
if (info.overview_cpp == 1 && info.overview_tpc == 1)
return;
c->cores_per_socket = info.overview_cpp;
c->threads_per_core = info.overview_tpc;
c->num_log = info.overview_num_log;
c->core_id = info.log1_cid;
c->thread_id = info.log1_tid;
}
/*
 * Returns 1 if any two distinct present CPUs share both socket_id and
 * core_id, i.e. multi-threading is enabled on at least one physical
 * package; returns 0 otherwise. Due to hotplug cpu and (maxcpus=), all
 * threads may not necessarily be enabled even though the processor
 * supports multi-threading.
 */
int is_multithreading_enabled(void)
{
	int a, b;

	for_each_present_cpu(a) {
		for_each_present_cpu(b) {
			if (b != a &&
			    cpu_data(b)->socket_id == cpu_data(a)->socket_id &&
			    cpu_data(b)->core_id == cpu_data(a)->core_id)
				return 1;
		}
	}

	return 0;
}
EXPORT_SYMBOL_GPL(is_multithreading_enabled);

View file

@ -0,0 +1,39 @@
/*
* arch/ia64/kernel/stacktrace.c
*
* Stack trace management functions
*
*/
#include <linux/sched.h>
#include <linux/stacktrace.h>
#include <linux/module.h>
/*
 * Unwinder callback (invoked through unw_init_running()) that records
 * return addresses into the struct stack_trace passed as arg.
 *
 * The first trace->skip frames are dropped. Recording stops when an ip of 0
 * is seen, when the buffer holds trace->max_entries entries, or when
 * unw_unwind() reports failure.
 */
static void
ia64_do_save_stack(struct unw_frame_info *info, void *arg)
{
	struct stack_trace *trace = arg;
	unsigned long ip;
	int skip = trace->skip;

	trace->nr_entries = 0;

	/*
	 * A zero-capacity buffer cannot hold anything. Without this check the
	 * store below would write entries[0] before the "buffer full" test,
	 * which compares with == and so would never stop the walk either.
	 */
	if (trace->max_entries == 0)
		return;

	do {
		unw_get_ip(info, &ip);
		if (ip == 0)
			break;
		if (skip == 0) {
			trace->entries[trace->nr_entries++] = ip;
			if (trace->nr_entries == trace->max_entries)
				break;
		} else
			skip--;
	} while (unw_unwind(info) >= 0);
}
/*
 * Capture the current call chain into *trace by running
 * ia64_do_save_stack() under unw_init_running().
 */
void save_stack_trace(struct stack_trace *trace)
{
	unw_init_running(ia64_do_save_stack, trace);
}
EXPORT_SYMBOL(save_stack_trace);

183
arch/ia64/kernel/sys_ia64.c Normal file
View file

@ -0,0 +1,183 @@
/*
* This file contains various system calls that have different calling
* conventions on different platforms.
*
* Copyright (C) 1999-2000, 2002-2003, 2005 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/shm.h>
#include <linux/file.h> /* doh, must come after sched.h... */
#include <linux/smp.h>
#include <linux/syscalls.h>
#include <linux/highuid.h>
#include <linux/hugetlb.h>
#include <asm/shmparam.h>
#include <asm/uaccess.h>
/*
* Choose an address for a new mapping of len bytes.
*
* Returns -ENOMEM if len exceeds RGN_MAP_LIMIT. For MAP_FIXED requests addr
* is returned unchanged unless the range is reserved for huge pages
* (-EINVAL). Otherwise the search starts at addr (or TASK_UNMAPPED_BASE when
* addr is 0) and is delegated to vm_unmapped_area().
* filp and pgoff are not used here.
*/
unsigned long
arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
long map_shared = (flags & MAP_SHARED);
unsigned long align_mask = 0;
struct mm_struct *mm = current->mm;
struct vm_unmapped_area_info info;
if (len > RGN_MAP_LIMIT)
return -ENOMEM;
/* handle fixed mapping: prevent overlap with huge pages */
if (flags & MAP_FIXED) {
if (is_hugepage_only_range(mm, addr, len))
return -EINVAL;
return addr;
}
#ifdef CONFIG_HUGETLB_PAGE
/* a hint inside the huge-page region is discarded */
if (REGION_NUMBER(addr) == RGN_HPAGE)
addr = 0;
#endif
if (!addr)
addr = TASK_UNMAPPED_BASE;
if (map_shared && (TASK_SIZE > 0xfffffffful))
/*
* For 64-bit tasks, align shared segments to 1MB to avoid potential
* performance penalty due to virtual aliasing (see ASDM). For 32-bit
* tasks, we prefer to avoid exhausting the address space too quickly by
* limiting alignment to a single page.
*/
align_mask = PAGE_MASK & (SHMLBA - 1);
/* search between addr and TASK_SIZE with the alignment chosen above */
info.flags = 0;
info.length = len;
info.low_limit = addr;
info.high_limit = TASK_SIZE;
info.align_mask = align_mask;
info.align_offset = 0;
return vm_unmapped_area(&info);
}
/*
 * getpriority wrapper: a negative result from sys_getpriority() is passed
 * through unchanged; a non-negative result is converted to 20 - result and
 * the syscall is explicitly flagged as successful.
 */
asmlinkage long ia64_getpriority(int which, int who)
{
	long raw = sys_getpriority(which, who);

	if (raw < 0)
		return raw;

	force_successful_syscall_return();
	return 20 - raw;
}
/*
 * Returns PAGE_SIZE.
 * XXX obsolete, but leave it here until the old libc is gone...
 */
asmlinkage unsigned long sys_getpagesize(void)
{
	return PAGE_SIZE;
}
/*
 * brk wrapper: forwards to sys_brk() and always flags the syscall as
 * successful before returning sys_brk()'s result.
 */
asmlinkage unsigned long ia64_brk(unsigned long brk)
{
	unsigned long new_brk;

	new_brk = sys_brk(brk);
	force_successful_syscall_return();

	return new_brk;
}
/*
 * pipe() for IA-64: instead of copying to user memory, the two descriptors
 * come back in ret0 and ret1 (r8 and r9). The first descriptor is the
 * return value and the second is stored in the saved r9. If
 * do_pipe_flags() fails, its error is returned and r9 is left untouched.
 */
asmlinkage long sys_ia64_pipe(void)
{
	struct pt_regs *regs = task_pt_regs(current);
	int fds[2];
	int err;

	err = do_pipe_flags(fds, 0);
	if (err)
		return err;

	regs->r9 = fds[1];
	return fds[0];
}
/*
 * Validate a mapping request against the region layout. Returns -EINVAL
 * when len exceeds RGN_MAP_LIMIT or when the mapping, starting at addr's
 * offset within its region, would extend past RGN_MAP_LIMIT; returns 0
 * otherwise. flags is not examined.
 */
int ia64_mmap_check(unsigned long addr, unsigned long len,
		unsigned long flags)
{
	/*
	 * Don't permit mappings into unmapped space, the virtual page table
	 * of a region, or across a region boundary.  Note: RGN_MAP_LIMIT is
	 * equal to 2^n-PAGE_SIZE (for some integer n <= 61) and len > 0.
	 */
	if (len > RGN_MAP_LIMIT)
		return -EINVAL;

	/* len <= RGN_MAP_LIMIT here, so the subtraction cannot wrap */
	if (REGION_OFFSET(addr) > (RGN_MAP_LIMIT - len))
		return -EINVAL;

	return 0;
}
/*
 * mmap2() takes its file offset in units of PAGE_SIZE rather than bytes,
 * which makes it possible to map (pieces of) files larger than the CPU's
 * address space. The call is forwarded to sys_mmap_pgoff(); a result that
 * is not an error pointer is flagged as a successful syscall return.
 */
asmlinkage unsigned long
sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff)
{
	unsigned long mapped = sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);

	if (!IS_ERR((void *) mapped))
		force_successful_syscall_return();

	return mapped;
}
/*
 * mmap() with a byte offset. An offset that is not page-aligned yields
 * -EINVAL; otherwise the offset is converted to pages and the call is
 * forwarded to sys_mmap_pgoff(). A result that is not an error pointer is
 * flagged as a successful syscall return.
 */
asmlinkage unsigned long
sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, long off)
{
	unsigned long mapped;

	if (offset_in_page(off) != 0)
		return -EINVAL;

	mapped = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
	if (!IS_ERR((void *) mapped))
		force_successful_syscall_return();

	return mapped;
}
/*
 * mremap wrapper: forwards to sys_mremap() and flags the syscall as
 * successful when the result is not an error pointer.
 */
asmlinkage unsigned long
ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags,
	     unsigned long new_addr)
{
	unsigned long moved = sys_mremap(addr, old_len, new_len, flags, new_addr);

	if (!IS_ERR((void *) moved))
		force_successful_syscall_return();

	return moved;
}
/*
* Stubs used when the kernel is built without CONFIG_PCI: both PCI
* configuration-space syscalls ignore their arguments and return -ENOSYS.
*/
#ifndef CONFIG_PCI
asmlinkage long
sys_pciconfig_read (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len,
void *buf)
{
return -ENOSYS;
}
asmlinkage long
sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len,
void *buf)
{
return -ENOSYS;
}
#endif /* CONFIG_PCI */

456
arch/ia64/kernel/time.c Normal file
View file

@ -0,0 +1,456 @@
/*
* linux/arch/ia64/kernel/time.c
*
* Copyright (C) 1998-2003 Hewlett-Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
* David Mosberger <davidm@hpl.hp.com>
* Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
* Copyright (C) 1999-2000 VA Linux Systems
* Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
*/
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/time.h>
#include <linux/interrupt.h>
#include <linux/efi.h>
#include <linux/timex.h>
#include <linux/timekeeper_internal.h>
#include <linux/platform_device.h>
#include <asm/machvec.h>
#include <asm/delay.h>
#include <asm/hw_irq.h>
#include <asm/paravirt.h>
#include <asm/ptrace.h>
#include <asm/sal.h>
#include <asm/sections.h>
#include "fsyscall_gtod_data.h"
/* forward declaration: needed by the clocksource_itc initializer below */
static cycle_t itc_get_cycles(struct clocksource *cs);
/* clock snapshot written by update_vsyscall_old() under its seqcount */
struct fsyscall_gtod_data_t fsyscall_gtod_data;
/* itc_jitter flag and last returned cycle value, used by itc_get_cycles() */
struct itc_jitter_data_t itc_jitter_data;
volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
#ifdef CONFIG_IA64_DEBUG_IRQ
unsigned long last_cli_ip;
EXPORT_SYMBOL(last_cli_ip);
#endif
#ifdef CONFIG_PARAVIRT
/*
 * A real (non-weak) sched_clock() definition is required to override the
 * weak default version; it simply forwards to paravirt_sched_clock().
 */
unsigned long long sched_clock(void)
{
	return paravirt_sched_clock();
}
#endif
#ifdef CONFIG_PARAVIRT
/*
 * Clocksource resume callback: invoke the paravirt resume hook when one is
 * installed, otherwise do nothing. The cs argument is unused.
 */
static void paravirt_clocksource_resume(struct clocksource *cs)
{
	if (!pv_time_ops.clocksource_resume)
		return;

	pv_time_ops.clocksource_resume();
}
#endif
/*
* Clocksource backed by the ITC, read through itc_get_cycles().
* ia64_init_itm() lowers .rating to 50 when the platform reports ITC drift.
*/
static struct clocksource clocksource_itc = {
.name = "itc",
.rating = 350,
.read = itc_get_cycles,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
#ifdef CONFIG_PARAVIRT
.resume = paravirt_clocksource_resume,
#endif
};
/* NULL until ia64_init_itm() has registered clocksource_itc; guards against registering twice */
static struct clocksource *itc_clocksource;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#include <linux/kernel_stat.h>
extern cputime_t cycle_to_cputime(u64 cyc);
/*
 * Flush the user-time cycles accumulated in tsk's thread_info: convert
 * them to cputime, account them as user time and reset the accumulator.
 * Does nothing when no cycles are pending.
 */
void vtime_account_user(struct task_struct *tsk)
{
	struct thread_info *ti = task_thread_info(tsk);
	cputime_t user_delta;

	if (!ti->ac_utime)
		return;

	user_delta = cycle_to_cputime(ti->ac_utime);
	account_user_time(tsk, user_delta, user_delta);
	ti->ac_utime = 0;
}
/*
* Called from the context switch with interrupts disabled, to charge all
* accumulated times to the current process, and to prepare accounting on
* the next process.
*/
void arch_vtime_task_switch(struct task_struct *prev)
{
struct thread_info *pi = task_thread_info(prev);
struct thread_info *ni = task_thread_info(current);
/* NOTE(review): this copies the incoming task's stamp into prev, not the other way round -- confirm the direction is intended */
pi->ac_stamp = ni->ac_stamp;
/* the incoming task starts with empty system/user accumulators */
ni->ac_stime = ni->ac_utime = 0;
}
/*
* Account time for a transition between system, hard irq or soft irq state.
* Note: the WARN_ON_ONCE() below expects interrupts to be disabled on entry.
*
* Returns, as cputime, the system cycles accumulated in tsk's thread_info
* plus the cycles elapsed since its last stamp; the accumulator is reset
* and the stamp is moved to the current ITC value.
*/
static cputime_t vtime_delta(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
cputime_t delta_stime;
__u64 now;
WARN_ON_ONCE(!irqs_disabled());
now = ia64_get_itc();
delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
ti->ac_stime = 0;
ti->ac_stamp = now;
return delta_stime;
}
/*
 * Charge the time reported by vtime_delta() for tsk as system time.
 */
void vtime_account_system(struct task_struct *tsk)
{
	cputime_t elapsed;

	elapsed = vtime_delta(tsk);
	account_system_time(tsk, 0, elapsed, elapsed);
}
EXPORT_SYMBOL_GPL(vtime_account_system);
void vtime_account_idle(struct task_struct *tsk)
{
account_idle_time(vtime_delta(tsk));
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/*
* Per-CPU timer tick handler. Accounts one tick per elapsed itm_delta
* period, lets the time-keeper CPU advance the global time, and programs the
* next ITM match value. Always returns IRQ_HANDLED.
*/
static irqreturn_t
timer_interrupt (int irq, void *dev_id)
{
unsigned long new_itm;
/* ticks arriving on an offline CPU are ignored */
if (cpu_is_offline(smp_processor_id())) {
return IRQ_HANDLED;
}
platform_timer_interrupt(irq, dev_id);
new_itm = local_cpu_data->itm_next;
if (!time_after(ia64_get_itc(), new_itm))
printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
ia64_get_itc(), new_itm);
profile_tick(CPU_PROFILING);
if (paravirt_do_steal_accounting(&new_itm))
goto skip_process_time_accounting;
/* catch up: one pass per tick period that has already elapsed */
while (1) {
update_process_times(user_mode(get_irq_regs()));
new_itm += local_cpu_data->itm_delta;
/* only the time-keeper CPU advances the global time */
if (smp_processor_id() == time_keeper_id)
xtime_update(1);
local_cpu_data->itm_next = new_itm;
if (time_after(new_itm, ia64_get_itc()))
break;
/*
* Allow IPIs to interrupt the timer loop.
*/
local_irq_enable();
local_irq_disable();
}
skip_process_time_accounting:
do {
/*
* If we're too close to the next clock tick for
* comfort, we increase the safety margin by
* intentionally dropping the next tick(s). We do NOT
* update itm.next because that would force us to call
* xtime_update() which in turn would let our clock run
* too fast (with the potentially devastating effect
* of losing monotony of time).
*/
while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
new_itm += local_cpu_data->itm_delta;
ia64_set_itm(new_itm);
/* double check, in case we got hit by a (slow) PMI: */
} while (time_after_eq(ia64_get_itc(), new_itm));
return IRQ_HANDLED;
}
/*
* Encapsulate access to the itm structure for SMP.
*
* Routes the timer to IA64_TIMER_VECTOR and schedules this CPU's first tick
* one itm_delta from now, plus a per-CPU offset.
*/
void
ia64_cpu_local_tick (void)
{
int cpu = smp_processor_id();
unsigned long shift = 0, delta;
/* arrange for the cycle counter to generate a timer interrupt: */
ia64_set_itv(IA64_TIMER_VECTOR);
delta = local_cpu_data->itm_delta;
/*
* Stagger the timer tick for each CPU so they don't occur all at (almost) the
* same time:
*/
if (cpu) {
/* hi is 1 << ia64_fls(cpu); the offset below is (2*(cpu - hi) + 1) / (2*hi) of one tick period */
unsigned long hi = 1UL << ia64_fls(cpu);
shift = (2*(cpu - hi) + 1) * delta/hi/2;
}
local_cpu_data->itm_next = ia64_get_itc() + delta + shift;
ia64_set_itm(local_cpu_data->itm_next);
}
/* Non-zero once "nojitter" was given on the command line; read by ia64_init_itm(). */
static int nojitter;

/*
 * Handler for the "nojitter" boot parameter: sets the flag above and logs
 * it. The option string is ignored. Always returns 1.
 */
static int __init nojitter_setup(char *str)
{
	nojitter = 1;
	printk("Jitter checking for ITC timers disabled\n");

	return 1;
}
__setup("nojitter", nojitter_setup);
/*
 * Initialize the interval timer state of the calling CPU.
 *
 * Determines the platform base frequency (SAL) and the ITC/processor
 * frequency ratios (PAL), derives the per-CPU timing constants stored in
 * local_cpu_data, decides whether ITC jitter compensation is needed,
 * programs the first timer tick and, on the first call only, registers the
 * ITC clocksource.
 *
 * If either firmware call fails, made-up values are used for the base
 * frequency and for BOTH ratios. Previously only itc_ratio was invented, so
 * proc_ratio was read uninitialised on that path and proc_freq was computed
 * from stack garbage.
 */
void ia64_init_itm(void)
{
	unsigned long platform_base_freq, itc_freq;
	struct pal_freq_ratio itc_ratio, proc_ratio;
	long status, platform_base_drift, itc_drift;

	/*
	 * According to SAL v2.6, we need to use a SAL call to determine the platform base
	 * frequency and then a PAL call to determine the frequency ratio between the ITC
	 * and the base frequency.
	 */
	status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
				    &platform_base_freq, &platform_base_drift);
	if (status != 0) {
		printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status));
	} else {
		status = ia64_pal_freq_ratios(&proc_ratio, NULL, &itc_ratio);
		if (status != 0)
			printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status);
	}
	if (status != 0) {
		/* invent "random" values */
		printk(KERN_ERR
		       "SAL/PAL failed to obtain frequency info---inventing reasonable values\n");
		platform_base_freq = 100000000;
		platform_base_drift = -1;	/* no drift info */
		itc_ratio.num = 3;
		itc_ratio.den = 1;
		/* proc_ratio was never (reliably) filled in; use the same invented ratio */
		proc_ratio.num = 3;
		proc_ratio.den = 1;
	}
	if (platform_base_freq < 40000000) {
		printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n",
		       platform_base_freq);
		platform_base_freq = 75000000;
		platform_base_drift = -1;
	}
	if (!proc_ratio.den)
		proc_ratio.den = 1;	/* avoid division by zero */
	if (!itc_ratio.den)
		itc_ratio.den = 1;	/* avoid division by zero */

	itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den;

	/* ITC cycles per timer tick, rounded to nearest */
	local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ;
	printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%u/%u, "
	       "ITC freq=%lu.%03luMHz", smp_processor_id(),
	       platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000,
	       itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000);

	if (platform_base_drift != -1) {
		itc_drift = platform_base_drift*itc_ratio.num/itc_ratio.den;
		printk("+/-%ldppm\n", itc_drift);
	} else {
		itc_drift = -1;
		printk("\n");
	}

	local_cpu_data->proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den;
	local_cpu_data->itc_freq = itc_freq;
	local_cpu_data->cyc_per_usec = (itc_freq + USEC_PER_SEC/2) / USEC_PER_SEC;
	local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<<IA64_NSEC_PER_CYC_SHIFT)
					+ itc_freq/2)/itc_freq;

	if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
#ifdef CONFIG_SMP
		/* On IA64 in an SMP configuration ITCs are never accurately synchronized.
		 * Jitter compensation requires a cmpxchg which may limit
		 * the scalability of the syscalls for retrieving time.
		 * The ITC synchronization is usually successful to within a few
		 * ITC ticks but this is not a sure thing. If you need to improve
		 * timer performance in SMP situations then boot the kernel with the
		 * "nojitter" option. However, doing so may result in time fluctuating (maybe
		 * even going backward) if the ITC offsets between the individual CPUs
		 * are too large.
		 */
		if (!nojitter)
			itc_jitter_data.itc_jitter = 1;
#endif
	} else
		/*
		 * ITC is drifty and we have not synchronized the ITCs in smpboot.c.
		 * ITC values may fluctuate significantly between processors.
		 * Clock should not be used for hrtimers. Mark itc as only
		 * useful for boot and testing.
		 *
		 * Note that jitter compensation is off! There is no point of
		 * synchronizing ITCs since they may be large differentials
		 * that change over time.
		 *
		 * The only way to fix this would be to repeatedly sync the
		 * ITCs. Until that time we have to avoid ITC.
		 */
		clocksource_itc.rating = 50;

	paravirt_init_missing_ticks_accounting(smp_processor_id());

	/* avoid softlock up message when cpu is unplug and plugged again. */
	touch_softlockup_watchdog();

	/* Setup the CPU local timer tick */
	ia64_cpu_local_tick();

	/* register the clocksource once, on the first CPU that gets here */
	if (!itc_clocksource) {
		clocksource_register_hz(&clocksource_itc,
					local_cpu_data->itc_freq);
		itc_clocksource = &clocksource_itc;
	}
}
/*
* Clocksource read callback for the ITC.
*
* Without jitter compensation this is simply get_cycles(). With it, the
* value returned never goes behind the last value recorded in
* itc_jitter_data.itc_lastcycle, which is updated with cmpxchg.
* The cs argument is unused.
*/
static cycle_t itc_get_cycles(struct clocksource *cs)
{
unsigned long lcycle, now, ret;
if (!itc_jitter_data.itc_jitter)
return get_cycles();
lcycle = itc_jitter_data.itc_lastcycle;
now = get_cycles();
/* our reading is behind the last recorded one: report the recorded value instead */
if (lcycle && time_after(lcycle, now))
return lcycle;
/*
* Keep track of the last timer value returned.
* In an SMP environment, you could lose out in contention of
* cmpxchg. If so, your cmpxchg returns new value which the
* winner of contention updated to. Use the new value instead.
*/
ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, now);
if (unlikely(ret != lcycle))
return ret;
return now;
}
/* irqaction for the timer tick; time_init() registers it on IA64_TIMER_VECTOR */
static struct irqaction timer_irqaction = {
.handler = timer_interrupt,
.flags = IRQF_IRQPOLL,
.name = "timer"
};
/*
 * Fill *ts with the time reported by efi_gettimeofday().
 */
void read_persistent_clock(struct timespec *ts)
{
	efi_gettimeofday(ts);
}
/*
 * Boot-time timer setup: register the per-cpu timer interrupt action on
 * IA64_TIMER_VECTOR, then initialize this CPU's interval timer.
 */
void __init time_init(void)
{
	register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction);
	ia64_init_itm();
}
/*
 * Default udelay implementation: busy-wait until the ITC has advanced by
 * usecs * cyc_per_usec cycles.
 *
 * Generic udelay assumes that if preemption is allowed and the thread
 * migrates to another CPU, that the ITC values are synchronized across
 * all CPUs.
 */
static void ia64_itc_udelay(unsigned long usecs)
{
	unsigned long begin = ia64_get_itc();
	unsigned long deadline = begin + usecs*local_cpu_data->cyc_per_usec;

	for (;;) {
		if (!time_before(ia64_get_itc(), deadline))
			break;
		cpu_relax();
	}
}
/* udelay backend; defaults to the ITC-based busy-wait and is a plain global pointer, so it can be reassigned */
void (*ia64_udelay)(unsigned long usecs) = &ia64_itc_udelay;
/*
* Delay for usecs microseconds by calling whatever ia64_udelay currently
* points to.
*/
void
udelay (unsigned long usecs)
{
(*ia64_udelay)(usecs);
}
EXPORT_SYMBOL(udelay);
/*
 * Intentionally empty: IA64 doesn't cache the timezone.
 */
void update_vsyscall_tz(void)
{
	/* nothing to update */
}
/*
* Publish a new clock snapshot into fsyscall_gtod_data, inside a seqcount
* write section.
*
* wall is the wall-clock time and wtm the offset added to it to obtain
* monotonic time; c supplies mask, shift and the fsys_mmio address; mult and
* cycle_last are stored as given.
*/
void update_vsyscall_old(struct timespec *wall, struct timespec *wtm,
struct clocksource *c, u32 mult, cycle_t cycle_last)
{
write_seqcount_begin(&fsyscall_gtod_data.seq);
/* copy fsyscall clock data */
fsyscall_gtod_data.clk_mask = c->mask;
fsyscall_gtod_data.clk_mult = mult;
fsyscall_gtod_data.clk_shift = c->shift;
fsyscall_gtod_data.clk_fsys_mmio = c->archdata.fsys_mmio;
fsyscall_gtod_data.clk_cycle_last = cycle_last;
/* copy kernel time structures */
fsyscall_gtod_data.wall_time.tv_sec = wall->tv_sec;
fsyscall_gtod_data.wall_time.tv_nsec = wall->tv_nsec;
/* monotonic time = wall time + wtm, field by field; nanoseconds are normalized below */
fsyscall_gtod_data.monotonic_time.tv_sec = wtm->tv_sec
+ wall->tv_sec;
fsyscall_gtod_data.monotonic_time.tv_nsec = wtm->tv_nsec
+ wall->tv_nsec;
/* normalize */
while (fsyscall_gtod_data.monotonic_time.tv_nsec >= NSEC_PER_SEC) {
fsyscall_gtod_data.monotonic_time.tv_nsec -= NSEC_PER_SEC;
fsyscall_gtod_data.monotonic_time.tv_sec++;
}
write_seqcount_end(&fsyscall_gtod_data.seq);
}

472
arch/ia64/kernel/topology.c Normal file
View file

@ -0,0 +1,472 @@
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* This file contains NUMA specific variables and functions which can
* be split away from DISCONTIGMEM and are used on NUMA machines with
* contiguous memory.
* 2002/08/07 Erich Focht <efocht@ess.nec.de>
* Populate cpu entries in sysfs for non-numa systems as well
* Intel Corporation - Ashok Raj
* 02/27/2006 Zhang, Yanmin
* Populate cpu cache entries in sysfs for cpu cache info
*/
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/node.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/nodemask.h>
#include <linux/notifier.h>
#include <linux/export.h>
#include <asm/mmzone.h>
#include <asm/numa.h>
#include <asm/cpu.h>
/* Per-CPU sysfs entries, indexed by CPU number; allocated in topology_init(). */
static struct ia64_cpu *sysfs_cpus;
/*
 * Store @slot as the socket id of CPU @num, but only if that CPU's
 * socket id is still -1.  Compiles to nothing without CONFIG_SMP.
 */
void arch_fix_phys_package_id(int num, u32 slot)
{
#ifdef CONFIG_SMP
	if (cpu_data(num)->socket_id != -1)
		return;
	cpu_data(num)->socket_id = slot;
#endif
}
EXPORT_SYMBOL_GPL(arch_fix_phys_package_id);
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Register CPU @num with the CPU subsystem using its sysfs_cpus[] slot.
 * Returns the result of register_cpu().
 */
int __ref arch_register_cpu(int num)
{
	struct ia64_cpu *entry = &sysfs_cpus[num];

#ifdef CONFIG_ACPI
	/*
	 * If CPEI can be re-targeted or if this is not
	 * CPEI target, then it is hotpluggable
	 */
	if (can_cpei_retarget() || !is_cpu_cpei_target(num))
		entry->cpu.hotpluggable = 1;
	map_cpu_to_node(num, node_cpuid[num].nid);
#endif
	return register_cpu(&entry->cpu, num);
}
EXPORT_SYMBOL(arch_register_cpu);

/*
 * Undo arch_register_cpu() for CPU @num; with ACPI the CPU is also
 * unmapped from its node.
 */
void __ref arch_unregister_cpu(int num)
{
	struct ia64_cpu *entry = &sysfs_cpus[num];

	unregister_cpu(&entry->cpu);
#ifdef CONFIG_ACPI
	unmap_cpu_from_node(num, cpu_to_node(num));
#endif
}
EXPORT_SYMBOL(arch_unregister_cpu);
#else
/* Non-hotplug variant: plain registration, only needed during init. */
static int __init arch_register_cpu(int num)
{
	struct ia64_cpu *entry = &sysfs_cpus[num];

	return register_cpu(&entry->cpu, num);
}
#endif /*CONFIG_HOTPLUG_CPU*/
/*
 * Register nodes (NUMA only) and all present CPUs in sysfs.
 *
 * Returns 0 on success or the first error reported by register_one_node()
 * or arch_register_cpu().  Panics if the sysfs_cpus array cannot be
 * allocated.
 */
static int __init topology_init(void)
{
	int i, err = 0;

#ifdef CONFIG_NUMA
	/*
	 * MCD - Do we want to register all ONLINE nodes, or all POSSIBLE nodes?
	 */
	for_each_online_node(i) {
		err = register_one_node(i);
		if (err)
			goto out;
	}
#endif

	/*
	 * kcalloc() checks the count * size multiplication for overflow,
	 * which the open-coded kzalloc(size * NR_CPUS) did not.
	 */
	sysfs_cpus = kcalloc(NR_CPUS, sizeof(struct ia64_cpu), GFP_KERNEL);
	if (!sysfs_cpus)
		panic("kzalloc in topology_init failed - NR_CPUS too big?");

	for_each_present_cpu(i) {
		err = arch_register_cpu(i);
		if (err)
			goto out;
	}
out:
	return err;
}
subsys_initcall(topology_init);
/*
* Export cpu cache information through sysfs
*/
/*
* String tables used for pretty printing
*/
/* Names printed by show_type(), indexed by type + pcci_unified. */
static const char *cache_types[] = {
	[0] = "",		/* not used */
	[1] = "Instruction",
	[2] = "Data",
	[3] = "Unified",	/* unified */
};

/* Names printed by show_attributes(), indexed by pcci_cache_attr. */
static const char *cache_mattrib[] = {
	[0] = "WriteThrough",
	[1] = "WriteBack",
	[2] = "",		/* reserved */
	[3] = "",		/* reserved */
};
/* One cache leaf of a CPU, exported through its embedded kobject. */
struct cache_info {
pal_cache_config_info_t cci; /* filled by ia64_pal_cache_config_info() */
cpumask_t shared_cpu_map; /* CPUs set by cache_shared_cpu_map_setup() */
int level; /* PAL level index + 1, see cpu_cache_sysfs_init() */
int type; /* cache type value passed to PAL (2 or 1) */
struct kobject kobj; /* to_object() maps this back to the cache_info */
};
/* All cache leaves of one CPU plus the kobject of its "cache" directory. */
struct cpu_cache_info {
struct cache_info *cache_leaves; /* kfree()d by cpu_cache_sysfs_exit() */
int num_cache_leaves; /* number of valid entries in cache_leaves */
struct kobject kobj;
};
/* Cache bookkeeping for every CPU, indexed by CPU number. */
static struct cpu_cache_info all_cpu_cache_info[NR_CPUS];
/*
 * Despite its name this evaluates to a pointer to leaf y of CPU x, i.e. a
 * struct cache_info *; callers take ->kobj themselves.
 */
#define LEAF_KOBJECT_PTR(x,y) (&all_cpu_cache_info[x].cache_leaves[y])
#ifdef CONFIG_SMP
/*
 * Fill this_leaf->shared_cpu_map with the CPUs that share the cache
 * described by @this_leaf with @cpu.
 *
 * A CPU with at most one thread per core and one core per socket only
 * gets itself.  Otherwise PAL is asked for each sharing entry in turn and
 * every possible CPU on the same socket whose core and thread ids match
 * the entry is added.  If the first PAL query fails the map is left
 * untouched.
 */
static void cache_shared_cpu_map_setup(unsigned int cpu,
		struct cache_info * this_leaf)
{
	pal_cache_shared_info_t csi;
	int num_shared, idx = 0;
	unsigned int other;

	if (cpu_data(cpu)->threads_per_core <= 1 &&
	    cpu_data(cpu)->cores_per_socket <= 1) {
		cpu_set(cpu, this_leaf->shared_cpu_map);
		return;
	}

	if (ia64_pal_cache_shared_info(this_leaf->level, this_leaf->type,
				       0, &csi) != PAL_STATUS_SUCCESS)
		return;

	num_shared = (int) csi.num_shared;
	for (;;) {
		for_each_possible_cpu(other) {
			if (cpu_data(cpu)->socket_id != cpu_data(other)->socket_id)
				continue;
			if (cpu_data(other)->core_id != csi.log1_cid)
				continue;
			if (cpu_data(other)->thread_id != csi.log1_tid)
				continue;
			cpu_set(other, this_leaf->shared_cpu_map);
		}

		/* advance to the next sharing entry, if there is one */
		if (++idx >= num_shared)
			break;
		if (ia64_pal_cache_shared_info(this_leaf->level,
					       this_leaf->type,
					       idx, &csi) != PAL_STATUS_SUCCESS)
			break;
	}
}
#else
/* Uniprocessor build: a cache is only ever shared with @cpu itself. */
static void cache_shared_cpu_map_setup(unsigned int cpu,
		struct cache_info * this_leaf)
{
	cpu_set(cpu, this_leaf->shared_cpu_map);
}
#endif
/* sysfs show handler: prints 1 << pcci_line_size followed by a newline. */
static ssize_t show_coherency_line_size(struct cache_info *this_leaf,
					char *buf)
{
	return sprintf(buf, "%u\n",
		       1 << this_leaf->cci.pcci_line_size);
}
/* sysfs show handler: prints the pcci_assoc field of the leaf. */
static ssize_t show_ways_of_associativity(struct cache_info *this_leaf,
					  char *buf)
{
	return sprintf(buf, "%u\n",
		       this_leaf->cci.pcci_assoc);
}
/* sysfs show handler: prints the cache_mattrib[] name for pcci_cache_attr. */
static ssize_t show_attributes(struct cache_info *this_leaf, char *buf)
{
	const char *name = cache_mattrib[this_leaf->cci.pcci_cache_attr];

	return sprintf(buf, "%s\n", name);
}
/* sysfs show handler: prints pcci_cache_size / 1024 with a "K" suffix. */
static ssize_t show_size(struct cache_info *this_leaf, char *buf)
{
	return sprintf(buf, "%uK\n",
		       this_leaf->cci.pcci_cache_size / 1024);
}
/*
 * sysfs show handler: prints cache size divided by associativity and by
 * the line size (1 << pcci_line_size).
 */
static ssize_t show_number_of_sets(struct cache_info *this_leaf, char *buf)
{
	unsigned sets = this_leaf->cci.pcci_cache_size;

	sets /= this_leaf->cci.pcci_assoc;
	sets /= 1 << this_leaf->cci.pcci_line_size;

	return sprintf(buf, "%u\n", sets);
}
/*
 * sysfs show handler: prints the online subset of the leaf's
 * shared_cpu_map, followed by a newline.
 */
static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf)
{
	cpumask_t online_sharers;
	ssize_t n;

	cpumask_and(&online_sharers, &this_leaf->shared_cpu_map,
		    cpu_online_mask);
	n = cpumask_scnprintf(buf, NR_CPUS+1, &online_sharers);
	n += sprintf(buf + n, "\n");

	return n;
}
/*
 * sysfs show handler: prints the cache_types[] name selected by the
 * leaf's type plus its pcci_unified field.
 */
static ssize_t show_type(struct cache_info *this_leaf, char *buf)
{
	int idx = this_leaf->type + this_leaf->cci.pcci_unified;
	const char *name = cache_types[idx];

	return sprintf(buf, "%s\n", name);
}
/* sysfs show handler: prints the leaf's level. */
static ssize_t show_level(struct cache_info *this_leaf, char *buf)
{
	return sprintf(buf, "%u\n",
		       this_leaf->level);
}
/*
 * sysfs attribute for a cache leaf: a generic attribute plus typed
 * show/store callbacks that receive the owning cache_info.
 */
struct cache_attr {
struct attribute attr;
ssize_t (*show)(struct cache_info *, char *);
ssize_t (*store)(struct cache_info *, const char *, size_t count);
};
#ifdef define_one_ro
#undef define_one_ro
#endif
/*
 * Define a read-only (0444) cache_attr called _name whose show callback is
 * show_<_name>; no store callback is installed.
 */
#define define_one_ro(_name) \
static struct cache_attr _name = \
__ATTR(_name, 0444, show_##_name, NULL)
define_one_ro(level);
define_one_ro(type);
define_one_ro(coherency_line_size);
define_one_ro(ways_of_associativity);
define_one_ro(size);
define_one_ro(number_of_sets);
define_one_ro(shared_cpu_map);
define_one_ro(attributes);
/* NULL-terminated attribute list used as default_attrs of cache_ktype. */
static struct attribute * cache_default_attrs[] = {
&type.attr,
&level.attr,
&coherency_line_size.attr,
&ways_of_associativity.attr,
&attributes.attr,
&size.attr,
&number_of_sets.attr,
&shared_cpu_map.attr,
NULL
};
/* Map an embedded kobject / attribute back to its containing object. */
#define to_object(k) container_of(k, struct cache_info, kobj)
#define to_attr(a) container_of(a, struct cache_attr, attr)
/*
 * sysfs_ops show entry point: recover the cache_info and cache_attr from
 * @kobj and @attr and forward to the attribute's show callback.  Returns 0
 * when the attribute has no show callback.
 */
static ssize_t ia64_cache_show(struct kobject * kobj, struct attribute * attr, char * buf)
{
	struct cache_info *leaf = to_object(kobj);
	struct cache_attr *cattr = to_attr(attr);

	if (!cattr->show)
		return 0;

	return cattr->show(leaf, buf);
}
/* Only reading is wired up: there is no .store handler. */
static const struct sysfs_ops cache_sysfs_ops = {
.show = ia64_cache_show
};
/* ktype of the per-leaf kobjects; carries the attribute files. */
static struct kobj_type cache_ktype = {
.sysfs_ops = &cache_sysfs_ops,
.default_attrs = cache_default_attrs,
};
/* ktype of the per-CPU parent kobject; it has no attributes of its own. */
static struct kobj_type cache_ktype_percpu_entry = {
.sysfs_ops = &cache_sysfs_ops,
};
static void cpu_cache_sysfs_exit(unsigned int cpu)
{
kfree(all_cpu_cache_info[cpu].cache_leaves);
all_cpu_cache_info[cpu].cache_leaves = NULL;
all_cpu_cache_info[cpu].num_cache_leaves = 0;
memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject));
return;
}
static int cpu_cache_sysfs_init(unsigned int cpu)
{
unsigned long i, levels, unique_caches;
pal_cache_config_info_t cci;
int j;
long status;
struct cache_info *this_cache;
int num_cache_leaves = 0;
if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) {
printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status);
return -1;
}
this_cache=kzalloc(sizeof(struct cache_info)*unique_caches,
GFP_KERNEL);
if (this_cache == NULL)
return -ENOMEM;
for (i=0; i < levels; i++) {
for (j=2; j >0 ; j--) {
if ((status=ia64_pal_cache_config_info(i,j, &cci)) !=
PAL_STATUS_SUCCESS)
continue;
this_cache[num_cache_leaves].cci = cci;
this_cache[num_cache_leaves].level = i + 1;
this_cache[num_cache_leaves].type = j;
cache_shared_cpu_map_setup(cpu,
&this_cache[num_cache_leaves]);
num_cache_leaves ++;
}
}
all_cpu_cache_info[cpu].cache_leaves = this_cache;
all_cpu_cache_info[cpu].num_cache_leaves = num_cache_leaves;
memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject));
return 0;
}
/*
 * Add cache interface for CPU device
 *
 * Creates the "cache" kobject under @sys_dev and one "index<N>" kobject
 * per cache leaf.  The calling task is temporarily bound to the target
 * CPU while cpu_cache_sysfs_init() runs.  Returns 0 on success (or if the
 * interface already exists), otherwise a negative error.
 */
static int cache_add_dev(struct device *sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i, j;
	struct cache_info *this_object;
	int retval = 0;
	cpumask_t oldmask;

	/* a non-NULL parent means the kobject was already added */
	if (all_cpu_cache_info[cpu].kobj.parent)
		return 0;

	oldmask = current->cpus_allowed;
	retval = set_cpus_allowed_ptr(current, cpumask_of(cpu));
	if (unlikely(retval))
		return retval;

	retval = cpu_cache_sysfs_init(cpu);
	set_cpus_allowed_ptr(current, &oldmask);
	if (unlikely(retval < 0))
		return retval;

	retval = kobject_init_and_add(&all_cpu_cache_info[cpu].kobj,
				      &cache_ktype_percpu_entry, &sys_dev->kobj,
				      "%s", "cache");
	if (unlikely(retval < 0)) {
		/*
		 * A kobject that failed kobject_init_and_add() still has to
		 * be dropped with kobject_put(), otherwise its name is leaked.
		 */
		kobject_put(&all_cpu_cache_info[cpu].kobj);
		cpu_cache_sysfs_exit(cpu);
		return retval;
	}

	for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) {
		this_object = LEAF_KOBJECT_PTR(cpu,i);
		retval = kobject_init_and_add(&(this_object->kobj),
					      &cache_ktype,
					      &all_cpu_cache_info[cpu].kobj,
					      "index%1lu", i);
		if (unlikely(retval)) {
			/*
			 * j <= i: drop the leaves added so far and also the
			 * one whose registration has just failed.
			 */
			for (j = 0; j <= i; j++) {
				kobject_put(&(LEAF_KOBJECT_PTR(cpu,j)->kobj));
			}
			kobject_put(&all_cpu_cache_info[cpu].kobj);
			cpu_cache_sysfs_exit(cpu);
			return retval;
		}
		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
	}
	kobject_uevent(&all_cpu_cache_info[cpu].kobj, KOBJ_ADD);
	return retval;
}
/* Remove cache interface for CPU device */
static int cache_remove_dev(struct device *sys_dev)
{
unsigned int cpu = sys_dev->id;
unsigned long i;
for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++)
kobject_put(&(LEAF_KOBJECT_PTR(cpu,i)->kobj));
if (all_cpu_cache_info[cpu].kobj.parent) {
kobject_put(&all_cpu_cache_info[cpu].kobj);
memset(&all_cpu_cache_info[cpu].kobj,
0,
sizeof(struct kobject));
}
cpu_cache_sysfs_exit(cpu);
return 0;
}
/*
 * CPU hotplug notifier: create the cache kobjects when a CPU has come
 * online and tear them down once it is dead.  All other actions are
 * ignored.  Always returns NOTIFY_OK.
 */
static int cache_cpu_callback(struct notifier_block *nfb,
		unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct device *sys_dev = get_cpu_device(cpu);

	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
		cache_add_dev(sys_dev);
	else if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
		cache_remove_dev(sys_dev);

	return NOTIFY_OK;
}
/* Hotplug notifier block registered by cache_sysfs_init(). */
static struct notifier_block cache_cpu_notifier =
{
.notifier_call = cache_cpu_callback
};
/*
 * Create the cache sysfs interface for every CPU that is already online
 * and register the hotplug notifier, all inside one
 * cpu_notifier_register_begin()/done() section.  Always returns 0.
 */
static int __init cache_sysfs_init(void)
{
	int cpu;

	cpu_notifier_register_begin();

	for_each_online_cpu(cpu)
		cache_add_dev(get_cpu_device((unsigned int)cpu));

	__register_hotcpu_notifier(&cache_cpu_notifier);

	cpu_notifier_register_done();

	return 0;
}
device_initcall(cache_sysfs_init);

652
arch/ia64/kernel/traps.c Normal file
View file

@ -0,0 +1,652 @@
/*
* Architecture-specific trap handling.
*
* Copyright (C) 1998-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* 05/12/00 grao <goutham.rao@intel.com> : added isr in siginfo for SIGFPE
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/tty.h>
#include <linux/vt_kern.h> /* For unblank_screen() */
#include <linux/module.h> /* for EXPORT_SYMBOL */
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/delay.h> /* for ssleep() */
#include <linux/kdebug.h>
#include <asm/fpswa.h>
#include <asm/intrinsics.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/setup.h>
/* Kernel virtual address of the FPSWA interface; NULL until trap_init() sets it. */
fpswa_interface_t *fpswa_interface;
EXPORT_SYMBOL(fpswa_interface);

/*
 * Set up fpswa_interface from the boot parameters, if they provide one.
 */
void __init
trap_init (void)
{
	if (!ia64_boot_param->fpswa)
		return;

	/* FPSWA fixup: make the interface pointer a kernel virtual address: */
	fpswa_interface = __va(ia64_boot_param->fpswa);
}
/*
 * Report a fatal kernel error.
 *
 * Prints the oops header, runs the DIE_OOPS notifier chain and, unless a
 * notifier returned NOTIFY_STOP, dumps the registers.  Returns 1 when a
 * notifier stopped the oops; otherwise it panics (panic_on_oops) or
 * terminates the current task with do_exit(SIGSEGV).
 */
int
die (const char *str, struct pt_regs *regs, long err)
{
/* Serialises oops output and tracks which CPU holds the lock so a nested die() on that CPU does not deadlock. */
static struct {
spinlock_t lock;
u32 lock_owner;
int lock_owner_depth;
} die = {
.lock = __SPIN_LOCK_UNLOCKED(die.lock),
.lock_owner = -1,
.lock_owner_depth = 0
};
static int die_counter;
int cpu = get_cpu();
/* Take the lock only if this CPU does not already own it. */
if (die.lock_owner != cpu) {
console_verbose();
spin_lock_irq(&die.lock);
die.lock_owner = cpu;
die.lock_owner_depth = 0;
bust_spinlocks(1);
}
put_cpu();
/* Produce full output only for the first two nesting levels. */
if (++die.lock_owner_depth < 3) {
printk("%s[%d]: %s %ld [%d]\n",
current->comm, task_pid_nr(current), str, err, ++die_counter);
if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV)
!= NOTIFY_STOP)
show_regs(regs);
else
/* a notifier handled it: NULL regs makes us return 1 below */
regs = NULL;
} else
printk(KERN_ERR "Recursive die() failure, output suppressed\n");
bust_spinlocks(0);
die.lock_owner = -1;
add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
spin_unlock_irq(&die.lock);
if (!regs)
return 1;
if (panic_on_oops)
panic("Fatal exception");
do_exit(SIGSEGV);
return 0;
}
/*
 * Call die() when @regs does not describe user mode.  Returns die()'s
 * result in that case and 0 for a user-mode context.
 */
int
die_if_kernel (char *str, struct pt_regs *regs, long err)
{
	if (user_mode(regs))
		return 0;

	return die(str, regs, err);
}
/*
 * Handle a break instruction: translate @break_num into a signal number
 * and si_code and force that signal on the current task.
 *
 * For break 0 and for break numbers below 0x40000 or above 0x100000 that
 * reach the default case, die_if_kernel() is consulted first.
 */
void
__kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
{
siginfo_t siginfo;
int sig, code;
/* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */
siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
siginfo.si_imm = break_num;
siginfo.si_flags = 0; /* clear __ISR_VALID */
siginfo.si_isr = 0;
switch (break_num) {
case 0: /* unknown error (used by GCC for __builtin_abort()) */
if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP)
== NOTIFY_STOP)
return;
if (die_if_kernel("bugcheck!", regs, break_num))
return;
sig = SIGILL; code = ILL_ILLOPC;
break;
case 1: /* integer divide by zero */
sig = SIGFPE; code = FPE_INTDIV;
break;
case 2: /* integer overflow */
sig = SIGFPE; code = FPE_INTOVF;
break;
case 3: /* range check/bounds check */
sig = SIGFPE; code = FPE_FLTSUB;
break;
case 4: /* null pointer dereference */
sig = SIGSEGV; code = SEGV_MAPERR;
break;
case 5: /* misaligned data */
/*
 * NOTE(review): BUS_ADRALN looks like a SIGBUS si_code but is paired
 * with SIGSEGV here - confirm whether this is intentional before
 * changing it, user space may depend on the current behaviour.
 */
sig = SIGSEGV; code = BUS_ADRALN;
break;
case 6: /* decimal overflow */
sig = SIGFPE; code = __FPE_DECOVF;
break;
case 7: /* decimal divide by zero */
sig = SIGFPE; code = __FPE_DECDIV;
break;
case 8: /* packed decimal error */
sig = SIGFPE; code = __FPE_DECERR;
break;
case 9: /* invalid ASCII digit */
sig = SIGFPE; code = __FPE_INVASC;
break;
case 10: /* invalid decimal digit */
sig = SIGFPE; code = __FPE_INVDEC;
break;
case 11: /* paragraph stack overflow */
sig = SIGSEGV; code = __SEGV_PSTKOVF;
break;
case 0x3f000 ... 0x3ffff: /* bundle-update in progress */
sig = SIGILL; code = __ILL_BNDMOD;
break;
default:
/* Outside 0x40000..0x100000 a kernel-mode break is fatal. */
if ((break_num < 0x40000 || break_num > 0x100000)
&& die_if_kernel("Bad break", regs, break_num))
return;
if (break_num < 0x80000) {
sig = SIGILL; code = __ILL_BREAK;
} else {
if (notify_die(DIE_BREAK, "bad break", regs, break_num, TRAP_BRKPT, SIGTRAP)
== NOTIFY_STOP)
return;
sig = SIGTRAP; code = TRAP_BRKPT;
}
}
siginfo.si_signo = sig;
siginfo.si_errno = 0;
siginfo.si_code = code;
force_sig_info(sig, &siginfo, current);
}
/*
* disabled_fph_fault() is called when a user-level process attempts to access f32..f127
* and it doesn't own the fp-high register partition. When this happens, we save the
* current fph partition in the task_struct of the fpu-owner (if necessary) and then load
* the fp-high partition of the current task (if necessary). Note that the kernel has
* access to fph by the time we get here, as the IVT's "Disabled FP-Register" handler takes
* care of clearing psr.dfh.
*/
/*
 * Give the current task ownership of the fp-high partition: clear psr.dfh,
 * make current the local FPU owner and either reload its saved fph state
 * or initialise the partition.  Runs with preemption disabled while
 * ownership changes.
 */
static inline void
disabled_fph_fault (struct pt_regs *regs)
{
struct ia64_psr *psr = ia64_psr(regs);
/* first, grant user-level access to fph partition: */
psr->dfh = 0;
/*
* Make sure that no other task gets in on this processor
* while we're claiming the FPU
*/
preempt_disable();
#ifndef CONFIG_SMP
{
/* UP only: the previous owner's fph state is flushed here, on demand. */
struct task_struct *fpu_owner
= (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER);
/* current already owns the partition: nothing to load */
if (ia64_is_local_fpu_owner(current)) {
preempt_enable_no_resched();
return;
}
if (fpu_owner)
ia64_flush_fph(fpu_owner);
}
#endif /* !CONFIG_SMP */
ia64_set_local_fpu_owner(current);
if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0) {
/* saved state exists: reload it; registers now match thread.fph */
__ia64_load_fpu(current->thread.fph);
psr->mfh = 0;
} else {
__ia64_init_fpu();
/*
* Set mfh because the state in thread.fph does not match the state in
* the fph partition.
*/
psr->mfh = 1;
}
preempt_enable_no_resched();
}
static inline int
fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs,
struct pt_regs *regs)
{
fp_state_t fp_state;
fpswa_ret_t ret;
if (!fpswa_interface)
return -1;
memset(&fp_state, 0, sizeof(fp_state_t));
/*
* compute fp_state. only FP registers f6 - f11 are used by the
* kernel, so set those bits in the mask and set the low volatile
* pointer to point to these registers.
*/
fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */
fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
/*
* unsigned long (*EFI_FPSWA) (
* unsigned long trap_type,
* void *Bundle,
* unsigned long *pipsr,
* unsigned long *pfsr,
* unsigned long *pisr,
* unsigned long *ppreds,
* unsigned long *pifs,
* void *fp_state);
*/
ret = (*fpswa_interface->fpswa)((unsigned long) fp_fault, bundle,
(unsigned long *) ipsr, (unsigned long *) fpsr,
(unsigned long *) isr, (unsigned long *) pr,
(unsigned long *) ifs, &fp_state);
return ret.status;
}
/* Rate-limit state for the "floating-point assist fault" message. */
struct fpu_swa_msg {
unsigned long count; /* messages counted in the current window */
unsigned long time; /* jiffies value at which the window expires */
};
/* Per-CPU limiter, consulted first by handle_fpu_swa(). */
static DEFINE_PER_CPU(struct fpu_swa_msg, cpulast);
/* NOTE(review): this declaration follows the static definition above and looks redundant - confirm before removing. */
DECLARE_PER_CPU(struct fpu_swa_msg, cpulast);
/* Limiter shared by all CPUs; low 4 bits of count are the counter, the upper bits a sequence number. */
static struct fpu_swa_msg last __cacheline_aligned;
/*
* Handle floating-point assist faults and traps.
*
* @fp_fault is non-zero for a fault and zero for a trap.  The faulting
* bundle is copied from user space and passed to fp_emulate(); if the
* emulation reports an exception, SIGFPE is forced on the current task
* with an si_code derived from @isr.
*
* Returns 0 when the event was handled (including when a signal was sent)
* and -1 when the bundle could not be read or fp_emulate() returned -1.
*/
static int
handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
{
long exception, bundle[2];
unsigned long fault_ip;
struct siginfo siginfo;
fault_ip = regs->cr_iip;
/* For a trap with ri == 0 the bundle of interest is the previous one (16 bytes back). */
if (!fp_fault && (ia64_psr(regs)->ri == 0))
fault_ip -= 16;
if (copy_from_user(bundle, (void __user *) fault_ip, sizeof(bundle)))
return -1;
/* Rate-limited warning, unless the task asked for it to be suppressed. */
if (!(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) {
unsigned long count, current_jiffies = jiffies;
struct fpu_swa_msg *cp = this_cpu_ptr(&cpulast);
/* per-CPU window expired: start counting again */
if (unlikely(current_jiffies > cp->time))
cp->count = 0;
if (unlikely(cp->count < 5)) {
cp->count++;
cp->time = current_jiffies + 5 * HZ;
/* minimize races by grabbing a copy of count BEFORE checking last.time. */
count = last.count;
barrier();
/*
* Lower 4 bits are used as a count. Upper bits are a sequence
* number that is updated when count is reset. The cmpxchg will
* fail if seqno has changed. This minimizes multiple cpus
* resetting the count.
*/
if (current_jiffies > last.time)
(void) cmpxchg_acq(&last.count, count, 16 + (count & ~15));
/* used fetchadd to atomically update the count */
if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) {
last.time = current_jiffies + 5 * HZ;
printk(KERN_WARNING
"%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
}
}
}
exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr,
&regs->cr_ifs, regs);
if (fp_fault) {
if (exception == 0) {
/* emulation was successful */
ia64_increment_ip(regs);
} else if (exception == -1) {
printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n");
return -1;
} else {
/* is next instruction a trap? */
if (exception & 2) {
ia64_increment_ip(regs);
}
siginfo.si_signo = SIGFPE;
siginfo.si_errno = 0;
siginfo.si_code = __SI_FAULT; /* default code */
siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
/* choose the si_code from the isr bits set for this fault */
if (isr & 0x11) {
siginfo.si_code = FPE_FLTINV;
} else if (isr & 0x22) {
/* denormal operand gets the same si_code as underflow
* see arch/i386/kernel/traps.c:math_error() */
siginfo.si_code = FPE_FLTUND;
} else if (isr & 0x44) {
siginfo.si_code = FPE_FLTDIV;
}
siginfo.si_isr = isr;
siginfo.si_flags = __ISR_VALID;
siginfo.si_imm = 0;
force_sig_info(SIGFPE, &siginfo, current);
}
} else {
if (exception == -1) {
printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n");
return -1;
} else if (exception != 0) {
/* raise exception */
siginfo.si_signo = SIGFPE;
siginfo.si_errno = 0;
siginfo.si_code = __SI_FAULT; /* default code */
siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
/* choose the si_code from the isr bits set for this trap */
if (isr & 0x880) {
siginfo.si_code = FPE_FLTOVF;
} else if (isr & 0x1100) {
siginfo.si_code = FPE_FLTUND;
} else if (isr & 0x2200) {
siginfo.si_code = FPE_FLTRES;
}
siginfo.si_isr = isr;
siginfo.si_flags = __ISR_VALID;
siginfo.si_imm = 0;
force_sig_info(SIGFPE, &siginfo, current);
}
}
return 0;
}
/*
 * Value returned by ia64_illegal_op_fault().  That function sets fkt to 0
 * when the fault was not emulated; with CONFIG_IA64_BRL_EMU the whole
 * structure may instead come from ia64_emulate_brl().
 */
struct illegal_op_return {
unsigned long fkt, arg1, arg2, arg3;
};
/*
 * Illegal operation fault handler.
 *
 * With CONFIG_IA64_BRL_EMU the fault is first offered to
 * ia64_emulate_brl(); its result is returned unless its fkt field is
 * (unsigned long) -1.  Otherwise a kernel-mode fault goes through
 * die_if_kernel() and a user-mode fault gets SIGILL/ILL_ILLOPC forced on
 * the current task.  In both of those cases rv.fkt is 0.
 */
struct illegal_op_return
ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3,
		       long arg4, long arg5, long arg6, long arg7,
		       struct pt_regs regs)
{
	struct illegal_op_return rv;
	struct siginfo si;
	char buf[128];

#ifdef CONFIG_IA64_BRL_EMU
	{
		extern struct illegal_op_return ia64_emulate_brl (struct pt_regs *, unsigned long);

		rv = ia64_emulate_brl(&regs, ec);
		if (rv.fkt != (unsigned long) -1)
			return rv;
	}
#endif

	rv.fkt = 0;
	sprintf(buf, "IA-64 Illegal operation fault");

	if (die_if_kernel(buf, &regs, 0))
		return rv;

	/* user mode: deliver SIGILL at the faulting instruction */
	memset(&si, 0, sizeof(si));
	si.si_addr = (void __user *) (regs.cr_iip + ia64_psr(&regs)->ri);
	si.si_code = ILL_ILLOPC;
	si.si_signo = SIGILL;
	force_sig_info(SIGILL, &si, current);

	return rv;
}
/*
 * Common handler for the interruption vectors dispatched in the switch
 * below.
 *
 * Each case either handles the event and returns (usually by forcing a
 * signal on the current task) or formats a description into buf and
 * breaks out of the switch.  In the latter case a kernel-mode fault ends
 * in die_if_kernel() and a user-mode fault gets SIGILL.
 */
void __kprobes
ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
unsigned long iim, unsigned long itir, long arg5, long arg6,
long arg7, struct pt_regs regs)
{
unsigned long code, error = isr, iip;
struct siginfo siginfo;
char buf[128];
int result, sig;
/* Descriptions for General Exception codes, indexed by (isr >> 4) & 0xf. */
static const char *reason[] = {
"IA-64 Illegal Operation fault",
"IA-64 Privileged Operation fault",
"IA-64 Privileged Register fault",
"IA-64 Reserved Register/Field fault",
"Disabled Instruction Set Transition fault",
"Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault",
"Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12",
"Unknown fault 13", "Unknown fault 14", "Unknown fault 15"
};
if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) {
/*
* This fault was due to lfetch.fault, set "ed" bit in the psr to cancel
* the lfetch.
*/
ia64_psr(&regs)->ed = 1;
return;
}
iip = regs.cr_iip + ia64_psr(&regs)->ri;
switch (vector) {
case 24: /* General Exception */
code = (isr >> 4) & 0xf;
sprintf(buf, "General Exception: %s%s", reason[code],
(code == 3) ? ((isr & (1UL << 37))
? " (RSE access)" : " (data access)") : "");
/* code 8 (Illegal Hazard) is only reported, never fatal */
if (code == 8) {
# ifdef CONFIG_IA64_PRINT_HAZARDS
printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n",
current->comm, task_pid_nr(current),
regs.cr_iip + ia64_psr(&regs)->ri, regs.pr);
# endif
return;
}
break;
case 25: /* Disabled FP-Register */
if (isr & 2) {
disabled_fph_fault(&regs);
return;
}
sprintf(buf, "Disabled FPL fault---not supposed to happen!");
break;
case 26: /* NaT Consumption */
if (user_mode(&regs)) {
void __user *addr;
if (((isr >> 4) & 0xf) == 2) {
/* NaT page consumption */
sig = SIGSEGV;
code = SEGV_ACCERR;
addr = (void __user *) ifa;
} else {
/* register NaT consumption */
sig = SIGILL;
code = ILL_ILLOPN;
addr = (void __user *) (regs.cr_iip
+ ia64_psr(&regs)->ri);
}
siginfo.si_signo = sig;
siginfo.si_code = code;
siginfo.si_errno = 0;
siginfo.si_addr = addr;
siginfo.si_imm = vector;
siginfo.si_flags = __ISR_VALID;
siginfo.si_isr = isr;
force_sig_info(sig, &siginfo, current);
return;
} else if (ia64_done_with_exception(&regs))
return;
sprintf(buf, "NaT consumption");
break;
case 31: /* Unsupported Data Reference */
if (user_mode(&regs)) {
siginfo.si_signo = SIGILL;
siginfo.si_code = ILL_ILLOPN;
siginfo.si_errno = 0;
siginfo.si_addr = (void __user *) iip;
siginfo.si_imm = vector;
siginfo.si_flags = __ISR_VALID;
siginfo.si_isr = isr;
force_sig_info(SIGILL, &siginfo, current);
return;
}
sprintf(buf, "Unsupported data reference");
break;
case 29: /* Debug */
case 35: /* Taken Branch Trap */
case 36: /* Single Step Trap */
if (fsys_mode(current, &regs)) {
extern char __kernel_syscall_via_break[];
/*
* Got a trap in fsys-mode: Taken Branch Trap
* and Single Step trap need special handling;
* Debug trap is ignored (we disable it here
* and re-enable it in the lower-privilege trap).
*/
if (unlikely(vector == 29)) {
set_thread_flag(TIF_DB_DISABLED);
ia64_psr(&regs)->db = 0;
ia64_psr(&regs)->lp = 1;
return;
}
/* re-do the system call via break 0x100000: */
regs.cr_iip = (unsigned long) __kernel_syscall_via_break;
ia64_psr(&regs)->ri = 0;
ia64_psr(&regs)->cpl = 3;
return;
}
/* Not in fsys-mode: pick the SIGTRAP si_code for this vector. */
switch (vector) {
case 29:
siginfo.si_code = TRAP_HWBKPT;
#ifdef CONFIG_ITANIUM
/*
* Erratum 10 (IFA may contain incorrect address) now has
* "NoFix" status. There are no plans for fixing this.
*/
if (ia64_psr(&regs)->is == 0)
ifa = regs.cr_iip;
#endif
break;
case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break;
case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break;
}
if (notify_die(DIE_FAULT, "ia64_fault", &regs, vector, siginfo.si_code, SIGTRAP)
== NOTIFY_STOP)
return;
siginfo.si_signo = SIGTRAP;
siginfo.si_errno = 0;
siginfo.si_addr = (void __user *) ifa;
siginfo.si_imm = 0;
siginfo.si_flags = __ISR_VALID;
siginfo.si_isr = isr;
force_sig_info(SIGTRAP, &siginfo, current);
return;
case 32: /* fp fault */
case 33: /* fp trap */
result = handle_fpu_swa((vector == 32) ? 1 : 0, &regs, isr);
/* SIGFPE if emulation failed or the task asked for SIGFPE on every assist */
if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
siginfo.si_signo = SIGFPE;
siginfo.si_errno = 0;
siginfo.si_code = FPE_FLTINV;
siginfo.si_addr = (void __user *) iip;
siginfo.si_flags = __ISR_VALID;
siginfo.si_isr = isr;
siginfo.si_imm = 0;
force_sig_info(SIGFPE, &siginfo, current);
}
return;
case 34:
if (isr & 0x2) {
/* Lower-Privilege Transfer Trap */
/* If we disabled debug traps during an fsyscall,
* re-enable them here.
*/
if (test_thread_flag(TIF_DB_DISABLED)) {
clear_thread_flag(TIF_DB_DISABLED);
ia64_psr(&regs)->db = 1;
}
/*
* Just clear PSR.lp and then return immediately:
* all the interesting work (e.g., signal delivery)
* is done in the kernel exit path.
*/
ia64_psr(&regs)->lp = 0;
return;
} else {
/* Unimplemented Instr. Address Trap */
if (user_mode(&regs)) {
siginfo.si_signo = SIGILL;
siginfo.si_code = ILL_BADIADDR;
siginfo.si_errno = 0;
siginfo.si_flags = 0;
siginfo.si_isr = 0;
siginfo.si_imm = 0;
siginfo.si_addr = (void __user *) iip;
force_sig_info(SIGILL, &siginfo, current);
return;
}
sprintf(buf, "Unimplemented Instruction Address fault");
}
break;
case 45:
printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n");
printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
iip, ifa, isr);
force_sig(SIGSEGV, current);
return;
case 46:
printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n",
iip, ifa, isr, iim);
force_sig(SIGSEGV, current);
return;
case 47:
sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16);
break;
default:
sprintf(buf, "Fault %lu", vector);
break;
}
/* Unhandled: die in kernel mode, SIGILL for user mode. */
if (!die_if_kernel(buf, &regs, error))
force_sig(SIGILL, current);
}

1542
arch/ia64/kernel/unaligned.c Normal file

File diff suppressed because it is too large Load diff

281
arch/ia64/kernel/uncached.c Normal file
View file

@ -0,0 +1,281 @@
/*
* Copyright (C) 2001-2008 Silicon Graphics, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU General Public License
* as published by the Free Software Foundation.
*
* A simple uncached page allocator using the generic allocator. This
* allocator first utilizes the spare (spill) pages found in the EFI
* memmap and will then start converting cached pages to uncached ones
* at a granule at a time. Node awareness is implemented by having a
* pool of pages per node.
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/efi.h>
#include <linux/genalloc.h>
#include <linux/gfp.h>
#include <asm/page.h>
#include <asm/pal.h>
#include <asm/pgtable.h>
#include <linux/atomic.h>
#include <asm/tlbflush.h>
#include <asm/sn/arch.h>
/* Defined outside this file; takes an efi_freemem_callback_t and an opaque argument. */
extern void __init efi_memmap_walk_uc(efi_freemem_callback_t, void *);
/* Per-node pool of uncached memory. */
struct uncached_pool {
struct gen_pool *pool;
struct mutex add_chunk_mutex; /* serialize adding a converted chunk */
int nchunks_added; /* #of converted chunks added to pool */
atomic_t status; /* smp called function's return status*/
};
/* Upper bound on chunks uncached_add_chunk() will convert for one pool. */
#define MAX_CONVERTED_CHUNKS_PER_NODE 2
/* One pool per node, indexed by node id. */
struct uncached_pool uncached_pools[MAX_NUMNODES];
/*
 * Cross-CPU callback: run the PAL prefetch-visibility call on this CPU
 * and count a failure in the pool's status if it returns neither
 * PAL_VISIBILITY_OK nor PAL_VISIBILITY_OK_REMOTE_NEEDED.
 *
 * @data: the struct uncached_pool being converted
 */
static void uncached_ipi_visibility(void *data)
{
	struct uncached_pool *uc_pool = (struct uncached_pool *)data;
	int rc;

	rc = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
	if (rc == PAL_VISIBILITY_OK || rc == PAL_VISIBILITY_OK_REMOTE_NEEDED)
		return;

	atomic_inc(&uc_pool->status);
}
/*
 * Cross-CPU callback: run ia64_pal_mc_drain() on this CPU and count a
 * failure in the pool's status if it does not succeed.
 *
 * @data: the struct uncached_pool being converted
 */
static void uncached_ipi_mc_drain(void *data)
{
	struct uncached_pool *uc_pool = (struct uncached_pool *)data;
	int rc;

	rc = ia64_pal_mc_drain();
	if (rc == PAL_STATUS_SUCCESS)
		return;

	atomic_inc(&uc_pool->status);
}
/*
* Add a new chunk of uncached memory pages to the specified pool.
*
* @uc_pool: pool to add new chunk of uncached memory to
* @nid: node id of node to allocate memory from, or -1
*
* This is accomplished by first allocating a granule of cached memory pages
* and then converting them to uncached memory pages.
*
* Returns 0 if a chunk was added (by this caller or by another one while
* this caller waited for the mutex) and -1 otherwise: interrupted while
* waiting, per-node chunk limit reached, allocation failure or a failed
* conversion step.
*/
static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
{
struct page *page;
/* nchunks_added is sampled before taking the mutex so a concurrent add can be detected below */
int status, i, nchunks_added = uc_pool->nchunks_added;
unsigned long c_addr, uc_addr;
if (mutex_lock_interruptible(&uc_pool->add_chunk_mutex) != 0)
return -1; /* interrupted by a signal */
if (uc_pool->nchunks_added > nchunks_added) {
/* someone added a new chunk while we were waiting */
mutex_unlock(&uc_pool->add_chunk_mutex);
return 0;
}
if (uc_pool->nchunks_added >= MAX_CONVERTED_CHUNKS_PER_NODE) {
mutex_unlock(&uc_pool->add_chunk_mutex);
return -1;
}
/* attempt to allocate a granule's worth of cached memory pages */
page = alloc_pages_exact_node(nid,
GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
IA64_GRANULE_SHIFT-PAGE_SHIFT);
if (!page) {
mutex_unlock(&uc_pool->add_chunk_mutex);
return -1;
}
/* convert the memory pages from cached to uncached */
c_addr = (unsigned long)page_address(page);
uc_addr = c_addr - PAGE_OFFSET + __IA64_UNCACHED_OFFSET;
/*
* There's a small race here where it's possible for someone to
* access the page through /dev/mem halfway through the conversion
* to uncached - not sure it's really worth bothering about
*/
for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
SetPageUncached(&page[i]);
flush_tlb_kernel_range(uc_addr, uc_addr + IA64_GRANULE_SIZE);
/* run the visibility call locally, and on the other CPUs if PAL asks for it */
status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) {
atomic_set(&uc_pool->status, 0);
status = smp_call_function(uncached_ipi_visibility, uc_pool, 1);
if (status || atomic_read(&uc_pool->status))
goto failed;
} else if (status != PAL_VISIBILITY_OK)
goto failed;
preempt_disable();
if (ia64_platform_is("sn2"))
sn_flush_all_caches(uc_addr, IA64_GRANULE_SIZE);
else
flush_icache_range(uc_addr, uc_addr + IA64_GRANULE_SIZE);
/* flush the just introduced uncached translation from the TLB */
local_flush_tlb_all();
preempt_enable();
/* mc_drain on this CPU first, then on all the others */
status = ia64_pal_mc_drain();
if (status != PAL_STATUS_SUCCESS)
goto failed;
atomic_set(&uc_pool->status, 0);
status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 1);
if (status || atomic_read(&uc_pool->status))
goto failed;
/*
* The chunk of memory pages has been converted to uncached so now we
* can add it to the pool.
*/
status = gen_pool_add(uc_pool->pool, uc_addr, IA64_GRANULE_SIZE, nid);
if (status)
goto failed;
uc_pool->nchunks_added++;
mutex_unlock(&uc_pool->add_chunk_mutex);
return 0;
/* failed to convert or add the chunk so give it back to the kernel */
failed:
for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
ClearPageUncached(&page[i]);
free_pages(c_addr, IA64_GRANULE_SHIFT-PAGE_SHIFT);
mutex_unlock(&uc_pool->add_chunk_mutex);
return -1;
}
/*
 * uncached_alloc_page
 *
 * @starting_nid: node id of node to start with, or -1
 * @n_pages: number of contiguous pages to allocate
 *
 * Allocate the specified number of contiguous uncached pages on the
 * requested node.  If not enough contiguous uncached pages are available
 * on the requested node, roundrobin starting with the next higher node.
 *
 * Returns the uncached address of the first page, or 0 on failure.
 */
unsigned long uncached_alloc_page(int starting_nid, int n_pages)
{
	struct uncached_pool *uc_pool;
	unsigned long uc_addr;
	int nid;

	if (unlikely(starting_nid >= MAX_NUMNODES))
		return 0;
	if (starting_nid < 0)
		starting_nid = numa_node_id();

	nid = starting_nid;
	do {
		uc_pool = &uncached_pools[nid];

		/* only nodes in N_HIGH_MEMORY state that have a pool are tried */
		if (node_state(nid, N_HIGH_MEMORY) && uc_pool->pool != NULL) {
			/* retry for as long as another chunk can be added */
			for (;;) {
				uc_addr = gen_pool_alloc(uc_pool->pool,
							 n_pages * PAGE_SIZE);
				if (uc_addr != 0)
					return uc_addr;
				if (uncached_add_chunk(uc_pool, nid) != 0)
					break;
			}
		}

		nid = (nid + 1) % MAX_NUMNODES;
	} while (nid != starting_nid);

	return 0;
}
EXPORT_SYMBOL(uncached_alloc_page);
/*
* uncached_free_page
*
* @uc_addr: uncached address of first page to free
* @n_pages: number of contiguous pages to free
*
* Free the specified number of uncached pages.
*/
void uncached_free_page(unsigned long uc_addr, int n_pages)
{
int nid = paddr_to_nid(uc_addr - __IA64_UNCACHED_OFFSET);
struct gen_pool *pool = uncached_pools[nid].pool;
if (unlikely(pool == NULL))
return;
if ((uc_addr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET)
panic("uncached_free_page invalid address %lx\n", uc_addr);
gen_pool_free(pool, uc_addr, n_pages * PAGE_SIZE);
}
EXPORT_SYMBOL(uncached_free_page);
/*
 * uncached_build_memmap
 *
 * @uc_start: uncached starting address of a chunk of uncached memory
 * @uc_end: uncached ending address of a chunk of uncached memory
 * @arg: ignored (NULL is passed in on the call to efi_memmap_walk_uc())
 *
 * Boot-time callback: zeroes the chunk and hands it to the pool of the
 * node it lives on, so the pages can later be used for memory special
 * operations.  Chunks on a node without a pool are left untouched.
 *
 * Always returns 0.
 */
static int __init uncached_build_memmap(u64 uc_start, u64 uc_end, void *arg)
{
	struct gen_pool *pool;
	size_t size;
	int nid;

	nid = paddr_to_nid(uc_start - __IA64_UNCACHED_OFFSET);
	pool = uncached_pools[nid].pool;
	size = uc_end - uc_start;

	touch_softlockup_watchdog();

	if (pool == NULL)
		return 0;

	memset((char *)uc_start, 0, size);
	/* failure to add the chunk is deliberately ignored */
	(void) gen_pool_add(pool, uc_start, size, nid);

	return 0;
}
/*
 * uncached_init
 *
 * Boot-time setup of the per-node uncached pools: for every online node a
 * gen_pool is created and the node's add_chunk_mutex is initialised, then
 * the EFI memory map is walked so that uncached_build_memmap() can seed the
 * pools.
 *
 * Always returns 0.  The result of gen_pool_create() is stored unchecked;
 * the users of uncached_pools[].pool visible in this file test it for NULL.
 */
static int __init uncached_init(void)
{
int nid;
/* one pool and one mutex per online node */
for_each_node_state(nid, N_ONLINE) {
uncached_pools[nid].pool = gen_pool_create(PAGE_SHIFT, nid);
mutex_init(&uncached_pools[nid].add_chunk_mutex);
}
/* feed the chunks reported by the EFI walk into the pools */
efi_memmap_walk_uc(uncached_build_memmap, NULL);
return 0;
}
__initcall(uncached_init);

2319
arch/ia64/kernel/unwind.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,459 @@
/*
* Copyright (C) 2000 Hewlett-Packard Co
* Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
*
* Generic IA-64 unwind info decoder.
*
* This file is used both by the Linux kernel and objdump. Please keep
* the two copies of this file in sync.
*
* You need to customize the decoder by defining the following
* macros/constants before including this file:
*
* Types:
* unw_word Unsigned integer type with at least 64 bits
*
* Register names:
* UNW_REG_BSP
* UNW_REG_BSPSTORE
* UNW_REG_FPSR
* UNW_REG_LC
* UNW_REG_PFS
* UNW_REG_PR
* UNW_REG_RNAT
* UNW_REG_PSP
* UNW_REG_RP
* UNW_REG_UNAT
*
* Decoder action macros:
* UNW_DEC_BAD_CODE(code)
* UNW_DEC_ABI(fmt,abi,context,arg)
* UNW_DEC_BR_GR(fmt,brmask,gr,arg)
* UNW_DEC_BR_MEM(fmt,brmask,arg)
* UNW_DEC_COPY_STATE(fmt,label,arg)
* UNW_DEC_EPILOGUE(fmt,t,ecount,arg)
* UNW_DEC_FRGR_MEM(fmt,grmask,frmask,arg)
* UNW_DEC_FR_MEM(fmt,frmask,arg)
* UNW_DEC_GR_GR(fmt,grmask,gr,arg)
* UNW_DEC_GR_MEM(fmt,grmask,arg)
* UNW_DEC_LABEL_STATE(fmt,label,arg)
* UNW_DEC_MEM_STACK_F(fmt,t,size,arg)
* UNW_DEC_MEM_STACK_V(fmt,t,arg)
* UNW_DEC_PRIUNAT_GR(fmt,r,arg)
* UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg)
* UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg)
* UNW_DEC_PRIUNAT_PSPREL(fmt,pspoff,arg)
* UNW_DEC_PRIUNAT_SPREL(fmt,spoff,arg)
* UNW_DEC_PROLOGUE(fmt,body,rlen,arg)
* UNW_DEC_PROLOGUE_GR(fmt,rlen,mask,grsave,arg)
* UNW_DEC_REG_PSPREL(fmt,reg,pspoff,arg)
* UNW_DEC_REG_GR(fmt,reg,dst,arg)
* UNW_DEC_REG_SPREL(fmt,reg,spoff,arg)
* UNW_DEC_REG_WHEN(fmt,reg,t,arg)
* UNW_DEC_RESTORE(fmt,t,abreg,arg)
* UNW_DEC_RESTORE_P(fmt,qp,t,abreg,arg)
* UNW_DEC_RP_BR(fmt,dst,arg)
* UNW_DEC_SPILL_BASE(fmt,pspoff,arg)
* UNW_DEC_SPILL_MASK(fmt,imaskp,arg)
* UNW_DEC_SPILL_PSPREL(fmt,t,abreg,pspoff,arg)
* UNW_DEC_SPILL_PSPREL_P(fmt,qp,t,abreg,pspoff,arg)
* UNW_DEC_SPILL_REG(fmt,t,abreg,x,ytreg,arg)
* UNW_DEC_SPILL_REG_P(fmt,qp,t,abreg,x,ytreg,arg)
* UNW_DEC_SPILL_SPREL(fmt,t,abreg,spoff,arg)
* UNW_DEC_SPILL_SPREL_P(fmt,qp,t,abreg,spoff,arg)
*/
/*
 * Decode one unsigned LEB128 value.
 *
 * DPP points at the read cursor; on return it has been advanced past the
 * last byte of the encoding (the first byte whose bit 7 is clear).
 *
 * Each byte contributes its low seven bits, least significant group first.
 * Groups that would have to be shifted by the full width of unw_word or
 * more are skipped rather than shifted: such a shift is undefined in C,
 * and an over-long (malformed) encoding must not be able to trigger it.
 * The bytes of an over-long encoding are still consumed so that the cursor
 * ends up behind the value.  Well-formed encodings decode exactly as
 * before.
 *
 * Returns the decoded value (truncated to unw_word).
 */
static unw_word
unw_decode_uleb128 (unsigned char **dpp)
{
  unsigned shift = 0;
  unw_word byte, result = 0;
  unsigned char *bp = *dpp;

  while (1)
    {
      byte = *bp++;
      if (shift < sizeof (result) * 8)
	{
	  result |= (byte & 0x7f) << shift;
	  shift += 7;
	}
      if ((byte & 0x80) == 0)
	break;
    }
  *dpp = bp;
  return result;
}
/*
 * Decode an X1 descriptor: one byte (bit 7 selects sp- vs. psp-relative,
 * bits 0-6 are abreg) followed by two ULEB128 values, t and the offset.
 * Returns the address of the next descriptor.
 */
static unsigned char *
unw_decode_x1 (unsigned char *dp, unsigned char code, void *arg)
{
  unsigned char byte1 = *dp++;
  unw_word t = unw_decode_uleb128 (&dp);
  unw_word off = unw_decode_uleb128 (&dp);
  unsigned char abreg = byte1 & 0x7f;

  if ((byte1 & 0x80) == 0)
    {
      UNW_DEC_SPILL_PSPREL(X1, t, abreg, off, arg);
    }
  else
    {
      UNW_DEC_SPILL_SPREL(X1, t, abreg, off, arg);
    }
  return dp;
}
/*
 * Decode an X2 descriptor: byte1 (bit 7 = x, bits 0-6 = abreg), byte2
 * (ytreg) and a ULEB128 t.  With x and ytreg both zero the descriptor is
 * a restore, otherwise a spill to a register.
 * Returns the address of the next descriptor.
 */
static unsigned char *
unw_decode_x2 (unsigned char *dp, unsigned char code, void *arg)
{
  unsigned char byte1 = *dp++;
  unsigned char byte2 = *dp++;
  unw_word t = unw_decode_uleb128 (&dp);
  unsigned char abreg = byte1 & 0x7f;
  unsigned char ytreg = byte2;
  unsigned char x = (byte1 >> 7) & 1;

  if (x != 0 || ytreg != 0)
    {
      UNW_DEC_SPILL_REG(X2, t, abreg, x, ytreg, arg);
    }
  else
    {
      UNW_DEC_RESTORE(X2, t, abreg, arg);
    }
  return dp;
}
/*
 * Decode an X3 descriptor, the predicated form of X1: byte1 (bit 7 selects
 * sp- vs. psp-relative, bits 0-5 = qp), byte2 (bits 0-6 = abreg), then two
 * ULEB128 values, t and the offset.
 * Returns the address of the next descriptor.
 */
static unsigned char *
unw_decode_x3 (unsigned char *dp, unsigned char code, void *arg)
{
  unsigned char byte1 = *dp++;
  unsigned char byte2 = *dp++;
  unw_word t = unw_decode_uleb128 (&dp);
  unw_word off = unw_decode_uleb128 (&dp);
  unsigned char qp = byte1 & 0x3f;
  unsigned char abreg = byte2 & 0x7f;

  if ((byte1 & 0x80) == 0)
    {
      UNW_DEC_SPILL_PSPREL_P(X3, qp, t, abreg, off, arg);
    }
  else
    {
      UNW_DEC_SPILL_SPREL_P(X3, qp, t, abreg, off, arg);
    }
  return dp;
}
/*
 * Decode an X4 descriptor, the predicated form of X2: byte1 (bits 0-5 =
 * qp), byte2 (bit 7 = x, bits 0-6 = abreg), byte3 (ytreg) and a ULEB128 t.
 * With x and ytreg both zero the descriptor is a restore, otherwise a
 * spill to a register.
 * Returns the address of the next descriptor.
 */
static unsigned char *
unw_decode_x4 (unsigned char *dp, unsigned char code, void *arg)
{
  unsigned char byte1 = *dp++;
  unsigned char byte2 = *dp++;
  unsigned char byte3 = *dp++;
  unw_word t = unw_decode_uleb128 (&dp);
  unsigned char qp = byte1 & 0x3f;
  unsigned char abreg = byte2 & 0x7f;
  unsigned char x = (byte2 >> 7) & 1;
  unsigned char ytreg = byte3;

  if (x != 0 || ytreg != 0)
    {
      UNW_DEC_SPILL_REG_P(X4, qp, t, abreg, x, ytreg, arg);
    }
  else
    {
      UNW_DEC_RESTORE_P(X4, qp, t, abreg, arg);
    }
  return dp;
}
/*
 * Decode an R1 region header, which is fully contained in the code byte:
 * bit 5 is the body flag, bits 0-4 the region length.
 * Returns DP unchanged, as no further bytes are consumed.
 */
static unsigned char *
unw_decode_r1 (unsigned char *dp, unsigned char code, void *arg)
{
  unw_word rlen = code & 0x1f;
  int body = (code >> 5) & 1;

  UNW_DEC_PROLOGUE(R1, body, rlen, arg);
  return dp;
}
/*
 * Decode an R2 region header: the 4-bit mask is split between the code
 * byte (its low three bits) and bit 7 of the following byte, whose low
 * seven bits are grsave; a ULEB128 region length follows.
 * Returns the address of the next descriptor.
 */
static unsigned char *
unw_decode_r2 (unsigned char *dp, unsigned char code, void *arg)
{
  unsigned char byte1 = *dp++;
  unsigned char mask = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
  unsigned char grsave = byte1 & 0x7f;
  unw_word rlen = unw_decode_uleb128 (&dp);

  UNW_DEC_PROLOGUE_GR(R2, rlen, mask, grsave, arg);
  return dp;
}
/*
 * Decode an R3 region header: the low two bits of the code byte tell
 * whether this is a body (value 1), and a ULEB128 region length follows.
 * Returns the address of the next descriptor.
 */
static unsigned char *
unw_decode_r3 (unsigned char *dp, unsigned char code, void *arg)
{
  unw_word rlen = unw_decode_uleb128 (&dp);
  int body = (code & 0x3) == 1;

  UNW_DEC_PROLOGUE(R3, body, rlen, arg);
  return dp;
}
/*
 * Decode a P1 descriptor: the low five bits of the code byte are the
 * branch register mask.  Returns DP unchanged, as no further bytes are
 * consumed.
 */
static unsigned char *
unw_decode_p1 (unsigned char *dp, unsigned char code, void *arg)
{
  unsigned char brmask;

  brmask = code & 0x1f;
  UNW_DEC_BR_MEM(P1, brmask, arg);
  return dp;
}
/*
 * Decode the descriptors that share the P2..P5 code range.  The format is
 * selected by the low bits of CODE:
 *   bit 4 clear               -> P2
 *   bit 4 set, bit 3 clear    -> P3
 *   bits 4,3 set, low 3 == 0  -> P4
 *   bits 4,3 set, low 3 == 1  -> P5
 * Any other value is reported through UNW_DEC_BAD_CODE.
 * Returns the address of the next descriptor.
 */
static unsigned char *
unw_decode_p2_p5 (unsigned char *dp, unsigned char code, void *arg)
{
if ((code & 0x10) == 0)
{
/* P2: 5-bit brmask (4 bits from code, 1 from byte1) and a 7-bit gr */
unsigned char byte1 = *dp++;
UNW_DEC_BR_GR(P2, ((code & 0xf) << 1) | ((byte1 >> 7) & 1),
(byte1 & 0x7f), arg);
}
else if ((code & 0x08) == 0)
{
/* P3: 4-bit selector r (3 bits from code, 1 from byte1) and a 7-bit dst */
unsigned char byte1 = *dp++, r, dst;
r = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
dst = (byte1 & 0x7f);
switch (r)
{
case 0: UNW_DEC_REG_GR(P3, UNW_REG_PSP, dst, arg); break;
case 1: UNW_DEC_REG_GR(P3, UNW_REG_RP, dst, arg); break;
case 2: UNW_DEC_REG_GR(P3, UNW_REG_PFS, dst, arg); break;
case 3: UNW_DEC_REG_GR(P3, UNW_REG_PR, dst, arg); break;
case 4: UNW_DEC_REG_GR(P3, UNW_REG_UNAT, dst, arg); break;
case 5: UNW_DEC_REG_GR(P3, UNW_REG_LC, dst, arg); break;
case 6: UNW_DEC_RP_BR(P3, dst, arg); break;
case 7: UNW_DEC_REG_GR(P3, UNW_REG_RNAT, dst, arg); break;
case 8: UNW_DEC_REG_GR(P3, UNW_REG_BSP, dst, arg); break;
case 9: UNW_DEC_REG_GR(P3, UNW_REG_BSPSTORE, dst, arg); break;
case 10: UNW_DEC_REG_GR(P3, UNW_REG_FPSR, dst, arg); break;
case 11: UNW_DEC_PRIUNAT_GR(P3, dst, arg); break;
default: UNW_DEC_BAD_CODE(r); break;
}
}
else if ((code & 0x7) == 0)
/*
 * P4: the macro is handed dp itself; NOTE(review): it presumably
 * advances dp past the spill mask - confirm in the macro definition.
 */
UNW_DEC_SPILL_MASK(P4, dp, arg);
else if ((code & 0x7) == 1)
{
/* P5: three bytes holding a 4-bit grmask and a 20-bit frmask */
unw_word grmask, frmask, byte1, byte2, byte3;
byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
grmask = ((byte1 >> 4) & 0xf);
frmask = ((byte1 & 0xf) << 16) | (byte2 << 8) | byte3;
UNW_DEC_FRGR_MEM(P5, grmask, frmask, arg);
}
else
UNW_DEC_BAD_CODE(code);
return dp;
}
/*
 * Decode a P6 descriptor: the low four bits of the code byte are a
 * register mask, bit 4 tells whether it applies to general (set) or
 * floating-point (clear) registers.  Returns DP unchanged, as no further
 * bytes are consumed.
 */
static unsigned char *
unw_decode_p6 (unsigned char *dp, unsigned char code, void *arg)
{
  unsigned char mask = code & 0x0f;

  if ((code & 0x10) == 0)
    {
      UNW_DEC_FR_MEM(P6, mask, arg);
    }
  else
    {
      UNW_DEC_GR_MEM(P6, mask, arg);
    }
  return dp;
}
/*
 * Decode the descriptors that share the P7..P10 code range.  With bit 4
 * of CODE clear the descriptor is a P7 whose sub-type is the low nibble;
 * otherwise the low nibble selects P8 (0x0), P9 (0x1), P10 (0xf) or one of
 * the X1..X4 formats (0x9..0xc).  Anything else is reported through
 * UNW_DEC_BAD_CODE.
 * Returns the address of the next descriptor.
 */
static unsigned char *
unw_decode_p7_p10 (unsigned char *dp, unsigned char code, void *arg)
{
unsigned char r, byte1, byte2;
unw_word t, size;
if ((code & 0x10) == 0)
{
/* P7: sub-type r in the low nibble, followed by a ULEB128 operand */
r = (code & 0xf);
t = unw_decode_uleb128 (&dp);
switch (r)
{
case 0:
/* only sub-type 0 carries a second ULEB128 operand */
size = unw_decode_uleb128 (&dp);
UNW_DEC_MEM_STACK_F(P7, t, size, arg);
break;
case 1: UNW_DEC_MEM_STACK_V(P7, t, arg); break;
case 2: UNW_DEC_SPILL_BASE(P7, t, arg); break;
case 3: UNW_DEC_REG_SPREL(P7, UNW_REG_PSP, t, arg); break;
case 4: UNW_DEC_REG_WHEN(P7, UNW_REG_RP, t, arg); break;
case 5: UNW_DEC_REG_PSPREL(P7, UNW_REG_RP, t, arg); break;
case 6: UNW_DEC_REG_WHEN(P7, UNW_REG_PFS, t, arg); break;
case 7: UNW_DEC_REG_PSPREL(P7, UNW_REG_PFS, t, arg); break;
case 8: UNW_DEC_REG_WHEN(P7, UNW_REG_PR, t, arg); break;
case 9: UNW_DEC_REG_PSPREL(P7, UNW_REG_PR, t, arg); break;
case 10: UNW_DEC_REG_WHEN(P7, UNW_REG_LC, t, arg); break;
case 11: UNW_DEC_REG_PSPREL(P7, UNW_REG_LC, t, arg); break;
case 12: UNW_DEC_REG_WHEN(P7, UNW_REG_UNAT, t, arg); break;
case 13: UNW_DEC_REG_PSPREL(P7, UNW_REG_UNAT, t, arg); break;
case 14: UNW_DEC_REG_WHEN(P7, UNW_REG_FPSR, t, arg); break;
case 15: UNW_DEC_REG_PSPREL(P7, UNW_REG_FPSR, t, arg); break;
/* r is a 4-bit value, so every case is covered above */
default: UNW_DEC_BAD_CODE(r); break;
}
}
else
{
switch (code & 0xf)
{
case 0x0: /* p8 */
{
/* P8: sub-type r in its own byte, followed by a ULEB128 operand */
r = *dp++;
t = unw_decode_uleb128 (&dp);
switch (r)
{
case 1: UNW_DEC_REG_SPREL(P8, UNW_REG_RP, t, arg); break;
case 2: UNW_DEC_REG_SPREL(P8, UNW_REG_PFS, t, arg); break;
case 3: UNW_DEC_REG_SPREL(P8, UNW_REG_PR, t, arg); break;
case 4: UNW_DEC_REG_SPREL(P8, UNW_REG_LC, t, arg); break;
case 5: UNW_DEC_REG_SPREL(P8, UNW_REG_UNAT, t, arg); break;
case 6: UNW_DEC_REG_SPREL(P8, UNW_REG_FPSR, t, arg); break;
case 7: UNW_DEC_REG_WHEN(P8, UNW_REG_BSP, t, arg); break;
case 8: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSP, t, arg); break;
case 9: UNW_DEC_REG_SPREL(P8, UNW_REG_BSP, t, arg); break;
case 10: UNW_DEC_REG_WHEN(P8, UNW_REG_BSPSTORE, t, arg); break;
case 11: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
case 12: UNW_DEC_REG_SPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
case 13: UNW_DEC_REG_WHEN(P8, UNW_REG_RNAT, t, arg); break;
case 14: UNW_DEC_REG_PSPREL(P8, UNW_REG_RNAT, t, arg); break;
case 15: UNW_DEC_REG_SPREL(P8, UNW_REG_RNAT, t, arg); break;
case 16: UNW_DEC_PRIUNAT_WHEN_GR(P8, t, arg); break;
case 17: UNW_DEC_PRIUNAT_PSPREL(P8, t, arg); break;
case 18: UNW_DEC_PRIUNAT_SPREL(P8, t, arg); break;
case 19: UNW_DEC_PRIUNAT_WHEN_MEM(P8, t, arg); break;
default: UNW_DEC_BAD_CODE(r); break;
}
}
break;
case 0x1: /* p9 */
byte1 = *dp++; byte2 = *dp++;
UNW_DEC_GR_GR(P9, (byte1 & 0xf), (byte2 & 0x7f), arg);
break;
case 0xf: /* p10 */
byte1 = *dp++; byte2 = *dp++;
UNW_DEC_ABI(P10, byte1, byte2, arg);
break;
/* 0x9..0xc: hand over to the X-format decoders */
case 0x9:
return unw_decode_x1 (dp, code, arg);
case 0xa:
return unw_decode_x2 (dp, code, arg);
case 0xb:
return unw_decode_x3 (dp, code, arg);
case 0xc:
return unw_decode_x4 (dp, code, arg);
default:
UNW_DEC_BAD_CODE(code);
break;
}
}
return dp;
}
/*
 * Decode a B1 descriptor: the low five bits of the code byte are a state
 * label, bit 5 selects between copying (set) and labelling (clear) the
 * state.  Returns DP unchanged, as no further bytes are consumed.
 */
static unsigned char *
unw_decode_b1 (unsigned char *dp, unsigned char code, void *arg)
{
  unw_word label = code & 0x1f;

  if ((code & 0x20) == 0)
    {
      UNW_DEC_LABEL_STATE(B1, label, arg);
    }
  else
    {
      UNW_DEC_COPY_STATE(B1, label, arg);
    }
  return dp;
}
/*
 * Decode a B2 (epilogue) descriptor: the epilogue count is the low five
 * bits of the code byte, t follows as a ULEB128 value.
 * Returns the address of the next descriptor.
 */
static unsigned char *
unw_decode_b2 (unsigned char *dp, unsigned char code, void *arg)
{
  unw_word t = unw_decode_uleb128 (&dp);
  int ecount = code & 0x1f;

  UNW_DEC_EPILOGUE(B2, t, ecount, arg);
  return dp;
}
/*
 * Decode the descriptors that share the B3/B4/X1..X4 code range of a body
 * region.  With bit 4 of CODE clear the descriptor is a B3 epilogue (two
 * ULEB128 values, t and ecount).  Otherwise the low three bits select B4
 * (0: a ULEB128 label, copied if bit 3 is set, labelled if not) or one of
 * the X formats (1..4); other values are reported through
 * UNW_DEC_BAD_CODE.
 * Returns the address of the next descriptor.
 */
static unsigned char *
unw_decode_b3_x4 (unsigned char *dp, unsigned char code, void *arg)
{
  unw_word t, ecount, label;

  if ((code & 0x10) == 0)
    {
      t = unw_decode_uleb128 (&dp);
      ecount = unw_decode_uleb128 (&dp);
      UNW_DEC_EPILOGUE(B3, t, ecount, arg);
      return dp;
    }

  switch (code & 0x7)
    {
    case 0:
      label = unw_decode_uleb128 (&dp);
      if ((code & 0x08) != 0)
	{
	  UNW_DEC_COPY_STATE(B4, label, arg);
	}
      else
	{
	  UNW_DEC_LABEL_STATE(B4, label, arg);
	}
      break;

    case 1:
      return unw_decode_x1 (dp, code, arg);
    case 2:
      return unw_decode_x2 (dp, code, arg);
    case 3:
      return unw_decode_x3 (dp, code, arg);
    case 4:
      return unw_decode_x4 (dp, code, arg);

    default:
      UNW_DEC_BAD_CODE(code);
      break;
    }
  return dp;
}
/*
 * A descriptor decoder takes the address of the byte following the code
 * byte, the code byte itself and the opaque argument passed to
 * unw_decode(), and returns the address of the next descriptor.
 */
typedef unsigned char *(*unw_decoder) (unsigned char *, unsigned char, void *);
/*
 * Dispatch table used by unw_decode(): the first index is the caller's
 * inside_body flag (0 = prologue, 1 = body), the second is the top three
 * bits of the code byte.  Entries 0-3 (the region headers) are the same in
 * both rows.
 */
static unw_decoder unw_decode_table[2][8] =
{
/* prologue table: */
{
unw_decode_r1, /* 0 */
unw_decode_r1,
unw_decode_r2,
unw_decode_r3,
unw_decode_p1, /* 4 */
unw_decode_p2_p5,
unw_decode_p6,
unw_decode_p7_p10
},
/* body table: */
{
unw_decode_r1, /* 0 */
unw_decode_r1,
unw_decode_r2,
unw_decode_r3,
unw_decode_b1, /* 4 */
unw_decode_b1,
unw_decode_b2,
unw_decode_b3_x4
}
};
/*
 * Decode one descriptor and return the address of the next descriptor.
 *
 * DP points at the descriptor's code byte.  INSIDE_BODY selects the row of
 * unw_decode_table (0 = prologue, 1 = body) and must be 0 or 1; the top
 * three bits of the code byte select the decoder within that row.  ARG is
 * passed through to the decoder untouched.
 */
static inline unsigned char *
unw_decode (unsigned char *dp, int inside_body, void *arg)
{
  unsigned char code = *dp++;
  unw_decoder decoder = unw_decode_table[inside_body][code >> 5];

  return (*decoder) (dp, code, arg);
}

164
arch/ia64/kernel/unwind_i.h Normal file
View file

@ -0,0 +1,164 @@
/*
* Copyright (C) 2000, 2002-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* Kernel unwind support.
*/
/*
 * Field accessors for a 64-bit unwind header word
 * (NOTE(review): presumably unw_info_block.header - confirm against users):
 *   bits 48 and up : version		(UNW_VER)
 *   bits 32-47     : flags		(UNW_FLAG_MASK); bits 44-47 are
 *		      covered by UNW_FLAG_OSMASK, bit 32 is tested by
 *		      UNW_FLAG_EHANDLER and bit 33 by UNW_FLAG_UHANDLER
 *   bits 0-31      : length		(UNW_LENGTH)
 */
#define UNW_VER(x) ((x) >> 48)
#define UNW_FLAG_MASK 0x0000ffff00000000
#define UNW_FLAG_OSMASK 0x0000f00000000000
#define UNW_FLAG_EHANDLER(x) ((x) & 0x0000000100000000L)
#define UNW_FLAG_UHANDLER(x) ((x) & 0x0000000200000000L)
#define UNW_LENGTH(x) ((x) & 0x00000000ffffffffL)
/*
 * Index of every register the unwinder keeps save information for.
 * The enumerators are used as array indices (see unw_reg_state.reg[]),
 * so their order is significant and UNW_NUM_REGS, which must stay last,
 * is the number of entries.
 */
enum unw_register_index {
/* primary unat: */
UNW_REG_PRI_UNAT_GR,
UNW_REG_PRI_UNAT_MEM,
/* register stack */
UNW_REG_BSP, /* register stack pointer */
UNW_REG_BSPSTORE,
UNW_REG_PFS, /* previous function state */
UNW_REG_RNAT,
/* memory stack */
UNW_REG_PSP, /* previous memory stack pointer */
/* return pointer: */
UNW_REG_RP,
/* preserved registers: */
UNW_REG_R4, UNW_REG_R5, UNW_REG_R6, UNW_REG_R7,
UNW_REG_UNAT, UNW_REG_PR, UNW_REG_LC, UNW_REG_FPSR,
UNW_REG_B1, UNW_REG_B2, UNW_REG_B3, UNW_REG_B4, UNW_REG_B5,
UNW_REG_F2, UNW_REG_F3, UNW_REG_F4, UNW_REG_F5,
UNW_REG_F16, UNW_REG_F17, UNW_REG_F18, UNW_REG_F19,
UNW_REG_F20, UNW_REG_F21, UNW_REG_F22, UNW_REG_F23,
UNW_REG_F24, UNW_REG_F25, UNW_REG_F26, UNW_REG_F27,
UNW_REG_F28, UNW_REG_F29, UNW_REG_F30, UNW_REG_F31,
UNW_NUM_REGS
};
/*
 * Layout of an unwind info block: a 64-bit header word followed directly
 * by a variable number of descriptor words (desc is a zero-length array
 * used as a trailing variable-size member).
 */
struct unw_info_block {
u64 header;
u64 desc[0]; /* unwind descriptors */
/* personality routine and language-specific data follow behind descriptors */
};
/*
 * One unwind table, kept on a singly linked list through "next".
 * NOTE(review): start/end presumably delimit the address range the table
 * covers and array/length describe its entries - confirm against the
 * users in unwind.c.
 */
struct unw_table {
struct unw_table *next; /* must be first member! */
const char *name;
unsigned long gp; /* global pointer for this load-module */
unsigned long segment_base; /* base for offsets in the unwind table entries */
unsigned long start;
unsigned long end;
const struct unw_table_entry *array;
unsigned long length;
};
/*
 * Kind of location a register has been saved to; stored in
 * unw_reg_info.where and qualified by unw_reg_info.val.
 */
enum unw_where {
UNW_WHERE_NONE, /* register isn't saved at all */
UNW_WHERE_GR, /* register is saved in a general register */
UNW_WHERE_FR, /* register is saved in a floating-point register */
UNW_WHERE_BR, /* register is saved in a branch register */
UNW_WHERE_SPREL, /* register is saved on memstack (sp-relative) */
UNW_WHERE_PSPREL, /* register is saved on memstack (psp-relative) */
/*
* At the end of each prologue these locations get resolved to
* UNW_WHERE_PSPREL and UNW_WHERE_GR, respectively:
*/
UNW_WHERE_SPILL_HOME, /* register is saved in its spill home */
UNW_WHERE_GR_SAVE /* register is saved in next general register */
};
/*
 * Largest positive value of a 32-bit int.
 * NOTE(review): presumably the unw_reg_info.when value meaning "never
 * saved" - confirm against the users in unwind.c.
 */
#define UNW_WHEN_NEVER 0x7fffffff
/* Save information for a single register: where, at which location, and when. */
struct unw_reg_info {
unsigned long val; /* save location: register number or offset */
enum unw_where where; /* where the register gets saved */
int when; /* when the register gets saved */
};
/*
 * Save information for all tracked registers (indexed by
 * enum unw_register_index); states are chained through "next" to form a
 * stack.
 */
struct unw_reg_state {
struct unw_reg_state *next; /* next (outer) element on state stack */
struct unw_reg_info reg[UNW_NUM_REGS]; /* register save locations */
};
/*
 * A register state saved under a numeric label; the entries form a singly
 * linked list (see unw_state_record.labeled_states).
 */
struct unw_labeled_state {
struct unw_labeled_state *next; /* next labeled state (or NULL) */
unsigned long label; /* label for this state */
struct unw_reg_state saved_state;
};
/*
 * Working state kept while the unwind descriptors are scanned: four
 * one-bit progress flags, the bookkeeping for the region and epilogue
 * currently being processed, and the register state built up so far
 * ("curr") together with the list of labeled states.
 */
struct unw_state_record {
unsigned int first_region : 1; /* is this the first region? */
unsigned int done : 1; /* are we done scanning descriptors? */
unsigned int any_spills : 1; /* got any register spills? */
unsigned int in_body : 1; /* are we inside a body (as opposed to a prologue)? */
unsigned long flags; /* see UNW_FLAG_* in unwind.h */
u8 *imask; /* imask of spill_mask record or NULL */
unsigned long pr_val; /* predicate values */
unsigned long pr_mask; /* predicate mask */
long spill_offset; /* psp-relative offset for spill base */
int region_start;
int region_len;
int epilogue_start;
int epilogue_count;
int when_target;
u8 gr_save_loc; /* next general register to use for saving a register */
u8 return_link_reg; /* branch register in which the return link is passed */
struct unw_labeled_state *labeled_states; /* list of all labeled states */
struct unw_reg_state curr; /* current state */
};
/* How the NaT bit belonging to a saved register is represented. */
enum unw_nat_type {
UNW_NAT_NONE, /* NaT not represented */
UNW_NAT_VAL, /* NaT represented by NaT value (fp reg) */
UNW_NAT_MEMSTK, /* NaT value is in unat word at offset OFF */
UNW_NAT_REGSTK /* NaT is in rnat */
};
/*
 * Opcodes of the unwind script instructions (struct unw_insn).  In the
 * per-opcode comments "s" is the state the script operates on, and "dst"
 * and "val" are the fields of the instruction.  The values have to fit
 * into the 4-bit unw_insn.opc field.
 */
enum unw_insn_opcode {
UNW_INSN_ADD, /* s[dst] += val */
UNW_INSN_ADD_PSP, /* s[dst] = (s.psp + val) */
UNW_INSN_ADD_SP, /* s[dst] = (s.sp + val) */
UNW_INSN_MOVE, /* s[dst] = s[val] */
UNW_INSN_MOVE2, /* s[dst] = s[val]; s[dst+1] = s[val+1] */
UNW_INSN_MOVE_STACKED, /* s[dst] = ia64_rse_skip(*s.bsp, val) */
UNW_INSN_SETNAT_MEMSTK, /* s[dst+1].nat.type = MEMSTK;
s[dst+1].nat.off = *s.pri_unat - s[dst] */
UNW_INSN_SETNAT_TYPE, /* s[dst+1].nat.type = val */
UNW_INSN_LOAD, /* s[dst] = *s[val] */
UNW_INSN_MOVE_SCRATCH, /* s[dst] = scratch reg "val" */
UNW_INSN_MOVE_CONST, /* s[dst] = constant reg "val" */
};
struct unw_insn {
unsigned int opc : 4;
unsigned int dst : 9;
signed int val : 19;
};
/*
* Preserved general static registers (r4-r7) give rise to two script
* instructions; everything else yields at most one instruction; at
* the end of the script, the psp gets popped, accounting for one more
* instruction.
*
* Hence the bound below: one instruction per tracked register, four
* extra for r4-r7 and one for the final psp pop.
*/
#define UNW_MAX_SCRIPT_LEN (UNW_NUM_REGS + 5)
/*
 * An unwind script for one ip/predicate combination, guarded by its own
 * rwlock.  lru_chain, coll_chain and hint are unsigned short links to
 * other scripts (NOTE(review): presumably indices into a script table -
 * confirm in unwind.c); since they are unsigned, the "-1" mentioned for
 * hint is stored as the all-ones value.
 */
struct unw_script {
unsigned long ip; /* ip this script is for */
unsigned long pr_mask; /* mask of predicates script depends on */
unsigned long pr_val; /* predicate values this script is for */
rwlock_t lock;
unsigned int flags; /* see UNW_FLAG_* in unwind.h */
unsigned short lru_chain; /* used for least-recently-used chain */
unsigned short coll_chain; /* used for hash collisions */
unsigned short hint; /* hint for next script to try (or -1) */
unsigned short count; /* number of instructions in script */
struct unw_insn insn[UNW_MAX_SCRIPT_LEN];
};

View file

@ -0,0 +1,248 @@
#include <asm/cache.h>
#include <asm/ptrace.h>
#include <asm/pgtable.h>
#include <asm-generic/vmlinux.lds.h>
/* 64-bit little-endian ia64 ELF output; execution starts at phys_start */
OUTPUT_FORMAT("elf64-ia64-little")
OUTPUT_ARCH(ia64)
ENTRY(phys_start)
/* make the symbol jiffies an alias of jiffies_64 */
jiffies = jiffies_64;
/*
 * Program headers: three loadable segments (code, percpu, data), a note
 * segment and the ia64 unwind segment.  Sections are assigned to them
 * with ":name" in SECTIONS below.
 */
PHDRS {
code PT_LOAD;
percpu PT_LOAD;
data PT_LOAD;
note PT_NOTE;
unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */
}
/*
 * Section layout.  Every output section is linked at its virtual address
 * and loaded at that address minus LOAD_OFFSET (the AT() clauses).  The
 * order of the statements below is the order of the sections in the image.
 */
SECTIONS {
/*
* unwind exit sections must be discarded before
* the rest of the sections get included.
*/
/DISCARD/ : {
*(.IA_64.unwind.exit.text)
*(.IA_64.unwind_info.exit.text)
*(.comment)
*(.note)
}
v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */
/* load address of _start; this is the ENTRY point */
phys_start = _start - LOAD_OFFSET;
/* empty output section: selects the "code" program header for what follows */
code : {
} :code
. = KERNEL_START;
_text = .;
_stext = .;
.text : AT(ADDR(.text) - LOAD_OFFSET) {
/* .text..ivt comes first, bracketed by the *_ivt_text symbols */
__start_ivt_text = .;
*(.text..ivt)
__end_ivt_text = .;
TEXT_TEXT
SCHED_TEXT
LOCK_TEXT
KPROBES_TEXT
*(.gnu.linkonce.t*)
}
.text2 : AT(ADDR(.text2) - LOAD_OFFSET) {
*(.text2)
}
#ifdef CONFIG_SMP
.text..lock : AT(ADDR(.text..lock) - LOAD_OFFSET) {
*(.text..lock)
}
#endif
_etext = .;
/*
* Read-only data
*/
NOTES :code :note /* put .notes in text and mark in PT_NOTE */
code_continues : {
} : code /* switch back to regular program... */
EXCEPTION_TABLE(16)
/* MCA table */
. = ALIGN(16);
__mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET) {
__start___mca_table = .;
*(__mca_table)
__stop___mca_table = .;
}
.data..patch.phys_stack_reg : AT(ADDR(.data..patch.phys_stack_reg) - LOAD_OFFSET) {
__start___phys_stack_reg_patchlist = .;
*(.data..patch.phys_stack_reg)
__end___phys_stack_reg_patchlist = .;
}
/*
* Global data
*/
_data = .;
/* Unwind info & table: */
. = ALIGN(8);
.IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) {
*(.IA_64.unwind_info*)
}
/* the unwind table goes into both the code and the unwind program header */
.IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET) {
__start_unwind = .;
*(.IA_64.unwind*)
__end_unwind = .;
} :code :unwind
/* empty output section: back to the code program header only */
code_continues2 : {
} : code
RODATA
.opd : AT(ADDR(.opd) - LOAD_OFFSET) {
*(.opd)
}
/*
* Initialization code and data:
*/
. = ALIGN(PAGE_SIZE);
__init_begin = .;
INIT_TEXT_SECTION(PAGE_SIZE)
INIT_DATA_SECTION(16)
.data..patch.vtop : AT(ADDR(.data..patch.vtop) - LOAD_OFFSET) {
__start___vtop_patchlist = .;
*(.data..patch.vtop)
__end___vtop_patchlist = .;
}
.data..patch.rse : AT(ADDR(.data..patch.rse) - LOAD_OFFSET) {
__start___rse_patchlist = .;
*(.data..patch.rse)
__end___rse_patchlist = .;
}
.data..patch.mckinley_e9 : AT(ADDR(.data..patch.mckinley_e9) - LOAD_OFFSET) {
__start___mckinley_e9_bundles = .;
*(.data..patch.mckinley_e9)
__end___mckinley_e9_bundles = .;
}
#if defined(CONFIG_PARAVIRT)
. = ALIGN(16);
.paravirt_bundles : AT(ADDR(.paravirt_bundles) - LOAD_OFFSET) {
__start_paravirt_bundles = .;
*(.paravirt_bundles)
__stop_paravirt_bundles = .;
}
. = ALIGN(16);
.paravirt_insts : AT(ADDR(.paravirt_insts) - LOAD_OFFSET) {
__start_paravirt_insts = .;
*(.paravirt_insts)
__stop_paravirt_insts = .;
}
. = ALIGN(16);
.paravirt_branches : AT(ADDR(.paravirt_branches) - LOAD_OFFSET) {
__start_paravirt_branches = .;
*(.paravirt_branches)
__stop_paravirt_branches = .;
}
#endif
#if defined(CONFIG_IA64_GENERIC)
/* Machine Vector */
. = ALIGN(16);
.machvec : AT(ADDR(.machvec) - LOAD_OFFSET) {
machvec_start = .;
*(.machvec)
machvec_end = .;
}
#endif
#ifdef CONFIG_SMP
. = ALIGN(PERCPU_PAGE_SIZE);
__cpu0_per_cpu = .;
. = . + PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */
#endif
. = ALIGN(PAGE_SIZE);
__init_end = .;
.data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) {
PAGE_ALIGNED_DATA(PAGE_SIZE)
/* the gate section starts on a page boundary of its own */
. = ALIGN(PAGE_SIZE);
__start_gate_section = .;
*(.data..gate)
__stop_gate_section = .;
}
/*
* make sure the gate page doesn't expose
* kernel data
*/
. = ALIGN(PAGE_SIZE);
/* Per-cpu data: */
. = ALIGN(PERCPU_PAGE_SIZE);
PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu)
__phys_per_cpu_start = __per_cpu_load;
/*
* ensure percpu data fits
* into percpu page size
*/
. = __phys_per_cpu_start + PERCPU_PAGE_SIZE;
/* empty output section: selects the "data" program header for what follows */
data : {
} :data
.data : AT(ADDR(.data) - LOAD_OFFSET) {
_sdata = .;
INIT_TASK_DATA(PAGE_SIZE)
CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES)
READ_MOSTLY_DATA(SMP_CACHE_BYTES)
DATA_DATA
*(.data1)
*(.gnu.linkonce.d*)
CONSTRUCTORS
}
. = ALIGN(16); /* gp must be 16-byte aligned for exc. table */
.got : AT(ADDR(.got) - LOAD_OFFSET) {
*(.got.plt)
*(.got)
}
/* the global pointer is placed 2 MiB (0x200000) past the start of .got */
__gp = ADDR(.got) + 0x200000;
/*
* We want the small data sections together,
* so single-instruction offsets can access
* them all, and initialized data all before
* uninitialized, so we can shorten the
* on-disk segment size.
*/
.sdata : AT(ADDR(.sdata) - LOAD_OFFSET) {
*(.sdata)
*(.sdata1)
*(.srdata)
}
_edata = .;
BSS_SECTION(0, 0, 0)
_end = .;
/*
 * Empty output section selecting the "code" program header again.
 * NOTE(review): presumably so that the debug sections below are not
 * attributed to the data segment - confirm.
 */
code : {
} :code
STABS_DEBUG
DWARF_DEBUG
/* Default discards */
DISCARDS
}