Fixed MTP to work with TWRP

2025-09-09 01:28:05 -04:00 · 2018-06-19 23:16:04 +02:00 · 2018-06-19 23:16:04 +02:00 · f6dfaef42e
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@ -0,0 +1,113 @@
+#
+# Makefile for the linux kernel.
+#
+
+ifdef CONFIG_DYNAMIC_FTRACE
+CFLAGS_REMOVE_ftrace.o = -pg
+endif
+
+extra-y	:= head.o init_task.o vmlinux.lds
+
+obj-y := entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
+	 irq_lsapic.o ivt.o machvec.o pal.o paravirt_patchlist.o patch.o process.o perfmon.o ptrace.o sal.o		\
+	 salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
+	 unwind.o mca.o mca_asm.o topology.o dma-mapping.o
+
+obj-$(CONFIG_ACPI)		+= acpi.o acpi-ext.o
+obj-$(CONFIG_IA64_BRL_EMU)	+= brl_emu.o
+
+obj-$(CONFIG_IA64_PALINFO)	+= palinfo.o
+obj-$(CONFIG_IOSAPIC)		+= iosapic.o
+obj-$(CONFIG_MODULES)		+= module.o
+obj-$(CONFIG_SMP)		+= smp.o smpboot.o
+obj-$(CONFIG_NUMA)		+= numa.o
+obj-$(CONFIG_PERFMON)		+= perfmon_default_smpl.o
+obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
+obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
+obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o crash.o
+obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
+obj-$(CONFIG_AUDIT)		+= audit.o
+obj-$(CONFIG_PCI_MSI)		+= msi_ia64.o
+mca_recovery-y			+= mca_drv.o mca_drv_asm.o
+obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o
+obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
+
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirtentry.o \
+				   paravirt_patch.o
+
+obj-$(CONFIG_IA64_ESI)		+= esi.o
+ifneq ($(CONFIG_IA64_ESI),)
+obj-y				+= esi_stub.o	# must be in kernel proper
+endif
+obj-$(CONFIG_INTEL_IOMMU)	+= pci-dma.o
+obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb.o
+
+obj-$(CONFIG_BINFMT_ELF)	+= elfcore.o
+
+# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
+CFLAGS_traps.o  += -mfixed-range=f2-f5,f16-f31
+
+# The gate DSO image is built using a special linker script.
+include $(srctree)/arch/ia64/kernel/Makefile.gate
+# tell compiled for native
+CPPFLAGS_gate.lds += -D__IA64_GATE_PARAVIRTUALIZED_NATIVE
+
+# Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) based on config
+define sed-y
+	"/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
+endef
+quiet_cmd_nr_irqs = GEN     $@
+define cmd_nr_irqs
+	(set -e; \
+	 echo "#ifndef __ASM_NR_IRQS_H__"; \
+	 echo "#define __ASM_NR_IRQS_H__"; \
+	 echo "/*"; \
+	 echo " * DO NOT MODIFY."; \
+	 echo " *"; \
+	 echo " * This file was generated by Kbuild"; \
+	 echo " *"; \
+	 echo " */"; \
+	 echo ""; \
+	 sed -ne $(sed-y) $<; \
+	 echo ""; \
+	 echo "#endif" ) > $@
+endef
+
+# We use internal kbuild rules to avoid the "is up to date" message from make
+arch/$(SRCARCH)/kernel/nr-irqs.s: arch/$(SRCARCH)/kernel/nr-irqs.c
+	$(Q)mkdir -p $(dir $@)
+	$(call if_changed_dep,cc_s_c)
+
+include/generated/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s
+	$(Q)mkdir -p $(dir $@)
+	$(call cmd,nr_irqs)
+
+#
+# native ivt.S, entry.S and fsys.S
+#
+ASM_PARAVIRT_OBJS = ivt.o entry.o fsys.o
+define paravirtualized_native
+AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE
+AFLAGS_pvchk-sed-$(1) += -D__IA64_ASM_PARAVIRTUALIZED_PVCHECK
+extra-y += pvchk-$(1)
+endef
+$(foreach obj,$(ASM_PARAVIRT_OBJS),$(eval $(call paravirtualized_native,$(obj))))
+
+#
+# Checker for paravirtualizations of privileged operations.
+#
+quiet_cmd_pv_check_sed = PVCHK   $@
+define cmd_pv_check_sed
+	sed -f $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed $< > $@
+endef
+
+$(obj)/pvchk-sed-%.s: $(src)/%.S $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed FORCE
+	$(call if_changed_dep,as_s_S)
+$(obj)/pvchk-%.s: $(obj)/pvchk-sed-%.s FORCE
+	$(call if_changed,pv_check_sed)
+$(obj)/pvchk-%.o: $(obj)/pvchk-%.s FORCE
+	$(call if_changed,as_o_S)
+.PRECIOUS: $(obj)/pvchk-sed-%.s $(obj)/pvchk-%.s $(obj)/pvchk-%.o
--- a/arch/ia64/kernel/Makefile.gate
+++ b/arch/ia64/kernel/Makefile.gate
@ -0,0 +1,27 @@
+# The gate DSO image is built using a special linker script.
+
+targets += gate.so gate-syms.o
+
+extra-y += gate.so gate-syms.o gate.lds gate.o
+
+CPPFLAGS_gate.lds := -P -C -U$(ARCH)
+
+quiet_cmd_gate = GATE $@
+      cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
+
+GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
+		     $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
+	$(call if_changed,gate)
+
+$(obj)/built-in.o: $(obj)/gate-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
+
+GATECFLAGS_gate-syms.o = -r
+$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
+	$(call if_changed,gate)
+
+# gate-data.o contains the gate DSO image as data in section .data..gate.
+# We must build gate.so before we can assemble it.
+# Note: kbuild does not track this dependency due to usage of .incbin
+$(obj)/gate-data.o: $(obj)/gate.so
--- a/arch/ia64/kernel/acpi-ext.c
+++ b/arch/ia64/kernel/acpi-ext.c
@ -0,0 +1,104 @@
+/*
+ * (c) Copyright 2003, 2006 Hewlett-Packard Development Company, L.P.
+ *	Alex Williamson <alex.williamson@hp.com>
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+
+#include <asm/acpi-ext.h>
+
+/*
+ * Device CSRs that do not appear in PCI config space should be described
+ * via ACPI.  This would normally be done with Address Space Descriptors
+ * marked as "consumer-only," but old versions of Windows and Linux ignore
+ * the producer/consumer flag, so HP invented a vendor-defined resource to
+ * describe the location and size of CSR space.
+ */
+
+struct acpi_vendor_uuid hp_ccsr_uuid = {
+	.subtype = 2,
+	.data = { 0xf9, 0xad, 0xe9, 0x69, 0x4f, 0x92, 0x5f, 0xab, 0xf6, 0x4a,
+	    0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad },
+};
+
+static acpi_status hp_ccsr_locate(acpi_handle obj, u64 *base, u64 *length)
+{
+	acpi_status status;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_resource *resource;
+	struct acpi_resource_vendor_typed *vendor;
+
+	status = acpi_get_vendor_resource(obj, METHOD_NAME__CRS, &hp_ccsr_uuid,
+		&buffer);
+
+	resource = buffer.pointer;
+	vendor = &resource->data.vendor_typed;
+
+	if (ACPI_FAILURE(status) || vendor->byte_length < 16) {
+		status = AE_NOT_FOUND;
+		goto exit;
+	}
+
+	memcpy(base, vendor->byte_data, sizeof(*base));
+	memcpy(length, vendor->byte_data + 8, sizeof(*length));
+
+  exit:
+	kfree(buffer.pointer);
+	return status;
+}
+
+struct csr_space {
+	u64	base;
+	u64	length;
+};
+
+static acpi_status find_csr_space(struct acpi_resource *resource, void *data)
+{
+	struct csr_space *space = data;
+	struct acpi_resource_address64 addr;
+	acpi_status status;
+
+	status = acpi_resource_to_address64(resource, &addr);
+	if (ACPI_SUCCESS(status) &&
+	    addr.resource_type == ACPI_MEMORY_RANGE &&
+	    addr.address_length &&
+	    addr.producer_consumer == ACPI_CONSUMER) {
+		space->base = addr.minimum;
+		space->length = addr.address_length;
+		return AE_CTRL_TERMINATE;
+	}
+	return AE_OK;		/* keep looking */
+}
+
+static acpi_status hp_crs_locate(acpi_handle obj, u64 *base, u64 *length)
+{
+	struct csr_space space = { 0, 0 };
+
+	acpi_walk_resources(obj, METHOD_NAME__CRS, find_csr_space, &space);
+	if (!space.length)
+		return AE_NOT_FOUND;
+
+	*base = space.base;
+	*length = space.length;
+	return AE_OK;
+}
+
+acpi_status hp_acpi_csr_space(acpi_handle obj, u64 *csr_base, u64 *csr_length)
+{
+	acpi_status status;
+
+	status = hp_ccsr_locate(obj, csr_base, csr_length);
+	if (ACPI_SUCCESS(status))
+		return status;
+
+	return hp_crs_locate(obj, csr_base, csr_length);
+}
+EXPORT_SYMBOL(hp_acpi_csr_space);
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@ -0,0 +1,290 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ */
+
+#define ASM_OFFSETS_C 1
+
+#include <linux/sched.h>
+#include <linux/pid.h>
+#include <linux/clocksource.h>
+#include <linux/kbuild.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/siginfo.h>
+#include <asm/sigcontext.h>
+#include <asm/mca.h>
+
+#include "../kernel/sigframe.h"
+#include "../kernel/fsyscall_gtod_data.h"
+
+void foo(void)
+{
+	DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct));
+	DEFINE(IA64_THREAD_INFO_SIZE, sizeof (struct thread_info));
+	DEFINE(IA64_PT_REGS_SIZE, sizeof (struct pt_regs));
+	DEFINE(IA64_SWITCH_STACK_SIZE, sizeof (struct switch_stack));
+	DEFINE(IA64_SIGINFO_SIZE, sizeof (struct siginfo));
+	DEFINE(IA64_CPU_SIZE, sizeof (struct cpuinfo_ia64));
+	DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
+	DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
+
+	BUILD_BUG_ON(sizeof(struct upid) != 32);
+	DEFINE(IA64_UPID_SHIFT, 5);
+
+	BLANK();
+
+	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
+	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
+	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
+	DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
+	DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
+	DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime));
+#endif
+
+	BLANK();
+
+	DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
+	DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
+	DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
+	DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
+	DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
+	DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
+	DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
+	DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
+	DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
+	DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
+	DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
+	DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
+	DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
+	DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
+
+	BLANK();
+
+	DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock));
+
+	BLANK();
+
+	DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct,
+							     group_stop_count));
+	DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending));
+
+	BLANK();
+
+	DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6));
+	DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7));
+	DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd));
+	DEFINE(IA64_PT_REGS_AR_SSD_OFFSET, offsetof (struct pt_regs, ar_ssd));
+	DEFINE(IA64_PT_REGS_R8_OFFSET, offsetof (struct pt_regs, r8));
+	DEFINE(IA64_PT_REGS_R9_OFFSET, offsetof (struct pt_regs, r9));
+	DEFINE(IA64_PT_REGS_R10_OFFSET, offsetof (struct pt_regs, r10));
+	DEFINE(IA64_PT_REGS_R11_OFFSET, offsetof (struct pt_regs, r11));
+	DEFINE(IA64_PT_REGS_CR_IPSR_OFFSET, offsetof (struct pt_regs, cr_ipsr));
+	DEFINE(IA64_PT_REGS_CR_IIP_OFFSET, offsetof (struct pt_regs, cr_iip));
+	DEFINE(IA64_PT_REGS_CR_IFS_OFFSET, offsetof (struct pt_regs, cr_ifs));
+	DEFINE(IA64_PT_REGS_AR_UNAT_OFFSET, offsetof (struct pt_regs, ar_unat));
+	DEFINE(IA64_PT_REGS_AR_PFS_OFFSET, offsetof (struct pt_regs, ar_pfs));
+	DEFINE(IA64_PT_REGS_AR_RSC_OFFSET, offsetof (struct pt_regs, ar_rsc));
+	DEFINE(IA64_PT_REGS_AR_RNAT_OFFSET, offsetof (struct pt_regs, ar_rnat));
+
+	DEFINE(IA64_PT_REGS_AR_BSPSTORE_OFFSET, offsetof (struct pt_regs, ar_bspstore));
+	DEFINE(IA64_PT_REGS_PR_OFFSET, offsetof (struct pt_regs, pr));
+	DEFINE(IA64_PT_REGS_B0_OFFSET, offsetof (struct pt_regs, b0));
+	DEFINE(IA64_PT_REGS_LOADRS_OFFSET, offsetof (struct pt_regs, loadrs));
+	DEFINE(IA64_PT_REGS_R1_OFFSET, offsetof (struct pt_regs, r1));
+	DEFINE(IA64_PT_REGS_R12_OFFSET, offsetof (struct pt_regs, r12));
+	DEFINE(IA64_PT_REGS_R13_OFFSET, offsetof (struct pt_regs, r13));
+	DEFINE(IA64_PT_REGS_AR_FPSR_OFFSET, offsetof (struct pt_regs, ar_fpsr));
+	DEFINE(IA64_PT_REGS_R15_OFFSET, offsetof (struct pt_regs, r15));
+	DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14));
+	DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2));
+	DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3));
+	DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16));
+	DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17));
+	DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18));
+	DEFINE(IA64_PT_REGS_R19_OFFSET, offsetof (struct pt_regs, r19));
+	DEFINE(IA64_PT_REGS_R20_OFFSET, offsetof (struct pt_regs, r20));
+	DEFINE(IA64_PT_REGS_R21_OFFSET, offsetof (struct pt_regs, r21));
+	DEFINE(IA64_PT_REGS_R22_OFFSET, offsetof (struct pt_regs, r22));
+	DEFINE(IA64_PT_REGS_R23_OFFSET, offsetof (struct pt_regs, r23));
+	DEFINE(IA64_PT_REGS_R24_OFFSET, offsetof (struct pt_regs, r24));
+	DEFINE(IA64_PT_REGS_R25_OFFSET, offsetof (struct pt_regs, r25));
+	DEFINE(IA64_PT_REGS_R26_OFFSET, offsetof (struct pt_regs, r26));
+	DEFINE(IA64_PT_REGS_R27_OFFSET, offsetof (struct pt_regs, r27));
+	DEFINE(IA64_PT_REGS_R28_OFFSET, offsetof (struct pt_regs, r28));
+	DEFINE(IA64_PT_REGS_R29_OFFSET, offsetof (struct pt_regs, r29));
+	DEFINE(IA64_PT_REGS_R30_OFFSET, offsetof (struct pt_regs, r30));
+	DEFINE(IA64_PT_REGS_R31_OFFSET, offsetof (struct pt_regs, r31));
+	DEFINE(IA64_PT_REGS_AR_CCV_OFFSET, offsetof (struct pt_regs, ar_ccv));
+	DEFINE(IA64_PT_REGS_F6_OFFSET, offsetof (struct pt_regs, f6));
+	DEFINE(IA64_PT_REGS_F7_OFFSET, offsetof (struct pt_regs, f7));
+	DEFINE(IA64_PT_REGS_F8_OFFSET, offsetof (struct pt_regs, f8));
+	DEFINE(IA64_PT_REGS_F9_OFFSET, offsetof (struct pt_regs, f9));
+	DEFINE(IA64_PT_REGS_F10_OFFSET, offsetof (struct pt_regs, f10));
+	DEFINE(IA64_PT_REGS_F11_OFFSET, offsetof (struct pt_regs, f11));
+
+	BLANK();
+
+	DEFINE(IA64_SWITCH_STACK_CALLER_UNAT_OFFSET, offsetof (struct switch_stack, caller_unat));
+	DEFINE(IA64_SWITCH_STACK_AR_FPSR_OFFSET, offsetof (struct switch_stack, ar_fpsr));
+	DEFINE(IA64_SWITCH_STACK_F2_OFFSET, offsetof (struct switch_stack, f2));
+	DEFINE(IA64_SWITCH_STACK_F3_OFFSET, offsetof (struct switch_stack, f3));
+	DEFINE(IA64_SWITCH_STACK_F4_OFFSET, offsetof (struct switch_stack, f4));
+	DEFINE(IA64_SWITCH_STACK_F5_OFFSET, offsetof (struct switch_stack, f5));
+	DEFINE(IA64_SWITCH_STACK_F12_OFFSET, offsetof (struct switch_stack, f12));
+	DEFINE(IA64_SWITCH_STACK_F13_OFFSET, offsetof (struct switch_stack, f13));
+	DEFINE(IA64_SWITCH_STACK_F14_OFFSET, offsetof (struct switch_stack, f14));
+	DEFINE(IA64_SWITCH_STACK_F15_OFFSET, offsetof (struct switch_stack, f15));
+	DEFINE(IA64_SWITCH_STACK_F16_OFFSET, offsetof (struct switch_stack, f16));
+	DEFINE(IA64_SWITCH_STACK_F17_OFFSET, offsetof (struct switch_stack, f17));
+	DEFINE(IA64_SWITCH_STACK_F18_OFFSET, offsetof (struct switch_stack, f18));
+	DEFINE(IA64_SWITCH_STACK_F19_OFFSET, offsetof (struct switch_stack, f19));
+	DEFINE(IA64_SWITCH_STACK_F20_OFFSET, offsetof (struct switch_stack, f20));
+	DEFINE(IA64_SWITCH_STACK_F21_OFFSET, offsetof (struct switch_stack, f21));
+	DEFINE(IA64_SWITCH_STACK_F22_OFFSET, offsetof (struct switch_stack, f22));
+	DEFINE(IA64_SWITCH_STACK_F23_OFFSET, offsetof (struct switch_stack, f23));
+	DEFINE(IA64_SWITCH_STACK_F24_OFFSET, offsetof (struct switch_stack, f24));
+	DEFINE(IA64_SWITCH_STACK_F25_OFFSET, offsetof (struct switch_stack, f25));
+	DEFINE(IA64_SWITCH_STACK_F26_OFFSET, offsetof (struct switch_stack, f26));
+	DEFINE(IA64_SWITCH_STACK_F27_OFFSET, offsetof (struct switch_stack, f27));
+	DEFINE(IA64_SWITCH_STACK_F28_OFFSET, offsetof (struct switch_stack, f28));
+	DEFINE(IA64_SWITCH_STACK_F29_OFFSET, offsetof (struct switch_stack, f29));
+	DEFINE(IA64_SWITCH_STACK_F30_OFFSET, offsetof (struct switch_stack, f30));
+	DEFINE(IA64_SWITCH_STACK_F31_OFFSET, offsetof (struct switch_stack, f31));
+	DEFINE(IA64_SWITCH_STACK_R4_OFFSET, offsetof (struct switch_stack, r4));
+	DEFINE(IA64_SWITCH_STACK_R5_OFFSET, offsetof (struct switch_stack, r5));
+	DEFINE(IA64_SWITCH_STACK_R6_OFFSET, offsetof (struct switch_stack, r6));
+	DEFINE(IA64_SWITCH_STACK_R7_OFFSET, offsetof (struct switch_stack, r7));
+	DEFINE(IA64_SWITCH_STACK_B0_OFFSET, offsetof (struct switch_stack, b0));
+	DEFINE(IA64_SWITCH_STACK_B1_OFFSET, offsetof (struct switch_stack, b1));
+	DEFINE(IA64_SWITCH_STACK_B2_OFFSET, offsetof (struct switch_stack, b2));
+	DEFINE(IA64_SWITCH_STACK_B3_OFFSET, offsetof (struct switch_stack, b3));
+	DEFINE(IA64_SWITCH_STACK_B4_OFFSET, offsetof (struct switch_stack, b4));
+	DEFINE(IA64_SWITCH_STACK_B5_OFFSET, offsetof (struct switch_stack, b5));
+	DEFINE(IA64_SWITCH_STACK_AR_PFS_OFFSET, offsetof (struct switch_stack, ar_pfs));
+	DEFINE(IA64_SWITCH_STACK_AR_LC_OFFSET, offsetof (struct switch_stack, ar_lc));
+	DEFINE(IA64_SWITCH_STACK_AR_UNAT_OFFSET, offsetof (struct switch_stack, ar_unat));
+	DEFINE(IA64_SWITCH_STACK_AR_RNAT_OFFSET, offsetof (struct switch_stack, ar_rnat));
+	DEFINE(IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET, offsetof (struct switch_stack, ar_bspstore));
+	DEFINE(IA64_SWITCH_STACK_PR_OFFSET, offsetof (struct switch_stack, pr));
+
+	BLANK();
+
+	DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip));
+	DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, offsetof (struct sigcontext, sc_ar_bsp));
+	DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, sc_ar_fpsr));
+	DEFINE(IA64_SIGCONTEXT_AR_RNAT_OFFSET, offsetof (struct sigcontext, sc_ar_rnat));
+	DEFINE(IA64_SIGCONTEXT_AR_UNAT_OFFSET, offsetof (struct sigcontext, sc_ar_unat));
+	DEFINE(IA64_SIGCONTEXT_B0_OFFSET, offsetof (struct sigcontext, sc_br[0]));
+	DEFINE(IA64_SIGCONTEXT_CFM_OFFSET, offsetof (struct sigcontext, sc_cfm));
+	DEFINE(IA64_SIGCONTEXT_FLAGS_OFFSET, offsetof (struct sigcontext, sc_flags));
+	DEFINE(IA64_SIGCONTEXT_FR6_OFFSET, offsetof (struct sigcontext, sc_fr[6]));
+	DEFINE(IA64_SIGCONTEXT_PR_OFFSET, offsetof (struct sigcontext, sc_pr));
+	DEFINE(IA64_SIGCONTEXT_R12_OFFSET, offsetof (struct sigcontext, sc_gr[12]));
+	DEFINE(IA64_SIGCONTEXT_RBS_BASE_OFFSET,offsetof (struct sigcontext, sc_rbs_base));
+	DEFINE(IA64_SIGCONTEXT_LOADRS_OFFSET, offsetof (struct sigcontext, sc_loadrs));
+
+	BLANK();
+
+	DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal));
+
+	BLANK();
+
+	DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0));
+	DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1));
+	DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2));
+	DEFINE(IA64_SIGFRAME_HANDLER_OFFSET, offsetof (struct sigframe, handler));
+	DEFINE(IA64_SIGFRAME_SIGCONTEXT_OFFSET, offsetof (struct sigframe, sc));
+	BLANK();
+    /* for assembly files which can't include sched.h: */
+	DEFINE(IA64_CLONE_VFORK, CLONE_VFORK);
+	DEFINE(IA64_CLONE_VM, CLONE_VM);
+
+	BLANK();
+	DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET,
+	       offsetof (struct cpuinfo_ia64, nsec_per_cyc));
+	DEFINE(IA64_CPUINFO_PTCE_BASE_OFFSET,
+	       offsetof (struct cpuinfo_ia64, ptce_base));
+	DEFINE(IA64_CPUINFO_PTCE_COUNT_OFFSET,
+	       offsetof (struct cpuinfo_ia64, ptce_count));
+	DEFINE(IA64_CPUINFO_PTCE_STRIDE_OFFSET,
+	       offsetof (struct cpuinfo_ia64, ptce_stride));
+	BLANK();
+	DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET,
+	       offsetof (struct timespec, tv_nsec));
+
+	DEFINE(CLONE_SETTLS_BIT, 19);
+#if CLONE_SETTLS != (1<<19)
+# error "CLONE_SETTLS_BIT incorrect, please fix"
+#endif
+
+	BLANK();
+	DEFINE(IA64_MCA_CPU_MCA_STACK_OFFSET,
+	       offsetof (struct ia64_mca_cpu, mca_stack));
+	DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET,
+	       offsetof (struct ia64_mca_cpu, init_stack));
+	BLANK();
+	DEFINE(IA64_SAL_OS_STATE_OS_GP_OFFSET,
+	       offsetof (struct ia64_sal_os_state, os_gp));
+	DEFINE(IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET,
+	       offsetof (struct ia64_sal_os_state, proc_state_param));
+	DEFINE(IA64_SAL_OS_STATE_SAL_RA_OFFSET,
+	       offsetof (struct ia64_sal_os_state, sal_ra));
+	DEFINE(IA64_SAL_OS_STATE_SAL_GP_OFFSET,
+	       offsetof (struct ia64_sal_os_state, sal_gp));
+	DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET,
+	       offsetof (struct ia64_sal_os_state, pal_min_state));
+	DEFINE(IA64_SAL_OS_STATE_OS_STATUS_OFFSET,
+	       offsetof (struct ia64_sal_os_state, os_status));
+	DEFINE(IA64_SAL_OS_STATE_CONTEXT_OFFSET,
+	       offsetof (struct ia64_sal_os_state, context));
+	DEFINE(IA64_SAL_OS_STATE_SIZE,
+	       sizeof (struct ia64_sal_os_state));
+	BLANK();
+
+	DEFINE(IA64_PMSA_GR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_gr));
+	DEFINE(IA64_PMSA_BANK1_GR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_bank1_gr));
+	DEFINE(IA64_PMSA_PR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_pr));
+	DEFINE(IA64_PMSA_BR0_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_br0));
+	DEFINE(IA64_PMSA_RSC_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_rsc));
+	DEFINE(IA64_PMSA_IIP_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_iip));
+	DEFINE(IA64_PMSA_IPSR_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_ipsr));
+	DEFINE(IA64_PMSA_IFS_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_ifs));
+	DEFINE(IA64_PMSA_XIP_OFFSET,
+	       offsetof (struct pal_min_state_area_s, pmsa_xip));
+	BLANK();
+
+	/* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
+	DEFINE(IA64_GTOD_SEQ_OFFSET,
+	       offsetof (struct fsyscall_gtod_data_t, seq));
+	DEFINE(IA64_GTOD_WALL_TIME_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, wall_time));
+	DEFINE(IA64_GTOD_MONO_TIME_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, monotonic_time));
+	DEFINE(IA64_CLKSRC_MASK_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, clk_mask));
+	DEFINE(IA64_CLKSRC_MULT_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, clk_mult));
+	DEFINE(IA64_CLKSRC_SHIFT_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, clk_shift));
+	DEFINE(IA64_CLKSRC_MMIO_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, clk_fsys_mmio));
+	DEFINE(IA64_CLKSRC_CYCLE_LAST_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, clk_cycle_last));
+	DEFINE(IA64_ITC_JITTER_OFFSET,
+		offsetof (struct itc_jitter_data_t, itc_jitter));
+	DEFINE(IA64_ITC_LASTCYCLE_OFFSET,
+		offsetof (struct itc_jitter_data_t, itc_lastcycle));
+
+}
--- a/arch/ia64/kernel/audit.c
+++ b/arch/ia64/kernel/audit.c
@ -0,0 +1,60 @@
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/audit.h>
+#include <asm/unistd.h>
+
+static unsigned dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+static unsigned read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+static unsigned write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+static unsigned chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+static unsigned signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int audit_classify_arch(int arch)
+{
+	return 0;
+}
+
+int audit_classify_syscall(int abi, unsigned syscall)
+{
+	switch(syscall) {
+	case __NR_open:
+		return 2;
+	case __NR_openat:
+		return 3;
+	case __NR_execve:
+		return 5;
+	default:
+		return 0;
+	}
+}
+
+static int __init audit_classes_init(void)
+{
+	audit_register_class(AUDIT_CLASS_WRITE, write_class);
+	audit_register_class(AUDIT_CLASS_READ, read_class);
+	audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
+	audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
+	audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
+	return 0;
+}
+
+__initcall(audit_classes_init);
--- a/arch/ia64/kernel/brl_emu.c
+++ b/arch/ia64/kernel/brl_emu.c
@ -0,0 +1,234 @@
+/*
+ *  Emulation of the "brl" instruction for IA64 processors that
+ *  don't support it in hardware.
+ *  Author: Stephan Zeisset, Intel Corp. <Stephan.Zeisset@intel.com>
+ *
+ *    02/22/02	D. Mosberger	Clear si_flgs, si_isr, and si_imm to avoid
+ *				leaking kernel bits.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+
+extern char ia64_set_b1, ia64_set_b2, ia64_set_b3, ia64_set_b4, ia64_set_b5;
+
+struct illegal_op_return {
+	unsigned long fkt, arg1, arg2, arg3;
+};
+
+/*
+ *  The unimplemented bits of a virtual address must be set
+ *  to the value of the most significant implemented bit.
+ *  unimpl_va_mask includes all unimplemented bits and
+ *  the most significant implemented bit, so the result
+ *  of an and operation with the mask must be all 0's
+ *  or all 1's for the address to be valid.
+ */
+#define unimplemented_virtual_address(va) (						\
+	((va) & local_cpu_data->unimpl_va_mask) != 0 &&					\
+	((va) & local_cpu_data->unimpl_va_mask) != local_cpu_data->unimpl_va_mask	\
+)
+
+/*
+ *  The unimplemented bits of a physical address must be 0.
+ *  unimpl_pa_mask includes all unimplemented bits, so the result
+ *  of an and operation with the mask must be all 0's for the
+ *  address to be valid.
+ */
+#define unimplemented_physical_address(pa) (		\
+	((pa) & local_cpu_data->unimpl_pa_mask) != 0	\
+)
+
+/*
+ *  Handle an illegal operation fault that was caused by an
+ *  unimplemented "brl" instruction.
+ *  If we are not successful (e.g because the illegal operation
+ *  wasn't caused by a "brl" after all), we return -1.
+ *  If we are successful, we return either 0 or the address
+ *  of a "fixup" function for manipulating preserved register
+ *  state.
+ */
+
+struct illegal_op_return
+ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec)
+{
+	unsigned long bundle[2];
+	unsigned long opcode, btype, qp, offset, cpl;
+	unsigned long next_ip;
+	struct siginfo siginfo;
+	struct illegal_op_return rv;
+	long tmp_taken, unimplemented_address;
+
+	rv.fkt = (unsigned long) -1;
+
+	/*
+	 *  Decode the instruction bundle.
+	 */
+
+	if (copy_from_user(bundle, (void *) (regs->cr_iip), sizeof(bundle)))
+		return rv;
+
+	next_ip = (unsigned long) regs->cr_iip + 16;
+
+	/* "brl" must be in slot 2. */
+	if (ia64_psr(regs)->ri != 1) return rv;
+
+	/* Must be "mlx" template */
+	if ((bundle[0] & 0x1e) != 0x4) return rv;
+
+	opcode = (bundle[1] >> 60);
+	btype = ((bundle[1] >> 29) & 0x7);
+	qp = ((bundle[1] >> 23) & 0x3f);
+	offset = ((bundle[1] & 0x0800000000000000L) << 4)
+		| ((bundle[1] & 0x00fffff000000000L) >> 32)
+		| ((bundle[1] & 0x00000000007fffffL) << 40)
+		| ((bundle[0] & 0xffff000000000000L) >> 24);
+
+	tmp_taken = regs->pr & (1L << qp);
+
+	switch(opcode) {
+
+		case 0xC:
+			/*
+			 *  Long Branch.
+			 */
+			if (btype != 0) return rv;
+			rv.fkt = 0;
+			if (!(tmp_taken)) {
+				/*
+				 *  Qualifying predicate is 0.
+				 *  Skip instruction.
+				 */
+				regs->cr_iip = next_ip;
+				ia64_psr(regs)->ri = 0;
+				return rv;
+			}
+			break;
+
+		case 0xD:
+			/*
+			 *  Long Call.
+			 */
+			rv.fkt = 0;
+			if (!(tmp_taken)) {
+				/*
+				 *  Qualifying predicate is 0.
+				 *  Skip instruction.
+				 */
+				regs->cr_iip = next_ip;
+				ia64_psr(regs)->ri = 0;
+				return rv;
+			}
+
+			/*
+			 *  BR[btype] = IP+16
+			 */
+			switch(btype) {
+				case 0:
+					regs->b0 = next_ip;
+					break;
+				case 1:
+					rv.fkt = (unsigned long) &ia64_set_b1;
+					break;
+				case 2:
+					rv.fkt = (unsigned long) &ia64_set_b2;
+					break;
+				case 3:
+					rv.fkt = (unsigned long) &ia64_set_b3;
+					break;
+				case 4:
+					rv.fkt = (unsigned long) &ia64_set_b4;
+					break;
+				case 5:
+					rv.fkt = (unsigned long) &ia64_set_b5;
+					break;
+				case 6:
+					regs->b6 = next_ip;
+					break;
+				case 7:
+					regs->b7 = next_ip;
+					break;
+			}
+			rv.arg1 = next_ip;
+
+			/*
+			 *  AR[PFS].pfm = CFM
+			 *  AR[PFS].pec = AR[EC]
+			 *  AR[PFS].ppl = PSR.cpl
+			 */
+			cpl = ia64_psr(regs)->cpl;
+			regs->ar_pfs = ((regs->cr_ifs & 0x3fffffffff)
+					| (ar_ec << 52) | (cpl << 62));
+
+			/*
+			 *  CFM.sof -= CFM.sol
+			 *  CFM.sol = 0
+			 *  CFM.sor = 0
+			 *  CFM.rrb.gr = 0
+			 *  CFM.rrb.fr = 0
+			 *  CFM.rrb.pr = 0
+			 */
+			regs->cr_ifs = ((regs->cr_ifs & 0xffffffc00000007f)
+					- ((regs->cr_ifs >> 7) & 0x7f));
+
+			break;
+
+		default:
+			/*
+			 *  Unknown opcode.
+			 */
+			return rv;
+
+	}
+
+	regs->cr_iip += offset;
+	ia64_psr(regs)->ri = 0;
+
+	if (ia64_psr(regs)->it == 0)
+		unimplemented_address = unimplemented_physical_address(regs->cr_iip);
+	else
+		unimplemented_address = unimplemented_virtual_address(regs->cr_iip);
+
+	if (unimplemented_address) {
+		/*
+		 *  The target address contains unimplemented bits.
+		 */
+		printk(KERN_DEBUG "Woah! Unimplemented Instruction Address Trap!\n");
+		siginfo.si_signo = SIGILL;
+		siginfo.si_errno = 0;
+		siginfo.si_flags = 0;
+		siginfo.si_isr = 0;
+		siginfo.si_imm = 0;
+		siginfo.si_code = ILL_BADIADDR;
+		force_sig_info(SIGILL, &siginfo, current);
+	} else if (ia64_psr(regs)->tb) {
+		/*
+		 *  Branch Tracing is enabled.
+		 *  Force a taken branch signal.
+		 */
+		siginfo.si_signo = SIGTRAP;
+		siginfo.si_errno = 0;
+		siginfo.si_code = TRAP_BRANCH;
+		siginfo.si_flags = 0;
+		siginfo.si_isr = 0;
+		siginfo.si_addr = 0;
+		siginfo.si_imm = 0;
+		force_sig_info(SIGTRAP, &siginfo, current);
+	} else if (ia64_psr(regs)->ss) {
+		/*
+		 *  Single Step is enabled.
+		 *  Force a trace signal.
+		 */
+		siginfo.si_signo = SIGTRAP;
+		siginfo.si_errno = 0;
+		siginfo.si_code = TRAP_TRACE;
+		siginfo.si_flags = 0;
+		siginfo.si_isr = 0;
+		siginfo.si_addr = 0;
+		siginfo.si_imm = 0;
+		force_sig_info(SIGTRAP, &siginfo, current);
+	}
+	return rv;
+}
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@ -0,0 +1,286 @@
+/*
+ * arch/ia64/kernel/crash.c
+ *
+ * Architecture specific (ia64) functions for kexec based crash dumps.
+ *
+ * Created by: Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Intel Corp	Zou Nan hai <nanhai.zou@intel.com>
+ *
+ */
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/crash_dump.h>
+#include <linux/bootmem.h>
+#include <linux/kexec.h>
+#include <linux/elfcore.h>
+#include <linux/sysctl.h>
+#include <linux/init.h>
+#include <linux/kdebug.h>
+
+#include <asm/mca.h>
+
+int kdump_status[NR_CPUS];
+static atomic_t kdump_cpu_frozen;
+atomic_t kdump_in_progress;
+static int kdump_freeze_monarch;
+static int kdump_on_init = 1;
+static int kdump_on_fatal_mca = 1;
+
+static inline Elf64_Word
+*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
+		size_t data_len)
+{
+	struct elf_note *note = (struct elf_note *)buf;
+	note->n_namesz = strlen(name) + 1;
+	note->n_descsz = data_len;
+	note->n_type   = type;
+	buf += (sizeof(*note) + 3)/4;
+	memcpy(buf, name, note->n_namesz);
+	buf += (note->n_namesz + 3)/4;
+	memcpy(buf, data, data_len);
+	buf += (data_len + 3)/4;
+	return buf;
+}
+
+static void
+final_note(void *buf)
+{
+	memset(buf, 0, sizeof(struct elf_note));
+}
+
+extern void ia64_dump_cpu_regs(void *);
+
+static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
+
+void
+crash_save_this_cpu(void)
+{
+	void *buf;
+	unsigned long cfm, sof, sol;
+
+	int cpu = smp_processor_id();
+	struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu);
+
+	elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg);
+	memset(prstatus, 0, sizeof(*prstatus));
+	prstatus->pr_pid = current->pid;
+
+	ia64_dump_cpu_regs(dst);
+	cfm = dst[43];
+	sol = (cfm >> 7) & 0x7f;
+	sof = cfm & 0x7f;
+	dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
+			sof - sol);
+
+	buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
+	if (!buf)
+		return;
+	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, prstatus,
+			sizeof(*prstatus));
+	final_note(buf);
+}
+
+#ifdef CONFIG_SMP
+static int
+kdump_wait_cpu_freeze(void)
+{
+	int cpu_num = num_online_cpus() - 1;
+	int timeout = 1000;
+	while(timeout-- > 0) {
+		if (atomic_read(&kdump_cpu_frozen) == cpu_num)
+			return 0;
+		udelay(1000);
+	}
+	return 1;
+}
+#endif
+
+void
+machine_crash_shutdown(struct pt_regs *pt)
+{
+	/* This function is only called after the system
+	 * has paniced or is otherwise in a critical state.
+	 * The minimum amount of code to allow a kexec'd kernel
+	 * to run successfully needs to happen here.
+	 *
+	 * In practice this means shooting down the other cpus in
+	 * an SMP system.
+	 */
+	kexec_disable_iosapic();
+#ifdef CONFIG_SMP
+	/*
+	 * If kdump_on_init is set and an INIT is asserted here, kdump will
+	 * be started again via INIT monarch.
+	 */
+	local_irq_disable();
+	ia64_set_psr_mc();	/* mask MCA/INIT */
+	if (atomic_inc_return(&kdump_in_progress) != 1)
+		unw_init_running(kdump_cpu_freeze, NULL);
+
+	/*
+	 * Now this cpu is ready for kdump.
+	 * Stop all others by IPI or INIT.  They could receive INIT from
+	 * outside and might be INIT monarch, but only thing they have to
+	 * do is falling into kdump_cpu_freeze().
+	 *
+	 * If an INIT is asserted here:
+	 * - All receivers might be slaves, since some of cpus could already
+	 *   be frozen and INIT might be masked on monarch.  In this case,
+	 *   all slaves will be frozen soon since kdump_in_progress will let
+	 *   them into DIE_INIT_SLAVE_LEAVE.
+	 * - One might be a monarch, but INIT rendezvous will fail since
+	 *   at least this cpu already have INIT masked so it never join
+	 *   to the rendezvous.  In this case, all slaves and monarch will
+	 *   be frozen soon with no wait since the INIT rendezvous is skipped
+	 *   by kdump_in_progress.
+	 */
+	kdump_smp_send_stop();
+	/* not all cpu response to IPI, send INIT to freeze them */
+	if (kdump_wait_cpu_freeze()) {
+		kdump_smp_send_init();
+		/* wait again, don't go ahead if possible */
+		kdump_wait_cpu_freeze();
+	}
+#endif
+}
+
+static void
+machine_kdump_on_init(void)
+{
+	crash_save_vmcoreinfo();
+	local_irq_disable();
+	kexec_disable_iosapic();
+	machine_kexec(ia64_kimage);
+}
+
+void
+kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
+{
+	int cpuid;
+
+	local_irq_disable();
+	cpuid = smp_processor_id();
+	crash_save_this_cpu();
+	current->thread.ksp = (__u64)info->sw - 16;
+
+	ia64_set_psr_mc();	/* mask MCA/INIT and stop reentrance */
+
+	atomic_inc(&kdump_cpu_frozen);
+	kdump_status[cpuid] = 1;
+	mb();
+	for (;;)
+		cpu_relax();
+}
+
+static int
+kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
+{
+	struct ia64_mca_notify_die *nd;
+	struct die_args *args = data;
+
+	if (atomic_read(&kdump_in_progress)) {
+		switch (val) {
+		case DIE_INIT_MONARCH_LEAVE:
+			if (!kdump_freeze_monarch)
+				break;
+			/* fall through */
+		case DIE_INIT_SLAVE_LEAVE:
+		case DIE_INIT_MONARCH_ENTER:
+		case DIE_MCA_RENDZVOUS_LEAVE:
+			unw_init_running(kdump_cpu_freeze, NULL);
+			break;
+		}
+	}
+
+	if (!kdump_on_init && !kdump_on_fatal_mca)
+		return NOTIFY_DONE;
+
+	if (!ia64_kimage) {
+		if (val == DIE_INIT_MONARCH_LEAVE)
+			ia64_mca_printk(KERN_NOTICE
+					"%s: kdump not configured\n",
+					__func__);
+		return NOTIFY_DONE;
+	}
+
+	if (val != DIE_INIT_MONARCH_LEAVE &&
+	    val != DIE_INIT_MONARCH_PROCESS &&
+	    val != DIE_MCA_MONARCH_LEAVE)
+		return NOTIFY_DONE;
+
+	nd = (struct ia64_mca_notify_die *)args->err;
+
+	switch (val) {
+	case DIE_INIT_MONARCH_PROCESS:
+		/* Reason code 1 means machine check rendezvous*/
+		if (kdump_on_init && (nd->sos->rv_rc != 1)) {
+			if (atomic_inc_return(&kdump_in_progress) != 1)
+				kdump_freeze_monarch = 1;
+		}
+		break;
+	case DIE_INIT_MONARCH_LEAVE:
+		/* Reason code 1 means machine check rendezvous*/
+		if (kdump_on_init && (nd->sos->rv_rc != 1))
+			machine_kdump_on_init();
+		break;
+	case DIE_MCA_MONARCH_LEAVE:
+		/* *(nd->data) indicate if MCA is recoverable */
+		if (kdump_on_fatal_mca && !(*(nd->data))) {
+			if (atomic_inc_return(&kdump_in_progress) == 1)
+				machine_kdump_on_init();
+			/* We got fatal MCA while kdump!? No way!! */
+		}
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table kdump_ctl_table[] = {
+	{
+		.procname = "kdump_on_init",
+		.data = &kdump_on_init,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{
+		.procname = "kdump_on_fatal_mca",
+		.data = &kdump_on_fatal_mca,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{ }
+};
+
+static struct ctl_table sys_table[] = {
+	{
+	  .procname = "kernel",
+	  .mode = 0555,
+	  .child = kdump_ctl_table,
+	},
+	{ }
+};
+#endif
+
+static int
+machine_crash_setup(void)
+{
+	/* be notified before default_monarch_init_process */
+	static struct notifier_block kdump_init_notifier_nb = {
+		.notifier_call = kdump_init_notifier,
+		.priority = 1,
+	};
+	int ret;
+	if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0)
+		return ret;
+#ifdef CONFIG_SYSCTL
+	register_sysctl_table(sys_table);
+#endif
+	return 0;
+}
+
+__initcall(machine_crash_setup);
+
--- a/arch/ia64/kernel/crash_dump.c
+++ b/arch/ia64/kernel/crash_dump.c
@ -0,0 +1,50 @@
+/*
+ *	kernel/crash_dump.c - Memory preserving reboot related code.
+ *
+ *	Created by: Simon Horman <horms@verge.net.au>
+ *	Original code moved from kernel/crash.c
+ *	Original code comment copied from the i386 version of this file
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/crash_dump.h>
+
+#include <asm/page.h>
+#include <asm/uaccess.h>
+
+/**
+ * copy_oldmem_page - copy one page from "oldmem"
+ * @pfn: page frame number to be copied
+ * @buf: target memory address for the copy; this can be in kernel address
+ *	space or user address space (see @userbuf)
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page (based on pfn) to begin the copy
+ * @userbuf: if set, @buf is in user address space, use copy_to_user(),
+ *	otherwise @buf is in kernel address space, use memcpy().
+ *
+ * Copy a page from "oldmem". For this page, there is no pte mapped
+ * in the current kernel. We stitch up a pte, similar to kmap_atomic.
+ *
+ * Calling copy_to_user() in atomic context is not desirable. Hence first
+ * copying the data to a pre-allocated kernel page and then copying to user
+ * space in non-atomic context.
+ */
+ssize_t
+copy_oldmem_page(unsigned long pfn, char *buf,
+		size_t csize, unsigned long offset, int userbuf)
+{
+	void  *vaddr;
+
+	if (!csize)
+		return 0;
+	vaddr = __va(pfn<<PAGE_SHIFT);
+	if (userbuf) {
+		if (copy_to_user(buf, (vaddr + offset), csize)) {
+			return -EFAULT;
+		}
+	} else
+		memcpy(buf, (vaddr + offset), csize);
+	return csize;
+}
+
--- a/arch/ia64/kernel/cyclone.c
+++ b/arch/ia64/kernel/cyclone.c
@ -0,0 +1,124 @@
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/timex.h>
+#include <linux/clocksource.h>
+#include <asm/io.h>
+
+/* IBM Summit (EXA) Cyclone counter code*/
+#define CYCLONE_CBAR_ADDR 0xFEB00CD0
+#define CYCLONE_PMCC_OFFSET 0x51A0
+#define CYCLONE_MPMC_OFFSET 0x51D0
+#define CYCLONE_MPCS_OFFSET 0x51A8
+#define CYCLONE_TIMER_FREQ 100000000
+
+int use_cyclone;
+void __init cyclone_setup(void)
+{
+	use_cyclone = 1;
+}
+
+static void __iomem *cyclone_mc;
+
+static cycle_t read_cyclone(struct clocksource *cs)
+{
+	return (cycle_t)readq((void __iomem *)cyclone_mc);
+}
+
+static struct clocksource clocksource_cyclone = {
+        .name           = "cyclone",
+        .rating         = 300,
+        .read           = read_cyclone,
+        .mask           = (1LL << 40) - 1,
+        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+int __init init_cyclone_clock(void)
+{
+	u64 __iomem *reg;
+	u64 base;	/* saved cyclone base address */
+	u64 offset;	/* offset from pageaddr to cyclone_timer register */
+	int i;
+	u32 __iomem *cyclone_timer;	/* Cyclone MPMC0 register */
+
+	if (!use_cyclone)
+		return 0;
+
+	printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
+
+	/* find base address */
+	offset = (CYCLONE_CBAR_ADDR);
+	reg = ioremap_nocache(offset, sizeof(u64));
+	if(!reg){
+		printk(KERN_ERR "Summit chipset: Could not find valid CBAR"
+				" register.\n");
+		use_cyclone = 0;
+		return -ENODEV;
+	}
+	base = readq(reg);
+	iounmap(reg);
+	if(!base){
+		printk(KERN_ERR "Summit chipset: Could not find valid CBAR"
+				" value.\n");
+		use_cyclone = 0;
+		return -ENODEV;
+	}
+
+	/* setup PMCC */
+	offset = (base + CYCLONE_PMCC_OFFSET);
+	reg = ioremap_nocache(offset, sizeof(u64));
+	if(!reg){
+		printk(KERN_ERR "Summit chipset: Could not find valid PMCC"
+				" register.\n");
+		use_cyclone = 0;
+		return -ENODEV;
+	}
+	writel(0x00000001,reg);
+	iounmap(reg);
+
+	/* setup MPCS */
+	offset = (base + CYCLONE_MPCS_OFFSET);
+	reg = ioremap_nocache(offset, sizeof(u64));
+	if(!reg){
+		printk(KERN_ERR "Summit chipset: Could not find valid MPCS"
+				" register.\n");
+		use_cyclone = 0;
+		return -ENODEV;
+	}
+	writel(0x00000001,reg);
+	iounmap(reg);
+
+	/* map in cyclone_timer */
+	offset = (base + CYCLONE_MPMC_OFFSET);
+	cyclone_timer = ioremap_nocache(offset, sizeof(u32));
+	if(!cyclone_timer){
+		printk(KERN_ERR "Summit chipset: Could not find valid MPMC"
+				" register.\n");
+		use_cyclone = 0;
+		return -ENODEV;
+	}
+
+	/*quick test to make sure its ticking*/
+	for(i=0; i<3; i++){
+		u32 old = readl(cyclone_timer);
+		int stall = 100;
+		while(stall--) barrier();
+		if(readl(cyclone_timer) == old){
+			printk(KERN_ERR "Summit chipset: Counter not counting!"
+					" DISABLED\n");
+			iounmap(cyclone_timer);
+			cyclone_timer = NULL;
+			use_cyclone = 0;
+			return -ENODEV;
+		}
+	}
+	/* initialize last tick */
+	cyclone_mc = cyclone_timer;
+	clocksource_cyclone.archdata.fsys_mmio = cyclone_timer;
+	clocksource_register_hz(&clocksource_cyclone, CYCLONE_TIMER_FREQ);
+
+	return 0;
+}
+
+__initcall(init_cyclone_clock);
--- a/arch/ia64/kernel/dma-mapping.c
+++ b/arch/ia64/kernel/dma-mapping.c
@ -0,0 +1,24 @@
+#include <linux/dma-mapping.h>
+#include <linux/export.h>
+
+/* Set this to 1 if there is a HW IOMMU in the system */
+int iommu_detected __read_mostly;
+
+struct dma_map_ops *dma_ops;
+EXPORT_SYMBOL(dma_ops);
+
+#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
+
+static int __init dma_init(void)
+{
+	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+
+	return 0;
+}
+fs_initcall(dma_init);
+
+struct dma_map_ops *dma_get_ops(struct device *dev)
+{
+	return dma_ops;
+}
+EXPORT_SYMBOL(dma_get_ops);
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
--- a/arch/ia64/kernel/efi_stub.S
+++ b/arch/ia64/kernel/efi_stub.S
@ -0,0 +1,86 @@
+/*
+ * EFI call stub.
+ *
+ * Copyright (C) 1999-2001 Hewlett-Packard Co
+ *	David Mosberger <davidm@hpl.hp.com>
+ *
+ * This stub allows us to make EFI calls in physical mode with interrupts
+ * turned off.  We need this because we can't call SetVirtualMap() until
+ * the kernel has booted far enough to allow allocation of struct vma_struct
+ * entries (which we would need to map stuff with memory attributes other
+ * than uncached or writeback...).  Since the GetTime() service gets called
+ * earlier than that, we need to be able to make physical mode EFI calls from
+ * the kernel.
+ */
+
+/*
+ * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System
+ * Abstraction Layer Specification", revision 2.6e).  Note that
+ * psr.dfl and psr.dfh MUST be cleared, despite what this manual says.
+ * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call
+ * (the br.ia instruction fails unless psr.dfl and psr.dfh are
+ * cleared).  Fortunately, SAL promises not to touch the floating
+ * point regs, so at least we don't have to save f2-f127.
+ */
+#define PSR_BITS_TO_CLEAR						\
+	(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT |		\
+	 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |	\
+	 IA64_PSR_DFL | IA64_PSR_DFH)
+
+#define PSR_BITS_TO_SET							\
+	(IA64_PSR_BN)
+
+#include <asm/processor.h>
+#include <asm/asmmacro.h>
+
+/*
+ * Inputs:
+ *	in0 = address of function descriptor of EFI routine to call
+ *	in1..in7 = arguments to routine
+ *
+ * Outputs:
+ *	r8 = EFI_STATUS returned by called function
+ */
+
+GLOBAL_ENTRY(efi_call_phys)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+	alloc loc1=ar.pfs,8,7,7,0
+	ld8 r2=[in0],8			// load EFI function's entry point
+	mov loc0=rp
+	.body
+	;;
+	mov loc2=gp			// save global pointer
+	mov loc4=ar.rsc			// save RSE configuration
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	;;
+	ld8 gp=[in0]			// load EFI function's global pointer
+	movl r16=PSR_BITS_TO_CLEAR
+	mov loc3=psr			// save processor status word
+	movl r17=PSR_BITS_TO_SET
+	;;
+	or loc3=loc3,r17
+	mov b6=r2
+	;;
+	andcm r16=loc3,r16	// get psr with IT, DT, and RT bits cleared
+	br.call.sptk.many rp=ia64_switch_mode_phys
+.ret0:	mov out4=in5
+	mov out0=in1
+	mov out1=in2
+	mov out2=in3
+	mov out3=in4
+	mov out5=in6
+	mov out6=in7
+	mov loc5=r19
+	mov loc6=r20
+	br.call.sptk.many rp=b6		// call the EFI function
+.ret1:	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	mov r16=loc3
+	mov r19=loc5
+	mov r20=loc6
+	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+.ret2:	mov ar.rsc=loc4			// restore RSE configuration
+	mov ar.pfs=loc1
+	mov rp=loc0
+	mov gp=loc2
+	br.ret.sptk.many rp
+END(efi_call_phys)
--- a/arch/ia64/kernel/elfcore.c
+++ b/arch/ia64/kernel/elfcore.c
@ -0,0 +1,76 @@
+#include <linux/elf.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#include <asm/elf.h>
+
+
+Elf64_Half elf_core_extra_phdrs(void)
+{
+	return GATE_EHDR->e_phnum;
+}
+
+int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
+{
+	const struct elf_phdr *const gate_phdrs =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int i;
+	Elf64_Off ofs = 0;
+
+	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
+		struct elf_phdr phdr = gate_phdrs[i];
+
+		if (phdr.p_type == PT_LOAD) {
+			phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz);
+			phdr.p_filesz = phdr.p_memsz;
+			if (ofs == 0) {
+				ofs = phdr.p_offset = offset;
+				offset += phdr.p_filesz;
+			} else {
+				phdr.p_offset = ofs;
+			}
+		} else {
+			phdr.p_offset += ofs;
+		}
+		phdr.p_paddr = 0; /* match other core phdrs */
+		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
+			return 0;
+	}
+	return 1;
+}
+
+int elf_core_write_extra_data(struct coredump_params *cprm)
+{
+	const struct elf_phdr *const gate_phdrs =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int i;
+
+	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
+		if (gate_phdrs[i].p_type == PT_LOAD) {
+			void *addr = (void *)gate_phdrs[i].p_vaddr;
+			size_t memsz = PAGE_ALIGN(gate_phdrs[i].p_memsz);
+
+			if (!dump_emit(cprm, addr, memsz))
+				return 0;
+			break;
+		}
+	}
+	return 1;
+}
+
+size_t elf_core_extra_data_size(void)
+{
+	const struct elf_phdr *const gate_phdrs =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int i;
+	size_t size = 0;
+
+	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
+		if (gate_phdrs[i].p_type == PT_LOAD) {
+			size += PAGE_ALIGN(gate_phdrs[i].p_memsz);
+			break;
+		}
+	}
+	return size;
+}
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
--- a/arch/ia64/kernel/entry.h
+++ b/arch/ia64/kernel/entry.h
@ -0,0 +1,82 @@
+
+/*
+ * Preserved registers that are shared between code in ivt.S and
+ * entry.S.  Be careful not to step on these!
+ */
+#define PRED_LEAVE_SYSCALL	1 /* TRUE iff leave from syscall */
+#define PRED_KERNEL_STACK	2 /* returning to kernel-stacks? */
+#define PRED_USER_STACK		3 /* returning to user-stacks? */
+#define PRED_SYSCALL		4 /* inside a system call? */
+#define PRED_NON_SYSCALL	5 /* complement of PRED_SYSCALL */
+
+#ifdef __ASSEMBLY__
+# define PASTE2(x,y)	x##y
+# define PASTE(x,y)	PASTE2(x,y)
+
+# define pLvSys		PASTE(p,PRED_LEAVE_SYSCALL)
+# define pKStk		PASTE(p,PRED_KERNEL_STACK)
+# define pUStk		PASTE(p,PRED_USER_STACK)
+# define pSys		PASTE(p,PRED_SYSCALL)
+# define pNonSys	PASTE(p,PRED_NON_SYSCALL)
+#endif
+
+#define PT(f)		(IA64_PT_REGS_##f##_OFFSET)
+#define SW(f)		(IA64_SWITCH_STACK_##f##_OFFSET)
+#define SOS(f)		(IA64_SAL_OS_STATE_##f##_OFFSET)
+
+#define PT_REGS_SAVES(off)			\
+	.unwabi 3, 'i';				\
+	.fframe IA64_PT_REGS_SIZE+16+(off);	\
+	.spillsp rp, PT(CR_IIP)+16+(off);	\
+	.spillsp ar.pfs, PT(CR_IFS)+16+(off);	\
+	.spillsp ar.unat, PT(AR_UNAT)+16+(off);	\
+	.spillsp ar.fpsr, PT(AR_FPSR)+16+(off);	\
+	.spillsp pr, PT(PR)+16+(off);
+
+#define PT_REGS_UNWIND_INFO(off)		\
+	.prologue;				\
+	PT_REGS_SAVES(off);			\
+	.body
+
+#define SWITCH_STACK_SAVES(off)							\
+	.savesp ar.unat,SW(CALLER_UNAT)+16+(off);				\
+	.savesp ar.fpsr,SW(AR_FPSR)+16+(off);					\
+	.spillsp f2,SW(F2)+16+(off); .spillsp f3,SW(F3)+16+(off);		\
+	.spillsp f4,SW(F4)+16+(off); .spillsp f5,SW(F5)+16+(off);		\
+	.spillsp f16,SW(F16)+16+(off); .spillsp f17,SW(F17)+16+(off);		\
+	.spillsp f18,SW(F18)+16+(off); .spillsp f19,SW(F19)+16+(off);		\
+	.spillsp f20,SW(F20)+16+(off); .spillsp f21,SW(F21)+16+(off);		\
+	.spillsp f22,SW(F22)+16+(off); .spillsp f23,SW(F23)+16+(off);		\
+	.spillsp f24,SW(F24)+16+(off); .spillsp f25,SW(F25)+16+(off);		\
+	.spillsp f26,SW(F26)+16+(off); .spillsp f27,SW(F27)+16+(off);		\
+	.spillsp f28,SW(F28)+16+(off); .spillsp f29,SW(F29)+16+(off);		\
+	.spillsp f30,SW(F30)+16+(off); .spillsp f31,SW(F31)+16+(off);		\
+	.spillsp r4,SW(R4)+16+(off); .spillsp r5,SW(R5)+16+(off);		\
+	.spillsp r6,SW(R6)+16+(off); .spillsp r7,SW(R7)+16+(off);		\
+	.spillsp b0,SW(B0)+16+(off); .spillsp b1,SW(B1)+16+(off);		\
+	.spillsp b2,SW(B2)+16+(off); .spillsp b3,SW(B3)+16+(off);		\
+	.spillsp b4,SW(B4)+16+(off); .spillsp b5,SW(B5)+16+(off);		\
+	.spillsp ar.pfs,SW(AR_PFS)+16+(off); .spillsp ar.lc,SW(AR_LC)+16+(off);	\
+	.spillsp @priunat,SW(AR_UNAT)+16+(off);					\
+	.spillsp ar.rnat,SW(AR_RNAT)+16+(off);					\
+	.spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off);				\
+	.spillsp pr,SW(PR)+16+(off)
+
+#define DO_SAVE_SWITCH_STACK			\
+	movl r28=1f;				\
+	;;					\
+	.fframe IA64_SWITCH_STACK_SIZE;		\
+	adds sp=-IA64_SWITCH_STACK_SIZE,sp;	\
+	mov.ret.sptk b7=r28,1f;			\
+	SWITCH_STACK_SAVES(0);			\
+	br.cond.sptk.many save_switch_stack;	\
+1:
+
+#define DO_LOAD_SWITCH_STACK			\
+	movl r28=1f;				\
+	;;					\
+	invala;					\
+	mov.ret.sptk b7=r28,1f;			\
+	br.cond.sptk.many load_switch_stack;	\
+1:	.restore sp;				\
+	adds sp=IA64_SWITCH_STACK_SIZE,sp
--- a/arch/ia64/kernel/err_inject.c
+++ b/arch/ia64/kernel/err_inject.c
@ -0,0 +1,314 @@
+/*
+ * err_inject.c -
+ *	1.) Inject errors to a processor.
+ *	2.) Query error injection capabilities.
+ * This driver along with user space code can be acting as an error
+ * injection tool.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Written by: Fenghua Yu <fenghua.yu@intel.com>, Intel Corporation
+ * Copyright (C) 2006, Intel Corp.  All rights reserved.
+ *
+ */
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+#define ERR_INJ_DEBUG
+
+#define ERR_DATA_BUFFER_SIZE 3 		// Three 8-byte;
+
+#define define_one_ro(name) 						\
+static DEVICE_ATTR(name, 0444, show_##name, NULL)
+
+#define define_one_rw(name) 						\
+static DEVICE_ATTR(name, 0644, show_##name, store_##name)
+
+static u64 call_start[NR_CPUS];
+static u64 phys_addr[NR_CPUS];
+static u64 err_type_info[NR_CPUS];
+static u64 err_struct_info[NR_CPUS];
+static struct {
+	u64 data1;
+	u64 data2;
+	u64 data3;
+} __attribute__((__aligned__(16))) err_data_buffer[NR_CPUS];
+static s64 status[NR_CPUS];
+static u64 capabilities[NR_CPUS];
+static u64 resources[NR_CPUS];
+
+#define show(name) 							\
+static ssize_t 								\
+show_##name(struct device *dev, struct device_attribute *attr,	\
+		char *buf)						\
+{									\
+	u32 cpu=dev->id;						\
+	return sprintf(buf, "%lx\n", name[cpu]);			\
+}
+
+#define store(name)							\
+static ssize_t 								\
+store_##name(struct device *dev, struct device_attribute *attr,	\
+					const char *buf, size_t size)	\
+{									\
+	unsigned int cpu=dev->id;					\
+	name[cpu] = simple_strtoull(buf, NULL, 16);			\
+	return size;							\
+}
+
+show(call_start)
+
+/* It's user's responsibility to call the PAL procedure on a specific
+ * processor. The cpu number in driver is only used for storing data.
+ */
+static ssize_t
+store_call_start(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t size)
+{
+	unsigned int cpu=dev->id;
+	unsigned long call_start = simple_strtoull(buf, NULL, 16);
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_DEBUG "pal_mc_err_inject for cpu%d:\n", cpu);
+	printk(KERN_DEBUG "err_type_info=%lx,\n", err_type_info[cpu]);
+	printk(KERN_DEBUG "err_struct_info=%lx,\n", err_struct_info[cpu]);
+	printk(KERN_DEBUG "err_data_buffer=%lx, %lx, %lx.\n",
+			  err_data_buffer[cpu].data1,
+			  err_data_buffer[cpu].data2,
+			  err_data_buffer[cpu].data3);
+#endif
+	switch (call_start) {
+	    case 0: /* Do nothing. */
+		break;
+	    case 1: /* Call pal_mc_error_inject in physical mode. */
+		status[cpu]=ia64_pal_mc_error_inject_phys(err_type_info[cpu],
+					err_struct_info[cpu],
+					ia64_tpa(&err_data_buffer[cpu]),
+					&capabilities[cpu],
+			 		&resources[cpu]);
+		break;
+	    case 2: /* Call pal_mc_error_inject in virtual mode. */
+		status[cpu]=ia64_pal_mc_error_inject_virt(err_type_info[cpu],
+					err_struct_info[cpu],
+					ia64_tpa(&err_data_buffer[cpu]),
+					&capabilities[cpu],
+			 		&resources[cpu]);
+		break;
+	    default:
+		status[cpu] = -EINVAL;
+		break;
+	}
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]);
+	printk(KERN_DEBUG "capapbilities=%lx,\n", capabilities[cpu]);
+	printk(KERN_DEBUG "resources=%lx\n", resources[cpu]);
+#endif
+	return size;
+}
+
+show(err_type_info)
+store(err_type_info)
+
+static ssize_t
+show_virtual_to_phys(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	unsigned int cpu=dev->id;
+	return sprintf(buf, "%lx\n", phys_addr[cpu]);
+}
+
+static ssize_t
+store_virtual_to_phys(struct device *dev, struct device_attribute *attr,
+			const char *buf, size_t size)
+{
+	unsigned int cpu=dev->id;
+	u64 virt_addr=simple_strtoull(buf, NULL, 16);
+	int ret;
+
+        ret = get_user_pages(current, current->mm, virt_addr,
+                        1, VM_READ, 0, NULL, NULL);
+	if (ret<=0) {
+#ifdef ERR_INJ_DEBUG
+		printk("Virtual address %lx is not existing.\n",virt_addr);
+#endif
+		return -EINVAL;
+	}
+
+	phys_addr[cpu] = ia64_tpa(virt_addr);
+	return size;
+}
+
+show(err_struct_info)
+store(err_struct_info)
+
+static ssize_t
+show_err_data_buffer(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	unsigned int cpu=dev->id;
+
+	return sprintf(buf, "%lx, %lx, %lx\n",
+			err_data_buffer[cpu].data1,
+			err_data_buffer[cpu].data2,
+			err_data_buffer[cpu].data3);
+}
+
+static ssize_t
+store_err_data_buffer(struct device *dev,
+			struct device_attribute *attr,
+			const char *buf, size_t size)
+{
+	unsigned int cpu=dev->id;
+	int ret;
+
+#ifdef ERR_INJ_DEBUG
+	printk("write err_data_buffer=[%lx,%lx,%lx] on cpu%d\n",
+		 err_data_buffer[cpu].data1,
+		 err_data_buffer[cpu].data2,
+		 err_data_buffer[cpu].data3,
+		 cpu);
+#endif
+	ret=sscanf(buf, "%lx, %lx, %lx",
+			&err_data_buffer[cpu].data1,
+			&err_data_buffer[cpu].data2,
+			&err_data_buffer[cpu].data3);
+	if (ret!=ERR_DATA_BUFFER_SIZE)
+		return -EINVAL;
+
+	return size;
+}
+
+show(status)
+show(capabilities)
+show(resources)
+
+define_one_rw(call_start);
+define_one_rw(err_type_info);
+define_one_rw(err_struct_info);
+define_one_rw(err_data_buffer);
+define_one_rw(virtual_to_phys);
+define_one_ro(status);
+define_one_ro(capabilities);
+define_one_ro(resources);
+
+static struct attribute *default_attrs[] = {
+	&dev_attr_call_start.attr,
+	&dev_attr_virtual_to_phys.attr,
+	&dev_attr_err_type_info.attr,
+	&dev_attr_err_struct_info.attr,
+	&dev_attr_err_data_buffer.attr,
+	&dev_attr_status.attr,
+	&dev_attr_capabilities.attr,
+	&dev_attr_resources.attr,
+	NULL
+};
+
+static struct attribute_group err_inject_attr_group = {
+	.attrs = default_attrs,
+	.name = "err_inject"
+};
+/* Add/Remove err_inject interface for CPU device */
+static int err_inject_add_dev(struct device *sys_dev)
+{
+	return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group);
+}
+
+static int err_inject_remove_dev(struct device *sys_dev)
+{
+	sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group);
+	return 0;
+}
+static int err_inject_cpu_callback(struct notifier_block *nfb,
+		unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct device *sys_dev;
+
+	sys_dev = get_cpu_device(cpu);
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		err_inject_add_dev(sys_dev);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		err_inject_remove_dev(sys_dev);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block err_inject_cpu_notifier =
+{
+	.notifier_call = err_inject_cpu_callback,
+};
+
+static int __init
+err_inject_init(void)
+{
+	int i;
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_INFO "Enter error injection driver.\n");
+#endif
+
+	cpu_notifier_register_begin();
+
+	for_each_online_cpu(i) {
+		err_inject_cpu_callback(&err_inject_cpu_notifier, CPU_ONLINE,
+				(void *)(long)i);
+	}
+
+	__register_hotcpu_notifier(&err_inject_cpu_notifier);
+
+	cpu_notifier_register_done();
+
+	return 0;
+}
+
+static void __exit
+err_inject_exit(void)
+{
+	int i;
+	struct device *sys_dev;
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_INFO "Exit error injection driver.\n");
+#endif
+
+	cpu_notifier_register_begin();
+
+	for_each_online_cpu(i) {
+		sys_dev = get_cpu_device(i);
+		sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group);
+	}
+
+	__unregister_hotcpu_notifier(&err_inject_cpu_notifier);
+
+	cpu_notifier_register_done();
+}
+
+module_init(err_inject_init);
+module_exit(err_inject_exit);
+
+MODULE_AUTHOR("Fenghua Yu <fenghua.yu@intel.com>");
+MODULE_DESCRIPTION("MC error injection kernel sysfs interface");
+MODULE_LICENSE("GPL");
--- a/arch/ia64/kernel/esi.c
+++ b/arch/ia64/kernel/esi.c
@ -0,0 +1,205 @@
+/*
+ * Extensible SAL Interface (ESI) support routines.
+ *
+ * Copyright (C) 2006 Hewlett-Packard Co
+ * 	Alex Williamson <alex.williamson@hp.com>
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <asm/esi.h>
+#include <asm/sal.h>
+
+MODULE_AUTHOR("Alex Williamson <alex.williamson@hp.com>");
+MODULE_DESCRIPTION("Extensible SAL Interface (ESI) support");
+MODULE_LICENSE("GPL");
+
+#define MODULE_NAME	"esi"
+
+#define ESI_TABLE_GUID					\
+    EFI_GUID(0x43EA58DC, 0xCF28, 0x4b06, 0xB3,		\
+	     0x91, 0xB7, 0x50, 0x59, 0x34, 0x2B, 0xD4)
+
+enum esi_systab_entry_type {
+	ESI_DESC_ENTRY_POINT = 0
+};
+
+/*
+ * Entry type:	Size:
+ *	0	48
+ */
+#define ESI_DESC_SIZE(type)	"\060"[(unsigned) (type)]
+
+typedef struct ia64_esi_desc_entry_point {
+	u8 type;
+	u8 reserved1[15];
+	u64 esi_proc;
+	u64 gp;
+	efi_guid_t guid;
+} ia64_esi_desc_entry_point_t;
+
+struct pdesc {
+	void *addr;
+	void *gp;
+};
+
+static struct ia64_sal_systab *esi_systab;
+
+static int __init esi_init (void)
+{
+	efi_config_table_t *config_tables;
+	struct ia64_sal_systab *systab;
+	unsigned long esi = 0;
+	char *p;
+	int i;
+
+	config_tables = __va(efi.systab->tables);
+
+	for (i = 0; i < (int) efi.systab->nr_tables; ++i) {
+		if (efi_guidcmp(config_tables[i].guid, ESI_TABLE_GUID) == 0) {
+			esi = config_tables[i].table;
+			break;
+		}
+	}
+
+	if (!esi)
+		return -ENODEV;
+
+	systab = __va(esi);
+
+	if (strncmp(systab->signature, "ESIT", 4) != 0) {
+		printk(KERN_ERR "bad signature in ESI system table!");
+		return -ENODEV;
+	}
+
+	p = (char *) (systab + 1);
+	for (i = 0; i < systab->entry_count; i++) {
+		/*
+		 * The first byte of each entry type contains the type
+		 * descriptor.
+		 */
+		switch (*p) {
+		      case ESI_DESC_ENTRY_POINT:
+			break;
+		      default:
+			printk(KERN_WARNING "Unknown table type %d found in "
+			       "ESI table, ignoring rest of table\n", *p);
+			return -ENODEV;
+		}
+
+		p += ESI_DESC_SIZE(*p);
+	}
+
+	esi_systab = systab;
+	return 0;
+}
+
+
+int ia64_esi_call (efi_guid_t guid, struct ia64_sal_retval *isrvp,
+		   enum esi_proc_type proc_type, u64 func,
+		   u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6,
+		   u64 arg7)
+{
+	struct ia64_fpreg fr[6];
+	unsigned long flags = 0;
+	int i;
+	char *p;
+
+	if (!esi_systab)
+		return -1;
+
+	p = (char *) (esi_systab + 1);
+	for (i = 0; i < esi_systab->entry_count; i++) {
+		if (*p == ESI_DESC_ENTRY_POINT) {
+			ia64_esi_desc_entry_point_t *esi = (void *)p;
+			if (!efi_guidcmp(guid, esi->guid)) {
+				ia64_sal_handler esi_proc;
+				struct pdesc pdesc;
+
+				pdesc.addr = __va(esi->esi_proc);
+				pdesc.gp = __va(esi->gp);
+
+				esi_proc = (ia64_sal_handler) &pdesc;
+
+				ia64_save_scratch_fpregs(fr);
+				if (proc_type == ESI_PROC_SERIALIZED)
+					spin_lock_irqsave(&sal_lock, flags);
+				else if (proc_type == ESI_PROC_MP_SAFE)
+					local_irq_save(flags);
+				else
+					preempt_disable();
+				*isrvp = (*esi_proc)(func, arg1, arg2, arg3,
+						     arg4, arg5, arg6, arg7);
+				if (proc_type == ESI_PROC_SERIALIZED)
+					spin_unlock_irqrestore(&sal_lock,
+							       flags);
+				else if (proc_type == ESI_PROC_MP_SAFE)
+					local_irq_restore(flags);
+				else
+					preempt_enable();
+				ia64_load_scratch_fpregs(fr);
+				return 0;
+			}
+		}
+		p += ESI_DESC_SIZE(*p);
+	}
+	return -1;
+}
+EXPORT_SYMBOL_GPL(ia64_esi_call);
+
+int ia64_esi_call_phys (efi_guid_t guid, struct ia64_sal_retval *isrvp,
+			u64 func, u64 arg1, u64 arg2, u64 arg3, u64 arg4,
+			u64 arg5, u64 arg6, u64 arg7)
+{
+	struct ia64_fpreg fr[6];
+	unsigned long flags;
+	u64 esi_params[8];
+	char *p;
+	int i;
+
+	if (!esi_systab)
+		return -1;
+
+	p = (char *) (esi_systab + 1);
+	for (i = 0; i < esi_systab->entry_count; i++) {
+		if (*p == ESI_DESC_ENTRY_POINT) {
+			ia64_esi_desc_entry_point_t *esi = (void *)p;
+			if (!efi_guidcmp(guid, esi->guid)) {
+				ia64_sal_handler esi_proc;
+				struct pdesc pdesc;
+
+				pdesc.addr = (void *)esi->esi_proc;
+				pdesc.gp = (void *)esi->gp;
+
+				esi_proc = (ia64_sal_handler) &pdesc;
+
+				esi_params[0] = func;
+				esi_params[1] = arg1;
+				esi_params[2] = arg2;
+				esi_params[3] = arg3;
+				esi_params[4] = arg4;
+				esi_params[5] = arg5;
+				esi_params[6] = arg6;
+				esi_params[7] = arg7;
+				ia64_save_scratch_fpregs(fr);
+				spin_lock_irqsave(&sal_lock, flags);
+				*isrvp = esi_call_phys(esi_proc, esi_params);
+				spin_unlock_irqrestore(&sal_lock, flags);
+				ia64_load_scratch_fpregs(fr);
+				return 0;
+			}
+		}
+		p += ESI_DESC_SIZE(*p);
+	}
+	return -1;
+}
+EXPORT_SYMBOL_GPL(ia64_esi_call_phys);
+
+static void __exit esi_exit (void)
+{
+}
+
+module_init(esi_init);
+module_exit(esi_exit);	/* makes module removable... */
--- a/arch/ia64/kernel/esi_stub.S
+++ b/arch/ia64/kernel/esi_stub.S
@ -0,0 +1,96 @@
+/*
+ * ESI call stub.
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *	Alex Williamson <alex.williamson@hp.com>
+ *
+ * Based on EFI call stub by David Mosberger.  The stub is virtually
+ * identical to the one for EFI phys-mode calls, except that ESI
+ * calls may have up to 8 arguments, so they get passed to this routine
+ * through memory.
+ *
+ * This stub allows us to make ESI calls in physical mode with interrupts
+ * turned off.  ESI calls may not support calling from virtual mode.
+ *
+ * Google for "Extensible SAL specification" for a document describing the
+ * ESI standard.
+ */
+
+/*
+ * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System
+ * Abstraction Layer Specification", revision 2.6e).  Note that
+ * psr.dfl and psr.dfh MUST be cleared, despite what this manual says.
+ * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call
+ * (the br.ia instruction fails unless psr.dfl and psr.dfh are
+ * cleared).  Fortunately, SAL promises not to touch the floating
+ * point regs, so at least we don't have to save f2-f127.
+ */
+#define PSR_BITS_TO_CLEAR						\
+	(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT |		\
+	 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |	\
+	 IA64_PSR_DFL | IA64_PSR_DFH)
+
+#define PSR_BITS_TO_SET							\
+	(IA64_PSR_BN)
+
+#include <asm/processor.h>
+#include <asm/asmmacro.h>
+
+/*
+ * Inputs:
+ *	in0 = address of function descriptor of ESI routine to call
+ *	in1 = address of array of ESI parameters
+ *
+ * Outputs:
+ *	r8 = result returned by called function
+ */
+GLOBAL_ENTRY(esi_call_phys)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
+	alloc loc1=ar.pfs,2,7,8,0
+	ld8 r2=[in0],8			// load ESI function's entry point
+	mov loc0=rp
+	.body
+	;;
+	ld8 out0=[in1],8		// ESI params loaded from array
+	;;				// passing all as inputs doesn't work
+	ld8 out1=[in1],8
+	;;
+	ld8 out2=[in1],8
+	;;
+	ld8 out3=[in1],8
+	;;
+	ld8 out4=[in1],8
+	;;
+	ld8 out5=[in1],8
+	;;
+	ld8 out6=[in1],8
+	;;
+	ld8 out7=[in1]
+	mov loc2=gp			// save global pointer
+	mov loc4=ar.rsc			// save RSE configuration
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	;;
+	ld8 gp=[in0]			// load ESI function's global pointer
+	movl r16=PSR_BITS_TO_CLEAR
+	mov loc3=psr			// save processor status word
+	movl r17=PSR_BITS_TO_SET
+	;;
+	or loc3=loc3,r17
+	mov b6=r2
+	;;
+	andcm r16=loc3,r16	// get psr with IT, DT, and RT bits cleared
+	br.call.sptk.many rp=ia64_switch_mode_phys
+.ret0:	mov loc5=r19			// old ar.bsp
+	mov loc6=r20			// old sp
+	br.call.sptk.many rp=b6		// call the ESI function
+.ret1:	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	mov r16=loc3			// save virtual mode psr
+	mov r19=loc5			// save virtual mode bspstore
+	mov r20=loc6			// save virtual mode sp
+	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+.ret2:	mov ar.rsc=loc4			// restore RSE configuration
+	mov ar.pfs=loc1
+	mov rp=loc0
+	mov gp=loc2
+	br.ret.sptk.many rp
+END(esi_call_phys)
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@ -0,0 +1,836 @@
+/*
+ * This file contains the light-weight system call handlers (fsyscall-handlers).
+ *
+ * Copyright (C) 2003 Hewlett-Packard Co
+ * 	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 25-Sep-03 davidm	Implement fsys_rt_sigprocmask().
+ * 18-Feb-03 louisk	Implement fsys_gettimeofday().
+ * 28-Feb-03 davidm	Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
+ *			probably broke it along the way... ;-)
+ * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise fsys_gettimeofday to make
+ *                      it capable of using memory based clocks without falling back to C code.
+ * 08-Feb-07 Fenghua Yu Implement fsys_getcpu.
+ *
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/errno.h>
+#include <asm/asm-offsets.h>
+#include <asm/percpu.h>
+#include <asm/thread_info.h>
+#include <asm/sal.h>
+#include <asm/signal.h>
+#include <asm/unistd.h>
+
+#include "entry.h"
+#include "paravirt_inst.h"
+
+/*
+ * See Documentation/ia64/fsys.txt for details on fsyscalls.
+ *
+ * On entry to an fsyscall handler:
+ *   r10	= 0 (i.e., defaults to "successful syscall return")
+ *   r11	= saved ar.pfs (a user-level value)
+ *   r15	= system call number
+ *   r16	= "current" task pointer (in normal kernel-mode, this is in r13)
+ *   r32-r39	= system call arguments
+ *   b6		= return address (a user-level value)
+ *   ar.pfs	= previous frame-state (a user-level value)
+ *   PSR.be	= cleared to zero (i.e., little-endian byte order is in effect)
+ *   all other registers may contain values passed in from user-mode
+ *
+ * On return from an fsyscall handler:
+ *   r11	= saved ar.pfs (as passed into the fsyscall handler)
+ *   r15	= system call number (as passed into the fsyscall handler)
+ *   r32-r39	= system call arguments (as passed into the fsyscall handler)
+ *   b6		= return address (as passed into the fsyscall handler)
+ *   ar.pfs	= previous frame-state (as passed into the fsyscall handler)
+ */
+
+ENTRY(fsys_ni_syscall)
+	.prologue
+	.altrp b6
+	.body
+	mov r8=ENOSYS
+	mov r10=-1
+	FSYS_RETURN
+END(fsys_ni_syscall)
+
+ENTRY(fsys_getpid)
+	.prologue
+	.altrp b6
+	.body
+	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
+	;;
+	ld8 r17=[r17]				// r17 = current->group_leader
+	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+	;;
+	ld4 r9=[r9]
+	add r17=IA64_TASK_TGIDLINK_OFFSET,r17
+	;;
+	and r9=TIF_ALLWORK_MASK,r9
+	ld8 r17=[r17]				// r17 = current->group_leader->pids[PIDTYPE_PID].pid
+	;;
+	add r8=IA64_PID_LEVEL_OFFSET,r17
+	;;
+	ld4 r8=[r8]				// r8 = pid->level
+	add r17=IA64_PID_UPID_OFFSET,r17	// r17 = &pid->numbers[0]
+	;;
+	shl r8=r8,IA64_UPID_SHIFT
+	;;
+	add r17=r17,r8				// r17 = &pid->numbers[pid->level]
+	;;
+	ld4 r8=[r17]				// r8 = pid->numbers[pid->level].nr
+	;;
+	mov r17=0
+	;;
+	cmp.ne p8,p0=0,r9
+(p8)	br.spnt.many fsys_fallback_syscall
+	FSYS_RETURN
+END(fsys_getpid)
+
+ENTRY(fsys_set_tid_address)
+	.prologue
+	.altrp b6
+	.body
+	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+	add r17=IA64_TASK_TGIDLINK_OFFSET,r16
+	;;
+	ld4 r9=[r9]
+	tnat.z p6,p7=r32		// check argument register for being NaT
+	ld8 r17=[r17]				// r17 = current->pids[PIDTYPE_PID].pid
+	;;
+	and r9=TIF_ALLWORK_MASK,r9
+	add r8=IA64_PID_LEVEL_OFFSET,r17
+	add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
+	;;
+	ld4 r8=[r8]				// r8 = pid->level
+	add r17=IA64_PID_UPID_OFFSET,r17	// r17 = &pid->numbers[0]
+	;;
+	shl r8=r8,IA64_UPID_SHIFT
+	;;
+	add r17=r17,r8				// r17 = &pid->numbers[pid->level]
+	;;
+	ld4 r8=[r17]				// r8 = pid->numbers[pid->level].nr
+	;;
+	cmp.ne p8,p0=0,r9
+	mov r17=-1
+	;;
+(p6)	st8 [r18]=r32
+(p7)	st8 [r18]=r17
+(p8)	br.spnt.many fsys_fallback_syscall
+	;;
+	mov r17=0			// i must not leak kernel bits...
+	mov r18=0			// i must not leak kernel bits...
+	FSYS_RETURN
+END(fsys_set_tid_address)
+
+#if IA64_GTOD_SEQ_OFFSET !=0
+#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t
+#endif
+#if IA64_ITC_JITTER_OFFSET !=0
+#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t
+#endif
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_DIVIDE_BY_1000 0x4000
+#define CLOCK_ADD_MONOTONIC 0x8000
+
+ENTRY(fsys_gettimeofday)
+	.prologue
+	.altrp b6
+	.body
+	mov r31 = r32
+	tnat.nz p6,p0 = r33		// guard against NaT argument
+(p6)    br.cond.spnt.few .fail_einval
+	mov r30 = CLOCK_DIVIDE_BY_1000
+	;;
+.gettime:
+	// Register map
+	// Incoming r31 = pointer to address where to place result
+	//          r30 = flags determining how time is processed
+	// r2,r3 = temp r4-r7 preserved
+	// r8 = result nanoseconds
+	// r9 = result seconds
+	// r10 = temporary storage for clock difference
+	// r11 = preserved: saved ar.pfs
+	// r12 = preserved: memory stack
+	// r13 = preserved: thread pointer
+	// r14 = address of mask / mask value
+	// r15 = preserved: system call number
+	// r16 = preserved: current task pointer
+	// r17 = (not used)
+	// r18 = (not used)
+	// r19 = address of itc_lastcycle
+	// r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence)
+	// r21 = address of mmio_ptr
+	// r22 = address of wall_time or monotonic_time
+	// r23 = address of shift / value
+	// r24 = address mult factor / cycle_last value
+	// r25 = itc_lastcycle value
+	// r26 = address clocksource cycle_last
+	// r27 = (not used)
+	// r28 = sequence number at the beginning of critcal section
+	// r29 = address of itc_jitter
+	// r30 = time processing flags / memory address
+	// r31 = pointer to result
+	// Predicates
+	// p6,p7 short term use
+	// p8 = timesource ar.itc
+	// p9 = timesource mmio64
+	// p10 = timesource mmio32 - not used
+	// p11 = timesource not to be handled by asm code
+	// p12 = memory time source ( = p9 | p10) - not used
+	// p13 = do cmpxchg with itc_lastcycle
+	// p14 = Divide by 1000
+	// p15 = Add monotonic
+	//
+	// Note that instructions are optimized for McKinley. McKinley can
+	// process two bundles simultaneously and therefore we continuously
+	// try to feed the CPU two bundles and then a stop.
+
+	add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
+	tnat.nz p6,p0 = r31		// guard against Nat argument
+(p6)	br.cond.spnt.few .fail_einval
+	movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
+	;;
+	ld4 r2 = [r2]			// process work pending flags
+	movl r29 = itc_jitter_data	// itc_jitter
+	add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20	// wall_time
+	add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
+	mov pr = r30,0xc000	// Set predicates according to function
+	;;
+	and r2 = TIF_ALLWORK_MASK,r2
+	add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+(p15)	add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20	// monotonic_time
+	;;
+	add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20	// clksrc_cycle_last
+	cmp.ne p6, p0 = 0, r2	// Fallback if work is scheduled
+(p6)	br.cond.spnt.many fsys_fallback_syscall
+	;;
+	// Begin critical section
+.time_redo:
+	ld4.acq r28 = [r20]	// gtod_lock.sequence, Must take first
+	;;
+	and r28 = ~1,r28	// And make sequence even to force retry if odd
+	;;
+	ld8 r30 = [r21]		// clocksource->mmio_ptr
+	add r24 = IA64_CLKSRC_MULT_OFFSET,r20
+	ld4 r2 = [r29]		// itc_jitter value
+	add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20
+	add r14 = IA64_CLKSRC_MASK_OFFSET,r20
+	;;
+	ld4 r3 = [r24]		// clocksource mult value
+	ld8 r14 = [r14]         // clocksource mask value
+	cmp.eq p8,p9 = 0,r30	// use cpu timer if no mmio_ptr
+	;;
+	setf.sig f7 = r3	// Setup for mult scaling of counter
+(p8)	cmp.ne p13,p0 = r2,r0	// need itc_jitter compensation, set p13
+	ld4 r23 = [r23]		// clocksource shift value
+	ld8 r24 = [r26]		// get clksrc_cycle_last value
+(p9)	cmp.eq p13,p0 = 0,r30	// if mmio_ptr, clear p13 jitter control
+	;;
+	.pred.rel.mutex p8,p9
+	MOV_FROM_ITC(p8, p6, r2, r10)	// CPU_TIMER. 36 clocks latency!!!
+(p9)	ld8 r2 = [r30]		// MMIO_TIMER. Could also have latency issues..
+(p13)	ld8 r25 = [r19]		// get itc_lastcycle value
+	ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_sec
+	;;
+	ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_nsec
+(p13)	sub r3 = r25,r2		// Diff needed before comparison (thanks davidm)
+	;;
+(p13)	cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
+	sub r10 = r2,r24	// current_cycle - last_cycle
+	;;
+(p6)	sub r10 = r25,r24	// time we got was less than last_cycle
+(p7)	mov ar.ccv = r25	// more than last_cycle. Prep for cmpxchg
+	;;
+(p7)	cmpxchg8.rel r3 = [r19],r2,ar.ccv
+	;;
+(p7)	cmp.ne p7,p0 = r25,r3	// if cmpxchg not successful
+	;;
+(p7)	sub r10 = r3,r24	// then use new last_cycle instead
+	;;
+	and r10 = r10,r14	// Apply mask
+	;;
+	setf.sig f8 = r10
+	nop.i 123
+	;;
+	// fault check takes 5 cycles and we have spare time
+EX(.fail_efault, probe.w.fault r31, 3)
+	xmpy.l f8 = f8,f7	// nsec_per_cyc*(counter-last_counter)
+	;;
+	getf.sig r2 = f8
+	mf
+	;;
+	ld4 r10 = [r20]		// gtod_lock.sequence
+	shr.u r2 = r2,r23	// shift by factor
+	;;
+	add r8 = r8,r2		// Add xtime.nsecs
+	cmp4.ne p7,p0 = r28,r10
+(p7)	br.cond.dpnt.few .time_redo	// sequence number changed, redo
+	// End critical section.
+	// Now r8=tv->tv_nsec and r9=tv->tv_sec
+	mov r10 = r0
+	movl r2 = 1000000000
+	add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
+(p14)	movl r3 = 2361183241434822607	// Prep for / 1000 hack
+	;;
+.time_normalize:
+	mov r21 = r8
+	cmp.ge p6,p0 = r8,r2
+(p14)	shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time
+	;;
+(p14)	setf.sig f8 = r20
+(p6)	sub r8 = r8,r2
+(p6)	add r9 = 1,r9		// two nops before the branch.
+(p14)	setf.sig f7 = r3	// Chances for repeats are 1 in 10000 for gettod
+(p6)	br.cond.dpnt.few .time_normalize
+	;;
+	// Divided by 8 though shift. Now divide by 125
+	// The compiler was able to do that with a multiply
+	// and a shift and we do the same
+EX(.fail_efault, probe.w.fault r23, 3)	// This also costs 5 cycles
+(p14)	xmpy.hu f8 = f8, f7		// xmpy has 5 cycles latency so use it
+	;;
+(p14)	getf.sig r2 = f8
+	;;
+	mov r8 = r0
+(p14)	shr.u r21 = r2, 4
+	;;
+EX(.fail_efault, st8 [r31] = r9)
+EX(.fail_efault, st8 [r23] = r21)
+	FSYS_RETURN
+.fail_einval:
+	mov r8 = EINVAL
+	mov r10 = -1
+	FSYS_RETURN
+.fail_efault:
+	mov r8 = EFAULT
+	mov r10 = -1
+	FSYS_RETURN
+END(fsys_gettimeofday)
+
+ENTRY(fsys_clock_gettime)
+	.prologue
+	.altrp b6
+	.body
+	cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
+	// Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
+(p6)	br.spnt.few fsys_fallback_syscall
+	mov r31 = r33
+	shl r30 = r32,15
+	br.many .gettime
+END(fsys_clock_gettime)
+
+/*
+ * fsys_getcpu doesn't use the third parameter in this implementation. It reads
+ * current_thread_info()->cpu and corresponding node in cpu_to_node_map.
+ */
+ENTRY(fsys_getcpu)
+	.prologue
+	.altrp b6
+	.body
+	;;
+	add r2=TI_FLAGS+IA64_TASK_SIZE,r16
+	tnat.nz p6,p0 = r32			// guard against NaT argument
+	add r3=TI_CPU+IA64_TASK_SIZE,r16
+	;;
+	ld4 r3=[r3]				// M r3 = thread_info->cpu
+	ld4 r2=[r2]				// M r2 = thread_info->flags
+(p6)    br.cond.spnt.few .fail_einval		// B
+	;;
+	tnat.nz p7,p0 = r33			// I guard against NaT argument
+(p7)    br.cond.spnt.few .fail_einval		// B
+	;;
+	cmp.ne p6,p0=r32,r0
+	cmp.ne p7,p0=r33,r0
+	;;
+#ifdef CONFIG_NUMA
+	movl r17=cpu_to_node_map
+	;;
+EX(.fail_efault, (p6) probe.w.fault r32, 3)		// M This takes 5 cycles
+EX(.fail_efault, (p7) probe.w.fault r33, 3)		// M This takes 5 cycles
+	shladd r18=r3,1,r17
+	;;
+	ld2 r20=[r18]				// r20 = cpu_to_node_map[cpu]
+	and r2 = TIF_ALLWORK_MASK,r2
+	;;
+	cmp.ne p8,p0=0,r2
+(p8)	br.spnt.many fsys_fallback_syscall
+	;;
+	;;
+EX(.fail_efault, (p6) st4 [r32] = r3)
+EX(.fail_efault, (p7) st2 [r33] = r20)
+	mov r8=0
+	;;
+#else
+EX(.fail_efault, (p6) probe.w.fault r32, 3)		// M This takes 5 cycles
+EX(.fail_efault, (p7) probe.w.fault r33, 3)		// M This takes 5 cycles
+	and r2 = TIF_ALLWORK_MASK,r2
+	;;
+	cmp.ne p8,p0=0,r2
+(p8)	br.spnt.many fsys_fallback_syscall
+	;;
+EX(.fail_efault, (p6) st4 [r32] = r3)
+EX(.fail_efault, (p7) st2 [r33] = r0)
+	mov r8=0
+	;;
+#endif
+	FSYS_RETURN
+END(fsys_getcpu)
+
+ENTRY(fsys_fallback_syscall)
+	.prologue
+	.altrp b6
+	.body
+	/*
+	 * We only get here from light-weight syscall handlers.  Thus, we already
+	 * know that r15 contains a valid syscall number.  No need to re-check.
+	 */
+	adds r17=-1024,r15
+	movl r14=sys_call_table
+	;;
+	RSM_PSR_I(p0, r26, r27)
+	shladd r18=r17,3,r14
+	;;
+	ld8 r18=[r18]				// load normal (heavy-weight) syscall entry-point
+	MOV_FROM_PSR(p0, r29, r26)		// read psr (12 cyc load latency)
+	mov r27=ar.rsc
+	mov r21=ar.fpsr
+	mov r26=ar.pfs
+END(fsys_fallback_syscall)
+	/* FALL THROUGH */
+GLOBAL_ENTRY(paravirt_fsys_bubble_down)
+	.prologue
+	.altrp b6
+	.body
+	/*
+	 * We get here for syscalls that don't have a lightweight
+	 * handler.  For those, we need to bubble down into the kernel
+	 * and that requires setting up a minimal pt_regs structure,
+	 * and initializing the CPU state more or less as if an
+	 * interruption had occurred.  To make syscall-restarts work,
+	 * we setup pt_regs such that cr_iip points to the second
+	 * instruction in syscall_via_break.  Decrementing the IP
+	 * hence will restart the syscall via break and not
+	 * decrementing IP will return us to the caller, as usual.
+	 * Note that we preserve the value of psr.pp rather than
+	 * initializing it from dcr.pp.  This makes it possible to
+	 * distinguish fsyscall execution from other privileged
+	 * execution.
+	 *
+	 * On entry:
+	 *	- normal fsyscall handler register usage, except
+	 *	  that we also have:
+	 *	- r18: address of syscall entry point
+	 *	- r21: ar.fpsr
+	 *	- r26: ar.pfs
+	 *	- r27: ar.rsc
+	 *	- r29: psr
+	 *
+	 * We used to clear some PSR bits here but that requires slow
+	 * serialization.  Fortuntely, that isn't really necessary.
+	 * The rationale is as follows: we used to clear bits
+	 * ~PSR_PRESERVED_BITS in PSR.L.  Since
+	 * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
+	 * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
+	 * However,
+	 *
+	 * PSR.BE : already is turned off in __kernel_syscall_via_epc()
+	 * PSR.AC : don't care (kernel normally turns PSR.AC on)
+	 * PSR.I  : already turned off by the time paravirt_fsys_bubble_down gets
+	 *	    invoked
+	 * PSR.DFL: always 0 (kernel never turns it on)
+	 * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
+	 *	    initiative
+	 * PSR.DI : always 0 (kernel never turns it on)
+	 * PSR.SI : always 0 (kernel never turns it on)
+	 * PSR.DB : don't care --- kernel never enables kernel-level
+	 *	    breakpoints
+	 * PSR.TB : must be 0 already; if it wasn't zero on entry to
+	 *          __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down
+	 *          will trigger a taken branch; the taken-trap-handler then
+	 *          converts the syscall into a break-based system-call.
+	 */
+	/*
+	 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
+	 * The rest we have to synthesize.
+	 */
+#	define PSR_ONE_BITS		((3 << IA64_PSR_CPL0_BIT)	\
+					 | (0x1 << IA64_PSR_RI_BIT)	\
+					 | IA64_PSR_BN | IA64_PSR_I)
+
+	invala					// M0|1
+	movl r14=ia64_ret_from_syscall		// X
+
+	nop.m 0
+	movl r28=__kernel_syscall_via_break	// X	create cr.iip
+	;;
+
+	mov r2=r16				// A    get task addr to addl-addressable register
+	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
+	mov r31=pr				// I0   save pr (2 cyc)
+	;;
+	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
+	addl r22=IA64_RBS_OFFSET,r2		// A    compute base of RBS
+	add r3=TI_FLAGS+IA64_TASK_SIZE,r2	// A
+	;;
+	ld4 r3=[r3]				// M0|1 r3 = current_thread_info()->flags
+	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch register backing-store
+	nop.i 0
+	;;
+	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	MOV_FROM_ITC(p0, p6, r30, r23)		// M    get cycle for accounting
+#else
+	nop.m 0
+#endif
+	nop.i 0
+	;;
+	mov r23=ar.bspstore			// M2 (12 cyc) save ar.bspstore
+	mov.m r24=ar.rnat			// M2 (5 cyc) read ar.rnat (dual-issues!)
+	nop.i 0
+	;;
+	mov ar.bspstore=r22			// M2 (6 cyc) switch to kernel RBS
+	movl r8=PSR_ONE_BITS			// X
+	;;
+	mov r25=ar.unat				// M2 (5 cyc) save ar.unat
+	mov r19=b6				// I0   save b6 (2 cyc)
+	mov r20=r1				// A    save caller's gp in r20
+	;;
+	or r29=r8,r29				// A    construct cr.ipsr value to save
+	mov b6=r18				// I0   copy syscall entry-point to b6 (7 cyc)
+	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
+
+	mov r18=ar.bsp				// M2   save (kernel) ar.bsp (12 cyc)
+	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
+	br.call.sptk.many b7=ia64_syscall_setup	// B
+	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	// mov.m r30=ar.itc is called in advance
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at leave kernel
+	;;
+	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
+	ld8 r21=[r17]				// cumulated utime
+	sub r22=r19,r18				// stime before leave kernel
+	;;
+	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// update stamp
+	sub r18=r30,r19				// elapsed time in user mode
+	;;
+	add r20=r20,r22				// sum stime
+	add r21=r21,r18				// sum utime
+	;;
+	st8 [r16]=r20				// update stime
+	st8 [r17]=r21				// update utime
+	;;
+#endif
+	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
+	mov rp=r14				// I0   set the real return addr
+	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A
+	;;
+	SSM_PSR_I(p0, p6, r22)			// M2   we're on kernel stacks now, reenable irqs
+	cmp.eq p8,p0=r3,r0			// A
+(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
+
+	nop.m 0
+(p8)	br.call.sptk.many b6=b6			// B    (ignore return address)
+	br.cond.spnt ia64_trace_syscall		// B
+END(paravirt_fsys_bubble_down)
+
+	.rodata
+	.align 8
+	.globl paravirt_fsyscall_table
+
+	data8 paravirt_fsys_bubble_down
+paravirt_fsyscall_table:
+	data8 fsys_ni_syscall
+	data8 0				// exit			// 1025
+	data8 0				// read
+	data8 0				// write
+	data8 0				// open
+	data8 0				// close
+	data8 0				// creat		// 1030
+	data8 0				// link
+	data8 0				// unlink
+	data8 0				// execve
+	data8 0				// chdir
+	data8 0				// fchdir		// 1035
+	data8 0				// utimes
+	data8 0				// mknod
+	data8 0				// chmod
+	data8 0				// chown
+	data8 0				// lseek		// 1040
+	data8 fsys_getpid		// getpid
+	data8 0				// getppid
+	data8 0				// mount
+	data8 0				// umount
+	data8 0				// setuid		// 1045
+	data8 0				// getuid
+	data8 0				// geteuid
+	data8 0				// ptrace
+	data8 0				// access
+	data8 0				// sync			// 1050
+	data8 0				// fsync
+	data8 0				// fdatasync
+	data8 0				// kill
+	data8 0				// rename
+	data8 0				// mkdir		// 1055
+	data8 0				// rmdir
+	data8 0				// dup
+	data8 0				// pipe
+	data8 0				// times
+	data8 0				// brk			// 1060
+	data8 0				// setgid
+	data8 0				// getgid
+	data8 0				// getegid
+	data8 0				// acct
+	data8 0				// ioctl		// 1065
+	data8 0				// fcntl
+	data8 0				// umask
+	data8 0				// chroot
+	data8 0				// ustat
+	data8 0				// dup2			// 1070
+	data8 0				// setreuid
+	data8 0				// setregid
+	data8 0				// getresuid
+	data8 0				// setresuid
+	data8 0				// getresgid		// 1075
+	data8 0				// setresgid
+	data8 0				// getgroups
+	data8 0				// setgroups
+	data8 0				// getpgid
+	data8 0				// setpgid		// 1080
+	data8 0				// setsid
+	data8 0				// getsid
+	data8 0				// sethostname
+	data8 0				// setrlimit
+	data8 0				// getrlimit		// 1085
+	data8 0				// getrusage
+	data8 fsys_gettimeofday		// gettimeofday
+	data8 0				// settimeofday
+	data8 0				// select
+	data8 0				// poll			// 1090
+	data8 0				// symlink
+	data8 0				// readlink
+	data8 0				// uselib
+	data8 0				// swapon
+	data8 0				// swapoff		// 1095
+	data8 0				// reboot
+	data8 0				// truncate
+	data8 0				// ftruncate
+	data8 0				// fchmod
+	data8 0				// fchown		// 1100
+	data8 0				// getpriority
+	data8 0				// setpriority
+	data8 0				// statfs
+	data8 0				// fstatfs
+	data8 0				// gettid		// 1105
+	data8 0				// semget
+	data8 0				// semop
+	data8 0				// semctl
+	data8 0				// msgget
+	data8 0				// msgsnd		// 1110
+	data8 0				// msgrcv
+	data8 0				// msgctl
+	data8 0				// shmget
+	data8 0				// shmat
+	data8 0				// shmdt		// 1115
+	data8 0				// shmctl
+	data8 0				// syslog
+	data8 0				// setitimer
+	data8 0				// getitimer
+	data8 0					 		// 1120
+	data8 0
+	data8 0
+	data8 0				// vhangup
+	data8 0				// lchown
+	data8 0				// remap_file_pages	// 1125
+	data8 0				// wait4
+	data8 0				// sysinfo
+	data8 0				// clone
+	data8 0				// setdomainname
+	data8 0				// newuname		// 1130
+	data8 0				// adjtimex
+	data8 0
+	data8 0				// init_module
+	data8 0				// delete_module
+	data8 0							// 1135
+	data8 0
+	data8 0				// quotactl
+	data8 0				// bdflush
+	data8 0				// sysfs
+	data8 0				// personality		// 1140
+	data8 0				// afs_syscall
+	data8 0				// setfsuid
+	data8 0				// setfsgid
+	data8 0				// getdents
+	data8 0				// flock		// 1145
+	data8 0				// readv
+	data8 0				// writev
+	data8 0				// pread64
+	data8 0				// pwrite64
+	data8 0				// sysctl		// 1150
+	data8 0				// mmap
+	data8 0				// munmap
+	data8 0				// mlock
+	data8 0				// mlockall
+	data8 0				// mprotect		// 1155
+	data8 0				// mremap
+	data8 0				// msync
+	data8 0				// munlock
+	data8 0				// munlockall
+	data8 0				// sched_getparam	// 1160
+	data8 0				// sched_setparam
+	data8 0				// sched_getscheduler
+	data8 0				// sched_setscheduler
+	data8 0				// sched_yield
+	data8 0				// sched_get_priority_max	// 1165
+	data8 0				// sched_get_priority_min
+	data8 0				// sched_rr_get_interval
+	data8 0				// nanosleep
+	data8 0				// nfsservctl
+	data8 0				// prctl		// 1170
+	data8 0				// getpagesize
+	data8 0				// mmap2
+	data8 0				// pciconfig_read
+	data8 0				// pciconfig_write
+	data8 0				// perfmonctl		// 1175
+	data8 0				// sigaltstack
+	data8 0				// rt_sigaction
+	data8 0				// rt_sigpending
+	data8 0				// rt_sigprocmask
+	data8 0				// rt_sigqueueinfo	// 1180
+	data8 0				// rt_sigreturn
+	data8 0				// rt_sigsuspend
+	data8 0				// rt_sigtimedwait
+	data8 0				// getcwd
+	data8 0				// capget		// 1185
+	data8 0				// capset
+	data8 0				// sendfile
+	data8 0
+	data8 0
+	data8 0				// socket		// 1190
+	data8 0				// bind
+	data8 0				// connect
+	data8 0				// listen
+	data8 0				// accept
+	data8 0				// getsockname		// 1195
+	data8 0				// getpeername
+	data8 0				// socketpair
+	data8 0				// send
+	data8 0				// sendto
+	data8 0				// recv			// 1200
+	data8 0				// recvfrom
+	data8 0				// shutdown
+	data8 0				// setsockopt
+	data8 0				// getsockopt
+	data8 0				// sendmsg		// 1205
+	data8 0				// recvmsg
+	data8 0				// pivot_root
+	data8 0				// mincore
+	data8 0				// madvise
+	data8 0				// newstat		// 1210
+	data8 0				// newlstat
+	data8 0				// newfstat
+	data8 0				// clone2
+	data8 0				// getdents64
+	data8 0				// getunwind		// 1215
+	data8 0				// readahead
+	data8 0				// setxattr
+	data8 0				// lsetxattr
+	data8 0				// fsetxattr
+	data8 0				// getxattr		// 1220
+	data8 0				// lgetxattr
+	data8 0				// fgetxattr
+	data8 0				// listxattr
+	data8 0				// llistxattr
+	data8 0				// flistxattr		// 1225
+	data8 0				// removexattr
+	data8 0				// lremovexattr
+	data8 0				// fremovexattr
+	data8 0				// tkill
+	data8 0				// futex		// 1230
+	data8 0				// sched_setaffinity
+	data8 0				// sched_getaffinity
+	data8 fsys_set_tid_address	// set_tid_address
+	data8 0				// fadvise64_64
+	data8 0				// tgkill		// 1235
+	data8 0				// exit_group
+	data8 0				// lookup_dcookie
+	data8 0				// io_setup
+	data8 0				// io_destroy
+	data8 0				// io_getevents		// 1240
+	data8 0				// io_submit
+	data8 0				// io_cancel
+	data8 0				// epoll_create
+	data8 0				// epoll_ctl
+	data8 0				// epoll_wait		// 1245
+	data8 0				// restart_syscall
+	data8 0				// semtimedop
+	data8 0				// timer_create
+	data8 0				// timer_settime
+	data8 0				// timer_gettime 	// 1250
+	data8 0				// timer_getoverrun
+	data8 0				// timer_delete
+	data8 0				// clock_settime
+	data8 fsys_clock_gettime	// clock_gettime
+	data8 0				// clock_getres		// 1255
+	data8 0				// clock_nanosleep
+	data8 0				// fstatfs64
+	data8 0				// statfs64
+	data8 0				// mbind
+	data8 0				// get_mempolicy	// 1260
+	data8 0				// set_mempolicy
+	data8 0				// mq_open
+	data8 0				// mq_unlink
+	data8 0				// mq_timedsend
+	data8 0				// mq_timedreceive	// 1265
+	data8 0				// mq_notify
+	data8 0				// mq_getsetattr
+	data8 0				// kexec_load
+	data8 0				// vserver
+	data8 0				// waitid		// 1270
+	data8 0				// add_key
+	data8 0				// request_key
+	data8 0				// keyctl
+	data8 0				// ioprio_set
+	data8 0				// ioprio_get		// 1275
+	data8 0				// move_pages
+	data8 0				// inotify_init
+	data8 0				// inotify_add_watch
+	data8 0				// inotify_rm_watch
+	data8 0				// migrate_pages	// 1280
+	data8 0				// openat
+	data8 0				// mkdirat
+	data8 0				// mknodat
+	data8 0				// fchownat
+	data8 0				// futimesat		// 1285
+	data8 0				// newfstatat
+	data8 0				// unlinkat
+	data8 0				// renameat
+	data8 0				// linkat
+	data8 0				// symlinkat		// 1290
+	data8 0				// readlinkat
+	data8 0				// fchmodat
+	data8 0				// faccessat
+	data8 0
+	data8 0							// 1295
+	data8 0				// unshare
+	data8 0				// splice
+	data8 0				// set_robust_list
+	data8 0				// get_robust_list
+	data8 0				// sync_file_range	// 1300
+	data8 0				// tee
+	data8 0				// vmsplice
+	data8 0
+	data8 fsys_getcpu		// getcpu		// 1304
+
+	// fill in zeros for the remaining entries
+	.zero:
+	.space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0
--- a/arch/ia64/kernel/fsyscall_gtod_data.h
+++ b/arch/ia64/kernel/fsyscall_gtod_data.h
@ -0,0 +1,23 @@
+/*
+ * (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
+ *        Contributed by Peter Keilty <peter.keilty@hp.com>
+ *
+ * fsyscall gettimeofday data
+ */
+
+struct fsyscall_gtod_data_t {
+	seqcount_t	seq;
+	struct timespec	wall_time;
+	struct timespec monotonic_time;
+	cycle_t		clk_mask;
+	u32		clk_mult;
+	u32		clk_shift;
+	void		*clk_fsys_mmio;
+	cycle_t		clk_cycle_last;
+} ____cacheline_aligned;
+
+struct itc_jitter_data_t {
+	int		itc_jitter;
+	cycle_t		itc_lastcycle;
+} ____cacheline_aligned;
+
--- a/arch/ia64/kernel/ftrace.c
+++ b/arch/ia64/kernel/ftrace.c
@ -0,0 +1,204 @@
+/*
+ * Dynamic function tracing support.
+ *
+ * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
+ *
+ * For licencing details, see COPYING.
+ *
+ * Defines low-level handling of mcount calls when the kernel
+ * is compiled with the -pg flag. When using dynamic ftrace, the
+ * mcount call-sites get patched lazily with NOP till they are
+ * enabled. All code mutation routines here take effect atomically.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/patch.h>
+
+/* In IA64, each function will be added below two bundles with -pg option */
+static unsigned char __attribute__((aligned(8)))
+ftrace_orig_code[MCOUNT_INSN_SIZE] = {
+	0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
+	0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
+	0x05, 0x00, 0xc4, 0x00,             /* mov r42=b0 */
+	0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
+	0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
+	0x08, 0x00, 0x00, 0x50              /* br.call.sptk.many b0 = _mcount;; */
+};
+
+struct ftrace_orig_insn {
+	u64 dummy1, dummy2, dummy3;
+	u64 dummy4:64-41+13;
+	u64 imm20:20;
+	u64 dummy5:3;
+	u64 sign:1;
+	u64 dummy6:4;
+};
+
+/* mcount stub will be converted below for nop */
+static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
+	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+	0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+	0x00, 0x00, 0x04, 0x00,             /* nop.i 0x0 */
+	0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */
+	0x00, 0x00, 0x04, 0x00
+};
+
+static unsigned char *ftrace_nop_replace(void)
+{
+	return ftrace_nop_code;
+}
+
+/*
+ * mcount stub will be converted below for call
+ * Note: Just the last instruction is changed against nop
+ * */
+static unsigned char __attribute__((aligned(8)))
+ftrace_call_code[MCOUNT_INSN_SIZE] = {
+	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+	0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+	0x00, 0x00, 0x04, 0x00,             /* nop.i 0x0 */
+	0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+	0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/
+	0xf8, 0xff, 0xff, 0xc8
+};
+
+struct ftrace_call_insn {
+	u64 dummy1, dummy2;
+	u64 dummy3:48;
+	u64 imm39_l:16;
+	u64 imm39_h:23;
+	u64 dummy4:13;
+	u64 imm20:20;
+	u64 dummy5:3;
+	u64 i:1;
+	u64 dummy6:4;
+};
+
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+	struct ftrace_call_insn *code = (void *)ftrace_call_code;
+	unsigned long offset = addr - (ip + 0x10);
+
+	code->imm39_l = offset >> 24;
+	code->imm39_h = offset >> 40;
+	code->imm20 = offset >> 4;
+	code->i = offset >> 63;
+	return ftrace_call_code;
+}
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+		   unsigned char *new_code, int do_check)
+{
+	unsigned char replaced[MCOUNT_INSN_SIZE];
+
+	/*
+	 * Note: Due to modules and __init, code can
+	 *  disappear and change, we need to protect against faulting
+	 *  as well as code changing. We do this by using the
+	 *  probe_kernel_* functions.
+	 *
+	 * No real locking needed, this code is run through
+	 * kstop_machine, or before SMP starts.
+	 */
+
+	if (!do_check)
+		goto skip_check;
+
+	/* read the text we want to modify */
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* Make sure it is what we expect it to be */
+	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+		return -EINVAL;
+
+skip_check:
+	/* replace the text with the new text */
+	if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE))
+		return -EPERM;
+	flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
+
+	return 0;
+}
+
+static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE];
+	unsigned long ip = rec->ip;
+
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+	if (rec->flags & FTRACE_FL_CONVERTED) {
+		struct ftrace_call_insn *call_insn, *tmp_call;
+
+		call_insn = (void *)ftrace_call_code;
+		tmp_call = (void *)replaced;
+		call_insn->imm39_l = tmp_call->imm39_l;
+		call_insn->imm39_h = tmp_call->imm39_h;
+		call_insn->imm20 = tmp_call->imm20;
+		call_insn->i = tmp_call->i;
+		if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0)
+			return -EINVAL;
+		return 0;
+	} else {
+		struct ftrace_orig_insn *call_insn, *tmp_call;
+
+		call_insn = (void *)ftrace_orig_code;
+		tmp_call = (void *)replaced;
+		call_insn->sign = tmp_call->sign;
+		call_insn->imm20 = tmp_call->imm20;
+		if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0)
+			return -EINVAL;
+		return 0;
+	}
+}
+
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	int ret;
+	char *new;
+
+	ret = ftrace_make_nop_check(rec, addr);
+	if (ret)
+		return ret;
+	new = ftrace_nop_replace();
+	return ftrace_modify_code(rec->ip, NULL, new, 0);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned char *old, *new;
+
+	old=  ftrace_nop_replace();
+	new = ftrace_call_replace(ip, addr);
+	return ftrace_modify_code(ip, old, new, 1);
+}
+
+/* in IA64, _mcount can't directly call ftrace_stub. Only jump is ok */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long ip;
+	unsigned long addr = ((struct fnptr *)ftrace_call)->ip;
+
+	if (func == ftrace_stub)
+		return 0;
+	ip = ((struct fnptr *)func)->ip;
+
+	ia64_patch_imm64(addr + 2, ip);
+
+	flush_icache_range(addr, addr + 16);
+	return 0;
+}
+
+/* run from kstop_machine */
+int __init ftrace_dyn_arch_init(void)
+{
+	return 0;
+}
--- a/arch/ia64/kernel/gate-data.S
+++ b/arch/ia64/kernel/gate-data.S
@ -0,0 +1,3 @@
+	.section .data..gate, "aw"
+
+	.incbin "arch/ia64/kernel/gate.so"
--- a/arch/ia64/kernel/gate.S
+++ b/arch/ia64/kernel/gate.S
@ -0,0 +1,386 @@
+/*
+ * This file contains the code that gets mapped at the upper end of each task's text
+ * region.  For now, it contains the signal trampoline code only.
+ *
+ * Copyright (C) 1999-2003 Hewlett-Packard Co
+ * 	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+
+#include <asm/asmmacro.h>
+#include <asm/errno.h>
+#include <asm/asm-offsets.h>
+#include <asm/sigcontext.h>
+#include <asm/unistd.h>
+#include <asm/kregs.h>
+#include <asm/page.h>
+#include "paravirt_inst.h"
+
+/*
+ * We can't easily refer to symbols inside the kernel.  To avoid full runtime relocation,
+ * complications with the linker (which likes to create PLT stubs for branches
+ * to targets outside the shared object) and to avoid multi-phase kernel builds, we
+ * simply create minimalistic "patch lists" in special ELF sections.
+ */
+	.section ".data..patch.fsyscall_table", "a"
+	.previous
+#define LOAD_FSYSCALL_TABLE(reg)			\
+[1:]	movl reg=0;					\
+	.xdata4 ".data..patch.fsyscall_table", 1b-.
+
+	.section ".data..patch.brl_fsys_bubble_down", "a"
+	.previous
+#define BRL_COND_FSYS_BUBBLE_DOWN(pr)			\
+[1:](pr)brl.cond.sptk 0;				\
+	;;						\
+	.xdata4 ".data..patch.brl_fsys_bubble_down", 1b-.
+
+GLOBAL_ENTRY(__kernel_syscall_via_break)
+	.prologue
+	.altrp b6
+	.body
+	/*
+	 * Note: for (fast) syscall restart to work, the break instruction must be
+	 *	 the first one in the bundle addressed by syscall_via_break.
+	 */
+{ .mib
+	break 0x100000
+	nop.i 0
+	br.ret.sptk.many b6
+}
+END(__kernel_syscall_via_break)
+
+#	define ARG0_OFF		(16 + IA64_SIGFRAME_ARG0_OFFSET)
+#	define ARG1_OFF		(16 + IA64_SIGFRAME_ARG1_OFFSET)
+#	define ARG2_OFF		(16 + IA64_SIGFRAME_ARG2_OFFSET)
+#	define SIGHANDLER_OFF	(16 + IA64_SIGFRAME_HANDLER_OFFSET)
+#	define SIGCONTEXT_OFF	(16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET)
+
+#	define FLAGS_OFF	IA64_SIGCONTEXT_FLAGS_OFFSET
+#	define CFM_OFF		IA64_SIGCONTEXT_CFM_OFFSET
+#	define FR6_OFF		IA64_SIGCONTEXT_FR6_OFFSET
+#	define BSP_OFF		IA64_SIGCONTEXT_AR_BSP_OFFSET
+#	define RNAT_OFF		IA64_SIGCONTEXT_AR_RNAT_OFFSET
+#	define UNAT_OFF		IA64_SIGCONTEXT_AR_UNAT_OFFSET
+#	define FPSR_OFF		IA64_SIGCONTEXT_AR_FPSR_OFFSET
+#	define PR_OFF		IA64_SIGCONTEXT_PR_OFFSET
+#	define RP_OFF		IA64_SIGCONTEXT_IP_OFFSET
+#	define SP_OFF		IA64_SIGCONTEXT_R12_OFFSET
+#	define RBS_BASE_OFF	IA64_SIGCONTEXT_RBS_BASE_OFFSET
+#	define LOADRS_OFF	IA64_SIGCONTEXT_LOADRS_OFFSET
+#	define base0		r2
+#	define base1		r3
+	/*
+	 * When we get here, the memory stack looks like this:
+	 *
+	 *   +===============================+
+       	 *   |				     |
+       	 *   //	    struct sigframe          //
+       	 *   |				     |
+	 *   +-------------------------------+ <-- sp+16
+	 *   |      16 byte of scratch       |
+	 *   |            space              |
+	 *   +-------------------------------+ <-- sp
+	 *
+	 * The register stack looks _exactly_ the way it looked at the time the signal
+	 * occurred.  In other words, we're treading on a potential mine-field: each
+	 * incoming general register may be a NaT value (including sp, in which case the
+	 * process ends up dying with a SIGSEGV).
+	 *
+	 * The first thing need to do is a cover to get the registers onto the backing
+	 * store.  Once that is done, we invoke the signal handler which may modify some
+	 * of the machine state.  After returning from the signal handler, we return
+	 * control to the previous context by executing a sigreturn system call.  A signal
+	 * handler may call the rt_sigreturn() function to directly return to a given
+	 * sigcontext.  However, the user-level sigreturn() needs to do much more than
+	 * calling the rt_sigreturn() system call as it needs to unwind the stack to
+	 * restore preserved registers that may have been saved on the signal handler's
+	 * call stack.
+	 */
+
+#define SIGTRAMP_SAVES										\
+	.unwabi 3, 's';		/* mark this as a sigtramp handler (saves scratch regs) */	\
+	.unwabi @svr4, 's'; /* backwards compatibility with old unwinders (remove in v2.7) */	\
+	.savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF;						\
+	.savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF;						\
+	.savesp pr, PR_OFF+SIGCONTEXT_OFF;     							\
+	.savesp rp, RP_OFF+SIGCONTEXT_OFF;							\
+	.savesp ar.pfs, CFM_OFF+SIGCONTEXT_OFF;							\
+	.vframesp SP_OFF+SIGCONTEXT_OFF
+
+GLOBAL_ENTRY(__kernel_sigtramp)
+	// describe the state that is active when we get here:
+	.prologue
+	SIGTRAMP_SAVES
+	.body
+
+	.label_state 1
+
+	adds base0=SIGHANDLER_OFF,sp
+	adds base1=RBS_BASE_OFF+SIGCONTEXT_OFF,sp
+	br.call.sptk.many rp=1f
+1:
+	ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF)	// get pointer to signal handler's plabel
+	ld8 r15=[base1]					// get address of new RBS base (or NULL)
+	cover				// push args in interrupted frame onto backing store
+	;;
+	cmp.ne p1,p0=r15,r0		// do we need to switch rbs? (note: pr is saved by kernel)
+	mov.m r9=ar.bsp			// fetch ar.bsp
+	.spillsp.p p1, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
+(p1)	br.cond.spnt setup_rbs		// yup -> (clobbers p8, r14-r16, and r18-r20)
+back_from_setup_rbs:
+	alloc r8=ar.pfs,0,0,3,0
+	ld8 out0=[base0],16		// load arg0 (signum)
+	adds base1=(ARG1_OFF-(RBS_BASE_OFF+SIGCONTEXT_OFF)),base1
+	;;
+	ld8 out1=[base1]		// load arg1 (siginfop)
+	ld8 r10=[r17],8			// get signal handler entry point
+	;;
+	ld8 out2=[base0]		// load arg2 (sigcontextp)
+	ld8 gp=[r17]			// get signal handler's global pointer
+	adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
+	;;
+	.spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF
+	st8 [base0]=r9			// save sc_ar_bsp
+	adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
+	adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
+	;;
+	stf.spill [base0]=f6,32
+	stf.spill [base1]=f7,32
+	;;
+	stf.spill [base0]=f8,32
+	stf.spill [base1]=f9,32
+	mov b6=r10
+	;;
+	stf.spill [base0]=f10,32
+	stf.spill [base1]=f11,32
+	;;
+	stf.spill [base0]=f12,32
+	stf.spill [base1]=f13,32
+	;;
+	stf.spill [base0]=f14,32
+	stf.spill [base1]=f15,32
+	br.call.sptk.many rp=b6			// call the signal handler
+.ret0:	adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
+	;;
+	ld8 r15=[base0]				// fetch sc_ar_bsp
+	mov r14=ar.bsp
+	;;
+	cmp.ne p1,p0=r14,r15			// do we need to restore the rbs?
+(p1)	br.cond.spnt restore_rbs		// yup -> (clobbers r14-r18, f6 & f7)
+	;;
+back_from_restore_rbs:
+	adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
+	adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
+	;;
+	ldf.fill f6=[base0],32
+	ldf.fill f7=[base1],32
+	;;
+	ldf.fill f8=[base0],32
+	ldf.fill f9=[base1],32
+	;;
+	ldf.fill f10=[base0],32
+	ldf.fill f11=[base1],32
+	;;
+	ldf.fill f12=[base0],32
+	ldf.fill f13=[base1],32
+	;;
+	ldf.fill f14=[base0],32
+	ldf.fill f15=[base1],32
+	mov r15=__NR_rt_sigreturn
+	.restore sp				// pop .prologue
+	break __BREAK_SYSCALL
+
+	.prologue
+	SIGTRAMP_SAVES
+setup_rbs:
+	mov ar.rsc=0				// put RSE into enforced lazy mode
+	;;
+	.save ar.rnat, r19
+	mov r19=ar.rnat				// save RNaT before switching backing store area
+	adds r14=(RNAT_OFF+SIGCONTEXT_OFF),sp
+
+	mov r18=ar.bspstore
+	mov ar.bspstore=r15			// switch over to new register backing store area
+	;;
+
+	.spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
+	st8 [r14]=r19				// save sc_ar_rnat
+	.body
+	mov.m r16=ar.bsp			// sc_loadrs <- (new bsp - new bspstore) << 16
+	adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp
+	;;
+	invala
+	sub r15=r16,r15
+	extr.u r20=r18,3,6
+	;;
+	mov ar.rsc=0xf				// set RSE into eager mode, pl 3
+	cmp.eq p8,p0=63,r20
+	shl r15=r15,16
+	;;
+	st8 [r14]=r15				// save sc_loadrs
+(p8)	st8 [r18]=r19		// if bspstore points at RNaT slot, store RNaT there now
+	.restore sp				// pop .prologue
+	br.cond.sptk back_from_setup_rbs
+
+	.prologue
+	SIGTRAMP_SAVES
+	.spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
+	.body
+restore_rbs:
+	// On input:
+	//	r14 = bsp1 (bsp at the time of return from signal handler)
+	//	r15 = bsp0 (bsp at the time the signal occurred)
+	//
+	// Here, we need to calculate bspstore0, the value that ar.bspstore needs
+	// to be set to, based on bsp0 and the size of the dirty partition on
+	// the alternate stack (sc_loadrs >> 16).  This can be done with the
+	// following algorithm:
+	//
+	//  bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1));
+	//
+	// This is what the code below does.
+	//
+	alloc r2=ar.pfs,0,0,0,0			// alloc null frame
+	adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp
+	adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp
+	;;
+	ld8 r17=[r16]
+	ld8 r16=[r18]			// get new rnat
+	extr.u r18=r15,3,6	// r18 <- rse_slot_num(bsp0)
+	;;
+	mov ar.rsc=r17			// put RSE into enforced lazy mode
+	shr.u r17=r17,16
+	;;
+	sub r14=r14,r17		// r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16)
+	shr.u r17=r17,3		// r17 <- (sc_loadrs >> 19)
+	;;
+	loadrs			// restore dirty partition
+	extr.u r14=r14,3,6	// r14 <- rse_slot_num(bspstore1)
+	;;
+	add r14=r14,r17		// r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19)
+	;;
+	shr.u r14=r14,6		// r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40
+	;;
+	sub r14=r14,r17		// r14 <- -rse_num_regs(bspstore1, bsp1)
+	movl r17=0x8208208208208209
+	;;
+	add r18=r18,r14		// r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1)
+	setf.sig f7=r17
+	cmp.lt p7,p0=r14,r0	// p7 <- (r14 < 0)?
+	;;
+(p7)	adds r18=-62,r18	// delta -= 62
+	;;
+	setf.sig f6=r18
+	;;
+	xmpy.h f6=f6,f7
+	;;
+	getf.sig r17=f6
+	;;
+	add r17=r17,r18
+	shr r18=r18,63
+	;;
+	shr r17=r17,5
+	;;
+	sub r17=r17,r18		// r17 = delta/63
+	;;
+	add r17=r14,r17		// r17 <- delta/63 - rse_num_regs(bspstore1, bsp1)
+	;;
+	shladd r15=r17,3,r15	// r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1))
+	;;
+	mov ar.bspstore=r15			// switch back to old register backing store area
+	;;
+	mov ar.rnat=r16				// restore RNaT
+	mov ar.rsc=0xf				// (will be restored later on from sc_ar_rsc)
+	// invala not necessary as that will happen when returning to user-mode
+	br.cond.sptk back_from_restore_rbs
+END(__kernel_sigtramp)
+
+/*
+ * On entry:
+ *	r11 = saved ar.pfs
+ *	r15 = system call #
+ *	b0  = saved return address
+ *	b6  = return address
+ * On exit:
+ *	r11 = saved ar.pfs
+ *	r15 = system call #
+ *	b0  = saved return address
+ *	all other "scratch" registers:	undefined
+ *	all "preserved" registers:	same as on entry
+ */
+
+GLOBAL_ENTRY(__kernel_syscall_via_epc)
+	.prologue
+	.altrp b6
+	.body
+{
+	/*
+	 * Note: the kernel cannot assume that the first two instructions in this
+	 * bundle get executed.  The remaining code must be safe even if
+	 * they do not get executed.
+	 */
+	adds r17=-1024,r15			// A
+	mov r10=0				// A    default to successful syscall execution
+	epc					// B	causes split-issue
+}
+	;;
+	RSM_PSR_BE_I(r20, r22)			// M2 (5 cyc to srlz.d)
+	LOAD_FSYSCALL_TABLE(r14)		// X
+	;;
+	mov r16=IA64_KR(CURRENT)		// M2 (12 cyc)
+	shladd r18=r17,3,r14			// A
+	mov r19=NR_syscalls-1			// A
+	;;
+	lfetch [r18]				// M0|1
+	MOV_FROM_PSR(p0, r29, r8)		// M2 (12 cyc)
+	// If r17 is a NaT, p6 will be zero
+	cmp.geu p6,p7=r19,r17			// A    (sysnr > 0 && sysnr < 1024+NR_syscalls)?
+	;;
+	mov r21=ar.fpsr				// M2 (12 cyc)
+	tnat.nz p10,p9=r15			// I0
+	mov.i r26=ar.pfs			// I0 (would stall anyhow due to srlz.d...)
+	;;
+	srlz.d					// M0 (forces split-issue) ensure PSR.BE==0
+(p6)	ld8 r18=[r18]				// M0|1
+	nop.i 0
+	;;
+	nop.m 0
+(p6)	tbit.z.unc p8,p0=r18,0			// I0 (dual-issues with "mov b7=r18"!)
+	nop.i 0
+	;;
+	SSM_PSR_I(p8, p14, r25)
+(p6)	mov b7=r18				// I0
+(p8)	br.dptk.many b7				// B
+
+	mov r27=ar.rsc				// M2 (12 cyc)
+/*
+ * brl.cond doesn't work as intended because the linker would convert this branch
+ * into a branch to a PLT.  Perhaps there will be a way to avoid this with some
+ * future version of the linker.  In the meantime, we just use an indirect branch
+ * instead.
+ */
+#ifdef CONFIG_ITANIUM
+(p6)	add r14=-8,r14				// r14 <- addr of fsys_bubble_down entry
+	;;
+(p6)	ld8 r14=[r14]				// r14 <- fsys_bubble_down
+	;;
+(p6)	mov b7=r14
+(p6)	br.sptk.many b7
+#else
+	BRL_COND_FSYS_BUBBLE_DOWN(p6)
+#endif
+	SSM_PSR_I(p0, p14, r10)
+	mov r10=-1
+(p10)	mov r8=EINVAL
+(p9)	mov r8=ENOSYS
+	FSYS_RETURN
+
+#ifdef CONFIG_PARAVIRT
+	/*
+	 * padd to make the size of this symbol constant
+	 * independent of paravirtualization.
+	 */
+	.align PAGE_SIZE / 8
+#endif
+END(__kernel_syscall_via_epc)
--- a/arch/ia64/kernel/gate.lds.S
+++ b/arch/ia64/kernel/gate.lds.S
@ -0,0 +1,108 @@
+/*
+ * Linker script for gate DSO.  The gate pages are an ELF shared object
+ * prelinked to its virtual address, with only one read-only segment and
+ * one execute-only segment (both fit in one page).  This script controls
+ * its layout.
+ */
+
+#include <asm/page.h>
+#include "paravirt_patchlist.h"
+
+SECTIONS
+{
+	. = GATE_ADDR + SIZEOF_HEADERS;
+
+	.hash			: { *(.hash) }		:readable
+	.gnu.hash		: { *(.gnu.hash) }
+	.dynsym			: { *(.dynsym) }
+	.dynstr			: { *(.dynstr) }
+	.gnu.version		: { *(.gnu.version) }
+	.gnu.version_d		: { *(.gnu.version_d) }
+	.gnu.version_r		: { *(.gnu.version_r) }
+
+	.note			: { *(.note*) }		:readable	:note
+
+	.dynamic		: { *(.dynamic) }	:readable	:dynamic
+
+	/*
+	 * This linker script is used both with -r and with -shared.  For
+	 * the layouts to match, we need to skip more than enough space for
+	 * the dynamic symbol table et al.  If this amount is insufficient,
+	 * ld -shared will barf.  Just increase it here.
+	 */
+	. = GATE_ADDR + 0x600;
+
+	.data..patch		: {
+		__paravirt_start_gate_mckinley_e9_patchlist = .;
+		*(.data..patch.mckinley_e9)
+		__paravirt_end_gate_mckinley_e9_patchlist = .;
+
+		__paravirt_start_gate_vtop_patchlist = .;
+		*(.data..patch.vtop)
+		__paravirt_end_gate_vtop_patchlist = .;
+
+		__paravirt_start_gate_fsyscall_patchlist = .;
+		*(.data..patch.fsyscall_table)
+		__paravirt_end_gate_fsyscall_patchlist = .;
+
+		__paravirt_start_gate_brl_fsys_bubble_down_patchlist = .;
+		*(.data..patch.brl_fsys_bubble_down)
+		__paravirt_end_gate_brl_fsys_bubble_down_patchlist = .;
+	}						:readable
+
+	.IA_64.unwind_info	: { *(.IA_64.unwind_info*) }
+	.IA_64.unwind		: { *(.IA_64.unwind*) }	:readable	:unwind
+#ifdef HAVE_BUGGY_SEGREL
+	.text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) }	:readable
+#else
+	. = ALIGN(PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1));
+	.text			: { *(.text) *(.text.*) }	:epc
+#endif
+
+	/DISCARD/		: {
+		*(.got.plt) *(.got)
+		*(.data .data.* .gnu.linkonce.d.*)
+		*(.dynbss)
+		*(.bss .bss.* .gnu.linkonce.b.*)
+		*(__ex_table)
+		*(__mca_table)
+	}
+}
+
+/*
+ * ld does not recognize this name token; use the constant.
+ */
+#define	PT_IA_64_UNWIND	0x70000001
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+	readable	PT_LOAD	FILEHDR	PHDRS	FLAGS(4);	/* PF_R */
+#ifndef HAVE_BUGGY_SEGREL
+	epc		PT_LOAD	FILEHDR PHDRS	FLAGS(1);	/* PF_X */
+#endif
+	dynamic		PT_DYNAMIC		FLAGS(4);	/* PF_R */
+	note		PT_NOTE			FLAGS(4);	/* PF_R */
+	unwind		PT_IA_64_UNWIND;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+	LINUX_2.5 {
+	global:
+		__kernel_syscall_via_break;
+		__kernel_syscall_via_epc;
+		__kernel_sigtramp;
+
+	local: *;
+	};
+}
+
+/* The ELF entry point can be used to set the AT_SYSINFO value.  */
+ENTRY(__kernel_syscall_via_epc)
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@ -0,0 +1,98 @@
+/*
+ * Architecture-specific kernel symbols
+ *
+ * Don't put any exports here unless it's defined in an assembler file.
+ * All other exports should be put directly after the definition.
+ */
+
+#include <linux/module.h>
+
+#include <linux/string.h>
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(strlen);
+
+#include <asm/pgtable.h>
+EXPORT_SYMBOL_GPL(empty_zero_page);
+
+#include <asm/checksum.h>
+EXPORT_SYMBOL(ip_fast_csum);		/* hand-coded assembly */
+EXPORT_SYMBOL(csum_ipv6_magic);
+
+#include <asm/page.h>
+EXPORT_SYMBOL(clear_page);
+EXPORT_SYMBOL(copy_page);
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+#include <linux/bootmem.h>
+EXPORT_SYMBOL(min_low_pfn);	/* defined by bootmem.c, but not exported by generic code */
+EXPORT_SYMBOL(max_low_pfn);	/* defined by bootmem.c, but not exported by generic code */
+#endif
+
+#include <asm/processor.h>
+EXPORT_SYMBOL(ia64_cpu_info);
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(local_per_cpu_offset);
+#endif
+
+#include <asm/uaccess.h>
+EXPORT_SYMBOL(__copy_user);
+EXPORT_SYMBOL(__do_clear_user);
+EXPORT_SYMBOL(__strlen_user);
+EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(__strnlen_user);
+
+/* from arch/ia64/lib */
+extern void __divsi3(void);
+extern void __udivsi3(void);
+extern void __modsi3(void);
+extern void __umodsi3(void);
+extern void __divdi3(void);
+extern void __udivdi3(void);
+extern void __moddi3(void);
+extern void __umoddi3(void);
+
+EXPORT_SYMBOL(__divsi3);
+EXPORT_SYMBOL(__udivsi3);
+EXPORT_SYMBOL(__modsi3);
+EXPORT_SYMBOL(__umodsi3);
+EXPORT_SYMBOL(__divdi3);
+EXPORT_SYMBOL(__udivdi3);
+EXPORT_SYMBOL(__moddi3);
+EXPORT_SYMBOL(__umoddi3);
+
+#if defined(CONFIG_MD_RAID456) || defined(CONFIG_MD_RAID456_MODULE)
+extern void xor_ia64_2(void);
+extern void xor_ia64_3(void);
+extern void xor_ia64_4(void);
+extern void xor_ia64_5(void);
+
+EXPORT_SYMBOL(xor_ia64_2);
+EXPORT_SYMBOL(xor_ia64_3);
+EXPORT_SYMBOL(xor_ia64_4);
+EXPORT_SYMBOL(xor_ia64_5);
+#endif
+
+#include <asm/pal.h>
+EXPORT_SYMBOL(ia64_pal_call_phys_stacked);
+EXPORT_SYMBOL(ia64_pal_call_phys_static);
+EXPORT_SYMBOL(ia64_pal_call_stacked);
+EXPORT_SYMBOL(ia64_pal_call_static);
+EXPORT_SYMBOL(ia64_load_scratch_fpregs);
+EXPORT_SYMBOL(ia64_save_scratch_fpregs);
+
+#include <asm/unwind.h>
+EXPORT_SYMBOL(unw_init_running);
+
+#if defined(CONFIG_IA64_ESI) || defined(CONFIG_IA64_ESI_MODULE)
+extern void esi_call_phys (void);
+EXPORT_SYMBOL_GPL(esi_call_phys);
+#endif
+extern char ia64_ivt[];
+EXPORT_SYMBOL(ia64_ivt);
+
+#include <asm/ftrace.h>
+#ifdef CONFIG_FUNCTION_TRACER
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(_mcount);
+#endif
--- a/arch/ia64/kernel/init_task.c
+++ b/arch/ia64/kernel/init_task.c
@ -0,0 +1,42 @@
+/*
+ * This is where we statically allocate and initialize the initial
+ * task.
+ *
+ * Copyright (C) 1999, 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init_task.h>
+#include <linux/mqueue.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+/*
+ * Initial task structure.
+ *
+ * We need to make sure that this is properly aligned due to the way process stacks are
+ * handled. This is done by having a special ".data..init_task" section...
+ */
+#define init_thread_info	init_task_mem.s.thread_info
+
+union {
+	struct {
+		struct task_struct task;
+		struct thread_info thread_info;
+	} s;
+	unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
+} init_task_mem asm ("init_task") __init_task_data =
+	{{
+	.task =		INIT_TASK(init_task_mem.s.task),
+	.thread_info =	INIT_THREAD_INFO(init_task_mem.s.task)
+}};
+
+EXPORT_SYMBOL(init_task);
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@ -0,0 +1,202 @@
+/*
+ *	linux/arch/ia64/kernel/irq.c
+ *
+ *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQs should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ *
+ * Copyright (C) Ashok Raj<ashok.raj@intel.com>, Intel Corporation 2004
+ *
+ * 4/14/2004: Added code to handle cpu migration and do safe irq
+ *			migration without losing interrupts for iosapic
+ *			architecture.
+ */
+
+#include <asm/delay.h>
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+#include <asm/mca.h>
+
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+	printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id());
+}
+
+#ifdef CONFIG_IA64_GENERIC
+ia64_vector __ia64_irq_to_vector(int irq)
+{
+	return irq_cfg[irq].vector;
+}
+
+unsigned int __ia64_local_vector_to_irq (ia64_vector vec)
+{
+	return __this_cpu_read(vector_irq[vec]);
+}
+#endif
+
+/*
+ * Interrupt statistics:
+ */
+
+atomic_t irq_err_count;
+
+/*
+ * /proc/interrupts printing:
+ */
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
+
+void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
+{
+	if (irq < NR_IRQS) {
+		cpumask_copy(irq_get_irq_data(irq)->affinity,
+			     cpumask_of(cpu_logical_id(hwid)));
+		irq_redir[irq] = (char) (redir & 0xff);
+	}
+}
+
+bool is_affinity_mask_valid(const struct cpumask *cpumask)
+{
+	if (ia64_platform_is("sn2")) {
+		/* Only allow one CPU to be specified in the smp_affinity mask */
+		if (cpumask_weight(cpumask) != 1)
+			return false;
+	}
+	return true;
+}
+
+#endif /* CONFIG_SMP */
+
+int __init arch_early_irq_init(void)
+{
+	ia64_mca_irq_init();
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+unsigned int vectors_in_migration[NR_IRQS];
+
+/*
+ * Since cpu_online_mask is already updated, we just need to check for
+ * affinity that has zeros
+ */
+static void migrate_irqs(void)
+{
+	int 		irq, new_cpu;
+
+	for (irq=0; irq < NR_IRQS; irq++) {
+		struct irq_desc *desc = irq_to_desc(irq);
+		struct irq_data *data = irq_desc_get_irq_data(desc);
+		struct irq_chip *chip = irq_data_get_irq_chip(data);
+
+		if (irqd_irq_disabled(data))
+			continue;
+
+		/*
+		 * No handling for now.
+		 * TBD: Implement a disable function so we can now
+		 * tell CPU not to respond to these local intr sources.
+		 * such as ITV,CPEI,MCA etc.
+		 */
+		if (irqd_is_per_cpu(data))
+			continue;
+
+		if (cpumask_any_and(data->affinity, cpu_online_mask)
+		    >= nr_cpu_ids) {
+			/*
+			 * Save it for phase 2 processing
+			 */
+			vectors_in_migration[irq] = irq;
+
+			new_cpu = cpumask_any(cpu_online_mask);
+
+			/*
+			 * Al three are essential, currently WARN_ON.. maybe panic?
+			 */
+			if (chip && chip->irq_disable &&
+				chip->irq_enable && chip->irq_set_affinity) {
+				chip->irq_disable(data);
+				chip->irq_set_affinity(data,
+						       cpumask_of(new_cpu), false);
+				chip->irq_enable(data);
+			} else {
+				WARN_ON((!chip || !chip->irq_disable ||
+					 !chip->irq_enable ||
+					 !chip->irq_set_affinity));
+			}
+		}
+	}
+}
+
+void fixup_irqs(void)
+{
+	unsigned int irq;
+	extern void ia64_process_pending_intr(void);
+	extern volatile int time_keeper_id;
+
+	/* Mask ITV to disable timer */
+	ia64_set_itv(1 << 16);
+
+	/*
+	 * Find a new timesync master
+	 */
+	if (smp_processor_id() == time_keeper_id) {
+		time_keeper_id = cpumask_first(cpu_online_mask);
+		printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id);
+	}
+
+	/*
+	 * Phase 1: Locate IRQs bound to this cpu and
+	 * relocate them for cpu removal.
+	 */
+	migrate_irqs();
+
+	/*
+	 * Phase 2: Perform interrupt processing for all entries reported in
+	 * local APIC.
+	 */
+	ia64_process_pending_intr();
+
+	/*
+	 * Phase 3: Now handle any interrupts not captured in local APIC.
+	 * This is to account for cases that device interrupted during the time the
+	 * rte was being disabled and re-programmed.
+	 */
+	for (irq=0; irq < NR_IRQS; irq++) {
+		if (vectors_in_migration[irq]) {
+			struct pt_regs *old_regs = set_irq_regs(NULL);
+
+			vectors_in_migration[irq]=0;
+			generic_handle_irq(irq);
+			set_irq_regs(old_regs);
+		}
+	}
+
+	/*
+	 * Now let processor die. We do irq disable and max_xtp() to
+	 * ensure there is no more interrupts routed to this processor.
+	 * But the local timer interrupt can have 1 pending which we
+	 * take care in timer_interrupt().
+	 */
+	max_xtp();
+	local_irq_disable();
+}
+#endif
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@ -0,0 +1,672 @@
+/*
+ * linux/arch/ia64/kernel/irq_ia64.c
+ *
+ * Copyright (C) 1998-2001 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ *  6/10/99: Updated to bring in sync with x86 version to facilitate
+ *	     support for SMP and different interrupt controllers.
+ *
+ * 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector
+ *                      PCI to vector allocation routine.
+ * 04/14/2004 Ashok Raj <ashok.raj@intel.com>
+ *						Added CPU Hotplug handling for IPF.
+ */
+
+#include <linux/module.h>
+
+#include <linux/jiffies.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/kernel_stat.h>
+#include <linux/ptrace.h>
+#include <linux/signal.h>
+#include <linux/smp.h>
+#include <linux/threads.h>
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/ratelimit.h>
+#include <linux/acpi.h>
+#include <linux/sched.h>
+
+#include <asm/delay.h>
+#include <asm/intrinsics.h>
+#include <asm/io.h>
+#include <asm/hw_irq.h>
+#include <asm/machvec.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_PERFMON
+# include <asm/perfmon.h>
+#endif
+
+#define IRQ_DEBUG	0
+
+#define IRQ_VECTOR_UNASSIGNED	(0)
+
+#define IRQ_UNUSED		(0)
+#define IRQ_USED		(1)
+#define IRQ_RSVD		(2)
+
+/* These can be overridden in platform_irq_init */
+int ia64_first_device_vector = IA64_DEF_FIRST_DEVICE_VECTOR;
+int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR;
+
+/* default base addr of IPI table */
+void __iomem *ipi_base_addr = ((void __iomem *)
+			       (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR));
+
+static cpumask_t vector_allocation_domain(int cpu);
+
+/*
+ * Legacy IRQ to IA-64 vector translation table.
+ */
+__u8 isa_irq_to_vector_map[16] = {
+	/* 8259 IRQ translation, first 16 entries */
+	0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29,
+	0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21
+};
+EXPORT_SYMBOL(isa_irq_to_vector_map);
+
+DEFINE_SPINLOCK(vector_lock);
+
+struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
+	[0 ... NR_IRQS - 1] = {
+		.vector = IRQ_VECTOR_UNASSIGNED,
+		.domain = CPU_MASK_NONE
+	}
+};
+
+DEFINE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq) = {
+	[0 ... IA64_NUM_VECTORS - 1] = -1
+};
+
+static cpumask_t vector_table[IA64_NUM_VECTORS] = {
+	[0 ... IA64_NUM_VECTORS - 1] = CPU_MASK_NONE
+};
+
+static int irq_status[NR_IRQS] = {
+	[0 ... NR_IRQS -1] = IRQ_UNUSED
+};
+
+static inline int find_unassigned_irq(void)
+{
+	int irq;
+
+	for (irq = IA64_FIRST_DEVICE_VECTOR; irq < NR_IRQS; irq++)
+		if (irq_status[irq] == IRQ_UNUSED)
+			return irq;
+	return -ENOSPC;
+}
+
+static inline int find_unassigned_vector(cpumask_t domain)
+{
+	cpumask_t mask;
+	int pos, vector;
+
+	cpumask_and(&mask, &domain, cpu_online_mask);
+	if (cpus_empty(mask))
+		return -EINVAL;
+
+	for (pos = 0; pos < IA64_NUM_DEVICE_VECTORS; pos++) {
+		vector = IA64_FIRST_DEVICE_VECTOR + pos;
+		cpus_and(mask, domain, vector_table[vector]);
+		if (!cpus_empty(mask))
+			continue;
+		return vector;
+	}
+	return -ENOSPC;
+}
+
+static int __bind_irq_vector(int irq, int vector, cpumask_t domain)
+{
+	cpumask_t mask;
+	int cpu;
+	struct irq_cfg *cfg = &irq_cfg[irq];
+
+	BUG_ON((unsigned)irq >= NR_IRQS);
+	BUG_ON((unsigned)vector >= IA64_NUM_VECTORS);
+
+	cpumask_and(&mask, &domain, cpu_online_mask);
+	if (cpus_empty(mask))
+		return -EINVAL;
+	if ((cfg->vector == vector) && cpus_equal(cfg->domain, domain))
+		return 0;
+	if (cfg->vector != IRQ_VECTOR_UNASSIGNED)
+		return -EBUSY;
+	for_each_cpu_mask(cpu, mask)
+		per_cpu(vector_irq, cpu)[vector] = irq;
+	cfg->vector = vector;
+	cfg->domain = domain;
+	irq_status[irq] = IRQ_USED;
+	cpus_or(vector_table[vector], vector_table[vector], domain);
+	return 0;
+}
+
+int bind_irq_vector(int irq, int vector, cpumask_t domain)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	ret = __bind_irq_vector(irq, vector, domain);
+	spin_unlock_irqrestore(&vector_lock, flags);
+	return ret;
+}
+
+static void __clear_irq_vector(int irq)
+{
+	int vector, cpu;
+	cpumask_t mask;
+	cpumask_t domain;
+	struct irq_cfg *cfg = &irq_cfg[irq];
+
+	BUG_ON((unsigned)irq >= NR_IRQS);
+	BUG_ON(cfg->vector == IRQ_VECTOR_UNASSIGNED);
+	vector = cfg->vector;
+	domain = cfg->domain;
+	cpumask_and(&mask, &cfg->domain, cpu_online_mask);
+	for_each_cpu_mask(cpu, mask)
+		per_cpu(vector_irq, cpu)[vector] = -1;
+	cfg->vector = IRQ_VECTOR_UNASSIGNED;
+	cfg->domain = CPU_MASK_NONE;
+	irq_status[irq] = IRQ_UNUSED;
+	cpus_andnot(vector_table[vector], vector_table[vector], domain);
+}
+
+static void clear_irq_vector(int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	__clear_irq_vector(irq);
+	spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+int
+ia64_native_assign_irq_vector (int irq)
+{
+	unsigned long flags;
+	int vector, cpu;
+	cpumask_t domain = CPU_MASK_NONE;
+
+	vector = -ENOSPC;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	for_each_online_cpu(cpu) {
+		domain = vector_allocation_domain(cpu);
+		vector = find_unassigned_vector(domain);
+		if (vector >= 0)
+			break;
+	}
+	if (vector < 0)
+		goto out;
+	if (irq == AUTO_ASSIGN)
+		irq = vector;
+	BUG_ON(__bind_irq_vector(irq, vector, domain));
+ out:
+	spin_unlock_irqrestore(&vector_lock, flags);
+	return vector;
+}
+
+void
+ia64_native_free_irq_vector (int vector)
+{
+	if (vector < IA64_FIRST_DEVICE_VECTOR ||
+	    vector > IA64_LAST_DEVICE_VECTOR)
+		return;
+	clear_irq_vector(vector);
+}
+
+int
+reserve_irq_vector (int vector)
+{
+	if (vector < IA64_FIRST_DEVICE_VECTOR ||
+	    vector > IA64_LAST_DEVICE_VECTOR)
+		return -EINVAL;
+	return !!bind_irq_vector(vector, vector, CPU_MASK_ALL);
+}
+
+/*
+ * Initialize vector_irq on a new cpu. This function must be called
+ * with vector_lock held.
+ */
+void __setup_vector_irq(int cpu)
+{
+	int irq, vector;
+
+	/* Clear vector_irq */
+	for (vector = 0; vector < IA64_NUM_VECTORS; ++vector)
+		per_cpu(vector_irq, cpu)[vector] = -1;
+	/* Mark the inuse vectors */
+	for (irq = 0; irq < NR_IRQS; ++irq) {
+		if (!cpu_isset(cpu, irq_cfg[irq].domain))
+			continue;
+		vector = irq_to_vector(irq);
+		per_cpu(vector_irq, cpu)[vector] = irq;
+	}
+}
+
+#if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG))
+
+static enum vector_domain_type {
+	VECTOR_DOMAIN_NONE,
+	VECTOR_DOMAIN_PERCPU
+} vector_domain_type = VECTOR_DOMAIN_NONE;
+
+static cpumask_t vector_allocation_domain(int cpu)
+{
+	if (vector_domain_type == VECTOR_DOMAIN_PERCPU)
+		return cpumask_of_cpu(cpu);
+	return CPU_MASK_ALL;
+}
+
+static int __irq_prepare_move(int irq, int cpu)
+{
+	struct irq_cfg *cfg = &irq_cfg[irq];
+	int vector;
+	cpumask_t domain;
+
+	if (cfg->move_in_progress || cfg->move_cleanup_count)
+		return -EBUSY;
+	if (cfg->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu))
+		return -EINVAL;
+	if (cpu_isset(cpu, cfg->domain))
+		return 0;
+	domain = vector_allocation_domain(cpu);
+	vector = find_unassigned_vector(domain);
+	if (vector < 0)
+		return -ENOSPC;
+	cfg->move_in_progress = 1;
+	cfg->old_domain = cfg->domain;
+	cfg->vector = IRQ_VECTOR_UNASSIGNED;
+	cfg->domain = CPU_MASK_NONE;
+	BUG_ON(__bind_irq_vector(irq, vector, domain));
+	return 0;
+}
+
+int irq_prepare_move(int irq, int cpu)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	ret = __irq_prepare_move(irq, cpu);
+	spin_unlock_irqrestore(&vector_lock, flags);
+	return ret;
+}
+
+void irq_complete_move(unsigned irq)
+{
+	struct irq_cfg *cfg = &irq_cfg[irq];
+	cpumask_t cleanup_mask;
+	int i;
+
+	if (likely(!cfg->move_in_progress))
+		return;
+
+	if (unlikely(cpu_isset(smp_processor_id(), cfg->old_domain)))
+		return;
+
+	cpumask_and(&cleanup_mask, &cfg->old_domain, cpu_online_mask);
+	cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+	for_each_cpu_mask(i, cleanup_mask)
+		platform_send_ipi(i, IA64_IRQ_MOVE_VECTOR, IA64_IPI_DM_INT, 0);
+	cfg->move_in_progress = 0;
+}
+
+static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id)
+{
+	int me = smp_processor_id();
+	ia64_vector vector;
+	unsigned long flags;
+
+	for (vector = IA64_FIRST_DEVICE_VECTOR;
+	     vector < IA64_LAST_DEVICE_VECTOR; vector++) {
+		int irq;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
+		irq = __this_cpu_read(vector_irq[vector]);
+		if (irq < 0)
+			continue;
+
+		desc = irq_to_desc(irq);
+		cfg = irq_cfg + irq;
+		raw_spin_lock(&desc->lock);
+		if (!cfg->move_cleanup_count)
+			goto unlock;
+
+		if (!cpu_isset(me, cfg->old_domain))
+			goto unlock;
+
+		spin_lock_irqsave(&vector_lock, flags);
+		__this_cpu_write(vector_irq[vector], -1);
+		cpu_clear(me, vector_table[vector]);
+		spin_unlock_irqrestore(&vector_lock, flags);
+		cfg->move_cleanup_count--;
+	unlock:
+		raw_spin_unlock(&desc->lock);
+	}
+	return IRQ_HANDLED;
+}
+
+static struct irqaction irq_move_irqaction = {
+	.handler =	smp_irq_move_cleanup_interrupt,
+	.name =		"irq_move"
+};
+
+static int __init parse_vector_domain(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
+	if (!strcmp(arg, "percpu")) {
+		vector_domain_type = VECTOR_DOMAIN_PERCPU;
+		no_int_routing = 1;
+	}
+	return 0;
+}
+early_param("vector", parse_vector_domain);
+#else
+static cpumask_t vector_allocation_domain(int cpu)
+{
+	return CPU_MASK_ALL;
+}
+#endif
+
+
+void destroy_and_reserve_irq(unsigned int irq)
+{
+	unsigned long flags;
+
+	irq_init_desc(irq);
+	spin_lock_irqsave(&vector_lock, flags);
+	__clear_irq_vector(irq);
+	irq_status[irq] = IRQ_RSVD;
+	spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+/*
+ * Dynamic irq allocate and deallocation for MSI
+ */
+int create_irq(void)
+{
+	unsigned long flags;
+	int irq, vector, cpu;
+	cpumask_t domain = CPU_MASK_NONE;
+
+	irq = vector = -ENOSPC;
+	spin_lock_irqsave(&vector_lock, flags);
+	for_each_online_cpu(cpu) {
+		domain = vector_allocation_domain(cpu);
+		vector = find_unassigned_vector(domain);
+		if (vector >= 0)
+			break;
+	}
+	if (vector < 0)
+		goto out;
+	irq = find_unassigned_irq();
+	if (irq < 0)
+		goto out;
+	BUG_ON(__bind_irq_vector(irq, vector, domain));
+ out:
+	spin_unlock_irqrestore(&vector_lock, flags);
+	if (irq >= 0)
+		irq_init_desc(irq);
+	return irq;
+}
+
+void destroy_irq(unsigned int irq)
+{
+	irq_init_desc(irq);
+	clear_irq_vector(irq);
+}
+
+#ifdef CONFIG_SMP
+#	define IS_RESCHEDULE(vec)	(vec == IA64_IPI_RESCHEDULE)
+#	define IS_LOCAL_TLB_FLUSH(vec)	(vec == IA64_IPI_LOCAL_TLB_FLUSH)
+#else
+#	define IS_RESCHEDULE(vec)	(0)
+#	define IS_LOCAL_TLB_FLUSH(vec)	(0)
+#endif
+/*
+ * That's where the IVT branches when we get an external
+ * interrupt. This branches to the correct hardware IRQ handler via
+ * function ptr.
+ */
+void
+ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	unsigned long saved_tpr;
+
+#if IRQ_DEBUG
+	{
+		unsigned long bsp, sp;
+
+		/*
+		 * Note: if the interrupt happened while executing in
+		 * the context switch routine (ia64_switch_to), we may
+		 * get a spurious stack overflow here.  This is
+		 * because the register and the memory stack are not
+		 * switched atomically.
+		 */
+		bsp = ia64_getreg(_IA64_REG_AR_BSP);
+		sp = ia64_getreg(_IA64_REG_SP);
+
+		if ((sp - bsp) < 1024) {
+			static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5);
+
+			if (__ratelimit(&ratelimit)) {
+				printk("ia64_handle_irq: DANGER: less than "
+				       "1KB of free stack space!!\n"
+				       "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
+			}
+		}
+	}
+#endif /* IRQ_DEBUG */
+
+	/*
+	 * Always set TPR to limit maximum interrupt nesting depth to
+	 * 16 (without this, it would be ~240, which could easily lead
+	 * to kernel stack overflows).
+	 */
+	irq_enter();
+	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
+	ia64_srlz_d();
+	while (vector != IA64_SPURIOUS_INT_VECTOR) {
+		int irq = local_vector_to_irq(vector);
+
+		if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) {
+			smp_local_flush_tlb();
+			kstat_incr_irq_this_cpu(irq);
+		} else if (unlikely(IS_RESCHEDULE(vector))) {
+			scheduler_ipi();
+			kstat_incr_irq_this_cpu(irq);
+		} else {
+			ia64_setreg(_IA64_REG_CR_TPR, vector);
+			ia64_srlz_d();
+
+			if (unlikely(irq < 0)) {
+				printk(KERN_ERR "%s: Unexpected interrupt "
+				       "vector %d on CPU %d is not mapped "
+				       "to any IRQ!\n", __func__, vector,
+				       smp_processor_id());
+			} else
+				generic_handle_irq(irq);
+
+			/*
+			 * Disable interrupts and send EOI:
+			 */
+			local_irq_disable();
+			ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
+		}
+		ia64_eoi();
+		vector = ia64_get_ivr();
+	}
+	/*
+	 * This must be done *after* the ia64_eoi().  For example, the keyboard softirq
+	 * handler needs to be able to wait for further keyboard interrupts, which can't
+	 * come through until ia64_eoi() has been done.
+	 */
+	irq_exit();
+	set_irq_regs(old_regs);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * This function emulates a interrupt processing when a cpu is about to be
+ * brought down.
+ */
+void ia64_process_pending_intr(void)
+{
+	ia64_vector vector;
+	unsigned long saved_tpr;
+	extern unsigned int vectors_in_migration[NR_IRQS];
+
+	vector = ia64_get_ivr();
+
+	irq_enter();
+	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
+	ia64_srlz_d();
+
+	 /*
+	  * Perform normal interrupt style processing
+	  */
+	while (vector != IA64_SPURIOUS_INT_VECTOR) {
+		int irq = local_vector_to_irq(vector);
+
+		if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) {
+			smp_local_flush_tlb();
+			kstat_incr_irq_this_cpu(irq);
+		} else if (unlikely(IS_RESCHEDULE(vector))) {
+			kstat_incr_irq_this_cpu(irq);
+		} else {
+			struct pt_regs *old_regs = set_irq_regs(NULL);
+
+			ia64_setreg(_IA64_REG_CR_TPR, vector);
+			ia64_srlz_d();
+
+			/*
+			 * Now try calling normal ia64_handle_irq as it would have got called
+			 * from a real intr handler. Try passing null for pt_regs, hopefully
+			 * it will work. I hope it works!.
+			 * Probably could shared code.
+			 */
+			if (unlikely(irq < 0)) {
+				printk(KERN_ERR "%s: Unexpected interrupt "
+				       "vector %d on CPU %d not being mapped "
+				       "to any IRQ!!\n", __func__, vector,
+				       smp_processor_id());
+			} else {
+				vectors_in_migration[irq]=0;
+				generic_handle_irq(irq);
+			}
+			set_irq_regs(old_regs);
+
+			/*
+			 * Disable interrupts and send EOI
+			 */
+			local_irq_disable();
+			ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
+		}
+		ia64_eoi();
+		vector = ia64_get_ivr();
+	}
+	irq_exit();
+}
+#endif
+
+
+#ifdef CONFIG_SMP
+
+static irqreturn_t dummy_handler (int irq, void *dev_id)
+{
+	BUG();
+}
+
+static struct irqaction ipi_irqaction = {
+	.handler =	handle_IPI,
+	.name =		"IPI"
+};
+
+/*
+ * KVM uses this interrupt to force a cpu out of guest mode
+ */
+static struct irqaction resched_irqaction = {
+	.handler =	dummy_handler,
+	.name =		"resched"
+};
+
+static struct irqaction tlb_irqaction = {
+	.handler =	dummy_handler,
+	.name =		"tlb_flush"
+};
+
+#endif
+
+void
+ia64_native_register_percpu_irq (ia64_vector vec, struct irqaction *action)
+{
+	unsigned int irq;
+
+	irq = vec;
+	BUG_ON(bind_irq_vector(irq, vec, CPU_MASK_ALL));
+	irq_set_status_flags(irq, IRQ_PER_CPU);
+	irq_set_chip(irq, &irq_type_ia64_lsapic);
+	if (action)
+		setup_irq(irq, action);
+	irq_set_handler(irq, handle_percpu_irq);
+}
+
+void __init
+ia64_native_register_ipi(void)
+{
+#ifdef CONFIG_SMP
+	register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
+	register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction);
+	register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction);
+#endif
+}
+
+void __init
+init_IRQ (void)
+{
+#ifdef CONFIG_ACPI
+	acpi_boot_init();
+#endif
+	ia64_register_ipi();
+	register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);
+#ifdef CONFIG_SMP
+#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG)
+	if (vector_domain_type != VECTOR_DOMAIN_NONE)
+		register_percpu_irq(IA64_IRQ_MOVE_VECTOR, &irq_move_irqaction);
+#endif
+#endif
+#ifdef CONFIG_PERFMON
+	pfm_init_percpu();
+#endif
+	platform_irq_init();
+}
+
+void
+ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect)
+{
+	void __iomem *ipi_addr;
+	unsigned long ipi_data;
+	unsigned long phys_cpu_id;
+
+	phys_cpu_id = cpu_physical_id(cpu);
+
+	/*
+	 * cpu number is in 8bit ID and 8bit EID
+	 */
+
+	ipi_data = (delivery_mode << 8) | (vector & 0xff);
+	ipi_addr = ipi_base_addr + ((phys_cpu_id << 4) | ((redirect & 1) << 3));
+
+	writeq(ipi_data, ipi_addr);
+}
--- a/arch/ia64/kernel/irq_lsapic.c
+++ b/arch/ia64/kernel/irq_lsapic.c
@ -0,0 +1,44 @@
+/*
+ * LSAPIC Interrupt Controller
+ *
+ * This takes care of interrupts that are generated by the CPU's
+ * internal Streamlined Advanced Programmable Interrupt Controller
+ * (LSAPIC), such as the ITC and IPI interrupts.
+    *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 2000 Hewlett-Packard Co
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/irq.h>
+
+static unsigned int
+lsapic_noop_startup (struct irq_data *data)
+{
+	return 0;
+}
+
+static void
+lsapic_noop (struct irq_data *data)
+{
+	/* nothing to do... */
+}
+
+static int lsapic_retrigger(struct irq_data *data)
+{
+	ia64_resend_irq(data->irq);
+
+	return 1;
+}
+
+struct irq_chip irq_type_ia64_lsapic = {
+	.name =			"LSAPIC",
+	.irq_startup =		lsapic_noop_startup,
+	.irq_shutdown =		lsapic_noop,
+	.irq_enable =		lsapic_noop,
+	.irq_disable =		lsapic_noop,
+	.irq_ack =		lsapic_noop,
+	.irq_retrigger =	lsapic_retrigger,
+};
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
--- a/arch/ia64/kernel/jprobes.S
+++ b/arch/ia64/kernel/jprobes.S
@ -0,0 +1,90 @@
+/*
+ * Jprobe specific operations
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Intel Corporation, 2005
+ *
+ * 2005-May     Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy
+ *              <anil.s.keshavamurthy@intel.com> initial implementation
+ *
+ * Jprobes (a.k.a. "jump probes" which is built on-top of kprobes) allow a
+ * probe to be inserted into the beginning of a function call.  The fundamental
+ * difference between a jprobe and a kprobe is the jprobe handler is executed
+ * in the same context as the target function, while the kprobe handlers
+ * are executed in interrupt context.
+ *
+ * For jprobes we initially gain control by placing a break point in the
+ * first instruction of the targeted function.  When we catch that specific
+ * break, we:
+ *        * set the return address to our jprobe_inst_return() function
+ *        * jump to the jprobe handler function
+ *
+ * Since we fixed up the return address, the jprobe handler will return to our
+ * jprobe_inst_return() function, giving us control again.  At this point we
+ * are back in the parents frame marker, so we do yet another call to our
+ * jprobe_break() function to fix up the frame marker as it would normally
+ * exist in the target function.
+ *
+ * Our jprobe_return function then transfers control back to kprobes.c by
+ * executing a break instruction using one of our reserved numbers.  When we
+ * catch that break in kprobes.c, we continue like we do for a normal kprobe
+ * by single stepping the emulated instruction, and then returning execution
+ * to the correct location.
+ */
+#include <asm/asmmacro.h>
+#include <asm/break.h>
+
+	/*
+	 * void jprobe_break(void)
+	 */
+	.section .kprobes.text, "ax"
+ENTRY(jprobe_break)
+	break.m __IA64_BREAK_JPROBE
+END(jprobe_break)
+
+	/*
+	 * void jprobe_inst_return(void)
+	 */
+GLOBAL_ENTRY(jprobe_inst_return)
+	br.call.sptk.many b0=jprobe_break
+END(jprobe_inst_return)
+
+GLOBAL_ENTRY(invalidate_stacked_regs)
+	movl r16=invalidate_restore_cfm
+	;;
+	mov b6=r16
+	;;
+	br.ret.sptk.many b6
+	;;
+invalidate_restore_cfm:
+	mov r16=ar.rsc
+	;;
+	mov ar.rsc=r0
+	;;
+	loadrs
+	;;
+	mov ar.rsc=r16
+	;;
+	br.cond.sptk.many rp
+END(invalidate_stacked_regs)
+
+GLOBAL_ENTRY(flush_register_stack)
+	// flush dirty regs to backing store (must be first in insn group)
+	flushrs
+	;;
+	br.ret.sptk.many rp
+END(flush_register_stack)
+
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c
@ -0,0 +1,170 @@
+/*
+ * arch/ia64/kernel/machine_kexec.c
+ *
+ * Handle transition of Linux booting another kernel
+ * Copyright (C) 2005 Hewlett-Packard Development Comapny, L.P.
+ * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2006 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/cpu.h>
+#include <linux/irq.h>
+#include <linux/efi.h>
+#include <linux/numa.h>
+#include <linux/mmzone.h>
+
+#include <asm/numa.h>
+#include <asm/mmu_context.h>
+#include <asm/setup.h>
+#include <asm/delay.h>
+#include <asm/meminit.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/mca.h>
+
+typedef void (*relocate_new_kernel_t)(
+					unsigned long indirection_page,
+					unsigned long start_address,
+					struct ia64_boot_param *boot_param,
+					unsigned long pal_addr) __noreturn;
+
+struct kimage *ia64_kimage;
+
+struct resource efi_memmap_res = {
+        .name  = "EFI Memory Map",
+        .start = 0,
+        .end   = 0,
+        .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+struct resource boot_param_res = {
+        .name  = "Boot parameter",
+        .start = 0,
+        .end   = 0,
+        .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+
+/*
+ * Do what every setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+	void *control_code_buffer;
+	const unsigned long *func;
+
+	func = (unsigned long *)&relocate_new_kernel;
+	/* Pre-load control code buffer to minimize work in kexec path */
+	control_code_buffer = page_address(image->control_code_page);
+	memcpy((void *)control_code_buffer, (const void *)func[0],
+			relocate_new_kernel_size);
+	flush_icache_range((unsigned long)control_code_buffer,
+			(unsigned long)control_code_buffer + relocate_new_kernel_size);
+	ia64_kimage = image;
+
+	return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
+{
+	struct kimage *image = arg;
+	relocate_new_kernel_t rnk;
+	void *pal_addr = efi_get_pal_addr();
+	unsigned long code_addr;
+	int ii;
+	u64 fp, gp;
+	ia64_fptr_t *init_handler = (ia64_fptr_t *)ia64_os_init_on_kdump;
+
+	BUG_ON(!image);
+	code_addr = (unsigned long)page_address(image->control_code_page);
+	if (image->type == KEXEC_TYPE_CRASH) {
+		crash_save_this_cpu();
+		current->thread.ksp = (__u64)info->sw - 16;
+
+		/* Register noop init handler */
+		fp = ia64_tpa(init_handler->fp);
+		gp = ia64_tpa(ia64_getreg(_IA64_REG_GP));
+		ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, fp, gp, 0, fp, gp, 0);
+	} else {
+		/* Unregister init handlers of current kernel */
+		ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, 0, 0, 0, 0, 0, 0);
+	}
+
+	/* Unregister mca handler - No more recovery on current kernel */
+	ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, 0, 0, 0, 0, 0, 0);
+
+	/* Interrupts aren't acceptable while we reboot */
+	local_irq_disable();
+
+	/* Mask CMC and Performance Monitor interrupts */
+	ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
+	ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
+
+	/* Mask ITV and Local Redirect Registers */
+	ia64_set_itv(1 << 16);
+	ia64_set_lrr0(1 << 16);
+	ia64_set_lrr1(1 << 16);
+
+	/* terminate possible nested in-service interrupts */
+	for (ii = 0; ii < 16; ii++)
+		ia64_eoi();
+
+	/* unmask TPR and clear any pending interrupts */
+	ia64_setreg(_IA64_REG_CR_TPR, 0);
+	ia64_srlz_d();
+	while (ia64_get_ivr() != IA64_SPURIOUS_INT_VECTOR)
+		ia64_eoi();
+	platform_kernel_launch_event();
+	rnk = (relocate_new_kernel_t)&code_addr;
+	(*rnk)(image->head, image->start, ia64_boot_param,
+		     GRANULEROUNDDOWN((unsigned long) pal_addr));
+	BUG();
+}
+
+void machine_kexec(struct kimage *image)
+{
+	BUG_ON(!image);
+	unw_init_running(ia64_machine_kexec, image);
+	for(;;);
+}
+
+void arch_crash_save_vmcoreinfo(void)
+{
+#if defined(CONFIG_DISCONTIGMEM) || defined(CONFIG_SPARSEMEM)
+	VMCOREINFO_SYMBOL(pgdat_list);
+	VMCOREINFO_LENGTH(pgdat_list, MAX_NUMNODES);
+#endif
+#ifdef CONFIG_NUMA
+	VMCOREINFO_SYMBOL(node_memblk);
+	VMCOREINFO_LENGTH(node_memblk, NR_NODE_MEMBLKS);
+	VMCOREINFO_STRUCT_SIZE(node_memblk_s);
+	VMCOREINFO_OFFSET(node_memblk_s, start_paddr);
+	VMCOREINFO_OFFSET(node_memblk_s, size);
+#endif
+#ifdef CONFIG_PGTABLE_3
+	VMCOREINFO_CONFIG(PGTABLE_3);
+#elif defined(CONFIG_PGTABLE_4)
+	VMCOREINFO_CONFIG(PGTABLE_4);
+#endif
+}
+
+unsigned long paddr_vmcoreinfo_note(void)
+{
+	return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
+}
+
--- a/arch/ia64/kernel/machvec.c
+++ b/arch/ia64/kernel/machvec.c
@ -0,0 +1,90 @@
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+#include <asm/machvec.h>
+
+#ifdef CONFIG_IA64_GENERIC
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+#include <asm/page.h>
+
+struct ia64_machine_vector ia64_mv;
+EXPORT_SYMBOL(ia64_mv);
+
+static struct ia64_machine_vector * __init
+lookup_machvec (const char *name)
+{
+	extern struct ia64_machine_vector machvec_start[];
+	extern struct ia64_machine_vector machvec_end[];
+	struct ia64_machine_vector *mv;
+
+	for (mv = machvec_start; mv < machvec_end; ++mv)
+		if (strcmp (mv->name, name) == 0)
+			return mv;
+
+	return 0;
+}
+
+void __init
+machvec_init (const char *name)
+{
+	struct ia64_machine_vector *mv;
+
+	if (!name)
+		name = acpi_get_sysname();
+	mv = lookup_machvec(name);
+	if (!mv)
+		panic("generic kernel failed to find machine vector for"
+		      " platform %s!", name);
+
+	ia64_mv = *mv;
+	printk(KERN_INFO "booting generic kernel on platform %s\n", name);
+}
+
+void __init
+machvec_init_from_cmdline(const char *cmdline)
+{
+	char str[64];
+	const char *start;
+	char *end;
+
+	if (! (start = strstr(cmdline, "machvec=")) )
+		return machvec_init(NULL);
+
+	strlcpy(str, start + strlen("machvec="), sizeof(str));
+	if ( (end = strchr(str, ' ')) )
+		*end = '\0';
+
+	return machvec_init(str);
+}
+
+#endif /* CONFIG_IA64_GENERIC */
+
+void
+machvec_setup (char **arg)
+{
+}
+EXPORT_SYMBOL(machvec_setup);
+
+void
+machvec_timer_interrupt (int irq, void *dev_id)
+{
+}
+EXPORT_SYMBOL(machvec_timer_interrupt);
+
+void
+machvec_dma_sync_single(struct device *hwdev, dma_addr_t dma_handle, size_t size,
+			enum dma_data_direction dir)
+{
+	mb();
+}
+EXPORT_SYMBOL(machvec_dma_sync_single);
+
+void
+machvec_dma_sync_sg(struct device *hwdev, struct scatterlist *sg, int n,
+		    enum dma_data_direction dir)
+{
+	mb();
+}
+EXPORT_SYMBOL(machvec_dma_sync_sg);
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@ -0,0 +1,795 @@
+/*
+ * File:	mca_drv.c
+ * Purpose:	Generic MCA handling layer
+ *
+ * Copyright (C) 2004 FUJITSU LIMITED
+ * Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ * Copyright (C) 2005 Silicon Graphics, Inc
+ * Copyright (C) 2005 Keith Owens <kaos@sgi.com>
+ * Copyright (C) 2006 Russ Anderson <rja@sgi.com>
+ */
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kallsyms.h>
+#include <linux/bootmem.h>
+#include <linux/acpi.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/workqueue.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/delay.h>
+#include <asm/machvec.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/mca.h>
+
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
+
+#include "mca_drv.h"
+
+/* max size of SAL error record (default) */
+static int sal_rec_max = 10000;
+
+/* from mca_drv_asm.S */
+extern void *mca_handler_bhhook(void);
+
+static DEFINE_SPINLOCK(mca_bh_lock);
+
+typedef enum {
+	MCA_IS_LOCAL  = 0,
+	MCA_IS_GLOBAL = 1
+} mca_type_t;
+
+#define MAX_PAGE_ISOLATE 1024
+
+static struct page *page_isolate[MAX_PAGE_ISOLATE];
+static int num_page_isolate = 0;
+
+typedef enum {
+	ISOLATE_NG,
+	ISOLATE_OK,
+	ISOLATE_NONE
+} isolate_status_t;
+
+typedef enum {
+	MCA_NOT_RECOVERED = 0,
+	MCA_RECOVERED	  = 1
+} recovery_status_t;
+
+/*
+ *  This pool keeps pointers to the section part of SAL error record
+ */
+static struct {
+	slidx_list_t *buffer; /* section pointer list pool */
+	int	     cur_idx; /* Current index of section pointer list pool */
+	int	     max_idx; /* Maximum index of section pointer list pool */
+} slidx_pool;
+
+static int
+fatal_mca(const char *fmt, ...)
+{
+	va_list args;
+	char buf[256];
+
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+	ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf);
+
+	return MCA_NOT_RECOVERED;
+}
+
+static int
+mca_recovered(const char *fmt, ...)
+{
+	va_list args;
+	char buf[256];
+
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+	ia64_mca_printk(KERN_INFO "MCA: %s\n", buf);
+
+	return MCA_RECOVERED;
+}
+
+/**
+ * mca_page_isolate - isolate a poisoned page in order not to use it later
+ * @paddr:	poisoned memory location
+ *
+ * Return value:
+ *	one of isolate_status_t, ISOLATE_OK/NG/NONE.
+ */
+
+static isolate_status_t
+mca_page_isolate(unsigned long paddr)
+{
+	int i;
+	struct page *p;
+
+	/* whether physical address is valid or not */
+	if (!ia64_phys_addr_valid(paddr))
+		return ISOLATE_NONE;
+
+	if (!pfn_valid(paddr >> PAGE_SHIFT))
+		return ISOLATE_NONE;
+
+	/* convert physical address to physical page number */
+	p = pfn_to_page(paddr>>PAGE_SHIFT);
+
+	/* check whether a page number have been already registered or not */
+	for (i = 0; i < num_page_isolate; i++)
+		if (page_isolate[i] == p)
+			return ISOLATE_OK; /* already listed */
+
+	/* limitation check */
+	if (num_page_isolate == MAX_PAGE_ISOLATE)
+		return ISOLATE_NG;
+
+	/* kick pages having attribute 'SLAB' or 'Reserved' */
+	if (PageSlab(p) || PageReserved(p))
+		return ISOLATE_NG;
+
+	/* add attribute 'Reserved' and register the page */
+	get_page(p);
+	SetPageReserved(p);
+	page_isolate[num_page_isolate++] = p;
+
+	return ISOLATE_OK;
+}
+
+/**
+ * mca_hanlder_bh - Kill the process which occurred memory read error
+ * @paddr:	poisoned address received from MCA Handler
+ */
+
+void
+mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
+{
+	ia64_mlogbuf_dump();
+	printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
+		"iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
+	       raw_smp_processor_id(), current->pid,
+		from_kuid(&init_user_ns, current_uid()),
+		iip, ipsr, paddr, current->comm);
+
+	spin_lock(&mca_bh_lock);
+	switch (mca_page_isolate(paddr)) {
+	case ISOLATE_OK:
+		printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr);
+		break;
+	case ISOLATE_NG:
+		printk(KERN_CRIT "Page isolation: ( %lx ) failure.\n", paddr);
+		break;
+	default:
+		break;
+	}
+	spin_unlock(&mca_bh_lock);
+
+	/* This process is about to be killed itself */
+	do_exit(SIGKILL);
+}
+
+/**
+ * mca_make_peidx - Make index of processor error section
+ * @slpi:	pointer to record of processor error section
+ * @peidx:	pointer to index of processor error section
+ */
+
+static void
+mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx)
+{
+	/*
+	 * calculate the start address of
+	 *   "struct cpuid_info" and "sal_processor_static_info_t".
+	 */
+	u64 total_check_num = slpi->valid.num_cache_check
+				+ slpi->valid.num_tlb_check
+				+ slpi->valid.num_bus_check
+				+ slpi->valid.num_reg_file_check
+				+ slpi->valid.num_ms_check;
+	u64 head_size =	sizeof(sal_log_mod_error_info_t) * total_check_num
+			+ sizeof(sal_log_processor_info_t);
+	u64 mid_size  = slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info);
+
+	peidx_head(peidx)   = slpi;
+	peidx_mid(peidx)    = (struct sal_cpuid_info *)
+		(slpi->valid.cpuid_info ? ((char*)slpi + head_size) : NULL);
+	peidx_bottom(peidx) = (sal_processor_static_info_t *)
+		(slpi->valid.psi_static_struct ?
+			((char*)slpi + head_size + mid_size) : NULL);
+}
+
+/**
+ * mca_make_slidx -  Make index of SAL error record
+ * @buffer:	pointer to SAL error record
+ * @slidx:	pointer to index of SAL error record
+ *
+ * Return value:
+ *	1 if record has platform error / 0 if not
+ */
+#define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \
+	{slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \
+	hl->hdr = ptr; \
+	list_add(&hl->list, &(sect)); \
+	slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; }
+
+static int
+mca_make_slidx(void *buffer, slidx_table_t *slidx)
+{
+	int platform_err = 0;
+	int record_len = ((sal_log_record_header_t*)buffer)->len;
+	u32 ercd_pos;
+	int sects;
+	sal_log_section_hdr_t *sp;
+
+	/*
+	 * Initialize index referring current record
+	 */
+	INIT_LIST_HEAD(&(slidx->proc_err));
+	INIT_LIST_HEAD(&(slidx->mem_dev_err));
+	INIT_LIST_HEAD(&(slidx->sel_dev_err));
+	INIT_LIST_HEAD(&(slidx->pci_bus_err));
+	INIT_LIST_HEAD(&(slidx->smbios_dev_err));
+	INIT_LIST_HEAD(&(slidx->pci_comp_err));
+	INIT_LIST_HEAD(&(slidx->plat_specific_err));
+	INIT_LIST_HEAD(&(slidx->host_ctlr_err));
+	INIT_LIST_HEAD(&(slidx->plat_bus_err));
+	INIT_LIST_HEAD(&(slidx->unsupported));
+
+	/*
+	 * Extract a Record Header
+	 */
+	slidx->header = buffer;
+
+	/*
+	 * Extract each section records
+	 * (arranged from "int ia64_log_platform_info_print()")
+	 */
+	for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0;
+		ercd_pos < record_len; ercd_pos += sp->len, sects++) {
+		sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos);
+		if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) {
+			LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_BUS_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp);
+		} else {
+			LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp);
+		}
+	}
+	slidx->n_sections = sects;
+
+	return platform_err;
+}
+
+/**
+ * init_record_index_pools - Initialize pool of lists for SAL record index
+ *
+ * Return value:
+ *	0 on Success / -ENOMEM on Failure
+ */
+static int
+init_record_index_pools(void)
+{
+	int i;
+	int rec_max_size;  /* Maximum size of SAL error records */
+	int sect_min_size; /* Minimum size of SAL error sections */
+	/* minimum size table of each section */
+	static int sal_log_sect_min_sizes[] = {
+		sizeof(sal_log_processor_info_t)
+		+ sizeof(sal_processor_static_info_t),
+		sizeof(sal_log_mem_dev_err_info_t),
+		sizeof(sal_log_sel_dev_err_info_t),
+		sizeof(sal_log_pci_bus_err_info_t),
+		sizeof(sal_log_smbios_dev_err_info_t),
+		sizeof(sal_log_pci_comp_err_info_t),
+		sizeof(sal_log_plat_specific_err_info_t),
+		sizeof(sal_log_host_ctlr_err_info_t),
+		sizeof(sal_log_plat_bus_err_info_t),
+	};
+
+	/*
+	 * MCA handler cannot allocate new memory on flight,
+	 * so we preallocate enough memory to handle a SAL record.
+	 *
+	 * Initialize a handling set of slidx_pool:
+	 *   1. Pick up the max size of SAL error records
+	 *   2. Pick up the min size of SAL error sections
+	 *   3. Allocate the pool as enough to 2 SAL records
+	 *     (now we can estimate the maxinum of section in a record.)
+	 */
+
+	/* - 1 - */
+	rec_max_size = sal_rec_max;
+
+	/* - 2 - */
+	sect_min_size = sal_log_sect_min_sizes[0];
+	for (i = 1; i < sizeof sal_log_sect_min_sizes/sizeof(size_t); i++)
+		if (sect_min_size > sal_log_sect_min_sizes[i])
+			sect_min_size = sal_log_sect_min_sizes[i];
+
+	/* - 3 - */
+	slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1;
+	slidx_pool.buffer =
+		kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL);
+
+	return slidx_pool.buffer ? 0 : -ENOMEM;
+}
+
+
+/*****************************************************************************
+ * Recovery functions                                                        *
+ *****************************************************************************/
+
+/**
+ * is_mca_global - Check whether this MCA is global or not
+ * @peidx:	pointer of index of processor error section
+ * @pbci:	pointer to pal_bus_check_info_t
+ * @sos:	pointer to hand off struct between SAL and OS
+ *
+ * Return value:
+ *	MCA_IS_LOCAL / MCA_IS_GLOBAL
+ */
+
+static mca_type_t
+is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+	      struct ia64_sal_os_state *sos)
+{
+	pal_processor_state_info_t *psp =
+		(pal_processor_state_info_t*)peidx_psp(peidx);
+
+	/*
+	 * PAL can request a rendezvous, if the MCA has a global scope.
+	 * If "rz_always" flag is set, SAL requests MCA rendezvous
+	 * in spite of global MCA.
+	 * Therefore it is local MCA when rendezvous has not been requested.
+	 * Failed to rendezvous, the system must be down.
+	 */
+	switch (sos->rv_rc) {
+		case -1: /* SAL rendezvous unsuccessful */
+			return MCA_IS_GLOBAL;
+		case  0: /* SAL rendezvous not required */
+			return MCA_IS_LOCAL;
+		case  1: /* SAL rendezvous successful int */
+		case  2: /* SAL rendezvous successful int with init */
+		default:
+			break;
+	}
+
+	/*
+	 * If One or more Cache/TLB/Reg_File/Uarch_Check is here,
+	 * it would be a local MCA. (i.e. processor internal error)
+	 */
+	if (psp->tc || psp->cc || psp->rc || psp->uc)
+		return MCA_IS_LOCAL;
+	
+	/*
+	 * Bus_Check structure with Bus_Check.ib (internal bus error) flag set
+	 * would be a global MCA. (e.g. a system bus address parity error)
+	 */
+	if (!pbci || pbci->ib)
+		return MCA_IS_GLOBAL;
+
+	/*
+	 * Bus_Check structure with Bus_Check.eb (external bus error) flag set
+	 * could be either a local MCA or a global MCA.
+	 *
+	 * Referring Bus_Check.bsi:
+	 *   0: Unknown/unclassified
+	 *   1: BERR#
+	 *   2: BINIT#
+	 *   3: Hard Fail
+	 * (FIXME: Are these SGI specific or generic bsi values?)
+	 */
+	if (pbci->eb)
+		switch (pbci->bsi) {
+			case 0:
+				/* e.g. a load from poisoned memory */
+				return MCA_IS_LOCAL;
+			case 1:
+			case 2:
+			case 3:
+				return MCA_IS_GLOBAL;
+		}
+
+	return MCA_IS_GLOBAL;
+}
+
+/**
+ * get_target_identifier - Get the valid Cache or Bus check target identifier.
+ * @peidx:	pointer of index of processor error section
+ *
+ * Return value:
+ *	target address on Success / 0 on Failure
+ */
+static u64
+get_target_identifier(peidx_table_t *peidx)
+{
+	u64 target_address = 0;
+	sal_log_mod_error_info_t *smei;
+	pal_cache_check_info_t *pcci;
+	int i, level = 9;
+
+	/*
+	 * Look through the cache checks for a valid target identifier
+	 * If more than one valid target identifier, return the one
+	 * with the lowest cache level.
+	 */
+	for (i = 0; i < peidx_cache_check_num(peidx); i++) {
+		smei = (sal_log_mod_error_info_t *)peidx_cache_check(peidx, i);
+		if (smei->valid.target_identifier && smei->target_identifier) {
+			pcci = (pal_cache_check_info_t *)&(smei->check_info);
+			if (!target_address || (pcci->level < level)) {
+				target_address = smei->target_identifier;
+				level = pcci->level;
+				continue;
+			}
+		}
+	}
+	if (target_address)
+		return target_address;
+
+	/*
+	 * Look at the bus check for a valid target identifier
+	 */
+	smei = peidx_bus_check(peidx, 0);
+	if (smei && smei->valid.target_identifier)
+		return smei->target_identifier;
+
+	return 0;
+}
+
+/**
+ * recover_from_read_error - Try to recover the errors which type are "read"s.
+ * @slidx:	pointer of index of SAL error record
+ * @peidx:	pointer of index of processor error section
+ * @pbci:	pointer of pal_bus_check_info
+ * @sos:	pointer to hand off struct between SAL and OS
+ *
+ * Return value:
+ *	1 on Success / 0 on Failure
+ */
+
+static int
+recover_from_read_error(slidx_table_t *slidx,
+			peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+			struct ia64_sal_os_state *sos)
+{
+	u64 target_identifier;
+	pal_min_state_area_t *pmsa;
+	struct ia64_psr *psr1, *psr2;
+	ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook;
+
+	/* Is target address valid? */
+	target_identifier = get_target_identifier(peidx);
+	if (!target_identifier)
+		return fatal_mca("target address not valid");
+
+	/*
+	 * cpu read or memory-mapped io read
+	 *
+	 *    offending process  affected process  OS MCA do
+	 *     kernel mode        kernel mode       down system
+	 *     kernel mode        user   mode       kill the process
+	 *     user   mode        kernel mode       down system (*)
+	 *     user   mode        user   mode       kill the process
+	 *
+	 * (*) You could terminate offending user-mode process
+	 *    if (pbci->pv && pbci->pl != 0) *and* if you sure
+	 *    the process not have any locks of kernel.
+	 */
+
+	/* Is minstate valid? */
+	if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
+		return fatal_mca("minstate not valid");
+	psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
+	psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
+
+	/*
+	 *  Check the privilege level of interrupted context.
+	 *   If it is user-mode, then terminate affected process.
+	 */
+
+	pmsa = sos->pal_min_state;
+	if (psr1->cpl != 0 ||
+	   ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) {
+		/*
+		 *  setup for resume to bottom half of MCA,
+		 * "mca_handler_bhhook"
+		 */
+		/* pass to bhhook as argument (gr8, ...) */
+		pmsa->pmsa_gr[8-1] = target_identifier;
+		pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip;
+		pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr;
+		/* set interrupted return address (but no use) */
+		pmsa->pmsa_br0 = pmsa->pmsa_iip;
+		/* change resume address to bottom half */
+		pmsa->pmsa_iip = mca_hdlr_bh->fp;
+		pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp;
+		/* set cpl with kernel mode */
+		psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
+		psr2->cpl = 0;
+		psr2->ri  = 0;
+		psr2->bn  = 1;
+		psr2->i  = 0;
+
+		return mca_recovered("user memory corruption. "
+				"kill affected process - recovered.");
+	}
+
+	return fatal_mca("kernel context not recovered, iip 0x%lx\n",
+			 pmsa->pmsa_iip);
+}
+
+/**
+ * recover_from_platform_error - Recover from platform error.
+ * @slidx:	pointer of index of SAL error record
+ * @peidx:	pointer of index of processor error section
+ * @pbci:	pointer of pal_bus_check_info
+ * @sos:	pointer to hand off struct between SAL and OS
+ *
+ * Return value:
+ *	1 on Success / 0 on Failure
+ */
+
+static int
+recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx,
+			    pal_bus_check_info_t *pbci,
+			    struct ia64_sal_os_state *sos)
+{
+	int status = 0;
+	pal_processor_state_info_t *psp =
+		(pal_processor_state_info_t*)peidx_psp(peidx);
+
+	if (psp->bc && pbci->eb && pbci->bsi == 0) {
+		switch(pbci->type) {
+		case 1: /* partial read */
+		case 3: /* full line(cpu) read */
+		case 9: /* I/O space read */
+			status = recover_from_read_error(slidx, peidx, pbci,
+							 sos);
+			break;
+		case 0: /* unknown */
+		case 2: /* partial write */
+		case 4: /* full line write */
+		case 5: /* implicit or explicit write-back operation */
+		case 6: /* snoop probe */
+		case 7: /* incoming or outgoing ptc.g */
+		case 8: /* write coalescing transactions */
+		case 10: /* I/O space write */
+		case 11: /* inter-processor interrupt message(IPI) */
+		case 12: /* interrupt acknowledge or
+				external task priority cycle */
+		default:
+			break;
+		}
+	} else if (psp->cc && !psp->bc) {	/* Cache error */
+		status = recover_from_read_error(slidx, peidx, pbci, sos);
+	}
+
+	return status;
+}
+
+/*
+ * recover_from_tlb_check
+ * @peidx:	pointer of index of processor error section
+ *
+ * Return value:
+ *	1 on Success / 0 on Failure
+ */
+static int
+recover_from_tlb_check(peidx_table_t *peidx)
+{
+	sal_log_mod_error_info_t *smei;
+	pal_tlb_check_info_t *ptci;
+
+	smei = (sal_log_mod_error_info_t *)peidx_tlb_check(peidx, 0);
+	ptci = (pal_tlb_check_info_t *)&(smei->check_info);
+
+	/*
+	 * Look for signature of a duplicate TLB DTC entry, which is
+	 * a SW bug and always fatal.
+	 */
+	if (ptci->op == PAL_TLB_CHECK_OP_PURGE
+	    && !(ptci->itr || ptci->dtc || ptci->itc))
+		return fatal_mca("Duplicate TLB entry");
+
+	return mca_recovered("TLB check recovered");
+}
+
+/**
+ * recover_from_processor_error
+ * @platform:	whether there are some platform error section or not
+ * @slidx:	pointer of index of SAL error record
+ * @peidx:	pointer of index of processor error section
+ * @pbci:	pointer of pal_bus_check_info
+ * @sos:	pointer to hand off struct between SAL and OS
+ *
+ * Return value:
+ *	1 on Success / 0 on Failure
+ */
+
+static int
+recover_from_processor_error(int platform, slidx_table_t *slidx,
+			     peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+			     struct ia64_sal_os_state *sos)
+{
+	pal_processor_state_info_t *psp =
+		(pal_processor_state_info_t*)peidx_psp(peidx);
+
+	/*
+	 * Processor recovery status must key off of the PAL recovery
+	 * status in the Processor State Parameter.
+	 */
+
+	/*
+	 * The machine check is corrected.
+	 */
+	if (psp->cm == 1)
+		return mca_recovered("machine check is already corrected.");
+
+	/*
+	 * The error was not contained.  Software must be reset.
+	 */
+	if (psp->us || psp->ci == 0)
+		return fatal_mca("error not contained");
+
+	/*
+	 * Look for recoverable TLB check
+	 */
+	if (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
+		return recover_from_tlb_check(peidx);
+
+	/*
+	 * The cache check and bus check bits have four possible states
+	 *   cc bc
+	 *    1  1	Memory error, attempt recovery
+	 *    1  0	Cache error, attempt recovery
+	 *    0  1	I/O error, attempt recovery
+	 *    0  0	Other error type, not recovered
+	 */
+	if (psp->cc == 0 && (psp->bc == 0 || pbci == NULL))
+		return fatal_mca("No cache or bus check");
+
+	/*
+	 * Cannot handle more than one bus check.
+	 */
+	if (peidx_bus_check_num(peidx) > 1)
+		return fatal_mca("Too many bus checks");
+
+	if (pbci->ib)
+		return fatal_mca("Internal Bus error");
+	if (pbci->eb && pbci->bsi > 0)
+		return fatal_mca("External bus check fatal status");
+
+	/*
+	 * This is a local MCA and estimated as a recoverable error.
+	 */
+	if (platform)
+		return recover_from_platform_error(slidx, peidx, pbci, sos);
+
+	/*
+	 * On account of strange SAL error record, we cannot recover.
+	 */
+	return fatal_mca("Strange SAL record");
+}
+
+/**
+ * mca_try_to_recover - Try to recover from MCA
+ * @rec:	pointer to a SAL error record
+ * @sos:	pointer to hand off struct between SAL and OS
+ *
+ * Return value:
+ *	1 on Success / 0 on Failure
+ */
+
+static int
+mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
+{
+	int platform_err;
+	int n_proc_err;
+	slidx_table_t slidx;
+	peidx_table_t peidx;
+	pal_bus_check_info_t pbci;
+
+	/* Make index of SAL error record */
+	platform_err = mca_make_slidx(rec, &slidx);
+
+	/* Count processor error sections */
+	n_proc_err = slidx_count(&slidx, proc_err);
+
+	 /* Now, OS can recover when there is one processor error section */
+	if (n_proc_err > 1)
+		return fatal_mca("Too Many Errors");
+	else if (n_proc_err == 0)
+		/* Weird SAL record ... We can't do anything */
+		return fatal_mca("Weird SAL record");
+
+	/* Make index of processor error section */
+	mca_make_peidx((sal_log_processor_info_t*)
+		slidx_first_entry(&slidx.proc_err)->hdr, &peidx);
+
+	/* Extract Processor BUS_CHECK[0] */
+	*((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0);
+
+	/* Check whether MCA is global or not */
+	if (is_mca_global(&peidx, &pbci, sos))
+		return fatal_mca("global MCA");
+	
+	/* Try to recover a processor error */
+	return recover_from_processor_error(platform_err, &slidx, &peidx,
+					    &pbci, sos);
+}
+
+/*
+ * =============================================================================
+ */
+
+int __init mca_external_handler_init(void)
+{
+	if (init_record_index_pools())
+		return -ENOMEM;
+
+	/* register external mca handlers */
+	if (ia64_reg_MCA_extension(mca_try_to_recover)) {	
+		printk(KERN_ERR "ia64_reg_MCA_extension failed.\n");
+		kfree(slidx_pool.buffer);
+		return -EFAULT;
+	}
+	return 0;
+}
+
+void __exit mca_external_handler_exit(void)
+{
+	/* unregister external mca handlers */
+	ia64_unreg_MCA_extension();
+	kfree(slidx_pool.buffer);
+}
+
+module_init(mca_external_handler_init);
+module_exit(mca_external_handler_exit);
+
+module_param(sal_rec_max, int, 0644);
+MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record");
+
+MODULE_DESCRIPTION("ia64 platform dependent mca handler driver");
+MODULE_LICENSE("GPL");
--- a/arch/ia64/kernel/mca_drv.h
+++ b/arch/ia64/kernel/mca_drv.h
@ -0,0 +1,122 @@
+/*
+ * File:	mca_drv.h
+ * Purpose:	Define helpers for Generic MCA handling
+ *
+ * Copyright (C) 2004 FUJITSU LIMITED
+ * Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ */
+/*
+ * Processor error section:
+ *
+ *  +-sal_log_processor_info_t *info-------------+
+ *  | sal_log_section_hdr_t header;              |
+ *  | ...                                        |
+ *  | sal_log_mod_error_info_t info[0];          |
+ *  +-+----------------+-------------------------+
+ *    | CACHE_CHECK    |  ^ num_cache_check v
+ *    +----------------+
+ *    | TLB_CHECK      |  ^ num_tlb_check v
+ *    +----------------+
+ *    | BUS_CHECK      |  ^ num_bus_check v
+ *    +----------------+
+ *    | REG_FILE_CHECK |  ^ num_reg_file_check v
+ *    +----------------+
+ *    | MS_CHECK       |  ^ num_ms_check v
+ *  +-struct cpuid_info *id----------------------+
+ *  | regs[5];                                   |
+ *  | reserved;                                  |
+ *  +-sal_processor_static_info_t *regs----------+
+ *  | valid;                                     |
+ *  | ...                                        |
+ *  | fr[128];                                   |
+ *  +--------------------------------------------+
+ */
+
+/* peidx: index of processor error section */
+typedef struct peidx_table {
+	sal_log_processor_info_t        *info;
+	struct sal_cpuid_info           *id;
+	sal_processor_static_info_t     *regs;
+} peidx_table_t;
+
+#define peidx_head(p)   (((p)->info))
+#define peidx_mid(p)    (((p)->id))
+#define peidx_bottom(p) (((p)->regs))
+
+#define peidx_psp(p)           (&(peidx_head(p)->proc_state_parameter))
+#define peidx_field_valid(p)   (&(peidx_head(p)->valid))
+#define peidx_minstate_area(p) (&(peidx_bottom(p)->min_state_area))
+
+#define peidx_cache_check_num(p)    (peidx_head(p)->valid.num_cache_check)
+#define peidx_tlb_check_num(p)      (peidx_head(p)->valid.num_tlb_check)
+#define peidx_bus_check_num(p)      (peidx_head(p)->valid.num_bus_check)
+#define peidx_reg_file_check_num(p) (peidx_head(p)->valid.num_reg_file_check)
+#define peidx_ms_check_num(p)       (peidx_head(p)->valid.num_ms_check)
+
+#define peidx_cache_check_idx(p, n)    (n)
+#define peidx_tlb_check_idx(p, n)      (peidx_cache_check_idx(p, peidx_cache_check_num(p)) + n)
+#define peidx_bus_check_idx(p, n)      (peidx_tlb_check_idx(p, peidx_tlb_check_num(p)) + n)
+#define peidx_reg_file_check_idx(p, n) (peidx_bus_check_idx(p, peidx_bus_check_num(p)) + n)
+#define peidx_ms_check_idx(p, n)       (peidx_reg_file_check_idx(p, peidx_reg_file_check_num(p)) + n)
+
+#define peidx_mod_error_info(p, name, n) \
+({	int __idx = peidx_##name##_idx(p, n); \
+	sal_log_mod_error_info_t *__ret = NULL; \
+	if (peidx_##name##_num(p) > n) /*BUG*/ \
+		__ret = &(peidx_head(p)->info[__idx]); \
+	__ret; })
+
+#define peidx_cache_check(p, n)    peidx_mod_error_info(p, cache_check, n)
+#define peidx_tlb_check(p, n)      peidx_mod_error_info(p, tlb_check, n)
+#define peidx_bus_check(p, n)      peidx_mod_error_info(p, bus_check, n)
+#define peidx_reg_file_check(p, n) peidx_mod_error_info(p, reg_file_check, n)
+#define peidx_ms_check(p, n)       peidx_mod_error_info(p, ms_check, n)
+
+#define peidx_check_info(proc, name, n) \
+({ \
+	sal_log_mod_error_info_t *__info = peidx_mod_error_info(proc, name, n);\
+	u64 __temp = __info && __info->valid.check_info \
+		? __info->check_info : 0; \
+	__temp; })
+
+/* slidx: index of SAL log error record */
+
+typedef struct slidx_list {
+	struct list_head list;
+	sal_log_section_hdr_t *hdr;
+} slidx_list_t;
+
+typedef struct slidx_table {
+	sal_log_record_header_t *header;
+	int n_sections;			/* # of section headers */
+	struct list_head proc_err;
+	struct list_head mem_dev_err;
+	struct list_head sel_dev_err;
+	struct list_head pci_bus_err;
+	struct list_head smbios_dev_err;
+	struct list_head pci_comp_err;
+	struct list_head plat_specific_err;
+	struct list_head host_ctlr_err;
+	struct list_head plat_bus_err;
+	struct list_head unsupported;	/* list of unsupported sections */
+} slidx_table_t;
+
+#define slidx_foreach_entry(pos, head) \
+	list_for_each_entry(pos, head, list)
+#define slidx_first_entry(head) \
+	(((head)->next != (head)) ? list_entry((head)->next, typeof(slidx_list_t), list) : NULL)
+#define slidx_count(slidx, sec) \
+({	int __count = 0; \
+	slidx_list_t *__pos; \
+	slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\
+	__count; })
+
+struct mca_table_entry {
+	int start_addr;	/* location-relative starting address of MCA recoverable range */
+	int end_addr;	/* location-relative ending address of MCA recoverable range */
+};
+
+extern const struct mca_table_entry *search_mca_tables (unsigned long addr);
+extern int mca_recover_range(unsigned long);
+extern void ia64_mlogbuf_dump(void);
+
--- a/arch/ia64/kernel/mca_drv_asm.S
+++ b/arch/ia64/kernel/mca_drv_asm.S
@ -0,0 +1,55 @@
+/*
+ * File:        mca_drv_asm.S
+ * Purpose:     Assembly portion of Generic MCA handling
+ *
+ * Copyright (C) 2004 FUJITSU LIMITED
+ * Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ */
+#include <linux/threads.h>
+
+#include <asm/asmmacro.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+
+GLOBAL_ENTRY(mca_handler_bhhook)
+	invala				// clear RSE ?
+	cover
+	;;
+	clrrrb
+	;;						
+	alloc	r16=ar.pfs,0,2,3,0	// make a new frame
+	mov	ar.rsc=0
+	mov	r13=IA64_KR(CURRENT)	// current task pointer
+	;;
+	mov	r2=r13
+	;;
+	addl	r22=IA64_RBS_OFFSET,r2
+	;;
+	mov	ar.bspstore=r22
+	addl	sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2
+	;;
+	adds	r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+	;;
+	st1	[r2]=r0		// clear current->thread.on_ustack flag
+	mov	loc0=r16
+	movl	loc1=mca_handler_bh	// recovery C function
+	;;
+	mov	out0=r8			// poisoned address
+	mov	out1=r9			// iip
+	mov	out2=r10		// psr
+	mov	b6=loc1
+	;;
+	mov	loc1=rp
+	ssm	psr.ic
+	;;
+	srlz.i
+	;;
+	ssm	psr.i
+	br.call.sptk.many rp=b6		// does not return ...
+	;;
+	mov	ar.pfs=loc0
+	mov 	rp=loc1
+	;;
+	mov	r8=r0
+	br.ret.sptk.many rp
+END(mca_handler_bhhook)
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@ -0,0 +1,250 @@
+
+#include <asm/cache.h>
+
+#include "entry.h"
+#include "paravirt_inst.h"
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+/* read ar.itc in advance, and use it before leaving bank 0 */
+#define ACCOUNT_GET_STAMP				\
+(pUStk) mov.m r20=ar.itc;
+#define ACCOUNT_SYS_ENTER				\
+(pUStk) br.call.spnt rp=account_sys_enter		\
+	;;
+#else
+#define ACCOUNT_GET_STAMP
+#define ACCOUNT_SYS_ENTER
+#endif
+
+.section ".data..patch.rse", "a"
+.previous
+
+/*
+ * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
+ * the minimum state necessary that allows us to turn psr.ic back
+ * on.
+ *
+ * Assumed state upon entry:
+ *	psr.ic: off
+ *	r31:	contains saved predicates (pr)
+ *
+ * Upon exit, the state is as follows:
+ *	psr.ic: off
+ *	 r2 = points to &pt_regs.r16
+ *	 r8 = contents of ar.ccv
+ *	 r9 = contents of ar.csd
+ *	r10 = contents of ar.ssd
+ *	r11 = FPSR_DEFAULT
+ *	r12 = kernel sp (kernel virtual address)
+ *	r13 = points to current task_struct (kernel virtual address)
+ *	p15 = TRUE if psr.i is set in cr.ipsr
+ *	predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
+ *		preserved
+ *
+ * Note that psr.ic is NOT turned on by this macro.  This is so that
+ * we can pass interruption state as arguments to a handler.
+ */
+#define IA64_NATIVE_DO_SAVE_MIN(__COVER,SAVE_IFS,EXTRA,WORKAROUND)				\
+	mov r16=IA64_KR(CURRENT);	/* M */							\
+	mov r27=ar.rsc;			/* M */							\
+	mov r20=r1;			/* A */							\
+	mov r25=ar.unat;		/* M */							\
+	MOV_FROM_IPSR(p0,r29);		/* M */							\
+	mov r26=ar.pfs;			/* I */							\
+	MOV_FROM_IIP(r28);			/* M */						\
+	mov r21=ar.fpsr;		/* M */							\
+	__COVER;				/* B;; (or nothing) */				\
+	;;											\
+	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16;						\
+	;;											\
+	ld1 r17=[r16];				/* load current->thread.on_ustack flag */	\
+	st1 [r16]=r0;				/* clear current->thread.on_ustack flag */	\
+	adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16						\
+	/* switch from user to kernel RBS: */							\
+	;;											\
+	invala;				/* M */							\
+	SAVE_IFS;										\
+	cmp.eq pKStk,pUStk=r0,r17;		/* are we in kernel mode already? */		\
+	;;											\
+(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
+	;;											\
+(pUStk)	mov.m r24=ar.rnat;									\
+(pUStk)	addl r22=IA64_RBS_OFFSET,r1;			/* compute base of RBS */		\
+(pKStk) mov r1=sp;					/* get sp  */				\
+	;;											\
+(pUStk) lfetch.fault.excl.nt1 [r22];								\
+(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
+(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
+	;;											\
+(pUStk)	mov ar.bspstore=r22;				/* switch to kernel RBS */		\
+(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;			/* if in kernel mode, use sp (r12) */	\
+	;;											\
+(pUStk)	mov r18=ar.bsp;										\
+(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */		\
+	adds r17=2*L1_CACHE_BYTES,r1;		/* really: biggest cache-line size */		\
+	adds r16=PT(CR_IPSR),r1;								\
+	;;											\
+	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;						\
+	st8 [r16]=r29;		/* save cr.ipsr */						\
+	;;											\
+	lfetch.fault.excl.nt1 [r17];								\
+	tbit.nz p15,p0=r29,IA64_PSR_I_BIT;							\
+	mov r29=b0										\
+	;;											\
+	WORKAROUND;										\
+	adds r16=PT(R8),r1;	/* initialize first base pointer */				\
+	adds r17=PT(R9),r1;	/* initialize second base pointer */				\
+(pKStk)	mov r18=r0;		/* make sure r18 isn't NaT */					\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r8,16;								\
+.mem.offset 8,0; st8.spill [r17]=r9,16;								\
+        ;;											\
+.mem.offset 0,0; st8.spill [r16]=r10,24;							\
+.mem.offset 8,0; st8.spill [r17]=r11,24;							\
+        ;;											\
+	st8 [r16]=r28,16;	/* save cr.iip */						\
+	st8 [r17]=r30,16;	/* save cr.ifs */						\
+(pUStk)	sub r18=r18,r22;	/* r18=RSE.ndirty*8 */						\
+	mov r8=ar.ccv;										\
+	mov r9=ar.csd;										\
+	mov r10=ar.ssd;										\
+	movl r11=FPSR_DEFAULT;   /* L-unit */							\
+	;;											\
+	st8 [r16]=r25,16;	/* save ar.unat */						\
+	st8 [r17]=r26,16;	/* save ar.pfs */						\
+	shl r18=r18,16;		/* compute ar.rsc to be used for "loadrs" */			\
+	;;											\
+	st8 [r16]=r27,16;	/* save ar.rsc */						\
+(pUStk)	st8 [r17]=r24,16;	/* save ar.rnat */						\
+(pKStk)	adds r17=16,r17;	/* skip over ar_rnat field */					\
+	;;			/* avoid RAW on r16 & r17 */					\
+(pUStk)	st8 [r16]=r23,16;	/* save ar.bspstore */						\
+	st8 [r17]=r31,16;	/* save predicates */						\
+(pKStk)	adds r16=16,r16;	/* skip over ar_bspstore field */				\
+	;;											\
+	st8 [r16]=r29,16;	/* save b0 */							\
+	st8 [r17]=r18,16;	/* save ar.rsc value for "loadrs" */				\
+	cmp.eq pNonSys,pSys=r0,r0	/* initialize pSys=0, pNonSys=1 */			\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r20,16;	/* save original r1 */				\
+.mem.offset 8,0; st8.spill [r17]=r12,16;							\
+	adds r12=-16,r1;	/* switch to kernel memory stack (with 16 bytes of scratch) */	\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r13,16;							\
+.mem.offset 8,0; st8.spill [r17]=r21,16;	/* save ar.fpsr */				\
+	mov r13=IA64_KR(CURRENT);	/* establish `current' */				\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r15,16;							\
+.mem.offset 8,0; st8.spill [r17]=r14,16;							\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r2,16;								\
+.mem.offset 8,0; st8.spill [r17]=r3,16;								\
+	ACCOUNT_GET_STAMP									\
+	adds r2=IA64_PT_REGS_R16_OFFSET,r1;							\
+	;;											\
+	EXTRA;											\
+	movl r1=__gp;		/* establish kernel global pointer */				\
+	;;											\
+	ACCOUNT_SYS_ENTER									\
+	bsw.1;			/* switch back to bank 1 (must be last in insn group) */	\
+	;;
+
+/*
+ * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
+ *
+ * Assumed state upon entry:
+ *	psr.ic: on
+ *	r2:	points to &pt_regs.r16
+ *	r3:	points to &pt_regs.r17
+ *	r8:	contents of ar.ccv
+ *	r9:	contents of ar.csd
+ *	r10:	contents of ar.ssd
+ *	r11:	FPSR_DEFAULT
+ *
+ * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
+ */
+#define SAVE_REST				\
+.mem.offset 0,0; st8.spill [r2]=r16,16;		\
+.mem.offset 8,0; st8.spill [r3]=r17,16;		\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r18,16;		\
+.mem.offset 8,0; st8.spill [r3]=r19,16;		\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r20,16;		\
+.mem.offset 8,0; st8.spill [r3]=r21,16;		\
+	mov r18=b6;				\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r22,16;		\
+.mem.offset 8,0; st8.spill [r3]=r23,16;		\
+	mov r19=b7;				\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r24,16;		\
+.mem.offset 8,0; st8.spill [r3]=r25,16;		\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r26,16;		\
+.mem.offset 8,0; st8.spill [r3]=r27,16;		\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r28,16;		\
+.mem.offset 8,0; st8.spill [r3]=r29,16;		\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r30,16;		\
+.mem.offset 8,0; st8.spill [r3]=r31,32;		\
+	;;					\
+	mov ar.fpsr=r11;	/* M-unit */	\
+	st8 [r2]=r8,8;		/* ar.ccv */	\
+	adds r24=PT(B6)-PT(F7),r3;		\
+	;;					\
+	stf.spill [r2]=f6,32;			\
+	stf.spill [r3]=f7,32;			\
+	;;					\
+	stf.spill [r2]=f8,32;			\
+	stf.spill [r3]=f9,32;			\
+	;;					\
+	stf.spill [r2]=f10;			\
+	stf.spill [r3]=f11;			\
+	adds r25=PT(B7)-PT(F11),r3;		\
+	;;					\
+	st8 [r24]=r18,16;       /* b6 */	\
+	st8 [r25]=r19,16;       /* b7 */	\
+	;;					\
+	st8 [r24]=r9;        	/* ar.csd */	\
+	st8 [r25]=r10;      	/* ar.ssd */	\
+	;;
+
+#define RSE_WORKAROUND				\
+(pUStk) extr.u r17=r18,3,6;			\
+(pUStk)	sub r16=r18,r22;			\
+[1:](pKStk)	br.cond.sptk.many 1f;		\
+	.xdata4 ".data..patch.rse",1b-.		\
+	;;					\
+	cmp.ge p6,p7 = 33,r17;			\
+	;;					\
+(p6)	mov r17=0x310;				\
+(p7)	mov r17=0x308;				\
+	;;					\
+	cmp.leu p1,p0=r16,r17;			\
+(p1)	br.cond.sptk.many 1f;			\
+	dep.z r17=r26,0,62;			\
+	movl r16=2f;				\
+	;;					\
+	mov ar.pfs=r17;				\
+	dep r27=r0,r27,16,14;			\
+	mov b0=r16;				\
+	;;					\
+	br.ret.sptk b0;				\
+	;;					\
+2:						\
+	mov ar.rsc=r0				\
+	;;					\
+	flushrs;				\
+	;;					\
+	mov ar.bspstore=r22			\
+	;;					\
+	mov r18=ar.bsp;				\
+	;;					\
+1:						\
+	.pred.rel "mutex", pKStk, pUStk
+
+#define SAVE_MIN_WITH_COVER	DO_SAVE_MIN(COVER, mov r30=cr.ifs, , RSE_WORKAROUND)
+#define SAVE_MIN_WITH_COVER_R19	DO_SAVE_MIN(COVER, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND)
+#define SAVE_MIN			DO_SAVE_MIN(     , mov r30=r0, , )
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@ -0,0 +1,953 @@
+/*
+ * IA-64-specific support for kernel module loader.
+ *
+ * Copyright (C) 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Loosely based on patch by Rusty Russell.
+ */
+
+/* relocs tested so far:
+
+   DIR64LSB
+   FPTR64LSB
+   GPREL22
+   LDXMOV
+   LDXMOV
+   LTOFF22
+   LTOFF22X
+   LTOFF22X
+   LTOFF_FPTR22
+   PCREL21B	(for br.call only; br.cond is not supported out of modules!)
+   PCREL60B	(for brl.cond only; brl.call is not supported for modules!)
+   PCREL64LSB
+   SECREL32LSB
+   SEGREL64LSB
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/elf.h>
+#include <linux/moduleloader.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+
+#include <asm/patch.h>
+#include <asm/unaligned.h>
+
+#define ARCH_MODULE_DEBUG 0
+
+#if ARCH_MODULE_DEBUG
+# define DEBUGP printk
+# define inline
+#else
+# define DEBUGP(fmt , a...)
+#endif
+
+#ifdef CONFIG_ITANIUM
+# define USE_BRL	0
+#else
+# define USE_BRL	1
+#endif
+
+#define MAX_LTOFF	((uint64_t) (1 << 22))	/* max. allowable linkage-table offset */
+
+/* Define some relocation helper macros/types: */
+
+#define FORMAT_SHIFT	0
+#define FORMAT_BITS	3
+#define FORMAT_MASK	((1 << FORMAT_BITS) - 1)
+#define VALUE_SHIFT	3
+#define VALUE_BITS	5
+#define VALUE_MASK	((1 << VALUE_BITS) - 1)
+
+enum reloc_target_format {
+	/* direct encoded formats: */
+	RF_NONE = 0,
+	RF_INSN14 = 1,
+	RF_INSN22 = 2,
+	RF_INSN64 = 3,
+	RF_32MSB = 4,
+	RF_32LSB = 5,
+	RF_64MSB = 6,
+	RF_64LSB = 7,
+
+	/* formats that cannot be directly decoded: */
+	RF_INSN60,
+	RF_INSN21B,	/* imm21 form 1 */
+	RF_INSN21M,	/* imm21 form 2 */
+	RF_INSN21F	/* imm21 form 3 */
+};
+
+enum reloc_value_formula {
+	RV_DIRECT = 4,		/* S + A */
+	RV_GPREL = 5,		/* @gprel(S + A) */
+	RV_LTREL = 6,		/* @ltoff(S + A) */
+	RV_PLTREL = 7,		/* @pltoff(S + A) */
+	RV_FPTR = 8,		/* @fptr(S + A) */
+	RV_PCREL = 9,		/* S + A - P */
+	RV_LTREL_FPTR = 10,	/* @ltoff(@fptr(S + A)) */
+	RV_SEGREL = 11,		/* @segrel(S + A) */
+	RV_SECREL = 12,		/* @secrel(S + A) */
+	RV_BDREL = 13,		/* BD + A */
+	RV_LTV = 14,		/* S + A (like RV_DIRECT, except frozen at static link-time) */
+	RV_PCREL2 = 15,		/* S + A - P */
+	RV_SPECIAL = 16,	/* various (see below) */
+	RV_RSVD17 = 17,
+	RV_TPREL = 18,		/* @tprel(S + A) */
+	RV_LTREL_TPREL = 19,	/* @ltoff(@tprel(S + A)) */
+	RV_DTPMOD = 20,		/* @dtpmod(S + A) */
+	RV_LTREL_DTPMOD = 21,	/* @ltoff(@dtpmod(S + A)) */
+	RV_DTPREL = 22,		/* @dtprel(S + A) */
+	RV_LTREL_DTPREL = 23,	/* @ltoff(@dtprel(S + A)) */
+	RV_RSVD24 = 24,
+	RV_RSVD25 = 25,
+	RV_RSVD26 = 26,
+	RV_RSVD27 = 27
+	/* 28-31 reserved for implementation-specific purposes.  */
+};
+
+#define N(reloc)	[R_IA64_##reloc] = #reloc
+
+static const char *reloc_name[256] = {
+	N(NONE),		N(IMM14),		N(IMM22),		N(IMM64),
+	N(DIR32MSB),		N(DIR32LSB),		N(DIR64MSB),		N(DIR64LSB),
+	N(GPREL22),		N(GPREL64I),		N(GPREL32MSB),		N(GPREL32LSB),
+	N(GPREL64MSB),		N(GPREL64LSB),		N(LTOFF22),		N(LTOFF64I),
+	N(PLTOFF22),		N(PLTOFF64I),		N(PLTOFF64MSB),		N(PLTOFF64LSB),
+	N(FPTR64I),		N(FPTR32MSB),		N(FPTR32LSB),		N(FPTR64MSB),
+	N(FPTR64LSB),		N(PCREL60B),		N(PCREL21B),		N(PCREL21M),
+	N(PCREL21F),		N(PCREL32MSB),		N(PCREL32LSB),		N(PCREL64MSB),
+	N(PCREL64LSB),		N(LTOFF_FPTR22),	N(LTOFF_FPTR64I),	N(LTOFF_FPTR32MSB),
+	N(LTOFF_FPTR32LSB),	N(LTOFF_FPTR64MSB),	N(LTOFF_FPTR64LSB),	N(SEGREL32MSB),
+	N(SEGREL32LSB),		N(SEGREL64MSB),		N(SEGREL64LSB),		N(SECREL32MSB),
+	N(SECREL32LSB),		N(SECREL64MSB),		N(SECREL64LSB),		N(REL32MSB),
+	N(REL32LSB),		N(REL64MSB),		N(REL64LSB),		N(LTV32MSB),
+	N(LTV32LSB),		N(LTV64MSB),		N(LTV64LSB),		N(PCREL21BI),
+	N(PCREL22),		N(PCREL64I),		N(IPLTMSB),		N(IPLTLSB),
+	N(COPY),		N(LTOFF22X),		N(LDXMOV),		N(TPREL14),
+	N(TPREL22),		N(TPREL64I),		N(TPREL64MSB),		N(TPREL64LSB),
+	N(LTOFF_TPREL22),	N(DTPMOD64MSB),		N(DTPMOD64LSB),		N(LTOFF_DTPMOD22),
+	N(DTPREL14),		N(DTPREL22),		N(DTPREL64I),		N(DTPREL32MSB),
+	N(DTPREL32LSB),		N(DTPREL64MSB),		N(DTPREL64LSB),		N(LTOFF_DTPREL22)
+};
+
+#undef N
+
+/* Opaque struct for insns, to protect against derefs. */
+struct insn;
+
+static inline uint64_t
+bundle (const struct insn *insn)
+{
+	return (uint64_t) insn & ~0xfUL;
+}
+
+static inline int
+slot (const struct insn *insn)
+{
+	return (uint64_t) insn & 0x3;
+}
+
+static int
+apply_imm64 (struct module *mod, struct insn *insn, uint64_t val)
+{
+	if (slot(insn) != 2) {
+		printk(KERN_ERR "%s: invalid slot number %d for IMM64\n",
+		       mod->name, slot(insn));
+		return 0;
+	}
+	ia64_patch_imm64((u64) insn, val);
+	return 1;
+}
+
+static int
+apply_imm60 (struct module *mod, struct insn *insn, uint64_t val)
+{
+	if (slot(insn) != 2) {
+		printk(KERN_ERR "%s: invalid slot number %d for IMM60\n",
+		       mod->name, slot(insn));
+		return 0;
+	}
+	if (val + ((uint64_t) 1 << 59) >= (1UL << 60)) {
+		printk(KERN_ERR "%s: value %ld out of IMM60 range\n",
+			mod->name, (long) val);
+		return 0;
+	}
+	ia64_patch_imm60((u64) insn, val);
+	return 1;
+}
+
+static int
+apply_imm22 (struct module *mod, struct insn *insn, uint64_t val)
+{
+	if (val + (1 << 21) >= (1 << 22)) {
+		printk(KERN_ERR "%s: value %li out of IMM22 range\n",
+			mod->name, (long)val);
+		return 0;
+	}
+	ia64_patch((u64) insn, 0x01fffcfe000UL, (  ((val & 0x200000UL) << 15) /* bit 21 -> 36 */
+					         | ((val & 0x1f0000UL) <<  6) /* bit 16 -> 22 */
+					         | ((val & 0x00ff80UL) << 20) /* bit  7 -> 27 */
+					         | ((val & 0x00007fUL) << 13) /* bit  0 -> 13 */));
+	return 1;
+}
+
+static int
+apply_imm21b (struct module *mod, struct insn *insn, uint64_t val)
+{
+	if (val + (1 << 20) >= (1 << 21)) {
+		printk(KERN_ERR "%s: value %li out of IMM21b range\n",
+			mod->name, (long)val);
+		return 0;
+	}
+	ia64_patch((u64) insn, 0x11ffffe000UL, (  ((val & 0x100000UL) << 16) /* bit 20 -> 36 */
+					        | ((val & 0x0fffffUL) << 13) /* bit  0 -> 13 */));
+	return 1;
+}
+
+#if USE_BRL
+
+struct plt_entry {
+	/* Three instruction bundles in PLT. */
+ 	unsigned char bundle[2][16];
+};
+
+static const struct plt_entry ia64_plt_template = {
+	{
+		{
+			0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /*	     movl gp=TARGET_GP */
+			0x00, 0x00, 0x00, 0x60
+		},
+		{
+			0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*	     brl.many gp=TARGET_GP */
+			0x08, 0x00, 0x00, 0xc0
+		}
+	}
+};
+
+static int
+patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp)
+{
+	if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_gp)
+	    && apply_imm60(mod, (struct insn *) (plt->bundle[1] + 2),
+			   (target_ip - (int64_t) plt->bundle[1]) / 16))
+		return 1;
+	return 0;
+}
+
+unsigned long
+plt_target (struct plt_entry *plt)
+{
+	uint64_t b0, b1, *b = (uint64_t *) plt->bundle[1];
+	long off;
+
+	b0 = b[0]; b1 = b[1];
+	off = (  ((b1 & 0x00fffff000000000UL) >> 36)		/* imm20b -> bit 0 */
+	       | ((b0 >> 48) << 20) | ((b1 & 0x7fffffUL) << 36)	/* imm39 -> bit 20 */
+	       | ((b1 & 0x0800000000000000UL) << 0));		/* i -> bit 59 */
+	return (long) plt->bundle[1] + 16*off;
+}
+
+#else /* !USE_BRL */
+
+struct plt_entry {
+	/* Three instruction bundles in PLT. */
+ 	unsigned char bundle[3][16];
+};
+
+static const struct plt_entry ia64_plt_template = {
+	{
+		{
+			0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*	     movl r16=TARGET_IP */
+			0x02, 0x00, 0x00, 0x60
+		},
+		{
+			0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /*	     movl gp=TARGET_GP */
+			0x00, 0x00, 0x00, 0x60
+		},
+		{
+			0x11, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MIB] nop.m 0 */
+			0x60, 0x80, 0x04, 0x80, 0x03, 0x00, /*	     mov b6=r16 */
+			0x60, 0x00, 0x80, 0x00		    /*	     br.few b6 */
+		}
+	}
+};
+
+static int
+patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp)
+{
+	if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_ip)
+	    && apply_imm64(mod, (struct insn *) (plt->bundle[1] + 2), target_gp))
+		return 1;
+	return 0;
+}
+
+unsigned long
+plt_target (struct plt_entry *plt)
+{
+	uint64_t b0, b1, *b = (uint64_t *) plt->bundle[0];
+
+	b0 = b[0]; b1 = b[1];
+	return (  ((b1 & 0x000007f000000000) >> 36)		/* imm7b -> bit 0 */
+		| ((b1 & 0x07fc000000000000) >> 43)		/* imm9d -> bit 7 */
+		| ((b1 & 0x0003e00000000000) >> 29)		/* imm5c -> bit 16 */
+		| ((b1 & 0x0000100000000000) >> 23)		/* ic -> bit 21 */
+		| ((b0 >> 46) << 22) | ((b1 & 0x7fffff) << 40)	/* imm41 -> bit 22 */
+		| ((b1 & 0x0800000000000000) <<  4));		/* i -> bit 63 */
+}
+
+#endif /* !USE_BRL */
+
+void
+module_free (struct module *mod, void *module_region)
+{
+	if (mod && mod->arch.init_unw_table &&
+	    module_region == mod->module_init) {
+		unw_remove_unwind_table(mod->arch.init_unw_table);
+		mod->arch.init_unw_table = NULL;
+	}
+	vfree(module_region);
+}
+
+/* Have we already seen one of these relocations? */
+/* FIXME: we could look in other sections, too --RR */
+static int
+duplicate_reloc (const Elf64_Rela *rela, unsigned int num)
+{
+	unsigned int i;
+
+	for (i = 0; i < num; i++) {
+		if (rela[i].r_info == rela[num].r_info && rela[i].r_addend == rela[num].r_addend)
+			return 1;
+	}
+	return 0;
+}
+
+/* Count how many GOT entries we may need */
+static unsigned int
+count_gots (const Elf64_Rela *rela, unsigned int num)
+{
+	unsigned int i, ret = 0;
+
+	/* Sure, this is order(n^2), but it's usually short, and not
+           time critical */
+	for (i = 0; i < num; i++) {
+		switch (ELF64_R_TYPE(rela[i].r_info)) {
+		      case R_IA64_LTOFF22:
+		      case R_IA64_LTOFF22X:
+		      case R_IA64_LTOFF64I:
+		      case R_IA64_LTOFF_FPTR22:
+		      case R_IA64_LTOFF_FPTR64I:
+		      case R_IA64_LTOFF_FPTR32MSB:
+		      case R_IA64_LTOFF_FPTR32LSB:
+		      case R_IA64_LTOFF_FPTR64MSB:
+		      case R_IA64_LTOFF_FPTR64LSB:
+			if (!duplicate_reloc(rela, i))
+				ret++;
+			break;
+		}
+	}
+	return ret;
+}
+
+/* Count how many PLT entries we may need */
+static unsigned int
+count_plts (const Elf64_Rela *rela, unsigned int num)
+{
+	unsigned int i, ret = 0;
+
+	/* Sure, this is order(n^2), but it's usually short, and not
+           time critical */
+	for (i = 0; i < num; i++) {
+		switch (ELF64_R_TYPE(rela[i].r_info)) {
+		      case R_IA64_PCREL21B:
+		      case R_IA64_PLTOFF22:
+		      case R_IA64_PLTOFF64I:
+		      case R_IA64_PLTOFF64MSB:
+		      case R_IA64_PLTOFF64LSB:
+		      case R_IA64_IPLTMSB:
+		      case R_IA64_IPLTLSB:
+			if (!duplicate_reloc(rela, i))
+				ret++;
+			break;
+		}
+	}
+	return ret;
+}
+
+/* We need to create an function-descriptors for any internal function
+   which is referenced. */
+static unsigned int
+count_fdescs (const Elf64_Rela *rela, unsigned int num)
+{
+	unsigned int i, ret = 0;
+
+	/* Sure, this is order(n^2), but it's usually short, and not time critical.  */
+	for (i = 0; i < num; i++) {
+		switch (ELF64_R_TYPE(rela[i].r_info)) {
+		      case R_IA64_FPTR64I:
+		      case R_IA64_FPTR32LSB:
+		      case R_IA64_FPTR32MSB:
+		      case R_IA64_FPTR64LSB:
+		      case R_IA64_FPTR64MSB:
+		      case R_IA64_LTOFF_FPTR22:
+		      case R_IA64_LTOFF_FPTR32LSB:
+		      case R_IA64_LTOFF_FPTR32MSB:
+		      case R_IA64_LTOFF_FPTR64I:
+		      case R_IA64_LTOFF_FPTR64LSB:
+		      case R_IA64_LTOFF_FPTR64MSB:
+		      case R_IA64_IPLTMSB:
+		      case R_IA64_IPLTLSB:
+			/*
+			 * Jumps to static functions sometimes go straight to their
+			 * offset.  Of course, that may not be possible if the jump is
+			 * from init -> core or vice. versa, so we need to generate an
+			 * FDESC (and PLT etc) for that.
+			 */
+		      case R_IA64_PCREL21B:
+			if (!duplicate_reloc(rela, i))
+				ret++;
+			break;
+		}
+	}
+	return ret;
+}
+
+int
+module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings,
+			   struct module *mod)
+{
+	unsigned long core_plts = 0, init_plts = 0, gots = 0, fdescs = 0;
+	Elf64_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
+
+	/*
+	 * To store the PLTs and function-descriptors, we expand the .text section for
+	 * core module-code and the .init.text section for initialization code.
+	 */
+	for (s = sechdrs; s < sechdrs_end; ++s)
+		if (strcmp(".core.plt", secstrings + s->sh_name) == 0)
+			mod->arch.core_plt = s;
+		else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
+			mod->arch.init_plt = s;
+		else if (strcmp(".got", secstrings + s->sh_name) == 0)
+			mod->arch.got = s;
+		else if (strcmp(".opd", secstrings + s->sh_name) == 0)
+			mod->arch.opd = s;
+		else if (strcmp(".IA_64.unwind", secstrings + s->sh_name) == 0)
+			mod->arch.unwind = s;
+#ifdef CONFIG_PARAVIRT
+		else if (strcmp(".paravirt_bundles",
+				secstrings + s->sh_name) == 0)
+			mod->arch.paravirt_bundles = s;
+		else if (strcmp(".paravirt_insts",
+				secstrings + s->sh_name) == 0)
+			mod->arch.paravirt_insts = s;
+#endif
+
+	if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) {
+		printk(KERN_ERR "%s: sections missing\n", mod->name);
+		return -ENOEXEC;
+	}
+
+	/* GOT and PLTs can occur in any relocated section... */
+	for (s = sechdrs + 1; s < sechdrs_end; ++s) {
+		const Elf64_Rela *rels = (void *)ehdr + s->sh_offset;
+		unsigned long numrels = s->sh_size/sizeof(Elf64_Rela);
+
+		if (s->sh_type != SHT_RELA)
+			continue;
+
+		gots += count_gots(rels, numrels);
+		fdescs += count_fdescs(rels, numrels);
+		if (strstr(secstrings + s->sh_name, ".init"))
+			init_plts += count_plts(rels, numrels);
+		else
+			core_plts += count_plts(rels, numrels);
+	}
+
+	mod->arch.core_plt->sh_type = SHT_NOBITS;
+	mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.core_plt->sh_addralign = 16;
+	mod->arch.core_plt->sh_size = core_plts * sizeof(struct plt_entry);
+	mod->arch.init_plt->sh_type = SHT_NOBITS;
+	mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.init_plt->sh_addralign = 16;
+	mod->arch.init_plt->sh_size = init_plts * sizeof(struct plt_entry);
+	mod->arch.got->sh_type = SHT_NOBITS;
+	mod->arch.got->sh_flags = ARCH_SHF_SMALL | SHF_ALLOC;
+	mod->arch.got->sh_addralign = 8;
+	mod->arch.got->sh_size = gots * sizeof(struct got_entry);
+	mod->arch.opd->sh_type = SHT_NOBITS;
+	mod->arch.opd->sh_flags = SHF_ALLOC;
+	mod->arch.opd->sh_addralign = 8;
+	mod->arch.opd->sh_size = fdescs * sizeof(struct fdesc);
+	DEBUGP("%s: core.plt=%lx, init.plt=%lx, got=%lx, fdesc=%lx\n",
+	       __func__, mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size,
+	       mod->arch.got->sh_size, mod->arch.opd->sh_size);
+	return 0;
+}
+
+static inline int
+in_init (const struct module *mod, uint64_t addr)
+{
+	return addr - (uint64_t) mod->module_init < mod->init_size;
+}
+
+static inline int
+in_core (const struct module *mod, uint64_t addr)
+{
+	return addr - (uint64_t) mod->module_core < mod->core_size;
+}
+
+static inline int
+is_internal (const struct module *mod, uint64_t value)
+{
+	return in_init(mod, value) || in_core(mod, value);
+}
+
+/*
+ * Get gp-relative offset for the linkage-table entry of VALUE.
+ */
+static uint64_t
+get_ltoff (struct module *mod, uint64_t value, int *okp)
+{
+	struct got_entry *got, *e;
+
+	if (!*okp)
+		return 0;
+
+	got = (void *) mod->arch.got->sh_addr;
+	for (e = got; e < got + mod->arch.next_got_entry; ++e)
+		if (e->val == value)
+			goto found;
+
+	/* Not enough GOT entries? */
+	BUG_ON(e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size));
+
+	e->val = value;
+	++mod->arch.next_got_entry;
+  found:
+	return (uint64_t) e - mod->arch.gp;
+}
+
+static inline int
+gp_addressable (struct module *mod, uint64_t value)
+{
+	return value - mod->arch.gp + MAX_LTOFF/2 < MAX_LTOFF;
+}
+
+/* Get PC-relative PLT entry for this value.  Returns 0 on failure. */
+static uint64_t
+get_plt (struct module *mod, const struct insn *insn, uint64_t value, int *okp)
+{
+	struct plt_entry *plt, *plt_end;
+	uint64_t target_ip, target_gp;
+
+	if (!*okp)
+		return 0;
+
+	if (in_init(mod, (uint64_t) insn)) {
+		plt = (void *) mod->arch.init_plt->sh_addr;
+		plt_end = (void *) plt + mod->arch.init_plt->sh_size;
+	} else {
+		plt = (void *) mod->arch.core_plt->sh_addr;
+		plt_end = (void *) plt + mod->arch.core_plt->sh_size;
+	}
+
+	/* "value" is a pointer to a function-descriptor; fetch the target ip/gp from it: */
+	target_ip = ((uint64_t *) value)[0];
+	target_gp = ((uint64_t *) value)[1];
+
+	/* Look for existing PLT entry. */
+	while (plt->bundle[0][0]) {
+		if (plt_target(plt) == target_ip)
+			goto found;
+		if (++plt >= plt_end)
+			BUG();
+	}
+	*plt = ia64_plt_template;
+	if (!patch_plt(mod, plt, target_ip, target_gp)) {
+		*okp = 0;
+		return 0;
+	}
+#if ARCH_MODULE_DEBUG
+	if (plt_target(plt) != target_ip) {
+		printk("%s: mistargeted PLT: wanted %lx, got %lx\n",
+		       __func__, target_ip, plt_target(plt));
+		*okp = 0;
+		return 0;
+	}
+#endif
+  found:
+	return (uint64_t) plt;
+}
+
+/* Get function descriptor for VALUE. */
+static uint64_t
+get_fdesc (struct module *mod, uint64_t value, int *okp)
+{
+	struct fdesc *fdesc = (void *) mod->arch.opd->sh_addr;
+
+	if (!*okp)
+		return 0;
+
+	if (!value) {
+		printk(KERN_ERR "%s: fdesc for zero requested!\n", mod->name);
+		return 0;
+	}
+
+	if (!is_internal(mod, value))
+		/*
+		 * If it's not a module-local entry-point, "value" already points to a
+		 * function-descriptor.
+		 */
+		return value;
+
+	/* Look for existing function descriptor. */
+	while (fdesc->ip) {
+		if (fdesc->ip == value)
+			return (uint64_t)fdesc;
+		if ((uint64_t) ++fdesc >= mod->arch.opd->sh_addr + mod->arch.opd->sh_size)
+			BUG();
+	}
+
+	/* Create new one */
+	fdesc->ip = value;
+	fdesc->gp = mod->arch.gp;
+	return (uint64_t) fdesc;
+}
+
+static inline int
+do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend,
+	  Elf64_Shdr *sec, void *location)
+{
+	enum reloc_target_format format = (r_type >> FORMAT_SHIFT) & FORMAT_MASK;
+	enum reloc_value_formula formula = (r_type >> VALUE_SHIFT) & VALUE_MASK;
+	uint64_t val;
+	int ok = 1;
+
+	val = sym->st_value + addend;
+
+	switch (formula) {
+	      case RV_SEGREL:	/* segment base is arbitrarily chosen to be 0 for kernel modules */
+	      case RV_DIRECT:
+		break;
+
+	      case RV_GPREL:	  val -= mod->arch.gp; break;
+	      case RV_LTREL:	  val = get_ltoff(mod, val, &ok); break;
+	      case RV_PLTREL:	  val = get_plt(mod, location, val, &ok); break;
+	      case RV_FPTR:	  val = get_fdesc(mod, val, &ok); break;
+	      case RV_SECREL:	  val -= sec->sh_addr; break;
+	      case RV_LTREL_FPTR: val = get_ltoff(mod, get_fdesc(mod, val, &ok), &ok); break;
+
+	      case RV_PCREL:
+		switch (r_type) {
+		      case R_IA64_PCREL21B:
+			if ((in_init(mod, val) && in_core(mod, (uint64_t)location)) ||
+			    (in_core(mod, val) && in_init(mod, (uint64_t)location))) {
+				/*
+				 * Init section may have been allocated far away from core,
+				 * if the branch won't reach, then allocate a plt for it.
+				 */
+				uint64_t delta = ((int64_t)val - (int64_t)location) / 16;
+				if (delta + (1 << 20) >= (1 << 21)) {
+					val = get_fdesc(mod, val, &ok);
+					val = get_plt(mod, location, val, &ok);
+				}
+			} else if (!is_internal(mod, val))
+				val = get_plt(mod, location, val, &ok);
+			/* FALL THROUGH */
+		      default:
+			val -= bundle(location);
+			break;
+
+		      case R_IA64_PCREL32MSB:
+		      case R_IA64_PCREL32LSB:
+		      case R_IA64_PCREL64MSB:
+		      case R_IA64_PCREL64LSB:
+			val -= (uint64_t) location;
+			break;
+
+		}
+		switch (r_type) {
+		      case R_IA64_PCREL60B: format = RF_INSN60; break;
+		      case R_IA64_PCREL21B: format = RF_INSN21B; break;
+		      case R_IA64_PCREL21M: format = RF_INSN21M; break;
+		      case R_IA64_PCREL21F: format = RF_INSN21F; break;
+		      default: break;
+		}
+		break;
+
+	      case RV_BDREL:
+		val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core);
+		break;
+
+	      case RV_LTV:
+		/* can link-time value relocs happen here?  */
+		BUG();
+		break;
+
+	      case RV_PCREL2:
+		if (r_type == R_IA64_PCREL21BI) {
+			if (!is_internal(mod, val)) {
+				printk(KERN_ERR "%s: %s reloc against "
+					"non-local symbol (%lx)\n", __func__,
+					reloc_name[r_type], (unsigned long)val);
+				return -ENOEXEC;
+			}
+			format = RF_INSN21B;
+		}
+		val -= bundle(location);
+		break;
+
+	      case RV_SPECIAL:
+		switch (r_type) {
+		      case R_IA64_IPLTMSB:
+		      case R_IA64_IPLTLSB:
+			val = get_fdesc(mod, get_plt(mod, location, val, &ok), &ok);
+			format = RF_64LSB;
+			if (r_type == R_IA64_IPLTMSB)
+				format = RF_64MSB;
+			break;
+
+		      case R_IA64_SUB:
+			val = addend - sym->st_value;
+			format = RF_INSN64;
+			break;
+
+		      case R_IA64_LTOFF22X:
+			if (gp_addressable(mod, val))
+				val -= mod->arch.gp;
+			else
+				val = get_ltoff(mod, val, &ok);
+			format = RF_INSN22;
+			break;
+
+		      case R_IA64_LDXMOV:
+			if (gp_addressable(mod, val)) {
+				/* turn "ld8" into "mov": */
+				DEBUGP("%s: patching ld8 at %p to mov\n", __func__, location);
+				ia64_patch((u64) location, 0x1fff80fe000UL, 0x10000000000UL);
+			}
+			return 0;
+
+		      default:
+			if (reloc_name[r_type])
+				printk(KERN_ERR "%s: special reloc %s not supported",
+				       mod->name, reloc_name[r_type]);
+			else
+				printk(KERN_ERR "%s: unknown special reloc %x\n",
+				       mod->name, r_type);
+			return -ENOEXEC;
+		}
+		break;
+
+	      case RV_TPREL:
+	      case RV_LTREL_TPREL:
+	      case RV_DTPMOD:
+	      case RV_LTREL_DTPMOD:
+	      case RV_DTPREL:
+	      case RV_LTREL_DTPREL:
+		printk(KERN_ERR "%s: %s reloc not supported\n",
+		       mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?");
+		return -ENOEXEC;
+
+	      default:
+		printk(KERN_ERR "%s: unknown reloc %x\n", mod->name, r_type);
+		return -ENOEXEC;
+	}
+
+	if (!ok)
+		return -ENOEXEC;
+
+	DEBUGP("%s: [%p]<-%016lx = %s(%lx)\n", __func__, location, val,
+	       reloc_name[r_type] ? reloc_name[r_type] : "?", sym->st_value + addend);
+
+	switch (format) {
+	      case RF_INSN21B:	ok = apply_imm21b(mod, location, (int64_t) val / 16); break;
+	      case RF_INSN22:	ok = apply_imm22(mod, location, val); break;
+	      case RF_INSN64:	ok = apply_imm64(mod, location, val); break;
+	      case RF_INSN60:	ok = apply_imm60(mod, location, (int64_t) val / 16); break;
+	      case RF_32LSB:	put_unaligned(val, (uint32_t *) location); break;
+	      case RF_64LSB:	put_unaligned(val, (uint64_t *) location); break;
+	      case RF_32MSB:	/* ia64 Linux is little-endian... */
+	      case RF_64MSB:	/* ia64 Linux is little-endian... */
+	      case RF_INSN14:	/* must be within-module, i.e., resolved by "ld -r" */
+	      case RF_INSN21M:	/* must be within-module, i.e., resolved by "ld -r" */
+	      case RF_INSN21F:	/* must be within-module, i.e., resolved by "ld -r" */
+		printk(KERN_ERR "%s: format %u needed by %s reloc is not supported\n",
+		       mod->name, format, reloc_name[r_type] ? reloc_name[r_type] : "?");
+		return -ENOEXEC;
+
+	      default:
+		printk(KERN_ERR "%s: relocation %s resulted in unknown format %u\n",
+		       mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?", format);
+		return -ENOEXEC;
+	}
+	return ok ? 0 : -ENOEXEC;
+}
+
+int
+apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex,
+		    unsigned int relsec, struct module *mod)
+{
+	unsigned int i, n = sechdrs[relsec].sh_size / sizeof(Elf64_Rela);
+	Elf64_Rela *rela = (void *) sechdrs[relsec].sh_addr;
+	Elf64_Shdr *target_sec;
+	int ret;
+
+	DEBUGP("%s: applying section %u (%u relocs) to %u\n", __func__,
+	       relsec, n, sechdrs[relsec].sh_info);
+
+	target_sec = sechdrs + sechdrs[relsec].sh_info;
+
+	if (target_sec->sh_entsize == ~0UL)
+		/*
+		 * If target section wasn't allocated, we don't need to relocate it.
+		 * Happens, e.g., for debug sections.
+		 */
+		return 0;
+
+	if (!mod->arch.gp) {
+		/*
+		 * XXX Should have an arch-hook for running this after final section
+		 *     addresses have been selected...
+		 */
+		uint64_t gp;
+		if (mod->core_size > MAX_LTOFF)
+			/*
+			 * This takes advantage of fact that SHF_ARCH_SMALL gets allocated
+			 * at the end of the module.
+			 */
+			gp = mod->core_size - MAX_LTOFF / 2;
+		else
+			gp = mod->core_size / 2;
+		gp = (uint64_t) mod->module_core + ((gp + 7) & -8);
+		mod->arch.gp = gp;
+		DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp);
+	}
+
+	for (i = 0; i < n; i++) {
+		ret = do_reloc(mod, ELF64_R_TYPE(rela[i].r_info),
+			       ((Elf64_Sym *) sechdrs[symindex].sh_addr
+				+ ELF64_R_SYM(rela[i].r_info)),
+			       rela[i].r_addend, target_sec,
+			       (void *) target_sec->sh_addr + rela[i].r_offset);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Modules contain a single unwind table which covers both the core and the init text
+ * sections but since the two are not contiguous, we need to split this table up such that
+ * we can register (and unregister) each "segment" separately.  Fortunately, this sounds
+ * more complicated than it really is.
+ */
+static void
+register_unwind_table (struct module *mod)
+{
+	struct unw_table_entry *start = (void *) mod->arch.unwind->sh_addr;
+	struct unw_table_entry *end = start + mod->arch.unwind->sh_size / sizeof (*start);
+	struct unw_table_entry tmp, *e1, *e2, *core, *init;
+	unsigned long num_init = 0, num_core = 0;
+
+	/* First, count how many init and core unwind-table entries there are.  */
+	for (e1 = start; e1 < end; ++e1)
+		if (in_init(mod, e1->start_offset))
+			++num_init;
+		else
+			++num_core;
+	/*
+	 * Second, sort the table such that all unwind-table entries for the init and core
+	 * text sections are nicely separated.  We do this with a stupid bubble sort
+	 * (unwind tables don't get ridiculously huge).
+	 */
+	for (e1 = start; e1 < end; ++e1) {
+		for (e2 = e1 + 1; e2 < end; ++e2) {
+			if (e2->start_offset < e1->start_offset) {
+				tmp = *e1;
+				*e1 = *e2;
+				*e2 = tmp;
+			}
+		}
+	}
+	/*
+	 * Third, locate the init and core segments in the unwind table:
+	 */
+	if (in_init(mod, start->start_offset)) {
+		init = start;
+		core = start + num_init;
+	} else {
+		core = start;
+		init = start + num_core;
+	}
+
+	DEBUGP("%s: name=%s, gp=%lx, num_init=%lu, num_core=%lu\n", __func__,
+	       mod->name, mod->arch.gp, num_init, num_core);
+
+	/*
+	 * Fourth, register both tables (if not empty).
+	 */
+	if (num_core > 0) {
+		mod->arch.core_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp,
+								core, core + num_core);
+		DEBUGP("%s:  core: handle=%p [%p-%p)\n", __func__,
+		       mod->arch.core_unw_table, core, core + num_core);
+	}
+	if (num_init > 0) {
+		mod->arch.init_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp,
+								init, init + num_init);
+		DEBUGP("%s:  init: handle=%p [%p-%p)\n", __func__,
+		       mod->arch.init_unw_table, init, init + num_init);
+	}
+}
+
+int
+module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod)
+{
+	DEBUGP("%s: init: entry=%p\n", __func__, mod->init);
+	if (mod->arch.unwind)
+		register_unwind_table(mod);
+#ifdef CONFIG_PARAVIRT
+        if (mod->arch.paravirt_bundles) {
+                struct paravirt_patch_site_bundle *start =
+                        (struct paravirt_patch_site_bundle *)
+                        mod->arch.paravirt_bundles->sh_addr;
+                struct paravirt_patch_site_bundle *end =
+                        (struct paravirt_patch_site_bundle *)
+                        (mod->arch.paravirt_bundles->sh_addr +
+                         mod->arch.paravirt_bundles->sh_size);
+
+                paravirt_patch_apply_bundle(start, end);
+        }
+        if (mod->arch.paravirt_insts) {
+                struct paravirt_patch_site_inst *start =
+                        (struct paravirt_patch_site_inst *)
+                        mod->arch.paravirt_insts->sh_addr;
+                struct paravirt_patch_site_inst *end =
+                        (struct paravirt_patch_site_inst *)
+                        (mod->arch.paravirt_insts->sh_addr +
+                         mod->arch.paravirt_insts->sh_size);
+
+                paravirt_patch_apply_inst(start, end);
+        }
+#endif
+	return 0;
+}
+
+void
+module_arch_cleanup (struct module *mod)
+{
+	if (mod->arch.init_unw_table)
+		unw_remove_unwind_table(mod->arch.init_unw_table);
+	if (mod->arch.core_unw_table)
+		unw_remove_unwind_table(mod->arch.core_unw_table);
+}
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@ -0,0 +1,208 @@
+/*
+ * MSI hooks for standard x86 apic
+ */
+
+#include <linux/pci.h>
+#include <linux/irq.h>
+#include <linux/msi.h>
+#include <linux/dmar.h>
+#include <asm/smp.h>
+#include <asm/msidef.h>
+
+static struct irq_chip	ia64_msi_chip;
+
+#ifdef CONFIG_SMP
+static int ia64_set_msi_irq_affinity(struct irq_data *idata,
+				     const cpumask_t *cpu_mask, bool force)
+{
+	struct msi_msg msg;
+	u32 addr, data;
+	int cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
+	unsigned int irq = idata->irq;
+
+	if (irq_prepare_move(irq, cpu))
+		return -1;
+
+	__get_cached_msi_msg(idata->msi_desc, &msg);
+
+	addr = msg.address_lo;
+	addr &= MSI_ADDR_DEST_ID_MASK;
+	addr |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
+	msg.address_lo = addr;
+
+	data = msg.data;
+	data &= MSI_DATA_VECTOR_MASK;
+	data |= MSI_DATA_VECTOR(irq_to_vector(irq));
+	msg.data = data;
+
+	write_msi_msg(irq, &msg);
+	cpumask_copy(idata->affinity, cpumask_of(cpu));
+
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
+{
+	struct msi_msg	msg;
+	unsigned long	dest_phys_id;
+	int	irq, vector;
+	cpumask_t mask;
+
+	irq = create_irq();
+	if (irq < 0)
+		return irq;
+
+	irq_set_msi_desc(irq, desc);
+	cpumask_and(&mask, &(irq_to_domain(irq)), cpu_online_mask);
+	dest_phys_id = cpu_physical_id(first_cpu(mask));
+	vector = irq_to_vector(irq);
+
+	msg.address_hi = 0;
+	msg.address_lo =
+		MSI_ADDR_HEADER |
+		MSI_ADDR_DEST_MODE_PHYS |
+		MSI_ADDR_REDIRECTION_CPU |
+		MSI_ADDR_DEST_ID_CPU(dest_phys_id);
+
+	msg.data =
+		MSI_DATA_TRIGGER_EDGE |
+		MSI_DATA_LEVEL_ASSERT |
+		MSI_DATA_DELIVERY_FIXED |
+		MSI_DATA_VECTOR(vector);
+
+	write_msi_msg(irq, &msg);
+	irq_set_chip_and_handler(irq, &ia64_msi_chip, handle_edge_irq);
+
+	return 0;
+}
+
+void ia64_teardown_msi_irq(unsigned int irq)
+{
+	destroy_irq(irq);
+}
+
+static void ia64_ack_msi_irq(struct irq_data *data)
+{
+	irq_complete_move(data->irq);
+	irq_move_irq(data);
+	ia64_eoi();
+}
+
+static int ia64_msi_retrigger_irq(struct irq_data *data)
+{
+	unsigned int vector = irq_to_vector(data->irq);
+	ia64_resend_irq(vector);
+
+	return 1;
+}
+
+/*
+ * Generic ops used on most IA64 platforms.
+ */
+static struct irq_chip ia64_msi_chip = {
+	.name			= "PCI-MSI",
+	.irq_mask		= mask_msi_irq,
+	.irq_unmask		= unmask_msi_irq,
+	.irq_ack		= ia64_ack_msi_irq,
+#ifdef CONFIG_SMP
+	.irq_set_affinity	= ia64_set_msi_irq_affinity,
+#endif
+	.irq_retrigger		= ia64_msi_retrigger_irq,
+};
+
+
+int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
+{
+	if (platform_setup_msi_irq)
+		return platform_setup_msi_irq(pdev, desc);
+
+	return ia64_setup_msi_irq(pdev, desc);
+}
+
+void arch_teardown_msi_irq(unsigned int irq)
+{
+	if (platform_teardown_msi_irq)
+		return platform_teardown_msi_irq(irq);
+
+	return ia64_teardown_msi_irq(irq);
+}
+
+#ifdef CONFIG_INTEL_IOMMU
+#ifdef CONFIG_SMP
+static int dmar_msi_set_affinity(struct irq_data *data,
+				 const struct cpumask *mask, bool force)
+{
+	unsigned int irq = data->irq;
+	struct irq_cfg *cfg = irq_cfg + irq;
+	struct msi_msg msg;
+	int cpu = cpumask_first_and(mask, cpu_online_mask);
+
+	if (irq_prepare_move(irq, cpu))
+		return -1;
+
+	dmar_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
+
+	dmar_msi_write(irq, &msg);
+	cpumask_copy(data->affinity, mask);
+
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+static struct irq_chip dmar_msi_type = {
+	.name = "DMAR_MSI",
+	.irq_unmask = dmar_msi_unmask,
+	.irq_mask = dmar_msi_mask,
+	.irq_ack = ia64_ack_msi_irq,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = dmar_msi_set_affinity,
+#endif
+	.irq_retrigger = ia64_msi_retrigger_irq,
+};
+
+static int
+msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	unsigned dest;
+	cpumask_t mask;
+
+	cpumask_and(&mask, &(irq_to_domain(irq)), cpu_online_mask);
+	dest = cpu_physical_id(first_cpu(mask));
+
+	msg->address_hi = 0;
+	msg->address_lo =
+		MSI_ADDR_HEADER |
+		MSI_ADDR_DEST_MODE_PHYS |
+		MSI_ADDR_REDIRECTION_CPU |
+		MSI_ADDR_DEST_ID_CPU(dest);
+
+	msg->data =
+		MSI_DATA_TRIGGER_EDGE |
+		MSI_DATA_LEVEL_ASSERT |
+		MSI_DATA_DELIVERY_FIXED |
+		MSI_DATA_VECTOR(cfg->vector);
+	return 0;
+}
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(NULL, irq, &msg);
+	if (ret < 0)
+		return ret;
+	dmar_msi_write(irq, &msg);
+	irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+				      "edge");
+	return 0;
+}
+#endif /* CONFIG_INTEL_IOMMU */
+
--- a/arch/ia64/kernel/nr-irqs.c
+++ b/arch/ia64/kernel/nr-irqs.c
@ -0,0 +1,21 @@
+/*
+ * calculate
+ * NR_IRQS = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, FOO_NR_IRQS...)
+ * depending on config.
+ * This must be calculated before processing asm-offset.c.
+ */
+
+#define ASM_OFFSETS_C 1
+
+#include <linux/kbuild.h>
+#include <linux/threads.h>
+#include <asm/native/irq.h>
+
+void foo(void)
+{
+	union paravirt_nr_irqs_max {
+		char ia64_native_nr_irqs[IA64_NATIVE_NR_IRQS];
+	};
+
+	DEFINE(NR_IRQS, sizeof (union paravirt_nr_irqs_max));
+}
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@ -0,0 +1,85 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ia64 kernel NUMA specific stuff
+ *
+ * Copyright (C) 2002 Erich Focht <efocht@ess.nec.de>
+ * Copyright (C) 2004 Silicon Graphics, Inc.
+ *   Jesse Barnes <jbarnes@sgi.com>
+ */
+#include <linux/topology.h>
+#include <linux/module.h>
+#include <asm/processor.h>
+#include <asm/smp.h>
+
+u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_to_node_map);
+
+cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
+EXPORT_SYMBOL(node_to_cpu_mask);
+
+void map_cpu_to_node(int cpu, int nid)
+{
+	int oldnid;
+	if (nid < 0) { /* just initialize by zero */
+		cpu_to_node_map[cpu] = 0;
+		return;
+	}
+	/* sanity check first */
+	oldnid = cpu_to_node_map[cpu];
+	if (cpu_isset(cpu, node_to_cpu_mask[oldnid])) {
+		return; /* nothing to do */
+	}
+	/* we don't have cpu-driven node hot add yet...
+	   In usual case, node is created from SRAT at boot time. */
+	if (!node_online(nid))
+		nid = first_online_node;
+	cpu_to_node_map[cpu] = nid;
+	cpu_set(cpu, node_to_cpu_mask[nid]);
+	return;
+}
+
+void unmap_cpu_from_node(int cpu, int nid)
+{
+	WARN_ON(!cpu_isset(cpu, node_to_cpu_mask[nid]));
+	WARN_ON(cpu_to_node_map[cpu] != nid);
+	cpu_to_node_map[cpu] = 0;
+	cpu_clear(cpu, node_to_cpu_mask[nid]);
+}
+
+
+/**
+ * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays
+ *
+ * Build cpu to node mapping and initialize the per node cpu masks using
+ * info from the node_cpuid array handed to us by ACPI.
+ */
+void __init build_cpu_to_node_map(void)
+{
+	int cpu, i, node;
+
+	for(node=0; node < MAX_NUMNODES; node++)
+		cpus_clear(node_to_cpu_mask[node]);
+
+	for_each_possible_early_cpu(cpu) {
+		node = -1;
+		for (i = 0; i < NR_CPUS; ++i)
+			if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
+				node = node_cpuid[i].nid;
+				break;
+			}
+		map_cpu_to_node(cpu, node);
+	}
+}
--- a/arch/ia64/kernel/pal.S
+++ b/arch/ia64/kernel/pal.S
@ -0,0 +1,298 @@
+/*
+ * PAL Firmware support
+ * IA-64 Processor Programmers Reference Vol 2
+ *
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co
+ *	David Mosberger <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 05/22/2000 eranian Added support for stacked register calls
+ * 05/24/2000 eranian Added support for physical mode static calls
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/processor.h>
+
+	.data
+pal_entry_point:
+	data8 ia64_pal_default_handler
+	.text
+
+/*
+ * Set the PAL entry point address.  This could be written in C code, but we
+ * do it here to keep it all in one module (besides, it's so trivial that it's
+ * not a big deal).
+ *
+ * in0		Address of the PAL entry point (text address, NOT a function
+ *		descriptor).
+ */
+GLOBAL_ENTRY(ia64_pal_handler_init)
+	alloc r3=ar.pfs,1,0,0,0
+	movl r2=pal_entry_point
+	;;
+	st8 [r2]=in0
+	br.ret.sptk.many rp
+END(ia64_pal_handler_init)
+
+/*
+ * Default PAL call handler.  This needs to be coded in assembly because it
+ * uses the static calling convention, i.e., the RSE may not be used and
+ * calls are done via "br.cond" (not "br.call").
+ */
+GLOBAL_ENTRY(ia64_pal_default_handler)
+	mov r8=-1
+	br.cond.sptk.many rp
+END(ia64_pal_default_handler)
+
+/*
+ * Make a PAL call using the static calling convention.
+ *
+ * in0         Index of PAL service
+ * in1 - in3   Remaining PAL arguments
+ */
+GLOBAL_ENTRY(ia64_pal_call_static)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
+	alloc loc1 = ar.pfs,4,5,0,0
+	movl loc2 = pal_entry_point
+1:	{
+	  mov r28 = in0
+	  mov r29 = in1
+	  mov r8 = ip
+	}
+	;;
+	ld8 loc2 = [loc2]		// loc2 <- entry point
+	adds r8 = 1f-1b,r8
+	mov loc4=ar.rsc			// save RSE configuration
+	;;
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	mov loc3 = psr
+	mov loc0 = rp
+	.body
+	mov r30 = in2
+
+	mov r31 = in3
+	mov b7 = loc2
+
+	rsm psr.i
+	;;
+	mov rp = r8
+	br.cond.sptk.many b7
+1:	mov psr.l = loc3
+	mov ar.rsc = loc4		// restore RSE configuration
+	mov ar.pfs = loc1
+	mov rp = loc0
+	;;
+	srlz.d				// seralize restoration of psr.l
+	br.ret.sptk.many b0
+END(ia64_pal_call_static)
+
+/*
+ * Make a PAL call using the stacked registers calling convention.
+ *
+ * Inputs:
+ *	in0         Index of PAL service
+ *	in2 - in3   Remaining PAL arguments
+ */
+GLOBAL_ENTRY(ia64_pal_call_stacked)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
+	alloc loc1 = ar.pfs,4,4,4,0
+	movl loc2 = pal_entry_point
+
+	mov r28  = in0			// Index MUST be copied to r28
+	mov out0 = in0			// AND in0 of PAL function
+	mov loc0 = rp
+	.body
+	;;
+	ld8 loc2 = [loc2]		// loc2 <- entry point
+	mov out1 = in1
+	mov out2 = in2
+	mov out3 = in3
+	mov loc3 = psr
+	;;
+	rsm psr.i
+	mov b7 = loc2
+	;;
+	br.call.sptk.many rp=b7		// now make the call
+.ret0:	mov psr.l  = loc3
+	mov ar.pfs = loc1
+	mov rp = loc0
+	;;
+	srlz.d				// serialize restoration of psr.l
+	br.ret.sptk.many b0
+END(ia64_pal_call_stacked)
+
+/*
+ * Make a physical mode PAL call using the static registers calling convention.
+ *
+ * Inputs:
+ *	in0         Index of PAL service
+ *	in2 - in3   Remaining PAL arguments
+ *
+ * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel.
+ * So we don't need to clear them.
+ */
+#define PAL_PSR_BITS_TO_CLEAR						      \
+	(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT  | IA64_PSR_DB | IA64_PSR_RT |\
+	 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |	      \
+	 IA64_PSR_DFL | IA64_PSR_DFH)
+
+#define PAL_PSR_BITS_TO_SET						      \
+	(IA64_PSR_BN)
+
+
+GLOBAL_ENTRY(ia64_pal_call_phys_static)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
+	alloc loc1 = ar.pfs,4,7,0,0
+	movl loc2 = pal_entry_point
+1:	{
+	  mov r28  = in0		// copy procedure index
+	  mov r8   = ip			// save ip to compute branch
+	  mov loc0 = rp			// save rp
+	}
+	.body
+	;;
+	ld8 loc2 = [loc2]		// loc2 <- entry point
+	mov r29  = in1			// first argument
+	mov r30  = in2			// copy arg2
+	mov r31  = in3			// copy arg3
+	;;
+	mov loc3 = psr			// save psr
+	adds r8  = 1f-1b,r8		// calculate return address for call
+	;;
+	mov loc4=ar.rsc			// save RSE configuration
+	dep.z loc2=loc2,0,61		// convert pal entry point to physical
+	tpa r8=r8			// convert rp to physical
+	;;
+	mov b7 = loc2			// install target to branch reg
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	movl r16=PAL_PSR_BITS_TO_CLEAR
+	movl r17=PAL_PSR_BITS_TO_SET
+	;;
+	or loc3=loc3,r17		// add in psr the bits to set
+	;;
+	andcm r16=loc3,r16		// removes bits to clear from psr
+	br.call.sptk.many rp=ia64_switch_mode_phys
+	mov rp = r8			// install return address (physical)
+	mov loc5 = r19
+	mov loc6 = r20
+	br.cond.sptk.many b7
+1:
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	mov r16=loc3			// r16= original psr
+	mov r19=loc5
+	mov r20=loc6
+	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+	mov psr.l = loc3		// restore init PSR
+
+	mov ar.pfs = loc1
+	mov rp = loc0
+	;;
+	mov ar.rsc=loc4			// restore RSE configuration
+	srlz.d				// seralize restoration of psr.l
+	br.ret.sptk.many b0
+END(ia64_pal_call_phys_static)
+
+/*
+ * Make a PAL call using the stacked registers in physical mode.
+ *
+ * Inputs:
+ *	in0         Index of PAL service
+ *	in2 - in3   Remaining PAL arguments
+ */
+GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
+	alloc	loc1 = ar.pfs,5,7,4,0
+	movl	loc2 = pal_entry_point
+1:	{
+	  mov r28  = in0		// copy procedure index
+	  mov loc0 = rp			// save rp
+	}
+	.body
+	;;
+	ld8 loc2 = [loc2]		// loc2 <- entry point
+	mov loc3 = psr			// save psr
+	;;
+	mov loc4=ar.rsc			// save RSE configuration
+	dep.z loc2=loc2,0,61		// convert pal entry point to physical
+	;;
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	movl r16=PAL_PSR_BITS_TO_CLEAR
+	movl r17=PAL_PSR_BITS_TO_SET
+	;;
+	or loc3=loc3,r17		// add in psr the bits to set
+	mov b7 = loc2			// install target to branch reg
+	;;
+	andcm r16=loc3,r16		// removes bits to clear from psr
+	br.call.sptk.many rp=ia64_switch_mode_phys
+
+	mov out0 = in0			// first argument
+	mov out1 = in1			// copy arg2
+	mov out2 = in2			// copy arg3
+	mov out3 = in3			// copy arg3
+	mov loc5 = r19
+	mov loc6 = r20
+
+	br.call.sptk.many rp=b7		// now make the call
+
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	mov r16=loc3			// r16= original psr
+	mov r19=loc5
+	mov r20=loc6
+	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+
+	mov psr.l  = loc3		// restore init PSR
+	mov ar.pfs = loc1
+	mov rp = loc0
+	;;
+	mov ar.rsc=loc4			// restore RSE configuration
+	srlz.d				// seralize restoration of psr.l
+	br.ret.sptk.many b0
+END(ia64_pal_call_phys_stacked)
+
+/*
+ * Save scratch fp scratch regs which aren't saved in pt_regs already
+ * (fp10-fp15).
+ *
+ * NOTE: We need to do this since firmware (SAL and PAL) may use any of the
+ * scratch regs fp-low partition.
+ *
+ * Inputs:
+ *      in0	Address of stack storage for fp regs
+ */
+GLOBAL_ENTRY(ia64_save_scratch_fpregs)
+	alloc r3=ar.pfs,1,0,0,0
+	add r2=16,in0
+	;;
+	stf.spill [in0] = f10,32
+	stf.spill [r2]  = f11,32
+	;;
+	stf.spill [in0] = f12,32
+	stf.spill [r2]  = f13,32
+	;;
+	stf.spill [in0] = f14,32
+	stf.spill [r2]  = f15,32
+	br.ret.sptk.many rp
+END(ia64_save_scratch_fpregs)
+
+/*
+ * Load scratch fp scratch regs (fp10-fp15)
+ *
+ * Inputs:
+ *      in0	Address of stack storage for fp regs
+ */
+GLOBAL_ENTRY(ia64_load_scratch_fpregs)
+	alloc r3=ar.pfs,1,0,0,0
+	add r2=16,in0
+	;;
+	ldf.fill  f10 = [in0],32
+	ldf.fill  f11 = [r2],32
+	;;
+	ldf.fill  f12 = [in0],32
+	ldf.fill  f13 = [r2],32
+	;;
+	ldf.fill  f14 = [in0],32
+	ldf.fill  f15 = [r2],32
+	br.ret.sptk.many rp
+END(ia64_load_scratch_fpregs)
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
--- a/arch/ia64/kernel/paravirt.c
+++ b/arch/ia64/kernel/paravirt.c
@ -0,0 +1,902 @@
+/******************************************************************************
+ * arch/ia64/kernel/paravirt.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *     Yaozu (Eddie) Dong <eddie.dong@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/init.h>
+
+#include <linux/compiler.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#include <asm/iosapic.h>
+#include <asm/paravirt.h>
+
+/***************************************************************************
+ * general info
+ */
+struct pv_info pv_info = {
+	.kernel_rpl = 0,
+	.paravirt_enabled = 0,
+	.name = "bare hardware"
+};
+
+/***************************************************************************
+ * pv_init_ops
+ * initialization hooks.
+ */
+
+static void __init
+ia64_native_patch_branch(unsigned long tag, unsigned long type);
+
+struct pv_init_ops pv_init_ops =
+{
+#ifdef ASM_SUPPORTED
+	.patch_bundle = ia64_native_patch_bundle,
+#endif
+	.patch_branch = ia64_native_patch_branch,
+};
+
+/***************************************************************************
+ * pv_cpu_ops
+ * intrinsics hooks.
+ */
+
+#ifndef ASM_SUPPORTED
+/* ia64_native_xxx are macros so that we have to make them real functions */
+
+#define DEFINE_VOID_FUNC1(name)					\
+	static void						\
+	ia64_native_ ## name ## _func(unsigned long arg)	\
+	{							\
+		ia64_native_ ## name(arg);			\
+	}
+
+#define DEFINE_VOID_FUNC1_VOID(name)				\
+	static void						\
+	ia64_native_ ## name ## _func(void *arg)		\
+	{							\
+		ia64_native_ ## name(arg);			\
+	}
+
+#define DEFINE_VOID_FUNC2(name)					\
+	static void						\
+	ia64_native_ ## name ## _func(unsigned long arg0,	\
+				      unsigned long arg1)	\
+	{							\
+		ia64_native_ ## name(arg0, arg1);		\
+	}
+
+#define DEFINE_FUNC0(name)			\
+	static unsigned long			\
+	ia64_native_ ## name ## _func(void)	\
+	{					\
+		return ia64_native_ ## name();	\
+	}
+
+#define DEFINE_FUNC1(name, type)			\
+	static unsigned long				\
+	ia64_native_ ## name ## _func(type arg)		\
+	{						\
+		return ia64_native_ ## name(arg);	\
+	}						\
+
+DEFINE_VOID_FUNC1_VOID(fc);
+DEFINE_VOID_FUNC1(intrin_local_irq_restore);
+
+DEFINE_VOID_FUNC2(ptcga);
+DEFINE_VOID_FUNC2(set_rr);
+
+DEFINE_FUNC0(get_psr_i);
+
+DEFINE_FUNC1(thash, unsigned long);
+DEFINE_FUNC1(get_cpuid, int);
+DEFINE_FUNC1(get_pmd, int);
+DEFINE_FUNC1(get_rr, unsigned long);
+
+static void
+ia64_native_ssm_i_func(void)
+{
+	ia64_native_ssm(IA64_PSR_I);
+}
+
+static void
+ia64_native_rsm_i_func(void)
+{
+	ia64_native_rsm(IA64_PSR_I);
+}
+
+static void
+ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1,
+				unsigned long val2, unsigned long val3,
+				unsigned long val4)
+{
+	ia64_native_set_rr0_to_rr4(val0, val1, val2, val3, val4);
+}
+
+#define CASE_GET_REG(id)				\
+	case _IA64_REG_ ## id:				\
+	res = ia64_native_getreg(_IA64_REG_ ## id);	\
+	break;
+#define CASE_GET_AR(id) CASE_GET_REG(AR_ ## id)
+#define CASE_GET_CR(id) CASE_GET_REG(CR_ ## id)
+
+unsigned long
+ia64_native_getreg_func(int regnum)
+{
+	unsigned long res = -1;
+	switch (regnum) {
+	CASE_GET_REG(GP);
+	/*CASE_GET_REG(IP);*/ /* returned ip value shouldn't be constant */
+	CASE_GET_REG(PSR);
+	CASE_GET_REG(TP);
+	CASE_GET_REG(SP);
+
+	CASE_GET_AR(KR0);
+	CASE_GET_AR(KR1);
+	CASE_GET_AR(KR2);
+	CASE_GET_AR(KR3);
+	CASE_GET_AR(KR4);
+	CASE_GET_AR(KR5);
+	CASE_GET_AR(KR6);
+	CASE_GET_AR(KR7);
+	CASE_GET_AR(RSC);
+	CASE_GET_AR(BSP);
+	CASE_GET_AR(BSPSTORE);
+	CASE_GET_AR(RNAT);
+	CASE_GET_AR(FCR);
+	CASE_GET_AR(EFLAG);
+	CASE_GET_AR(CSD);
+	CASE_GET_AR(SSD);
+	CASE_GET_AR(CFLAG);
+	CASE_GET_AR(FSR);
+	CASE_GET_AR(FIR);
+	CASE_GET_AR(FDR);
+	CASE_GET_AR(CCV);
+	CASE_GET_AR(UNAT);
+	CASE_GET_AR(FPSR);
+	CASE_GET_AR(ITC);
+	CASE_GET_AR(PFS);
+	CASE_GET_AR(LC);
+	CASE_GET_AR(EC);
+
+	CASE_GET_CR(DCR);
+	CASE_GET_CR(ITM);
+	CASE_GET_CR(IVA);
+	CASE_GET_CR(PTA);
+	CASE_GET_CR(IPSR);
+	CASE_GET_CR(ISR);
+	CASE_GET_CR(IIP);
+	CASE_GET_CR(IFA);
+	CASE_GET_CR(ITIR);
+	CASE_GET_CR(IIPA);
+	CASE_GET_CR(IFS);
+	CASE_GET_CR(IIM);
+	CASE_GET_CR(IHA);
+	CASE_GET_CR(LID);
+	CASE_GET_CR(IVR);
+	CASE_GET_CR(TPR);
+	CASE_GET_CR(EOI);
+	CASE_GET_CR(IRR0);
+	CASE_GET_CR(IRR1);
+	CASE_GET_CR(IRR2);
+	CASE_GET_CR(IRR3);
+	CASE_GET_CR(ITV);
+	CASE_GET_CR(PMV);
+	CASE_GET_CR(CMCV);
+	CASE_GET_CR(LRR0);
+	CASE_GET_CR(LRR1);
+
+	default:
+		printk(KERN_CRIT "wrong_getreg %d\n", regnum);
+		break;
+	}
+	return res;
+}
+
+#define CASE_SET_REG(id)				\
+	case _IA64_REG_ ## id:				\
+	ia64_native_setreg(_IA64_REG_ ## id, val);	\
+	break;
+#define CASE_SET_AR(id) CASE_SET_REG(AR_ ## id)
+#define CASE_SET_CR(id) CASE_SET_REG(CR_ ## id)
+
+void
+ia64_native_setreg_func(int regnum, unsigned long val)
+{
+	switch (regnum) {
+	case _IA64_REG_PSR_L:
+		ia64_native_setreg(_IA64_REG_PSR_L, val);
+		ia64_dv_serialize_data();
+		break;
+	CASE_SET_REG(SP);
+	CASE_SET_REG(GP);
+
+	CASE_SET_AR(KR0);
+	CASE_SET_AR(KR1);
+	CASE_SET_AR(KR2);
+	CASE_SET_AR(KR3);
+	CASE_SET_AR(KR4);
+	CASE_SET_AR(KR5);
+	CASE_SET_AR(KR6);
+	CASE_SET_AR(KR7);
+	CASE_SET_AR(RSC);
+	CASE_SET_AR(BSP);
+	CASE_SET_AR(BSPSTORE);
+	CASE_SET_AR(RNAT);
+	CASE_SET_AR(FCR);
+	CASE_SET_AR(EFLAG);
+	CASE_SET_AR(CSD);
+	CASE_SET_AR(SSD);
+	CASE_SET_AR(CFLAG);
+	CASE_SET_AR(FSR);
+	CASE_SET_AR(FIR);
+	CASE_SET_AR(FDR);
+	CASE_SET_AR(CCV);
+	CASE_SET_AR(UNAT);
+	CASE_SET_AR(FPSR);
+	CASE_SET_AR(ITC);
+	CASE_SET_AR(PFS);
+	CASE_SET_AR(LC);
+	CASE_SET_AR(EC);
+
+	CASE_SET_CR(DCR);
+	CASE_SET_CR(ITM);
+	CASE_SET_CR(IVA);
+	CASE_SET_CR(PTA);
+	CASE_SET_CR(IPSR);
+	CASE_SET_CR(ISR);
+	CASE_SET_CR(IIP);
+	CASE_SET_CR(IFA);
+	CASE_SET_CR(ITIR);
+	CASE_SET_CR(IIPA);
+	CASE_SET_CR(IFS);
+	CASE_SET_CR(IIM);
+	CASE_SET_CR(IHA);
+	CASE_SET_CR(LID);
+	CASE_SET_CR(IVR);
+	CASE_SET_CR(TPR);
+	CASE_SET_CR(EOI);
+	CASE_SET_CR(IRR0);
+	CASE_SET_CR(IRR1);
+	CASE_SET_CR(IRR2);
+	CASE_SET_CR(IRR3);
+	CASE_SET_CR(ITV);
+	CASE_SET_CR(PMV);
+	CASE_SET_CR(CMCV);
+	CASE_SET_CR(LRR0);
+	CASE_SET_CR(LRR1);
+	default:
+		printk(KERN_CRIT "wrong setreg %d\n", regnum);
+		break;
+	}
+}
+#else
+
+#define __DEFINE_FUNC(name, code)					\
+	extern const char ia64_native_ ## name ## _direct_start[];	\
+	extern const char ia64_native_ ## name ## _direct_end[];	\
+	asm (".align 32\n"						\
+	     ".proc ia64_native_" #name "_func\n"			\
+	     "ia64_native_" #name "_func:\n"				\
+	     "ia64_native_" #name "_direct_start:\n"			\
+	     code							\
+	     "ia64_native_" #name "_direct_end:\n"			\
+	     "br.cond.sptk.many b6\n"					\
+	     ".endp ia64_native_" #name "_func\n")
+
+#define DEFINE_VOID_FUNC0(name, code)				\
+	extern void						\
+	ia64_native_ ## name ## _func(void);			\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1(name, code)				\
+	extern void						\
+	ia64_native_ ## name ## _func(unsigned long arg);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1_VOID(name, code)			\
+	extern void						\
+	ia64_native_ ## name ## _func(void *arg);		\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC2(name, code)				\
+	extern void						\
+	ia64_native_ ## name ## _func(unsigned long arg0,	\
+				      unsigned long arg1);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC0(name, code)		\
+	extern unsigned long			\
+	ia64_native_ ## name ## _func(void);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC1(name, type, code)			\
+	extern unsigned long				\
+	ia64_native_ ## name ## _func(type arg);	\
+	__DEFINE_FUNC(name, code)
+
+DEFINE_VOID_FUNC1_VOID(fc,
+		       "fc r8\n");
+DEFINE_VOID_FUNC1(intrin_local_irq_restore,
+		  ";;\n"
+		  "     cmp.ne p6, p7 = r8, r0\n"
+		  ";;\n"
+		  "(p6) ssm psr.i\n"
+		  "(p7) rsm psr.i\n"
+		  ";;\n"
+		  "(p6) srlz.d\n");
+
+DEFINE_VOID_FUNC2(ptcga,
+		  "ptc.ga r8, r9\n");
+DEFINE_VOID_FUNC2(set_rr,
+		  "mov rr[r8] = r9\n");
+
+/* ia64_native_getreg(_IA64_REG_PSR) & IA64_PSR_I */
+DEFINE_FUNC0(get_psr_i,
+	     "mov r2 = " __stringify(1 << IA64_PSR_I_BIT) "\n"
+	     "mov r8 = psr\n"
+	     ";;\n"
+	     "and r8 = r2, r8\n");
+
+DEFINE_FUNC1(thash, unsigned long,
+	     "thash r8 = r8\n");
+DEFINE_FUNC1(get_cpuid, int,
+	     "mov r8 = cpuid[r8]\n");
+DEFINE_FUNC1(get_pmd, int,
+	     "mov r8 = pmd[r8]\n");
+DEFINE_FUNC1(get_rr, unsigned long,
+	     "mov r8 = rr[r8]\n");
+
+DEFINE_VOID_FUNC0(ssm_i,
+		  "ssm psr.i\n");
+DEFINE_VOID_FUNC0(rsm_i,
+		  "rsm psr.i\n");
+
+extern void
+ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1,
+				unsigned long val2, unsigned long val3,
+				unsigned long val4);
+__DEFINE_FUNC(set_rr0_to_rr4,
+	      "mov rr[r0] = r8\n"
+	      "movl r2 = 0x2000000000000000\n"
+	      ";;\n"
+	      "mov rr[r2] = r9\n"
+	      "shl r3 = r2, 1\n"	/* movl r3 = 0x4000000000000000 */
+	      ";;\n"
+	      "add r2 = r2, r3\n"	/* movl r2 = 0x6000000000000000 */
+	      "mov rr[r3] = r10\n"
+	      ";;\n"
+	      "mov rr[r2] = r11\n"
+	      "shl r3 = r3, 1\n"	/* movl r3 = 0x8000000000000000 */
+	      ";;\n"
+	      "mov rr[r3] = r14\n");
+
+extern unsigned long ia64_native_getreg_func(int regnum);
+asm(".global ia64_native_getreg_func\n");
+#define __DEFINE_GET_REG(id, reg)			\
+	"mov r2 = " __stringify(_IA64_REG_ ## id) "\n"	\
+	";;\n"						\
+	"cmp.eq p6, p0 = r2, r8\n"			\
+	";;\n"						\
+	"(p6) mov r8 = " #reg "\n"			\
+	"(p6) br.cond.sptk.many b6\n"			\
+	";;\n"
+#define __DEFINE_GET_AR(id, reg)	__DEFINE_GET_REG(AR_ ## id, ar.reg)
+#define __DEFINE_GET_CR(id, reg)	__DEFINE_GET_REG(CR_ ## id, cr.reg)
+
+__DEFINE_FUNC(getreg,
+	      __DEFINE_GET_REG(GP, gp)
+	      /*__DEFINE_GET_REG(IP, ip)*/ /* returned ip value shouldn't be constant */
+	      __DEFINE_GET_REG(PSR, psr)
+	      __DEFINE_GET_REG(TP, tp)
+	      __DEFINE_GET_REG(SP, sp)
+
+	      __DEFINE_GET_REG(AR_KR0, ar0)
+	      __DEFINE_GET_REG(AR_KR1, ar1)
+	      __DEFINE_GET_REG(AR_KR2, ar2)
+	      __DEFINE_GET_REG(AR_KR3, ar3)
+	      __DEFINE_GET_REG(AR_KR4, ar4)
+	      __DEFINE_GET_REG(AR_KR5, ar5)
+	      __DEFINE_GET_REG(AR_KR6, ar6)
+	      __DEFINE_GET_REG(AR_KR7, ar7)
+	      __DEFINE_GET_AR(RSC, rsc)
+	      __DEFINE_GET_AR(BSP, bsp)
+	      __DEFINE_GET_AR(BSPSTORE, bspstore)
+	      __DEFINE_GET_AR(RNAT, rnat)
+	      __DEFINE_GET_AR(FCR, fcr)
+	      __DEFINE_GET_AR(EFLAG, eflag)
+	      __DEFINE_GET_AR(CSD, csd)
+	      __DEFINE_GET_AR(SSD, ssd)
+	      __DEFINE_GET_REG(AR_CFLAG, ar27)
+	      __DEFINE_GET_AR(FSR, fsr)
+	      __DEFINE_GET_AR(FIR, fir)
+	      __DEFINE_GET_AR(FDR, fdr)
+	      __DEFINE_GET_AR(CCV, ccv)
+	      __DEFINE_GET_AR(UNAT, unat)
+	      __DEFINE_GET_AR(FPSR, fpsr)
+	      __DEFINE_GET_AR(ITC, itc)
+	      __DEFINE_GET_AR(PFS, pfs)
+	      __DEFINE_GET_AR(LC, lc)
+	      __DEFINE_GET_AR(EC, ec)
+
+	      __DEFINE_GET_CR(DCR, dcr)
+	      __DEFINE_GET_CR(ITM, itm)
+	      __DEFINE_GET_CR(IVA, iva)
+	      __DEFINE_GET_CR(PTA, pta)
+	      __DEFINE_GET_CR(IPSR, ipsr)
+	      __DEFINE_GET_CR(ISR, isr)
+	      __DEFINE_GET_CR(IIP, iip)
+	      __DEFINE_GET_CR(IFA, ifa)
+	      __DEFINE_GET_CR(ITIR, itir)
+	      __DEFINE_GET_CR(IIPA, iipa)
+	      __DEFINE_GET_CR(IFS, ifs)
+	      __DEFINE_GET_CR(IIM, iim)
+	      __DEFINE_GET_CR(IHA, iha)
+	      __DEFINE_GET_CR(LID, lid)
+	      __DEFINE_GET_CR(IVR, ivr)
+	      __DEFINE_GET_CR(TPR, tpr)
+	      __DEFINE_GET_CR(EOI, eoi)
+	      __DEFINE_GET_CR(IRR0, irr0)
+	      __DEFINE_GET_CR(IRR1, irr1)
+	      __DEFINE_GET_CR(IRR2, irr2)
+	      __DEFINE_GET_CR(IRR3, irr3)
+	      __DEFINE_GET_CR(ITV, itv)
+	      __DEFINE_GET_CR(PMV, pmv)
+	      __DEFINE_GET_CR(CMCV, cmcv)
+	      __DEFINE_GET_CR(LRR0, lrr0)
+	      __DEFINE_GET_CR(LRR1, lrr1)
+
+	      "mov r8 = -1\n"	/* unsupported case */
+	);
+
+extern void ia64_native_setreg_func(int regnum, unsigned long val);
+asm(".global ia64_native_setreg_func\n");
+#define __DEFINE_SET_REG(id, reg)			\
+	"mov r2 = " __stringify(_IA64_REG_ ## id) "\n"	\
+	";;\n"						\
+	"cmp.eq p6, p0 = r2, r9\n"			\
+	";;\n"						\
+	"(p6) mov " #reg " = r8\n"			\
+	"(p6) br.cond.sptk.many b6\n"			\
+	";;\n"
+#define __DEFINE_SET_AR(id, reg)	__DEFINE_SET_REG(AR_ ## id, ar.reg)
+#define __DEFINE_SET_CR(id, reg)	__DEFINE_SET_REG(CR_ ## id, cr.reg)
+__DEFINE_FUNC(setreg,
+	      "mov r2 = " __stringify(_IA64_REG_PSR_L) "\n"
+	      ";;\n"
+	      "cmp.eq p6, p0 = r2, r9\n"
+	      ";;\n"
+	      "(p6) mov psr.l = r8\n"
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+	      ".serialize.data\n"
+#endif
+	      "(p6) br.cond.sptk.many b6\n"
+	      __DEFINE_SET_REG(GP, gp)
+	      __DEFINE_SET_REG(SP, sp)
+
+	      __DEFINE_SET_REG(AR_KR0, ar0)
+	      __DEFINE_SET_REG(AR_KR1, ar1)
+	      __DEFINE_SET_REG(AR_KR2, ar2)
+	      __DEFINE_SET_REG(AR_KR3, ar3)
+	      __DEFINE_SET_REG(AR_KR4, ar4)
+	      __DEFINE_SET_REG(AR_KR5, ar5)
+	      __DEFINE_SET_REG(AR_KR6, ar6)
+	      __DEFINE_SET_REG(AR_KR7, ar7)
+	      __DEFINE_SET_AR(RSC, rsc)
+	      __DEFINE_SET_AR(BSP, bsp)
+	      __DEFINE_SET_AR(BSPSTORE, bspstore)
+	      __DEFINE_SET_AR(RNAT, rnat)
+	      __DEFINE_SET_AR(FCR, fcr)
+	      __DEFINE_SET_AR(EFLAG, eflag)
+	      __DEFINE_SET_AR(CSD, csd)
+	      __DEFINE_SET_AR(SSD, ssd)
+	      __DEFINE_SET_REG(AR_CFLAG, ar27)
+	      __DEFINE_SET_AR(FSR, fsr)
+	      __DEFINE_SET_AR(FIR, fir)
+	      __DEFINE_SET_AR(FDR, fdr)
+	      __DEFINE_SET_AR(CCV, ccv)
+	      __DEFINE_SET_AR(UNAT, unat)
+	      __DEFINE_SET_AR(FPSR, fpsr)
+	      __DEFINE_SET_AR(ITC, itc)
+	      __DEFINE_SET_AR(PFS, pfs)
+	      __DEFINE_SET_AR(LC, lc)
+	      __DEFINE_SET_AR(EC, ec)
+
+	      __DEFINE_SET_CR(DCR, dcr)
+	      __DEFINE_SET_CR(ITM, itm)
+	      __DEFINE_SET_CR(IVA, iva)
+	      __DEFINE_SET_CR(PTA, pta)
+	      __DEFINE_SET_CR(IPSR, ipsr)
+	      __DEFINE_SET_CR(ISR, isr)
+	      __DEFINE_SET_CR(IIP, iip)
+	      __DEFINE_SET_CR(IFA, ifa)
+	      __DEFINE_SET_CR(ITIR, itir)
+	      __DEFINE_SET_CR(IIPA, iipa)
+	      __DEFINE_SET_CR(IFS, ifs)
+	      __DEFINE_SET_CR(IIM, iim)
+	      __DEFINE_SET_CR(IHA, iha)
+	      __DEFINE_SET_CR(LID, lid)
+	      __DEFINE_SET_CR(IVR, ivr)
+	      __DEFINE_SET_CR(TPR, tpr)
+	      __DEFINE_SET_CR(EOI, eoi)
+	      __DEFINE_SET_CR(IRR0, irr0)
+	      __DEFINE_SET_CR(IRR1, irr1)
+	      __DEFINE_SET_CR(IRR2, irr2)
+	      __DEFINE_SET_CR(IRR3, irr3)
+	      __DEFINE_SET_CR(ITV, itv)
+	      __DEFINE_SET_CR(PMV, pmv)
+	      __DEFINE_SET_CR(CMCV, cmcv)
+	      __DEFINE_SET_CR(LRR0, lrr0)
+	      __DEFINE_SET_CR(LRR1, lrr1)
+	);
+#endif
+
+struct pv_cpu_ops pv_cpu_ops = {
+	.fc		= ia64_native_fc_func,
+	.thash		= ia64_native_thash_func,
+	.get_cpuid	= ia64_native_get_cpuid_func,
+	.get_pmd	= ia64_native_get_pmd_func,
+	.ptcga		= ia64_native_ptcga_func,
+	.get_rr		= ia64_native_get_rr_func,
+	.set_rr		= ia64_native_set_rr_func,
+	.set_rr0_to_rr4	= ia64_native_set_rr0_to_rr4_func,
+	.ssm_i		= ia64_native_ssm_i_func,
+	.getreg		= ia64_native_getreg_func,
+	.setreg		= ia64_native_setreg_func,
+	.rsm_i		= ia64_native_rsm_i_func,
+	.get_psr_i	= ia64_native_get_psr_i_func,
+	.intrin_local_irq_restore
+			= ia64_native_intrin_local_irq_restore_func,
+};
+EXPORT_SYMBOL(pv_cpu_ops);
+
+/******************************************************************************
+ * replacement of hand written assembly codes.
+ */
+
+void
+paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch)
+{
+	extern unsigned long paravirt_switch_to_targ;
+	extern unsigned long paravirt_leave_syscall_targ;
+	extern unsigned long paravirt_work_processed_syscall_targ;
+	extern unsigned long paravirt_leave_kernel_targ;
+
+	paravirt_switch_to_targ = cpu_asm_switch->switch_to;
+	paravirt_leave_syscall_targ = cpu_asm_switch->leave_syscall;
+	paravirt_work_processed_syscall_targ =
+		cpu_asm_switch->work_processed_syscall;
+	paravirt_leave_kernel_targ = cpu_asm_switch->leave_kernel;
+}
+
+/***************************************************************************
+ * pv_iosapic_ops
+ * iosapic read/write hooks.
+ */
+
+static unsigned int
+ia64_native_iosapic_read(char __iomem *iosapic, unsigned int reg)
+{
+	return __ia64_native_iosapic_read(iosapic, reg);
+}
+
+static void
+ia64_native_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
+{
+	__ia64_native_iosapic_write(iosapic, reg, val);
+}
+
+struct pv_iosapic_ops pv_iosapic_ops = {
+	.pcat_compat_init = ia64_native_iosapic_pcat_compat_init,
+	.__get_irq_chip = ia64_native_iosapic_get_irq_chip,
+
+	.__read = ia64_native_iosapic_read,
+	.__write = ia64_native_iosapic_write,
+};
+
+/***************************************************************************
+ * pv_irq_ops
+ * irq operations
+ */
+
+struct pv_irq_ops pv_irq_ops = {
+	.register_ipi = ia64_native_register_ipi,
+
+	.assign_irq_vector = ia64_native_assign_irq_vector,
+	.free_irq_vector = ia64_native_free_irq_vector,
+	.register_percpu_irq = ia64_native_register_percpu_irq,
+
+	.resend_irq = ia64_native_resend_irq,
+};
+
+/***************************************************************************
+ * pv_time_ops
+ * time operations
+ */
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static int
+ia64_native_do_steal_accounting(unsigned long *new_itm)
+{
+	return 0;
+}
+
+struct pv_time_ops pv_time_ops = {
+	.do_steal_accounting = ia64_native_do_steal_accounting,
+	.sched_clock = ia64_native_sched_clock,
+};
+
+/***************************************************************************
+ * binary pacthing
+ * pv_init_ops.patch_bundle
+ */
+
+#ifdef ASM_SUPPORTED
+#define IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg)	\
+	__DEFINE_FUNC(get_ ## name,			\
+		      ";;\n"				\
+		      "mov r8 = " #reg "\n"		\
+		      ";;\n")
+
+#define IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg)	\
+	__DEFINE_FUNC(set_ ## name,			\
+		      ";;\n"				\
+		      "mov " #reg " = r8\n"		\
+		      ";;\n")
+
+#define IA64_NATIVE_PATCH_DEFINE_REG(name, reg)		\
+	IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg);	\
+	IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg)	\
+
+#define IA64_NATIVE_PATCH_DEFINE_AR(name, reg)			\
+	IA64_NATIVE_PATCH_DEFINE_REG(ar_ ## name, ar.reg)
+
+#define IA64_NATIVE_PATCH_DEFINE_CR(name, reg)			\
+	IA64_NATIVE_PATCH_DEFINE_REG(cr_ ## name, cr.reg)
+
+
+IA64_NATIVE_PATCH_DEFINE_GET_REG(psr, psr);
+IA64_NATIVE_PATCH_DEFINE_GET_REG(tp, tp);
+
+/* IA64_NATIVE_PATCH_DEFINE_SET_REG(psr_l, psr.l); */
+__DEFINE_FUNC(set_psr_l,
+	      ";;\n"
+	      "mov psr.l = r8\n"
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+	      ".serialize.data\n"
+#endif
+	      ";;\n");
+
+IA64_NATIVE_PATCH_DEFINE_REG(gp, gp);
+IA64_NATIVE_PATCH_DEFINE_REG(sp, sp);
+
+IA64_NATIVE_PATCH_DEFINE_REG(kr0, ar0);
+IA64_NATIVE_PATCH_DEFINE_REG(kr1, ar1);
+IA64_NATIVE_PATCH_DEFINE_REG(kr2, ar2);
+IA64_NATIVE_PATCH_DEFINE_REG(kr3, ar3);
+IA64_NATIVE_PATCH_DEFINE_REG(kr4, ar4);
+IA64_NATIVE_PATCH_DEFINE_REG(kr5, ar5);
+IA64_NATIVE_PATCH_DEFINE_REG(kr6, ar6);
+IA64_NATIVE_PATCH_DEFINE_REG(kr7, ar7);
+
+IA64_NATIVE_PATCH_DEFINE_AR(rsc, rsc);
+IA64_NATIVE_PATCH_DEFINE_AR(bsp, bsp);
+IA64_NATIVE_PATCH_DEFINE_AR(bspstore, bspstore);
+IA64_NATIVE_PATCH_DEFINE_AR(rnat, rnat);
+IA64_NATIVE_PATCH_DEFINE_AR(fcr, fcr);
+IA64_NATIVE_PATCH_DEFINE_AR(eflag, eflag);
+IA64_NATIVE_PATCH_DEFINE_AR(csd, csd);
+IA64_NATIVE_PATCH_DEFINE_AR(ssd, ssd);
+IA64_NATIVE_PATCH_DEFINE_REG(ar27, ar27);
+IA64_NATIVE_PATCH_DEFINE_AR(fsr, fsr);
+IA64_NATIVE_PATCH_DEFINE_AR(fir, fir);
+IA64_NATIVE_PATCH_DEFINE_AR(fdr, fdr);
+IA64_NATIVE_PATCH_DEFINE_AR(ccv, ccv);
+IA64_NATIVE_PATCH_DEFINE_AR(unat, unat);
+IA64_NATIVE_PATCH_DEFINE_AR(fpsr, fpsr);
+IA64_NATIVE_PATCH_DEFINE_AR(itc, itc);
+IA64_NATIVE_PATCH_DEFINE_AR(pfs, pfs);
+IA64_NATIVE_PATCH_DEFINE_AR(lc, lc);
+IA64_NATIVE_PATCH_DEFINE_AR(ec, ec);
+
+IA64_NATIVE_PATCH_DEFINE_CR(dcr, dcr);
+IA64_NATIVE_PATCH_DEFINE_CR(itm, itm);
+IA64_NATIVE_PATCH_DEFINE_CR(iva, iva);
+IA64_NATIVE_PATCH_DEFINE_CR(pta, pta);
+IA64_NATIVE_PATCH_DEFINE_CR(ipsr, ipsr);
+IA64_NATIVE_PATCH_DEFINE_CR(isr, isr);
+IA64_NATIVE_PATCH_DEFINE_CR(iip, iip);
+IA64_NATIVE_PATCH_DEFINE_CR(ifa, ifa);
+IA64_NATIVE_PATCH_DEFINE_CR(itir, itir);
+IA64_NATIVE_PATCH_DEFINE_CR(iipa, iipa);
+IA64_NATIVE_PATCH_DEFINE_CR(ifs, ifs);
+IA64_NATIVE_PATCH_DEFINE_CR(iim, iim);
+IA64_NATIVE_PATCH_DEFINE_CR(iha, iha);
+IA64_NATIVE_PATCH_DEFINE_CR(lid, lid);
+IA64_NATIVE_PATCH_DEFINE_CR(ivr, ivr);
+IA64_NATIVE_PATCH_DEFINE_CR(tpr, tpr);
+IA64_NATIVE_PATCH_DEFINE_CR(eoi, eoi);
+IA64_NATIVE_PATCH_DEFINE_CR(irr0, irr0);
+IA64_NATIVE_PATCH_DEFINE_CR(irr1, irr1);
+IA64_NATIVE_PATCH_DEFINE_CR(irr2, irr2);
+IA64_NATIVE_PATCH_DEFINE_CR(irr3, irr3);
+IA64_NATIVE_PATCH_DEFINE_CR(itv, itv);
+IA64_NATIVE_PATCH_DEFINE_CR(pmv, pmv);
+IA64_NATIVE_PATCH_DEFINE_CR(cmcv, cmcv);
+IA64_NATIVE_PATCH_DEFINE_CR(lrr0, lrr0);
+IA64_NATIVE_PATCH_DEFINE_CR(lrr1, lrr1);
+
+static const struct paravirt_patch_bundle_elem ia64_native_patch_bundle_elems[]
+__initdata_or_module =
+{
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM(name, type)		\
+	{							\
+		(void*)ia64_native_ ## name ## _direct_start,	\
+		(void*)ia64_native_ ## name ## _direct_end,	\
+		PARAVIRT_PATCH_TYPE_ ## type,			\
+	}
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(fc, FC),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(thash, THASH),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_cpuid, GET_CPUID),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_pmd, GET_PMD),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(ptcga, PTCGA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_rr, GET_RR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr, SET_RR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr0_to_rr4, SET_RR0_TO_RR4),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(ssm_i, SSM_I),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(rsm_i, RSM_I),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_psr_i, GET_PSR_I),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(intrin_local_irq_restore,
+				      INTRIN_LOCAL_IRQ_RESTORE),
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg)			\
+	{								\
+		(void*)ia64_native_get_ ## name ## _direct_start,	\
+		(void*)ia64_native_get_ ## name ## _direct_end,		\
+		PARAVIRT_PATCH_TYPE_GETREG + _IA64_REG_ ## reg,		\
+	}
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg)			\
+	{								\
+		(void*)ia64_native_set_ ## name ## _direct_start,	\
+		(void*)ia64_native_set_ ## name ## _direct_end,		\
+		PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_ ## reg,		\
+	}
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(name, reg)		\
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg),	\
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg)		\
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(name, reg)		\
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar_ ## name, AR_ ## reg)
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(name, reg)		\
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(cr_ ## name, CR_ ## reg)
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(psr, PSR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(tp, TP),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(psr_l, PSR_L),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(gp, GP),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(sp, SP),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr0, AR_KR0),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr1, AR_KR1),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr2, AR_KR2),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr3, AR_KR3),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr4, AR_KR4),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr5, AR_KR5),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr6, AR_KR6),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr7, AR_KR7),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rsc, RSC),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bsp, BSP),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bspstore, BSPSTORE),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rnat, RNAT),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fcr, FCR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(eflag, EFLAG),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(csd, CSD),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ssd, SSD),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar27, AR_CFLAG),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fsr, FSR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fir, FIR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fdr, FDR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ccv, CCV),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(unat, UNAT),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fpsr, FPSR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(itc, ITC),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(pfs, PFS),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(lc, LC),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ec, EC),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(dcr, DCR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itm, ITM),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iva, IVA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pta, PTA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ipsr, IPSR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(isr, ISR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iip, IIP),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifa, IFA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itir, ITIR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iipa, IIPA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifs, IFS),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iim, IIM),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iha, IHA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lid, LID),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ivr, IVR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(tpr, TPR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(eoi, EOI),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr0, IRR0),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr1, IRR1),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr2, IRR2),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr3, IRR3),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itv, ITV),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pmv, PMV),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(cmcv, CMCV),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr0, LRR0),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr1, LRR1),
+};
+
+unsigned long __init_or_module
+ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type)
+{
+	const unsigned long nelems = sizeof(ia64_native_patch_bundle_elems) /
+		sizeof(ia64_native_patch_bundle_elems[0]);
+
+	return __paravirt_patch_apply_bundle(sbundle, ebundle, type,
+					      ia64_native_patch_bundle_elems,
+					      nelems, NULL);
+}
+#endif /* ASM_SUPPOTED */
+
+extern const char ia64_native_switch_to[];
+extern const char ia64_native_leave_syscall[];
+extern const char ia64_native_work_processed_syscall[];
+extern const char ia64_native_leave_kernel[];
+
+const struct paravirt_patch_branch_target ia64_native_branch_target[]
+__initconst = {
+#define PARAVIRT_BR_TARGET(name, type)			\
+	{						\
+		ia64_native_ ## name,			\
+		PARAVIRT_PATCH_TYPE_BR_ ## type,	\
+	}
+	PARAVIRT_BR_TARGET(switch_to, SWITCH_TO),
+	PARAVIRT_BR_TARGET(leave_syscall, LEAVE_SYSCALL),
+	PARAVIRT_BR_TARGET(work_processed_syscall, WORK_PROCESSED_SYSCALL),
+	PARAVIRT_BR_TARGET(leave_kernel, LEAVE_KERNEL),
+};
+
+static void __init
+ia64_native_patch_branch(unsigned long tag, unsigned long type)
+{
+	const unsigned long nelem =
+		sizeof(ia64_native_branch_target) /
+		sizeof(ia64_native_branch_target[0]);
+	__paravirt_patch_apply_branch(tag, type,
+				      ia64_native_branch_target, nelem);
+}
--- a/arch/ia64/kernel/paravirt_inst.h
+++ b/arch/ia64/kernel/paravirt_inst.h
@ -0,0 +1,28 @@
+/******************************************************************************
+ * linux/arch/ia64/xen/paravirt_inst.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifdef __IA64_ASM_PARAVIRTUALIZED_PVCHECK
+#include <asm/native/pvchk_inst.h>
+#else
+#include <asm/native/inst.h>
+#endif
+
--- a/arch/ia64/kernel/paravirt_patch.c
+++ b/arch/ia64/kernel/paravirt_patch.c
@ -0,0 +1,514 @@
+/******************************************************************************
+ * linux/arch/ia64/xen/paravirt_patch.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/init.h>
+#include <asm/intrinsics.h>
+#include <asm/kprobes.h>
+#include <asm/paravirt.h>
+#include <asm/paravirt_patch.h>
+
+typedef union ia64_inst {
+        struct {
+		unsigned long long qp : 6;
+		unsigned long long : 31;
+		unsigned long long opcode : 4;
+		unsigned long long reserved : 23;
+        } generic;
+        unsigned long long l;
+} ia64_inst_t;
+
+/*
+ * flush_icache_range() can't be used here.
+ * we are here before cpu_init() which initializes
+ * ia64_i_cache_stride_shift. flush_icache_range() uses it.
+ */
+void __init_or_module
+paravirt_flush_i_cache_range(const void *instr, unsigned long size)
+{
+	extern void paravirt_fc_i(const void *addr);
+	unsigned long i;
+
+	for (i = 0; i < size; i += sizeof(bundle_t))
+		paravirt_fc_i(instr + i);
+}
+
+bundle_t* __init_or_module
+paravirt_get_bundle(unsigned long tag)
+{
+	return (bundle_t *)(tag & ~3UL);
+}
+
+unsigned long __init_or_module
+paravirt_get_slot(unsigned long tag)
+{
+	return tag & 3UL;
+}
+
+unsigned long __init_or_module
+paravirt_get_num_inst(unsigned long stag, unsigned long etag)
+{
+	bundle_t *sbundle = paravirt_get_bundle(stag);
+	unsigned long sslot = paravirt_get_slot(stag);
+	bundle_t *ebundle = paravirt_get_bundle(etag);
+	unsigned long eslot = paravirt_get_slot(etag);
+
+	return (ebundle - sbundle) * 3 + eslot - sslot + 1;
+}
+
+unsigned long __init_or_module
+paravirt_get_next_tag(unsigned long tag)
+{
+	unsigned long slot = paravirt_get_slot(tag);
+
+	switch (slot) {
+	case 0:
+	case 1:
+		return tag + 1;
+	case 2: {
+		bundle_t *bundle = paravirt_get_bundle(tag);
+		return (unsigned long)(bundle + 1);
+	}
+	default:
+		BUG();
+	}
+	/* NOTREACHED */
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_slot0(const bundle_t *bundle)
+{
+	ia64_inst_t inst;
+	inst.l = bundle->quad0.slot0;
+	return inst;
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_slot1(const bundle_t *bundle)
+{
+	ia64_inst_t inst;
+	inst.l = bundle->quad0.slot1_p0 |
+		((unsigned long long)bundle->quad1.slot1_p1 << 18UL);
+	return inst;
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_slot2(const bundle_t *bundle)
+{
+	ia64_inst_t inst;
+	inst.l = bundle->quad1.slot2;
+	return inst;
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_inst(unsigned long tag)
+{
+	bundle_t *bundle = paravirt_get_bundle(tag);
+	unsigned long slot = paravirt_get_slot(tag);
+
+	switch (slot) {
+	case 0:
+		return paravirt_read_slot0(bundle);
+	case 1:
+		return paravirt_read_slot1(bundle);
+	case 2:
+		return paravirt_read_slot2(bundle);
+	default:
+		BUG();
+	}
+	/* NOTREACHED */
+}
+
+void __init_or_module
+paravirt_write_slot0(bundle_t *bundle, ia64_inst_t inst)
+{
+	bundle->quad0.slot0 = inst.l;
+}
+
+void __init_or_module
+paravirt_write_slot1(bundle_t *bundle, ia64_inst_t inst)
+{
+	bundle->quad0.slot1_p0 = inst.l;
+	bundle->quad1.slot1_p1 = inst.l >> 18UL;
+}
+
+void __init_or_module
+paravirt_write_slot2(bundle_t *bundle, ia64_inst_t inst)
+{
+	bundle->quad1.slot2 = inst.l;
+}
+
+void __init_or_module
+paravirt_write_inst(unsigned long tag, ia64_inst_t inst)
+{
+	bundle_t *bundle = paravirt_get_bundle(tag);
+	unsigned long slot = paravirt_get_slot(tag);
+
+	switch (slot) {
+	case 0:
+		paravirt_write_slot0(bundle, inst);
+		break;
+	case 1:
+		paravirt_write_slot1(bundle, inst);
+		break;
+	case 2:
+		paravirt_write_slot2(bundle, inst);
+		break;
+	default:
+		BUG();
+		break;
+	}
+	paravirt_flush_i_cache_range(bundle, sizeof(*bundle));
+}
+
+/* for debug */
+void
+paravirt_print_bundle(const bundle_t *bundle)
+{
+	const unsigned long *quad = (const unsigned long *)bundle;
+	ia64_inst_t slot0 = paravirt_read_slot0(bundle);
+	ia64_inst_t slot1 = paravirt_read_slot1(bundle);
+	ia64_inst_t slot2 = paravirt_read_slot2(bundle);
+
+	printk(KERN_DEBUG
+	       "bundle 0x%p 0x%016lx 0x%016lx\n", bundle, quad[0], quad[1]);
+	printk(KERN_DEBUG
+	       "bundle template 0x%x\n",
+	       bundle->quad0.template);
+	printk(KERN_DEBUG
+	       "slot0 0x%lx slot1_p0 0x%lx slot1_p1 0x%lx slot2 0x%lx\n",
+	       (unsigned long)bundle->quad0.slot0,
+	       (unsigned long)bundle->quad0.slot1_p0,
+	       (unsigned long)bundle->quad1.slot1_p1,
+	       (unsigned long)bundle->quad1.slot2);
+	printk(KERN_DEBUG
+	       "slot0 0x%016llx slot1 0x%016llx slot2 0x%016llx\n",
+	       slot0.l, slot1.l, slot2.l);
+}
+
+static int noreplace_paravirt __init_or_module = 0;
+
+static int __init setup_noreplace_paravirt(char *str)
+{
+	noreplace_paravirt = 1;
+	return 1;
+}
+__setup("noreplace-paravirt", setup_noreplace_paravirt);
+
+#ifdef ASM_SUPPORTED
+static void __init_or_module
+fill_nop_bundle(void *sbundle, void *ebundle)
+{
+	extern const char paravirt_nop_bundle[];
+	extern const unsigned long paravirt_nop_bundle_size;
+
+	void *bundle = sbundle;
+
+	BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0);
+	BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0);
+
+	while (bundle < ebundle) {
+		memcpy(bundle, paravirt_nop_bundle, paravirt_nop_bundle_size);
+
+		bundle += paravirt_nop_bundle_size;
+	}
+}
+
+/* helper function */
+unsigned long __init_or_module
+__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type,
+			      const struct paravirt_patch_bundle_elem *elems,
+			      unsigned long nelems,
+			      const struct paravirt_patch_bundle_elem **found)
+{
+	unsigned long used = 0;
+	unsigned long i;
+
+	BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0);
+	BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0);
+
+	found = NULL;
+	for (i = 0; i < nelems; i++) {
+		const struct paravirt_patch_bundle_elem *p = &elems[i];
+		if (p->type == type) {
+			unsigned long need = p->ebundle - p->sbundle;
+			unsigned long room = ebundle - sbundle;
+
+			if (found != NULL)
+				*found = p;
+
+			if (room < need) {
+				/* no room to replace. skip it */
+				printk(KERN_DEBUG
+				       "the space is too small to put "
+				       "bundles. type %ld need %ld room %ld\n",
+				       type, need, room);
+				break;
+			}
+
+			used = need;
+			memcpy(sbundle, p->sbundle, used);
+			break;
+		}
+	}
+
+	return used;
+}
+
+void __init_or_module
+paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start,
+			    const struct paravirt_patch_site_bundle *end)
+{
+	const struct paravirt_patch_site_bundle *p;
+
+	if (noreplace_paravirt)
+		return;
+	if (pv_init_ops.patch_bundle == NULL)
+		return;
+
+	for (p = start; p < end; p++) {
+		unsigned long used;
+
+		used = (*pv_init_ops.patch_bundle)(p->sbundle, p->ebundle,
+						   p->type);
+		if (used == 0)
+			continue;
+
+		fill_nop_bundle(p->sbundle + used, p->ebundle);
+		paravirt_flush_i_cache_range(p->sbundle,
+					     p->ebundle - p->sbundle);
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+/*
+ * nop.i, nop.m, nop.f instruction are same format.
+ * but nop.b has differennt format.
+ * This doesn't support nop.b for now.
+ */
+static void __init_or_module
+fill_nop_inst(unsigned long stag, unsigned long etag)
+{
+	extern const bundle_t paravirt_nop_mfi_inst_bundle[];
+	unsigned long tag;
+	const ia64_inst_t nop_inst =
+		paravirt_read_slot0(paravirt_nop_mfi_inst_bundle);
+
+	for (tag = stag; tag < etag; tag = paravirt_get_next_tag(tag))
+		paravirt_write_inst(tag, nop_inst);
+}
+
+void __init_or_module
+paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start,
+			  const struct paravirt_patch_site_inst *end)
+{
+	const struct paravirt_patch_site_inst *p;
+
+	if (noreplace_paravirt)
+		return;
+	if (pv_init_ops.patch_inst == NULL)
+		return;
+
+	for (p = start; p < end; p++) {
+		unsigned long tag;
+		bundle_t *sbundle;
+		bundle_t *ebundle;
+
+		tag = (*pv_init_ops.patch_inst)(p->stag, p->etag, p->type);
+		if (tag == p->stag)
+			continue;
+
+		fill_nop_inst(tag, p->etag);
+		sbundle = paravirt_get_bundle(p->stag);
+		ebundle = paravirt_get_bundle(p->etag) + 1;
+		paravirt_flush_i_cache_range(sbundle, (ebundle - sbundle) *
+					     sizeof(bundle_t));
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+#endif /* ASM_SUPPOTED */
+
+/* brl.cond.sptk.many <target64> X3 */
+typedef union inst_x3_op {
+	ia64_inst_t inst;
+	struct {
+		unsigned long qp: 6;
+		unsigned long btyp: 3;
+		unsigned long unused: 3;
+		unsigned long p: 1;
+		unsigned long imm20b: 20;
+		unsigned long wh: 2;
+		unsigned long d: 1;
+		unsigned long i: 1;
+		unsigned long opcode: 4;
+	};
+	unsigned long l;
+} inst_x3_op_t;
+
+typedef union inst_x3_imm {
+	ia64_inst_t inst;
+	struct {
+		unsigned long unused: 2;
+		unsigned long imm39: 39;
+	};
+	unsigned long l;
+} inst_x3_imm_t;
+
+void __init_or_module
+paravirt_patch_reloc_brl(unsigned long tag, const void *target)
+{
+	unsigned long tag_op = paravirt_get_next_tag(tag);
+	unsigned long tag_imm = tag;
+	bundle_t *bundle = paravirt_get_bundle(tag);
+
+	ia64_inst_t inst_op = paravirt_read_inst(tag_op);
+	ia64_inst_t inst_imm = paravirt_read_inst(tag_imm);
+
+	inst_x3_op_t inst_x3_op = { .l = inst_op.l };
+	inst_x3_imm_t inst_x3_imm = { .l = inst_imm.l };
+
+	unsigned long imm60 =
+		((unsigned long)target - (unsigned long)bundle) >> 4;
+
+	BUG_ON(paravirt_get_slot(tag) != 1); /* MLX */
+	BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0);
+
+	/* imm60[59] 1bit */
+	inst_x3_op.i = (imm60 >> 59) & 1;
+	/* imm60[19:0] 20bit */
+	inst_x3_op.imm20b = imm60 & ((1UL << 20) - 1);
+	/* imm60[58:20] 39bit */
+	inst_x3_imm.imm39 = (imm60 >> 20) & ((1UL << 39) - 1);
+
+	inst_op.l = inst_x3_op.l;
+	inst_imm.l = inst_x3_imm.l;
+
+	paravirt_write_inst(tag_op, inst_op);
+	paravirt_write_inst(tag_imm, inst_imm);
+}
+
+/* br.cond.sptk.many <target25>	B1 */
+typedef union inst_b1 {
+	ia64_inst_t inst;
+	struct {
+		unsigned long qp: 6;
+		unsigned long btype: 3;
+		unsigned long unused: 3;
+		unsigned long p: 1;
+		unsigned long imm20b: 20;
+		unsigned long wh: 2;
+		unsigned long d: 1;
+		unsigned long s: 1;
+		unsigned long opcode: 4;
+	};
+	unsigned long l;
+} inst_b1_t;
+
+void __init
+paravirt_patch_reloc_br(unsigned long tag, const void *target)
+{
+	bundle_t *bundle = paravirt_get_bundle(tag);
+	ia64_inst_t inst = paravirt_read_inst(tag);
+	unsigned long target25 = (unsigned long)target - (unsigned long)bundle;
+	inst_b1_t inst_b1;
+
+	BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0);
+
+	inst_b1.l = inst.l;
+	if (target25 & (1UL << 63))
+		inst_b1.s = 1;
+	else
+		inst_b1.s = 0;
+
+	inst_b1.imm20b = target25 >> 4;
+	inst.l = inst_b1.l;
+
+	paravirt_write_inst(tag, inst);
+}
+
+void __init
+__paravirt_patch_apply_branch(
+	unsigned long tag, unsigned long type,
+	const struct paravirt_patch_branch_target *entries,
+	unsigned int nr_entries)
+{
+	unsigned int i;
+	for (i = 0; i < nr_entries; i++) {
+		if (entries[i].type == type) {
+			paravirt_patch_reloc_br(tag, entries[i].entry);
+			break;
+		}
+	}
+}
+
+static void __init
+paravirt_patch_apply_branch(const struct paravirt_patch_site_branch *start,
+			    const struct paravirt_patch_site_branch *end)
+{
+	const struct paravirt_patch_site_branch *p;
+
+	if (noreplace_paravirt)
+		return;
+	if (pv_init_ops.patch_branch == NULL)
+		return;
+
+	for (p = start; p < end; p++)
+		(*pv_init_ops.patch_branch)(p->tag, p->type);
+
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+void __init
+paravirt_patch_apply(void)
+{
+	extern const char __start_paravirt_bundles[];
+	extern const char __stop_paravirt_bundles[];
+	extern const char __start_paravirt_insts[];
+	extern const char __stop_paravirt_insts[];
+	extern const char __start_paravirt_branches[];
+	extern const char __stop_paravirt_branches[];
+
+	paravirt_patch_apply_bundle((const struct paravirt_patch_site_bundle *)
+				    __start_paravirt_bundles,
+				    (const struct paravirt_patch_site_bundle *)
+				    __stop_paravirt_bundles);
+	paravirt_patch_apply_inst((const struct paravirt_patch_site_inst *)
+				  __start_paravirt_insts,
+				  (const struct paravirt_patch_site_inst *)
+				  __stop_paravirt_insts);
+	paravirt_patch_apply_branch((const struct paravirt_patch_site_branch *)
+				    __start_paravirt_branches,
+				    (const struct paravirt_patch_site_branch *)
+				    __stop_paravirt_branches);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "linux"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */
--- a/arch/ia64/kernel/paravirt_patchlist.c
+++ b/arch/ia64/kernel/paravirt_patchlist.c
@ -0,0 +1,81 @@
+/******************************************************************************
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <asm/paravirt.h>
+
+#define DECLARE(name)						\
+	extern unsigned long					\
+		__ia64_native_start_gate_##name##_patchlist[];	\
+	extern unsigned long					\
+		__ia64_native_end_gate_##name##_patchlist[]
+
+DECLARE(fsyscall);
+DECLARE(brl_fsys_bubble_down);
+DECLARE(vtop);
+DECLARE(mckinley_e9);
+
+extern unsigned long __start_gate_section[];
+
+#define ASSIGN(name)							    \
+	.start_##name##_patchlist =					    \
+		(unsigned long)__ia64_native_start_gate_##name##_patchlist, \
+	.end_##name##_patchlist =					    \
+		(unsigned long)__ia64_native_end_gate_##name##_patchlist
+
+struct pv_patchdata pv_patchdata __initdata = {
+	ASSIGN(fsyscall),
+	ASSIGN(brl_fsys_bubble_down),
+	ASSIGN(vtop),
+	ASSIGN(mckinley_e9),
+
+	.gate_section = (void*)__start_gate_section,
+};
+
+
+unsigned long __init
+paravirt_get_gate_patchlist(enum pv_gate_patchlist type)
+{
+
+#define CASE(NAME, name)					\
+	case PV_GATE_START_##NAME:				\
+		return pv_patchdata.start_##name##_patchlist;	\
+	case PV_GATE_END_##NAME:				\
+		return pv_patchdata.end_##name##_patchlist;	\
+
+	switch (type) {
+		CASE(FSYSCALL, fsyscall);
+		CASE(BRL_FSYS_BUBBLE_DOWN, brl_fsys_bubble_down);
+		CASE(VTOP, vtop);
+		CASE(MCKINLEY_E9, mckinley_e9);
+	default:
+		BUG();
+		break;
+	}
+	return 0;
+}
+
+void * __init
+paravirt_get_gate_section(void)
+{
+	return pv_patchdata.gate_section;
+}
--- a/arch/ia64/kernel/paravirt_patchlist.h
+++ b/arch/ia64/kernel/paravirt_patchlist.h
@ -0,0 +1,24 @@
+/******************************************************************************
+ * linux/arch/ia64/xen/paravirt_patchlist.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <asm/native/patchlist.h>
+
--- a/arch/ia64/kernel/paravirtentry.S
+++ b/arch/ia64/kernel/paravirtentry.S
@ -0,0 +1,121 @@
+/******************************************************************************
+ * linux/arch/ia64/xen/paravirtentry.S
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/init.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-offsets.h>
+#include <asm/paravirt_privop.h>
+#include <asm/paravirt_patch.h>
+#include "entry.h"
+
+#define DATA8(sym, init_value)			\
+	.pushsection .data..read_mostly ;	\
+	.align 8 ;				\
+	.global sym ;				\
+	sym: ;					\
+	data8 init_value ;			\
+	.popsection
+
+#define BRANCH(targ, reg, breg, type)					\
+	PARAVIRT_PATCH_SITE_BR(PARAVIRT_PATCH_TYPE_BR_ ## type) ;	\
+	;;								\
+	movl reg=targ ;							\
+	;;								\
+	ld8 reg=[reg] ;							\
+	;;								\
+	mov breg=reg ;							\
+	br.cond.sptk.many breg
+
+#define BRANCH_PROC(sym, reg, breg, type)				\
+	DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ;		\
+	GLOBAL_ENTRY(paravirt_ ## sym) ;				\
+		BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ;	\
+	END(paravirt_ ## sym)
+
+#define BRANCH_PROC_UNWINFO(sym, reg, breg, type)			\
+	DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ;		\
+	GLOBAL_ENTRY(paravirt_ ## sym) ;				\
+		PT_REGS_UNWIND_INFO(0) ;				\
+		BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ;	\
+	END(paravirt_ ## sym)
+
+
+BRANCH_PROC(switch_to, r22, b7, SWITCH_TO)
+BRANCH_PROC_UNWINFO(leave_syscall, r22, b7, LEAVE_SYSCALL)
+BRANCH_PROC(work_processed_syscall, r2, b7, WORK_PROCESSED_SYSCALL)
+BRANCH_PROC_UNWINFO(leave_kernel, r22, b7, LEAVE_KERNEL)
+
+
+#ifdef CONFIG_MODULES
+#define __INIT_OR_MODULE	.text
+#define __INITDATA_OR_MODULE	.data
+#else
+#define __INIT_OR_MODULE	__INIT
+#define __INITDATA_OR_MODULE	__INITDATA
+#endif /* CONFIG_MODULES */
+
+	__INIT_OR_MODULE
+	GLOBAL_ENTRY(paravirt_fc_i)
+	fc.i r32
+	br.ret.sptk.many rp
+	END(paravirt_fc_i)
+	__FINIT
+
+	__INIT_OR_MODULE
+	.align 32
+	GLOBAL_ENTRY(paravirt_nop_b_inst_bundle)
+	{
+		nop.b 0
+		nop.b 0
+		nop.b 0
+	}
+	END(paravirt_nop_b_inst_bundle)
+	__FINIT
+
+	/* NOTE: nop.[mfi] has same format */
+	__INIT_OR_MODULE
+	GLOBAL_ENTRY(paravirt_nop_mfi_inst_bundle)
+	{
+		nop.m 0
+		nop.f 0
+		nop.i 0
+	}
+	END(paravirt_nop_mfi_inst_bundle)
+	__FINIT
+
+	__INIT_OR_MODULE
+	GLOBAL_ENTRY(paravirt_nop_bundle)
+paravirt_nop_bundle_start:
+	{
+		nop 0
+		nop 0
+		nop 0
+	}
+paravirt_nop_bundle_end:
+	END(paravirt_nop_bundle)
+	__FINIT
+
+	__INITDATA_OR_MODULE
+	.align 8
+	.global paravirt_nop_bundle_size
+paravirt_nop_bundle_size:
+	data8	paravirt_nop_bundle_end - paravirt_nop_bundle_start
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@ -0,0 +1,256 @@
+/*
+ * Instruction-patching support.
+ *
+ * Copyright (C) 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/init.h>
+#include <linux/string.h>
+
+#include <asm/paravirt.h>
+#include <asm/patch.h>
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/unistd.h>
+
+/*
+ * This was adapted from code written by Tony Luck:
+ *
+ * The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle
+ * like this:
+ *
+ * 6  6         5         4         3         2         1
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG
+ *
+ * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB
+ */
+static u64
+get_imm64 (u64 insn_addr)
+{
+	u64 *p = (u64 *) (insn_addr & -16);	/* mask out slot number */
+
+	return ( (p[1] & 0x0800000000000000UL) << 4)  | /*A*/
+		((p[1] & 0x00000000007fffffUL) << 40) | /*B*/
+		((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/
+		((p[1] & 0x0000100000000000UL) >> 23) | /*D*/
+		((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/
+		((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/
+		((p[1] & 0x000007f000000000UL) >> 36);  /*G*/
+}
+
+/* Patch instruction with "val" where "mask" has 1 bits. */
+void
+ia64_patch (u64 insn_addr, u64 mask, u64 val)
+{
+	u64 m0, m1, v0, v1, b0, b1, *b = (u64 *) (insn_addr & -16);
+#	define insn_mask ((1UL << 41) - 1)
+	unsigned long shift;
+
+	b0 = b[0]; b1 = b[1];
+	shift = 5 + 41 * (insn_addr % 16); /* 5 bits of template, then 3 x 41-bit instructions */
+	if (shift >= 64) {
+		m1 = mask << (shift - 64);
+		v1 = val << (shift - 64);
+	} else {
+		m0 = mask << shift; m1 = mask >> (64 - shift);
+		v0 = val  << shift; v1 = val >> (64 - shift);
+		b[0] = (b0 & ~m0) | (v0 & m0);
+	}
+	b[1] = (b1 & ~m1) | (v1 & m1);
+}
+
+void
+ia64_patch_imm64 (u64 insn_addr, u64 val)
+{
+	/* The assembler may generate offset pointing to either slot 1
+	   or slot 2 for a long (2-slot) instruction, occupying slots 1
+	   and 2.  */
+  	insn_addr &= -16UL;
+	ia64_patch(insn_addr + 2,
+		   0x01fffefe000UL, (  ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */
+				     | ((val & 0x0000000000200000UL) <<  0) /* bit 21 -> 21 */
+				     | ((val & 0x00000000001f0000UL) <<  6) /* bit 16 -> 22 */
+				     | ((val & 0x000000000000ff80UL) << 20) /* bit  7 -> 27 */
+				     | ((val & 0x000000000000007fUL) << 13) /* bit  0 -> 13 */));
+	ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22);
+}
+
+void
+ia64_patch_imm60 (u64 insn_addr, u64 val)
+{
+	/* The assembler may generate offset pointing to either slot 1
+	   or slot 2 for a long (2-slot) instruction, occupying slots 1
+	   and 2.  */
+  	insn_addr &= -16UL;
+	ia64_patch(insn_addr + 2,
+		   0x011ffffe000UL, (  ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */
+				     | ((val & 0x00000000000fffffUL) << 13) /* bit  0 -> 13 */));
+	ia64_patch(insn_addr + 1, 0x1fffffffffcUL, val >> 18);
+}
+
+/*
+ * We need sometimes to load the physical address of a kernel
+ * object.  Often we can convert the virtual address to physical
+ * at execution time, but sometimes (either for performance reasons
+ * or during error recovery) we cannot to this.  Patch the marked
+ * bundles to load the physical address.
+ */
+void __init
+ia64_patch_vtop (unsigned long start, unsigned long end)
+{
+	s32 *offp = (s32 *) start;
+	u64 ip;
+
+	while (offp < (s32 *) end) {
+		ip = (u64) offp + *offp;
+
+		/* replace virtual address with corresponding physical address: */
+		ia64_patch_imm64(ip, ia64_tpa(get_imm64(ip)));
+		ia64_fc((void *) ip);
+		++offp;
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+/*
+ * Disable the RSE workaround by turning the conditional branch
+ * that we tagged in each place the workaround was used into an
+ * unconditional branch.
+ */
+void __init
+ia64_patch_rse (unsigned long start, unsigned long end)
+{
+	s32 *offp = (s32 *) start;
+	u64 ip, *b;
+
+	while (offp < (s32 *) end) {
+		ip = (u64) offp + *offp;
+
+		b = (u64 *)(ip & -16);
+		b[1] &= ~0xf800000L;
+		ia64_fc((void *) ip);
+		++offp;
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+void __init
+ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
+{
+	static int first_time = 1;
+	int need_workaround;
+	s32 *offp = (s32 *) start;
+	u64 *wp;
+
+	need_workaround = (local_cpu_data->family == 0x1f && local_cpu_data->model == 0);
+
+	if (first_time) {
+		first_time = 0;
+		if (need_workaround)
+			printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n");
+	}
+	if (need_workaround)
+		return;
+
+	while (offp < (s32 *) end) {
+		wp = (u64 *) ia64_imva((char *) offp + *offp);
+		wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
+		wp[1] = 0x0084006880000200UL;
+		wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
+		wp[3] = 0x0004000000000200UL;
+		ia64_fc(wp); ia64_fc(wp + 2);
+		++offp;
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+extern unsigned long ia64_native_fsyscall_table[NR_syscalls];
+extern char ia64_native_fsys_bubble_down[];
+struct pv_fsys_data pv_fsys_data __initdata = {
+	.fsyscall_table = (unsigned long *)ia64_native_fsyscall_table,
+	.fsys_bubble_down = (void *)ia64_native_fsys_bubble_down,
+};
+
+unsigned long * __init
+paravirt_get_fsyscall_table(void)
+{
+	return pv_fsys_data.fsyscall_table;
+}
+
+char * __init
+paravirt_get_fsys_bubble_down(void)
+{
+	return pv_fsys_data.fsys_bubble_down;
+}
+
+static void __init
+patch_fsyscall_table (unsigned long start, unsigned long end)
+{
+	u64 fsyscall_table = (u64)paravirt_get_fsyscall_table();
+	s32 *offp = (s32 *) start;
+	u64 ip;
+
+	while (offp < (s32 *) end) {
+		ip = (u64) ia64_imva((char *) offp + *offp);
+		ia64_patch_imm64(ip, fsyscall_table);
+		ia64_fc((void *) ip);
+		++offp;
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+static void __init
+patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
+{
+	u64 fsys_bubble_down = (u64)paravirt_get_fsys_bubble_down();
+	s32 *offp = (s32 *) start;
+	u64 ip;
+
+	while (offp < (s32 *) end) {
+		ip = (u64) offp + *offp;
+		ia64_patch_imm60((u64) ia64_imva((void *) ip),
+				 (u64) (fsys_bubble_down - (ip & -16)) / 16);
+		ia64_fc((void *) ip);
+		++offp;
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+void __init
+ia64_patch_gate (void)
+{
+#	define START(name)	paravirt_get_gate_patchlist(PV_GATE_START_##name)
+#	define END(name)	paravirt_get_gate_patchlist(PV_GATE_END_##name)
+
+	patch_fsyscall_table(START(FSYSCALL), END(FSYSCALL));
+	patch_brl_fsys_bubble_down(START(BRL_FSYS_BUBBLE_DOWN), END(BRL_FSYS_BUBBLE_DOWN));
+	ia64_patch_vtop(START(VTOP), END(VTOP));
+	ia64_patch_mckinley_e9(START(MCKINLEY_E9), END(MCKINLEY_E9));
+}
+
+void ia64_patch_phys_stack_reg(unsigned long val)
+{
+	s32 * offp = (s32 *) __start___phys_stack_reg_patchlist;
+	s32 * end = (s32 *) __end___phys_stack_reg_patchlist;
+	u64 ip, mask, imm;
+
+	/* see instruction format A4: adds r1 = imm13, r3 */
+	mask = (0x3fUL << 27) | (0x7f << 13);
+	imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13;
+
+	while (offp < end) {
+		ip = (u64) offp + *offp;
+		ia64_patch(ip, mask, imm);
+		ia64_fc((void *)ip);
+		++offp;
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
--- a/arch/ia64/kernel/pci-dma.c
+++ b/arch/ia64/kernel/pci-dma.c
@ -0,0 +1,110 @@
+/*
+ * Dynamic DMA mapping support.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/dmar.h>
+#include <asm/iommu.h>
+#include <asm/machvec.h>
+#include <linux/dma-mapping.h>
+
+
+#ifdef CONFIG_INTEL_IOMMU
+
+#include <linux/kernel.h>
+
+#include <asm/page.h>
+
+dma_addr_t bad_dma_address __read_mostly;
+EXPORT_SYMBOL(bad_dma_address);
+
+static int iommu_sac_force __read_mostly;
+
+int no_iommu __read_mostly;
+#ifdef CONFIG_IOMMU_DEBUG
+int force_iommu __read_mostly = 1;
+#else
+int force_iommu __read_mostly;
+#endif
+
+int iommu_pass_through;
+
+extern struct dma_map_ops intel_dma_ops;
+
+static int __init pci_iommu_init(void)
+{
+	if (iommu_detected)
+		intel_iommu_init();
+
+	return 0;
+}
+
+/* Must execute after PCI subsystem */
+fs_initcall(pci_iommu_init);
+
+void pci_iommu_shutdown(void)
+{
+	return;
+}
+
+void __init
+iommu_dma_init(void)
+{
+	return;
+}
+
+int iommu_dma_supported(struct device *dev, u64 mask)
+{
+	/* Copied from i386. Doesn't make much sense, because it will
+	   only work for pci_alloc_coherent.
+	   The caller just has to use GFP_DMA in this case. */
+	if (mask < DMA_BIT_MASK(24))
+		return 0;
+
+	/* Tell the device to use SAC when IOMMU force is on.  This
+	   allows the driver to use cheaper accesses in some cases.
+
+	   Problem with this is that if we overflow the IOMMU area and
+	   return DAC as fallback address the device may not handle it
+	   correctly.
+
+	   As a special case some controllers have a 39bit address
+	   mode that is as efficient as 32bit (aic79xx). Don't force
+	   SAC for these.  Assume all masks <= 40 bits are of this
+	   type. Normally this doesn't make any difference, but gives
+	   more gentle handling of IOMMU overflow. */
+	if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) {
+		dev_info(dev, "Force SAC with mask %llx\n", mask);
+		return 0;
+	}
+
+	return 1;
+}
+EXPORT_SYMBOL(iommu_dma_supported);
+
+void __init pci_iommu_alloc(void)
+{
+	dma_ops = &intel_dma_ops;
+
+	dma_ops->sync_single_for_cpu = machvec_dma_sync_single;
+	dma_ops->sync_sg_for_cpu = machvec_dma_sync_sg;
+	dma_ops->sync_single_for_device = machvec_dma_sync_single;
+	dma_ops->sync_sg_for_device = machvec_dma_sync_sg;
+	dma_ops->dma_supported = iommu_dma_supported;
+
+	/*
+	 * The order of these functions is important for
+	 * fall-back/fail-over reasons
+	 */
+	detect_intel_iommu();
+
+#ifdef CONFIG_SWIOTLB
+	pci_swiotlb_init();
+#endif
+}
+
+#endif
--- a/arch/ia64/kernel/pci-swiotlb.c
+++ b/arch/ia64/kernel/pci-swiotlb.c
@ -0,0 +1,67 @@
+/* Glue code to lib/swiotlb.c */
+
+#include <linux/pci.h>
+#include <linux/gfp.h>
+#include <linux/cache.h>
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/swiotlb.h>
+#include <asm/dma.h>
+#include <asm/iommu.h>
+#include <asm/machvec.h>
+
+int swiotlb __read_mostly;
+EXPORT_SYMBOL(swiotlb);
+
+static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size,
+					 dma_addr_t *dma_handle, gfp_t gfp,
+					 struct dma_attrs *attrs)
+{
+	if (dev->coherent_dma_mask != DMA_BIT_MASK(64))
+		gfp |= GFP_DMA;
+	return swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
+}
+
+static void ia64_swiotlb_free_coherent(struct device *dev, size_t size,
+				       void *vaddr, dma_addr_t dma_addr,
+				       struct dma_attrs *attrs)
+{
+	swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+}
+
+struct dma_map_ops swiotlb_dma_ops = {
+	.alloc = ia64_swiotlb_alloc_coherent,
+	.free = ia64_swiotlb_free_coherent,
+	.map_page = swiotlb_map_page,
+	.unmap_page = swiotlb_unmap_page,
+	.map_sg = swiotlb_map_sg_attrs,
+	.unmap_sg = swiotlb_unmap_sg_attrs,
+	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+	.sync_single_for_device = swiotlb_sync_single_for_device,
+	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+	.sync_sg_for_device = swiotlb_sync_sg_for_device,
+	.dma_supported = swiotlb_dma_supported,
+	.mapping_error = swiotlb_dma_mapping_error,
+};
+
+void __init swiotlb_dma_init(void)
+{
+	dma_ops = &swiotlb_dma_ops;
+	swiotlb_init(1);
+}
+
+void __init pci_swiotlb_init(void)
+{
+	if (!iommu_detected) {
+#ifdef CONFIG_IA64_GENERIC
+		swiotlb = 1;
+		printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
+		machvec_init("dig");
+		swiotlb_init(1);
+		dma_ops = &swiotlb_dma_ops;
+#else
+		panic("Unable to find Intel IOMMU");
+#endif
+	}
+}
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
--- a/arch/ia64/kernel/perfmon_default_smpl.c
+++ b/arch/ia64/kernel/perfmon_default_smpl.c
@ -0,0 +1,296 @@
+/*
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *               Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * This file implements the default sampling buffer format
+ * for the Linux/ia64 perfmon-2 subsystem.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <asm/delay.h>
+#include <linux/smp.h>
+
+#include <asm/perfmon.h>
+#include <asm/perfmon_default_smpl.h>
+
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
+MODULE_DESCRIPTION("perfmon default sampling format");
+MODULE_LICENSE("GPL");
+
+#define DEFAULT_DEBUG 1
+
+#ifdef DEFAULT_DEBUG
+#define DPRINT(a) \
+	do { \
+		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \
+	} while (0)
+
+#define DPRINT_ovfl(a) \
+	do { \
+		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \
+	} while (0)
+
+#else
+#define DPRINT(a)
+#define DPRINT_ovfl(a)
+#endif
+
+static int
+default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data)
+{
+	pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data;
+	int ret = 0;
+
+	if (data == NULL) {
+		DPRINT(("[%d] no argument passed\n", task_pid_nr(task)));
+		return -EINVAL;
+	}
+
+	DPRINT(("[%d] validate flags=0x%x CPU%d\n", task_pid_nr(task), flags, cpu));
+
+	/*
+	 * must hold at least the buffer header + one minimally sized entry
+	 */
+	if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL;
+
+	DPRINT(("buf_size=%lu\n", arg->buf_size));
+
+	return ret;
+}
+
+static int
+default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size)
+{
+	pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
+
+	/*
+	 * size has been validated in default_validate
+	 */
+	*size = arg->buf_size;
+
+	return 0;
+}
+
+static int
+default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data)
+{
+	pfm_default_smpl_hdr_t *hdr;
+	pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
+
+	hdr = (pfm_default_smpl_hdr_t *)buf;
+
+	hdr->hdr_version      = PFM_DEFAULT_SMPL_VERSION;
+	hdr->hdr_buf_size     = arg->buf_size;
+	hdr->hdr_cur_offs     = sizeof(*hdr);
+	hdr->hdr_overflows    = 0UL;
+	hdr->hdr_count        = 0UL;
+
+	DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n",
+		task_pid_nr(task),
+		buf,
+		hdr->hdr_buf_size,
+		sizeof(*hdr),
+		hdr->hdr_version,
+		hdr->hdr_cur_offs));
+
+	return 0;
+}
+
+static int
+default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp)
+{
+	pfm_default_smpl_hdr_t *hdr;
+	pfm_default_smpl_entry_t *ent;
+	void *cur, *last;
+	unsigned long *e, entry_size;
+	unsigned int npmds, i;
+	unsigned char ovfl_pmd;
+	unsigned char ovfl_notify;
+
+	if (unlikely(buf == NULL || arg == NULL|| regs == NULL || task == NULL)) {
+		DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task->pid, buf, arg));
+		return -EINVAL;
+	}
+
+	hdr         = (pfm_default_smpl_hdr_t *)buf;
+	cur         = buf+hdr->hdr_cur_offs;
+	last        = buf+hdr->hdr_buf_size;
+	ovfl_pmd    = arg->ovfl_pmd;
+	ovfl_notify = arg->ovfl_notify;
+
+	/*
+	 * precheck for sanity
+	 */
+	if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
+
+	npmds = hweight64(arg->smpl_pmds[0]);
+
+	ent = (pfm_default_smpl_entry_t *)cur;
+
+	prefetch(arg->smpl_pmds_values);
+
+	entry_size = sizeof(*ent) + (npmds << 3);
+
+	/* position for first pmd */
+	e = (unsigned long *)(ent+1);
+
+	hdr->hdr_count++;
+
+	DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n",
+			task->pid,
+			hdr->hdr_count,
+			cur, last,
+			last-cur,
+			ovfl_pmd,
+			ovfl_notify, npmds));
+
+	/*
+	 * current = task running at the time of the overflow.
+	 *
+	 * per-task mode:
+	 * 	- this is usually the task being monitored.
+	 * 	  Under certain conditions, it might be a different task
+	 *
+	 * system-wide:
+	 * 	- this is not necessarily the task controlling the session
+	 */
+	ent->pid            = current->pid;
+	ent->ovfl_pmd  	    = ovfl_pmd;
+	ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val;
+
+	/*
+	 * where did the fault happen (includes slot number)
+	 */
+	ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3);
+
+	ent->tstamp    = stamp;
+	ent->cpu       = smp_processor_id();
+	ent->set       = arg->active_set;
+	ent->tgid      = current->tgid;
+
+	/*
+	 * selectively store PMDs in increasing index number
+	 */
+	if (npmds) {
+		unsigned long *val = arg->smpl_pmds_values;
+		for(i=0; i < npmds; i++) {
+			*e++ = *val++;
+		}
+	}
+
+	/*
+	 * update position for next entry
+	 */
+	hdr->hdr_cur_offs += entry_size;
+	cur               += entry_size;
+
+	/*
+	 * post check to avoid losing the last sample
+	 */
+	if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
+
+	/*
+	 * keep same ovfl_pmds, ovfl_notify
+	 */
+	arg->ovfl_ctrl.bits.notify_user     = 0;
+	arg->ovfl_ctrl.bits.block_task      = 0;
+	arg->ovfl_ctrl.bits.mask_monitoring = 0;
+	arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */
+
+	return 0;
+full:
+	DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify));
+
+	/*
+	 * increment number of buffer overflow.
+	 * important to detect duplicate set of samples.
+	 */
+	hdr->hdr_overflows++;
+
+	/*
+	 * if no notification requested, then we saturate the buffer
+	 */
+	if (ovfl_notify == 0) {
+		arg->ovfl_ctrl.bits.notify_user     = 0;
+		arg->ovfl_ctrl.bits.block_task      = 0;
+		arg->ovfl_ctrl.bits.mask_monitoring = 1;
+		arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0;
+	} else {
+		arg->ovfl_ctrl.bits.notify_user     = 1;
+		arg->ovfl_ctrl.bits.block_task      = 1; /* ignored for non-blocking context */
+		arg->ovfl_ctrl.bits.mask_monitoring = 1;
+		arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */
+	}
+	return -1; /* we are full, sorry */
+}
+
+static int
+default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
+{
+	pfm_default_smpl_hdr_t *hdr;
+
+	hdr = (pfm_default_smpl_hdr_t *)buf;
+
+	hdr->hdr_count    = 0UL;
+	hdr->hdr_cur_offs = sizeof(*hdr);
+
+	ctrl->bits.mask_monitoring = 0;
+	ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */
+
+	return 0;
+}
+
+static int
+default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
+{
+	DPRINT(("[%d] exit(%p)\n", task_pid_nr(task), buf));
+	return 0;
+}
+
+static pfm_buffer_fmt_t default_fmt={
+ 	.fmt_name 	    = "default_format",
+ 	.fmt_uuid	    = PFM_DEFAULT_SMPL_UUID,
+ 	.fmt_arg_size	    = sizeof(pfm_default_smpl_arg_t),
+ 	.fmt_validate	    = default_validate,
+ 	.fmt_getsize	    = default_get_size,
+ 	.fmt_init	    = default_init,
+ 	.fmt_handler	    = default_handler,
+ 	.fmt_restart	    = default_restart,
+ 	.fmt_restart_active = default_restart,
+ 	.fmt_exit	    = default_exit,
+};
+
+static int __init
+pfm_default_smpl_init_module(void)
+{
+	int ret;
+
+	ret = pfm_register_buffer_fmt(&default_fmt);
+	if (ret == 0) {
+		printk("perfmon_default_smpl: %s v%u.%u registered\n",
+			default_fmt.fmt_name,
+			PFM_DEFAULT_SMPL_VERSION_MAJ,
+			PFM_DEFAULT_SMPL_VERSION_MIN);
+	} else {
+		printk("perfmon_default_smpl: %s cannot register ret=%d\n",
+			default_fmt.fmt_name,
+			ret);
+	}
+
+	return ret;
+}
+
+static void __exit
+pfm_default_smpl_cleanup_module(void)
+{
+	int ret;
+	ret = pfm_unregister_buffer_fmt(default_fmt.fmt_uuid);
+
+	printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, ret);
+}
+
+module_init(pfm_default_smpl_init_module);
+module_exit(pfm_default_smpl_cleanup_module);
+
--- a/arch/ia64/kernel/perfmon_generic.h
+++ b/arch/ia64/kernel/perfmon_generic.h
@ -0,0 +1,45 @@
+/*
+ * This file contains the generic PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (C) 2002-2003  Hewlett Packard Co
+ *               Stephane Eranian <eranian@hpl.hp.com>
+ */
+
+static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={
+/* pmc0  */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={
+/* pmd0  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd1  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd2  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd3  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
+/* pmd4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
+/* pmd5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
+/* pmd6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
+/* pmd7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf_gen={
+	.pmu_name   = "Generic",
+	.pmu_family = 0xff, /* any */
+	.ovfl_val   = (1UL << 32) - 1,
+	.num_ibrs   = 0, /* does not use */
+	.num_dbrs   = 0, /* does not use */
+	.pmd_desc   = pfm_gen_pmd_desc,
+	.pmc_desc   = pfm_gen_pmc_desc
+};
+
--- a/arch/ia64/kernel/perfmon_itanium.h
+++ b/arch/ia64/kernel/perfmon_itanium.h
@ -0,0 +1,115 @@
+/*
+ * This file contains the Itanium PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (C) 2002-2003  Hewlett Packard Co
+ *               Stephane Eranian <eranian@hpl.hp.com>
+ */
+static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+
+static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={
+/* pmc0  */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc4  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc5  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc6  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc7  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc8  */ { PFM_REG_CONFIG  , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc9  */ { PFM_REG_CONFIG  , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc13 */ { PFM_REG_CONFIG  , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={
+/* pmd0  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd1  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd2  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd3  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd4  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
+/* pmd5  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
+/* pmd6  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
+/* pmd7  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
+/* pmd8  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd9  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd10 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd11 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd12 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd13 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd14 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd15 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd16 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd17 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+static int
+pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
+{
+	int ret;
+	int is_loaded;
+
+	/* sanitfy check */
+	if (ctx == NULL) return -EINVAL;
+
+	is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
+
+	/*
+	 * we must clear the (instruction) debug registers if pmc13.ta bit is cleared
+	 * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
+	 */
+	if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) {
+
+		DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val));
+
+		/* don't mix debug with perfmon */
+		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
+
+		/*
+		 * a count of 0 will mark the debug registers as in use and also
+		 * ensure that they are properly cleared.
+		 */
+		ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs);
+		if (ret) return ret;
+	}
+
+	/*
+	 * we must clear the (data) debug registers if pmc11.pt bit is cleared
+	 * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
+	 */
+	if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) {
+
+		DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val));
+
+		/* don't mix debug with perfmon */
+		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
+
+		/*
+		 * a count of 0 will mark the debug registers as in use and also
+		 * ensure that they are properly cleared.
+		 */
+		ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs);
+		if (ret) return ret;
+	}
+	return 0;
+}
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf_ita={
+	.pmu_name      = "Itanium",
+	.pmu_family    = 0x7,
+	.ovfl_val      = (1UL << 32) - 1,
+	.pmd_desc      = pfm_ita_pmd_desc,
+	.pmc_desc      = pfm_ita_pmc_desc,
+	.num_ibrs      = 8,
+	.num_dbrs      = 8,
+	.use_rr_dbregs = 1, /* debug register are use for range retrictions */
+};
+
+
--- a/arch/ia64/kernel/perfmon_mckinley.h
+++ b/arch/ia64/kernel/perfmon_mckinley.h
@ -0,0 +1,187 @@
+/*
+ * This file contains the McKinley PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (C) 2002-2003  Hewlett Packard Co
+ *               Stephane Eranian <eranian@hpl.hp.com>
+ */
+static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+
+static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={
+/* pmc0  */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc4  */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc5  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc6  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc7  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc8  */ { PFM_REG_CONFIG  , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc9  */ { PFM_REG_CONFIG  , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL,  pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL,  pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc13 */ { PFM_REG_CONFIG  , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc14 */ { PFM_REG_CONFIG  , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc15 */ { PFM_REG_CONFIG  , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={
+/* pmd0  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd1  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd2  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd3  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
+/* pmd5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
+/* pmd6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
+/* pmd7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
+/* pmd8  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd9  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd10 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd11 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd12 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd13 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd14 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd15 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd16 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd17 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+/*
+ * PMC reserved fields must have their power-up values preserved
+ */
+static int
+pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
+{
+	unsigned long tmp1, tmp2, ival = *val;
+
+	/* remove reserved areas from user value */
+	tmp1 = ival & PMC_RSVD_MASK(cnum);
+
+	/* get reserved fields values */
+	tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
+
+	*val = tmp1 | tmp2;
+
+	DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
+		  cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
+	return 0;
+}
+
+/*
+ * task can be NULL if the context is unloaded
+ */
+static int
+pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
+{
+	int ret = 0, check_case1 = 0;
+	unsigned long val8 = 0, val14 = 0, val13 = 0;
+	int is_loaded;
+
+	/* first preserve the reserved fields */
+	pfm_mck_reserved(cnum, val, regs);
+
+	/* sanitfy check */
+	if (ctx == NULL) return -EINVAL;
+
+	is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
+
+	/*
+	 * we must clear the debug registers if pmc13 has a value which enable
+	 * memory pipeline event constraints. In this case we need to clear the
+	 * the debug registers if they have not yet been accessed. This is required
+	 * to avoid picking stale state.
+	 * PMC13 is "active" if:
+	 * 	one of the pmc13.cfg_dbrpXX field is different from 0x3
+	 * AND
+	 * 	at the corresponding pmc13.ena_dbrpXX is set.
+	 */
+	DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, *val, ctx->ctx_fl_using_dbreg, is_loaded));
+
+	if (cnum == 13 && is_loaded
+	    && (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
+
+		DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val));
+
+		/* don't mix debug with perfmon */
+		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
+
+		/*
+		 * a count of 0 will mark the debug registers as in use and also
+		 * ensure that they are properly cleared.
+		 */
+		ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
+		if (ret) return ret;
+	}
+	/*
+	 * we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled
+	 * before they are (fl_using_dbreg==0) to avoid picking up stale information.
+	 */
+	if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) {
+
+		DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val));
+
+		/* don't mix debug with perfmon */
+		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
+
+		/*
+		 * a count of 0 will mark the debug registers as in use and also
+		 * ensure that they are properly cleared.
+		 */
+		ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
+		if (ret) return ret;
+
+	}
+
+	switch(cnum) {
+		case  4: *val |= 1UL << 23; /* force power enable bit */
+			 break;
+		case  8: val8 = *val;
+			 val13 = ctx->ctx_pmcs[13];
+			 val14 = ctx->ctx_pmcs[14];
+			 check_case1 = 1;
+			 break;
+		case 13: val8  = ctx->ctx_pmcs[8];
+			 val13 = *val;
+			 val14 = ctx->ctx_pmcs[14];
+			 check_case1 = 1;
+			 break;
+		case 14: val8  = ctx->ctx_pmcs[8];
+			 val13 = ctx->ctx_pmcs[13];
+			 val14 = *val;
+			 check_case1 = 1;
+			 break;
+	}
+	/* check illegal configuration which can produce inconsistencies in tagging
+	 * i-side events in L1D and L2 caches
+	 */
+	if (check_case1) {
+		ret =   ((val13 >> 45) & 0xf) == 0
+		   && ((val8 & 0x1) == 0)
+		   && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0)
+		       ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0));
+
+		if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n"));
+	}
+
+	return ret ? -EINVAL : 0;
+}
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf_mck={
+	.pmu_name      = "Itanium 2",
+	.pmu_family    = 0x1f,
+	.flags	       = PFM_PMU_IRQ_RESEND,
+	.ovfl_val      = (1UL << 47) - 1,
+	.pmd_desc      = pfm_mck_pmd_desc,
+	.pmc_desc      = pfm_mck_pmc_desc,
+	.num_ibrs       = 8,
+	.num_dbrs       = 8,
+	.use_rr_dbregs = 1 /* debug register are use for range restrictions */
+};
+
+
--- a/arch/ia64/kernel/perfmon_montecito.h
+++ b/arch/ia64/kernel/perfmon_montecito.h
@ -0,0 +1,269 @@
+/*
+ * This file contains the Montecito PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
+ *               Contributed by Stephane Eranian <eranian@hpl.hp.com>
+ */
+static int pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+
+#define RDEP_MONT_ETB	(RDEP(38)|RDEP(39)|RDEP(48)|RDEP(49)|RDEP(50)|RDEP(51)|RDEP(52)|RDEP(53)|RDEP(54)|\
+			 RDEP(55)|RDEP(56)|RDEP(57)|RDEP(58)|RDEP(59)|RDEP(60)|RDEP(61)|RDEP(62)|RDEP(63))
+#define RDEP_MONT_DEAR  (RDEP(32)|RDEP(33)|RDEP(36))
+#define RDEP_MONT_IEAR  (RDEP(34)|RDEP(35))
+
+static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={
+/* pmc0  */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc4  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(4),0, 0, 0}, {0,0, 0, 0}},
+/* pmc5  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(5),0, 0, 0}, {0,0, 0, 0}},
+/* pmc6  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(6),0, 0, 0}, {0,0, 0, 0}},
+/* pmc7  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(7),0, 0, 0}, {0,0, 0, 0}},
+/* pmc8  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(8),0, 0, 0}, {0,0, 0, 0}},
+/* pmc9  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(9),0, 0, 0}, {0,0, 0, 0}},
+/* pmc10 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(10),0, 0, 0}, {0,0, 0, 0}},
+/* pmc11 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(11),0, 0, 0}, {0,0, 0, 0}},
+/* pmc12 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(12),0, 0, 0}, {0,0, 0, 0}},
+/* pmc13 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(13),0, 0, 0}, {0,0, 0, 0}},
+/* pmc14 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(14),0, 0, 0}, {0,0, 0, 0}},
+/* pmc15 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(15),0, 0, 0}, {0,0, 0, 0}},
+/* pmc16 */ { PFM_REG_NOTIMPL, },
+/* pmc17 */ { PFM_REG_NOTIMPL, },
+/* pmc18 */ { PFM_REG_NOTIMPL, },
+/* pmc19 */ { PFM_REG_NOTIMPL, },
+/* pmc20 */ { PFM_REG_NOTIMPL, },
+/* pmc21 */ { PFM_REG_NOTIMPL, },
+/* pmc22 */ { PFM_REG_NOTIMPL, },
+/* pmc23 */ { PFM_REG_NOTIMPL, },
+/* pmc24 */ { PFM_REG_NOTIMPL, },
+/* pmc25 */ { PFM_REG_NOTIMPL, },
+/* pmc26 */ { PFM_REG_NOTIMPL, },
+/* pmc27 */ { PFM_REG_NOTIMPL, },
+/* pmc28 */ { PFM_REG_NOTIMPL, },
+/* pmc29 */ { PFM_REG_NOTIMPL, },
+/* pmc30 */ { PFM_REG_NOTIMPL, },
+/* pmc31 */ { PFM_REG_NOTIMPL, },
+/* pmc32 */ { PFM_REG_CONFIG,  0, 0x30f01ffffffffffUL, 0x30f01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc33 */ { PFM_REG_CONFIG,  0, 0x0,  0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc34 */ { PFM_REG_CONFIG,  0, 0xf01ffffffffffUL, 0xf01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc35 */ { PFM_REG_CONFIG,  0, 0x0,  0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc36 */ { PFM_REG_CONFIG,  0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, {RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}},
+/* pmc38 */ { PFM_REG_CONFIG,  0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
+/* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}},
+/* pmc41 */ { PFM_REG_CONFIG,  0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
+	    { PFM_REG_END    , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+static pfm_reg_desc_t pfm_mont_pmd_desc[PMU_MAX_PMDS]={
+/* pmd0  */ { PFM_REG_NOTIMPL, }, 
+/* pmd1  */ { PFM_REG_NOTIMPL, },
+/* pmd2  */ { PFM_REG_NOTIMPL, },
+/* pmd3  */ { PFM_REG_NOTIMPL, },
+/* pmd4  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(4),0, 0, 0}},
+/* pmd5  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(5),0, 0, 0}},
+/* pmd6  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(6),0, 0, 0}},
+/* pmd7  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(7),0, 0, 0}},
+/* pmd8  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(8),0, 0, 0}}, 
+/* pmd9  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(9),0, 0, 0}},
+/* pmd10 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(10),0, 0, 0}},
+/* pmd11 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(11),0, 0, 0}},
+/* pmd12 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(12),0, 0, 0}},
+/* pmd13 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(13),0, 0, 0}},
+/* pmd14 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(14),0, 0, 0}},
+/* pmd15 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(15),0, 0, 0}},
+/* pmd16 */ { PFM_REG_NOTIMPL, },
+/* pmd17 */ { PFM_REG_NOTIMPL, },
+/* pmd18 */ { PFM_REG_NOTIMPL, },
+/* pmd19 */ { PFM_REG_NOTIMPL, },
+/* pmd20 */ { PFM_REG_NOTIMPL, },
+/* pmd21 */ { PFM_REG_NOTIMPL, },
+/* pmd22 */ { PFM_REG_NOTIMPL, },
+/* pmd23 */ { PFM_REG_NOTIMPL, },
+/* pmd24 */ { PFM_REG_NOTIMPL, },
+/* pmd25 */ { PFM_REG_NOTIMPL, },
+/* pmd26 */ { PFM_REG_NOTIMPL, },
+/* pmd27 */ { PFM_REG_NOTIMPL, },
+/* pmd28 */ { PFM_REG_NOTIMPL, },
+/* pmd29 */ { PFM_REG_NOTIMPL, },
+/* pmd30 */ { PFM_REG_NOTIMPL, },
+/* pmd31 */ { PFM_REG_NOTIMPL, },
+/* pmd32 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(33)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}},
+/* pmd33 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}},
+/* pmd34 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(35),0, 0, 0}, {RDEP(37),0, 0, 0}},
+/* pmd35 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(34),0, 0, 0}, {RDEP(37),0, 0, 0}},
+/* pmd36 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(33),0, 0, 0}, {RDEP(40),0, 0, 0}},
+/* pmd37 */ { PFM_REG_NOTIMPL, },
+/* pmd38 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd39 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd40 */ { PFM_REG_NOTIMPL, },
+/* pmd41 */ { PFM_REG_NOTIMPL, },
+/* pmd42 */ { PFM_REG_NOTIMPL, },
+/* pmd43 */ { PFM_REG_NOTIMPL, },
+/* pmd44 */ { PFM_REG_NOTIMPL, },
+/* pmd45 */ { PFM_REG_NOTIMPL, },
+/* pmd46 */ { PFM_REG_NOTIMPL, },
+/* pmd47 */ { PFM_REG_NOTIMPL, },
+/* pmd48 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd49 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd50 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd51 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd52 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd53 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd54 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd55 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd56 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd57 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd58 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd59 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd60 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd61 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd62 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+/* pmd63 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
+	    { PFM_REG_END   , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+/*
+ * PMC reserved fields must have their power-up values preserved
+ */
+static int
+pfm_mont_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
+{
+	unsigned long tmp1, tmp2, ival = *val;
+
+	/* remove reserved areas from user value */
+	tmp1 = ival & PMC_RSVD_MASK(cnum);
+
+	/* get reserved fields values */
+	tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
+
+	*val = tmp1 | tmp2;
+
+	DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
+		  cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
+	return 0;
+}
+
+/*
+ * task can be NULL if the context is unloaded
+ */
+static int
+pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
+{
+	int ret = 0;
+	unsigned long val32 = 0, val38 = 0, val41 = 0;
+	unsigned long tmpval;
+	int check_case1 = 0;
+	int is_loaded;
+
+	/* first preserve the reserved fields */
+	pfm_mont_reserved(cnum, val, regs);
+
+	tmpval = *val;
+
+	/* sanity check */
+	if (ctx == NULL) return -EINVAL;
+
+	is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
+
+	/*
+	 * we must clear the debug registers if pmc41 has a value which enable
+	 * memory pipeline event constraints. In this case we need to clear the
+	 * the debug registers if they have not yet been accessed. This is required
+	 * to avoid picking stale state.
+	 * PMC41 is "active" if:
+	 * 	one of the pmc41.cfg_dtagXX field is different from 0x3
+	 * AND
+	 * 	at the corresponding pmc41.en_dbrpXX is set.
+	 * AND
+	 *	ctx_fl_using_dbreg == 0  (i.e., dbr not yet used)
+	 */
+	DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded));
+
+	if (cnum == 41 && is_loaded 
+	    && (tmpval & 0x1e00000000000UL) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
+
+		DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval));
+
+		/* don't mix debug with perfmon */
+		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
+
+		/*
+		 * a count of 0 will mark the debug registers if:
+		 * AND
+		 */
+		ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
+		if (ret) return ret;
+	}
+	/*
+	 * we must clear the (instruction) debug registers if:
+	 * 	pmc38.ig_ibrpX is 0 (enabled)
+	 * AND
+	 *	ctx_fl_using_dbreg == 0  (i.e., dbr not yet used)
+	 */
+	if (cnum == 38 && is_loaded && ((tmpval & 0x492UL) != 0x492UL) && ctx->ctx_fl_using_dbreg == 0) {
+
+		DPRINT(("pmc38=0x%lx has active pmc38 settings, clearing ibr\n", tmpval));
+
+		/* don't mix debug with perfmon */
+		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
+
+		/*
+		 * a count of 0 will mark the debug registers as in use and also
+		 * ensure that they are properly cleared.
+		 */
+		ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
+		if (ret) return ret;
+
+	}
+	switch(cnum) {
+		case  32: val32 = *val;
+			  val38 = ctx->ctx_pmcs[38];
+			  val41 = ctx->ctx_pmcs[41];
+			  check_case1 = 1;
+			  break;
+		case  38: val38 = *val;
+			  val32 = ctx->ctx_pmcs[32];
+			  val41 = ctx->ctx_pmcs[41];
+			  check_case1 = 1;
+			  break;
+		case  41: val41 = *val;
+			  val32 = ctx->ctx_pmcs[32];
+			  val38 = ctx->ctx_pmcs[38];
+			  check_case1 = 1;
+			  break;
+	}
+	/* check illegal configuration which can produce inconsistencies in tagging
+	 * i-side events in L1D and L2 caches
+	 */
+	if (check_case1) {
+		ret =   (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0)
+		     && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0)
+		     ||  (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0));
+		if (ret) {
+			DPRINT(("invalid config pmc38=0x%lx pmc41=0x%lx pmc32=0x%lx\n", val38, val41, val32));
+			return -EINVAL;
+		}
+	}
+	*val = tmpval;
+	return 0;
+}
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf_mont={
+	.pmu_name        = "Montecito",
+	.pmu_family      = 0x20,
+	.flags           = PFM_PMU_IRQ_RESEND,
+	.ovfl_val        = (1UL << 47) - 1,
+	.pmd_desc        = pfm_mont_pmd_desc,
+	.pmc_desc        = pfm_mont_pmc_desc,
+	.num_ibrs        = 8,
+	.num_dbrs        = 8,
+	.use_rr_dbregs   = 1 /* debug register are use for range retrictions */
+};
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@ -0,0 +1,682 @@
+/*
+ * Architecture-specific setup.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * 04/11/17 Ashok Raj	<ashok.raj@intel.com> Added CPU Hotplug Support
+ *
+ * 2005-10-07 Keith Owens <kaos@sgi.com>
+ *	      Add notify_die() hooks.
+ */
+#include <linux/cpu.h>
+#include <linux/pm.h>
+#include <linux/elf.h>
+#include <linux/errno.h>
+#include <linux/kallsyms.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/personality.h>
+#include <linux/sched.h>
+#include <linux/stddef.h>
+#include <linux/thread_info.h>
+#include <linux/unistd.h>
+#include <linux/efi.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/kdebug.h>
+#include <linux/utsname.h>
+#include <linux/tracehook.h>
+#include <linux/rcupdate.h>
+
+#include <asm/cpu.h>
+#include <asm/delay.h>
+#include <asm/elf.h>
+#include <asm/irq.h>
+#include <asm/kexec.h>
+#include <asm/pgalloc.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/switch_to.h>
+#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
+#include <asm/unwind.h>
+#include <asm/user.h>
+
+#include "entry.h"
+
+#ifdef CONFIG_PERFMON
+# include <asm/perfmon.h>
+#endif
+
+#include "sigframe.h"
+
+void (*ia64_mark_idle)(int);
+
+unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
+EXPORT_SYMBOL(boot_option_idle_override);
+void (*pm_power_off) (void);
+EXPORT_SYMBOL(pm_power_off);
+
+void
+ia64_do_show_stack (struct unw_frame_info *info, void *arg)
+{
+	unsigned long ip, sp, bsp;
+	char buf[128];			/* don't make it so big that it overflows the stack! */
+
+	printk("\nCall Trace:\n");
+	do {
+		unw_get_ip(info, &ip);
+		if (ip == 0)
+			break;
+
+		unw_get_sp(info, &sp);
+		unw_get_bsp(info, &bsp);
+		snprintf(buf, sizeof(buf),
+			 " [<%016lx>] %%s\n"
+			 "                                sp=%016lx bsp=%016lx\n",
+			 ip, sp, bsp);
+		print_symbol(buf, ip);
+	} while (unw_unwind(info) >= 0);
+}
+
+void
+show_stack (struct task_struct *task, unsigned long *sp)
+{
+	if (!task)
+		unw_init_running(ia64_do_show_stack, NULL);
+	else {
+		struct unw_frame_info info;
+
+		unw_init_from_blocked_task(&info, task);
+		ia64_do_show_stack(&info, NULL);
+	}
+}
+
+void
+show_regs (struct pt_regs *regs)
+{
+	unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
+
+	print_modules();
+	printk("\n");
+	show_regs_print_info(KERN_DEFAULT);
+	printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s (%s)\n",
+	       regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(),
+	       init_utsname()->release);
+	print_symbol("ip is at %s\n", ip);
+	printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
+	       regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
+	printk("rnat: %016lx bsps: %016lx pr  : %016lx\n",
+	       regs->ar_rnat, regs->ar_bspstore, regs->pr);
+	printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
+	       regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
+	printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
+	printk("b0  : %016lx b6  : %016lx b7  : %016lx\n", regs->b0, regs->b6, regs->b7);
+	printk("f6  : %05lx%016lx f7  : %05lx%016lx\n",
+	       regs->f6.u.bits[1], regs->f6.u.bits[0],
+	       regs->f7.u.bits[1], regs->f7.u.bits[0]);
+	printk("f8  : %05lx%016lx f9  : %05lx%016lx\n",
+	       regs->f8.u.bits[1], regs->f8.u.bits[0],
+	       regs->f9.u.bits[1], regs->f9.u.bits[0]);
+	printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
+	       regs->f10.u.bits[1], regs->f10.u.bits[0],
+	       regs->f11.u.bits[1], regs->f11.u.bits[0]);
+
+	printk("r1  : %016lx r2  : %016lx r3  : %016lx\n", regs->r1, regs->r2, regs->r3);
+	printk("r8  : %016lx r9  : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10);
+	printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13);
+	printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16);
+	printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19);
+	printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22);
+	printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25);
+	printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, regs->r28);
+	printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31);
+
+	if (user_mode(regs)) {
+		/* print the stacked registers */
+		unsigned long val, *bsp, ndirty;
+		int i, sof, is_nat = 0;
+
+		sof = regs->cr_ifs & 0x7f;	/* size of frame */
+		ndirty = (regs->loadrs >> 19);
+		bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty);
+		for (i = 0; i < sof; ++i) {
+			get_user(val, (unsigned long __user *) ia64_rse_skip_regs(bsp, i));
+			printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val,
+			       ((i == sof - 1) || (i % 3) == 2) ? "\n" : " ");
+		}
+	} else
+		show_stack(NULL, NULL);
+}
+
+/* local support for deprecated console_print */
+void
+console_print(const char *s)
+{
+	printk(KERN_EMERG "%s", s);
+}
+
+void
+do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
+{
+	if (fsys_mode(current, &scr->pt)) {
+		/*
+		 * defer signal-handling etc. until we return to
+		 * privilege-level 0.
+		 */
+		if (!ia64_psr(&scr->pt)->lp)
+			ia64_psr(&scr->pt)->lp = 1;
+		return;
+	}
+
+#ifdef CONFIG_PERFMON
+	if (current->thread.pfm_needs_checking)
+		/*
+		 * Note: pfm_handle_work() allow us to call it with interrupts
+		 * disabled, and may enable interrupts within the function.
+		 */
+		pfm_handle_work();
+#endif
+
+	/* deal with pending signal delivery */
+	if (test_thread_flag(TIF_SIGPENDING)) {
+		local_irq_enable();	/* force interrupt enable */
+		ia64_do_signal(scr, in_syscall);
+	}
+
+	if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME)) {
+		local_irq_enable();	/* force interrupt enable */
+		tracehook_notify_resume(&scr->pt);
+	}
+
+	/* copy user rbs to kernel rbs */
+	if (unlikely(test_thread_flag(TIF_RESTORE_RSE))) {
+		local_irq_enable();	/* force interrupt enable */
+		ia64_sync_krbs();
+	}
+
+	local_irq_disable();	/* force interrupt disable */
+}
+
+static int __init nohalt_setup(char * str)
+{
+	cpu_idle_poll_ctrl(true);
+	return 1;
+}
+__setup("nohalt", nohalt_setup);
+
+#ifdef CONFIG_HOTPLUG_CPU
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+	unsigned int this_cpu = smp_processor_id();
+
+	/* Ack it */
+	__this_cpu_write(cpu_state, CPU_DEAD);
+
+	max_xtp();
+	local_irq_disable();
+	idle_task_exit();
+	ia64_jump_to_sal(&sal_boot_rendez_state[this_cpu]);
+	/*
+	 * The above is a point of no-return, the processor is
+	 * expected to be in SAL loop now.
+	 */
+	BUG();
+}
+#else
+static inline void play_dead(void)
+{
+	BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+void arch_cpu_idle_dead(void)
+{
+	play_dead();
+}
+
+void arch_cpu_idle(void)
+{
+	void (*mark_idle)(int) = ia64_mark_idle;
+
+#ifdef CONFIG_SMP
+	min_xtp();
+#endif
+	rmb();
+	if (mark_idle)
+		(*mark_idle)(1);
+
+	safe_halt();
+
+	if (mark_idle)
+		(*mark_idle)(0);
+#ifdef CONFIG_SMP
+	normal_xtp();
+#endif
+}
+
+void
+ia64_save_extra (struct task_struct *task)
+{
+#ifdef CONFIG_PERFMON
+	unsigned long info;
+#endif
+
+	if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
+		ia64_save_debug_regs(&task->thread.dbr[0]);
+
+#ifdef CONFIG_PERFMON
+	if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
+		pfm_save_regs(task);
+
+	info = __this_cpu_read(pfm_syst_info);
+	if (info & PFM_CPUINFO_SYST_WIDE)
+		pfm_syst_wide_update_task(task, info, 0);
+#endif
+}
+
+void
+ia64_load_extra (struct task_struct *task)
+{
+#ifdef CONFIG_PERFMON
+	unsigned long info;
+#endif
+
+	if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
+		ia64_load_debug_regs(&task->thread.dbr[0]);
+
+#ifdef CONFIG_PERFMON
+	if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
+		pfm_load_regs(task);
+
+	info = __this_cpu_read(pfm_syst_info);
+	if (info & PFM_CPUINFO_SYST_WIDE) 
+		pfm_syst_wide_update_task(task, info, 1);
+#endif
+}
+
+/*
+ * Copy the state of an ia-64 thread.
+ *
+ * We get here through the following  call chain:
+ *
+ *	from user-level:	from kernel:
+ *
+ *	<clone syscall>	        <some kernel call frames>
+ *	sys_clone		   :
+ *	do_fork			do_fork
+ *	copy_thread		copy_thread
+ *
+ * This means that the stack layout is as follows:
+ *
+ *	+---------------------+ (highest addr)
+ *	|   struct pt_regs    |
+ *	+---------------------+
+ *	| struct switch_stack |
+ *	+---------------------+
+ *	|                     |
+ *	|    memory stack     |
+ *	|                     | <-- sp (lowest addr)
+ *	+---------------------+
+ *
+ * Observe that we copy the unat values that are in pt_regs and switch_stack.  Spilling an
+ * integer to address X causes bit N in ar.unat to be set to the NaT bit of the register,
+ * with N=(X & 0x1ff)/8.  Thus, copying the unat value preserves the NaT bits ONLY if the
+ * pt_regs structure in the parent is congruent to that of the child, modulo 512.  Since
+ * the stack is page aligned and the page size is at least 4KB, this is always the case,
+ * so there is nothing to worry about.
+ */
+int
+copy_thread(unsigned long clone_flags,
+	     unsigned long user_stack_base, unsigned long user_stack_size,
+	     struct task_struct *p)
+{
+	extern char ia64_ret_from_clone;
+	struct switch_stack *child_stack, *stack;
+	unsigned long rbs, child_rbs, rbs_size;
+	struct pt_regs *child_ptregs;
+	struct pt_regs *regs = current_pt_regs();
+	int retval = 0;
+
+	child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1;
+	child_stack = (struct switch_stack *) child_ptregs - 1;
+
+	rbs = (unsigned long) current + IA64_RBS_OFFSET;
+	child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
+
+	/* copy parts of thread_struct: */
+	p->thread.ksp = (unsigned long) child_stack - 16;
+
+	/*
+	 * NOTE: The calling convention considers all floating point
+	 * registers in the high partition (fph) to be scratch.  Since
+	 * the only way to get to this point is through a system call,
+	 * we know that the values in fph are all dead.  Hence, there
+	 * is no need to inherit the fph state from the parent to the
+	 * child and all we have to do is to make sure that
+	 * IA64_THREAD_FPH_VALID is cleared in the child.
+	 *
+	 * XXX We could push this optimization a bit further by
+	 * clearing IA64_THREAD_FPH_VALID on ANY system call.
+	 * However, it's not clear this is worth doing.  Also, it
+	 * would be a slight deviation from the normal Linux system
+	 * call behavior where scratch registers are preserved across
+	 * system calls (unless used by the system call itself).
+	 */
+#	define THREAD_FLAGS_TO_CLEAR	(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \
+					 | IA64_THREAD_PM_VALID)
+#	define THREAD_FLAGS_TO_SET	0
+	p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
+			   | THREAD_FLAGS_TO_SET);
+
+	ia64_drop_fpu(p);	/* don't pick up stale state from a CPU's fph */
+
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		if (unlikely(!user_stack_base)) {
+			/* fork_idle() called us */
+			return 0;
+		}
+		memset(child_stack, 0, sizeof(*child_ptregs) + sizeof(*child_stack));
+		child_stack->r4 = user_stack_base;	/* payload */
+		child_stack->r5 = user_stack_size;	/* argument */
+		/*
+		 * Preserve PSR bits, except for bits 32-34 and 37-45,
+		 * which we can't read.
+		 */
+		child_ptregs->cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
+		/* mark as valid, empty frame */
+		child_ptregs->cr_ifs = 1UL << 63;
+		child_stack->ar_fpsr = child_ptregs->ar_fpsr
+			= ia64_getreg(_IA64_REG_AR_FPSR);
+		child_stack->pr = (1 << PRED_KERNEL_STACK);
+		child_stack->ar_bspstore = child_rbs;
+		child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
+
+		/* stop some PSR bits from being inherited.
+		 * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
+		 * therefore we must specify them explicitly here and not include them in
+		 * IA64_PSR_BITS_TO_CLEAR.
+		 */
+		child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
+				 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
+
+		return 0;
+	}
+	stack = ((struct switch_stack *) regs) - 1;
+	/* copy parent's switch_stack & pt_regs to child: */
+	memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
+
+	/* copy the parent's register backing store to the child: */
+	rbs_size = stack->ar_bspstore - rbs;
+	memcpy((void *) child_rbs, (void *) rbs, rbs_size);
+	if (clone_flags & CLONE_SETTLS)
+		child_ptregs->r13 = regs->r16;	/* see sys_clone2() in entry.S */
+	if (user_stack_base) {
+		child_ptregs->r12 = user_stack_base + user_stack_size - 16;
+		child_ptregs->ar_bspstore = user_stack_base;
+		child_ptregs->ar_rnat = 0;
+		child_ptregs->loadrs = 0;
+	}
+	child_stack->ar_bspstore = child_rbs + rbs_size;
+	child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
+
+	/* stop some PSR bits from being inherited.
+	 * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
+	 * therefore we must specify them explicitly here and not include them in
+	 * IA64_PSR_BITS_TO_CLEAR.
+	 */
+	child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
+				 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
+
+#ifdef CONFIG_PERFMON
+	if (current->thread.pfm_context)
+		pfm_inherit(p, child_ptregs);
+#endif
+	return retval;
+}
+
+static void
+do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg)
+{
+	unsigned long mask, sp, nat_bits = 0, ar_rnat, urbs_end, cfm;
+	unsigned long uninitialized_var(ip);	/* GCC be quiet */
+	elf_greg_t *dst = arg;
+	struct pt_regs *pt;
+	char nat;
+	int i;
+
+	memset(dst, 0, sizeof(elf_gregset_t));	/* don't leak any kernel bits to user-level */
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	unw_get_sp(info, &sp);
+	pt = (struct pt_regs *) (sp + 16);
+
+	urbs_end = ia64_get_user_rbs_end(task, pt, &cfm);
+
+	if (ia64_sync_user_rbs(task, info->sw, pt->ar_bspstore, urbs_end) < 0)
+		return;
+
+	ia64_peek(task, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) urbs_end),
+		  &ar_rnat);
+
+	/*
+	 * coredump format:
+	 *	r0-r31
+	 *	NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+	 *	predicate registers (p0-p63)
+	 *	b0-b7
+	 *	ip cfm user-mask
+	 *	ar.rsc ar.bsp ar.bspstore ar.rnat
+	 *	ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
+	 */
+
+	/* r0 is zero */
+	for (i = 1, mask = (1UL << i); i < 32; ++i) {
+		unw_get_gr(info, i, &dst[i], &nat);
+		if (nat)
+			nat_bits |= mask;
+		mask <<= 1;
+	}
+	dst[32] = nat_bits;
+	unw_get_pr(info, &dst[33]);
+
+	for (i = 0; i < 8; ++i)
+		unw_get_br(info, i, &dst[34 + i]);
+
+	unw_get_rp(info, &ip);
+	dst[42] = ip + ia64_psr(pt)->ri;
+	dst[43] = cfm;
+	dst[44] = pt->cr_ipsr & IA64_PSR_UM;
+
+	unw_get_ar(info, UNW_AR_RSC, &dst[45]);
+	/*
+	 * For bsp and bspstore, unw_get_ar() would return the kernel
+	 * addresses, but we need the user-level addresses instead:
+	 */
+	dst[46] = urbs_end;	/* note: by convention PT_AR_BSP points to the end of the urbs! */
+	dst[47] = pt->ar_bspstore;
+	dst[48] = ar_rnat;
+	unw_get_ar(info, UNW_AR_CCV, &dst[49]);
+	unw_get_ar(info, UNW_AR_UNAT, &dst[50]);
+	unw_get_ar(info, UNW_AR_FPSR, &dst[51]);
+	dst[52] = pt->ar_pfs;	/* UNW_AR_PFS is == to pt->cr_ifs for interrupt frames */
+	unw_get_ar(info, UNW_AR_LC, &dst[53]);
+	unw_get_ar(info, UNW_AR_EC, &dst[54]);
+	unw_get_ar(info, UNW_AR_CSD, &dst[55]);
+	unw_get_ar(info, UNW_AR_SSD, &dst[56]);
+}
+
+void
+do_dump_task_fpu (struct task_struct *task, struct unw_frame_info *info, void *arg)
+{
+	elf_fpreg_t *dst = arg;
+	int i;
+
+	memset(dst, 0, sizeof(elf_fpregset_t));	/* don't leak any "random" bits */
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* f0 is 0.0, f1 is 1.0 */
+
+	for (i = 2; i < 32; ++i)
+		unw_get_fr(info, i, dst + i);
+
+	ia64_flush_fph(task);
+	if ((task->thread.flags & IA64_THREAD_FPH_VALID) != 0)
+		memcpy(dst + 32, task->thread.fph, 96*16);
+}
+
+void
+do_copy_regs (struct unw_frame_info *info, void *arg)
+{
+	do_copy_task_regs(current, info, arg);
+}
+
+void
+do_dump_fpu (struct unw_frame_info *info, void *arg)
+{
+	do_dump_task_fpu(current, info, arg);
+}
+
+void
+ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
+{
+	unw_init_running(do_copy_regs, dst);
+}
+
+int
+dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
+{
+	unw_init_running(do_dump_fpu, dst);
+	return 1;	/* f0-f31 are always valid so we always return 1 */
+}
+
+/*
+ * Flush thread state.  This is called when a thread does an execve().
+ */
+void
+flush_thread (void)
+{
+	/* drop floating-point and debug-register state if it exists: */
+	current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID);
+	ia64_drop_fpu(current);
+}
+
+/*
+ * Clean up state associated with current thread.  This is called when
+ * the thread calls exit().
+ */
+void
+exit_thread (void)
+{
+
+	ia64_drop_fpu(current);
+#ifdef CONFIG_PERFMON
+       /* if needed, stop monitoring and flush state to perfmon context */
+	if (current->thread.pfm_context)
+		pfm_exit_thread(current);
+
+	/* free debug register resources */
+	if (current->thread.flags & IA64_THREAD_DBG_VALID)
+		pfm_release_debug_registers(current);
+#endif
+}
+
+unsigned long
+get_wchan (struct task_struct *p)
+{
+	struct unw_frame_info info;
+	unsigned long ip;
+	int count = 0;
+
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
+	/*
+	 * Note: p may not be a blocked task (it could be current or
+	 * another process running on some other CPU.  Rather than
+	 * trying to determine if p is really blocked, we just assume
+	 * it's blocked and rely on the unwind routines to fail
+	 * gracefully if the process wasn't really blocked after all.
+	 * --davidm 99/12/15
+	 */
+	unw_init_from_blocked_task(&info, p);
+	do {
+		if (p->state == TASK_RUNNING)
+			return 0;
+		if (unw_unwind(&info) < 0)
+			return 0;
+		unw_get_ip(&info, &ip);
+		if (!in_sched_functions(ip))
+			return ip;
+	} while (count++ < 16);
+	return 0;
+}
+
+void
+cpu_halt (void)
+{
+	pal_power_mgmt_info_u_t power_info[8];
+	unsigned long min_power;
+	int i, min_power_state;
+
+	if (ia64_pal_halt_info(power_info) != 0)
+		return;
+
+	min_power_state = 0;
+	min_power = power_info[0].pal_power_mgmt_info_s.power_consumption;
+	for (i = 1; i < 8; ++i)
+		if (power_info[i].pal_power_mgmt_info_s.im
+		    && power_info[i].pal_power_mgmt_info_s.power_consumption < min_power) {
+			min_power = power_info[i].pal_power_mgmt_info_s.power_consumption;
+			min_power_state = i;
+		}
+
+	while (1)
+		ia64_pal_halt(min_power_state);
+}
+
+void machine_shutdown(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (cpu != smp_processor_id())
+			cpu_down(cpu);
+	}
+#endif
+#ifdef CONFIG_KEXEC
+	kexec_disable_iosapic();
+#endif
+}
+
+void
+machine_restart (char *restart_cmd)
+{
+	(void) notify_die(DIE_MACHINE_RESTART, restart_cmd, NULL, 0, 0, 0);
+	efi_reboot(REBOOT_WARM, NULL);
+}
+
+void
+machine_halt (void)
+{
+	(void) notify_die(DIE_MACHINE_HALT, "", NULL, 0, 0, 0);
+	cpu_halt();
+}
+
+void
+machine_power_off (void)
+{
+	if (pm_power_off)
+		pm_power_off();
+	machine_halt();
+}
+
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
--- a/arch/ia64/kernel/relocate_kernel.S
+++ b/arch/ia64/kernel/relocate_kernel.S
@ -0,0 +1,325 @@
+/*
+ * arch/ia64/kernel/relocate_kernel.S
+ *
+ * Relocate kexec'able kernel and start it
+ *
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Khalid Aziz  <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Intel Corp,  Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+#include <asm/asmmacro.h>
+#include <asm/kregs.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mca_asm.h>
+
+       /* Must be relocatable PIC code callable as a C function
+        */
+GLOBAL_ENTRY(relocate_new_kernel)
+	.prologue
+	alloc r31=ar.pfs,4,0,0,0
+        .body
+.reloc_entry:
+{
+	rsm psr.i| psr.ic
+	mov r2=ip
+}
+	;;
+{
+        flushrs                         // must be first insn in group
+        srlz.i
+}
+	;;
+	dep r2=0,r2,61,3		//to physical address
+	;;
+	//first switch to physical mode
+	add r3=1f-.reloc_entry, r2
+	movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC
+	mov ar.rsc=0	          	// put RSE in enforced lazy mode
+	;;
+	add sp=(memory_stack_end - 16 - .reloc_entry),r2
+	add r8=(register_stack - .reloc_entry),r2
+	;;
+	mov r18=ar.rnat
+	mov ar.bspstore=r8
+	;;
+        mov cr.ipsr=r16
+        mov cr.iip=r3
+        mov cr.ifs=r0
+	srlz.i
+	;;
+	mov ar.rnat=r18
+	rfi				// note: this unmask MCA/INIT (psr.mc)
+	;;
+1:
+	//physical mode code begin
+	mov b6=in1
+	dep r28=0,in2,61,3	//to physical address
+
+	// purge all TC entries
+#define O(member)       IA64_CPUINFO_##member##_OFFSET
+        GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2
+        ;;
+        addl r17=O(PTCE_STRIDE),r2
+        addl r2=O(PTCE_BASE),r2
+        ;;
+        ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));;    	// r18=ptce_base
+        ld4 r19=[r2],4                                  // r19=ptce_count[0]
+        ld4 r21=[r17],4                                 // r21=ptce_stride[0]
+        ;;
+        ld4 r20=[r2]                                    // r20=ptce_count[1]
+        ld4 r22=[r17]                                   // r22=ptce_stride[1]
+        mov r24=r0
+        ;;
+        adds r20=-1,r20
+        ;;
+#undef O
+2:
+        cmp.ltu p6,p7=r24,r19
+(p7)    br.cond.dpnt.few 4f
+        mov ar.lc=r20
+3:
+        ptc.e r18
+        ;;
+        add r18=r22,r18
+        br.cloop.sptk.few 3b
+        ;;
+        add r18=r21,r18
+        add r24=1,r24
+        ;;
+        br.sptk.few 2b
+4:
+        srlz.i
+        ;;
+	// purge TR entry for kernel text and data
+        movl r16=KERNEL_START
+        mov r18=KERNEL_TR_PAGE_SHIFT<<2
+        ;;
+        ptr.i r16, r18
+        ptr.d r16, r18
+        ;;
+        srlz.i
+        ;;
+
+        // purge TR entry for pal code
+        mov r16=in3
+        mov r18=IA64_GRANULE_SHIFT<<2
+        ;;
+        ptr.i r16,r18
+        ;;
+        srlz.i
+	;;
+
+        // purge TR entry for stack
+        mov r16=IA64_KR(CURRENT_STACK)
+        ;;
+        shl r16=r16,IA64_GRANULE_SHIFT
+        movl r19=PAGE_OFFSET
+        ;;
+        add r16=r19,r16
+        mov r18=IA64_GRANULE_SHIFT<<2
+        ;;
+        ptr.d r16,r18
+        ;;
+        srlz.i
+	;;
+
+	//copy segments
+	movl r16=PAGE_MASK
+        mov  r30=in0                    // in0 is page_list
+        br.sptk.few .dest_page
+	;;
+.loop:
+	ld8  r30=[in0], 8;;
+.dest_page:
+	tbit.z p0, p6=r30, 0;;    	// 0x1 dest page
+(p6)	and r17=r30, r16
+(p6)	br.cond.sptk.few .loop;;
+
+	tbit.z p0, p6=r30, 1;;		// 0x2 indirect page
+(p6)	and in0=r30, r16
+(p6)	br.cond.sptk.few .loop;;
+
+	tbit.z p0, p6=r30, 2;;		// 0x4 end flag
+(p6)	br.cond.sptk.few .end_loop;;
+
+	tbit.z p6, p0=r30, 3;;		// 0x8 source page
+(p6)	br.cond.sptk.few .loop
+
+	and r18=r30, r16
+
+	// simple copy page, may optimize later
+	movl r14=PAGE_SIZE/8 - 1;;
+	mov ar.lc=r14;;
+1:
+	ld8 r14=[r18], 8;;
+	st8 [r17]=r14;;
+	fc.i r17
+	add r17=8, r17
+	br.ctop.sptk.few 1b
+	br.sptk.few .loop
+	;;
+
+.end_loop:
+	sync.i			// for fc.i
+	;;
+	srlz.i
+	;;
+	srlz.d
+	;;
+	br.call.sptk.many b0=b6;;
+
+.align  32
+memory_stack:
+	.fill           8192, 1, 0
+memory_stack_end:
+register_stack:
+	.fill           8192, 1, 0
+register_stack_end:
+relocate_new_kernel_end:
+END(relocate_new_kernel)
+
+.global relocate_new_kernel_size
+relocate_new_kernel_size:
+	data8	relocate_new_kernel_end - relocate_new_kernel
+
+GLOBAL_ENTRY(ia64_dump_cpu_regs)
+        .prologue
+        alloc loc0=ar.pfs,1,2,0,0
+        .body
+        mov     ar.rsc=0                // put RSE in enforced lazy mode
+        add     loc1=4*8, in0           // save r4 and r5 first
+        ;;
+{
+        flushrs                         // flush dirty regs to backing store
+        srlz.i
+}
+        st8 [loc1]=r4, 8
+        ;;
+        st8 [loc1]=r5, 8
+        ;;
+        add loc1=32*8, in0
+        mov r4=ar.rnat
+        ;;
+        st8 [in0]=r0, 8			// r0
+        st8 [loc1]=r4, 8		// rnat
+        mov r5=pr
+        ;;
+        st8 [in0]=r1, 8			// r1
+        st8 [loc1]=r5, 8		// pr
+        mov r4=b0
+        ;;
+        st8 [in0]=r2, 8			// r2
+        st8 [loc1]=r4, 8		// b0
+        mov r5=b1;
+        ;;
+        st8 [in0]=r3, 24		// r3
+        st8 [loc1]=r5, 8		// b1
+        mov r4=b2
+        ;;
+        st8 [in0]=r6, 8			// r6
+        st8 [loc1]=r4, 8		// b2
+	mov r5=b3
+        ;;
+        st8 [in0]=r7, 8			// r7
+        st8 [loc1]=r5, 8		// b3
+        mov r4=b4
+        ;;
+        st8 [in0]=r8, 8			// r8
+        st8 [loc1]=r4, 8		// b4
+        mov r5=b5
+        ;;
+        st8 [in0]=r9, 8			// r9
+        st8 [loc1]=r5, 8		// b5
+        mov r4=b6
+        ;;
+        st8 [in0]=r10, 8		// r10
+        st8 [loc1]=r5, 8		// b6
+        mov r5=b7
+        ;;
+        st8 [in0]=r11, 8		// r11
+        st8 [loc1]=r5, 8		// b7
+        mov r4=b0
+        ;;
+        st8 [in0]=r12, 8		// r12
+        st8 [loc1]=r4, 8		// ip
+        mov r5=loc0
+	;;
+        st8 [in0]=r13, 8		// r13
+        extr.u r5=r5, 0, 38		// ar.pfs.pfm
+	mov r4=r0			// user mask
+        ;;
+        st8 [in0]=r14, 8		// r14
+        st8 [loc1]=r5, 8		// cfm
+        ;;
+        st8 [in0]=r15, 8		// r15
+        st8 [loc1]=r4, 8        	// user mask
+	mov r5=ar.rsc
+        ;;
+        st8 [in0]=r16, 8		// r16
+        st8 [loc1]=r5, 8        	// ar.rsc
+        mov r4=ar.bsp
+        ;;
+        st8 [in0]=r17, 8		// r17
+        st8 [loc1]=r4, 8        	// ar.bsp
+        mov r5=ar.bspstore
+        ;;
+        st8 [in0]=r18, 8		// r18
+        st8 [loc1]=r5, 8        	// ar.bspstore
+        mov r4=ar.rnat
+        ;;
+        st8 [in0]=r19, 8		// r19
+        st8 [loc1]=r4, 8        	// ar.rnat
+        mov r5=ar.ccv
+        ;;
+        st8 [in0]=r20, 8		// r20
+	st8 [loc1]=r5, 8        	// ar.ccv
+        mov r4=ar.unat
+        ;;
+        st8 [in0]=r21, 8		// r21
+        st8 [loc1]=r4, 8        	// ar.unat
+        mov r5 = ar.fpsr
+        ;;
+        st8 [in0]=r22, 8		// r22
+        st8 [loc1]=r5, 8        	// ar.fpsr
+        mov r4 = ar.unat
+        ;;
+        st8 [in0]=r23, 8		// r23
+        st8 [loc1]=r4, 8        	// unat
+        mov r5 = ar.fpsr
+        ;;
+        st8 [in0]=r24, 8		// r24
+        st8 [loc1]=r5, 8        	// fpsr
+        mov r4 = ar.pfs
+        ;;
+        st8 [in0]=r25, 8		// r25
+        st8 [loc1]=r4, 8        	// ar.pfs
+        mov r5 = ar.lc
+        ;;
+        st8 [in0]=r26, 8		// r26
+        st8 [loc1]=r5, 8        	// ar.lc
+        mov r4 = ar.ec
+        ;;
+        st8 [in0]=r27, 8		// r27
+        st8 [loc1]=r4, 8        	// ar.ec
+        mov r5 = ar.csd
+        ;;
+        st8 [in0]=r28, 8		// r28
+        st8 [loc1]=r5, 8        	// ar.csd
+        mov r4 = ar.ssd
+        ;;
+        st8 [in0]=r29, 8		// r29
+        st8 [loc1]=r4, 8        	// ar.ssd
+        ;;
+        st8 [in0]=r30, 8		// r30
+        ;;
+	st8 [in0]=r31, 8		// r31
+        mov ar.pfs=loc0
+        ;;
+        br.ret.sptk.many rp
+END(ia64_dump_cpu_regs)
+
+
--- a/arch/ia64/kernel/sal.c
+++ b/arch/ia64/kernel/sal.c
@ -0,0 +1,405 @@
+/*
+ * System Abstraction Layer (SAL) interface routines.
+ *
+ * Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+
+#include <asm/delay.h>
+#include <asm/page.h>
+#include <asm/sal.h>
+#include <asm/pal.h>
+
+ __cacheline_aligned DEFINE_SPINLOCK(sal_lock);
+unsigned long sal_platform_features;
+
+unsigned short sal_revision;
+unsigned short sal_version;
+
+#define SAL_MAJOR(x) ((x) >> 8)
+#define SAL_MINOR(x) ((x) & 0xff)
+
+static struct {
+	void *addr;	/* function entry point */
+	void *gpval;	/* gp value to use */
+} pdesc;
+
+static long
+default_handler (void)
+{
+	return -1;
+}
+
+ia64_sal_handler ia64_sal = (ia64_sal_handler) default_handler;
+ia64_sal_desc_ptc_t *ia64_ptc_domain_info;
+
+const char *
+ia64_sal_strerror (long status)
+{
+	const char *str;
+	switch (status) {
+	      case 0: str = "Call completed without error"; break;
+	      case 1: str = "Effect a warm boot of the system to complete "
+			      "the update"; break;
+	      case -1: str = "Not implemented"; break;
+	      case -2: str = "Invalid argument"; break;
+	      case -3: str = "Call completed with error"; break;
+	      case -4: str = "Virtual address not registered"; break;
+	      case -5: str = "No information available"; break;
+	      case -6: str = "Insufficient space to add the entry"; break;
+	      case -7: str = "Invalid entry_addr value"; break;
+	      case -8: str = "Invalid interrupt vector"; break;
+	      case -9: str = "Requested memory not available"; break;
+	      case -10: str = "Unable to write to the NVM device"; break;
+	      case -11: str = "Invalid partition type specified"; break;
+	      case -12: str = "Invalid NVM_Object id specified"; break;
+	      case -13: str = "NVM_Object already has the maximum number "
+				"of partitions"; break;
+	      case -14: str = "Insufficient space in partition for the "
+				"requested write sub-function"; break;
+	      case -15: str = "Insufficient data buffer space for the "
+				"requested read record sub-function"; break;
+	      case -16: str = "Scratch buffer required for the write/delete "
+				"sub-function"; break;
+	      case -17: str = "Insufficient space in the NVM_Object for the "
+				"requested create sub-function"; break;
+	      case -18: str = "Invalid value specified in the partition_rec "
+				"argument"; break;
+	      case -19: str = "Record oriented I/O not supported for this "
+				"partition"; break;
+	      case -20: str = "Bad format of record to be written or "
+				"required keyword variable not "
+				"specified"; break;
+	      default: str = "Unknown SAL status code"; break;
+	}
+	return str;
+}
+
+void __init
+ia64_sal_handler_init (void *entry_point, void *gpval)
+{
+	/* fill in the SAL procedure descriptor and point ia64_sal to it: */
+	pdesc.addr = entry_point;
+	pdesc.gpval = gpval;
+	ia64_sal = (ia64_sal_handler) &pdesc;
+}
+
+static void __init
+check_versions (struct ia64_sal_systab *systab)
+{
+	sal_revision = (systab->sal_rev_major << 8) | systab->sal_rev_minor;
+	sal_version = (systab->sal_b_rev_major << 8) | systab->sal_b_rev_minor;
+
+	/* Check for broken firmware */
+	if ((sal_revision == SAL_VERSION_CODE(49, 29))
+	    && (sal_version == SAL_VERSION_CODE(49, 29)))
+	{
+		/*
+		 * Old firmware for zx2000 prototypes have this weird version number,
+		 * reset it to something sane.
+		 */
+		sal_revision = SAL_VERSION_CODE(2, 8);
+		sal_version = SAL_VERSION_CODE(0, 0);
+	}
+
+	if (ia64_platform_is("sn2") && (sal_revision == SAL_VERSION_CODE(2, 9)))
+		/*
+		 * SGI Altix has hard-coded version 2.9 in their prom
+		 * but they actually implement 3.2, so let's fix it here.
+		 */
+		sal_revision = SAL_VERSION_CODE(3, 2);
+}
+
+static void __init
+sal_desc_entry_point (void *p)
+{
+	struct ia64_sal_desc_entry_point *ep = p;
+	ia64_pal_handler_init(__va(ep->pal_proc));
+	ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp));
+}
+
+#ifdef CONFIG_SMP
+static void __init
+set_smp_redirect (int flag)
+{
+#ifndef CONFIG_HOTPLUG_CPU
+	if (no_int_routing)
+		smp_int_redirect &= ~flag;
+	else
+		smp_int_redirect |= flag;
+#else
+	/*
+	 * For CPU Hotplug we dont want to do any chipset supported
+	 * interrupt redirection. The reason is this would require that
+	 * All interrupts be stopped and hard bind the irq to a cpu.
+	 * Later when the interrupt is fired we need to set the redir hint
+	 * on again in the vector. This is cumbersome for something that the
+	 * user mode irq balancer will solve anyways.
+	 */
+	no_int_routing=1;
+	smp_int_redirect &= ~flag;
+#endif
+}
+#else
+#define set_smp_redirect(flag)	do { } while (0)
+#endif
+
+static void __init
+sal_desc_platform_feature (void *p)
+{
+	struct ia64_sal_desc_platform_feature *pf = p;
+	sal_platform_features = pf->feature_mask;
+
+	printk(KERN_INFO "SAL Platform features:");
+	if (!sal_platform_features) {
+		printk(" None\n");
+		return;
+	}
+
+	if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_BUS_LOCK)
+		printk(" BusLock");
+	if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT) {
+		printk(" IRQ_Redirection");
+		set_smp_redirect(SMP_IRQ_REDIRECTION);
+	}
+	if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT) {
+		printk(" IPI_Redirection");
+		set_smp_redirect(SMP_IPI_REDIRECTION);
+	}
+	if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)
+		printk(" ITC_Drift");
+	printk("\n");
+}
+
+#ifdef CONFIG_SMP
+static void __init
+sal_desc_ap_wakeup (void *p)
+{
+	struct ia64_sal_desc_ap_wakeup *ap = p;
+
+	switch (ap->mechanism) {
+	case IA64_SAL_AP_EXTERNAL_INT:
+		ap_wakeup_vector = ap->vector;
+		printk(KERN_INFO "SAL: AP wakeup using external interrupt "
+				"vector 0x%lx\n", ap_wakeup_vector);
+		break;
+	default:
+		printk(KERN_ERR "SAL: AP wakeup mechanism unsupported!\n");
+		break;
+	}
+}
+
+static void __init
+chk_nointroute_opt(void)
+{
+	char *cp;
+
+	for (cp = boot_command_line; *cp; ) {
+		if (memcmp(cp, "nointroute", 10) == 0) {
+			no_int_routing = 1;
+			printk ("no_int_routing on\n");
+			break;
+		} else {
+			while (*cp != ' ' && *cp)
+				++cp;
+			while (*cp == ' ')
+				++cp;
+		}
+	}
+}
+
+#else
+static void __init sal_desc_ap_wakeup(void *p) { }
+#endif
+
+/*
+ * HP rx5670 firmware polls for interrupts during SAL_CACHE_FLUSH by reading
+ * cr.ivr, but it never writes cr.eoi.  This leaves any interrupt marked as
+ * "in-service" and masks other interrupts of equal or lower priority.
+ *
+ * HP internal defect reports: F1859, F2775, F3031.
+ */
+static int sal_cache_flush_drops_interrupts;
+
+static int __init
+force_pal_cache_flush(char *str)
+{
+	sal_cache_flush_drops_interrupts = 1;
+	return 0;
+}
+early_param("force_pal_cache_flush", force_pal_cache_flush);
+
+void __init
+check_sal_cache_flush (void)
+{
+	unsigned long flags;
+	int cpu;
+	u64 vector, cache_type = 3;
+	struct ia64_sal_retval isrv;
+
+	if (sal_cache_flush_drops_interrupts)
+		return;
+
+	cpu = get_cpu();
+	local_irq_save(flags);
+
+	/*
+	 * Send ourselves a timer interrupt, wait until it's reported, and see
+	 * if SAL_CACHE_FLUSH drops it.
+	 */
+	platform_send_ipi(cpu, IA64_TIMER_VECTOR, IA64_IPI_DM_INT, 0);
+
+	while (!ia64_get_irr(IA64_TIMER_VECTOR))
+		cpu_relax();
+
+	SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0);
+
+	if (isrv.status)
+		printk(KERN_ERR "SAL_CAL_FLUSH failed with %ld\n", isrv.status);
+
+	if (ia64_get_irr(IA64_TIMER_VECTOR)) {
+		vector = ia64_get_ivr();
+		ia64_eoi();
+		WARN_ON(vector != IA64_TIMER_VECTOR);
+	} else {
+		sal_cache_flush_drops_interrupts = 1;
+		printk(KERN_ERR "SAL: SAL_CACHE_FLUSH drops interrupts; "
+			"PAL_CACHE_FLUSH will be used instead\n");
+		ia64_eoi();
+	}
+
+	local_irq_restore(flags);
+	put_cpu();
+}
+
+s64
+ia64_sal_cache_flush (u64 cache_type)
+{
+	struct ia64_sal_retval isrv;
+
+	if (sal_cache_flush_drops_interrupts) {
+		unsigned long flags;
+		u64 progress;
+		s64 rc;
+
+		progress = 0;
+		local_irq_save(flags);
+		rc = ia64_pal_cache_flush(cache_type,
+			PAL_CACHE_FLUSH_INVALIDATE, &progress, NULL);
+		local_irq_restore(flags);
+		return rc;
+	}
+
+	SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0);
+	return isrv.status;
+}
+EXPORT_SYMBOL_GPL(ia64_sal_cache_flush);
+
+void __init
+ia64_sal_init (struct ia64_sal_systab *systab)
+{
+	char *p;
+	int i;
+
+	if (!systab) {
+		printk(KERN_WARNING "Hmm, no SAL System Table.\n");
+		return;
+	}
+
+	if (strncmp(systab->signature, "SST_", 4) != 0)
+		printk(KERN_ERR "bad signature in system table!");
+
+	check_versions(systab);
+#ifdef CONFIG_SMP
+	chk_nointroute_opt();
+#endif
+
+	/* revisions are coded in BCD, so %x does the job for us */
+	printk(KERN_INFO "SAL %x.%x: %.32s %.32s%sversion %x.%x\n",
+			SAL_MAJOR(sal_revision), SAL_MINOR(sal_revision),
+			systab->oem_id, systab->product_id,
+			systab->product_id[0] ? " " : "",
+			SAL_MAJOR(sal_version), SAL_MINOR(sal_version));
+
+	p = (char *) (systab + 1);
+	for (i = 0; i < systab->entry_count; i++) {
+		/*
+		 * The first byte of each entry type contains the type
+		 * descriptor.
+		 */
+		switch (*p) {
+		case SAL_DESC_ENTRY_POINT:
+			sal_desc_entry_point(p);
+			break;
+		case SAL_DESC_PLATFORM_FEATURE:
+			sal_desc_platform_feature(p);
+			break;
+		case SAL_DESC_PTC:
+			ia64_ptc_domain_info = (ia64_sal_desc_ptc_t *)p;
+			break;
+		case SAL_DESC_AP_WAKEUP:
+			sal_desc_ap_wakeup(p);
+			break;
+		}
+		p += SAL_DESC_SIZE(*p);
+	}
+
+}
+
+int
+ia64_sal_oemcall(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1,
+		 u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7)
+{
+	if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX)
+		return -1;
+	SAL_CALL(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+	return 0;
+}
+EXPORT_SYMBOL(ia64_sal_oemcall);
+
+int
+ia64_sal_oemcall_nolock(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1,
+			u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6,
+			u64 arg7)
+{
+	if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX)
+		return -1;
+	SAL_CALL_NOLOCK(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6,
+			arg7);
+	return 0;
+}
+EXPORT_SYMBOL(ia64_sal_oemcall_nolock);
+
+int
+ia64_sal_oemcall_reentrant(struct ia64_sal_retval *isrvp, u64 oemfunc,
+			   u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5,
+			   u64 arg6, u64 arg7)
+{
+	if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX)
+		return -1;
+	SAL_CALL_REENTRANT(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6,
+			   arg7);
+	return 0;
+}
+EXPORT_SYMBOL(ia64_sal_oemcall_reentrant);
+
+long
+ia64_sal_freq_base (unsigned long which, unsigned long *ticks_per_second,
+		    unsigned long *drift_info)
+{
+	struct ia64_sal_retval isrv;
+
+	SAL_CALL(isrv, SAL_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
+	*ticks_per_second = isrv.v0;
+	*drift_info = isrv.v1;
+	return isrv.status;
+}
+EXPORT_SYMBOL_GPL(ia64_sal_freq_base);
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@ -0,0 +1,704 @@
+/*
+ * salinfo.c
+ *
+ * Creates entries in /proc/sal for various system features.
+ *
+ * Copyright (c) 2003, 2006 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2003 Hewlett-Packard Co
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * 10/30/2001	jbarnes@sgi.com		copied much of Stephane's palinfo
+ *					code to create this file
+ * Oct 23 2003	kaos@sgi.com
+ *   Replace IPI with set_cpus_allowed() to read a record from the required cpu.
+ *   Redesign salinfo log processing to separate interrupt and user space
+ *   contexts.
+ *   Cache the record across multi-block reads from user space.
+ *   Support > 64 cpus.
+ *   Delete module_exit and MOD_INC/DEC_COUNT, salinfo cannot be a module.
+ *
+ * Jan 28 2004	kaos@sgi.com
+ *   Periodically check for outstanding MCA or INIT records.
+ *
+ * Dec  5 2004	kaos@sgi.com
+ *   Standardize which records are cleared automatically.
+ *
+ * Aug 18 2005	kaos@sgi.com
+ *   mca.c may not pass a buffer, a NULL buffer just indicates that a new
+ *   record is available in SAL.
+ *   Replace some NR_CPUS by cpus_online, for hotplug cpu.
+ *
+ * Jan  5 2006        kaos@sgi.com
+ *   Handle hotplug cpus coming online.
+ *   Handle hotplug cpus going offline while they still have outstanding records.
+ *   Use the cpu_* macros consistently.
+ *   Replace the counting semaphore with a mutex and a test if the cpumask is non-empty.
+ *   Modify the locking to make the test for "work to do" an atomic operation.
+ */
+
+#include <linux/capability.h>
+#include <linux/cpu.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/timer.h>
+#include <linux/vmalloc.h>
+#include <linux/semaphore.h>
+
+#include <asm/sal.h>
+#include <asm/uaccess.h>
+
+MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>");
+MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
+MODULE_LICENSE("GPL");
+
+static const struct file_operations proc_salinfo_fops;
+
+typedef struct {
+	const char		*name;		/* name of the proc entry */
+	unsigned long           feature;        /* feature bit */
+	struct proc_dir_entry	*entry;		/* registered entry (removal) */
+} salinfo_entry_t;
+
+/*
+ * List {name,feature} pairs for every entry in /proc/sal/<feature>
+ * that this module exports
+ */
+static const salinfo_entry_t salinfo_entries[]={
+	{ "bus_lock",           IA64_SAL_PLATFORM_FEATURE_BUS_LOCK, },
+	{ "irq_redirection",	IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT, },
+	{ "ipi_redirection",	IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT, },
+	{ "itc_drift",		IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT, },
+};
+
+#define NR_SALINFO_ENTRIES ARRAY_SIZE(salinfo_entries)
+
+static char *salinfo_log_name[] = {
+	"mca",
+	"init",
+	"cmc",
+	"cpe",
+};
+
+static struct proc_dir_entry *salinfo_proc_entries[
+	ARRAY_SIZE(salinfo_entries) +			/* /proc/sal/bus_lock */
+	ARRAY_SIZE(salinfo_log_name) +			/* /proc/sal/{mca,...} */
+	(2 * ARRAY_SIZE(salinfo_log_name)) +		/* /proc/sal/mca/{event,data} */
+	1];						/* /proc/sal */
+
+/* Some records we get ourselves, some are accessed as saved data in buffers
+ * that are owned by mca.c.
+ */
+struct salinfo_data_saved {
+	u8*			buffer;
+	u64			size;
+	u64			id;
+	int			cpu;
+};
+
+/* State transitions.  Actions are :-
+ *   Write "read <cpunum>" to the data file.
+ *   Write "clear <cpunum>" to the data file.
+ *   Write "oemdata <cpunum> <offset> to the data file.
+ *   Read from the data file.
+ *   Close the data file.
+ *
+ * Start state is NO_DATA.
+ *
+ * NO_DATA
+ *    write "read <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "oemdata <cpunum> <offset> -> return -EINVAL.
+ *    read data -> return EOF.
+ *    close -> unchanged.  Free record areas.
+ *
+ * LOG_RECORD
+ *    write "read <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "oemdata <cpunum> <offset> -> format the oem data, goto OEMDATA.
+ *    read data -> return the INIT/MCA/CMC/CPE record.
+ *    close -> unchanged.  Keep record areas.
+ *
+ * OEMDATA
+ *    write "read <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "oemdata <cpunum> <offset> -> format the oem data, goto OEMDATA.
+ *    read data -> return the formatted oemdata.
+ *    close -> unchanged.  Keep record areas.
+ *
+ * Closing the data file does not change the state.  This allows shell scripts
+ * to manipulate salinfo data, each shell redirection opens the file, does one
+ * action then closes it again.  The record areas are only freed at close when
+ * the state is NO_DATA.
+ */
+enum salinfo_state {
+	STATE_NO_DATA,
+	STATE_LOG_RECORD,
+	STATE_OEMDATA,
+};
+
+struct salinfo_data {
+	cpumask_t		cpu_event;	/* which cpus have outstanding events */
+	struct semaphore	mutex;
+	u8			*log_buffer;
+	u64			log_size;
+	u8			*oemdata;	/* decoded oem data */
+	u64			oemdata_size;
+	int			open;		/* single-open to prevent races */
+	u8			type;
+	u8			saved_num;	/* using a saved record? */
+	enum salinfo_state	state :8;	/* processing state */
+	u8			padding;
+	int			cpu_check;	/* next CPU to check */
+	struct salinfo_data_saved data_saved[5];/* save last 5 records from mca.c, must be < 255 */
+};
+
+static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)];
+
+static DEFINE_SPINLOCK(data_lock);
+static DEFINE_SPINLOCK(data_saved_lock);
+
+/** salinfo_platform_oemdata - optional callback to decode oemdata from an error
+ * record.
+ * @sect_header: pointer to the start of the section to decode.
+ * @oemdata: returns vmalloc area containing the decoded output.
+ * @oemdata_size: returns length of decoded output (strlen).
+ *
+ * Description: If user space asks for oem data to be decoded by the kernel
+ * and/or prom and the platform has set salinfo_platform_oemdata to the address
+ * of a platform specific routine then call that routine.  salinfo_platform_oemdata
+ * vmalloc's and formats its output area, returning the address of the text
+ * and its strlen.  Returns 0 for success, -ve for error.  The callback is
+ * invoked on the cpu that generated the error record.
+ */
+int (*salinfo_platform_oemdata)(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size);
+
+struct salinfo_platform_oemdata_parms {
+	const u8 *efi_guid;
+	u8 **oemdata;
+	u64 *oemdata_size;
+	int ret;
+};
+
+/* Kick the mutex that tells user space that there is work to do.  Instead of
+ * trying to track the state of the mutex across multiple cpus, in user
+ * context, interrupt context, non-maskable interrupt context and hotplug cpu,
+ * it is far easier just to grab the mutex if it is free then release it.
+ *
+ * This routine must be called with data_saved_lock held, to make the down/up
+ * operation atomic.
+ */
+static void
+salinfo_work_to_do(struct salinfo_data *data)
+{
+	(void)(down_trylock(&data->mutex) ?: 0);
+	up(&data->mutex);
+}
+
+static void
+salinfo_platform_oemdata_cpu(void *context)
+{
+	struct salinfo_platform_oemdata_parms *parms = context;
+	parms->ret = salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size);
+}
+
+static void
+shift1_data_saved (struct salinfo_data *data, int shift)
+{
+	memcpy(data->data_saved+shift, data->data_saved+shift+1,
+	       (ARRAY_SIZE(data->data_saved) - (shift+1)) * sizeof(data->data_saved[0]));
+	memset(data->data_saved + ARRAY_SIZE(data->data_saved) - 1, 0,
+	       sizeof(data->data_saved[0]));
+}
+
+/* This routine is invoked in interrupt context.  Note: mca.c enables
+ * interrupts before calling this code for CMC/CPE.  MCA and INIT events are
+ * not irq safe, do not call any routines that use spinlocks, they may deadlock.
+ * MCA and INIT records are recorded, a timer event will look for any
+ * outstanding events and wake up the user space code.
+ *
+ * The buffer passed from mca.c points to the output from ia64_log_get. This is
+ * a persistent buffer but its contents can change between the interrupt and
+ * when user space processes the record.  Save the record id to identify
+ * changes.  If the buffer is NULL then just update the bitmap.
+ */
+void
+salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
+{
+	struct salinfo_data *data = salinfo_data + type;
+	struct salinfo_data_saved *data_saved;
+	unsigned long flags = 0;
+	int i;
+	int saved_size = ARRAY_SIZE(data->data_saved);
+
+	BUG_ON(type >= ARRAY_SIZE(salinfo_log_name));
+
+	if (irqsafe)
+		spin_lock_irqsave(&data_saved_lock, flags);
+	if (buffer) {
+		for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
+			if (!data_saved->buffer)
+				break;
+		}
+		if (i == saved_size) {
+			if (!data->saved_num) {
+				shift1_data_saved(data, 0);
+				data_saved = data->data_saved + saved_size - 1;
+			} else
+				data_saved = NULL;
+		}
+		if (data_saved) {
+			data_saved->cpu = smp_processor_id();
+			data_saved->id = ((sal_log_record_header_t *)buffer)->id;
+			data_saved->size = size;
+			data_saved->buffer = buffer;
+		}
+	}
+	cpu_set(smp_processor_id(), data->cpu_event);
+	if (irqsafe) {
+		salinfo_work_to_do(data);
+		spin_unlock_irqrestore(&data_saved_lock, flags);
+	}
+}
+
+/* Check for outstanding MCA/INIT records every minute (arbitrary) */
+#define SALINFO_TIMER_DELAY (60*HZ)
+static struct timer_list salinfo_timer;
+extern void ia64_mlogbuf_dump(void);
+
+static void
+salinfo_timeout_check(struct salinfo_data *data)
+{
+	unsigned long flags;
+	if (!data->open)
+		return;
+	if (!cpus_empty(data->cpu_event)) {
+		spin_lock_irqsave(&data_saved_lock, flags);
+		salinfo_work_to_do(data);
+		spin_unlock_irqrestore(&data_saved_lock, flags);
+	}
+}
+
+static void
+salinfo_timeout (unsigned long arg)
+{
+	ia64_mlogbuf_dump();
+	salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA);
+	salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT);
+	salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY;
+	add_timer(&salinfo_timer);
+}
+
+static int
+salinfo_event_open(struct inode *inode, struct file *file)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	return 0;
+}
+
+static ssize_t
+salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
+{
+	struct salinfo_data *data = PDE_DATA(file_inode(file));
+	char cmd[32];
+	size_t size;
+	int i, n, cpu = -1;
+
+retry:
+	if (cpus_empty(data->cpu_event) && down_trylock(&data->mutex)) {
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+		if (down_interruptible(&data->mutex))
+			return -EINTR;
+	}
+
+	n = data->cpu_check;
+	for (i = 0; i < nr_cpu_ids; i++) {
+		if (cpu_isset(n, data->cpu_event)) {
+			if (!cpu_online(n)) {
+				cpu_clear(n, data->cpu_event);
+				continue;
+			}
+			cpu = n;
+			break;
+		}
+		if (++n == nr_cpu_ids)
+			n = 0;
+	}
+
+	if (cpu == -1)
+		goto retry;
+
+	ia64_mlogbuf_dump();
+
+	/* for next read, start checking at next CPU */
+	data->cpu_check = cpu;
+	if (++data->cpu_check == nr_cpu_ids)
+		data->cpu_check = 0;
+
+	snprintf(cmd, sizeof(cmd), "read %d\n", cpu);
+
+	size = strlen(cmd);
+	if (size > count)
+		size = count;
+	if (copy_to_user(buffer, cmd, size))
+		return -EFAULT;
+
+	return size;
+}
+
+static const struct file_operations salinfo_event_fops = {
+	.open  = salinfo_event_open,
+	.read  = salinfo_event_read,
+	.llseek = noop_llseek,
+};
+
+static int
+salinfo_log_open(struct inode *inode, struct file *file)
+{
+	struct salinfo_data *data = PDE_DATA(inode);
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	spin_lock(&data_lock);
+	if (data->open) {
+		spin_unlock(&data_lock);
+		return -EBUSY;
+	}
+	data->open = 1;
+	spin_unlock(&data_lock);
+
+	if (data->state == STATE_NO_DATA &&
+	    !(data->log_buffer = vmalloc(ia64_sal_get_state_info_size(data->type)))) {
+		data->open = 0;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int
+salinfo_log_release(struct inode *inode, struct file *file)
+{
+	struct salinfo_data *data = PDE_DATA(inode);
+
+	if (data->state == STATE_NO_DATA) {
+		vfree(data->log_buffer);
+		vfree(data->oemdata);
+		data->log_buffer = NULL;
+		data->oemdata = NULL;
+	}
+	spin_lock(&data_lock);
+	data->open = 0;
+	spin_unlock(&data_lock);
+	return 0;
+}
+
+static void
+call_on_cpu(int cpu, void (*fn)(void *), void *arg)
+{
+	cpumask_t save_cpus_allowed = current->cpus_allowed;
+	set_cpus_allowed_ptr(current, cpumask_of(cpu));
+	(*fn)(arg);
+	set_cpus_allowed_ptr(current, &save_cpus_allowed);
+}
+
+static void
+salinfo_log_read_cpu(void *context)
+{
+	struct salinfo_data *data = context;
+	sal_log_record_header_t *rh;
+	data->log_size = ia64_sal_get_state_info(data->type, (u64 *) data->log_buffer);
+	rh = (sal_log_record_header_t *)(data->log_buffer);
+	/* Clear corrected errors as they are read from SAL */
+	if (rh->severity == sal_log_severity_corrected)
+		ia64_sal_clear_state_info(data->type);
+}
+
+static void
+salinfo_log_new_read(int cpu, struct salinfo_data *data)
+{
+	struct salinfo_data_saved *data_saved;
+	unsigned long flags;
+	int i;
+	int saved_size = ARRAY_SIZE(data->data_saved);
+
+	data->saved_num = 0;
+	spin_lock_irqsave(&data_saved_lock, flags);
+retry:
+	for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
+		if (data_saved->buffer && data_saved->cpu == cpu) {
+			sal_log_record_header_t *rh = (sal_log_record_header_t *)(data_saved->buffer);
+			data->log_size = data_saved->size;
+			memcpy(data->log_buffer, rh, data->log_size);
+			barrier();	/* id check must not be moved */
+			if (rh->id == data_saved->id) {
+				data->saved_num = i+1;
+				break;
+			}
+			/* saved record changed by mca.c since interrupt, discard it */
+			shift1_data_saved(data, i);
+			goto retry;
+		}
+	}
+	spin_unlock_irqrestore(&data_saved_lock, flags);
+
+	if (!data->saved_num)
+		call_on_cpu(cpu, salinfo_log_read_cpu, data);
+	if (!data->log_size) {
+		data->state = STATE_NO_DATA;
+		cpu_clear(cpu, data->cpu_event);
+	} else {
+		data->state = STATE_LOG_RECORD;
+	}
+}
+
+static ssize_t
+salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
+{
+	struct salinfo_data *data = PDE_DATA(file_inode(file));
+	u8 *buf;
+	u64 bufsize;
+
+	if (data->state == STATE_LOG_RECORD) {
+		buf = data->log_buffer;
+		bufsize = data->log_size;
+	} else if (data->state == STATE_OEMDATA) {
+		buf = data->oemdata;
+		bufsize = data->oemdata_size;
+	} else {
+		buf = NULL;
+		bufsize = 0;
+	}
+	return simple_read_from_buffer(buffer, count, ppos, buf, bufsize);
+}
+
+static void
+salinfo_log_clear_cpu(void *context)
+{
+	struct salinfo_data *data = context;
+	ia64_sal_clear_state_info(data->type);
+}
+
+static int
+salinfo_log_clear(struct salinfo_data *data, int cpu)
+{
+	sal_log_record_header_t *rh;
+	unsigned long flags;
+	spin_lock_irqsave(&data_saved_lock, flags);
+	data->state = STATE_NO_DATA;
+	if (!cpu_isset(cpu, data->cpu_event)) {
+		spin_unlock_irqrestore(&data_saved_lock, flags);
+		return 0;
+	}
+	cpu_clear(cpu, data->cpu_event);
+	if (data->saved_num) {
+		shift1_data_saved(data, data->saved_num - 1);
+		data->saved_num = 0;
+	}
+	spin_unlock_irqrestore(&data_saved_lock, flags);
+	rh = (sal_log_record_header_t *)(data->log_buffer);
+	/* Corrected errors have already been cleared from SAL */
+	if (rh->severity != sal_log_severity_corrected)
+		call_on_cpu(cpu, salinfo_log_clear_cpu, data);
+	/* clearing a record may make a new record visible */
+	salinfo_log_new_read(cpu, data);
+	if (data->state == STATE_LOG_RECORD) {
+		spin_lock_irqsave(&data_saved_lock, flags);
+		cpu_set(cpu, data->cpu_event);
+		salinfo_work_to_do(data);
+		spin_unlock_irqrestore(&data_saved_lock, flags);
+	}
+	return 0;
+}
+
+static ssize_t
+salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
+{
+	struct salinfo_data *data = PDE_DATA(file_inode(file));
+	char cmd[32];
+	size_t size;
+	u32 offset;
+	int cpu;
+
+	size = sizeof(cmd);
+	if (count < size)
+		size = count;
+	if (copy_from_user(cmd, buffer, size))
+		return -EFAULT;
+
+	if (sscanf(cmd, "read %d", &cpu) == 1) {
+		salinfo_log_new_read(cpu, data);
+	} else if (sscanf(cmd, "clear %d", &cpu) == 1) {
+		int ret;
+		if ((ret = salinfo_log_clear(data, cpu)))
+			count = ret;
+	} else if (sscanf(cmd, "oemdata %d %d", &cpu, &offset) == 2) {
+		if (data->state != STATE_LOG_RECORD && data->state != STATE_OEMDATA)
+			return -EINVAL;
+		if (offset > data->log_size - sizeof(efi_guid_t))
+			return -EINVAL;
+		data->state = STATE_OEMDATA;
+		if (salinfo_platform_oemdata) {
+			struct salinfo_platform_oemdata_parms parms = {
+				.efi_guid = data->log_buffer + offset,
+				.oemdata = &data->oemdata,
+				.oemdata_size = &data->oemdata_size
+			};
+			call_on_cpu(cpu, salinfo_platform_oemdata_cpu, &parms);
+			if (parms.ret)
+				count = parms.ret;
+		} else
+			data->oemdata_size = 0;
+	} else
+		return -EINVAL;
+
+	return count;
+}
+
+static const struct file_operations salinfo_data_fops = {
+	.open    = salinfo_log_open,
+	.release = salinfo_log_release,
+	.read    = salinfo_log_read,
+	.write   = salinfo_log_write,
+	.llseek  = default_llseek,
+};
+
+static int
+salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
+{
+	unsigned int i, cpu = (unsigned long)hcpu;
+	unsigned long flags;
+	struct salinfo_data *data;
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		spin_lock_irqsave(&data_saved_lock, flags);
+		for (i = 0, data = salinfo_data;
+		     i < ARRAY_SIZE(salinfo_data);
+		     ++i, ++data) {
+			cpu_set(cpu, data->cpu_event);
+			salinfo_work_to_do(data);
+		}
+		spin_unlock_irqrestore(&data_saved_lock, flags);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		spin_lock_irqsave(&data_saved_lock, flags);
+		for (i = 0, data = salinfo_data;
+		     i < ARRAY_SIZE(salinfo_data);
+		     ++i, ++data) {
+			struct salinfo_data_saved *data_saved;
+			int j;
+			for (j = ARRAY_SIZE(data->data_saved) - 1, data_saved = data->data_saved + j;
+			     j >= 0;
+			     --j, --data_saved) {
+				if (data_saved->buffer && data_saved->cpu == cpu) {
+					shift1_data_saved(data, j);
+				}
+			}
+			cpu_clear(cpu, data->cpu_event);
+		}
+		spin_unlock_irqrestore(&data_saved_lock, flags);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block salinfo_cpu_notifier =
+{
+	.notifier_call = salinfo_cpu_callback,
+	.priority = 0,
+};
+
+static int __init
+salinfo_init(void)
+{
+	struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */
+	struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */
+	struct proc_dir_entry *dir, *entry;
+	struct salinfo_data *data;
+	int i, j;
+
+	salinfo_dir = proc_mkdir("sal", NULL);
+	if (!salinfo_dir)
+		return 0;
+
+	for (i=0; i < NR_SALINFO_ENTRIES; i++) {
+		/* pass the feature bit in question as misc data */
+		*sdir++ = proc_create_data(salinfo_entries[i].name, 0, salinfo_dir,
+					   &proc_salinfo_fops,
+					   (void *)salinfo_entries[i].feature);
+	}
+
+	cpu_notifier_register_begin();
+
+	for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
+		data = salinfo_data + i;
+		data->type = i;
+		sema_init(&data->mutex, 1);
+		dir = proc_mkdir(salinfo_log_name[i], salinfo_dir);
+		if (!dir)
+			continue;
+
+		entry = proc_create_data("event", S_IRUSR, dir,
+					 &salinfo_event_fops, data);
+		if (!entry)
+			continue;
+		*sdir++ = entry;
+
+		entry = proc_create_data("data", S_IRUSR | S_IWUSR, dir,
+					 &salinfo_data_fops, data);
+		if (!entry)
+			continue;
+		*sdir++ = entry;
+
+		/* we missed any events before now */
+		for_each_online_cpu(j)
+			cpu_set(j, data->cpu_event);
+
+		*sdir++ = dir;
+	}
+
+	*sdir++ = salinfo_dir;
+
+	init_timer(&salinfo_timer);
+	salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY;
+	salinfo_timer.function = &salinfo_timeout;
+	add_timer(&salinfo_timer);
+
+	__register_hotcpu_notifier(&salinfo_cpu_notifier);
+
+	cpu_notifier_register_done();
+
+	return 0;
+}
+
+/*
+ * 'data' contains an integer that corresponds to the feature we're
+ * testing
+ */
+static int proc_salinfo_show(struct seq_file *m, void *v)
+{
+	unsigned long data = (unsigned long)v;
+	seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n");
+	return 0;
+}
+
+static int proc_salinfo_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, proc_salinfo_show, PDE_DATA(inode));
+}
+
+static const struct file_operations proc_salinfo_fops = {
+	.open		= proc_salinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+module_init(salinfo_init);
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
--- a/arch/ia64/kernel/sigframe.h
+++ b/arch/ia64/kernel/sigframe.h
@ -0,0 +1,25 @@
+struct sigscratch {
+	unsigned long scratch_unat;	/* ar.unat for the general registers saved in pt */
+	unsigned long ar_pfs;		/* for syscalls, the user-level function-state  */
+	struct pt_regs pt;
+};
+
+struct sigframe {
+	/*
+	 * Place signal handler args where user-level unwinder can find them easily.
+	 * DO NOT MOVE THESE.  They are part of the IA-64 Linux ABI and there is
+	 * user-level code that depends on their presence!
+	 */
+	unsigned long arg0;		/* signum */
+	unsigned long arg1;		/* siginfo pointer */
+	unsigned long arg2;		/* sigcontext pointer */
+	/*
+	 * End of architected state.
+	 */
+
+	void __user *handler;		/* pointer to the plabel of the signal handler */
+	struct siginfo info;
+	struct sigcontext sc;
+};
+
+extern void ia64_do_signal (struct sigscratch *, long);
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@ -0,0 +1,496 @@
+/*
+ * Architecture-specific signal handling support.
+ *
+ * Copyright (C) 1999-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Derived from i386 and Alpha versions.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/ptrace.h>
+#include <linux/tracehook.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/binfmts.h>
+#include <linux/unistd.h>
+#include <linux/wait.h>
+
+#include <asm/intrinsics.h>
+#include <asm/uaccess.h>
+#include <asm/rse.h>
+#include <asm/sigcontext.h>
+
+#include "sigframe.h"
+
+#define DEBUG_SIG	0
+#define STACK_ALIGN	16		/* minimal alignment for stack pointer */
+
+#if _NSIG_WORDS > 1
+# define PUT_SIGSET(k,u)	__copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t))
+# define GET_SIGSET(k,u)	__copy_from_user((k)->sig, (u)->sig, sizeof(sigset_t))
+#else
+# define PUT_SIGSET(k,u)	__put_user((k)->sig[0], &(u)->sig[0])
+# define GET_SIGSET(k,u)	__get_user((k)->sig[0], &(u)->sig[0])
+#endif
+
+static long
+restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
+{
+	unsigned long ip, flags, nat, um, cfm, rsc;
+	long err;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+	/* restore scratch that always needs gets updated during signal delivery: */
+	err  = __get_user(flags, &sc->sc_flags);
+	err |= __get_user(nat, &sc->sc_nat);
+	err |= __get_user(ip, &sc->sc_ip);			/* instruction pointer */
+	err |= __get_user(cfm, &sc->sc_cfm);
+	err |= __get_user(um, &sc->sc_um);			/* user mask */
+	err |= __get_user(rsc, &sc->sc_ar_rsc);
+	err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat);
+	err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr);
+	err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
+	err |= __get_user(scr->pt.pr, &sc->sc_pr);		/* predicates */
+	err |= __get_user(scr->pt.b0, &sc->sc_br[0]);		/* b0 (rp) */
+	err |= __get_user(scr->pt.b6, &sc->sc_br[6]);		/* b6 */
+	err |= __copy_from_user(&scr->pt.r1, &sc->sc_gr[1], 8);	/* r1 */
+	err |= __copy_from_user(&scr->pt.r8, &sc->sc_gr[8], 4*8);	/* r8-r11 */
+	err |= __copy_from_user(&scr->pt.r12, &sc->sc_gr[12], 2*8);	/* r12-r13 */
+	err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8);	/* r15 */
+
+	scr->pt.cr_ifs = cfm | (1UL << 63);
+	scr->pt.ar_rsc = rsc | (3 << 2); /* force PL3 */
+
+	/* establish new instruction pointer: */
+	scr->pt.cr_iip = ip & ~0x3UL;
+	ia64_psr(&scr->pt)->ri = ip & 0x3;
+	scr->pt.cr_ipsr = (scr->pt.cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM);
+
+	scr->scratch_unat = ia64_put_scratch_nat_bits(&scr->pt, nat);
+
+	if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) {
+		/* Restore most scratch-state only when not in syscall. */
+		err |= __get_user(scr->pt.ar_ccv, &sc->sc_ar_ccv);		/* ar.ccv */
+		err |= __get_user(scr->pt.b7, &sc->sc_br[7]);			/* b7 */
+		err |= __get_user(scr->pt.r14, &sc->sc_gr[14]);			/* r14 */
+		err |= __copy_from_user(&scr->pt.ar_csd, &sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */
+		err |= __copy_from_user(&scr->pt.r2, &sc->sc_gr[2], 2*8);	/* r2-r3 */
+		err |= __copy_from_user(&scr->pt.r16, &sc->sc_gr[16], 16*8);	/* r16-r31 */
+	}
+
+	if ((flags & IA64_SC_FLAG_FPH_VALID) != 0) {
+		struct ia64_psr *psr = ia64_psr(&scr->pt);
+
+		err |= __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
+		psr->mfh = 0;	/* drop signal handler's fph contents... */
+		preempt_disable();
+		if (psr->dfh)
+			ia64_drop_fpu(current);
+		else {
+			/* We already own the local fph, otherwise psr->dfh wouldn't be 0.  */
+			__ia64_load_fpu(current->thread.fph);
+			ia64_set_local_fpu_owner(current);
+		}
+		preempt_enable();
+	}
+	return err;
+}
+
+int
+copy_siginfo_to_user (siginfo_t __user *to, const siginfo_t *from)
+{
+	if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t)))
+		return -EFAULT;
+	if (from->si_code < 0) {
+		if (__copy_to_user(to, from, sizeof(siginfo_t)))
+			return -EFAULT;
+		return 0;
+	} else {
+		int err;
+
+		/*
+		 * If you change siginfo_t structure, please be sure this code is fixed
+		 * accordingly.  It should never copy any pad contained in the structure
+		 * to avoid security leaks, but must copy the generic 3 ints plus the
+		 * relevant union member.
+		 */
+		err = __put_user(from->si_signo, &to->si_signo);
+		err |= __put_user(from->si_errno, &to->si_errno);
+		err |= __put_user((short)from->si_code, &to->si_code);
+		switch (from->si_code >> 16) {
+		      case __SI_FAULT >> 16:
+			err |= __put_user(from->si_flags, &to->si_flags);
+			err |= __put_user(from->si_isr, &to->si_isr);
+		      case __SI_POLL >> 16:
+			err |= __put_user(from->si_addr, &to->si_addr);
+			err |= __put_user(from->si_imm, &to->si_imm);
+			break;
+		      case __SI_TIMER >> 16:
+			err |= __put_user(from->si_tid, &to->si_tid);
+			err |= __put_user(from->si_overrun, &to->si_overrun);
+			err |= __put_user(from->si_ptr, &to->si_ptr);
+			break;
+		      case __SI_RT >> 16:	/* Not generated by the kernel as of now.  */
+		      case __SI_MESGQ >> 16:
+			err |= __put_user(from->si_uid, &to->si_uid);
+			err |= __put_user(from->si_pid, &to->si_pid);
+			err |= __put_user(from->si_ptr, &to->si_ptr);
+			break;
+		      case __SI_CHLD >> 16:
+			err |= __put_user(from->si_utime, &to->si_utime);
+			err |= __put_user(from->si_stime, &to->si_stime);
+			err |= __put_user(from->si_status, &to->si_status);
+		      default:
+			err |= __put_user(from->si_uid, &to->si_uid);
+			err |= __put_user(from->si_pid, &to->si_pid);
+			break;
+		}
+		return err;
+	}
+}
+
+long
+ia64_rt_sigreturn (struct sigscratch *scr)
+{
+	extern char ia64_strace_leave_kernel, ia64_leave_kernel;
+	struct sigcontext __user *sc;
+	struct siginfo si;
+	sigset_t set;
+	long retval;
+
+	sc = &((struct sigframe __user *) (scr->pt.r12 + 16))->sc;
+
+	/*
+	 * When we return to the previously executing context, r8 and r10 have already
+	 * been setup the way we want them.  Indeed, if the signal wasn't delivered while
+	 * in a system call, we must not touch r8 or r10 as otherwise user-level state
+	 * could be corrupted.
+	 */
+	retval = (long) &ia64_leave_kernel;
+	if (test_thread_flag(TIF_SYSCALL_TRACE)
+	    || test_thread_flag(TIF_SYSCALL_AUDIT))
+		/*
+		 * strace expects to be notified after sigreturn returns even though the
+		 * context to which we return may not be in the middle of a syscall.
+		 * Thus, the return-value that strace displays for sigreturn is
+		 * meaningless.
+		 */
+		retval = (long) &ia64_strace_leave_kernel;
+
+	if (!access_ok(VERIFY_READ, sc, sizeof(*sc)))
+		goto give_sigsegv;
+
+	if (GET_SIGSET(&set, &sc->sc_mask))
+		goto give_sigsegv;
+
+	set_current_blocked(&set);
+
+	if (restore_sigcontext(sc, scr))
+		goto give_sigsegv;
+
+#if DEBUG_SIG
+	printk("SIG return (%s:%d): sp=%lx ip=%lx\n",
+	       current->comm, current->pid, scr->pt.r12, scr->pt.cr_iip);
+#endif
+	if (restore_altstack(&sc->sc_stack))
+		goto give_sigsegv;
+	return retval;
+
+  give_sigsegv:
+	si.si_signo = SIGSEGV;
+	si.si_errno = 0;
+	si.si_code = SI_KERNEL;
+	si.si_pid = task_pid_vnr(current);
+	si.si_uid = from_kuid_munged(current_user_ns(), current_uid());
+	si.si_addr = sc;
+	force_sig_info(SIGSEGV, &si, current);
+	return retval;
+}
+
+/*
+ * This does just the minimum required setup of sigcontext.
+ * Specifically, it only installs data that is either not knowable at
+ * the user-level or that gets modified before execution in the
+ * trampoline starts.  Everything else is done at the user-level.
+ */
+static long
+setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratch *scr)
+{
+	unsigned long flags = 0, ifs, cfm, nat;
+	long err = 0;
+
+	ifs = scr->pt.cr_ifs;
+
+	if (on_sig_stack((unsigned long) sc))
+		flags |= IA64_SC_FLAG_ONSTACK;
+	if ((ifs & (1UL << 63)) == 0)
+		/* if cr_ifs doesn't have the valid bit set, we got here through a syscall */
+		flags |= IA64_SC_FLAG_IN_SYSCALL;
+	cfm = ifs & ((1UL << 38) - 1);
+	ia64_flush_fph(current);
+	if ((current->thread.flags & IA64_THREAD_FPH_VALID)) {
+		flags |= IA64_SC_FLAG_FPH_VALID;
+		err = __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16);
+	}
+
+	nat = ia64_get_scratch_nat_bits(&scr->pt, scr->scratch_unat);
+
+	err |= __put_user(flags, &sc->sc_flags);
+	err |= __put_user(nat, &sc->sc_nat);
+	err |= PUT_SIGSET(mask, &sc->sc_mask);
+	err |= __put_user(cfm, &sc->sc_cfm);
+	err |= __put_user(scr->pt.cr_ipsr & IA64_PSR_UM, &sc->sc_um);
+	err |= __put_user(scr->pt.ar_rsc, &sc->sc_ar_rsc);
+	err |= __put_user(scr->pt.ar_unat, &sc->sc_ar_unat);		/* ar.unat */
+	err |= __put_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr);		/* ar.fpsr */
+	err |= __put_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
+	err |= __put_user(scr->pt.pr, &sc->sc_pr);			/* predicates */
+	err |= __put_user(scr->pt.b0, &sc->sc_br[0]);			/* b0 (rp) */
+	err |= __put_user(scr->pt.b6, &sc->sc_br[6]);			/* b6 */
+	err |= __copy_to_user(&sc->sc_gr[1], &scr->pt.r1, 8);		/* r1 */
+	err |= __copy_to_user(&sc->sc_gr[8], &scr->pt.r8, 4*8);		/* r8-r11 */
+	err |= __copy_to_user(&sc->sc_gr[12], &scr->pt.r12, 2*8);	/* r12-r13 */
+	err |= __copy_to_user(&sc->sc_gr[15], &scr->pt.r15, 8);		/* r15 */
+	err |= __put_user(scr->pt.cr_iip + ia64_psr(&scr->pt)->ri, &sc->sc_ip);
+
+	if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) {
+		/* Copy scratch regs to sigcontext if the signal didn't interrupt a syscall. */
+		err |= __put_user(scr->pt.ar_ccv, &sc->sc_ar_ccv);		/* ar.ccv */
+		err |= __put_user(scr->pt.b7, &sc->sc_br[7]);			/* b7 */
+		err |= __put_user(scr->pt.r14, &sc->sc_gr[14]);			/* r14 */
+		err |= __copy_to_user(&sc->sc_ar25, &scr->pt.ar_csd, 2*8); /* ar.csd & ar.ssd */
+		err |= __copy_to_user(&sc->sc_gr[2], &scr->pt.r2, 2*8);		/* r2-r3 */
+		err |= __copy_to_user(&sc->sc_gr[16], &scr->pt.r16, 16*8);	/* r16-r31 */
+	}
+	return err;
+}
+
+/*
+ * Check whether the register-backing store is already on the signal stack.
+ */
+static inline int
+rbs_on_sig_stack (unsigned long bsp)
+{
+	return (bsp - current->sas_ss_sp < current->sas_ss_size);
+}
+
+static long
+force_sigsegv_info (int sig, void __user *addr)
+{
+	unsigned long flags;
+	struct siginfo si;
+
+	if (sig == SIGSEGV) {
+		/*
+		 * Acquiring siglock around the sa_handler-update is almost
+		 * certainly overkill, but this isn't a
+		 * performance-critical path and I'd rather play it safe
+		 * here than having to debug a nasty race if and when
+		 * something changes in kernel/signal.c that would make it
+		 * no longer safe to modify sa_handler without holding the
+		 * lock.
+		 */
+		spin_lock_irqsave(&current->sighand->siglock, flags);
+		current->sighand->action[sig - 1].sa.sa_handler = SIG_DFL;
+		spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	}
+	si.si_signo = SIGSEGV;
+	si.si_errno = 0;
+	si.si_code = SI_KERNEL;
+	si.si_pid = task_pid_vnr(current);
+	si.si_uid = from_kuid_munged(current_user_ns(), current_uid());
+	si.si_addr = addr;
+	force_sig_info(SIGSEGV, &si, current);
+	return 1;
+}
+
+static long
+setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr)
+{
+	extern char __kernel_sigtramp[];
+	unsigned long tramp_addr, new_rbs = 0, new_sp;
+	struct sigframe __user *frame;
+	long err;
+
+	new_sp = scr->pt.r12;
+	tramp_addr = (unsigned long) __kernel_sigtramp;
+	if (ksig->ka.sa.sa_flags & SA_ONSTACK) {
+		int onstack = sas_ss_flags(new_sp);
+
+		if (onstack == 0) {
+			new_sp = current->sas_ss_sp + current->sas_ss_size;
+			/*
+			 * We need to check for the register stack being on the
+			 * signal stack separately, because it's switched
+			 * separately (memory stack is switched in the kernel,
+			 * register stack is switched in the signal trampoline).
+			 */
+			if (!rbs_on_sig_stack(scr->pt.ar_bspstore))
+				new_rbs = ALIGN(current->sas_ss_sp,
+						sizeof(long));
+		} else if (onstack == SS_ONSTACK) {
+			unsigned long check_sp;
+
+			/*
+			 * If we are on the alternate signal stack and would
+			 * overflow it, don't. Return an always-bogus address
+			 * instead so we will die with SIGSEGV.
+			 */
+			check_sp = (new_sp - sizeof(*frame)) & -STACK_ALIGN;
+			if (!likely(on_sig_stack(check_sp)))
+				return force_sigsegv_info(ksig->sig, (void __user *)
+							  check_sp);
+		}
+	}
+	frame = (void __user *) ((new_sp - sizeof(*frame)) & -STACK_ALIGN);
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		return force_sigsegv_info(ksig->sig, frame);
+
+	err  = __put_user(ksig->sig, &frame->arg0);
+	err |= __put_user(&frame->info, &frame->arg1);
+	err |= __put_user(&frame->sc, &frame->arg2);
+	err |= __put_user(new_rbs, &frame->sc.sc_rbs_base);
+	err |= __put_user(0, &frame->sc.sc_loadrs);	/* initialize to zero */
+	err |= __put_user(ksig->ka.sa.sa_handler, &frame->handler);
+
+	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
+
+	err |= __save_altstack(&frame->sc.sc_stack, scr->pt.r12);
+	err |= setup_sigcontext(&frame->sc, set, scr);
+
+	if (unlikely(err))
+		return force_sigsegv_info(ksig->sig, frame);
+
+	scr->pt.r12 = (unsigned long) frame - 16;	/* new stack pointer */
+	scr->pt.ar_fpsr = FPSR_DEFAULT;			/* reset fpsr for signal handler */
+	scr->pt.cr_iip = tramp_addr;
+	ia64_psr(&scr->pt)->ri = 0;			/* start executing in first slot */
+	ia64_psr(&scr->pt)->be = 0;			/* force little-endian byte-order */
+	/*
+	 * Force the interruption function mask to zero.  This has no effect when a
+	 * system-call got interrupted by a signal (since, in that case, scr->pt_cr_ifs is
+	 * ignored), but it has the desirable effect of making it possible to deliver a
+	 * signal with an incomplete register frame (which happens when a mandatory RSE
+	 * load faults).  Furthermore, it has no negative effect on the getting the user's
+	 * dirty partition preserved, because that's governed by scr->pt.loadrs.
+	 */
+	scr->pt.cr_ifs = (1UL << 63);
+
+	/*
+	 * Note: this affects only the NaT bits of the scratch regs (the ones saved in
+	 * pt_regs), which is exactly what we want.
+	 */
+	scr->scratch_unat = 0; /* ensure NaT bits of r12 is clear */
+
+#if DEBUG_SIG
+	printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%p\n",
+	       current->comm, current->pid, ksig->sig, scr->pt.r12, frame->sc.sc_ip, frame->handler);
+#endif
+	return 0;
+}
+
+static long
+handle_signal (struct ksignal *ksig, struct sigscratch *scr)
+{
+	int ret = setup_frame(ksig, sigmask_to_save(), scr);
+
+	if (!ret)
+		signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLESTEP));
+
+	return ret;
+}
+
+/*
+ * Note that `init' is a special process: it doesn't get signals it doesn't want to
+ * handle.  Thus you cannot kill init even with a SIGKILL even by mistake.
+ */
+void
+ia64_do_signal (struct sigscratch *scr, long in_syscall)
+{
+	long restart = in_syscall;
+	long errno = scr->pt.r8;
+	struct ksignal ksig;
+
+	/*
+	 * This only loops in the rare cases of handle_signal() failing, in which case we
+	 * need to push through a forced SIGSEGV.
+	 */
+	while (1) {
+		get_signal(&ksig);
+
+		/*
+		 * get_signal_to_deliver() may have run a debugger (via notify_parent())
+		 * and the debugger may have modified the state (e.g., to arrange for an
+		 * inferior call), thus it's important to check for restarting _after_
+		 * get_signal_to_deliver().
+		 */
+		if ((long) scr->pt.r10 != -1)
+			/*
+			 * A system calls has to be restarted only if one of the error codes
+			 * ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned.  If r10
+			 * isn't -1 then r8 doesn't hold an error code and we don't need to
+			 * restart the syscall, so we can clear the "restart" flag here.
+			 */
+			restart = 0;
+
+		if (ksig.sig <= 0)
+			break;
+
+		if (unlikely(restart)) {
+			switch (errno) {
+			      case ERESTART_RESTARTBLOCK:
+			      case ERESTARTNOHAND:
+				scr->pt.r8 = EINTR;
+				/* note: scr->pt.r10 is already -1 */
+				break;
+
+			      case ERESTARTSYS:
+				if ((ksig.ka.sa.sa_flags & SA_RESTART) == 0) {
+					scr->pt.r8 = EINTR;
+					/* note: scr->pt.r10 is already -1 */
+					break;
+				}
+			      case ERESTARTNOINTR:
+				ia64_decrement_ip(&scr->pt);
+				restart = 0; /* don't restart twice if handle_signal() fails... */
+			}
+		}
+
+		/*
+		 * Whee!  Actually deliver the signal.  If the delivery failed, we need to
+		 * continue to iterate in this loop so we can deliver the SIGSEGV...
+		 */
+		if (handle_signal(&ksig, scr))
+			return;
+	}
+
+	/* Did we come from a system call? */
+	if (restart) {
+		/* Restart the system call - no handlers present */
+		if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR
+		    || errno == ERESTART_RESTARTBLOCK)
+		{
+			/*
+			 * Note: the syscall number is in r15 which is saved in
+			 * pt_regs so all we need to do here is adjust ip so that
+			 * the "break" instruction gets re-executed.
+			 */
+			ia64_decrement_ip(&scr->pt);
+			if (errno == ERESTART_RESTARTBLOCK)
+				scr->pt.r15 = __NR_restart_syscall;
+		}
+	}
+
+	/* if there's no signal to deliver, we just put the saved sigmask
+	 * back */
+	restore_saved_sigmask();
+}
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@ -0,0 +1,342 @@
+/*
+ * SMP Support
+ *
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Lots of stuff stolen from arch/alpha/kernel/smp.c
+ *
+ * 01/05/16 Rohit Seth <rohit.seth@intel.com>  IA64-SMP functions. Reorganized
+ * the existing code (on the lines of x86 port).
+ * 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_jiffy
+ * calibration on each CPU.
+ * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> fixed logical processor id
+ * 00/03/31 Rohit Seth <rohit.seth@intel.com>	Fixes for Bootstrap Processor
+ * & cpu_online_map now gets done here (instead of setup.c)
+ * 99/10/05 davidm	Update to bring it in sync with new command-line processing
+ *  scheme.
+ * 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and
+ *		smp_call_function_single to resend IPI on timeouts
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/kernel_stat.h>
+#include <linux/mm.h>
+#include <linux/cache.h>
+#include <linux/delay.h>
+#include <linux/efi.h>
+#include <linux/bitops.h>
+#include <linux/kexec.h>
+
+#include <linux/atomic.h>
+#include <asm/current.h>
+#include <asm/delay.h>
+#include <asm/machvec.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/tlbflush.h>
+#include <asm/unistd.h>
+#include <asm/mca.h>
+
+/*
+ * Note: alignment of 4 entries/cacheline was empirically determined
+ * to be a good tradeoff between hot cachelines & spreading the array
+ * across too many cacheline.
+ */
+static struct local_tlb_flush_counts {
+	unsigned int count;
+} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS];
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned short [NR_CPUS],
+				     shadow_flush_counts);
+
+#define IPI_CALL_FUNC		0
+#define IPI_CPU_STOP		1
+#define IPI_CALL_FUNC_SINGLE	2
+#define IPI_KDUMP_CPU_STOP	3
+
+/* This needs to be cacheline aligned because it is written to by *other* CPUs.  */
+static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, ipi_operation);
+
+extern void cpu_halt (void);
+
+static void
+stop_this_cpu(void)
+{
+	/*
+	 * Remove this CPU:
+	 */
+	set_cpu_online(smp_processor_id(), false);
+	max_xtp();
+	local_irq_disable();
+	cpu_halt();
+}
+
+void
+cpu_die(void)
+{
+	max_xtp();
+	local_irq_disable();
+	cpu_halt();
+	/* Should never be here */
+	BUG();
+	for (;;);
+}
+
+irqreturn_t
+handle_IPI (int irq, void *dev_id)
+{
+	int this_cpu = get_cpu();
+	unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation);
+	unsigned long ops;
+
+	mb();	/* Order interrupt and bit testing. */
+	while ((ops = xchg(pending_ipis, 0)) != 0) {
+		mb();	/* Order bit clearing and data access. */
+		do {
+			unsigned long which;
+
+			which = ffz(~ops);
+			ops &= ~(1 << which);
+
+			switch (which) {
+			case IPI_CPU_STOP:
+				stop_this_cpu();
+				break;
+			case IPI_CALL_FUNC:
+				generic_smp_call_function_interrupt();
+				break;
+			case IPI_CALL_FUNC_SINGLE:
+				generic_smp_call_function_single_interrupt();
+				break;
+#ifdef CONFIG_KEXEC
+			case IPI_KDUMP_CPU_STOP:
+				unw_init_running(kdump_cpu_freeze, NULL);
+				break;
+#endif
+			default:
+				printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n",
+						this_cpu, which);
+				break;
+			}
+		} while (ops);
+		mb();	/* Order data access and bit testing. */
+	}
+	put_cpu();
+	return IRQ_HANDLED;
+}
+
+
+
+/*
+ * Called with preemption disabled.
+ */
+static inline void
+send_IPI_single (int dest_cpu, int op)
+{
+	set_bit(op, &per_cpu(ipi_operation, dest_cpu));
+	platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0);
+}
+
+/*
+ * Called with preemption disabled.
+ */
+static inline void
+send_IPI_allbutself (int op)
+{
+	unsigned int i;
+
+	for_each_online_cpu(i) {
+		if (i != smp_processor_id())
+			send_IPI_single(i, op);
+	}
+}
+
+/*
+ * Called with preemption disabled.
+ */
+static inline void
+send_IPI_mask(const struct cpumask *mask, int op)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask) {
+			send_IPI_single(cpu, op);
+	}
+}
+
+/*
+ * Called with preemption disabled.
+ */
+static inline void
+send_IPI_all (int op)
+{
+	int i;
+
+	for_each_online_cpu(i) {
+		send_IPI_single(i, op);
+	}
+}
+
+/*
+ * Called with preemption disabled.
+ */
+static inline void
+send_IPI_self (int op)
+{
+	send_IPI_single(smp_processor_id(), op);
+}
+
+#ifdef CONFIG_KEXEC
+void
+kdump_smp_send_stop(void)
+{
+ 	send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
+}
+
+void
+kdump_smp_send_init(void)
+{
+	unsigned int cpu, self_cpu;
+	self_cpu = smp_processor_id();
+	for_each_online_cpu(cpu) {
+		if (cpu != self_cpu) {
+			if(kdump_status[cpu] == 0)
+				platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0);
+		}
+	}
+}
+#endif
+/*
+ * Called with preemption disabled.
+ */
+void
+smp_send_reschedule (int cpu)
+{
+	platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
+}
+EXPORT_SYMBOL_GPL(smp_send_reschedule);
+
+/*
+ * Called with preemption disabled.
+ */
+static void
+smp_send_local_flush_tlb (int cpu)
+{
+	platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0);
+}
+
+void
+smp_local_flush_tlb(void)
+{
+	/*
+	 * Use atomic ops. Otherwise, the load/increment/store sequence from
+	 * a "++" operation can have the line stolen between the load & store.
+	 * The overhead of the atomic op in negligible in this case & offers
+	 * significant benefit for the brief periods where lots of cpus
+	 * are simultaneously flushing TLBs.
+	 */
+	ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq);
+	local_flush_tlb_all();
+}
+
+#define FLUSH_DELAY	5 /* Usec backoff to eliminate excessive cacheline bouncing */
+
+void
+smp_flush_tlb_cpumask(cpumask_t xcpumask)
+{
+	unsigned short *counts = __ia64_per_cpu_var(shadow_flush_counts);
+	cpumask_t cpumask = xcpumask;
+	int mycpu, cpu, flush_mycpu = 0;
+
+	preempt_disable();
+	mycpu = smp_processor_id();
+
+	for_each_cpu_mask(cpu, cpumask)
+		counts[cpu] = local_tlb_flush_counts[cpu].count & 0xffff;
+
+	mb();
+	for_each_cpu_mask(cpu, cpumask) {
+		if (cpu == mycpu)
+			flush_mycpu = 1;
+		else
+			smp_send_local_flush_tlb(cpu);
+	}
+
+	if (flush_mycpu)
+		smp_local_flush_tlb();
+
+	for_each_cpu_mask(cpu, cpumask)
+		while(counts[cpu] == (local_tlb_flush_counts[cpu].count & 0xffff))
+			udelay(FLUSH_DELAY);
+
+	preempt_enable();
+}
+
+void
+smp_flush_tlb_all (void)
+{
+	on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1);
+}
+
+void
+smp_flush_tlb_mm (struct mm_struct *mm)
+{
+	cpumask_var_t cpus;
+	preempt_disable();
+	/* this happens for the common case of a single-threaded fork():  */
+	if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
+	{
+		local_finish_flush_tlb_mm(mm);
+		preempt_enable();
+		return;
+	}
+	if (!alloc_cpumask_var(&cpus, GFP_ATOMIC)) {
+		smp_call_function((void (*)(void *))local_finish_flush_tlb_mm,
+			mm, 1);
+	} else {
+		cpumask_copy(cpus, mm_cpumask(mm));
+		smp_call_function_many(cpus,
+			(void (*)(void *))local_finish_flush_tlb_mm, mm, 1);
+		free_cpumask_var(cpus);
+	}
+	local_irq_disable();
+	local_finish_flush_tlb_mm(mm);
+	local_irq_enable();
+	preempt_enable();
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+	send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	send_IPI_mask(mask, IPI_CALL_FUNC);
+}
+
+/*
+ * this function calls the 'stop' function on all other CPUs in the system.
+ */
+void
+smp_send_stop (void)
+{
+	send_IPI_allbutself(IPI_CPU_STOP);
+}
+
+int
+setup_profiling_timer (unsigned int multiplier)
+{
+	return -EINVAL;
+}
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@ -0,0 +1,858 @@
+/*
+ * SMP boot-related support
+ *
+ * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2001, 2004-2005 Intel Corp
+ * 	Rohit Seth <rohit.seth@intel.com>
+ * 	Suresh Siddha <suresh.b.siddha@intel.com>
+ * 	Gordon Jin <gordon.jin@intel.com>
+ *	Ashok Raj  <ashok.raj@intel.com>
+ *
+ * 01/05/16 Rohit Seth <rohit.seth@intel.com>	Moved SMP booting functions from smp.c to here.
+ * 01/04/27 David Mosberger <davidm@hpl.hp.com>	Added ITC synching code.
+ * 02/07/31 David Mosberger <davidm@hpl.hp.com>	Switch over to hotplug-CPU boot-sequence.
+ *						smp_boot_cpus()/smp_commence() is replaced by
+ *						smp_prepare_cpus()/__cpu_up()/smp_cpus_done().
+ * 04/06/21 Ashok Raj		<ashok.raj@intel.com> Added CPU Hotplug Support
+ * 04/12/26 Jin Gordon <gordon.jin@intel.com>
+ * 04/12/26 Rohit Seth <rohit.seth@intel.com>
+ *						Add multi-threading and multi-core detection
+ * 05/01/30 Suresh Siddha <suresh.b.siddha@intel.com>
+ *						Setup cpu_sibling_map and cpu_core_map
+ */
+
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/efi.h>
+#include <linux/percpu.h>
+#include <linux/bitops.h>
+
+#include <linux/atomic.h>
+#include <asm/cache.h>
+#include <asm/current.h>
+#include <asm/delay.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/machvec.h>
+#include <asm/mca.h>
+#include <asm/page.h>
+#include <asm/paravirt.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/tlbflush.h>
+#include <asm/unistd.h>
+#include <asm/sn/arch.h>
+
+#define SMP_DEBUG 0
+
+#if SMP_DEBUG
+#define Dprintk(x...)  printk(x)
+#else
+#define Dprintk(x...)
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+#ifdef CONFIG_PERMIT_BSP_REMOVE
+#define bsp_remove_ok	1
+#else
+#define bsp_remove_ok	0
+#endif
+
+/*
+ * Global array allocated for NR_CPUS at boot time
+ */
+struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS];
+
+/*
+ * start_ap in head.S uses this to store current booting cpu
+ * info.
+ */
+struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0];
+
+#define set_brendez_area(x) (sal_state_for_booting_cpu = &sal_boot_rendez_state[(x)]);
+
+#else
+#define set_brendez_area(x)
+#endif
+
+
+/*
+ * ITC synchronization related stuff:
+ */
+#define MASTER	(0)
+#define SLAVE	(SMP_CACHE_BYTES/8)
+
+#define NUM_ROUNDS	64	/* magic value */
+#define NUM_ITERS	5	/* likewise */
+
+static DEFINE_SPINLOCK(itc_sync_lock);
+static volatile unsigned long go[SLAVE + 1];
+
+#define DEBUG_ITC_SYNC	0
+
+extern void start_ap (void);
+extern unsigned long ia64_iobase;
+
+struct task_struct *task_for_booting_cpu;
+
+/*
+ * State for each CPU
+ */
+DEFINE_PER_CPU(int, cpu_state);
+
+cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+
+int smp_num_siblings = 1;
+
+/* which logical CPU number maps to which CPU (physical APIC ID) */
+volatile int ia64_cpu_to_sapicid[NR_CPUS];
+EXPORT_SYMBOL(ia64_cpu_to_sapicid);
+
+static volatile cpumask_t cpu_callin_map;
+
+struct smp_boot_data smp_boot_data __initdata;
+
+unsigned long ap_wakeup_vector = -1; /* External Int use to wakeup APs */
+
+char __initdata no_int_routing;
+
+unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
+
+#ifdef CONFIG_FORCE_CPEI_RETARGET
+#define CPEI_OVERRIDE_DEFAULT	(1)
+#else
+#define CPEI_OVERRIDE_DEFAULT	(0)
+#endif
+
+unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT;
+
+static int __init
+cmdl_force_cpei(char *str)
+{
+	int value=0;
+
+	get_option (&str, &value);
+	force_cpei_retarget = value;
+
+	return 1;
+}
+
+__setup("force_cpei=", cmdl_force_cpei);
+
+static int __init
+nointroute (char *str)
+{
+	no_int_routing = 1;
+	printk ("no_int_routing on\n");
+	return 1;
+}
+
+__setup("nointroute", nointroute);
+
+static void fix_b0_for_bsp(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	int cpuid;
+	static int fix_bsp_b0 = 1;
+
+	cpuid = smp_processor_id();
+
+	/*
+	 * Cache the b0 value on the first AP that comes up
+	 */
+	if (!(fix_bsp_b0 && cpuid))
+		return;
+
+	sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0];
+	printk ("Fixed BSP b0 value from CPU %d\n", cpuid);
+
+	fix_bsp_b0 = 0;
+#endif
+}
+
+void
+sync_master (void *arg)
+{
+	unsigned long flags, i;
+
+	go[MASTER] = 0;
+
+	local_irq_save(flags);
+	{
+		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
+			while (!go[MASTER])
+				cpu_relax();
+			go[MASTER] = 0;
+			go[SLAVE] = ia64_get_itc();
+		}
+	}
+	local_irq_restore(flags);
+}
+
+/*
+ * Return the number of cycles by which our itc differs from the itc on the master
+ * (time-keeper) CPU.  A positive number indicates our itc is ahead of the master,
+ * negative that it is behind.
+ */
+static inline long
+get_delta (long *rt, long *master)
+{
+	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
+	unsigned long tcenter, t0, t1, tm;
+	long i;
+
+	for (i = 0; i < NUM_ITERS; ++i) {
+		t0 = ia64_get_itc();
+		go[MASTER] = 1;
+		while (!(tm = go[SLAVE]))
+			cpu_relax();
+		go[SLAVE] = 0;
+		t1 = ia64_get_itc();
+
+		if (t1 - t0 < best_t1 - best_t0)
+			best_t0 = t0, best_t1 = t1, best_tm = tm;
+	}
+
+	*rt = best_t1 - best_t0;
+	*master = best_tm - best_t0;
+
+	/* average best_t0 and best_t1 without overflow: */
+	tcenter = (best_t0/2 + best_t1/2);
+	if (best_t0 % 2 + best_t1 % 2 == 2)
+		++tcenter;
+	return tcenter - best_tm;
+}
+
+/*
+ * Synchronize ar.itc of the current (slave) CPU with the ar.itc of the MASTER CPU
+ * (normally the time-keeper CPU).  We use a closed loop to eliminate the possibility of
+ * unaccounted-for errors (such as getting a machine check in the middle of a calibration
+ * step).  The basic idea is for the slave to ask the master what itc value it has and to
+ * read its own itc before and after the master responds.  Each iteration gives us three
+ * timestamps:
+ *
+ *	slave		master
+ *
+ *	t0 ---\
+ *             ---\
+ *		   --->
+ *			tm
+ *		   /---
+ *	       /---
+ *	t1 <---
+ *
+ *
+ * The goal is to adjust the slave's ar.itc such that tm falls exactly half-way between t0
+ * and t1.  If we achieve this, the clocks are synchronized provided the interconnect
+ * between the slave and the master is symmetric.  Even if the interconnect were
+ * asymmetric, we would still know that the synchronization error is smaller than the
+ * roundtrip latency (t0 - t1).
+ *
+ * When the interconnect is quiet and symmetric, this lets us synchronize the itc to
+ * within one or two cycles.  However, we can only *guarantee* that the synchronization is
+ * accurate to within a round-trip time, which is typically in the range of several
+ * hundred cycles (e.g., ~500 cycles).  In practice, this means that the itc's are usually
+ * almost perfectly synchronized, but we shouldn't assume that the accuracy is much better
+ * than half a micro second or so.
+ */
+void
+ia64_sync_itc (unsigned int master)
+{
+	long i, delta, adj, adjust_latency = 0, done = 0;
+	unsigned long flags, rt, master_time_stamp, bound;
+#if DEBUG_ITC_SYNC
+	struct {
+		long rt;	/* roundtrip time */
+		long master;	/* master's timestamp */
+		long diff;	/* difference between midpoint and master's timestamp */
+		long lat;	/* estimate of itc adjustment latency */
+	} t[NUM_ROUNDS];
+#endif
+
+	/*
+	 * Make sure local timer ticks are disabled while we sync.  If
+	 * they were enabled, we'd have to worry about nasty issues
+	 * like setting the ITC ahead of (or a long time before) the
+	 * next scheduled tick.
+	 */
+	BUG_ON((ia64_get_itv() & (1 << 16)) == 0);
+
+	go[MASTER] = 1;
+
+	if (smp_call_function_single(master, sync_master, NULL, 0) < 0) {
+		printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master);
+		return;
+	}
+
+	while (go[MASTER])
+		cpu_relax();	/* wait for master to be ready */
+
+	spin_lock_irqsave(&itc_sync_lock, flags);
+	{
+		for (i = 0; i < NUM_ROUNDS; ++i) {
+			delta = get_delta(&rt, &master_time_stamp);
+			if (delta == 0) {
+				done = 1;	/* let's lock on to this... */
+				bound = rt;
+			}
+
+			if (!done) {
+				if (i > 0) {
+					adjust_latency += -delta;
+					adj = -delta + adjust_latency/4;
+				} else
+					adj = -delta;
+
+				ia64_set_itc(ia64_get_itc() + adj);
+			}
+#if DEBUG_ITC_SYNC
+			t[i].rt = rt;
+			t[i].master = master_time_stamp;
+			t[i].diff = delta;
+			t[i].lat = adjust_latency/4;
+#endif
+		}
+	}
+	spin_unlock_irqrestore(&itc_sync_lock, flags);
+
+#if DEBUG_ITC_SYNC
+	for (i = 0; i < NUM_ROUNDS; ++i)
+		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
+		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
+#endif
+
+	printk(KERN_INFO "CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, "
+	       "maxerr %lu cycles)\n", smp_processor_id(), master, delta, rt);
+}
+
+/*
+ * Ideally sets up per-cpu profiling hooks.  Doesn't do much now...
+ */
+static inline void smp_setup_percpu_timer(void)
+{
+}
+
+static void
+smp_callin (void)
+{
+	int cpuid, phys_id, itc_master;
+	struct cpuinfo_ia64 *last_cpuinfo, *this_cpuinfo;
+	extern void ia64_init_itm(void);
+	extern volatile int time_keeper_id;
+
+#ifdef CONFIG_PERFMON
+	extern void pfm_init_percpu(void);
+#endif
+
+	cpuid = smp_processor_id();
+	phys_id = hard_smp_processor_id();
+	itc_master = time_keeper_id;
+
+	if (cpu_online(cpuid)) {
+		printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
+		       phys_id, cpuid);
+		BUG();
+	}
+
+	fix_b0_for_bsp();
+
+	/*
+	 * numa_node_id() works after this.
+	 */
+	set_numa_node(cpu_to_node_map[cpuid]);
+	set_numa_mem(local_memory_node(cpu_to_node_map[cpuid]));
+
+	spin_lock(&vector_lock);
+	/* Setup the per cpu irq handling data structures */
+	__setup_vector_irq(cpuid);
+	notify_cpu_starting(cpuid);
+	set_cpu_online(cpuid, true);
+	per_cpu(cpu_state, cpuid) = CPU_ONLINE;
+	spin_unlock(&vector_lock);
+
+	smp_setup_percpu_timer();
+
+	ia64_mca_cmc_vector_setup();	/* Setup vector on AP */
+
+#ifdef CONFIG_PERFMON
+	pfm_init_percpu();
+#endif
+
+	local_irq_enable();
+
+	if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
+		/*
+		 * Synchronize the ITC with the BP.  Need to do this after irqs are
+		 * enabled because ia64_sync_itc() calls smp_call_function_single(), which
+		 * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls
+		 * local_bh_enable(), which bugs out if irqs are not enabled...
+		 */
+		Dprintk("Going to syncup ITC with ITC Master.\n");
+		ia64_sync_itc(itc_master);
+	}
+
+	/*
+	 * Get our bogomips.
+	 */
+	ia64_init_itm();
+
+	/*
+	 * Delay calibration can be skipped if new processor is identical to the
+	 * previous processor.
+	 */
+	last_cpuinfo = cpu_data(cpuid - 1);
+	this_cpuinfo = local_cpu_data;
+	if (last_cpuinfo->itc_freq != this_cpuinfo->itc_freq ||
+	    last_cpuinfo->proc_freq != this_cpuinfo->proc_freq ||
+	    last_cpuinfo->features != this_cpuinfo->features ||
+	    last_cpuinfo->revision != this_cpuinfo->revision ||
+	    last_cpuinfo->family != this_cpuinfo->family ||
+	    last_cpuinfo->archrev != this_cpuinfo->archrev ||
+	    last_cpuinfo->model != this_cpuinfo->model)
+		calibrate_delay();
+	local_cpu_data->loops_per_jiffy = loops_per_jiffy;
+
+	/*
+	 * Allow the master to continue.
+	 */
+	cpu_set(cpuid, cpu_callin_map);
+	Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid);
+}
+
+
+/*
+ * Activate a secondary processor.  head.S calls this.
+ */
+int
+start_secondary (void *unused)
+{
+	/* Early console may use I/O ports */
+	ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase));
+#ifndef CONFIG_PRINTK_TIME
+	Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id());
+#endif
+	efi_map_pal_code();
+	cpu_init();
+	preempt_disable();
+	smp_callin();
+
+	cpu_startup_entry(CPUHP_ONLINE);
+	return 0;
+}
+
+static int
+do_boot_cpu (int sapicid, int cpu, struct task_struct *idle)
+{
+	int timeout;
+
+	task_for_booting_cpu = idle;
+	Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid);
+
+	set_brendez_area(cpu);
+	platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0);
+
+	/*
+	 * Wait 10s total for the AP to start
+	 */
+	Dprintk("Waiting on callin_map ...");
+	for (timeout = 0; timeout < 100000; timeout++) {
+		if (cpu_isset(cpu, cpu_callin_map))
+			break;  /* It has booted */
+		udelay(100);
+	}
+	Dprintk("\n");
+
+	if (!cpu_isset(cpu, cpu_callin_map)) {
+		printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid);
+		ia64_cpu_to_sapicid[cpu] = -1;
+		set_cpu_online(cpu, false);  /* was set in smp_callin() */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int __init
+decay (char *str)
+{
+	int ticks;
+	get_option (&str, &ticks);
+	return 1;
+}
+
+__setup("decay=", decay);
+
+/*
+ * Initialize the logical CPU number to SAPICID mapping
+ */
+void __init
+smp_build_cpu_map (void)
+{
+	int sapicid, cpu, i;
+	int boot_cpu_id = hard_smp_processor_id();
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		ia64_cpu_to_sapicid[cpu] = -1;
+	}
+
+	ia64_cpu_to_sapicid[0] = boot_cpu_id;
+	init_cpu_present(cpumask_of(0));
+	set_cpu_possible(0, true);
+	for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) {
+		sapicid = smp_boot_data.cpu_phys_id[i];
+		if (sapicid == boot_cpu_id)
+			continue;
+		set_cpu_present(cpu, true);
+		set_cpu_possible(cpu, true);
+		ia64_cpu_to_sapicid[cpu] = sapicid;
+		cpu++;
+	}
+}
+
+/*
+ * Cycle through the APs sending Wakeup IPIs to boot each.
+ */
+void __init
+smp_prepare_cpus (unsigned int max_cpus)
+{
+	int boot_cpu_id = hard_smp_processor_id();
+
+	/*
+	 * Initialize the per-CPU profiling counter/multiplier
+	 */
+
+	smp_setup_percpu_timer();
+
+	cpu_set(0, cpu_callin_map);
+
+	local_cpu_data->loops_per_jiffy = loops_per_jiffy;
+	ia64_cpu_to_sapicid[0] = boot_cpu_id;
+
+	printk(KERN_INFO "Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id);
+
+	current_thread_info()->cpu = 0;
+
+	/*
+	 * If SMP should be disabled, then really disable it!
+	 */
+	if (!max_cpus) {
+		printk(KERN_INFO "SMP mode deactivated.\n");
+		init_cpu_online(cpumask_of(0));
+		init_cpu_present(cpumask_of(0));
+		init_cpu_possible(cpumask_of(0));
+		return;
+	}
+}
+
+void smp_prepare_boot_cpu(void)
+{
+	set_cpu_online(smp_processor_id(), true);
+	cpu_set(smp_processor_id(), cpu_callin_map);
+	set_numa_node(cpu_to_node_map[smp_processor_id()]);
+	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
+	paravirt_post_smp_prepare_boot_cpu();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static inline void
+clear_cpu_sibling_map(int cpu)
+{
+	int i;
+
+	for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
+		cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
+	for_each_cpu_mask(i, cpu_core_map[cpu])
+		cpu_clear(cpu, cpu_core_map[i]);
+
+	per_cpu(cpu_sibling_map, cpu) = cpu_core_map[cpu] = CPU_MASK_NONE;
+}
+
+static void
+remove_siblinginfo(int cpu)
+{
+	int last = 0;
+
+	if (cpu_data(cpu)->threads_per_core == 1 &&
+	    cpu_data(cpu)->cores_per_socket == 1) {
+		cpu_clear(cpu, cpu_core_map[cpu]);
+		cpu_clear(cpu, per_cpu(cpu_sibling_map, cpu));
+		return;
+	}
+
+	last = (cpus_weight(cpu_core_map[cpu]) == 1 ? 1 : 0);
+
+	/* remove it from all sibling map's */
+	clear_cpu_sibling_map(cpu);
+}
+
+extern void fixup_irqs(void);
+
+int migrate_platform_irqs(unsigned int cpu)
+{
+	int new_cpei_cpu;
+	struct irq_data *data = NULL;
+	const struct cpumask *mask;
+	int 		retval = 0;
+
+	/*
+	 * dont permit CPEI target to removed.
+	 */
+	if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) {
+		printk ("CPU (%d) is CPEI Target\n", cpu);
+		if (can_cpei_retarget()) {
+			/*
+			 * Now re-target the CPEI to a different processor
+			 */
+			new_cpei_cpu = cpumask_any(cpu_online_mask);
+			mask = cpumask_of(new_cpei_cpu);
+			set_cpei_target_cpu(new_cpei_cpu);
+			data = irq_get_irq_data(ia64_cpe_irq);
+			/*
+			 * Switch for now, immediately, we need to do fake intr
+			 * as other interrupts, but need to study CPEI behaviour with
+			 * polling before making changes.
+			 */
+			if (data && data->chip) {
+				data->chip->irq_disable(data);
+				data->chip->irq_set_affinity(data, mask, false);
+				data->chip->irq_enable(data);
+				printk ("Re-targeting CPEI to cpu %d\n", new_cpei_cpu);
+			}
+		}
+		if (!data) {
+			printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu);
+			retval = -EBUSY;
+		}
+	}
+	return retval;
+}
+
+/* must be called with cpucontrol mutex held */
+int __cpu_disable(void)
+{
+	int cpu = smp_processor_id();
+
+	/*
+	 * dont permit boot processor for now
+	 */
+	if (cpu == 0 && !bsp_remove_ok) {
+		printk ("Your platform does not support removal of BSP\n");
+		return (-EBUSY);
+	}
+
+	if (ia64_platform_is("sn2")) {
+		if (!sn_cpu_disable_allowed(cpu))
+			return -EBUSY;
+	}
+
+	set_cpu_online(cpu, false);
+
+	if (migrate_platform_irqs(cpu)) {
+		set_cpu_online(cpu, true);
+		return -EBUSY;
+	}
+
+	remove_siblinginfo(cpu);
+	fixup_irqs();
+	local_flush_tlb_all();
+	cpu_clear(cpu, cpu_callin_map);
+	return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+	unsigned int i;
+
+	for (i = 0; i < 100; i++) {
+		/* They ack this in play_dead by setting CPU_DEAD */
+		if (per_cpu(cpu_state, cpu) == CPU_DEAD)
+		{
+			printk ("CPU %d is now offline\n", cpu);
+			return;
+		}
+		msleep(100);
+	}
+ 	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+void
+smp_cpus_done (unsigned int dummy)
+{
+	int cpu;
+	unsigned long bogosum = 0;
+
+	/*
+	 * Allow the user to impress friends.
+	 */
+
+	for_each_online_cpu(cpu) {
+		bogosum += cpu_data(cpu)->loops_per_jiffy;
+	}
+
+	printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+	       (int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100);
+}
+
+static inline void set_cpu_sibling_map(int cpu)
+{
+	int i;
+
+	for_each_online_cpu(i) {
+		if ((cpu_data(cpu)->socket_id == cpu_data(i)->socket_id)) {
+			cpu_set(i, cpu_core_map[cpu]);
+			cpu_set(cpu, cpu_core_map[i]);
+			if (cpu_data(cpu)->core_id == cpu_data(i)->core_id) {
+				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
+				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
+			}
+		}
+	}
+}
+
+int
+__cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+	int ret;
+	int sapicid;
+
+	sapicid = ia64_cpu_to_sapicid[cpu];
+	if (sapicid == -1)
+		return -EINVAL;
+
+	/*
+	 * Already booted cpu? not valid anymore since we dont
+	 * do idle loop tightspin anymore.
+	 */
+	if (cpu_isset(cpu, cpu_callin_map))
+		return -EINVAL;
+
+	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+	/* Processor goes to start_secondary(), sets online flag */
+	ret = do_boot_cpu(sapicid, cpu, tidle);
+	if (ret < 0)
+		return ret;
+
+	if (cpu_data(cpu)->threads_per_core == 1 &&
+	    cpu_data(cpu)->cores_per_socket == 1) {
+		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
+		cpu_set(cpu, cpu_core_map[cpu]);
+		return 0;
+	}
+
+	set_cpu_sibling_map(cpu);
+
+	return 0;
+}
+
+/*
+ * Assume that CPUs have been discovered by some platform-dependent interface.  For
+ * SoftSDV/Lion, that would be ACPI.
+ *
+ * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP().
+ */
+void __init
+init_smp_config(void)
+{
+	struct fptr {
+		unsigned long fp;
+		unsigned long gp;
+	} *ap_startup;
+	long sal_ret;
+
+	/* Tell SAL where to drop the APs.  */
+	ap_startup = (struct fptr *) start_ap;
+	sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
+				       ia64_tpa(ap_startup->fp), ia64_tpa(ap_startup->gp), 0, 0, 0, 0);
+	if (sal_ret < 0)
+		printk(KERN_ERR "SMP: Can't set SAL AP Boot Rendezvous: %s\n",
+		       ia64_sal_strerror(sal_ret));
+}
+
+/*
+ * identify_siblings(cpu) gets called from identify_cpu. This populates the 
+ * information related to logical execution units in per_cpu_data structure.
+ */
+void identify_siblings(struct cpuinfo_ia64 *c)
+{
+	long status;
+	u16 pltid;
+	pal_logical_to_physical_t info;
+
+	status = ia64_pal_logical_to_phys(-1, &info);
+	if (status != PAL_STATUS_SUCCESS) {
+		if (status != PAL_STATUS_UNIMPLEMENTED) {
+			printk(KERN_ERR
+				"ia64_pal_logical_to_phys failed with %ld\n",
+				status);
+			return;
+		}
+
+		info.overview_ppid = 0;
+		info.overview_cpp  = 1;
+		info.overview_tpc  = 1;
+	}
+
+	status = ia64_sal_physical_id_info(&pltid);
+	if (status != PAL_STATUS_SUCCESS) {
+		if (status != PAL_STATUS_UNIMPLEMENTED)
+			printk(KERN_ERR
+				"ia64_sal_pltid failed with %ld\n",
+				status);
+		return;
+	}
+
+	c->socket_id =  (pltid << 8) | info.overview_ppid;
+
+	if (info.overview_cpp == 1 && info.overview_tpc == 1)
+		return;
+
+	c->cores_per_socket = info.overview_cpp;
+	c->threads_per_core = info.overview_tpc;
+	c->num_log = info.overview_num_log;
+
+	c->core_id = info.log1_cid;
+	c->thread_id = info.log1_tid;
+}
+
+/*
+ * returns non zero, if multi-threading is enabled
+ * on at least one physical package. Due to hotplug cpu
+ * and (maxcpus=), all threads may not necessarily be enabled
+ * even though the processor supports multi-threading.
+ */
+int is_multithreading_enabled(void)
+{
+	int i, j;
+
+	for_each_present_cpu(i) {
+		for_each_present_cpu(j) {
+			if (j == i)
+				continue;
+			if ((cpu_data(j)->socket_id == cpu_data(i)->socket_id)) {
+				if (cpu_data(j)->core_id == cpu_data(i)->core_id)
+					return 1;
+			}
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(is_multithreading_enabled);
--- a/arch/ia64/kernel/stacktrace.c
+++ b/arch/ia64/kernel/stacktrace.c
@ -0,0 +1,39 @@
+/*
+ * arch/ia64/kernel/stacktrace.c
+ *
+ * Stack trace management functions
+ *
+ */
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+#include <linux/module.h>
+
+static void
+ia64_do_save_stack(struct unw_frame_info *info, void *arg)
+{
+	struct stack_trace *trace = arg;
+	unsigned long ip;
+	int skip = trace->skip;
+
+	trace->nr_entries = 0;
+	do {
+		unw_get_ip(info, &ip);
+		if (ip == 0)
+			break;
+		if (skip == 0) {
+			trace->entries[trace->nr_entries++] = ip;
+			if (trace->nr_entries == trace->max_entries)
+				break;
+		} else
+			skip--;
+	} while (unw_unwind(info) >= 0);
+}
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer.
+ */
+void save_stack_trace(struct stack_trace *trace)
+{
+	unw_init_running(ia64_do_save_stack, trace);
+}
+EXPORT_SYMBOL(save_stack_trace);
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@ -0,0 +1,183 @@
+/*
+ * This file contains various system calls that have different calling
+ * conventions on different platforms.
+ *
+ * Copyright (C) 1999-2000, 2002-2003, 2005 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/sched.h>
+#include <linux/shm.h>
+#include <linux/file.h>		/* doh, must come after sched.h... */
+#include <linux/smp.h>
+#include <linux/syscalls.h>
+#include <linux/highuid.h>
+#include <linux/hugetlb.h>
+
+#include <asm/shmparam.h>
+#include <asm/uaccess.h>
+
+unsigned long
+arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len,
+			unsigned long pgoff, unsigned long flags)
+{
+	long map_shared = (flags & MAP_SHARED);
+	unsigned long align_mask = 0;
+	struct mm_struct *mm = current->mm;
+	struct vm_unmapped_area_info info;
+
+	if (len > RGN_MAP_LIMIT)
+		return -ENOMEM;
+
+	/* handle fixed mapping: prevent overlap with huge pages */
+	if (flags & MAP_FIXED) {
+		if (is_hugepage_only_range(mm, addr, len))
+			return -EINVAL;
+		return addr;
+	}
+
+#ifdef CONFIG_HUGETLB_PAGE
+	if (REGION_NUMBER(addr) == RGN_HPAGE)
+		addr = 0;
+#endif
+	if (!addr)
+		addr = TASK_UNMAPPED_BASE;
+
+	if (map_shared && (TASK_SIZE > 0xfffffffful))
+		/*
+		 * For 64-bit tasks, align shared segments to 1MB to avoid potential
+		 * performance penalty due to virtual aliasing (see ASDM).  For 32-bit
+		 * tasks, we prefer to avoid exhausting the address space too quickly by
+		 * limiting alignment to a single page.
+		 */
+		align_mask = PAGE_MASK & (SHMLBA - 1);
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = addr;
+	info.high_limit = TASK_SIZE;
+	info.align_mask = align_mask;
+	info.align_offset = 0;
+	return vm_unmapped_area(&info);
+}
+
+asmlinkage long
+ia64_getpriority (int which, int who)
+{
+	long prio;
+
+	prio = sys_getpriority(which, who);
+	if (prio >= 0) {
+		force_successful_syscall_return();
+		prio = 20 - prio;
+	}
+	return prio;
+}
+
+/* XXX obsolete, but leave it here until the old libc is gone... */
+asmlinkage unsigned long
+sys_getpagesize (void)
+{
+	return PAGE_SIZE;
+}
+
+asmlinkage unsigned long
+ia64_brk (unsigned long brk)
+{
+	unsigned long retval = sys_brk(brk);
+	force_successful_syscall_return();
+	return retval;
+}
+
+/*
+ * On IA-64, we return the two file descriptors in ret0 and ret1 (r8
+ * and r9) as this is faster than doing a copy_to_user().
+ */
+asmlinkage long
+sys_ia64_pipe (void)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	int fd[2];
+	int retval;
+
+	retval = do_pipe_flags(fd, 0);
+	if (retval)
+		goto out;
+	retval = fd[0];
+	regs->r9 = fd[1];
+  out:
+	return retval;
+}
+
+int ia64_mmap_check(unsigned long addr, unsigned long len,
+		unsigned long flags)
+{
+	unsigned long roff;
+
+	/*
+	 * Don't permit mappings into unmapped space, the virtual page table
+	 * of a region, or across a region boundary.  Note: RGN_MAP_LIMIT is
+	 * equal to 2^n-PAGE_SIZE (for some integer n <= 61) and len > 0.
+	 */
+	roff = REGION_OFFSET(addr);
+	if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len)))
+		return -EINVAL;
+	return 0;
+}
+
+/*
+ * mmap2() is like mmap() except that the offset is expressed in units
+ * of PAGE_SIZE (instead of bytes).  This allows to mmap2() (pieces
+ * of) files that are larger than the address space of the CPU.
+ */
+asmlinkage unsigned long
+sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff)
+{
+	addr = sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
+	if (!IS_ERR((void *) addr))
+		force_successful_syscall_return();
+	return addr;
+}
+
+asmlinkage unsigned long
+sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, long off)
+{
+	if (offset_in_page(off) != 0)
+		return -EINVAL;
+
+	addr = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+	if (!IS_ERR((void *) addr))
+		force_successful_syscall_return();
+	return addr;
+}
+
+asmlinkage unsigned long
+ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags,
+	     unsigned long new_addr)
+{
+	addr = sys_mremap(addr, old_len, new_len, flags, new_addr);
+	if (!IS_ERR((void *) addr))
+		force_successful_syscall_return();
+	return addr;
+}
+
+#ifndef CONFIG_PCI
+
+asmlinkage long
+sys_pciconfig_read (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len,
+		    void *buf)
+{
+	return -ENOSYS;
+}
+
+asmlinkage long
+sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len,
+		     void *buf)
+{
+	return -ENOSYS;
+}
+
+#endif /* CONFIG_PCI */
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@ -0,0 +1,456 @@
+/*
+ * linux/arch/ia64/kernel/time.c
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *	David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ * Copyright (C) 1999-2000 VA Linux Systems
+ * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
+ */
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/profile.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/interrupt.h>
+#include <linux/efi.h>
+#include <linux/timex.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/platform_device.h>
+
+#include <asm/machvec.h>
+#include <asm/delay.h>
+#include <asm/hw_irq.h>
+#include <asm/paravirt.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/sections.h>
+
+#include "fsyscall_gtod_data.h"
+
+static cycle_t itc_get_cycles(struct clocksource *cs);
+
+struct fsyscall_gtod_data_t fsyscall_gtod_data;
+
+struct itc_jitter_data_t itc_jitter_data;
+
+volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
+
+#ifdef CONFIG_IA64_DEBUG_IRQ
+
+unsigned long last_cli_ip;
+EXPORT_SYMBOL(last_cli_ip);
+
+#endif
+
+#ifdef CONFIG_PARAVIRT
+/* We need to define a real function for sched_clock, to override the
+   weak default version */
+unsigned long long sched_clock(void)
+{
+        return paravirt_sched_clock();
+}
+#endif
+
+#ifdef CONFIG_PARAVIRT
+static void
+paravirt_clocksource_resume(struct clocksource *cs)
+{
+	if (pv_time_ops.clocksource_resume)
+		pv_time_ops.clocksource_resume();
+}
+#endif
+
+static struct clocksource clocksource_itc = {
+	.name           = "itc",
+	.rating         = 350,
+	.read           = itc_get_cycles,
+	.mask           = CLOCKSOURCE_MASK(64),
+	.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
+#ifdef CONFIG_PARAVIRT
+	.resume		= paravirt_clocksource_resume,
+#endif
+};
+static struct clocksource *itc_clocksource;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+
+#include <linux/kernel_stat.h>
+
+extern cputime_t cycle_to_cputime(u64 cyc);
+
+void vtime_account_user(struct task_struct *tsk)
+{
+	cputime_t delta_utime;
+	struct thread_info *ti = task_thread_info(tsk);
+
+	if (ti->ac_utime) {
+		delta_utime = cycle_to_cputime(ti->ac_utime);
+		account_user_time(tsk, delta_utime, delta_utime);
+		ti->ac_utime = 0;
+	}
+}
+
+/*
+ * Called from the context switch with interrupts disabled, to charge all
+ * accumulated times to the current process, and to prepare accounting on
+ * the next process.
+ */
+void arch_vtime_task_switch(struct task_struct *prev)
+{
+	struct thread_info *pi = task_thread_info(prev);
+	struct thread_info *ni = task_thread_info(current);
+
+	pi->ac_stamp = ni->ac_stamp;
+	ni->ac_stime = ni->ac_utime = 0;
+}
+
+/*
+ * Account time for a transition between system, hard irq or soft irq state.
+ * Note that this function is called with interrupts enabled.
+ */
+static cputime_t vtime_delta(struct task_struct *tsk)
+{
+	struct thread_info *ti = task_thread_info(tsk);
+	cputime_t delta_stime;
+	__u64 now;
+
+	WARN_ON_ONCE(!irqs_disabled());
+
+	now = ia64_get_itc();
+
+	delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
+	ti->ac_stime = 0;
+	ti->ac_stamp = now;
+
+	return delta_stime;
+}
+
+void vtime_account_system(struct task_struct *tsk)
+{
+	cputime_t delta = vtime_delta(tsk);
+
+	account_system_time(tsk, 0, delta, delta);
+}
+EXPORT_SYMBOL_GPL(vtime_account_system);
+
+void vtime_account_idle(struct task_struct *tsk)
+{
+	account_idle_time(vtime_delta(tsk));
+}
+
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+static irqreturn_t
+timer_interrupt (int irq, void *dev_id)
+{
+	unsigned long new_itm;
+
+	if (cpu_is_offline(smp_processor_id())) {
+		return IRQ_HANDLED;
+	}
+
+	platform_timer_interrupt(irq, dev_id);
+
+	new_itm = local_cpu_data->itm_next;
+
+	if (!time_after(ia64_get_itc(), new_itm))
+		printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
+		       ia64_get_itc(), new_itm);
+
+	profile_tick(CPU_PROFILING);
+
+	if (paravirt_do_steal_accounting(&new_itm))
+		goto skip_process_time_accounting;
+
+	while (1) {
+		update_process_times(user_mode(get_irq_regs()));
+
+		new_itm += local_cpu_data->itm_delta;
+
+		if (smp_processor_id() == time_keeper_id)
+			xtime_update(1);
+
+		local_cpu_data->itm_next = new_itm;
+
+		if (time_after(new_itm, ia64_get_itc()))
+			break;
+
+		/*
+		 * Allow IPIs to interrupt the timer loop.
+		 */
+		local_irq_enable();
+		local_irq_disable();
+	}
+
+skip_process_time_accounting:
+
+	do {
+		/*
+		 * If we're too close to the next clock tick for
+		 * comfort, we increase the safety margin by
+		 * intentionally dropping the next tick(s).  We do NOT
+		 * update itm.next because that would force us to call
+		 * xtime_update() which in turn would let our clock run
+		 * too fast (with the potentially devastating effect
+		 * of losing monotony of time).
+		 */
+		while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
+			new_itm += local_cpu_data->itm_delta;
+		ia64_set_itm(new_itm);
+		/* double check, in case we got hit by a (slow) PMI: */
+	} while (time_after_eq(ia64_get_itc(), new_itm));
+	return IRQ_HANDLED;
+}
+
+/*
+ * Encapsulate access to the itm structure for SMP.
+ */
+void
+ia64_cpu_local_tick (void)
+{
+	int cpu = smp_processor_id();
+	unsigned long shift = 0, delta;
+
+	/* arrange for the cycle counter to generate a timer interrupt: */
+	ia64_set_itv(IA64_TIMER_VECTOR);
+
+	delta = local_cpu_data->itm_delta;
+	/*
+	 * Stagger the timer tick for each CPU so they don't occur all at (almost) the
+	 * same time:
+	 */
+	if (cpu) {
+		unsigned long hi = 1UL << ia64_fls(cpu);
+		shift = (2*(cpu - hi) + 1) * delta/hi/2;
+	}
+	local_cpu_data->itm_next = ia64_get_itc() + delta + shift;
+	ia64_set_itm(local_cpu_data->itm_next);
+}
+
+static int nojitter;
+
+static int __init nojitter_setup(char *str)
+{
+	nojitter = 1;
+	printk("Jitter checking for ITC timers disabled\n");
+	return 1;
+}
+
+__setup("nojitter", nojitter_setup);
+
+
+void ia64_init_itm(void)
+{
+	unsigned long platform_base_freq, itc_freq;
+	struct pal_freq_ratio itc_ratio, proc_ratio;
+	long status, platform_base_drift, itc_drift;
+
+	/*
+	 * According to SAL v2.6, we need to use a SAL call to determine the platform base
+	 * frequency and then a PAL call to determine the frequency ratio between the ITC
+	 * and the base frequency.
+	 */
+	status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
+				    &platform_base_freq, &platform_base_drift);
+	if (status != 0) {
+		printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status));
+	} else {
+		status = ia64_pal_freq_ratios(&proc_ratio, NULL, &itc_ratio);
+		if (status != 0)
+			printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status);
+	}
+	if (status != 0) {
+		/* invent "random" values */
+		printk(KERN_ERR
+		       "SAL/PAL failed to obtain frequency info---inventing reasonable values\n");
+		platform_base_freq = 100000000;
+		platform_base_drift = -1;	/* no drift info */
+		itc_ratio.num = 3;
+		itc_ratio.den = 1;
+	}
+	if (platform_base_freq < 40000000) {
+		printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n",
+		       platform_base_freq);
+		platform_base_freq = 75000000;
+		platform_base_drift = -1;
+	}
+	if (!proc_ratio.den)
+		proc_ratio.den = 1;	/* avoid division by zero */
+	if (!itc_ratio.den)
+		itc_ratio.den = 1;	/* avoid division by zero */
+
+	itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den;
+
+	local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ;
+	printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%u/%u, "
+	       "ITC freq=%lu.%03luMHz", smp_processor_id(),
+	       platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000,
+	       itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000);
+
+	if (platform_base_drift != -1) {
+		itc_drift = platform_base_drift*itc_ratio.num/itc_ratio.den;
+		printk("+/-%ldppm\n", itc_drift);
+	} else {
+		itc_drift = -1;
+		printk("\n");
+	}
+
+	local_cpu_data->proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den;
+	local_cpu_data->itc_freq = itc_freq;
+	local_cpu_data->cyc_per_usec = (itc_freq + USEC_PER_SEC/2) / USEC_PER_SEC;
+	local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<<IA64_NSEC_PER_CYC_SHIFT)
+					+ itc_freq/2)/itc_freq;
+
+	if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
+#ifdef CONFIG_SMP
+		/* On IA64 in an SMP configuration ITCs are never accurately synchronized.
+		 * Jitter compensation requires a cmpxchg which may limit
+		 * the scalability of the syscalls for retrieving time.
+		 * The ITC synchronization is usually successful to within a few
+		 * ITC ticks but this is not a sure thing. If you need to improve
+		 * timer performance in SMP situations then boot the kernel with the
+		 * "nojitter" option. However, doing so may result in time fluctuating (maybe
+		 * even going backward) if the ITC offsets between the individual CPUs
+		 * are too large.
+		 */
+		if (!nojitter)
+			itc_jitter_data.itc_jitter = 1;
+#endif
+	} else
+		/*
+		 * ITC is drifty and we have not synchronized the ITCs in smpboot.c.
+		 * ITC values may fluctuate significantly between processors.
+		 * Clock should not be used for hrtimers. Mark itc as only
+		 * useful for boot and testing.
+		 *
+		 * Note that jitter compensation is off! There is no point of
+		 * synchronizing ITCs since they may be large differentials
+		 * that change over time.
+		 *
+		 * The only way to fix this would be to repeatedly sync the
+		 * ITCs. Until that time we have to avoid ITC.
+		 */
+		clocksource_itc.rating = 50;
+
+	paravirt_init_missing_ticks_accounting(smp_processor_id());
+
+	/* avoid softlock up message when cpu is unplug and plugged again. */
+	touch_softlockup_watchdog();
+
+	/* Setup the CPU local timer tick */
+	ia64_cpu_local_tick();
+
+	if (!itc_clocksource) {
+		clocksource_register_hz(&clocksource_itc,
+						local_cpu_data->itc_freq);
+		itc_clocksource = &clocksource_itc;
+	}
+}
+
+static cycle_t itc_get_cycles(struct clocksource *cs)
+{
+	unsigned long lcycle, now, ret;
+
+	if (!itc_jitter_data.itc_jitter)
+		return get_cycles();
+
+	lcycle = itc_jitter_data.itc_lastcycle;
+	now = get_cycles();
+	if (lcycle && time_after(lcycle, now))
+		return lcycle;
+
+	/*
+	 * Keep track of the last timer value returned.
+	 * In an SMP environment, you could lose out in contention of
+	 * cmpxchg. If so, your cmpxchg returns new value which the
+	 * winner of contention updated to. Use the new value instead.
+	 */
+	ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, now);
+	if (unlikely(ret != lcycle))
+		return ret;
+
+	return now;
+}
+
+
+static struct irqaction timer_irqaction = {
+	.handler =	timer_interrupt,
+	.flags =	IRQF_IRQPOLL,
+	.name =		"timer"
+};
+
+void read_persistent_clock(struct timespec *ts)
+{
+	efi_gettimeofday(ts);
+}
+
+void __init
+time_init (void)
+{
+	register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction);
+	ia64_init_itm();
+}
+
+/*
+ * Generic udelay assumes that if preemption is allowed and the thread
+ * migrates to another CPU, that the ITC values are synchronized across
+ * all CPUs.
+ */
+static void
+ia64_itc_udelay (unsigned long usecs)
+{
+	unsigned long start = ia64_get_itc();
+	unsigned long end = start + usecs*local_cpu_data->cyc_per_usec;
+
+	while (time_before(ia64_get_itc(), end))
+		cpu_relax();
+}
+
+void (*ia64_udelay)(unsigned long usecs) = &ia64_itc_udelay;
+
+void
+udelay (unsigned long usecs)
+{
+	(*ia64_udelay)(usecs);
+}
+EXPORT_SYMBOL(udelay);
+
+/* IA64 doesn't cache the timezone */
+void update_vsyscall_tz(void)
+{
+}
+
+void update_vsyscall_old(struct timespec *wall, struct timespec *wtm,
+			 struct clocksource *c, u32 mult, cycle_t cycle_last)
+{
+	write_seqcount_begin(&fsyscall_gtod_data.seq);
+
+        /* copy fsyscall clock data */
+        fsyscall_gtod_data.clk_mask = c->mask;
+        fsyscall_gtod_data.clk_mult = mult;
+        fsyscall_gtod_data.clk_shift = c->shift;
+        fsyscall_gtod_data.clk_fsys_mmio = c->archdata.fsys_mmio;
+        fsyscall_gtod_data.clk_cycle_last = cycle_last;
+
+	/* copy kernel time structures */
+        fsyscall_gtod_data.wall_time.tv_sec = wall->tv_sec;
+        fsyscall_gtod_data.wall_time.tv_nsec = wall->tv_nsec;
+	fsyscall_gtod_data.monotonic_time.tv_sec = wtm->tv_sec
+							+ wall->tv_sec;
+	fsyscall_gtod_data.monotonic_time.tv_nsec = wtm->tv_nsec
+							+ wall->tv_nsec;
+
+	/* normalize */
+	while (fsyscall_gtod_data.monotonic_time.tv_nsec >= NSEC_PER_SEC) {
+		fsyscall_gtod_data.monotonic_time.tv_nsec -= NSEC_PER_SEC;
+		fsyscall_gtod_data.monotonic_time.tv_sec++;
+	}
+
+	write_seqcount_end(&fsyscall_gtod_data.seq);
+}
+
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@ -0,0 +1,472 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * This file contains NUMA specific variables and functions which can
+ * be split away from DISCONTIGMEM and are used on NUMA machines with
+ * contiguous memory.
+ * 		2002/08/07 Erich Focht <efocht@ess.nec.de>
+ * Populate cpu entries in sysfs for non-numa systems as well
+ *  	Intel Corporation - Ashok Raj
+ * 02/27/2006 Zhang, Yanmin
+ *	Populate cpu cache entries in sysfs for cpu cache info
+ */
+
+#include <linux/cpu.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/node.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/nodemask.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
+#include <asm/mmzone.h>
+#include <asm/numa.h>
+#include <asm/cpu.h>
+
+static struct ia64_cpu *sysfs_cpus;
+
+void arch_fix_phys_package_id(int num, u32 slot)
+{
+#ifdef CONFIG_SMP
+	if (cpu_data(num)->socket_id == -1)
+		cpu_data(num)->socket_id = slot;
+#endif
+}
+EXPORT_SYMBOL_GPL(arch_fix_phys_package_id);
+
+
+#ifdef CONFIG_HOTPLUG_CPU
+int __ref arch_register_cpu(int num)
+{
+#ifdef CONFIG_ACPI
+	/*
+	 * If CPEI can be re-targeted or if this is not
+	 * CPEI target, then it is hotpluggable
+	 */
+	if (can_cpei_retarget() || !is_cpu_cpei_target(num))
+		sysfs_cpus[num].cpu.hotpluggable = 1;
+	map_cpu_to_node(num, node_cpuid[num].nid);
+#endif
+	return register_cpu(&sysfs_cpus[num].cpu, num);
+}
+EXPORT_SYMBOL(arch_register_cpu);
+
+void __ref arch_unregister_cpu(int num)
+{
+	unregister_cpu(&sysfs_cpus[num].cpu);
+#ifdef CONFIG_ACPI
+	unmap_cpu_from_node(num, cpu_to_node(num));
+#endif
+}
+EXPORT_SYMBOL(arch_unregister_cpu);
+#else
+static int __init arch_register_cpu(int num)
+{
+	return register_cpu(&sysfs_cpus[num].cpu, num);
+}
+#endif /*CONFIG_HOTPLUG_CPU*/
+
+
+static int __init topology_init(void)
+{
+	int i, err = 0;
+
+#ifdef CONFIG_NUMA
+	/*
+	 * MCD - Do we want to register all ONLINE nodes, or all POSSIBLE nodes?
+	 */
+	for_each_online_node(i) {
+		if ((err = register_one_node(i)))
+			goto out;
+	}
+#endif
+
+	sysfs_cpus = kzalloc(sizeof(struct ia64_cpu) * NR_CPUS, GFP_KERNEL);
+	if (!sysfs_cpus)
+		panic("kzalloc in topology_init failed - NR_CPUS too big?");
+
+	for_each_present_cpu(i) {
+		if((err = arch_register_cpu(i)))
+			goto out;
+	}
+out:
+	return err;
+}
+
+subsys_initcall(topology_init);
+
+
+/*
+ * Export cpu cache information through sysfs
+ */
+
+/*
+ *  A bunch of string array to get pretty printing
+ */
+static const char *cache_types[] = {
+	"",			/* not used */
+	"Instruction",
+	"Data",
+	"Unified"	/* unified */
+};
+
+static const char *cache_mattrib[]={
+	"WriteThrough",
+	"WriteBack",
+	"",		/* reserved */
+	""		/* reserved */
+};
+
+struct cache_info {
+	pal_cache_config_info_t	cci;
+	cpumask_t shared_cpu_map;
+	int level;
+	int type;
+	struct kobject kobj;
+};
+
+struct cpu_cache_info {
+	struct cache_info *cache_leaves;
+	int	num_cache_leaves;
+	struct kobject kobj;
+};
+
+static struct cpu_cache_info	all_cpu_cache_info[NR_CPUS];
+#define LEAF_KOBJECT_PTR(x,y)    (&all_cpu_cache_info[x].cache_leaves[y])
+
+#ifdef CONFIG_SMP
+static void cache_shared_cpu_map_setup(unsigned int cpu,
+		struct cache_info * this_leaf)
+{
+	pal_cache_shared_info_t	csi;
+	int num_shared, i = 0;
+	unsigned int j;
+
+	if (cpu_data(cpu)->threads_per_core <= 1 &&
+		cpu_data(cpu)->cores_per_socket <= 1) {
+		cpu_set(cpu, this_leaf->shared_cpu_map);
+		return;
+	}
+
+	if (ia64_pal_cache_shared_info(this_leaf->level,
+					this_leaf->type,
+					0,
+					&csi) != PAL_STATUS_SUCCESS)
+		return;
+
+	num_shared = (int) csi.num_shared;
+	do {
+		for_each_possible_cpu(j)
+			if (cpu_data(cpu)->socket_id == cpu_data(j)->socket_id
+				&& cpu_data(j)->core_id == csi.log1_cid
+				&& cpu_data(j)->thread_id == csi.log1_tid)
+				cpu_set(j, this_leaf->shared_cpu_map);
+
+		i++;
+	} while (i < num_shared &&
+		ia64_pal_cache_shared_info(this_leaf->level,
+				this_leaf->type,
+				i,
+				&csi) == PAL_STATUS_SUCCESS);
+}
+#else
+static void cache_shared_cpu_map_setup(unsigned int cpu,
+		struct cache_info * this_leaf)
+{
+	cpu_set(cpu, this_leaf->shared_cpu_map);
+	return;
+}
+#endif
+
+static ssize_t show_coherency_line_size(struct cache_info *this_leaf,
+					char *buf)
+{
+	return sprintf(buf, "%u\n", 1 << this_leaf->cci.pcci_line_size);
+}
+
+static ssize_t show_ways_of_associativity(struct cache_info *this_leaf,
+					char *buf)
+{
+	return sprintf(buf, "%u\n", this_leaf->cci.pcci_assoc);
+}
+
+static ssize_t show_attributes(struct cache_info *this_leaf, char *buf)
+{
+	return sprintf(buf,
+			"%s\n",
+			cache_mattrib[this_leaf->cci.pcci_cache_attr]);
+}
+
+static ssize_t show_size(struct cache_info *this_leaf, char *buf)
+{
+	return sprintf(buf, "%uK\n", this_leaf->cci.pcci_cache_size / 1024);
+}
+
+static ssize_t show_number_of_sets(struct cache_info *this_leaf, char *buf)
+{
+	unsigned number_of_sets = this_leaf->cci.pcci_cache_size;
+	number_of_sets /= this_leaf->cci.pcci_assoc;
+	number_of_sets /= 1 << this_leaf->cci.pcci_line_size;
+
+	return sprintf(buf, "%u\n", number_of_sets);
+}
+
+static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf)
+{
+	ssize_t	len;
+	cpumask_t shared_cpu_map;
+
+	cpumask_and(&shared_cpu_map,
+				&this_leaf->shared_cpu_map, cpu_online_mask);
+	len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map);
+	len += sprintf(buf+len, "\n");
+	return len;
+}
+
+static ssize_t show_type(struct cache_info *this_leaf, char *buf)
+{
+	int type = this_leaf->type + this_leaf->cci.pcci_unified;
+	return sprintf(buf, "%s\n", cache_types[type]);
+}
+
+static ssize_t show_level(struct cache_info *this_leaf, char *buf)
+{
+	return sprintf(buf, "%u\n", this_leaf->level);
+}
+
+struct cache_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct cache_info *, char *);
+	ssize_t (*store)(struct cache_info *, const char *, size_t count);
+};
+
+#ifdef define_one_ro
+	#undef define_one_ro
+#endif
+#define define_one_ro(_name) \
+	static struct cache_attr _name = \
+__ATTR(_name, 0444, show_##_name, NULL)
+
+define_one_ro(level);
+define_one_ro(type);
+define_one_ro(coherency_line_size);
+define_one_ro(ways_of_associativity);
+define_one_ro(size);
+define_one_ro(number_of_sets);
+define_one_ro(shared_cpu_map);
+define_one_ro(attributes);
+
+static struct attribute * cache_default_attrs[] = {
+	&type.attr,
+	&level.attr,
+	&coherency_line_size.attr,
+	&ways_of_associativity.attr,
+	&attributes.attr,
+	&size.attr,
+	&number_of_sets.attr,
+	&shared_cpu_map.attr,
+	NULL
+};
+
+#define to_object(k) container_of(k, struct cache_info, kobj)
+#define to_attr(a) container_of(a, struct cache_attr, attr)
+
+static ssize_t ia64_cache_show(struct kobject * kobj, struct attribute * attr, char * buf)
+{
+	struct cache_attr *fattr = to_attr(attr);
+	struct cache_info *this_leaf = to_object(kobj);
+	ssize_t ret;
+
+	ret = fattr->show ? fattr->show(this_leaf, buf) : 0;
+	return ret;
+}
+
+static const struct sysfs_ops cache_sysfs_ops = {
+	.show   = ia64_cache_show
+};
+
+static struct kobj_type cache_ktype = {
+	.sysfs_ops	= &cache_sysfs_ops,
+	.default_attrs	= cache_default_attrs,
+};
+
+static struct kobj_type cache_ktype_percpu_entry = {
+	.sysfs_ops	= &cache_sysfs_ops,
+};
+
+static void cpu_cache_sysfs_exit(unsigned int cpu)
+{
+	kfree(all_cpu_cache_info[cpu].cache_leaves);
+	all_cpu_cache_info[cpu].cache_leaves = NULL;
+	all_cpu_cache_info[cpu].num_cache_leaves = 0;
+	memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject));
+	return;
+}
+
+static int cpu_cache_sysfs_init(unsigned int cpu)
+{
+	unsigned long i, levels, unique_caches;
+	pal_cache_config_info_t cci;
+	int j;
+	long status;
+	struct cache_info *this_cache;
+	int num_cache_leaves = 0;
+
+	if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) {
+		printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status);
+		return -1;
+	}
+
+	this_cache=kzalloc(sizeof(struct cache_info)*unique_caches,
+			GFP_KERNEL);
+	if (this_cache == NULL)
+		return -ENOMEM;
+
+	for (i=0; i < levels; i++) {
+		for (j=2; j >0 ; j--) {
+			if ((status=ia64_pal_cache_config_info(i,j, &cci)) !=
+					PAL_STATUS_SUCCESS)
+				continue;
+
+			this_cache[num_cache_leaves].cci = cci;
+			this_cache[num_cache_leaves].level = i + 1;
+			this_cache[num_cache_leaves].type = j;
+
+			cache_shared_cpu_map_setup(cpu,
+					&this_cache[num_cache_leaves]);
+			num_cache_leaves ++;
+		}
+	}
+
+	all_cpu_cache_info[cpu].cache_leaves = this_cache;
+	all_cpu_cache_info[cpu].num_cache_leaves = num_cache_leaves;
+
+	memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject));
+
+	return 0;
+}
+
+/* Add cache interface for CPU device */
+static int cache_add_dev(struct device *sys_dev)
+{
+	unsigned int cpu = sys_dev->id;
+	unsigned long i, j;
+	struct cache_info *this_object;
+	int retval = 0;
+	cpumask_t oldmask;
+
+	if (all_cpu_cache_info[cpu].kobj.parent)
+		return 0;
+
+	oldmask = current->cpus_allowed;
+	retval = set_cpus_allowed_ptr(current, cpumask_of(cpu));
+	if (unlikely(retval))
+		return retval;
+
+	retval = cpu_cache_sysfs_init(cpu);
+	set_cpus_allowed_ptr(current, &oldmask);
+	if (unlikely(retval < 0))
+		return retval;
+
+	retval = kobject_init_and_add(&all_cpu_cache_info[cpu].kobj,
+				      &cache_ktype_percpu_entry, &sys_dev->kobj,
+				      "%s", "cache");
+	if (unlikely(retval < 0)) {
+		cpu_cache_sysfs_exit(cpu);
+		return retval;
+	}
+
+	for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) {
+		this_object = LEAF_KOBJECT_PTR(cpu,i);
+		retval = kobject_init_and_add(&(this_object->kobj),
+					      &cache_ktype,
+					      &all_cpu_cache_info[cpu].kobj,
+					      "index%1lu", i);
+		if (unlikely(retval)) {
+			for (j = 0; j < i; j++) {
+				kobject_put(&(LEAF_KOBJECT_PTR(cpu,j)->kobj));
+			}
+			kobject_put(&all_cpu_cache_info[cpu].kobj);
+			cpu_cache_sysfs_exit(cpu);
+			return retval;
+		}
+		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
+	}
+	kobject_uevent(&all_cpu_cache_info[cpu].kobj, KOBJ_ADD);
+	return retval;
+}
+
+/* Remove cache interface for CPU device */
+static int cache_remove_dev(struct device *sys_dev)
+{
+	unsigned int cpu = sys_dev->id;
+	unsigned long i;
+
+	for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++)
+		kobject_put(&(LEAF_KOBJECT_PTR(cpu,i)->kobj));
+
+	if (all_cpu_cache_info[cpu].kobj.parent) {
+		kobject_put(&all_cpu_cache_info[cpu].kobj);
+		memset(&all_cpu_cache_info[cpu].kobj,
+			0,
+			sizeof(struct kobject));
+	}
+
+	cpu_cache_sysfs_exit(cpu);
+
+	return 0;
+}
+
+/*
+ * When a cpu is hot-plugged, do a check and initiate
+ * cache kobject if necessary
+ */
+static int cache_cpu_callback(struct notifier_block *nfb,
+		unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct device *sys_dev;
+
+	sys_dev = get_cpu_device(cpu);
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		cache_add_dev(sys_dev);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		cache_remove_dev(sys_dev);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cache_cpu_notifier =
+{
+	.notifier_call = cache_cpu_callback
+};
+
+static int __init cache_sysfs_init(void)
+{
+	int i;
+
+	cpu_notifier_register_begin();
+
+	for_each_online_cpu(i) {
+		struct device *sys_dev = get_cpu_device((unsigned int)i);
+		cache_add_dev(sys_dev);
+	}
+
+	__register_hotcpu_notifier(&cache_cpu_notifier);
+
+	cpu_notifier_register_done();
+
+	return 0;
+}
+
+device_initcall(cache_sysfs_init);
+
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@ -0,0 +1,652 @@
+/*
+ * Architecture-specific trap handling.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 05/12/00 grao <goutham.rao@intel.com> : added isr in siginfo for SIGFPE
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h>		/* For unblank_screen() */
+#include <linux/module.h>       /* for EXPORT_SYMBOL */
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/delay.h>		/* for ssleep() */
+#include <linux/kdebug.h>
+
+#include <asm/fpswa.h>
+#include <asm/intrinsics.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+#include <asm/setup.h>
+
+fpswa_interface_t *fpswa_interface;
+EXPORT_SYMBOL(fpswa_interface);
+
+void __init
+trap_init (void)
+{
+	if (ia64_boot_param->fpswa)
+		/* FPSWA fixup: make the interface pointer a kernel virtual address: */
+		fpswa_interface = __va(ia64_boot_param->fpswa);
+}
+
+int
+die (const char *str, struct pt_regs *regs, long err)
+{
+	static struct {
+		spinlock_t lock;
+		u32 lock_owner;
+		int lock_owner_depth;
+	} die = {
+		.lock =	__SPIN_LOCK_UNLOCKED(die.lock),
+		.lock_owner = -1,
+		.lock_owner_depth = 0
+	};
+	static int die_counter;
+	int cpu = get_cpu();
+
+	if (die.lock_owner != cpu) {
+		console_verbose();
+		spin_lock_irq(&die.lock);
+		die.lock_owner = cpu;
+		die.lock_owner_depth = 0;
+		bust_spinlocks(1);
+	}
+	put_cpu();
+
+	if (++die.lock_owner_depth < 3) {
+		printk("%s[%d]: %s %ld [%d]\n",
+		current->comm, task_pid_nr(current), str, err, ++die_counter);
+		if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV)
+	            != NOTIFY_STOP)
+			show_regs(regs);
+		else
+			regs = NULL;
+  	} else
+		printk(KERN_ERR "Recursive die() failure, output suppressed\n");
+
+	bust_spinlocks(0);
+	die.lock_owner = -1;
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+	spin_unlock_irq(&die.lock);
+
+	if (!regs)
+		return 1;
+
+	if (panic_on_oops)
+		panic("Fatal exception");
+
+  	do_exit(SIGSEGV);
+	return 0;
+}
+
+int
+die_if_kernel (char *str, struct pt_regs *regs, long err)
+{
+	if (!user_mode(regs))
+		return die(str, regs, err);
+	return 0;
+}
+
+void
+__kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
+{
+	siginfo_t siginfo;
+	int sig, code;
+
+	/* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */
+	siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
+	siginfo.si_imm = break_num;
+	siginfo.si_flags = 0;		/* clear __ISR_VALID */
+	siginfo.si_isr = 0;
+
+	switch (break_num) {
+	      case 0: /* unknown error (used by GCC for __builtin_abort()) */
+		if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP)
+			       	== NOTIFY_STOP)
+			return;
+		if (die_if_kernel("bugcheck!", regs, break_num))
+			return;
+		sig = SIGILL; code = ILL_ILLOPC;
+		break;
+
+	      case 1: /* integer divide by zero */
+		sig = SIGFPE; code = FPE_INTDIV;
+		break;
+
+	      case 2: /* integer overflow */
+		sig = SIGFPE; code = FPE_INTOVF;
+		break;
+
+	      case 3: /* range check/bounds check */
+		sig = SIGFPE; code = FPE_FLTSUB;
+		break;
+
+	      case 4: /* null pointer dereference */
+		sig = SIGSEGV; code = SEGV_MAPERR;
+		break;
+
+	      case 5: /* misaligned data */
+		sig = SIGSEGV; code = BUS_ADRALN;
+		break;
+
+	      case 6: /* decimal overflow */
+		sig = SIGFPE; code = __FPE_DECOVF;
+		break;
+
+	      case 7: /* decimal divide by zero */
+		sig = SIGFPE; code = __FPE_DECDIV;
+		break;
+
+	      case 8: /* packed decimal error */
+		sig = SIGFPE; code = __FPE_DECERR;
+		break;
+
+	      case 9: /* invalid ASCII digit */
+		sig = SIGFPE; code = __FPE_INVASC;
+		break;
+
+	      case 10: /* invalid decimal digit */
+		sig = SIGFPE; code = __FPE_INVDEC;
+		break;
+
+	      case 11: /* paragraph stack overflow */
+		sig = SIGSEGV; code = __SEGV_PSTKOVF;
+		break;
+
+	      case 0x3f000 ... 0x3ffff:	/* bundle-update in progress */
+		sig = SIGILL; code = __ILL_BNDMOD;
+		break;
+
+	      default:
+		if ((break_num < 0x40000 || break_num > 0x100000)
+		    && die_if_kernel("Bad break", regs, break_num))
+			return;
+
+		if (break_num < 0x80000) {
+			sig = SIGILL; code = __ILL_BREAK;
+		} else {
+			if (notify_die(DIE_BREAK, "bad break", regs, break_num, TRAP_BRKPT, SIGTRAP)
+					== NOTIFY_STOP)
+				return;
+			sig = SIGTRAP; code = TRAP_BRKPT;
+		}
+	}
+	siginfo.si_signo = sig;
+	siginfo.si_errno = 0;
+	siginfo.si_code = code;
+	force_sig_info(sig, &siginfo, current);
+}
+
+/*
+ * disabled_fph_fault() is called when a user-level process attempts to access f32..f127
+ * and it doesn't own the fp-high register partition.  When this happens, we save the
+ * current fph partition in the task_struct of the fpu-owner (if necessary) and then load
+ * the fp-high partition of the current task (if necessary).  Note that the kernel has
+ * access to fph by the time we get here, as the IVT's "Disabled FP-Register" handler takes
+ * care of clearing psr.dfh.
+ */
+static inline void
+disabled_fph_fault (struct pt_regs *regs)
+{
+	struct ia64_psr *psr = ia64_psr(regs);
+
+	/* first, grant user-level access to fph partition: */
+	psr->dfh = 0;
+
+	/*
+	 * Make sure that no other task gets in on this processor
+	 * while we're claiming the FPU
+	 */
+	preempt_disable();
+#ifndef CONFIG_SMP
+	{
+		struct task_struct *fpu_owner
+			= (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER);
+
+		if (ia64_is_local_fpu_owner(current)) {
+			preempt_enable_no_resched();
+			return;
+		}
+
+		if (fpu_owner)
+			ia64_flush_fph(fpu_owner);
+	}
+#endif /* !CONFIG_SMP */
+	ia64_set_local_fpu_owner(current);
+	if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0) {
+		__ia64_load_fpu(current->thread.fph);
+		psr->mfh = 0;
+	} else {
+		__ia64_init_fpu();
+		/*
+		 * Set mfh because the state in thread.fph does not match the state in
+		 * the fph partition.
+		 */
+		psr->mfh = 1;
+	}
+	preempt_enable_no_resched();
+}
+
+static inline int
+fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs,
+	    struct pt_regs *regs)
+{
+	fp_state_t fp_state;
+	fpswa_ret_t ret;
+
+	if (!fpswa_interface)
+		return -1;
+
+	memset(&fp_state, 0, sizeof(fp_state_t));
+
+	/*
+	 * compute fp_state.  only FP registers f6 - f11 are used by the
+	 * kernel, so set those bits in the mask and set the low volatile
+	 * pointer to point to these registers.
+	 */
+	fp_state.bitmask_low64 = 0xfc0;  /* bit6..bit11 */
+
+	fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
+	/*
+	 * unsigned long (*EFI_FPSWA) (
+	 *      unsigned long    trap_type,
+	 *	void             *Bundle,
+	 *	unsigned long    *pipsr,
+	 *	unsigned long    *pfsr,
+	 *	unsigned long    *pisr,
+	 *	unsigned long    *ppreds,
+	 *	unsigned long    *pifs,
+	 *	void             *fp_state);
+	 */
+	ret = (*fpswa_interface->fpswa)((unsigned long) fp_fault, bundle,
+					(unsigned long *) ipsr, (unsigned long *) fpsr,
+					(unsigned long *) isr, (unsigned long *) pr,
+					(unsigned long *) ifs, &fp_state);
+
+	return ret.status;
+}
+
+struct fpu_swa_msg {
+	unsigned long count;
+	unsigned long time;
+};
+static DEFINE_PER_CPU(struct fpu_swa_msg, cpulast);
+DECLARE_PER_CPU(struct fpu_swa_msg, cpulast);
+static struct fpu_swa_msg last __cacheline_aligned;
+
+
+/*
+ * Handle floating-point assist faults and traps.
+ */
+static int
+handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
+{
+	long exception, bundle[2];
+	unsigned long fault_ip;
+	struct siginfo siginfo;
+
+	fault_ip = regs->cr_iip;
+	if (!fp_fault && (ia64_psr(regs)->ri == 0))
+		fault_ip -= 16;
+	if (copy_from_user(bundle, (void __user *) fault_ip, sizeof(bundle)))
+		return -1;
+
+	if (!(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT))  {
+		unsigned long count, current_jiffies = jiffies;
+		struct fpu_swa_msg *cp = this_cpu_ptr(&cpulast);
+
+		if (unlikely(current_jiffies > cp->time))
+			cp->count = 0;
+		if (unlikely(cp->count < 5)) {
+			cp->count++;
+			cp->time = current_jiffies + 5 * HZ;
+
+			/* minimize races by grabbing a copy of count BEFORE checking last.time. */
+			count = last.count;
+			barrier();
+
+			/*
+			 * Lower 4 bits are used as a count. Upper bits are a sequence
+			 * number that is updated when count is reset. The cmpxchg will
+			 * fail is seqno has changed. This minimizes mutiple cpus
+			 * resetting the count.
+			 */
+			if (current_jiffies > last.time)
+				(void) cmpxchg_acq(&last.count, count, 16 + (count & ~15));
+
+			/* used fetchadd to atomically update the count */
+			if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) {
+				last.time = current_jiffies + 5 * HZ;
+				printk(KERN_WARNING
+		       			"%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
+		       			current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
+			}
+		}
+	}
+
+	exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr,
+			       &regs->cr_ifs, regs);
+	if (fp_fault) {
+		if (exception == 0) {
+			/* emulation was successful */
+			ia64_increment_ip(regs);
+		} else if (exception == -1) {
+			printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n");
+			return -1;
+		} else {
+			/* is next instruction a trap? */
+			if (exception & 2) {
+				ia64_increment_ip(regs);
+			}
+			siginfo.si_signo = SIGFPE;
+			siginfo.si_errno = 0;
+			siginfo.si_code = __SI_FAULT;	/* default code */
+			siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
+			if (isr & 0x11) {
+				siginfo.si_code = FPE_FLTINV;
+			} else if (isr & 0x22) {
+				/* denormal operand gets the same si_code as underflow 
+				* see arch/i386/kernel/traps.c:math_error()  */
+				siginfo.si_code = FPE_FLTUND;
+			} else if (isr & 0x44) {
+				siginfo.si_code = FPE_FLTDIV;
+			}
+			siginfo.si_isr = isr;
+			siginfo.si_flags = __ISR_VALID;
+			siginfo.si_imm = 0;
+			force_sig_info(SIGFPE, &siginfo, current);
+		}
+	} else {
+		if (exception == -1) {
+			printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n");
+			return -1;
+		} else if (exception != 0) {
+			/* raise exception */
+			siginfo.si_signo = SIGFPE;
+			siginfo.si_errno = 0;
+			siginfo.si_code = __SI_FAULT;	/* default code */
+			siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
+			if (isr & 0x880) {
+				siginfo.si_code = FPE_FLTOVF;
+			} else if (isr & 0x1100) {
+				siginfo.si_code = FPE_FLTUND;
+			} else if (isr & 0x2200) {
+				siginfo.si_code = FPE_FLTRES;
+			}
+			siginfo.si_isr = isr;
+			siginfo.si_flags = __ISR_VALID;
+			siginfo.si_imm = 0;
+			force_sig_info(SIGFPE, &siginfo, current);
+		}
+	}
+	return 0;
+}
+
+struct illegal_op_return {
+	unsigned long fkt, arg1, arg2, arg3;
+};
+
+struct illegal_op_return
+ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3,
+		       long arg4, long arg5, long arg6, long arg7,
+		       struct pt_regs regs)
+{
+	struct illegal_op_return rv;
+	struct siginfo si;
+	char buf[128];
+
+#ifdef CONFIG_IA64_BRL_EMU
+	{
+		extern struct illegal_op_return ia64_emulate_brl (struct pt_regs *, unsigned long);
+
+		rv = ia64_emulate_brl(&regs, ec);
+		if (rv.fkt != (unsigned long) -1)
+			return rv;
+	}
+#endif
+
+	sprintf(buf, "IA-64 Illegal operation fault");
+	rv.fkt = 0;
+	if (die_if_kernel(buf, &regs, 0))
+		return rv;
+
+	memset(&si, 0, sizeof(si));
+	si.si_signo = SIGILL;
+	si.si_code = ILL_ILLOPC;
+	si.si_addr = (void __user *) (regs.cr_iip + ia64_psr(&regs)->ri);
+	force_sig_info(SIGILL, &si, current);
+	return rv;
+}
+
+void __kprobes
+ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
+	    unsigned long iim, unsigned long itir, long arg5, long arg6,
+	    long arg7, struct pt_regs regs)
+{
+	unsigned long code, error = isr, iip;
+	struct siginfo siginfo;
+	char buf[128];
+	int result, sig;
+	static const char *reason[] = {
+		"IA-64 Illegal Operation fault",
+		"IA-64 Privileged Operation fault",
+		"IA-64 Privileged Register fault",
+		"IA-64 Reserved Register/Field fault",
+		"Disabled Instruction Set Transition fault",
+		"Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault",
+		"Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12",
+		"Unknown fault 13", "Unknown fault 14", "Unknown fault 15"
+	};
+
+	if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) {
+		/*
+		 * This fault was due to lfetch.fault, set "ed" bit in the psr to cancel
+		 * the lfetch.
+		 */
+		ia64_psr(&regs)->ed = 1;
+		return;
+	}
+
+	iip = regs.cr_iip + ia64_psr(&regs)->ri;
+
+	switch (vector) {
+	      case 24: /* General Exception */
+		code = (isr >> 4) & 0xf;
+		sprintf(buf, "General Exception: %s%s", reason[code],
+			(code == 3) ? ((isr & (1UL << 37))
+				       ? " (RSE access)" : " (data access)") : "");
+		if (code == 8) {
+# ifdef CONFIG_IA64_PRINT_HAZARDS
+			printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n",
+			       current->comm, task_pid_nr(current),
+			       regs.cr_iip + ia64_psr(&regs)->ri, regs.pr);
+# endif
+			return;
+		}
+		break;
+
+	      case 25: /* Disabled FP-Register */
+		if (isr & 2) {
+			disabled_fph_fault(&regs);
+			return;
+		}
+		sprintf(buf, "Disabled FPL fault---not supposed to happen!");
+		break;
+
+	      case 26: /* NaT Consumption */
+		if (user_mode(&regs)) {
+			void __user *addr;
+
+			if (((isr >> 4) & 0xf) == 2) {
+				/* NaT page consumption */
+				sig = SIGSEGV;
+				code = SEGV_ACCERR;
+				addr = (void __user *) ifa;
+			} else {
+				/* register NaT consumption */
+				sig = SIGILL;
+				code = ILL_ILLOPN;
+				addr = (void __user *) (regs.cr_iip
+							+ ia64_psr(&regs)->ri);
+			}
+			siginfo.si_signo = sig;
+			siginfo.si_code = code;
+			siginfo.si_errno = 0;
+			siginfo.si_addr = addr;
+			siginfo.si_imm = vector;
+			siginfo.si_flags = __ISR_VALID;
+			siginfo.si_isr = isr;
+			force_sig_info(sig, &siginfo, current);
+			return;
+		} else if (ia64_done_with_exception(&regs))
+			return;
+		sprintf(buf, "NaT consumption");
+		break;
+
+	      case 31: /* Unsupported Data Reference */
+		if (user_mode(&regs)) {
+			siginfo.si_signo = SIGILL;
+			siginfo.si_code = ILL_ILLOPN;
+			siginfo.si_errno = 0;
+			siginfo.si_addr = (void __user *) iip;
+			siginfo.si_imm = vector;
+			siginfo.si_flags = __ISR_VALID;
+			siginfo.si_isr = isr;
+			force_sig_info(SIGILL, &siginfo, current);
+			return;
+		}
+		sprintf(buf, "Unsupported data reference");
+		break;
+
+	      case 29: /* Debug */
+	      case 35: /* Taken Branch Trap */
+	      case 36: /* Single Step Trap */
+		if (fsys_mode(current, &regs)) {
+			extern char __kernel_syscall_via_break[];
+			/*
+			 * Got a trap in fsys-mode: Taken Branch Trap
+			 * and Single Step trap need special handling;
+			 * Debug trap is ignored (we disable it here
+			 * and re-enable it in the lower-privilege trap).
+			 */
+			if (unlikely(vector == 29)) {
+				set_thread_flag(TIF_DB_DISABLED);
+				ia64_psr(&regs)->db = 0;
+				ia64_psr(&regs)->lp = 1;
+				return;
+			}
+			/* re-do the system call via break 0x100000: */
+			regs.cr_iip = (unsigned long) __kernel_syscall_via_break;
+			ia64_psr(&regs)->ri = 0;
+			ia64_psr(&regs)->cpl = 3;
+			return;
+		}
+		switch (vector) {
+		      case 29:
+			siginfo.si_code = TRAP_HWBKPT;
+#ifdef CONFIG_ITANIUM
+			/*
+			 * Erratum 10 (IFA may contain incorrect address) now has
+			 * "NoFix" status.  There are no plans for fixing this.
+			 */
+			if (ia64_psr(&regs)->is == 0)
+			  ifa = regs.cr_iip;
+#endif
+			break;
+		      case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break;
+		      case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break;
+		}
+		if (notify_die(DIE_FAULT, "ia64_fault", &regs, vector, siginfo.si_code, SIGTRAP)
+			       	== NOTIFY_STOP)
+			return;
+		siginfo.si_signo = SIGTRAP;
+		siginfo.si_errno = 0;
+		siginfo.si_addr  = (void __user *) ifa;
+		siginfo.si_imm   = 0;
+		siginfo.si_flags = __ISR_VALID;
+		siginfo.si_isr   = isr;
+		force_sig_info(SIGTRAP, &siginfo, current);
+		return;
+
+	      case 32: /* fp fault */
+	      case 33: /* fp trap */
+		result = handle_fpu_swa((vector == 32) ? 1 : 0, &regs, isr);
+		if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
+			siginfo.si_signo = SIGFPE;
+			siginfo.si_errno = 0;
+			siginfo.si_code = FPE_FLTINV;
+			siginfo.si_addr = (void __user *) iip;
+			siginfo.si_flags = __ISR_VALID;
+			siginfo.si_isr = isr;
+			siginfo.si_imm = 0;
+			force_sig_info(SIGFPE, &siginfo, current);
+		}
+		return;
+
+	      case 34:
+		if (isr & 0x2) {
+			/* Lower-Privilege Transfer Trap */
+
+			/* If we disabled debug traps during an fsyscall,
+			 * re-enable them here.
+			 */
+			if (test_thread_flag(TIF_DB_DISABLED)) {
+				clear_thread_flag(TIF_DB_DISABLED);
+				ia64_psr(&regs)->db = 1;
+			}
+
+			/*
+			 * Just clear PSR.lp and then return immediately:
+			 * all the interesting work (e.g., signal delivery)
+			 * is done in the kernel exit path.
+			 */
+			ia64_psr(&regs)->lp = 0;
+			return;
+		} else {
+			/* Unimplemented Instr. Address Trap */
+			if (user_mode(&regs)) {
+				siginfo.si_signo = SIGILL;
+				siginfo.si_code = ILL_BADIADDR;
+				siginfo.si_errno = 0;
+				siginfo.si_flags = 0;
+				siginfo.si_isr = 0;
+				siginfo.si_imm = 0;
+				siginfo.si_addr = (void __user *) iip;
+				force_sig_info(SIGILL, &siginfo, current);
+				return;
+			}
+			sprintf(buf, "Unimplemented Instruction Address fault");
+		}
+		break;
+
+	      case 45:
+		printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n");
+		printk(KERN_ERR "  iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
+		       iip, ifa, isr);
+		force_sig(SIGSEGV, current);
+		return;
+
+	      case 46:
+		printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
+		printk(KERN_ERR "  iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n",
+		       iip, ifa, isr, iim);
+		force_sig(SIGSEGV, current);
+		return;
+
+	      case 47:
+		sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16);
+		break;
+
+	      default:
+		sprintf(buf, "Fault %lu", vector);
+		break;
+	}
+	if (!die_if_kernel(buf, &regs, error))
+		force_sig(SIGILL, current);
+}
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 2001-2008 Silicon Graphics, Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * A simple uncached page allocator using the generic allocator. This
+ * allocator first utilizes the spare (spill) pages found in the EFI
+ * memmap and will then start converting cached pages to uncached ones
+ * at a granule at a time. Node awareness is implemented by having a
+ * pool of pages per node.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/efi.h>
+#include <linux/genalloc.h>
+#include <linux/gfp.h>
+#include <asm/page.h>
+#include <asm/pal.h>
+#include <asm/pgtable.h>
+#include <linux/atomic.h>
+#include <asm/tlbflush.h>
+#include <asm/sn/arch.h>
+
+
+extern void __init efi_memmap_walk_uc(efi_freemem_callback_t, void *);
+
+struct uncached_pool {
+	struct gen_pool *pool;
+	struct mutex add_chunk_mutex;	/* serialize adding a converted chunk */
+	int nchunks_added;		/* #of converted chunks added to pool */
+	atomic_t status;		/* smp called function's return status*/
+};
+
+#define MAX_CONVERTED_CHUNKS_PER_NODE	2
+
+struct uncached_pool uncached_pools[MAX_NUMNODES];
+
+
+static void uncached_ipi_visibility(void *data)
+{
+	int status;
+	struct uncached_pool *uc_pool = (struct uncached_pool *)data;
+
+	status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
+	if ((status != PAL_VISIBILITY_OK) &&
+	    (status != PAL_VISIBILITY_OK_REMOTE_NEEDED))
+		atomic_inc(&uc_pool->status);
+}
+
+
+static void uncached_ipi_mc_drain(void *data)
+{
+	int status;
+	struct uncached_pool *uc_pool = (struct uncached_pool *)data;
+
+	status = ia64_pal_mc_drain();
+	if (status != PAL_STATUS_SUCCESS)
+		atomic_inc(&uc_pool->status);
+}
+
+
+/*
+ * Add a new chunk of uncached memory pages to the specified pool.
+ *
+ * @pool: pool to add new chunk of uncached memory to
+ * @nid: node id of node to allocate memory from, or -1
+ *
+ * This is accomplished by first allocating a granule of cached memory pages
+ * and then converting them to uncached memory pages.
+ */
+static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
+{
+	struct page *page;
+	int status, i, nchunks_added = uc_pool->nchunks_added;
+	unsigned long c_addr, uc_addr;
+
+	if (mutex_lock_interruptible(&uc_pool->add_chunk_mutex) != 0)
+		return -1;	/* interrupted by a signal */
+
+	if (uc_pool->nchunks_added > nchunks_added) {
+		/* someone added a new chunk while we were waiting */
+		mutex_unlock(&uc_pool->add_chunk_mutex);
+		return 0;
+	}
+
+	if (uc_pool->nchunks_added >= MAX_CONVERTED_CHUNKS_PER_NODE) {
+		mutex_unlock(&uc_pool->add_chunk_mutex);
+		return -1;
+	}
+
+	/* attempt to allocate a granule's worth of cached memory pages */
+
+	page = alloc_pages_exact_node(nid,
+				GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
+				IA64_GRANULE_SHIFT-PAGE_SHIFT);
+	if (!page) {
+		mutex_unlock(&uc_pool->add_chunk_mutex);
+		return -1;
+	}
+
+	/* convert the memory pages from cached to uncached */
+
+	c_addr = (unsigned long)page_address(page);
+	uc_addr = c_addr - PAGE_OFFSET + __IA64_UNCACHED_OFFSET;
+
+	/*
+	 * There's a small race here where it's possible for someone to
+	 * access the page through /dev/mem halfway through the conversion
+	 * to uncached - not sure it's really worth bothering about
+	 */
+	for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
+		SetPageUncached(&page[i]);
+
+	flush_tlb_kernel_range(uc_addr, uc_addr + IA64_GRANULE_SIZE);
+
+	status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
+	if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) {
+		atomic_set(&uc_pool->status, 0);
+		status = smp_call_function(uncached_ipi_visibility, uc_pool, 1);
+		if (status || atomic_read(&uc_pool->status))
+			goto failed;
+	} else if (status != PAL_VISIBILITY_OK)
+		goto failed;
+
+	preempt_disable();
+
+	if (ia64_platform_is("sn2"))
+		sn_flush_all_caches(uc_addr, IA64_GRANULE_SIZE);
+	else
+		flush_icache_range(uc_addr, uc_addr + IA64_GRANULE_SIZE);
+
+	/* flush the just introduced uncached translation from the TLB */
+	local_flush_tlb_all();
+
+	preempt_enable();
+
+	status = ia64_pal_mc_drain();
+	if (status != PAL_STATUS_SUCCESS)
+		goto failed;
+	atomic_set(&uc_pool->status, 0);
+	status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 1);
+	if (status || atomic_read(&uc_pool->status))
+		goto failed;
+
+	/*
+	 * The chunk of memory pages has been converted to uncached so now we
+	 * can add it to the pool.
+	 */
+	status = gen_pool_add(uc_pool->pool, uc_addr, IA64_GRANULE_SIZE, nid);
+	if (status)
+		goto failed;
+
+	uc_pool->nchunks_added++;
+	mutex_unlock(&uc_pool->add_chunk_mutex);
+	return 0;
+
+	/* failed to convert or add the chunk so give it back to the kernel */
+failed:
+	for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
+		ClearPageUncached(&page[i]);
+
+	free_pages(c_addr, IA64_GRANULE_SHIFT-PAGE_SHIFT);
+	mutex_unlock(&uc_pool->add_chunk_mutex);
+	return -1;
+}
+
+
+/*
+ * uncached_alloc_page
+ *
+ * @starting_nid: node id of node to start with, or -1
+ * @n_pages: number of contiguous pages to allocate
+ *
+ * Allocate the specified number of contiguous uncached pages on the
+ * the requested node. If not enough contiguous uncached pages are available
+ * on the requested node, roundrobin starting with the next higher node.
+ */
+unsigned long uncached_alloc_page(int starting_nid, int n_pages)
+{
+	unsigned long uc_addr;
+	struct uncached_pool *uc_pool;
+	int nid;
+
+	if (unlikely(starting_nid >= MAX_NUMNODES))
+		return 0;
+
+	if (starting_nid < 0)
+		starting_nid = numa_node_id();
+	nid = starting_nid;
+
+	do {
+		if (!node_state(nid, N_HIGH_MEMORY))
+			continue;
+		uc_pool = &uncached_pools[nid];
+		if (uc_pool->pool == NULL)
+			continue;
+		do {
+			uc_addr = gen_pool_alloc(uc_pool->pool,
+						 n_pages * PAGE_SIZE);
+			if (uc_addr != 0)
+				return uc_addr;
+		} while (uncached_add_chunk(uc_pool, nid) == 0);
+
+	} while ((nid = (nid + 1) % MAX_NUMNODES) != starting_nid);
+
+	return 0;
+}
+EXPORT_SYMBOL(uncached_alloc_page);
+
+
+/*
+ * uncached_free_page
+ *
+ * @uc_addr: uncached address of first page to free
+ * @n_pages: number of contiguous pages to free
+ *
+ * Free the specified number of uncached pages.
+ */
+void uncached_free_page(unsigned long uc_addr, int n_pages)
+{
+	int nid = paddr_to_nid(uc_addr - __IA64_UNCACHED_OFFSET);
+	struct gen_pool *pool = uncached_pools[nid].pool;
+
+	if (unlikely(pool == NULL))
+		return;
+
+	if ((uc_addr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET)
+		panic("uncached_free_page invalid address %lx\n", uc_addr);
+
+	gen_pool_free(pool, uc_addr, n_pages * PAGE_SIZE);
+}
+EXPORT_SYMBOL(uncached_free_page);
+
+
+/*
+ * uncached_build_memmap,
+ *
+ * @uc_start: uncached starting address of a chunk of uncached memory
+ * @uc_end: uncached ending address of a chunk of uncached memory
+ * @arg: ignored, (NULL argument passed in on call to efi_memmap_walk_uc())
+ *
+ * Called at boot time to build a map of pages that can be used for
+ * memory special operations.
+ */
+static int __init uncached_build_memmap(u64 uc_start, u64 uc_end, void *arg)
+{
+	int nid = paddr_to_nid(uc_start - __IA64_UNCACHED_OFFSET);
+	struct gen_pool *pool = uncached_pools[nid].pool;
+	size_t size = uc_end - uc_start;
+
+	touch_softlockup_watchdog();
+
+	if (pool != NULL) {
+		memset((char *)uc_start, 0, size);
+		(void) gen_pool_add(pool, uc_start, size, nid);
+	}
+	return 0;
+}
+
+
+static int __init uncached_init(void)
+{
+	int nid;
+
+	for_each_node_state(nid, N_ONLINE) {
+		uncached_pools[nid].pool = gen_pool_create(PAGE_SHIFT, nid);
+		mutex_init(&uncached_pools[nid].add_chunk_mutex);
+	}
+
+	efi_memmap_walk_uc(uncached_build_memmap, NULL);
+	return 0;
+}
+
+__initcall(uncached_init);
--- a/arch/ia64/kernel/unwind.c
+++ b/arch/ia64/kernel/unwind.c
--- a/arch/ia64/kernel/unwind_decoder.c
+++ b/arch/ia64/kernel/unwind_decoder.c
@ -0,0 +1,459 @@
+/*
+ * Copyright (C) 2000 Hewlett-Packard Co
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Generic IA-64 unwind info decoder.
+ *
+ * This file is used both by the Linux kernel and objdump.  Please keep
+ * the two copies of this file in sync.
+ *
+ * You need to customize the decoder by defining the following
+ * macros/constants before including this file:
+ *
+ *  Types:
+ *	unw_word	Unsigned integer type with at least 64 bits 
+ *
+ *  Register names:
+ *	UNW_REG_BSP
+ *	UNW_REG_BSPSTORE
+ *	UNW_REG_FPSR
+ *	UNW_REG_LC
+ *	UNW_REG_PFS
+ *	UNW_REG_PR
+ *	UNW_REG_RNAT
+ *	UNW_REG_PSP
+ *	UNW_REG_RP
+ *	UNW_REG_UNAT
+ *
+ *  Decoder action macros:
+ *	UNW_DEC_BAD_CODE(code)
+ *	UNW_DEC_ABI(fmt,abi,context,arg)
+ *	UNW_DEC_BR_GR(fmt,brmask,gr,arg)
+ *	UNW_DEC_BR_MEM(fmt,brmask,arg)
+ *	UNW_DEC_COPY_STATE(fmt,label,arg)
+ *	UNW_DEC_EPILOGUE(fmt,t,ecount,arg)
+ *	UNW_DEC_FRGR_MEM(fmt,grmask,frmask,arg)
+ *	UNW_DEC_FR_MEM(fmt,frmask,arg)
+ *	UNW_DEC_GR_GR(fmt,grmask,gr,arg)
+ *	UNW_DEC_GR_MEM(fmt,grmask,arg)
+ *	UNW_DEC_LABEL_STATE(fmt,label,arg)
+ *	UNW_DEC_MEM_STACK_F(fmt,t,size,arg)
+ *	UNW_DEC_MEM_STACK_V(fmt,t,arg)
+ *	UNW_DEC_PRIUNAT_GR(fmt,r,arg)
+ *	UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg)
+ *	UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg)
+ *	UNW_DEC_PRIUNAT_WHEN_PSPREL(fmt,pspoff,arg)
+ *	UNW_DEC_PRIUNAT_WHEN_SPREL(fmt,spoff,arg)
+ *	UNW_DEC_PROLOGUE(fmt,body,rlen,arg)
+ *	UNW_DEC_PROLOGUE_GR(fmt,rlen,mask,grsave,arg)
+ *	UNW_DEC_REG_PSPREL(fmt,reg,pspoff,arg)
+ *	UNW_DEC_REG_REG(fmt,src,dst,arg)
+ *	UNW_DEC_REG_SPREL(fmt,reg,spoff,arg)
+ *	UNW_DEC_REG_WHEN(fmt,reg,t,arg)
+ *	UNW_DEC_RESTORE(fmt,t,abreg,arg)
+ *	UNW_DEC_RESTORE_P(fmt,qp,t,abreg,arg)
+ *	UNW_DEC_SPILL_BASE(fmt,pspoff,arg)
+ *	UNW_DEC_SPILL_MASK(fmt,imaskp,arg)
+ *	UNW_DEC_SPILL_PSPREL(fmt,t,abreg,pspoff,arg)
+ *	UNW_DEC_SPILL_PSPREL_P(fmt,qp,t,abreg,pspoff,arg)
+ *	UNW_DEC_SPILL_REG(fmt,t,abreg,x,ytreg,arg)
+ *	UNW_DEC_SPILL_REG_P(fmt,qp,t,abreg,x,ytreg,arg)
+ *	UNW_DEC_SPILL_SPREL(fmt,t,abreg,spoff,arg)
+ *	UNW_DEC_SPILL_SPREL_P(fmt,qp,t,abreg,pspoff,arg)
+ */
+
+static unw_word
+unw_decode_uleb128 (unsigned char **dpp)
+{
+  unsigned shift = 0;
+  unw_word byte, result = 0;
+  unsigned char *bp = *dpp;
+
+  while (1)
+    {
+      byte = *bp++;
+      result |= (byte & 0x7f) << shift;
+      if ((byte & 0x80) == 0)
+	break;
+      shift += 7;
+    }
+  *dpp = bp;
+  return result;
+}
+
+static unsigned char *
+unw_decode_x1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, abreg;
+  unw_word t, off;
+
+  byte1 = *dp++;
+  t = unw_decode_uleb128 (&dp);
+  off = unw_decode_uleb128 (&dp);
+  abreg = (byte1 & 0x7f);
+  if (byte1 & 0x80)
+	  UNW_DEC_SPILL_SPREL(X1, t, abreg, off, arg);
+  else
+	  UNW_DEC_SPILL_PSPREL(X1, t, abreg, off, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_x2 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, byte2, abreg, x, ytreg;
+  unw_word t;
+
+  byte1 = *dp++; byte2 = *dp++;
+  t = unw_decode_uleb128 (&dp);
+  abreg = (byte1 & 0x7f);
+  ytreg = byte2;
+  x = (byte1 >> 7) & 1;
+  if ((byte1 & 0x80) == 0 && ytreg == 0)
+    UNW_DEC_RESTORE(X2, t, abreg, arg);
+  else
+    UNW_DEC_SPILL_REG(X2, t, abreg, x, ytreg, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_x3 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, byte2, abreg, qp;
+  unw_word t, off;
+
+  byte1 = *dp++; byte2 = *dp++;
+  t = unw_decode_uleb128 (&dp);
+  off = unw_decode_uleb128 (&dp);
+
+  qp = (byte1 & 0x3f);
+  abreg = (byte2 & 0x7f);
+
+  if (byte1 & 0x80)
+    UNW_DEC_SPILL_SPREL_P(X3, qp, t, abreg, off, arg);
+  else
+    UNW_DEC_SPILL_PSPREL_P(X3, qp, t, abreg, off, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_x4 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, byte2, byte3, qp, abreg, x, ytreg;
+  unw_word t;
+
+  byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
+  t = unw_decode_uleb128 (&dp);
+
+  qp = (byte1 & 0x3f);
+  abreg = (byte2 & 0x7f);
+  x = (byte2 >> 7) & 1;
+  ytreg = byte3;
+
+  if ((byte2 & 0x80) == 0 && byte3 == 0)
+    UNW_DEC_RESTORE_P(X4, qp, t, abreg, arg);
+  else
+    UNW_DEC_SPILL_REG_P(X4, qp, t, abreg, x, ytreg, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_r1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  int body = (code & 0x20) != 0;
+  unw_word rlen;
+
+  rlen = (code & 0x1f);
+  UNW_DEC_PROLOGUE(R1, body, rlen, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_r2 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, mask, grsave;
+  unw_word rlen;
+
+  byte1 = *dp++;
+
+  mask = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
+  grsave = (byte1 & 0x7f);
+  rlen = unw_decode_uleb128 (&dp);
+  UNW_DEC_PROLOGUE_GR(R2, rlen, mask, grsave, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_r3 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word rlen;
+
+  rlen = unw_decode_uleb128 (&dp);
+  UNW_DEC_PROLOGUE(R3, ((code & 0x3) == 1), rlen, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_p1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char brmask = (code & 0x1f);
+
+  UNW_DEC_BR_MEM(P1, brmask, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_p2_p5 (unsigned char *dp, unsigned char code, void *arg)
+{
+  if ((code & 0x10) == 0)
+    {
+      unsigned char byte1 = *dp++;
+
+      UNW_DEC_BR_GR(P2, ((code & 0xf) << 1) | ((byte1 >> 7) & 1),
+		    (byte1 & 0x7f), arg);
+    }
+  else if ((code & 0x08) == 0)
+    {
+      unsigned char byte1 = *dp++, r, dst;
+
+      r = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
+      dst = (byte1 & 0x7f);
+      switch (r)
+	{
+	case 0: UNW_DEC_REG_GR(P3, UNW_REG_PSP, dst, arg); break;
+	case 1: UNW_DEC_REG_GR(P3, UNW_REG_RP, dst, arg); break;
+	case 2: UNW_DEC_REG_GR(P3, UNW_REG_PFS, dst, arg); break;
+	case 3: UNW_DEC_REG_GR(P3, UNW_REG_PR, dst, arg); break;
+	case 4: UNW_DEC_REG_GR(P3, UNW_REG_UNAT, dst, arg); break;
+	case 5: UNW_DEC_REG_GR(P3, UNW_REG_LC, dst, arg); break;
+	case 6: UNW_DEC_RP_BR(P3, dst, arg); break;
+	case 7: UNW_DEC_REG_GR(P3, UNW_REG_RNAT, dst, arg); break;
+	case 8: UNW_DEC_REG_GR(P3, UNW_REG_BSP, dst, arg); break;
+	case 9: UNW_DEC_REG_GR(P3, UNW_REG_BSPSTORE, dst, arg); break;
+	case 10: UNW_DEC_REG_GR(P3, UNW_REG_FPSR, dst, arg); break;
+	case 11: UNW_DEC_PRIUNAT_GR(P3, dst, arg); break;
+	default: UNW_DEC_BAD_CODE(r); break;
+	}
+    }
+  else if ((code & 0x7) == 0)
+    UNW_DEC_SPILL_MASK(P4, dp, arg);
+  else if ((code & 0x7) == 1)
+    {
+      unw_word grmask, frmask, byte1, byte2, byte3;
+
+      byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
+      grmask = ((byte1 >> 4) & 0xf);
+      frmask = ((byte1 & 0xf) << 16) | (byte2 << 8) | byte3;
+      UNW_DEC_FRGR_MEM(P5, grmask, frmask, arg);
+    }
+  else
+    UNW_DEC_BAD_CODE(code);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_p6 (unsigned char *dp, unsigned char code, void *arg)
+{
+  int gregs = (code & 0x10) != 0;
+  unsigned char mask = (code & 0x0f);
+
+  if (gregs)
+    UNW_DEC_GR_MEM(P6, mask, arg);
+  else
+    UNW_DEC_FR_MEM(P6, mask, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_p7_p10 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char r, byte1, byte2;
+  unw_word t, size;
+
+  if ((code & 0x10) == 0)
+    {
+      r = (code & 0xf);
+      t = unw_decode_uleb128 (&dp);
+      switch (r)
+	{
+	case 0:
+	  size = unw_decode_uleb128 (&dp);
+	  UNW_DEC_MEM_STACK_F(P7, t, size, arg);
+	  break;
+
+	case 1: UNW_DEC_MEM_STACK_V(P7, t, arg); break;
+	case 2: UNW_DEC_SPILL_BASE(P7, t, arg); break;
+	case 3: UNW_DEC_REG_SPREL(P7, UNW_REG_PSP, t, arg); break;
+	case 4: UNW_DEC_REG_WHEN(P7, UNW_REG_RP, t, arg); break;
+	case 5: UNW_DEC_REG_PSPREL(P7, UNW_REG_RP, t, arg); break;
+	case 6: UNW_DEC_REG_WHEN(P7, UNW_REG_PFS, t, arg); break;
+	case 7: UNW_DEC_REG_PSPREL(P7, UNW_REG_PFS, t, arg); break;
+	case 8: UNW_DEC_REG_WHEN(P7, UNW_REG_PR, t, arg); break;
+	case 9: UNW_DEC_REG_PSPREL(P7, UNW_REG_PR, t, arg); break;
+	case 10: UNW_DEC_REG_WHEN(P7, UNW_REG_LC, t, arg); break;
+	case 11: UNW_DEC_REG_PSPREL(P7, UNW_REG_LC, t, arg); break;
+	case 12: UNW_DEC_REG_WHEN(P7, UNW_REG_UNAT, t, arg); break;
+	case 13: UNW_DEC_REG_PSPREL(P7, UNW_REG_UNAT, t, arg); break;
+	case 14: UNW_DEC_REG_WHEN(P7, UNW_REG_FPSR, t, arg); break;
+	case 15: UNW_DEC_REG_PSPREL(P7, UNW_REG_FPSR, t, arg); break;
+	default: UNW_DEC_BAD_CODE(r); break;
+	}
+    }
+  else
+    {
+      switch (code & 0xf)
+	{
+	case 0x0: /* p8 */
+	  {
+	    r = *dp++;
+	    t = unw_decode_uleb128 (&dp);
+	    switch (r)
+	      {
+	      case  1: UNW_DEC_REG_SPREL(P8, UNW_REG_RP, t, arg); break;
+	      case  2: UNW_DEC_REG_SPREL(P8, UNW_REG_PFS, t, arg); break;
+	      case  3: UNW_DEC_REG_SPREL(P8, UNW_REG_PR, t, arg); break;
+	      case  4: UNW_DEC_REG_SPREL(P8, UNW_REG_LC, t, arg); break;
+	      case  5: UNW_DEC_REG_SPREL(P8, UNW_REG_UNAT, t, arg); break;
+	      case  6: UNW_DEC_REG_SPREL(P8, UNW_REG_FPSR, t, arg); break;
+	      case  7: UNW_DEC_REG_WHEN(P8, UNW_REG_BSP, t, arg); break;
+	      case  8: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSP, t, arg); break;
+	      case  9: UNW_DEC_REG_SPREL(P8, UNW_REG_BSP, t, arg); break;
+	      case 10: UNW_DEC_REG_WHEN(P8, UNW_REG_BSPSTORE, t, arg); break;
+	      case 11: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
+	      case 12: UNW_DEC_REG_SPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
+	      case 13: UNW_DEC_REG_WHEN(P8, UNW_REG_RNAT, t, arg); break;
+	      case 14: UNW_DEC_REG_PSPREL(P8, UNW_REG_RNAT, t, arg); break;
+	      case 15: UNW_DEC_REG_SPREL(P8, UNW_REG_RNAT, t, arg); break;
+	      case 16: UNW_DEC_PRIUNAT_WHEN_GR(P8, t, arg); break;
+	      case 17: UNW_DEC_PRIUNAT_PSPREL(P8, t, arg); break;
+	      case 18: UNW_DEC_PRIUNAT_SPREL(P8, t, arg); break;
+	      case 19: UNW_DEC_PRIUNAT_WHEN_MEM(P8, t, arg); break;
+	      default: UNW_DEC_BAD_CODE(r); break;
+	    }
+	  }
+	  break;
+
+	case 0x1:
+	  byte1 = *dp++; byte2 = *dp++;
+	  UNW_DEC_GR_GR(P9, (byte1 & 0xf), (byte2 & 0x7f), arg);
+	  break;
+
+	case 0xf: /* p10 */
+	  byte1 = *dp++; byte2 = *dp++;
+	  UNW_DEC_ABI(P10, byte1, byte2, arg);
+	  break;
+
+	case 0x9:
+	  return unw_decode_x1 (dp, code, arg);
+
+	case 0xa:
+	  return unw_decode_x2 (dp, code, arg);
+
+	case 0xb:
+	  return unw_decode_x3 (dp, code, arg);
+
+	case 0xc:
+	  return unw_decode_x4 (dp, code, arg);
+
+	default:
+	  UNW_DEC_BAD_CODE(code);
+	  break;
+	}
+    }
+  return dp;
+}
+
+static unsigned char *
+unw_decode_b1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word label = (code & 0x1f);
+
+  if ((code & 0x20) != 0)
+    UNW_DEC_COPY_STATE(B1, label, arg);
+  else
+    UNW_DEC_LABEL_STATE(B1, label, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_b2 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word t;
+
+  t = unw_decode_uleb128 (&dp);
+  UNW_DEC_EPILOGUE(B2, t, (code & 0x1f), arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_b3_x4 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word t, ecount, label;
+
+  if ((code & 0x10) == 0)
+    {
+      t = unw_decode_uleb128 (&dp);
+      ecount = unw_decode_uleb128 (&dp);
+      UNW_DEC_EPILOGUE(B3, t, ecount, arg);
+    }
+  else if ((code & 0x07) == 0)
+    {
+      label = unw_decode_uleb128 (&dp);
+      if ((code & 0x08) != 0)
+	UNW_DEC_COPY_STATE(B4, label, arg);
+      else
+	UNW_DEC_LABEL_STATE(B4, label, arg);
+    }
+  else
+    switch (code & 0x7)
+      {
+      case 1: return unw_decode_x1 (dp, code, arg);
+      case 2: return unw_decode_x2 (dp, code, arg);
+      case 3: return unw_decode_x3 (dp, code, arg);
+      case 4: return unw_decode_x4 (dp, code, arg);
+      default: UNW_DEC_BAD_CODE(code); break;
+      }
+  return dp;
+}
+
+typedef unsigned char *(*unw_decoder) (unsigned char *, unsigned char, void *);
+
+static unw_decoder unw_decode_table[2][8] =
+{
+  /* prologue table: */
+  {
+    unw_decode_r1,	/* 0 */
+    unw_decode_r1,
+    unw_decode_r2,
+    unw_decode_r3,
+    unw_decode_p1,	/* 4 */
+    unw_decode_p2_p5,
+    unw_decode_p6,
+    unw_decode_p7_p10
+  },
+  {
+    unw_decode_r1,	/* 0 */
+    unw_decode_r1,
+    unw_decode_r2,
+    unw_decode_r3,
+    unw_decode_b1,	/* 4 */
+    unw_decode_b1,
+    unw_decode_b2,
+    unw_decode_b3_x4
+  }
+};
+
+/*
+ * Decode one descriptor and return address of next descriptor.
+ */
+static inline unsigned char *
+unw_decode (unsigned char *dp, int inside_body, void *arg)
+{
+  unw_decoder decoder;
+  unsigned char code;
+
+  code = *dp++;
+  decoder = unw_decode_table[inside_body][code >> 5];
+  dp = (*decoder) (dp, code, arg);
+  return dp;
+}
--- a/arch/ia64/kernel/unwind_i.h
+++ b/arch/ia64/kernel/unwind_i.h
@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Kernel unwind support.
+ */
+
+#define UNW_VER(x)		((x) >> 48)
+#define UNW_FLAG_MASK		0x0000ffff00000000
+#define UNW_FLAG_OSMASK		0x0000f00000000000
+#define UNW_FLAG_EHANDLER(x)	((x) & 0x0000000100000000L)
+#define UNW_FLAG_UHANDLER(x)	((x) & 0x0000000200000000L)
+#define UNW_LENGTH(x)		((x) & 0x00000000ffffffffL)
+
+enum unw_register_index {
+	/* primary unat: */
+	UNW_REG_PRI_UNAT_GR,
+	UNW_REG_PRI_UNAT_MEM,
+
+	/* register stack */
+	UNW_REG_BSP,					/* register stack pointer */
+	UNW_REG_BSPSTORE,
+	UNW_REG_PFS,					/* previous function state */
+	UNW_REG_RNAT,
+	/* memory stack */
+	UNW_REG_PSP,					/* previous memory stack pointer */
+	/* return pointer: */
+	UNW_REG_RP,
+
+	/* preserved registers: */
+	UNW_REG_R4, UNW_REG_R5, UNW_REG_R6, UNW_REG_R7,
+	UNW_REG_UNAT, UNW_REG_PR, UNW_REG_LC, UNW_REG_FPSR,
+	UNW_REG_B1, UNW_REG_B2, UNW_REG_B3, UNW_REG_B4, UNW_REG_B5,
+	UNW_REG_F2, UNW_REG_F3, UNW_REG_F4, UNW_REG_F5,
+	UNW_REG_F16, UNW_REG_F17, UNW_REG_F18, UNW_REG_F19,
+	UNW_REG_F20, UNW_REG_F21, UNW_REG_F22, UNW_REG_F23,
+	UNW_REG_F24, UNW_REG_F25, UNW_REG_F26, UNW_REG_F27,
+	UNW_REG_F28, UNW_REG_F29, UNW_REG_F30, UNW_REG_F31,
+	UNW_NUM_REGS
+};
+
+struct unw_info_block {
+	u64 header;
+	u64 desc[0];		/* unwind descriptors */
+	/* personality routine and language-specific data follow behind descriptors */
+};
+
+struct unw_table {
+	struct unw_table *next;		/* must be first member! */
+	const char *name;
+	unsigned long gp;		/* global pointer for this load-module */
+	unsigned long segment_base;	/* base for offsets in the unwind table entries */
+	unsigned long start;
+	unsigned long end;
+	const struct unw_table_entry *array;
+	unsigned long length;
+};
+
+enum unw_where {
+	UNW_WHERE_NONE,			/* register isn't saved at all */
+	UNW_WHERE_GR,			/* register is saved in a general register */
+	UNW_WHERE_FR,			/* register is saved in a floating-point register */
+	UNW_WHERE_BR,			/* register is saved in a branch register */
+	UNW_WHERE_SPREL,		/* register is saved on memstack (sp-relative) */
+	UNW_WHERE_PSPREL,		/* register is saved on memstack (psp-relative) */
+	/*
+	 * At the end of each prologue these locations get resolved to
+	 * UNW_WHERE_PSPREL and UNW_WHERE_GR, respectively:
+	 */
+	UNW_WHERE_SPILL_HOME,		/* register is saved in its spill home */
+	UNW_WHERE_GR_SAVE		/* register is saved in next general register */
+};
+
+#define UNW_WHEN_NEVER	0x7fffffff
+
+struct unw_reg_info {
+	unsigned long val;		/* save location: register number or offset */
+	enum unw_where where;		/* where the register gets saved */
+	int when;			/* when the register gets saved */
+};
+
+struct unw_reg_state {
+	struct unw_reg_state *next;		/* next (outer) element on state stack */
+	struct unw_reg_info reg[UNW_NUM_REGS];	/* register save locations */
+};
+
+struct unw_labeled_state {
+	struct unw_labeled_state *next;		/* next labeled state (or NULL) */
+	unsigned long label;			/* label for this state */
+	struct unw_reg_state saved_state;
+};
+
+struct unw_state_record {
+	unsigned int first_region : 1;	/* is this the first region? */
+	unsigned int done : 1;		/* are we done scanning descriptors? */
+	unsigned int any_spills : 1;	/* got any register spills? */
+	unsigned int in_body : 1;	/* are we inside a body (as opposed to a prologue)? */
+	unsigned long flags;		/* see UNW_FLAG_* in unwind.h */
+
+	u8 *imask;			/* imask of spill_mask record or NULL */
+	unsigned long pr_val;		/* predicate values */
+	unsigned long pr_mask;		/* predicate mask */
+	long spill_offset;		/* psp-relative offset for spill base */
+	int region_start;
+	int region_len;
+	int epilogue_start;
+	int epilogue_count;
+	int when_target;
+
+	u8 gr_save_loc;			/* next general register to use for saving a register */
+	u8 return_link_reg;		/* branch register in which the return link is passed */
+
+	struct unw_labeled_state *labeled_states;	/* list of all labeled states */
+	struct unw_reg_state curr;	/* current state */
+};
+
+enum unw_nat_type {
+	UNW_NAT_NONE,		/* NaT not represented */
+	UNW_NAT_VAL,		/* NaT represented by NaT value (fp reg) */
+	UNW_NAT_MEMSTK,		/* NaT value is in unat word at offset OFF  */
+	UNW_NAT_REGSTK		/* NaT is in rnat */
+};
+
+enum unw_insn_opcode {
+	UNW_INSN_ADD,			/* s[dst] += val */
+	UNW_INSN_ADD_PSP,		/* s[dst] = (s.psp + val) */
+	UNW_INSN_ADD_SP,		/* s[dst] = (s.sp + val) */
+	UNW_INSN_MOVE,			/* s[dst] = s[val] */
+	UNW_INSN_MOVE2,			/* s[dst] = s[val]; s[dst+1] = s[val+1] */
+	UNW_INSN_MOVE_STACKED,		/* s[dst] = ia64_rse_skip(*s.bsp, val) */
+	UNW_INSN_SETNAT_MEMSTK,		/* s[dst+1].nat.type = MEMSTK;
+					   s[dst+1].nat.off = *s.pri_unat - s[dst] */
+	UNW_INSN_SETNAT_TYPE,		/* s[dst+1].nat.type = val */
+	UNW_INSN_LOAD,			/* s[dst] = *s[val] */
+	UNW_INSN_MOVE_SCRATCH,		/* s[dst] = scratch reg "val" */
+	UNW_INSN_MOVE_CONST,            /* s[dst] = constant reg "val" */
+};
+
+struct unw_insn {
+	unsigned int opc	:  4;
+	unsigned int dst	:  9;
+	signed int val		: 19;
+};
+
+/*
+ * Preserved general static registers (r4-r7) give rise to two script
+ * instructions; everything else yields at most one instruction; at
+ * the end of the script, the psp gets popped, accounting for one more
+ * instruction.
+ */
+#define UNW_MAX_SCRIPT_LEN	(UNW_NUM_REGS + 5)
+
+struct unw_script {
+	unsigned long ip;		/* ip this script is for */
+	unsigned long pr_mask;		/* mask of predicates script depends on */
+	unsigned long pr_val;		/* predicate values this script is for */
+	rwlock_t lock;
+	unsigned int flags;		/* see UNW_FLAG_* in unwind.h */
+	unsigned short lru_chain;	/* used for least-recently-used chain */
+	unsigned short coll_chain;	/* used for hash collisions */
+	unsigned short hint;		/* hint for next script to try (or -1) */
+	unsigned short count;		/* number of instructions in script */
+	struct unw_insn insn[UNW_MAX_SCRIPT_LEN];
+};
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@ -0,0 +1,248 @@
+
+#include <asm/cache.h>
+#include <asm/ptrace.h>
+#include <asm/pgtable.h>
+
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf64-ia64-little")
+OUTPUT_ARCH(ia64)
+ENTRY(phys_start)
+jiffies = jiffies_64;
+
+PHDRS {
+	code   PT_LOAD;
+	percpu PT_LOAD;
+	data   PT_LOAD;
+	note   PT_NOTE;
+	unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */
+}
+
+SECTIONS {
+	/*
+	 * unwind exit sections must be discarded before
+	 * the rest of the sections get included.
+	 */
+	/DISCARD/ : {
+		*(.IA_64.unwind.exit.text)
+		*(.IA_64.unwind_info.exit.text)
+		*(.comment)
+		*(.note)
+	}
+
+	v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */
+	phys_start = _start - LOAD_OFFSET;
+
+	code : {
+	} :code
+	. = KERNEL_START;
+
+	_text = .;
+	_stext = .;
+
+	.text : AT(ADDR(.text) - LOAD_OFFSET) {
+		__start_ivt_text = .;
+		*(.text..ivt)
+		__end_ivt_text = .;
+		TEXT_TEXT
+		SCHED_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		*(.gnu.linkonce.t*)
+	}
+
+	.text2 : AT(ADDR(.text2) - LOAD_OFFSET)	{
+		*(.text2)
+	}
+
+#ifdef CONFIG_SMP
+	.text..lock : AT(ADDR(.text..lock) - LOAD_OFFSET) {
+		*(.text..lock)
+	}
+#endif
+	_etext = .;
+
+	/*
+	 * Read-only data
+	 */
+	NOTES :code :note       /* put .notes in text and mark in PT_NOTE  */
+	code_continues : {
+	} : code               /* switch back to regular program...  */
+
+	EXCEPTION_TABLE(16)
+
+	/* MCA table */
+	. = ALIGN(16);
+	__mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET) {
+		__start___mca_table = .;
+		*(__mca_table)
+		__stop___mca_table = .;
+	}
+
+	.data..patch.phys_stack_reg : AT(ADDR(.data..patch.phys_stack_reg) - LOAD_OFFSET) {
+		__start___phys_stack_reg_patchlist = .;
+		*(.data..patch.phys_stack_reg)
+		__end___phys_stack_reg_patchlist = .;
+	}
+
+	/*
+	 * Global data
+	 */
+	_data = .;
+
+	/* Unwind info & table: */
+	. = ALIGN(8);
+	.IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) {
+		*(.IA_64.unwind_info*)
+	}
+	.IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET) {
+		__start_unwind = .;
+		*(.IA_64.unwind*)
+		__end_unwind = .;
+	} :code :unwind
+	code_continues2 : {
+	} : code
+
+	RODATA
+
+	.opd : AT(ADDR(.opd) - LOAD_OFFSET) {
+		*(.opd)
+	}
+
+	/*
+	 * Initialization code and data:
+	 */
+	. = ALIGN(PAGE_SIZE);
+	__init_begin = .;
+
+	INIT_TEXT_SECTION(PAGE_SIZE)
+	INIT_DATA_SECTION(16)
+
+	.data..patch.vtop : AT(ADDR(.data..patch.vtop) - LOAD_OFFSET) {
+		__start___vtop_patchlist = .;
+		*(.data..patch.vtop)
+		__end___vtop_patchlist = .;
+	}
+
+	.data..patch.rse : AT(ADDR(.data..patch.rse) - LOAD_OFFSET) {
+		__start___rse_patchlist = .;
+		*(.data..patch.rse)
+		__end___rse_patchlist = .;
+	}
+
+	.data..patch.mckinley_e9 : AT(ADDR(.data..patch.mckinley_e9) - LOAD_OFFSET) {
+		__start___mckinley_e9_bundles = .;
+		*(.data..patch.mckinley_e9)
+		__end___mckinley_e9_bundles = .;
+	}
+
+#if defined(CONFIG_PARAVIRT)
+	. = ALIGN(16);
+	.paravirt_bundles : AT(ADDR(.paravirt_bundles) - LOAD_OFFSET) {
+		__start_paravirt_bundles = .;
+		*(.paravirt_bundles)
+		__stop_paravirt_bundles = .;
+	}
+	. = ALIGN(16);
+	.paravirt_insts : AT(ADDR(.paravirt_insts) - LOAD_OFFSET) {
+		__start_paravirt_insts = .;
+		*(.paravirt_insts)
+		__stop_paravirt_insts = .;
+	}
+	. = ALIGN(16);
+	.paravirt_branches : AT(ADDR(.paravirt_branches) - LOAD_OFFSET) {
+		__start_paravirt_branches = .;
+		*(.paravirt_branches)
+		__stop_paravirt_branches = .;
+	}
+#endif
+
+#if defined(CONFIG_IA64_GENERIC)
+	/* Machine Vector */
+	. = ALIGN(16);
+	.machvec : AT(ADDR(.machvec) - LOAD_OFFSET) {
+		machvec_start = .;
+		*(.machvec)
+		machvec_end = .;
+	}
+#endif
+
+#ifdef	CONFIG_SMP
+	. = ALIGN(PERCPU_PAGE_SIZE);
+	__cpu0_per_cpu = .;
+	. = . + PERCPU_PAGE_SIZE;   /* cpu0 per-cpu space */
+#endif
+
+	. = ALIGN(PAGE_SIZE);
+	__init_end = .;
+
+	.data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) {
+		PAGE_ALIGNED_DATA(PAGE_SIZE)
+		. = ALIGN(PAGE_SIZE);
+		__start_gate_section = .;
+		*(.data..gate)
+		__stop_gate_section = .;
+	}
+	/*
+	 * make sure the gate page doesn't expose
+	 * kernel data
+	 */
+	. = ALIGN(PAGE_SIZE);
+
+	/* Per-cpu data: */
+	. = ALIGN(PERCPU_PAGE_SIZE);
+	PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu)
+	__phys_per_cpu_start = __per_cpu_load;
+	/*
+	 * ensure percpu data fits
+	 * into percpu page size
+	 */
+	. = __phys_per_cpu_start + PERCPU_PAGE_SIZE;
+
+	data : {
+	} :data
+	.data : AT(ADDR(.data) - LOAD_OFFSET) {
+		_sdata  =  .;
+		INIT_TASK_DATA(PAGE_SIZE)
+		CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES)
+		READ_MOSTLY_DATA(SMP_CACHE_BYTES)
+		DATA_DATA
+		*(.data1)
+		*(.gnu.linkonce.d*)
+		CONSTRUCTORS
+	}
+
+	. = ALIGN(16);	/* gp must be 16-byte aligned for exc. table */
+	.got : AT(ADDR(.got) - LOAD_OFFSET) {
+		*(.got.plt)
+		*(.got)
+	}
+	__gp = ADDR(.got) + 0x200000;
+
+	/*
+	 * We want the small data sections together,
+	 * so single-instruction offsets can access
+	 * them all, and initialized data all before
+	 * uninitialized, so we can shorten the
+	 * on-disk segment size.
+	 */
+	.sdata : AT(ADDR(.sdata) - LOAD_OFFSET) {
+		*(.sdata)
+		*(.sdata1)
+		*(.srdata)
+	}
+	_edata  =  .;
+
+	BSS_SECTION(0, 0, 0)
+
+	_end = .;
+
+	code : {
+	} :code
+
+	STABS_DEBUG
+	DWARF_DEBUG
+
+	/* Default discards */
+	DISCARDS
+}