Mirror of https://github.com/AetherDroid/android_kernel_samsung_on5xelte.git
Synced 2025-10-29 23:28:52 +01:00

Commit f6dfaef42e: Fixed MTP to work with TWRP
50820 changed files with 20846062 additions and 0 deletions
69
arch/x86/kernel/cpu/Makefile
Normal file
@@ -0,0 +1,69 @@
#
# Makefile for x86-compatible CPU details, features and quirks
#

# Don't trace early stages of a secondary CPU boot
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_common.o = -pg
CFLAGS_REMOVE_perf_event.o = -pg
endif

# Make sure load_percpu_segment has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_common.o		:= $(nostackp)

obj-y			:= intel_cacheinfo.o scattered.o topology.o
obj-y			+= common.o
obj-y			+= rdrand.o
obj-y			+= match.o

obj-$(CONFIG_PROC_FS)	+= proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o

obj-$(CONFIG_X86_32)	+= bugs.o
obj-$(CONFIG_X86_64)	+= bugs_64.o

obj-$(CONFIG_CPU_SUP_INTEL)		+= intel.o
obj-$(CONFIG_CPU_SUP_AMD)		+= amd.o
obj-$(CONFIG_CPU_SUP_CYRIX_32)		+= cyrix.o
obj-$(CONFIG_CPU_SUP_CENTAUR)		+= centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o

obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o

ifdef CONFIG_PERF_EVENTS
obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd.o perf_event_amd_uncore.o
ifdef CONFIG_AMD_IOMMU
obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd_iommu.o
endif
obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_rapl.o

obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= perf_event_intel_uncore.o \
					   perf_event_intel_uncore_snb.o \
					   perf_event_intel_uncore_snbep.o \
					   perf_event_intel_uncore_nhmex.o
endif


obj-$(CONFIG_X86_MCE)			+= mcheck/
obj-$(CONFIG_MTRR)			+= mtrr/
obj-$(CONFIG_MICROCODE)			+= microcode/

obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o perf_event_amd_ibs.o

obj-$(CONFIG_HYPERVISOR_GUEST)		+= vmware.o hypervisor.o mshyperv.o

ifdef CONFIG_X86_FEATURE_NAMES
quiet_cmd_mkcapflags = MKCAP   $@
      cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@

cpufeature = $(src)/../../include/asm/cpufeature.h

targets += capflags.c
$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
	$(call if_changed,mkcapflags)
endif
clean-files += capflags.c
872
arch/x86/kernel/cpu/amd.c
Normal file
@@ -0,0 +1,872 @@
#include <linux/export.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/elf.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
#include <linux/io.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/cpu.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/pci-direct.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
# include <asm/mmconfig.h>
|
||||
# include <asm/cacheflush.h>
|
||||
#endif
|
||||
|
||||
#include "cpu.h"
|
||||
|
||||
static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
|
||||
{
|
||||
u32 gprs[8] = { 0 };
|
||||
int err;
|
||||
|
||||
WARN_ONCE((boot_cpu_data.x86 != 0xf),
|
||||
"%s should only be used on K8!\n", __func__);
|
||||
|
||||
gprs[1] = msr;
|
||||
gprs[7] = 0x9c5a203a;
|
||||
|
||||
err = rdmsr_safe_regs(gprs);
|
||||
|
||||
*p = gprs[0] | ((u64)gprs[2] << 32);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
|
||||
{
|
||||
u32 gprs[8] = { 0 };
|
||||
|
||||
WARN_ONCE((boot_cpu_data.x86 != 0xf),
|
||||
"%s should only be used on K8!\n", __func__);
|
||||
|
||||
gprs[0] = (u32)val;
|
||||
gprs[1] = msr;
|
||||
gprs[2] = val >> 32;
|
||||
gprs[7] = 0x9c5a203a;
|
||||
|
||||
return wrmsr_safe_regs(gprs);
|
||||
}
|
||||
|
||||
/*
|
||||
* B step AMD K6 before B 9730xxxx have hardware bugs that can cause
|
||||
* misexecution of code under Linux. Owners of such processors should
|
||||
* contact AMD for precise details and a CPU swap.
|
||||
*
|
||||
* See http://www.multimania.com/poulot/k6bug.html
|
||||
* and section 2.6.2 of "AMD-K6 Processor Revision Guide - Model 6"
|
||||
* (Publication # 21266 Issue Date: August 1998)
|
||||
*
|
||||
* The following test is erm.. interesting. AMD neglected to up
|
||||
* the chip setting when fixing the bug but they also tweaked some
|
||||
* performance at the same time..
|
||||
*/
|
||||
|
||||
extern __visible void vide(void);
|
||||
__asm__(".globl vide\n\t.align 4\nvide: ret");
|
||||
|
||||
static void init_amd_k5(struct cpuinfo_x86 *c)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* General Systems BIOSen alias the cpu frequency registers
|
||||
* of the Elan at 0x000df000. Unfortuantly, one of the Linux
|
||||
* drivers subsequently pokes it, and changes the CPU speed.
|
||||
* Workaround : Remove the unneeded alias.
|
||||
*/
|
||||
#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */
|
||||
#define CBAR_ENB (0x80000000)
|
||||
#define CBAR_KEY (0X000000CB)
|
||||
if (c->x86_model == 9 || c->x86_model == 10) {
|
||||
if (inl(CBAR) & CBAR_ENB)
|
||||
outl(0 | CBAR_KEY, CBAR);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void init_amd_k6(struct cpuinfo_x86 *c)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
u32 l, h;
|
||||
int mbytes = get_num_physpages() >> (20-PAGE_SHIFT);
|
||||
|
||||
if (c->x86_model < 6) {
|
||||
/* Based on AMD doc 20734R - June 2000 */
|
||||
if (c->x86_model == 0) {
|
||||
clear_cpu_cap(c, X86_FEATURE_APIC);
|
||||
set_cpu_cap(c, X86_FEATURE_PGE);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (c->x86_model == 6 && c->x86_mask == 1) {
|
||||
const int K6_BUG_LOOP = 1000000;
|
||||
int n;
|
||||
void (*f_vide)(void);
|
||||
unsigned long d, d2;
|
||||
|
||||
printk(KERN_INFO "AMD K6 stepping B detected - ");
|
||||
|
||||
/*
|
||||
* It looks like AMD fixed the 2.6.2 bug and improved indirect
|
||||
* calls at the same time.
|
||||
*/
|
||||
|
||||
n = K6_BUG_LOOP;
|
||||
f_vide = vide;
|
||||
rdtscl(d);
|
||||
while (n--)
|
||||
f_vide();
|
||||
rdtscl(d2);
|
||||
d = d2-d;
|
||||
|
||||
if (d > 20*K6_BUG_LOOP)
|
||||
printk(KERN_CONT
|
||||
"system stability may be impaired when more than 32 MB are used.\n");
|
||||
else
|
||||
printk(KERN_CONT "probably OK (after B9730xxxx).\n");
|
||||
}
|
||||
|
||||
/* K6 with old style WHCR */
|
||||
if (c->x86_model < 8 ||
|
||||
(c->x86_model == 8 && c->x86_mask < 8)) {
|
||||
/* We can only write allocate on the low 508Mb */
|
||||
if (mbytes > 508)
|
||||
mbytes = 508;
|
||||
|
||||
rdmsr(MSR_K6_WHCR, l, h);
|
||||
if ((l&0x0000FFFF) == 0) {
|
||||
unsigned long flags;
|
||||
l = (1<<0)|((mbytes/4)<<1);
|
||||
local_irq_save(flags);
|
||||
wbinvd();
|
||||
wrmsr(MSR_K6_WHCR, l, h);
|
||||
local_irq_restore(flags);
|
||||
printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
|
||||
mbytes);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if ((c->x86_model == 8 && c->x86_mask > 7) ||
|
||||
c->x86_model == 9 || c->x86_model == 13) {
|
||||
/* The more serious chips .. */
|
||||
|
||||
if (mbytes > 4092)
|
||||
mbytes = 4092;
|
||||
|
||||
rdmsr(MSR_K6_WHCR, l, h);
|
||||
if ((l&0xFFFF0000) == 0) {
|
||||
unsigned long flags;
|
||||
l = ((mbytes>>2)<<22)|(1<<16);
|
||||
local_irq_save(flags);
|
||||
wbinvd();
|
||||
wrmsr(MSR_K6_WHCR, l, h);
|
||||
local_irq_restore(flags);
|
||||
printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
|
||||
mbytes);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (c->x86_model == 10) {
|
||||
/* AMD Geode LX is model 10 */
|
||||
/* placeholder for any needed mods */
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void init_amd_k7(struct cpuinfo_x86 *c)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
u32 l, h;
|
||||
|
||||
/*
|
||||
* Bit 15 of Athlon specific MSR 15, needs to be 0
|
||||
* to enable SSE on Palomino/Morgan/Barton CPU's.
|
||||
* If the BIOS didn't enable it already, enable it here.
|
||||
*/
|
||||
if (c->x86_model >= 6 && c->x86_model <= 10) {
|
||||
if (!cpu_has(c, X86_FEATURE_XMM)) {
|
||||
printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
|
||||
msr_clear_bit(MSR_K7_HWCR, 15);
|
||||
set_cpu_cap(c, X86_FEATURE_XMM);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* It's been determined by AMD that Athlons since model 8 stepping 1
|
||||
* are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
|
||||
* As per AMD technical note 27212 0.2
|
||||
*/
|
||||
if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
|
||||
rdmsr(MSR_K7_CLK_CTL, l, h);
|
||||
if ((l & 0xfff00000) != 0x20000000) {
|
||||
printk(KERN_INFO
|
||||
"CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
|
||||
l, ((l & 0x000fffff)|0x20000000));
|
||||
wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
|
||||
}
|
||||
}
|
||||
|
||||
set_cpu_cap(c, X86_FEATURE_K7);
|
||||
|
||||
/* calling is from identify_secondary_cpu() ? */
|
||||
if (!c->cpu_index)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Certain Athlons might work (for various values of 'work') in SMP
|
||||
* but they are not certified as MP capable.
|
||||
*/
|
||||
/* Athlon 660/661 is valid. */
|
||||
if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
|
||||
(c->x86_mask == 1)))
|
||||
return;
|
||||
|
||||
/* Duron 670 is valid */
|
||||
if ((c->x86_model == 7) && (c->x86_mask == 0))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Athlon 662, Duron 671, and Athlon >model 7 have capability
|
||||
* bit. It's worth noting that the A5 stepping (662) of some
|
||||
* Athlon XP's have the MP bit set.
|
||||
* See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
|
||||
* more.
|
||||
*/
|
||||
if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
|
||||
((c->x86_model == 7) && (c->x86_mask >= 1)) ||
|
||||
(c->x86_model > 7))
|
||||
if (cpu_has(c, X86_FEATURE_MP))
|
||||
return;
|
||||
|
||||
/* If we get here, not a certified SMP capable AMD system. */
|
||||
|
||||
/*
|
||||
* Don't taint if we are running SMP kernel on a single non-MP
|
||||
* approved Athlon
|
||||
*/
|
||||
WARN_ONCE(1, "WARNING: This combination of AMD"
|
||||
" processors is not suitable for SMP.\n");
|
||||
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
/*
|
||||
* To workaround broken NUMA config. Read the comment in
|
||||
* srat_detect_node().
|
||||
*/
|
||||
static int nearby_node(int apicid)
|
||||
{
|
||||
int i, node;
|
||||
|
||||
for (i = apicid - 1; i >= 0; i--) {
|
||||
node = __apicid_to_node[i];
|
||||
if (node != NUMA_NO_NODE && node_online(node))
|
||||
return node;
|
||||
}
|
||||
for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
|
||||
node = __apicid_to_node[i];
|
||||
if (node != NUMA_NO_NODE && node_online(node))
|
||||
return node;
|
||||
}
|
||||
return first_node(node_online_map); /* Shouldn't happen */
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Fixup core topology information for
|
||||
* (1) AMD multi-node processors
|
||||
* Assumption: Number of cores in each internal node is the same.
|
||||
* (2) AMD processors supporting compute units
|
||||
*/
|
||||
#ifdef CONFIG_X86_HT
|
||||
static void amd_get_topology(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 nodes, cores_per_cu = 1;
|
||||
u8 node_id;
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
/* get information required for multi-node processors */
|
||||
if (cpu_has_topoext) {
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
|
||||
nodes = ((ecx >> 8) & 7) + 1;
|
||||
node_id = ecx & 7;
|
||||
|
||||
/* get compute unit information */
|
||||
smp_num_siblings = ((ebx >> 8) & 3) + 1;
|
||||
c->compute_unit_id = ebx & 0xff;
|
||||
cores_per_cu += ((ebx >> 8) & 3);
|
||||
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
|
||||
u64 value;
|
||||
|
||||
rdmsrl(MSR_FAM10H_NODE_ID, value);
|
||||
nodes = ((value >> 3) & 7) + 1;
|
||||
node_id = value & 7;
|
||||
} else
|
||||
return;
|
||||
|
||||
/* fixup multi-node processor information */
|
||||
if (nodes > 1) {
|
||||
u32 cores_per_node;
|
||||
u32 cus_per_node;
|
||||
|
||||
set_cpu_cap(c, X86_FEATURE_AMD_DCM);
|
||||
cores_per_node = c->x86_max_cores / nodes;
|
||||
cus_per_node = cores_per_node / cores_per_cu;
|
||||
|
||||
/* store NodeID, use llc_shared_map to store sibling info */
|
||||
per_cpu(cpu_llc_id, cpu) = node_id;
|
||||
|
||||
/* core id has to be in the [0 .. cores_per_node - 1] range */
|
||||
c->cpu_core_id %= cores_per_node;
|
||||
c->compute_unit_id %= cus_per_node;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* On a AMD dual core setup the lower bits of the APIC id distinguish the cores.
|
||||
* Assumes number of cores is a power of two.
|
||||
*/
|
||||
static void amd_detect_cmp(struct cpuinfo_x86 *c)
|
||||
{
|
||||
#ifdef CONFIG_X86_HT
|
||||
unsigned bits;
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
bits = c->x86_coreid_bits;
|
||||
/* Low order bits define the core id (index of core in socket) */
|
||||
c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
|
||||
/* Convert the initial APIC ID into the socket ID */
|
||||
c->phys_proc_id = c->initial_apicid >> bits;
|
||||
/* use socket ID also for last level cache */
|
||||
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
|
||||
amd_get_topology(c);
|
||||
#endif
|
||||
}
|
||||
|
||||
u16 amd_get_nb_id(int cpu)
|
||||
{
|
||||
u16 id = 0;
|
||||
#ifdef CONFIG_SMP
|
||||
id = per_cpu(cpu_llc_id, cpu);
|
||||
#endif
|
||||
return id;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(amd_get_nb_id);
|
||||
|
||||
static void srat_detect_node(struct cpuinfo_x86 *c)
|
||||
{
|
||||
#ifdef CONFIG_NUMA
|
||||
int cpu = smp_processor_id();
|
||||
int node;
|
||||
unsigned apicid = c->apicid;
|
||||
|
||||
node = numa_cpu_node(cpu);
|
||||
if (node == NUMA_NO_NODE)
|
||||
node = per_cpu(cpu_llc_id, cpu);
|
||||
|
||||
/*
|
||||
* On multi-fabric platform (e.g. Numascale NumaChip) a
|
||||
* platform-specific handler needs to be called to fixup some
|
||||
* IDs of the CPU.
|
||||
*/
|
||||
if (x86_cpuinit.fixup_cpu_id)
|
||||
x86_cpuinit.fixup_cpu_id(c, node);
|
||||
|
||||
if (!node_online(node)) {
|
||||
/*
|
||||
* Two possibilities here:
|
||||
*
|
||||
* - The CPU is missing memory and no node was created. In
|
||||
* that case try picking one from a nearby CPU.
|
||||
*
|
||||
* - The APIC IDs differ from the HyperTransport node IDs
|
||||
* which the K8 northbridge parsing fills in. Assume
|
||||
* they are all increased by a constant offset, but in
|
||||
* the same order as the HT nodeids. If that doesn't
|
||||
* result in a usable node fall back to the path for the
|
||||
* previous case.
|
||||
*
|
||||
* This workaround operates directly on the mapping between
|
||||
* APIC ID and NUMA node, assuming certain relationship
|
||||
* between APIC ID, HT node ID and NUMA topology. As going
|
||||
* through CPU mapping may alter the outcome, directly
|
||||
* access __apicid_to_node[].
|
||||
*/
|
||||
int ht_nodeid = c->initial_apicid;
|
||||
|
||||
if (ht_nodeid >= 0 &&
|
||||
__apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
|
||||
node = __apicid_to_node[ht_nodeid];
|
||||
/* Pick a nearby node */
|
||||
if (!node_online(node))
|
||||
node = nearby_node(apicid);
|
||||
}
|
||||
numa_set_node(cpu, node);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void early_init_amd_mc(struct cpuinfo_x86 *c)
|
||||
{
|
||||
#ifdef CONFIG_X86_HT
|
||||
unsigned bits, ecx;
|
||||
|
||||
/* Multi core CPU? */
|
||||
if (c->extended_cpuid_level < 0x80000008)
|
||||
return;
|
||||
|
||||
ecx = cpuid_ecx(0x80000008);
|
||||
|
||||
c->x86_max_cores = (ecx & 0xff) + 1;
|
||||
|
||||
/* CPU telling us the core id bits shift? */
|
||||
bits = (ecx >> 12) & 0xF;
|
||||
|
||||
/* Otherwise recompute */
|
||||
if (bits == 0) {
|
||||
while ((1 << bits) < c->x86_max_cores)
|
||||
bits++;
|
||||
}
|
||||
|
||||
c->x86_coreid_bits = bits;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void bsp_init_amd(struct cpuinfo_x86 *c)
|
||||
{
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (c->x86 >= 0xf) {
|
||||
unsigned long long tseg;
|
||||
|
||||
/*
|
||||
* Split up direct mapping around the TSEG SMM area.
|
||||
* Don't do it for gbpages because there seems very little
|
||||
* benefit in doing so.
|
||||
*/
|
||||
if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
|
||||
unsigned long pfn = tseg >> PAGE_SHIFT;
|
||||
|
||||
printk(KERN_DEBUG "tseg: %010llx\n", tseg);
|
||||
if (pfn_range_is_mapped(pfn, pfn + 1))
|
||||
set_memory_4k((unsigned long)__va(tseg), 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
|
||||
|
||||
if (c->x86 > 0x10 ||
|
||||
(c->x86 == 0x10 && c->x86_model >= 0x2)) {
|
||||
u64 val;
|
||||
|
||||
rdmsrl(MSR_K7_HWCR, val);
|
||||
if (!(val & BIT(24)))
|
||||
printk(KERN_WARNING FW_BUG "TSC doesn't count "
|
||||
"with P0 frequency!\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (c->x86 == 0x15) {
|
||||
unsigned long upperbit;
|
||||
u32 cpuid, assoc;
|
||||
|
||||
cpuid = cpuid_edx(0x80000005);
|
||||
assoc = cpuid >> 16 & 0xff;
|
||||
upperbit = ((cpuid >> 24) << 10) / assoc;
|
||||
|
||||
va_align.mask = (upperbit - 1) & PAGE_MASK;
|
||||
va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
|
||||
}
|
||||
}
|
||||
|
||||
static void early_init_amd(struct cpuinfo_x86 *c)
|
||||
{
|
||||
early_init_amd_mc(c);
|
||||
|
||||
/*
|
||||
* c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
|
||||
* with P/T states and does not stop in deep C-states
|
||||
*/
|
||||
if (c->x86_power & (1 << 8)) {
|
||||
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
||||
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
|
||||
if (!check_tsc_unstable())
|
||||
set_sched_clock_stable();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
set_cpu_cap(c, X86_FEATURE_SYSCALL32);
|
||||
#else
|
||||
/* Set MTRR capability flag if appropriate */
|
||||
if (c->x86 == 5)
|
||||
if (c->x86_model == 13 || c->x86_model == 9 ||
|
||||
(c->x86_model == 8 && c->x86_mask >= 8))
|
||||
set_cpu_cap(c, X86_FEATURE_K6_MTRR);
|
||||
#endif
|
||||
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
|
||||
/* check CPU config space for extended APIC ID */
|
||||
if (cpu_has_apic && c->x86 >= 0xf) {
|
||||
unsigned int val;
|
||||
val = read_pci_config(0, 24, 0, 0x68);
|
||||
if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
|
||||
set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This is only needed to tell the kernel whether to use VMCALL
|
||||
* and VMMCALL. VMMCALL is never executed except under virt, so
|
||||
* we can set it unconditionally.
|
||||
*/
|
||||
set_cpu_cap(c, X86_FEATURE_VMMCALL);
|
||||
|
||||
/* F16h erratum 793, CVE-2013-6885 */
|
||||
if (c->x86 == 0x16 && c->x86_model <= 0xf)
|
||||
msr_set_bit(MSR_AMD64_LS_CFG, 15);
|
||||
}
|
||||
|
||||
static const int amd_erratum_383[];
|
||||
static const int amd_erratum_400[];
|
||||
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
|
||||
|
||||
static void init_amd_k8(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 level;
|
||||
u64 value;
|
||||
|
||||
/* On C+ stepping K8 rep microcode works well for copy/memset */
|
||||
level = cpuid_eax(1);
|
||||
if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
|
||||
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
|
||||
|
||||
/*
|
||||
* Some BIOSes incorrectly force this feature, but only K8 revision D
|
||||
* (model = 0x14) and later actually support it.
|
||||
* (AMD Erratum #110, docId: 25759).
|
||||
*/
|
||||
if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
|
||||
clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
|
||||
if (!rdmsrl_amd_safe(0xc001100d, &value)) {
|
||||
value &= ~BIT_64(32);
|
||||
wrmsrl_amd_safe(0xc001100d, value);
|
||||
}
|
||||
}
|
||||
|
||||
if (!c->x86_model_id[0])
|
||||
strcpy(c->x86_model_id, "Hammer");
|
||||
}
|
||||
|
||||
static void init_amd_gh(struct cpuinfo_x86 *c)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
/* do this for boot cpu */
|
||||
if (c == &boot_cpu_data)
|
||||
check_enable_amd_mmconf_dmi();
|
||||
|
||||
fam10h_check_enable_mmcfg();
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Disable GART TLB Walk Errors on Fam10h. We do this here because this
|
||||
* is always needed when GART is enabled, even in a kernel which has no
|
||||
* MCE support built in. BIOS should disable GartTlbWlk Errors already.
|
||||
* If it doesn't, we do it here as suggested by the BKDG.
|
||||
*
|
||||
* Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
|
||||
*/
|
||||
msr_set_bit(MSR_AMD64_MCx_MASK(4), 10);
|
||||
|
||||
/*
|
||||
* On family 10h BIOS may not have properly enabled WC+ support, causing
|
||||
* it to be converted to CD memtype. This may result in performance
|
||||
* degradation for certain nested-paging guests. Prevent this conversion
|
||||
* by clearing bit 24 in MSR_AMD64_BU_CFG2.
|
||||
*
|
||||
* NOTE: we want to use the _safe accessors so as not to #GP kvm
|
||||
* guests on older kvm hosts.
|
||||
*/
|
||||
msr_clear_bit(MSR_AMD64_BU_CFG2, 24);
|
||||
|
||||
if (cpu_has_amd_erratum(c, amd_erratum_383))
|
||||
set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
|
||||
}
|
||||
|
||||
static void init_amd_bd(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u64 value;
|
||||
|
||||
/* re-enable TopologyExtensions if switched off by BIOS */
|
||||
if ((c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
|
||||
!cpu_has(c, X86_FEATURE_TOPOEXT)) {
|
||||
|
||||
if (msr_set_bit(0xc0011005, 54) > 0) {
|
||||
rdmsrl(0xc0011005, value);
|
||||
if (value & BIT_64(54)) {
|
||||
set_cpu_cap(c, X86_FEATURE_TOPOEXT);
|
||||
pr_info(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The way access filter has a performance penalty on some workloads.
|
||||
* Disable it on the affected CPUs.
|
||||
*/
|
||||
if ((c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
|
||||
if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) {
|
||||
value |= 0x1E;
|
||||
wrmsrl_safe(0xc0011021, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void init_amd(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 dummy;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Disable TLB flush filter by setting HWCR.FFDIS on K8
|
||||
* bit 6 of msr C001_0015
|
||||
*
|
||||
* Errata 63 for SH-B3 steppings
|
||||
* Errata 122 for all steppings (F+ have it disabled by default)
|
||||
*/
|
||||
if (c->x86 == 0xf)
|
||||
msr_set_bit(MSR_K7_HWCR, 6);
|
||||
#endif
|
||||
|
||||
early_init_amd(c);
|
||||
|
||||
/*
|
||||
* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
|
||||
* 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
|
||||
*/
|
||||
clear_cpu_cap(c, 0*32+31);
|
||||
|
||||
if (c->x86 >= 0x10)
|
||||
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
|
||||
|
||||
/* get apicid instead of initial apic id from cpuid */
|
||||
c->apicid = hard_smp_processor_id();
|
||||
|
||||
/* K6s reports MCEs but don't actually have all the MSRs */
|
||||
if (c->x86 < 6)
|
||||
clear_cpu_cap(c, X86_FEATURE_MCE);
|
||||
|
||||
switch (c->x86) {
|
||||
case 4: init_amd_k5(c); break;
|
||||
case 5: init_amd_k6(c); break;
|
||||
case 6: init_amd_k7(c); break;
|
||||
case 0xf: init_amd_k8(c); break;
|
||||
case 0x10: init_amd_gh(c); break;
|
||||
case 0x15: init_amd_bd(c); break;
|
||||
}
|
||||
|
||||
/* Enable workaround for FXSAVE leak */
|
||||
if (c->x86 >= 6)
|
||||
set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
|
||||
|
||||
cpu_detect_cache_sizes(c);
|
||||
|
||||
/* Multi core CPU? */
|
||||
if (c->extended_cpuid_level >= 0x80000008) {
|
||||
amd_detect_cmp(c);
|
||||
srat_detect_node(c);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
detect_ht(c);
|
||||
#endif
|
||||
|
||||
init_amd_cacheinfo(c);
|
||||
|
||||
if (c->x86 >= 0xf)
|
||||
set_cpu_cap(c, X86_FEATURE_K8);
|
||||
|
||||
if (cpu_has_xmm2) {
|
||||
/* MFENCE stops RDTSC speculation */
|
||||
set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
|
||||
}
|
||||
|
||||
/*
|
||||
* Family 0x12 and above processors have APIC timer
|
||||
* running in deep C states.
|
||||
*/
|
||||
if (c->x86 > 0x11)
|
||||
set_cpu_cap(c, X86_FEATURE_ARAT);
|
||||
|
||||
if (cpu_has_amd_erratum(c, amd_erratum_400))
|
||||
set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
|
||||
|
||||
rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
|
||||
{
|
||||
/* AMD errata T13 (order #21922) */
|
||||
if ((c->x86 == 6)) {
|
||||
/* Duron Rev A0 */
|
||||
if (c->x86_model == 3 && c->x86_mask == 0)
|
||||
size = 64;
|
||||
/* Tbird rev A1/A2 */
|
||||
if (c->x86_model == 4 &&
|
||||
(c->x86_mask == 0 || c->x86_mask == 1))
|
||||
size = 256;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 ebx, eax, ecx, edx;
|
||||
u16 mask = 0xfff;
|
||||
|
||||
if (c->x86 < 0xf)
|
||||
return;
|
||||
|
||||
if (c->extended_cpuid_level < 0x80000006)
|
||||
return;
|
||||
|
||||
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
|
||||
tlb_lli_4k[ENTRIES] = ebx & mask;
|
||||
|
||||
/*
|
||||
* K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB
|
||||
* characteristics from the CPUID function 0x80000005 instead.
|
||||
*/
|
||||
if (c->x86 == 0xf) {
|
||||
cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
|
||||
mask = 0xff;
|
||||
}
|
||||
|
||||
/* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
|
||||
if (!((eax >> 16) & mask))
|
||||
tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff;
|
||||
else
|
||||
tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;
|
||||
|
||||
/* a 4M entry uses two 2M entries */
|
||||
tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;
|
||||
|
||||
/* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
|
||||
if (!(eax & mask)) {
|
||||
/* Erratum 658 */
|
||||
if (c->x86 == 0x15 && c->x86_model <= 0x1f) {
|
||||
tlb_lli_2m[ENTRIES] = 1024;
|
||||
} else {
|
||||
cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
|
||||
tlb_lli_2m[ENTRIES] = eax & 0xff;
|
||||
}
|
||||
} else
|
||||
tlb_lli_2m[ENTRIES] = eax & mask;
|
||||
|
||||
tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
|
||||
}
|
||||
|
||||
static const struct cpu_dev amd_cpu_dev = {
|
||||
.c_vendor = "AMD",
|
||||
.c_ident = { "AuthenticAMD" },
|
||||
#ifdef CONFIG_X86_32
|
||||
.legacy_models = {
|
||||
{ .family = 4, .model_names =
|
||||
{
|
||||
[3] = "486 DX/2",
|
||||
[7] = "486 DX/2-WB",
|
||||
[8] = "486 DX/4",
|
||||
[9] = "486 DX/4-WB",
|
||||
[14] = "Am5x86-WT",
|
||||
[15] = "Am5x86-WB"
|
||||
}
|
||||
},
|
||||
},
|
||||
.legacy_cache_size = amd_size_cache,
|
||||
#endif
|
||||
.c_early_init = early_init_amd,
|
||||
.c_detect_tlb = cpu_detect_tlb_amd,
|
||||
.c_bsp_init = bsp_init_amd,
|
||||
.c_init = init_amd,
|
||||
.c_x86_vendor = X86_VENDOR_AMD,
|
||||
};
|
||||
|
||||
cpu_dev_register(amd_cpu_dev);
|
||||
|
||||
/*
|
||||
* AMD errata checking
|
||||
*
|
||||
* Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
|
||||
* AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
|
||||
* have an OSVW id assigned, which it takes as first argument. Both take a
|
||||
* variable number of family-specific model-stepping ranges created by
|
||||
* AMD_MODEL_RANGE().
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* const int amd_erratum_319[] =
|
||||
* AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
|
||||
* AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
|
||||
* AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
|
||||
*/
|
||||
|
||||
#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 }
|
||||
#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 }
|
||||
#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
|
||||
((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
|
||||
#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
|
||||
#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff)
|
||||
#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff)
|
||||
|
||||
static const int amd_erratum_400[] =
|
||||
AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
|
||||
AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
|
||||
|
||||
static const int amd_erratum_383[] =
|
||||
AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
|
||||
|
||||
|
||||
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
|
||||
{
|
||||
int osvw_id = *erratum++;
|
||||
u32 range;
|
||||
u32 ms;
|
||||
|
||||
if (osvw_id >= 0 && osvw_id < 65536 &&
|
||||
cpu_has(cpu, X86_FEATURE_OSVW)) {
|
||||
u64 osvw_len;
|
||||
|
||||
rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
|
||||
if (osvw_id < osvw_len) {
|
||||
u64 osvw_bits;
|
||||
|
||||
rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
|
||||
osvw_bits);
|
||||
return osvw_bits & (1ULL << (osvw_id & 0x3f));
|
||||
}
|
||||
}
|
||||
|
||||
/* OSVW unavailable or ID unknown, match family-model-stepping range */
|
||||
ms = (cpu->x86_model << 4) | cpu->x86_mask;
|
||||
while ((range = *erratum++))
|
||||
if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
|
||||
(ms >= AMD_MODEL_RANGE_START(range)) &&
|
||||
(ms <= AMD_MODEL_RANGE_END(range)))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
94
arch/x86/kernel/cpu/bugs.c
Normal file
@@ -0,0 +1,94 @@
/*
 *  Copyright (C) 1994  Linus Torvalds
 *
 *  Cyrix stuff, June 1998 by:
 *	- Rafael R. Reilova (moved everything from head.S),
 *        <rreilova@ececs.uc.edu>
 *	- Channing Corn (tests & fixes),
 *	- Andrew D. Balsa (code cleanup).
 */
#include <linux/init.h>
#include <linux/utsname.h>
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>
#include <asm/i387.h>
#include <asm/msr.h>
#include <asm/paravirt.h>
#include <asm/alternative.h>

static double __initdata x = 4195835.0;
static double __initdata y = 3145727.0;

/*
 * This used to check for exceptions..
 * However, it turns out that to support that,
 * the XMM trap handlers basically had to
 * be buggy. So let's have a correct XMM trap
 * handler, and forget about printing out
 * some status at boot.
 *
 * We should really only care about bugs here
 * anyway. Not features.
 */
static void __init check_fpu(void)
{
	s32 fdiv_bug;

	kernel_fpu_begin();

	/*
	 * trap_init() enabled FXSR and company _before_ testing for FP
	 * problems here.
	 *
	 * Test for the divl bug: http://en.wikipedia.org/wiki/Fdiv_bug
	 */
	__asm__("fninit\n\t"
		"fldl %1\n\t"
		"fdivl %2\n\t"
		"fmull %2\n\t"
		"fldl %1\n\t"
		"fsubp %%st,%%st(1)\n\t"
		"fistpl %0\n\t"
		"fwait\n\t"
		"fninit"
		: "=m" (*&fdiv_bug)
		: "m" (*&x), "m" (*&y));

	kernel_fpu_end();

	if (fdiv_bug) {
		set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV);
		pr_warn("Hmm, FPU with FDIV bug\n");
	}
}

void __init check_bugs(void)
{
	identify_boot_cpu();
#ifndef CONFIG_SMP
	pr_info("CPU: ");
	print_cpu_info(&boot_cpu_data);
#endif

	/*
	 * Check whether we are able to run this kernel safely on SMP.
	 *
	 * - i386 is no longer supported.
	 * - In order to run on anything without a TSC, we need to be
	 *   compiled for a i486.
	 */
	if (boot_cpu_data.x86 < 4)
		panic("Kernel requires i486+ for 'invlpg' and other features");

	init_utsname()->machine[1] =
		'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
	alternative_instructions();

	/*
	 * kernel_fpu_begin/end() in check_fpu() relies on the patched
	 * alternative instructions.
	 */
	if (cpu_has_fpu)
		check_fpu();
}
33
arch/x86/kernel/cpu/bugs_64.c
Normal file
@@ -0,0 +1,33 @@
/*
 *  Copyright (C) 1994  Linus Torvalds
 *  Copyright (C) 2000  SuSE
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <asm/alternative.h>
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/mtrr.h>
#include <asm/cacheflush.h>

void __init check_bugs(void)
{
	identify_boot_cpu();
#if !defined(CONFIG_SMP)
	printk(KERN_INFO "CPU: ");
	print_cpu_info(&boot_cpu_data);
#endif
	alternative_instructions();

	/*
	 * Make sure the first 2MB area is not mapped by huge pages
	 * There are typically fixed size MTRRs in there and overlapping
	 * MTRRs into large pages causes slow downs.
	 *
	 * Right now we don't do that with gbpages because there seems
	 * very little benefit for that case.
	 */
	if (!direct_gbpages)
		set_memory_4k((unsigned long)__va(0), 1);
}
229
arch/x86/kernel/cpu/centaur.c
Normal file
@@ -0,0 +1,229 @@
#include <linux/bitops.h>
#include <linux/kernel.h>

#include <asm/processor.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>

#include "cpu.h"

#define ACE_PRESENT	(1 << 6)
#define ACE_ENABLED	(1 << 7)
#define ACE_FCR		(1 << 28)	/* MSR_VIA_FCR */

#define RNG_PRESENT	(1 << 2)
#define RNG_ENABLED	(1 << 3)
#define RNG_ENABLE	(1 << 6)	/* MSR_VIA_RNG */

static void init_c3(struct cpuinfo_x86 *c)
{
	u32 lo, hi;

	/* Test for Centaur Extended Feature Flags presence */
	if (cpuid_eax(0xC0000000) >= 0xC0000001) {
		u32 tmp = cpuid_edx(0xC0000001);

		/* enable ACE unit, if present and disabled */
		if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
			rdmsr(MSR_VIA_FCR, lo, hi);
			lo |= ACE_FCR;		/* enable ACE unit */
			wrmsr(MSR_VIA_FCR, lo, hi);
			printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
		}

		/* enable RNG unit, if present and disabled */
		if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
			rdmsr(MSR_VIA_RNG, lo, hi);
			lo |= RNG_ENABLE;	/* enable RNG unit */
			wrmsr(MSR_VIA_RNG, lo, hi);
			printk(KERN_INFO "CPU: Enabled h/w RNG\n");
		}

		/* store Centaur Extended Feature Flags as
		 * word 5 of the CPU capability bit array
		 */
		c->x86_capability[5] = cpuid_edx(0xC0000001);
	}
#ifdef CONFIG_X86_32
	/* Cyrix III family needs CX8 & PGE explicitly enabled. */
	if (c->x86_model >= 6 && c->x86_model <= 13) {
		rdmsr(MSR_VIA_FCR, lo, hi);
		lo |= (1<<1 | 1<<7);
		wrmsr(MSR_VIA_FCR, lo, hi);
		set_cpu_cap(c, X86_FEATURE_CX8);
	}

	/* Before Nehemiah, the C3's had 3dNOW! */
	if (c->x86_model >= 6 && c->x86_model < 9)
		set_cpu_cap(c, X86_FEATURE_3DNOW);
#endif
	if (c->x86 == 0x6 && c->x86_model >= 0xf) {
		c->x86_cache_alignment = c->x86_clflush_size * 2;
		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
	}

	cpu_detect_cache_sizes(c);
}

enum {
	ECX8		= 1<<1,
	EIERRINT	= 1<<2,
	DPM		= 1<<3,
	DMCE		= 1<<4,
	DSTPCLK		= 1<<5,
	ELINEAR		= 1<<6,
	DSMC		= 1<<7,
	DTLOCK		= 1<<8,
	EDCTLB		= 1<<8,
	EMMX		= 1<<9,
	DPDC		= 1<<11,
	EBRPRED		= 1<<12,
	DIC		= 1<<13,
	DDC		= 1<<14,
	DNA		= 1<<15,
	ERETSTK		= 1<<16,
	E2MMX		= 1<<19,
	EAMD3D		= 1<<20,
};

static void early_init_centaur(struct cpuinfo_x86 *c)
{
	switch (c->x86) {
#ifdef CONFIG_X86_32
	case 5:
		/* Emulate MTRRs using Centaur's MCR. */
		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
		break;
#endif
	case 6:
		if (c->x86_model >= 0xf)
			set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
		break;
	}
#ifdef CONFIG_X86_64
	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
#endif
}

static void init_centaur(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
	char *name;
	u32 fcr_set = 0;
	u32 fcr_clr = 0;
	u32 lo, hi, newlo;
	u32 aa, bb, cc, dd;

	/*
	 * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
	 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
	 */
	clear_cpu_cap(c, 0*32+31);
#endif
	early_init_centaur(c);
	switch (c->x86) {
#ifdef CONFIG_X86_32
	case 5:
		switch (c->x86_model) {
		case 4:
			name = "C6";
			fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
			fcr_clr = DPDC;
			printk(KERN_NOTICE "Disabling bugged TSC.\n");
			clear_cpu_cap(c, X86_FEATURE_TSC);
			break;
		case 8:
			switch (c->x86_mask) {
			default:
				name = "2";
				break;
			case 7 ... 9:
				name = "2A";
				break;
			case 10 ... 15:
				name = "2B";
				break;
			}
			fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
				  E2MMX|EAMD3D;
			fcr_clr = DPDC;
			break;
		case 9:
			name = "3";
			fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
				  E2MMX|EAMD3D;
			fcr_clr = DPDC;
			break;
		default:
			name = "??";
		}

		rdmsr(MSR_IDT_FCR1, lo, hi);
		newlo = (lo|fcr_set) & (~fcr_clr);

		if (newlo != lo) {
			printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n",
				lo, newlo);
			wrmsr(MSR_IDT_FCR1, newlo, hi);
		} else {
			printk(KERN_INFO "Centaur FCR is 0x%X\n", lo);
		}
		/* Emulate MTRRs using Centaur's MCR. */
		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
		/* Report CX8 */
		set_cpu_cap(c, X86_FEATURE_CX8);
		/* Set 3DNow! on Winchip 2 and above. */
		if (c->x86_model >= 8)
			set_cpu_cap(c, X86_FEATURE_3DNOW);
		/* See if we can find out some more. */
		if (cpuid_eax(0x80000000) >= 0x80000005) {
			/* Yes, we can. */
			cpuid(0x80000005, &aa, &bb, &cc, &dd);
			/* Add L1 data and code cache sizes. */
			c->x86_cache_size = (cc>>24)+(dd>>24);
		}
		sprintf(c->x86_model_id, "WinChip %s", name);
		break;
#endif
	case 6:
		init_c3(c);
		break;
	}
#ifdef CONFIG_X86_64
	set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
#endif
}

#ifdef CONFIG_X86_32
static unsigned int
centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
	/* VIA C3 CPUs (670-68F) need further shifting. */
	if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
		size >>= 8;

	/*
	 * There's also an erratum in Nehemiah stepping 1, which
	 * returns '65KB' instead of '64KB'
	 *  - Note, it seems this may only be in engineering samples.
	 */
	if ((c->x86 == 6) && (c->x86_model == 9) &&
				(c->x86_mask == 1) && (size == 65))
		size -= 1;
	return size;
}
#endif

static const struct cpu_dev centaur_cpu_dev = {
	.c_vendor	= "Centaur",
	.c_ident	= { "CentaurHauls" },
	.c_early_init	= early_init_centaur,
	.c_init		= init_centaur,
#ifdef CONFIG_X86_32
	.legacy_cache_size = centaur_size_cache,
#endif
	.c_x86_vendor	= X86_VENDOR_CENTAUR,
};

cpu_dev_register(centaur_cpu_dev);
1450
arch/x86/kernel/cpu/common.c
Normal file
File diff suppressed because it is too large
48
arch/x86/kernel/cpu/cpu.h
Normal file
@@ -0,0 +1,48 @@
#ifndef ARCH_X86_CPU_H
#define ARCH_X86_CPU_H

/* attempt to consolidate cpu attributes */
struct cpu_dev {
	const char	*c_vendor;

	/* some have two possibilities for cpuid string */
	const char	*c_ident[2];

	void		(*c_early_init)(struct cpuinfo_x86 *);
	void		(*c_bsp_init)(struct cpuinfo_x86 *);
	void		(*c_init)(struct cpuinfo_x86 *);
	void		(*c_identify)(struct cpuinfo_x86 *);
	void		(*c_detect_tlb)(struct cpuinfo_x86 *);
	int		c_x86_vendor;
#ifdef CONFIG_X86_32
	/* Optional vendor specific routine to obtain the cache size. */
	unsigned int	(*legacy_cache_size)(struct cpuinfo_x86 *,
					     unsigned int);

	/* Family/stepping-based lookup table for model names. */
	struct legacy_cpu_model_info {
		int		family;
		const char	*model_names[16];
	}		legacy_models[5];
#endif
};

struct _tlb_table {
	unsigned char descriptor;
	char tlb_type;
	unsigned int entries;
	/* unsigned int ways; */
	char info[128];
};

#define cpu_dev_register(cpu_devX) \
	static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \
	__attribute__((__section__(".x86_cpu_dev.init"))) = \
	&cpu_devX;

extern const struct cpu_dev *const __x86_cpu_dev_start[],
			    *const __x86_cpu_dev_end[];

extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
#endif /* ARCH_X86_CPU_H */
461
arch/x86/kernel/cpu/cyrix.c
Normal file
@@ -0,0 +1,461 @@
#include <linux/bitops.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/pci.h>
|
||||
#include <asm/dma.h>
|
||||
#include <linux/io.h>
|
||||
#include <asm/processor-cyrix.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <linux/timer.h>
|
||||
#include <asm/pci-direct.h>
|
||||
#include <asm/tsc.h>
|
||||
|
||||
#include "cpu.h"
|
||||
|
||||
/*
|
||||
* Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
|
||||
*/
|
||||
static void __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
|
||||
{
|
||||
unsigned char ccr2, ccr3;
|
||||
|
||||
/* we test for DEVID by checking whether CCR3 is writable */
|
||||
ccr3 = getCx86(CX86_CCR3);
|
||||
setCx86(CX86_CCR3, ccr3 ^ 0x80);
|
||||
getCx86(0xc0); /* dummy to change bus */
|
||||
|
||||
if (getCx86(CX86_CCR3) == ccr3) { /* no DEVID regs. */
|
||||
ccr2 = getCx86(CX86_CCR2);
|
||||
setCx86(CX86_CCR2, ccr2 ^ 0x04);
|
||||
getCx86(0xc0); /* dummy */
|
||||
|
||||
if (getCx86(CX86_CCR2) == ccr2) /* old Cx486SLC/DLC */
|
||||
*dir0 = 0xfd;
|
||||
else { /* Cx486S A step */
|
||||
setCx86(CX86_CCR2, ccr2);
|
||||
*dir0 = 0xfe;
|
||||
}
|
||||
} else {
|
||||
setCx86(CX86_CCR3, ccr3); /* restore CCR3 */
|
||||
|
||||
/* read DIR0 and DIR1 CPU registers */
|
||||
*dir0 = getCx86(CX86_DIR0);
|
||||
*dir1 = getCx86(CX86_DIR1);
|
||||
}
|
||||
}
|
||||
|
||||
static void do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
__do_cyrix_devid(dir0, dir1);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
/*
|
||||
* Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in
|
||||
* order to identify the Cyrix CPU model after we're out of setup.c
|
||||
*
|
||||
* Actually since bugs.h doesn't even reference this perhaps someone should
|
||||
* fix the documentation ???
|
||||
*/
|
||||
static unsigned char Cx86_dir0_msb = 0;
|
||||
|
||||
static const char Cx86_model[][9] = {
|
||||
"Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
|
||||
"M II ", "Unknown"
|
||||
};
|
||||
static const char Cx486_name[][5] = {
|
||||
"SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
|
||||
"SRx2", "DRx2"
|
||||
};
|
||||
static const char Cx486S_name[][4] = {
|
||||
"S", "S2", "Se", "S2e"
|
||||
};
|
||||
static const char Cx486D_name[][4] = {
|
||||
"DX", "DX2", "?", "?", "?", "DX4"
|
||||
};
|
||||
static char Cx86_cb[] = "?.5x Core/Bus Clock";
|
||||
static const char cyrix_model_mult1[] = "12??43";
|
||||
static const char cyrix_model_mult2[] = "12233445";
|
||||
|
||||
/*
|
||||
* Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
|
||||
* BIOSes for compatibility with DOS games. This makes the udelay loop
|
||||
* work correctly, and improves performance.
|
||||
*
|
||||
* FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP
|
||||
*/
|
||||
|
||||
static void check_cx686_slop(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (Cx86_dir0_msb == 3) {
|
||||
unsigned char ccr3, ccr5;
|
||||
|
||||
local_irq_save(flags);
|
||||
ccr3 = getCx86(CX86_CCR3);
|
||||
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
|
||||
ccr5 = getCx86(CX86_CCR5);
|
||||
if (ccr5 & 2)
|
||||
setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */
|
||||
setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (ccr5 & 2) { /* possible wrong calibration done */
|
||||
printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n");
|
||||
calibrate_delay();
|
||||
c->loops_per_jiffy = loops_per_jiffy;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void set_cx86_reorder(void)
|
||||
{
|
||||
u8 ccr3;
|
||||
|
||||
printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
|
||||
ccr3 = getCx86(CX86_CCR3);
|
||||
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
|
||||
|
||||
/* Load/Store Serialize to mem access disable (=reorder it) */
|
||||
setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80);
|
||||
/* set load/store serialize from 1GB to 4GB */
|
||||
ccr3 |= 0xe0;
|
||||
setCx86(CX86_CCR3, ccr3);
|
||||
}
|
||||
|
||||
static void set_cx86_memwb(void)
|
||||
{
|
||||
printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
|
||||
|
||||
/* CCR2 bit 2: unlock NW bit */
|
||||
setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
|
||||
/* set 'Not Write-through' */
|
||||
write_cr0(read_cr0() | X86_CR0_NW);
|
||||
/* CCR2 bit 2: lock NW bit and set WT1 */
|
||||
setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14);
|
||||
}
|
||||
|
||||
/*
|
||||
* Configure later MediaGX and/or Geode processor.
|
||||
*/
|
||||
|
||||
static void geode_configure(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
u8 ccr3;
|
||||
local_irq_save(flags);
|
||||
|
||||
/* Suspend on halt power saving and enable #SUSP pin */
|
||||
setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88);
|
||||
|
||||
ccr3 = getCx86(CX86_CCR3);
|
||||
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
|
||||
|
||||
|
||||
/* FPU fast, DTE cache, Mem bypass */
|
||||
setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38);
|
||||
setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
|
||||
|
||||
set_cx86_memwb();
|
||||
set_cx86_reorder();
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static void early_init_cyrix(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned char dir0, dir0_msn, dir1 = 0;
|
||||
|
||||
__do_cyrix_devid(&dir0, &dir1);
|
||||
dir0_msn = dir0 >> 4; /* identifies CPU "family" */
|
||||
|
||||
switch (dir0_msn) {
|
||||
case 3: /* 6x86/6x86L */
|
||||
/* Emulate MTRRs using Cyrix's ARRs. */
|
||||
set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
|
||||
break;
|
||||
case 5: /* 6x86MX/M II */
|
||||
/* Emulate MTRRs using Cyrix's ARRs. */
|
||||
set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void init_cyrix(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
|
||||
char *buf = c->x86_model_id;
|
||||
const char *p = NULL;
|
||||
|
||||
/*
|
||||
* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
|
||||
* 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
|
||||
*/
|
||||
clear_cpu_cap(c, 0*32+31);
|
||||
|
||||
/* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */
|
||||
if (test_cpu_cap(c, 1*32+24)) {
|
||||
clear_cpu_cap(c, 1*32+24);
|
||||
set_cpu_cap(c, X86_FEATURE_CXMMX);
|
||||
}
|
||||
|
||||
do_cyrix_devid(&dir0, &dir1);
|
||||
|
||||
check_cx686_slop(c);
|
||||
|
||||
Cx86_dir0_msb = dir0_msn = dir0 >> 4; /* identifies CPU "family" */
|
||||
dir0_lsn = dir0 & 0xf; /* model or clock multiplier */
|
||||
|
||||
/* common case step number/rev -- exceptions handled below */
|
||||
c->x86_model = (dir1 >> 4) + 1;
|
||||
c->x86_mask = dir1 & 0xf;
|
||||
|
||||
/* Now cook; the original recipe is by Channing Corn, from Cyrix.
|
||||
* We do the same thing for each generation: we work out
|
||||
* the model, multiplier and stepping. Black magic included,
|
||||
* to make the silicon step/rev numbers match the printed ones.
|
||||
*/
|
||||
|
||||
switch (dir0_msn) {
|
||||
unsigned char tmp;
|
||||
|
||||
case 0: /* Cx486SLC/DLC/SRx/DRx */
|
||||
p = Cx486_name[dir0_lsn & 7];
|
||||
break;
|
||||
|
||||
case 1: /* Cx486S/DX/DX2/DX4 */
|
||||
p = (dir0_lsn & 8) ? Cx486D_name[dir0_lsn & 5]
|
||||
: Cx486S_name[dir0_lsn & 3];
|
||||
break;
|
||||
|
||||
case 2: /* 5x86 */
|
||||
Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5];
|
||||
p = Cx86_cb+2;
|
||||
break;
|
||||
|
||||
case 3: /* 6x86/6x86L */
|
||||
Cx86_cb[1] = ' ';
|
||||
Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5];
|
||||
if (dir1 > 0x21) { /* 686L */
|
||||
Cx86_cb[0] = 'L';
|
||||
p = Cx86_cb;
|
||||
(c->x86_model)++;
|
||||
} else /* 686 */
|
||||
p = Cx86_cb+1;
|
||||
/* Emulate MTRRs using Cyrix's ARRs. */
|
||||
set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
|
||||
/* 6x86's contain this bug */
|
||||
set_cpu_bug(c, X86_BUG_COMA);
|
||||
break;
|
||||
|
||||
case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
|
||||
#ifdef CONFIG_PCI
|
||||
{
|
||||
u32 vendor, device;
|
||||
/*
|
||||
* It isn't really a PCI quirk directly, but the cure is the
|
||||
* same. The MediaGX has deep magic SMM stuff that handles the
|
||||
* SB emulation. It throws away the fifo on disable_dma() which
|
||||
* is wrong and ruins the audio.
|
||||
*
|
||||
* Bug2: VSA1 has a wrap bug so that using maximum sized DMA
|
||||
* causes bad things. According to NatSemi VSA2 has another
|
||||
* bug to do with 'hlt'. I've not seen any boards using VSA2
|
||||
* and X doesn't seem to support it either so who cares 8).
|
||||
* VSA1 we work around however.
|
||||
*/
|
||||
|
||||
printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
|
||||
isa_dma_bridge_buggy = 2;
|
||||
|
||||
/* We do this before the PCI layer is running. However we
|
||||
are safe here as we know the bridge must be a Cyrix
|
||||
companion and must be present */
|
||||
vendor = read_pci_config_16(0, 0, 0x12, PCI_VENDOR_ID);
|
||||
device = read_pci_config_16(0, 0, 0x12, PCI_DEVICE_ID);
|
||||
|
||||
/*
|
||||
* The 5510/5520 companion chips have a funky PIT.
|
||||
*/
|
||||
if (vendor == PCI_VENDOR_ID_CYRIX &&
|
||||
(device == PCI_DEVICE_ID_CYRIX_5510 ||
|
||||
device == PCI_DEVICE_ID_CYRIX_5520))
|
||||
mark_tsc_unstable("cyrix 5510/5520 detected");
|
||||
}
|
||||
#endif
|
||||
c->x86_cache_size = 16; /* Yep 16K integrated cache thats it */
|
||||
|
||||
/* GXm supports extended cpuid levels 'ala' AMD */
|
||||
if (c->cpuid_level == 2) {
|
||||
/* Enable cxMMX extensions (GX1 Datasheet 54) */
|
||||
setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1);
|
||||
|
||||
/*
|
||||
* GXm : 0x30 ... 0x5f GXm datasheet 51
|
||||
* GXlv: 0x6x GXlv datasheet 54
|
||||
* ? : 0x7x
|
||||
* GX1 : 0x8x GX1 datasheet 56
|
||||
*/
|
||||
if ((0x30 <= dir1 && dir1 <= 0x6f) ||
|
||||
(0x80 <= dir1 && dir1 <= 0x8f))
|
||||
geode_configure();
|
||||
return;
|
||||
} else { /* MediaGX */
|
||||
Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4';
|
||||
p = Cx86_cb+2;
|
||||
c->x86_model = (dir1 & 0x20) ? 1 : 2;
|
||||
}
|
||||
break;
|
||||
|
||||
case 5: /* 6x86MX/M II */
|
||||
if (dir1 > 7) {
|
||||
dir0_msn++; /* M II */
|
||||
/* Enable MMX extensions (App note 108) */
|
||||
setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1);
|
||||
} else {
|
||||
/* A 6x86MX - it has the bug. */
|
||||
set_cpu_bug(c, X86_BUG_COMA);
|
||||
}
|
||||
tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0;
|
||||
Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7];
|
||||
p = Cx86_cb+tmp;
|
||||
if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20))
|
||||
(c->x86_model)++;
|
||||
/* Emulate MTRRs using Cyrix's ARRs. */
|
||||
set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
|
||||
break;
|
||||
|
||||
case 0xf: /* Cyrix 486 without DEVID registers */
|
||||
switch (dir0_lsn) {
|
||||
case 0xd: /* either a 486SLC or DLC w/o DEVID */
|
||||
dir0_msn = 0;
|
||||
p = Cx486_name[(cpu_has_fpu ? 1 : 0)];
|
||||
break;
|
||||
|
||||
case 0xe: /* a 486S A step */
|
||||
dir0_msn = 0;
|
||||
p = Cx486S_name[0];
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
default: /* unknown (shouldn't happen, we know everyone ;-) */
|
||||
dir0_msn = 7;
|
||||
break;
|
||||
}
|
||||
strcpy(buf, Cx86_model[dir0_msn & 7]);
|
||||
if (p)
|
||||
strcat(buf, p);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle National Semiconductor branded processors
|
||||
*/
|
||||
static void init_nsc(struct cpuinfo_x86 *c)
|
||||
{
|
||||
/*
|
||||
* There may be GX1 processors in the wild that are branded
|
||||
* NSC and not Cyrix.
|
||||
*
|
||||
* This function only handles the GX processor, and kicks every
|
||||
* thing else to the Cyrix init function above - that should
|
||||
* cover any processors that might have been branded differently
|
||||
* after NSC acquired Cyrix.
|
||||
*
|
||||
* If this breaks your GX1 horribly, please e-mail
|
||||
* info-linux@ldcmail.amd.com to tell us.
|
||||
*/
|
||||
|
||||
/* Handle the GX (Formally known as the GX2) */
|
||||
|
||||
if (c->x86 == 5 && c->x86_model == 5)
|
||||
cpu_detect_cache_sizes(c);
|
||||
else
|
||||
init_cyrix(c);
|
||||
}
|
||||
|
||||
/*
|
||||
* Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected
|
||||
* by the fact that they preserve the flags across the division of 5/2.
|
||||
* PII and PPro exhibit this behavior too, but they have cpuid available.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Perform the Cyrix 5/2 test. A Cyrix won't change
|
||||
* the flags, while other 486 chips will.
|
||||
*/
|
||||
static inline int test_cyrix_52div(void)
|
||||
{
|
||||
unsigned int test;
|
||||
|
||||
__asm__ __volatile__(
|
||||
"sahf\n\t" /* clear flags (%eax = 0x0005) */
|
||||
"div %b2\n\t" /* divide 5 by 2 */
|
||||
"lahf" /* store flags into %ah */
|
||||
: "=a" (test)
|
||||
: "0" (5), "q" (2)
|
||||
: "cc");
|
||||
|
||||
/* AH is 0x02 on Cyrix after the divide.. */
|
||||
return (unsigned char) (test >> 8) == 0x02;
|
||||
}
|
||||
|
||||
static void cyrix_identify(struct cpuinfo_x86 *c)
|
||||
{
|
||||
/* Detect Cyrix with disabled CPUID */
|
||||
if (c->x86 == 4 && test_cyrix_52div()) {
|
||||
unsigned char dir0, dir1;
|
||||
|
||||
strcpy(c->x86_vendor_id, "CyrixInstead");
|
||||
c->x86_vendor = X86_VENDOR_CYRIX;
|
||||
|
||||
/* Actually enable cpuid on the older cyrix */
|
||||
|
||||
/* Retrieve CPU revisions */
|
||||
|
||||
do_cyrix_devid(&dir0, &dir1);
|
||||
|
||||
dir0 >>= 4;
|
||||
|
||||
/* Check it is an affected model */
|
||||
|
||||
if (dir0 == 5 || dir0 == 3) {
|
||||
unsigned char ccr3;
|
||||
unsigned long flags;
|
||||
printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
|
||||
local_irq_save(flags);
|
||||
ccr3 = getCx86(CX86_CCR3);
|
||||
/* enable MAPEN */
|
||||
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
|
||||
/* enable cpuid */
|
||||
setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80);
|
||||
/* disable MAPEN */
|
||||
setCx86(CX86_CCR3, ccr3);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static const struct cpu_dev cyrix_cpu_dev = {
|
||||
.c_vendor = "Cyrix",
|
||||
.c_ident = { "CyrixInstead" },
|
||||
.c_early_init = early_init_cyrix,
|
||||
.c_init = init_cyrix,
|
||||
.c_identify = cyrix_identify,
|
||||
.c_x86_vendor = X86_VENDOR_CYRIX,
|
||||
};
|
||||
|
||||
cpu_dev_register(cyrix_cpu_dev);
|
||||
|
||||
static const struct cpu_dev nsc_cpu_dev = {
|
||||
.c_vendor = "NSC",
|
||||
.c_ident = { "Geode by NSC" },
|
||||
.c_init = init_nsc,
|
||||
.c_x86_vendor = X86_VENDOR_NSC,
|
||||
};
|
||||
|
||||
cpu_dev_register(nsc_cpu_dev);
87
arch/x86/kernel/cpu/hypervisor.c
Normal file
@@ -0,0 +1,87 @@
/*
 * Common hypervisor code
 *
 * Copyright (C) 2008, VMware, Inc.
 * Author : Alok N Kataria <akataria@vmware.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <linux/module.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>

static const __initconst struct hypervisor_x86 * const hypervisors[] =
{
#ifdef CONFIG_XEN_PVHVM
	&x86_hyper_xen_hvm,
#endif
	&x86_hyper_vmware,
	&x86_hyper_ms_hyperv,
#ifdef CONFIG_KVM_GUEST
	&x86_hyper_kvm,
#endif
};

const struct hypervisor_x86 *x86_hyper;
EXPORT_SYMBOL(x86_hyper);

static inline void __init
detect_hypervisor_vendor(void)
{
	const struct hypervisor_x86 *h, * const *p;
	uint32_t pri, max_pri = 0;

	for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) {
		h = *p;
		pri = h->detect();
		if (pri != 0 && pri > max_pri) {
			max_pri = pri;
			x86_hyper = h;
		}
	}

	if (max_pri)
		printk(KERN_INFO "Hypervisor detected: %s\n", x86_hyper->name);
}

void init_hypervisor(struct cpuinfo_x86 *c)
{
	if (x86_hyper && x86_hyper->set_cpu_features)
		x86_hyper->set_cpu_features(c);
}

void __init init_hypervisor_platform(void)
{

	detect_hypervisor_vendor();

	if (!x86_hyper)
		return;

	init_hypervisor(&boot_cpu_data);

	if (x86_hyper->init_platform)
		x86_hyper->init_platform();
}

bool __init hypervisor_x2apic_available(void)
{
	return x86_hyper &&
	       x86_hyper->x2apic_available &&
	       x86_hyper->x2apic_available();
}
756
arch/x86/kernel/cpu/intel.c
Normal file
@@ -0,0 +1,756 @@
#include <linux/kernel.h>
|
||||
|
||||
#include <linux/string.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/thread_info.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/bugs.h>
|
||||
#include <asm/cpu.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#include <linux/topology.h>
|
||||
#endif
|
||||
|
||||
#include "cpu.h"
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
#include <asm/mpspec.h>
|
||||
#include <asm/apic.h>
|
||||
#endif
|
||||
|
||||
static void early_init_intel(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u64 misc_enable;
|
||||
|
||||
/* Unmask CPUID levels if masked: */
|
||||
if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
|
||||
if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
|
||||
MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) {
|
||||
c->cpuid_level = cpuid_eax(0);
|
||||
get_cpu_cap(c);
|
||||
}
|
||||
}
|
||||
|
||||
if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
|
||||
(c->x86 == 0x6 && c->x86_model >= 0x0e))
|
||||
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
||||
|
||||
if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) {
|
||||
unsigned lower_word;
|
||||
|
||||
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
|
||||
/* Required by the SDM */
|
||||
sync_core();
|
||||
rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
|
||||
}
|
||||
|
||||
/*
|
||||
* Atom erratum AAE44/AAF40/AAG38/AAH41:
|
||||
*
|
||||
* A race condition between speculative fetches and invalidating
|
||||
* a large page. This is worked around in microcode, but we
|
||||
* need the microcode to have already been loaded... so if it is
|
||||
* not, recommend a BIOS update and disable large pages.
|
||||
*/
|
||||
if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
|
||||
c->microcode < 0x20e) {
|
||||
printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
|
||||
clear_cpu_cap(c, X86_FEATURE_PSE);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
set_cpu_cap(c, X86_FEATURE_SYSENTER32);
|
||||
#else
|
||||
/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
|
||||
if (c->x86 == 15 && c->x86_cache_alignment == 64)
|
||||
c->x86_cache_alignment = 128;
|
||||
#endif
|
||||
|
||||
/* CPUID workaround for 0F33/0F34 CPU */
|
||||
if (c->x86 == 0xF && c->x86_model == 0x3
|
||||
&& (c->x86_mask == 0x3 || c->x86_mask == 0x4))
|
||||
c->x86_phys_bits = 36;
|
||||
|
||||
/*
|
||||
* c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
|
||||
* with P/T states and does not stop in deep C-states.
|
||||
*
|
||||
* It is also reliable across cores and sockets. (but not across
|
||||
* cabinets - we turn it off in that case explicitly.)
|
||||
*/
|
||||
if (c->x86_power & (1 << 8)) {
|
||||
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
||||
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
|
||||
if (!check_tsc_unstable())
|
||||
set_sched_clock_stable();
|
||||
}
|
||||
|
||||
/* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
|
||||
if (c->x86 == 6) {
|
||||
switch (c->x86_model) {
|
||||
case 0x27: /* Penwell */
|
||||
case 0x35: /* Cloverview */
|
||||
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* There is a known erratum on Pentium III and Core Solo
|
||||
* and Core Duo CPUs.
|
||||
* " Page with PAT set to WC while associated MTRR is UC
|
||||
* may consolidate to UC "
|
||||
* Because of this erratum, it is better to stick with
|
||||
* setting WC in MTRR rather than using PAT on these CPUs.
|
||||
*
|
||||
* Enable PAT WC only on P4, Core 2 or later CPUs.
|
||||
*/
|
||||
if (c->x86 == 6 && c->x86_model < 15)
|
||||
clear_cpu_cap(c, X86_FEATURE_PAT);
|
||||
|
||||
#ifdef CONFIG_KMEMCHECK
|
||||
/*
|
||||
* P4s have a "fast strings" feature which causes single-
|
||||
* stepping REP instructions to only generate a #DB on
|
||||
* cache-line boundaries.
|
||||
*
|
||||
* Ingo Molnar reported a Pentium D (model 6) and a Xeon
|
||||
* (model 2) with the same problem.
|
||||
*/
|
||||
if (c->x86 == 15)
|
||||
if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
|
||||
MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) > 0)
|
||||
pr_info("kmemcheck: Disabling fast string operations\n");
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If fast string is not enabled in IA32_MISC_ENABLE for any reason,
|
||||
* clear the fast string and enhanced fast string CPU capabilities.
|
||||
*/
|
||||
if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
|
||||
rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
|
||||
if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
|
||||
printk(KERN_INFO "Disabled fast string operations\n");
|
||||
setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
|
||||
setup_clear_cpu_cap(X86_FEATURE_ERMS);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Intel Quark Core DevMan_001.pdf section 6.4.11
|
||||
* "The operating system also is required to invalidate (i.e., flush)
|
||||
* the TLB when any changes are made to any of the page table entries.
|
||||
* The operating system must reload CR3 to cause the TLB to be flushed"
|
||||
*
|
||||
* As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should
|
||||
* be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
|
||||
* to be modified
|
||||
*/
|
||||
if (c->x86 == 5 && c->x86_model == 9) {
|
||||
pr_info("Disabling PGE capability bit\n");
|
||||
setup_clear_cpu_cap(X86_FEATURE_PGE);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* Early probe support logic for ppro memory erratum #50
|
||||
*
|
||||
* This is called before we do cpu ident work
|
||||
*/
|
||||
|
||||
int ppro_with_ram_bug(void)
|
||||
{
|
||||
/* Uses data from early_cpu_detect now */
|
||||
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
|
||||
boot_cpu_data.x86 == 6 &&
|
||||
boot_cpu_data.x86_model == 1 &&
|
||||
boot_cpu_data.x86_mask < 8) {
|
||||
printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n");
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void intel_smp_check(struct cpuinfo_x86 *c)
|
||||
{
|
||||
/* calling is from identify_secondary_cpu() ? */
|
||||
if (!c->cpu_index)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Mask B, Pentium, but not Pentium MMX
|
||||
*/
|
||||
if (c->x86 == 5 &&
|
||||
c->x86_mask >= 1 && c->x86_mask <= 4 &&
|
||||
c->x86_model <= 3) {
|
||||
/*
|
||||
* Remember we have B step Pentia with bugs
|
||||
*/
|
||||
WARN_ONCE(1, "WARNING: SMP operation may be unreliable "
|
||||
"with B stepping processors.\n");
|
||||
}
|
||||
}
|
||||
|
||||
static int forcepae;
|
||||
static int __init forcepae_setup(char *__unused)
|
||||
{
|
||||
forcepae = 1;
|
||||
return 1;
|
||||
}
|
||||
__setup("forcepae", forcepae_setup);
|
||||
|
||||
static void intel_workarounds(struct cpuinfo_x86 *c)
|
||||
{
|
||||
#ifdef CONFIG_X86_F00F_BUG
|
||||
/*
|
||||
* All models of Pentium and Pentium with MMX technology CPUs
|
||||
* have the F0 0F bug, which lets nonprivileged users lock up the
|
||||
* system. Announce that the fault handler will be checking for it.
|
||||
* The Quark is also family 5, but does not have the same bug.
|
||||
*/
|
||||
clear_cpu_bug(c, X86_BUG_F00F);
|
||||
if (!paravirt_enabled() && c->x86 == 5 && c->x86_model < 9) {
|
||||
static int f00f_workaround_enabled;
|
||||
|
||||
set_cpu_bug(c, X86_BUG_F00F);
|
||||
if (!f00f_workaround_enabled) {
|
||||
printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
|
||||
f00f_workaround_enabled = 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
|
||||
* model 3 mask 3
|
||||
*/
|
||||
if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
|
||||
clear_cpu_cap(c, X86_FEATURE_SEP);
|
||||
|
||||
/*
|
||||
* PAE CPUID issue: many Pentium M report no PAE but may have a
|
||||
* functionally usable PAE implementation.
|
||||
* Forcefully enable PAE if kernel parameter "forcepae" is present.
|
||||
*/
|
||||
if (forcepae) {
|
||||
printk(KERN_WARNING "PAE forced!\n");
|
||||
set_cpu_cap(c, X86_FEATURE_PAE);
|
||||
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
|
||||
}
|
||||
|
||||
/*
|
||||
* P4 Xeon errata 037 workaround.
|
||||
* Hardware prefetcher may cause stale data to be loaded into the cache.
|
||||
*/
|
||||
if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
|
||||
if (msr_set_bit(MSR_IA32_MISC_ENABLE,
|
||||
MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
|
||||
> 0) {
|
||||
pr_info("CPU: C0 stepping P4 Xeon detected.\n");
|
||||
pr_info("CPU: Disabling hardware prefetching (Errata 037)\n");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* See if we have a good local APIC by checking for buggy Pentia,
|
||||
* i.e. all B steppings and the C2 stepping of P54C when using their
|
||||
* integrated APIC (see 11AP erratum in "Pentium Processor
|
||||
* Specification Update").
|
||||
*/
|
||||
if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
|
||||
(c->x86_mask < 0x6 || c->x86_mask == 0xb))
|
||||
set_cpu_bug(c, X86_BUG_11AP);
|
||||
|
||||
|
||||
#ifdef CONFIG_X86_INTEL_USERCOPY
|
||||
/*
|
||||
* Set up the preferred alignment for movsl bulk memory moves
|
||||
*/
|
||||
switch (c->x86) {
|
||||
case 4: /* 486: untested */
|
||||
break;
|
||||
case 5: /* Old Pentia: untested */
|
||||
break;
|
||||
case 6: /* PII/PIII only like movsl with 8-byte alignment */
|
||||
movsl_mask.mask = 7;
|
||||
break;
|
||||
case 15: /* P4 is OK down to 8-byte alignment */
|
||||
movsl_mask.mask = 7;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
intel_smp_check(c);
|
||||
}
|
||||
#else
|
||||
static void intel_workarounds(struct cpuinfo_x86 *c)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
static void srat_detect_node(struct cpuinfo_x86 *c)
|
||||
{
|
||||
#ifdef CONFIG_NUMA
|
||||
unsigned node;
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
/* Don't do the funky fallback heuristics the AMD version employs
|
||||
for now. */
|
||||
node = numa_cpu_node(cpu);
|
||||
if (node == NUMA_NO_NODE || !node_online(node)) {
|
||||
/* reuse the value from init_cpu_to_node() */
|
||||
node = cpu_to_node(cpu);
|
||||
}
|
||||
numa_set_node(cpu, node);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* find out the number of processor cores on the die
|
||||
*/
|
||||
static int intel_num_cpu_cores(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
|
||||
if (c->cpuid_level < 4)
|
||||
return 1;
|
||||
|
||||
/* Intel has a non-standard dependency on %ecx for this CPUID level. */
|
||||
cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
|
||||
if (eax & 0x1f)
|
||||
return (eax >> 26) + 1;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
|
||||
{
|
||||
/* Intel VMX MSR indicated features */
|
||||
#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
|
||||
#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000
|
||||
#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000
|
||||
#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
|
||||
#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
|
||||
#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
|
||||
|
||||
u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
|
||||
|
||||
clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
|
||||
clear_cpu_cap(c, X86_FEATURE_VNMI);
|
||||
clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
|
||||
clear_cpu_cap(c, X86_FEATURE_EPT);
|
||||
clear_cpu_cap(c, X86_FEATURE_VPID);
|
||||
|
||||
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
|
||||
msr_ctl = vmx_msr_high | vmx_msr_low;
|
||||
if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
|
||||
set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
|
||||
if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
|
||||
set_cpu_cap(c, X86_FEATURE_VNMI);
|
||||
if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
|
||||
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
|
||||
vmx_msr_low, vmx_msr_high);
|
||||
msr_ctl2 = vmx_msr_high | vmx_msr_low;
|
||||
if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
|
||||
(msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
|
||||
set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
|
||||
if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
|
||||
set_cpu_cap(c, X86_FEATURE_EPT);
|
||||
if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
|
||||
set_cpu_cap(c, X86_FEATURE_VPID);
|
||||
}
|
||||
}
|
||||
|
||||
static void init_intel(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned int l2 = 0;
|
||||
|
||||
early_init_intel(c);
|
||||
|
||||
intel_workarounds(c);
|
||||
|
||||
/*
|
||||
* Detect the extended topology information if available. This
|
||||
* will reinitialise the initial_apicid which will be used
|
||||
* in init_intel_cacheinfo()
|
||||
*/
|
||||
detect_extended_topology(c);
|
||||
|
||||
if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
|
||||
/*
|
||||
* let's use the legacy cpuid vector 0x1 and 0x4 for topology
|
||||
* detection.
|
||||
*/
|
||||
c->x86_max_cores = intel_num_cpu_cores(c);
|
||||
#ifdef CONFIG_X86_32
|
||||
detect_ht(c);
|
||||
#endif
|
||||
}
|
||||
|
||||
l2 = init_intel_cacheinfo(c);
|
||||
|
||||
/* Detect legacy cache sizes if init_intel_cacheinfo did not */
|
||||
if (l2 == 0) {
|
||||
cpu_detect_cache_sizes(c);
|
||||
l2 = c->x86_cache_size;
|
||||
}
|
||||
|
||||
if (c->cpuid_level > 9) {
|
||||
unsigned eax = cpuid_eax(10);
|
||||
/* Check for version and the number of counters */
|
||||
if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
|
||||
set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
|
||||
}
|
||||
|
||||
if (cpu_has_xmm2)
|
||||
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
|
||||
if (cpu_has_ds) {
|
||||
unsigned int l1;
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
|
||||
if (!(l1 & (1<<11)))
|
||||
set_cpu_cap(c, X86_FEATURE_BTS);
|
||||
if (!(l1 & (1<<12)))
|
||||
set_cpu_cap(c, X86_FEATURE_PEBS);
|
||||
}
|
||||
|
||||
if (c->x86 == 6 && cpu_has_clflush &&
|
||||
(c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
|
||||
set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (c->x86 == 15)
|
||||
c->x86_cache_alignment = c->x86_clflush_size * 2;
|
||||
if (c->x86 == 6)
|
||||
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
|
||||
#else
|
||||
/*
|
||||
* Names for the Pentium II/Celeron processors
|
||||
* detectable only by also checking the cache size.
|
||||
* Dixon is NOT a Celeron.
|
||||
*/
|
||||
if (c->x86 == 6) {
|
||||
char *p = NULL;
|
||||
|
||||
switch (c->x86_model) {
|
||||
case 5:
|
||||
if (l2 == 0)
|
||||
p = "Celeron (Covington)";
|
||||
else if (l2 == 256)
|
||||
p = "Mobile Pentium II (Dixon)";
|
||||
break;
|
||||
|
||||
case 6:
|
||||
if (l2 == 128)
|
||||
p = "Celeron (Mendocino)";
|
||||
else if (c->x86_mask == 0 || c->x86_mask == 5)
|
||||
p = "Celeron-A";
|
||||
break;
|
||||
|
||||
case 8:
|
||||
if (l2 == 128)
|
||||
p = "Celeron (Coppermine)";
|
||||
break;
|
||||
}
|
||||
|
||||
if (p)
|
||||
strcpy(c->x86_model_id, p);
|
||||
}
|
||||
|
||||
if (c->x86 == 15)
|
||||
set_cpu_cap(c, X86_FEATURE_P4);
|
||||
if (c->x86 == 6)
|
||||
set_cpu_cap(c, X86_FEATURE_P3);
|
||||
#endif
|
||||
|
||||
/* Work around errata */
|
||||
srat_detect_node(c);
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_VMX))
|
||||
detect_vmx_virtcap(c);
|
||||
|
||||
/*
|
||||
* Initialize MSR_IA32_ENERGY_PERF_BIAS if BIOS did not.
|
||||
* x86_energy_perf_policy(8) is available to change it at run-time
|
||||
*/
|
||||
if (cpu_has(c, X86_FEATURE_EPB)) {
|
||||
u64 epb;
|
||||
|
||||
rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
|
||||
if ((epb & 0xF) == ENERGY_PERF_BIAS_PERFORMANCE) {
|
||||
printk_once(KERN_WARNING "ENERGY_PERF_BIAS:"
|
||||
" Set to 'normal', was 'performance'\n"
|
||||
"ENERGY_PERF_BIAS: View and update with"
|
||||
" x86_energy_perf_policy(8)\n");
|
||||
epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
|
||||
wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
|
||||
{
|
||||
/*
|
||||
* Intel PIII Tualatin. This comes in two flavours.
|
||||
* One has 256kb of cache, the other 512. We have no way
|
||||
* to determine which, so we use a boottime override
|
||||
* for the 512kb model, and assume 256 otherwise.
|
||||
*/
|
||||
if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0))
|
||||
size = 256;
|
||||
|
||||
/*
|
||||
* Intel Quark SoC X1000 contains a 4-way set associative
|
||||
* 16K cache with a 16 byte cache line and 256 lines per tag
|
||||
*/
|
||||
if ((c->x86 == 5) && (c->x86_model == 9))
|
||||
size = 16;
|
||||
return size;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define TLB_INST_4K 0x01
|
||||
#define TLB_INST_4M 0x02
|
||||
#define TLB_INST_2M_4M 0x03
|
||||
|
||||
#define TLB_INST_ALL 0x05
|
||||
#define TLB_INST_1G 0x06
|
||||
|
||||
#define TLB_DATA_4K 0x11
|
||||
#define TLB_DATA_4M 0x12
|
||||
#define TLB_DATA_2M_4M 0x13
|
||||
#define TLB_DATA_4K_4M 0x14
|
||||
|
||||
#define TLB_DATA_1G 0x16
|
||||
|
||||
#define TLB_DATA0_4K 0x21
|
||||
#define TLB_DATA0_4M 0x22
|
||||
#define TLB_DATA0_2M_4M 0x23
|
||||
|
||||
#define STLB_4K 0x41
|
||||
#define STLB_4K_2M 0x42
|
||||
|
||||
static const struct _tlb_table intel_tlb_table[] = {
|
||||
{ 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" },
|
||||
{ 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, full associative" },
|
||||
{ 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" },
|
||||
{ 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" },
|
||||
{ 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" },
|
||||
{ 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" },
|
||||
{ 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages" },
|
||||
{ 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
|
||||
{ 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
|
||||
{ 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
|
||||
{ 0x55, TLB_INST_2M_4M, 7, " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
|
||||
{ 0x56, TLB_DATA0_4M, 16, " TLB_DATA0 4 MByte pages, 4-way set associative" },
|
||||
{ 0x57, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, 4-way associative" },
|
||||
{ 0x59, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, fully associative" },
|
||||
{ 0x5a, TLB_DATA0_2M_4M, 32, " TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" },
|
||||
{ 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" },
|
||||
{ 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" },
|
||||
{ 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" },
|
||||
{ 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" },
|
||||
{ 0x63, TLB_DATA_1G, 4, " TLB_DATA 1 GByte pages, 4-way set associative" },
|
||||
{ 0x76, TLB_INST_2M_4M, 8, " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
|
||||
{ 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" },
|
||||
{ 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" },
|
||||
{ 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" },
|
||||
{ 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" },
|
||||
{ 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" },
|
||||
{ 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set associative" },
|
||||
{ 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set associative" },
|
||||
{ 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" },
|
||||
{ 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
|
||||
{ 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" },
|
||||
{ 0xc2, TLB_DATA_2M_4M, 16, " DTLB 2 MByte/4MByte pages, 4-way associative" },
|
||||
{ 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" },
|
||||
{ 0x00, 0, 0 }
|
||||
};
|
||||
|
||||
static void intel_tlb_lookup(const unsigned char desc)
|
||||
{
|
||||
unsigned char k;
|
||||
if (desc == 0)
|
||||
return;
|
||||
|
||||
/* look up this descriptor in the table */
|
||||
for (k = 0; intel_tlb_table[k].descriptor != desc && \
|
||||
intel_tlb_table[k].descriptor != 0; k++)
|
||||
;
|
||||
|
||||
if (intel_tlb_table[k].tlb_type == 0)
|
||||
return;
|
||||
|
||||
switch (intel_tlb_table[k].tlb_type) {
|
||||
case STLB_4K:
|
||||
if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
|
||||
if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
|
||||
break;
|
||||
case STLB_4K_2M:
|
||||
if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
|
||||
if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
|
||||
if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
|
||||
if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
|
||||
if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
|
||||
if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
|
||||
break;
|
||||
case TLB_INST_ALL:
|
||||
if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
|
||||
if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
|
||||
if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
|
||||
break;
|
||||
case TLB_INST_4K:
|
||||
if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
|
||||
break;
|
||||
case TLB_INST_4M:
|
||||
if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
|
||||
break;
|
||||
case TLB_INST_2M_4M:
|
||||
if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
|
||||
if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
|
||||
break;
|
||||
case TLB_DATA_4K:
|
||||
case TLB_DATA0_4K:
|
||||
if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
|
||||
break;
|
||||
case TLB_DATA_4M:
|
||||
case TLB_DATA0_4M:
|
||||
if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
|
||||
break;
|
||||
case TLB_DATA_2M_4M:
|
||||
case TLB_DATA0_2M_4M:
|
||||
if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
|
||||
if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
|
||||
break;
|
||||
case TLB_DATA_4K_4M:
|
||||
if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
|
||||
if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
|
||||
break;
|
||||
case TLB_DATA_1G:
|
||||
if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries)
|
||||
tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void intel_detect_tlb(struct cpuinfo_x86 *c)
|
||||
{
|
||||
int i, j, n;
|
||||
unsigned int regs[4];
|
||||
unsigned char *desc = (unsigned char *)regs;
|
||||
|
||||
if (c->cpuid_level < 2)
|
||||
return;
|
||||
|
||||
/* Number of times to iterate */
|
||||
n = cpuid_eax(2) & 0xFF;
|
||||
|
||||
for (i = 0 ; i < n ; i++) {
|
||||
cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
|
||||
|
||||
/* If bit 31 is set, this is an unknown format */
|
||||
for (j = 0 ; j < 3 ; j++)
|
||||
if (regs[j] & (1 << 31))
|
||||
regs[j] = 0;
|
||||
|
||||
/* Byte 0 is level count, not a descriptor */
|
||||
for (j = 1 ; j < 16 ; j++)
|
||||
intel_tlb_lookup(desc[j]);
|
||||
}
|
||||
}
|
||||
|
||||
static const struct cpu_dev intel_cpu_dev = {
|
||||
.c_vendor = "Intel",
|
||||
.c_ident = { "GenuineIntel" },
|
||||
#ifdef CONFIG_X86_32
|
||||
.legacy_models = {
|
||||
{ .family = 4, .model_names =
|
||||
{
|
||||
[0] = "486 DX-25/33",
|
||||
[1] = "486 DX-50",
|
||||
[2] = "486 SX",
|
||||
[3] = "486 DX/2",
|
||||
[4] = "486 SL",
|
||||
[5] = "486 SX/2",
|
||||
[7] = "486 DX/2-WB",
|
||||
[8] = "486 DX/4",
|
||||
[9] = "486 DX/4-WB"
|
||||
}
|
||||
},
|
||||
{ .family = 5, .model_names =
|
||||
{
|
||||
[0] = "Pentium 60/66 A-step",
|
||||
[1] = "Pentium 60/66",
|
||||
[2] = "Pentium 75 - 200",
|
||||
[3] = "OverDrive PODP5V83",
|
||||
[4] = "Pentium MMX",
|
||||
[7] = "Mobile Pentium 75 - 200",
|
||||
[8] = "Mobile Pentium MMX",
|
||||
[9] = "Quark SoC X1000",
|
||||
}
|
||||
},
|
||||
{ .family = 6, .model_names =
|
||||
{
|
||||
[0] = "Pentium Pro A-step",
|
||||
[1] = "Pentium Pro",
|
||||
[3] = "Pentium II (Klamath)",
|
||||
[4] = "Pentium II (Deschutes)",
|
||||
[5] = "Pentium II (Deschutes)",
|
||||
[6] = "Mobile Pentium II",
|
||||
[7] = "Pentium III (Katmai)",
|
||||
[8] = "Pentium III (Coppermine)",
|
||||
[10] = "Pentium III (Cascades)",
|
||||
[11] = "Pentium III (Tualatin)",
|
||||
}
|
||||
},
|
||||
{ .family = 15, .model_names =
|
||||
{
|
||||
[0] = "Pentium 4 (Unknown)",
|
||||
[1] = "Pentium 4 (Willamette)",
|
||||
[2] = "Pentium 4 (Northwood)",
|
||||
[4] = "Pentium 4 (Foster)",
|
||||
[5] = "Pentium 4 (Foster)",
|
||||
}
|
||||
},
|
||||
},
|
||||
.legacy_cache_size = intel_size_cache,
|
||||
#endif
|
||||
.c_detect_tlb = intel_detect_tlb,
|
||||
.c_early_init = early_init_intel,
|
||||
.c_init = init_intel,
|
||||
.c_x86_vendor = X86_VENDOR_INTEL,
|
||||
};
|
||||
|
||||
cpu_dev_register(intel_cpu_dev);
1262
arch/x86/kernel/cpu/intel_cacheinfo.c
Normal file
File diff suppressed because it is too large
49
arch/x86/kernel/cpu/match.c
Normal file
@@ -0,0 +1,49 @@
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/slab.h>

/**
 * x86_match_cpu - match current CPU against an array of x86_cpu_ids
 * @match: Pointer to array of x86_cpu_ids. Last entry terminated with
 * {}.
 *
 * Return the entry if the current CPU matches the entries in the
 * passed x86_cpu_id match table. Otherwise NULL. The match table
 * contains vendor (X86_VENDOR_*), family, model and feature bits or
 * respective wildcard entries.
 *
 * A typical table entry would be to match a specific CPU
 * { X86_VENDOR_INTEL, 6, 0x12 }
 * or to match a specific CPU feature
 * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
 *
 * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY,
 * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor)
 *
 * Arrays used to match for this should also be declared using
 * MODULE_DEVICE_TABLE(x86cpu, ...)
 *
 * This always matches against the boot cpu, assuming models and features are
 * consistent over all CPUs.
 */
const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
{
	const struct x86_cpu_id *m;
	struct cpuinfo_x86 *c = &boot_cpu_data;

	for (m = match; m->vendor | m->family | m->model | m->feature; m++) {
		if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor)
			continue;
		if (m->family != X86_FAMILY_ANY && c->x86 != m->family)
			continue;
		if (m->model != X86_MODEL_ANY && c->x86_model != m->model)
			continue;
		if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature))
			continue;
		return m;
	}
	return NULL;
}
EXPORT_SYMBOL(x86_match_cpu);
11
arch/x86/kernel/cpu/mcheck/Makefile
Normal file
@@ -0,0 +1,11 @@
obj-y = mce.o mce-severity.o

obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o

obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o

obj-$(CONFIG_ACPI_APEI) += mce-apei.o
155
arch/x86/kernel/cpu/mcheck/mce-apei.c
Normal file
@@ -0,0 +1,155 @@
/*
|
||||
* Bridge between MCE and APEI
|
||||
*
|
||||
* On some machines, corrected memory errors are reported via APEI
|
||||
* generic hardware error source (GHES) instead of corrected Machine
|
||||
* Check. These corrected memory errors can be reported to user space
|
||||
* through /dev/mcelog via faking a corrected Machine Check, so that
|
||||
* the error memory page can be offlined by /sbin/mcelog if the error
|
||||
* count for one page is beyond the threshold.
|
||||
*
|
||||
* For fatal MCE, save MCE record into persistent storage via ERST, so
|
||||
* that the MCE record can be logged after reboot via ERST.
|
||||
*
|
||||
* Copyright 2010 Intel Corp.
|
||||
* Author: Huang Ying <ying.huang@intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version
|
||||
* 2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#include <linux/export.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/cper.h>
|
||||
#include <acpi/apei.h>
|
||||
#include <acpi/ghes.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
#include "mce-internal.h"
|
||||
|
||||
void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
|
||||
{
|
||||
struct mce m;
|
||||
|
||||
if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
|
||||
return;
|
||||
|
||||
mce_setup(&m);
|
||||
m.bank = 1;
|
||||
/* Fake a memory read error with unknown channel */
|
||||
m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;
|
||||
|
||||
if (severity >= GHES_SEV_RECOVERABLE)
|
||||
m.status |= MCI_STATUS_UC;
|
||||
if (severity >= GHES_SEV_PANIC)
|
||||
m.status |= MCI_STATUS_PCC;
|
||||
|
||||
m.addr = mem_err->physical_addr;
|
||||
mce_log(&m);
|
||||
mce_notify_irq();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
|
||||
|
||||
#define CPER_CREATOR_MCE \
|
||||
UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
|
||||
0x64, 0x90, 0xb8, 0x9d)
|
||||
#define CPER_SECTION_TYPE_MCE \
|
||||
UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
|
||||
0x04, 0x4a, 0x38, 0xfc)
|
||||
|
||||
/*
|
||||
* CPER specification (in UEFI specification 2.3 appendix N) requires
|
||||
* byte-packed.
|
||||
*/
|
||||
struct cper_mce_record {
|
||||
struct cper_record_header hdr;
|
||||
struct cper_section_descriptor sec_hdr;
|
||||
struct mce mce;
|
||||
} __packed;
|
||||
|
||||
int apei_write_mce(struct mce *m)
|
||||
{
|
||||
struct cper_mce_record rcd;
|
||||
|
||||
memset(&rcd, 0, sizeof(rcd));
|
||||
memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
|
||||
rcd.hdr.revision = CPER_RECORD_REV;
|
||||
rcd.hdr.signature_end = CPER_SIG_END;
|
||||
rcd.hdr.section_count = 1;
|
||||
rcd.hdr.error_severity = CPER_SEV_FATAL;
|
||||
/* timestamp, platform_id, partition_id are all invalid */
|
||||
rcd.hdr.validation_bits = 0;
|
||||
rcd.hdr.record_length = sizeof(rcd);
|
||||
rcd.hdr.creator_id = CPER_CREATOR_MCE;
|
||||
rcd.hdr.notification_type = CPER_NOTIFY_MCE;
|
||||
rcd.hdr.record_id = cper_next_record_id();
|
||||
rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
|
||||
|
||||
rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd;
|
||||
rcd.sec_hdr.section_length = sizeof(rcd.mce);
|
||||
rcd.sec_hdr.revision = CPER_SEC_REV;
|
||||
/* fru_id and fru_text are invalid */
|
||||
rcd.sec_hdr.validation_bits = 0;
|
||||
rcd.sec_hdr.flags = CPER_SEC_PRIMARY;
|
||||
rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
|
||||
rcd.sec_hdr.section_severity = CPER_SEV_FATAL;
|
||||
|
||||
memcpy(&rcd.mce, m, sizeof(*m));
|
||||
|
||||
return erst_write(&rcd.hdr);
|
||||
}
|
||||
|
||||
ssize_t apei_read_mce(struct mce *m, u64 *record_id)
|
||||
{
|
||||
struct cper_mce_record rcd;
|
||||
int rc, pos;
|
||||
|
||||
rc = erst_get_record_id_begin(&pos);
|
||||
if (rc)
|
||||
return rc;
|
||||
retry:
|
||||
rc = erst_get_record_id_next(&pos, record_id);
|
||||
if (rc)
|
||||
goto out;
|
||||
/* no more record */
|
||||
if (*record_id == APEI_ERST_INVALID_RECORD_ID)
|
||||
goto out;
|
||||
rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd));
|
||||
/* someone else has cleared the record, try next one */
|
||||
if (rc == -ENOENT)
|
||||
goto retry;
|
||||
else if (rc < 0)
|
||||
goto out;
|
||||
/* try to skip other type records in storage */
|
||||
else if (rc != sizeof(rcd) ||
|
||||
uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
|
||||
goto retry;
|
||||
memcpy(m, &rcd.mce, sizeof(*m));
|
||||
rc = sizeof(*m);
|
||||
out:
|
||||
erst_get_record_id_end();
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Check whether there is record in ERST */
|
||||
int apei_check_mce(void)
|
||||
{
|
||||
return erst_get_record_count();
|
||||
}
|
||||
|
||||
int apei_clear_mce(u64 record_id)
|
||||
{
|
||||
return erst_clear(record_id);
|
||||
}
256
arch/x86/kernel/cpu/mcheck/mce-inject.c
Normal file
@@ -0,0 +1,256 @@
/*
|
||||
* Machine check injection support.
|
||||
* Copyright 2008 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; version 2
|
||||
* of the License.
|
||||
*
|
||||
* Authors:
|
||||
* Andi Kleen
|
||||
* Ying Huang
|
||||
*/
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/preempt.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/kdebug.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/nmi.h>
|
||||
|
||||
/* Update fake mce registers on current CPU. */
|
||||
static void inject_mce(struct mce *m)
|
||||
{
|
||||
struct mce *i = &per_cpu(injectm, m->extcpu);
|
||||
|
||||
/* Make sure no one reads partially written injectm */
|
||||
i->finished = 0;
|
||||
mb();
|
||||
m->finished = 0;
|
||||
/* First set the fields after finished */
|
||||
i->extcpu = m->extcpu;
|
||||
mb();
|
||||
/* Now write record in order, finished last (except above) */
|
||||
memcpy(i, m, sizeof(struct mce));
|
||||
/* Finally activate it */
|
||||
mb();
|
||||
i->finished = 1;
|
||||
}
|
||||
|
||||
static void raise_poll(struct mce *m)
|
||||
{
|
||||
unsigned long flags;
|
||||
mce_banks_t b;
|
||||
|
||||
memset(&b, 0xff, sizeof(mce_banks_t));
|
||||
local_irq_save(flags);
|
||||
machine_check_poll(0, &b);
|
||||
local_irq_restore(flags);
|
||||
m->finished = 0;
|
||||
}
|
||||
|
||||
static void raise_exception(struct mce *m, struct pt_regs *pregs)
|
||||
{
|
||||
struct pt_regs regs;
|
||||
unsigned long flags;
|
||||
|
||||
if (!pregs) {
|
||||
memset(&regs, 0, sizeof(struct pt_regs));
|
||||
regs.ip = m->ip;
|
||||
regs.cs = m->cs;
|
||||
pregs = &regs;
|
||||
}
|
||||
/* in the mcheck exception handler, irq will be disabled */
|
||||
local_irq_save(flags);
|
||||
do_machine_check(pregs, 0);
|
||||
local_irq_restore(flags);
|
||||
m->finished = 0;
|
||||
}
|
||||
|
||||
static cpumask_var_t mce_inject_cpumask;
|
||||
static DEFINE_MUTEX(mce_inject_mutex);
|
||||
|
||||
static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct mce *m = this_cpu_ptr(&injectm);
|
||||
if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
|
||||
return NMI_DONE;
|
||||
cpumask_clear_cpu(cpu, mce_inject_cpumask);
|
||||
if (m->inject_flags & MCJ_EXCEPTION)
|
||||
raise_exception(m, regs);
|
||||
else if (m->status)
|
||||
raise_poll(m);
|
||||
return NMI_HANDLED;
|
||||
}
|
||||
|
||||
static void mce_irq_ipi(void *info)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct mce *m = this_cpu_ptr(&injectm);
|
||||
|
||||
if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
|
||||
m->inject_flags & MCJ_EXCEPTION) {
|
||||
cpumask_clear_cpu(cpu, mce_inject_cpumask);
|
||||
raise_exception(m, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
/* Inject mce on current CPU */
|
||||
static int raise_local(void)
|
||||
{
|
||||
struct mce *m = this_cpu_ptr(&injectm);
|
||||
int context = MCJ_CTX(m->inject_flags);
|
||||
int ret = 0;
|
||||
int cpu = m->extcpu;
|
||||
|
||||
if (m->inject_flags & MCJ_EXCEPTION) {
|
||||
printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
|
||||
switch (context) {
|
||||
case MCJ_CTX_IRQ:
|
||||
/*
|
||||
* Could do more to fake interrupts like
|
||||
* calling irq_enter, but the necessary
|
||||
* machinery isn't exported currently.
|
||||
*/
|
||||
/*FALL THROUGH*/
|
||||
case MCJ_CTX_PROCESS:
|
||||
raise_exception(m, NULL);
|
||||
break;
|
||||
default:
|
||||
printk(KERN_INFO "Invalid MCE context\n");
|
||||
ret = -EINVAL;
|
||||
}
|
||||
printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
|
||||
} else if (m->status) {
|
||||
printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
|
||||
raise_poll(m);
|
||||
mce_notify_irq();
|
||||
printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
|
||||
} else
|
||||
m->finished = 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void raise_mce(struct mce *m)
|
||||
{
|
||||
int context = MCJ_CTX(m->inject_flags);
|
||||
|
||||
inject_mce(m);
|
||||
|
||||
if (context == MCJ_CTX_RANDOM)
|
||||
return;
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
|
||||
unsigned long start;
|
||||
int cpu;
|
||||
|
||||
get_online_cpus();
|
||||
cpumask_copy(mce_inject_cpumask, cpu_online_mask);
|
||||
cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
|
||||
for_each_online_cpu(cpu) {
|
||||
struct mce *mcpu = &per_cpu(injectm, cpu);
|
||||
if (!mcpu->finished ||
|
||||
MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
|
||||
cpumask_clear_cpu(cpu, mce_inject_cpumask);
|
||||
}
|
||||
if (!cpumask_empty(mce_inject_cpumask)) {
|
||||
if (m->inject_flags & MCJ_IRQ_BROADCAST) {
|
||||
/*
|
||||
* don't wait because mce_irq_ipi is necessary
|
||||
* to be sync with following raise_local
|
||||
*/
|
||||
preempt_disable();
|
||||
smp_call_function_many(mce_inject_cpumask,
|
||||
mce_irq_ipi, NULL, 0);
|
||||
preempt_enable();
|
||||
} else if (m->inject_flags & MCJ_NMI_BROADCAST)
|
||||
apic->send_IPI_mask(mce_inject_cpumask,
|
||||
NMI_VECTOR);
|
||||
}
|
||||
start = jiffies;
|
||||
while (!cpumask_empty(mce_inject_cpumask)) {
|
||||
if (!time_before(jiffies, start + 2*HZ)) {
|
||||
printk(KERN_ERR
|
||||
"Timeout waiting for mce inject %lx\n",
|
||||
*cpumask_bits(mce_inject_cpumask));
|
||||
break;
|
||||
}
|
||||
cpu_relax();
|
||||
}
|
||||
raise_local();
|
||||
put_cpu();
|
||||
put_online_cpus();
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
preempt_disable();
|
||||
raise_local();
|
||||
preempt_enable();
|
||||
}
|
||||
}
|
||||
|
||||
/* Error injection interface */
|
||||
static ssize_t mce_write(struct file *filp, const char __user *ubuf,
|
||||
size_t usize, loff_t *off)
|
||||
{
|
||||
struct mce m;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
/*
|
||||
* There are some cases where real MSR reads could slip
|
||||
* through.
|
||||
*/
|
||||
if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
|
||||
return -EIO;
|
||||
|
||||
if ((unsigned long)usize > sizeof(struct mce))
|
||||
usize = sizeof(struct mce);
|
||||
if (copy_from_user(&m, ubuf, usize))
|
||||
return -EFAULT;
|
||||
|
||||
if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Need to give user space some time to set everything up,
|
||||
* so do it a jiffie or two later everywhere.
|
||||
*/
|
||||
schedule_timeout(2);
|
||||
|
||||
mutex_lock(&mce_inject_mutex);
|
||||
raise_mce(&m);
|
||||
mutex_unlock(&mce_inject_mutex);
|
||||
return usize;
|
||||
}
|
||||
|
||||
static int inject_init(void)
|
||||
{
|
||||
if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
printk(KERN_INFO "Machine check injector initialized\n");
|
||||
register_mce_write_callback(mce_write);
|
||||
register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
|
||||
"mce_notify");
|
||||
return 0;
|
||||
}
|
||||
|
||||
module_init(inject_init);
|
||||
/*
|
||||
* Cannot tolerate unloading currently because we cannot
|
||||
* guarantee all openers of mce_chrdev will get a reference to us.
|
||||
*/
|
||||
MODULE_LICENSE("GPL");
66
arch/x86/kernel/cpu/mcheck/mce-internal.h
Normal file
@@ -0,0 +1,66 @@
#include <linux/device.h>
#include <asm/mce.h>

enum severity_level {
	MCE_NO_SEVERITY,
	MCE_KEEP_SEVERITY,
	MCE_SOME_SEVERITY,
	MCE_AO_SEVERITY,
	MCE_UC_SEVERITY,
	MCE_AR_SEVERITY,
	MCE_PANIC_SEVERITY,
};

#define ATTR_LEN 16

/* One object for each MCE bank, shared by all CPUs */
struct mce_bank {
	u64 ctl; /* subevents to enable */
	unsigned char init; /* initialise bank? */
	struct device_attribute attr; /* device attribute */
	char attrname[ATTR_LEN]; /* attribute name */
};

int mce_severity(struct mce *a, int tolerant, char **msg);
struct dentry *mce_get_debugfs_dir(void);

extern struct mce_bank *mce_banks;
extern mce_banks_t mce_banks_ce_disabled;

#ifdef CONFIG_X86_MCE_INTEL
unsigned long mce_intel_adjust_timer(unsigned long interval);
void mce_intel_cmci_poll(void);
void mce_intel_hcpu_update(unsigned long cpu);
void cmci_disable_bank(int bank);
#else
# define mce_intel_adjust_timer mce_adjust_timer_default
static inline void mce_intel_cmci_poll(void) { }
static inline void mce_intel_hcpu_update(unsigned long cpu) { }
static inline void cmci_disable_bank(int bank) { }
#endif

void mce_timer_kick(unsigned long interval);

#ifdef CONFIG_ACPI_APEI
int apei_write_mce(struct mce *m);
ssize_t apei_read_mce(struct mce *m, u64 *record_id);
int apei_check_mce(void);
int apei_clear_mce(u64 record_id);
#else
static inline int apei_write_mce(struct mce *m)
{
	return -EINVAL;
}
static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id)
{
	return 0;
}
static inline int apei_check_mce(void)
{
	return 0;
}
static inline int apei_clear_mce(u64 record_id)
{
	return -EINVAL;
}
#endif
283
arch/x86/kernel/cpu/mcheck/mce-severity.c
Normal file
@@ -0,0 +1,283 @@
/*
|
||||
* MCE grading rules.
|
||||
* Copyright 2008, 2009 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; version 2
|
||||
* of the License.
|
||||
*
|
||||
* Author: Andi Kleen
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
#include "mce-internal.h"
|
||||
|
||||
/*
|
||||
* Grade an mce by severity. In general the most severe ones are processed
|
||||
* first. Since there are quite a lot of combinations test the bits in a
|
||||
* table-driven way. The rules are simply processed in order, first
|
||||
* match wins.
|
||||
*
|
||||
* Note this is only used for machine check exceptions, the corrected
|
||||
* errors use much simpler rules. The exceptions still check for the corrected
|
||||
* errors, but only to leave them alone for the CMCI handler (except for
|
||||
* panic situations)
|
||||
*/
|
||||
|
||||
enum context { IN_KERNEL = 1, IN_USER = 2 };
|
||||
enum ser { SER_REQUIRED = 1, NO_SER = 2 };
|
||||
|
||||
static struct severity {
|
||||
u64 mask;
|
||||
u64 result;
|
||||
unsigned char sev;
|
||||
unsigned char mcgmask;
|
||||
unsigned char mcgres;
|
||||
unsigned char ser;
|
||||
unsigned char context;
|
||||
unsigned char covered;
|
||||
char *msg;
|
||||
} severities[] = {
|
||||
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
|
||||
#define KERNEL .context = IN_KERNEL
|
||||
#define USER .context = IN_USER
|
||||
#define SER .ser = SER_REQUIRED
|
||||
#define NOSER .ser = NO_SER
|
||||
#define BITCLR(x) .mask = x, .result = 0
|
||||
#define BITSET(x) .mask = x, .result = x
|
||||
#define MCGMASK(x, y) .mcgmask = x, .mcgres = y
|
||||
#define MASK(x, y) .mask = x, .result = y
|
||||
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
|
||||
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
|
||||
#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
|
||||
|
||||
MCESEV(
|
||||
NO, "Invalid",
|
||||
BITCLR(MCI_STATUS_VAL)
|
||||
),
|
||||
MCESEV(
|
||||
NO, "Not enabled",
|
||||
BITCLR(MCI_STATUS_EN)
|
||||
),
|
||||
MCESEV(
|
||||
PANIC, "Processor context corrupt",
|
||||
BITSET(MCI_STATUS_PCC)
|
||||
),
|
||||
/* When MCIP is not set something is very confused */
|
||||
MCESEV(
|
||||
PANIC, "MCIP not set in MCA handler",
|
||||
MCGMASK(MCG_STATUS_MCIP, 0)
|
||||
),
|
||||
/* Neither return nor error IP -- no chance to recover -> PANIC */
|
||||
MCESEV(
|
||||
PANIC, "Neither restart nor error IP",
|
||||
MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
|
||||
),
|
||||
MCESEV(
|
||||
PANIC, "In kernel and no restart IP",
|
||||
KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
|
||||
),
|
||||
MCESEV(
|
||||
KEEP, "Corrected error",
|
||||
NOSER, BITCLR(MCI_STATUS_UC)
|
||||
),
|
||||
|
||||
/* ignore OVER for UCNA */
|
||||
MCESEV(
|
||||
KEEP, "Uncorrected no action required",
|
||||
SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
|
||||
),
|
||||
MCESEV(
|
||||
PANIC, "Illegal combination (UCNA with AR=1)",
|
||||
SER,
|
||||
MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
|
||||
),
|
||||
MCESEV(
|
||||
KEEP, "Non signalled machine check",
|
||||
SER, BITCLR(MCI_STATUS_S)
|
||||
),
|
||||
|
||||
MCESEV(
|
||||
PANIC, "Action required with lost events",
|
||||
SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
|
||||
),
|
||||
|
||||
/* known AR MCACODs: */
|
||||
#ifdef CONFIG_MEMORY_FAILURE
|
||||
MCESEV(
|
||||
KEEP, "Action required but unaffected thread is continuable",
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
|
||||
MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
|
||||
),
|
||||
MCESEV(
|
||||
AR, "Action required: data load error in a user process",
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
|
||||
USER
|
||||
),
|
||||
MCESEV(
|
||||
AR, "Action required: instruction fetch error in a user process",
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
|
||||
USER
|
||||
),
|
||||
#endif
|
||||
MCESEV(
|
||||
PANIC, "Action required: unknown MCACOD",
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
|
||||
),
|
||||
|
||||
/* known AO MCACODs: */
|
||||
MCESEV(
|
||||
AO, "Action optional: memory scrubbing error",
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB)
|
||||
),
|
||||
MCESEV(
|
||||
AO, "Action optional: last level cache writeback error",
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB)
|
||||
),
|
||||
MCESEV(
|
||||
SOME, "Action optional: unknown MCACOD",
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
|
||||
),
|
||||
MCESEV(
|
||||
SOME, "Action optional with lost events",
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
|
||||
),
|
||||
|
||||
MCESEV(
|
||||
PANIC, "Overflowed uncorrected",
|
||||
BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
|
||||
),
|
||||
MCESEV(
|
||||
UC, "Uncorrected",
|
||||
BITSET(MCI_STATUS_UC)
|
||||
),
|
||||
MCESEV(
|
||||
SOME, "No match",
|
||||
BITSET(0)
|
||||
) /* always matches. keep at end */
|
||||
};
|
||||
|
||||
/*
|
||||
* If mcgstatus indicated that ip/cs on the stack were
|
||||
* no good, then "m->cs" will be zero and we will have
|
||||
* to assume the worst case (IN_KERNEL) as we actually
|
||||
* have no idea what we were executing when the machine
|
||||
* check hit.
|
||||
* If we do have a good "m->cs" (or a faked one in the
|
||||
* case we were executing in VM86 mode) we can use it to
|
||||
* distinguish an exception taken in user mode from one
|
||||
* taken in the kernel.
|
||||
*/
|
||||
static int error_context(struct mce *m)
|
||||
{
|
||||
return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
|
||||
}
|
||||
|
||||
int mce_severity(struct mce *m, int tolerant, char **msg)
|
||||
{
|
||||
enum context ctx = error_context(m);
|
||||
struct severity *s;
|
||||
|
||||
for (s = severities;; s++) {
|
||||
if ((m->status & s->mask) != s->result)
|
||||
continue;
|
||||
if ((m->mcgstatus & s->mcgmask) != s->mcgres)
|
||||
continue;
|
||||
if (s->ser == SER_REQUIRED && !mca_cfg.ser)
|
||||
continue;
|
||||
if (s->ser == NO_SER && mca_cfg.ser)
|
||||
continue;
|
||||
if (s->context && ctx != s->context)
|
||||
continue;
|
||||
if (msg)
|
||||
*msg = s->msg;
|
||||
s->covered = 1;
|
||||
if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
|
||||
if (panic_on_oops || tolerant < 1)
|
||||
return MCE_PANIC_SEVERITY;
|
||||
}
|
||||
return s->sev;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
static void *s_start(struct seq_file *f, loff_t *pos)
|
||||
{
|
||||
if (*pos >= ARRAY_SIZE(severities))
|
||||
return NULL;
|
||||
return &severities[*pos];
|
||||
}
|
||||
|
||||
static void *s_next(struct seq_file *f, void *data, loff_t *pos)
|
||||
{
|
||||
if (++(*pos) >= ARRAY_SIZE(severities))
|
||||
return NULL;
|
||||
return &severities[*pos];
|
||||
}
|
||||
|
||||
static void s_stop(struct seq_file *f, void *data)
|
||||
{
|
||||
}
|
||||
|
||||
static int s_show(struct seq_file *f, void *data)
|
||||
{
|
||||
struct severity *ser = data;
|
||||
seq_printf(f, "%d\t%s\n", ser->covered, ser->msg);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct seq_operations severities_seq_ops = {
|
||||
.start = s_start,
|
||||
.next = s_next,
|
||||
.stop = s_stop,
|
||||
.show = s_show,
|
||||
};
|
||||
|
||||
static int severities_coverage_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return seq_open(file, &severities_seq_ops);
|
||||
}
|
||||
|
||||
static ssize_t severities_coverage_write(struct file *file,
|
||||
const char __user *ubuf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < ARRAY_SIZE(severities); i++)
|
||||
severities[i].covered = 0;
|
||||
return count;
|
||||
}
|
||||
|
||||
static const struct file_operations severities_coverage_fops = {
|
||||
.open = severities_coverage_open,
|
||||
.release = seq_release,
|
||||
.read = seq_read,
|
||||
.write = severities_coverage_write,
|
||||
.llseek = seq_lseek,
|
||||
};
|
||||
|
||||
static int __init severities_debugfs_init(void)
|
||||
{
|
||||
struct dentry *dmce, *fsev;
|
||||
|
||||
dmce = mce_get_debugfs_dir();
|
||||
if (!dmce)
|
||||
goto err_out;
|
||||
|
||||
fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL,
|
||||
&severities_coverage_fops);
|
||||
if (!fsev)
|
||||
goto err_out;
|
||||
|
||||
return 0;
|
||||
|
||||
err_out:
|
||||
return -ENOMEM;
|
||||
}
|
||||
late_initcall(severities_debugfs_init);
|
||||
#endif /* CONFIG_DEBUG_FS */
2560
arch/x86/kernel/cpu/mcheck/mce.c
Normal file
File diff suppressed because it is too large
789
arch/x86/kernel/cpu/mcheck/mce_amd.c
Normal file
@@ -0,0 +1,789 @@
/*
|
||||
* (c) 2005-2012 Advanced Micro Devices, Inc.
|
||||
* Your use of this code is subject to the terms and conditions of the
|
||||
* GNU general public license version 2. See "COPYING" or
|
||||
* http://www.gnu.org/licenses/gpl.html
|
||||
*
|
||||
* Written by Jacob Shin - AMD, Inc.
|
||||
*
|
||||
* Maintained by: Borislav Petkov <bp@alien8.de>
|
||||
*
|
||||
* April 2006
|
||||
* - added support for AMD Family 0x10 processors
|
||||
* May 2012
|
||||
* - major scrubbing
|
||||
*
|
||||
* All MC4_MISCi registers are shared between multi-cores
|
||||
*/
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/amd_nb.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/idle.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
#define NR_BLOCKS 9
|
||||
#define THRESHOLD_MAX 0xFFF
|
||||
#define INT_TYPE_APIC 0x00020000
|
||||
#define MASK_VALID_HI 0x80000000
|
||||
#define MASK_CNTP_HI 0x40000000
|
||||
#define MASK_LOCKED_HI 0x20000000
|
||||
#define MASK_LVTOFF_HI 0x00F00000
|
||||
#define MASK_COUNT_EN_HI 0x00080000
|
||||
#define MASK_INT_TYPE_HI 0x00060000
|
||||
#define MASK_OVERFLOW_HI 0x00010000
|
||||
#define MASK_ERR_COUNT_HI 0x00000FFF
|
||||
#define MASK_BLKPTR_LO 0xFF000000
|
||||
#define MCG_XBLK_ADDR 0xC0000400
|
||||
|
||||
static const char * const th_names[] = {
|
||||
"load_store",
|
||||
"insn_fetch",
|
||||
"combined_unit",
|
||||
"",
|
||||
"northbridge",
|
||||
"execution_unit",
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
|
||||
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
|
||||
|
||||
static void amd_threshold_interrupt(void);
|
||||
|
||||
/*
|
||||
* CPU Initialization
|
||||
*/
|
||||
|
||||
struct thresh_restart {
|
||||
struct threshold_block *b;
|
||||
int reset;
|
||||
int set_lvt_off;
|
||||
int lvt_off;
|
||||
u16 old_limit;
|
||||
};
|
||||
|
||||
static inline bool is_shared_bank(int bank)
|
||||
{
|
||||
/* Bank 4 is for northbridge reporting and is thus shared */
|
||||
return (bank == 4);
|
||||
}
|
||||
|
||||
static const char * const bank4_names(struct threshold_block *b)
|
||||
{
|
||||
switch (b->address) {
|
||||
/* MSR4_MISC0 */
|
||||
case 0x00000413:
|
||||
return "dram";
|
||||
|
||||
case 0xc0000408:
|
||||
return "ht_links";
|
||||
|
||||
case 0xc0000409:
|
||||
return "l3_cache";
|
||||
|
||||
default:
|
||||
WARN(1, "Funny MSR: 0x%08x\n", b->address);
|
||||
return "";
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
|
||||
{
|
||||
/*
|
||||
* bank 4 supports APIC LVT interrupts implicitly since forever.
|
||||
*/
|
||||
if (bank == 4)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* IntP: interrupt present; if this bit is set, the thresholding
|
||||
* bank can generate APIC LVT interrupts
|
||||
*/
|
||||
return msr_high_bits & BIT(28);
|
||||
}
|
||||
|
||||
static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
|
||||
{
|
||||
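	/* The LVT offset programmed by firmware lives in bits 23:20 of the high half. */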
int msr = (hi & MASK_LVTOFF_HI) >> 20;
|
||||
|
||||
if (apic < 0) {
|
||||
pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
|
||||
"for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
|
||||
b->bank, b->block, b->address, hi, lo);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (apic != msr) {
|
||||
pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
|
||||
"for bank %d, block %d (MSR%08X=0x%x%08x)\n",
|
||||
b->cpu, apic, b->bank, b->block, b->address, hi, lo);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
};
|
||||
|
||||
/*
|
||||
* Called via smp_call_function_single(), must be called with correct
|
||||
* cpu affinity.
|
||||
*/
|
||||
static void threshold_restart_bank(void *_tr)
|
||||
{
|
||||
struct thresh_restart *tr = _tr;
|
||||
u32 hi, lo;
|
||||
|
||||
rdmsr(tr->b->address, lo, hi);
|
||||
|
||||
if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
|
||||
tr->reset = 1; /* limit cannot be lower than err count */
|
||||
|
||||
if (tr->reset) { /* reset err count and overflow bit */
|
||||
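		/* Preset the counter to (THRESHOLD_MAX - limit) so the interrupt fires after threshold_limit further errors. */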
hi =
|
||||
(hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
|
||||
(THRESHOLD_MAX - tr->b->threshold_limit);
|
||||
} else if (tr->old_limit) { /* change limit w/o reset */
|
||||
int new_count = (hi & THRESHOLD_MAX) +
|
||||
(tr->old_limit - tr->b->threshold_limit);
|
||||
|
||||
hi = (hi & ~MASK_ERR_COUNT_HI) |
|
||||
(new_count & THRESHOLD_MAX);
|
||||
}
|
||||
|
||||
/* clear IntType */
|
||||
hi &= ~MASK_INT_TYPE_HI;
|
||||
|
||||
if (!tr->b->interrupt_capable)
|
||||
goto done;
|
||||
|
||||
if (tr->set_lvt_off) {
|
||||
if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
|
||||
/* set new lvt offset */
|
||||
hi &= ~MASK_LVTOFF_HI;
|
||||
hi |= tr->lvt_off << 20;
|
||||
}
|
||||
}
|
||||
|
||||
if (tr->b->interrupt_enable)
|
||||
hi |= INT_TYPE_APIC;
|
||||
|
||||
done:
|
||||
|
||||
hi |= MASK_COUNT_EN_HI;
|
||||
wrmsr(tr->b->address, lo, hi);
|
||||
}
|
||||
|
||||
static void mce_threshold_block_init(struct threshold_block *b, int offset)
|
||||
{
|
||||
struct thresh_restart tr = {
|
||||
.b = b,
|
||||
.set_lvt_off = 1,
|
||||
.lvt_off = offset,
|
||||
};
|
||||
|
||||
b->threshold_limit = THRESHOLD_MAX;
|
||||
threshold_restart_bank(&tr);
|
||||
};
|
||||
|
||||
static int setup_APIC_mce(int reserved, int new)
|
||||
{
|
||||
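	/* Reserve an extended LVT entry for the threshold vector on first use; later calls keep returning the already-reserved offset. */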
if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
|
||||
APIC_EILVT_MSG_FIX, 0))
|
||||
return new;
|
||||
|
||||
return reserved;
|
||||
}
|
||||
|
||||
/* cpu init entry point, called from mce.c with preempt off */
|
||||
void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
struct threshold_block b;
|
||||
unsigned int cpu = smp_processor_id();
|
||||
u32 low = 0, high = 0, address = 0;
|
||||
unsigned int bank, block;
|
||||
int offset = -1;
|
||||
|
||||
for (bank = 0; bank < mca_cfg.banks; ++bank) {
|
||||
for (block = 0; block < NR_BLOCKS; ++block) {
|
||||
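			/*
			 * Block 0 is MCi_MISC itself, block 1 lives at the
			 * address named by the BLKPTR field, and further
			 * blocks are consecutive MSRs.
			 */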
if (block == 0)
|
||||
address = MSR_IA32_MC0_MISC + bank * 4;
|
||||
else if (block == 1) {
|
||||
address = (low & MASK_BLKPTR_LO) >> 21;
|
||||
if (!address)
|
||||
break;
|
||||
|
||||
address += MCG_XBLK_ADDR;
|
||||
} else
|
||||
++address;
|
||||
|
||||
if (rdmsr_safe(address, &low, &high))
|
||||
break;
|
||||
|
||||
if (!(high & MASK_VALID_HI))
|
||||
continue;
|
||||
|
||||
if (!(high & MASK_CNTP_HI) ||
|
||||
(high & MASK_LOCKED_HI))
|
||||
continue;
|
||||
|
||||
if (!block)
|
||||
per_cpu(bank_map, cpu) |= (1 << bank);
|
||||
|
||||
memset(&b, 0, sizeof(b));
|
||||
b.cpu = cpu;
|
||||
b.bank = bank;
|
||||
b.block = block;
|
||||
b.address = address;
|
||||
b.interrupt_capable = lvt_interrupt_supported(bank, high);
|
||||
|
||||
if (b.interrupt_capable) {
|
||||
int new = (high & MASK_LVTOFF_HI) >> 20;
|
||||
offset = setup_APIC_mce(offset, new);
|
||||
}
|
||||
|
||||
mce_threshold_block_init(&b, offset);
|
||||
mce_threshold_vector = amd_threshold_interrupt;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* APIC Interrupt Handler
|
||||
*/
|
||||
|
||||
/*
|
||||
* threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
|
||||
* the interrupt goes off when error_count reaches threshold_limit.
|
||||
* the handler will simply log mcelog w/ software defined bank number.
|
||||
*/
|
||||
static void amd_threshold_interrupt(void)
|
||||
{
|
||||
u32 low = 0, high = 0, address = 0;
|
||||
unsigned int bank, block;
|
||||
struct mce m;
|
||||
|
||||
mce_setup(&m);
|
||||
|
||||
/* assume first bank caused it */
|
||||
for (bank = 0; bank < mca_cfg.banks; ++bank) {
|
||||
if (!(per_cpu(bank_map, m.cpu) & (1 << bank)))
|
||||
continue;
|
||||
for (block = 0; block < NR_BLOCKS; ++block) {
|
||||
if (block == 0) {
|
||||
address = MSR_IA32_MC0_MISC + bank * 4;
|
||||
} else if (block == 1) {
|
||||
address = (low & MASK_BLKPTR_LO) >> 21;
|
||||
if (!address)
|
||||
break;
|
||||
address += MCG_XBLK_ADDR;
|
||||
} else {
|
||||
++address;
|
||||
}
|
||||
|
||||
if (rdmsr_safe(address, &low, &high))
|
||||
break;
|
||||
|
||||
if (!(high & MASK_VALID_HI)) {
|
||||
if (block)
|
||||
continue;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
if (!(high & MASK_CNTP_HI) ||
|
||||
(high & MASK_LOCKED_HI))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Log the machine check that caused the threshold
|
||||
* event.
|
||||
*/
|
||||
machine_check_poll(MCP_TIMESTAMP,
|
||||
this_cpu_ptr(&mce_poll_banks));
|
||||
|
||||
if (high & MASK_OVERFLOW_HI) {
|
||||
rdmsrl(address, m.misc);
|
||||
rdmsrl(MSR_IA32_MC0_STATUS + bank * 4,
|
||||
m.status);
|
||||
m.bank = K8_MCE_THRESHOLD_BASE
|
||||
+ bank * NR_BLOCKS
|
||||
+ block;
|
||||
mce_log(&m);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Sysfs Interface
|
||||
*/
|
||||
|
||||
struct threshold_attr {
|
||||
struct attribute attr;
|
||||
ssize_t (*show) (struct threshold_block *, char *);
|
||||
ssize_t (*store) (struct threshold_block *, const char *, size_t count);
|
||||
};
|
||||
|
||||
#define SHOW_FIELDS(name) \
|
||||
static ssize_t show_ ## name(struct threshold_block *b, char *buf) \
|
||||
{ \
|
||||
return sprintf(buf, "%lu\n", (unsigned long) b->name); \
|
||||
}
|
||||
SHOW_FIELDS(interrupt_enable)
|
||||
SHOW_FIELDS(threshold_limit)
|
||||
|
||||
static ssize_t
|
||||
store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
|
||||
{
|
||||
struct thresh_restart tr;
|
||||
unsigned long new;
|
||||
|
||||
if (!b->interrupt_capable)
|
||||
return -EINVAL;
|
||||
|
||||
if (kstrtoul(buf, 0, &new) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
b->interrupt_enable = !!new;
|
||||
|
||||
memset(&tr, 0, sizeof(tr));
|
||||
tr.b = b;
|
||||
|
||||
smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
|
||||
{
|
||||
struct thresh_restart tr;
|
||||
unsigned long new;
|
||||
|
||||
if (kstrtoul(buf, 0, &new) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (new > THRESHOLD_MAX)
|
||||
new = THRESHOLD_MAX;
|
||||
if (new < 1)
|
||||
new = 1;
|
||||
|
||||
memset(&tr, 0, sizeof(tr));
|
||||
tr.old_limit = b->threshold_limit;
|
||||
b->threshold_limit = new;
|
||||
tr.b = b;
|
||||
|
||||
smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
static ssize_t show_error_count(struct threshold_block *b, char *buf)
|
||||
{
|
||||
u32 lo, hi;
|
||||
|
||||
rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);
|
||||
|
||||
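	/* The counter is preset to (THRESHOLD_MAX - threshold_limit) and counts up, so subtract that offset to report the errors seen so far. */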
return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
|
||||
(THRESHOLD_MAX - b->threshold_limit)));
|
||||
}
|
||||
|
||||
static struct threshold_attr error_count = {
|
||||
.attr = {.name = __stringify(error_count), .mode = 0444 },
|
||||
.show = show_error_count,
|
||||
};
|
||||
|
||||
#define RW_ATTR(val) \
|
||||
static struct threshold_attr val = { \
|
||||
.attr = {.name = __stringify(val), .mode = 0644 }, \
|
||||
.show = show_## val, \
|
||||
.store = store_## val, \
|
||||
};
|
||||
|
||||
RW_ATTR(interrupt_enable);
|
||||
RW_ATTR(threshold_limit);
|
||||
|
||||
static struct attribute *default_attrs[] = {
|
||||
&threshold_limit.attr,
|
||||
&error_count.attr,
|
||||
NULL, /* possibly interrupt_enable if supported, see below */
|
||||
NULL,
|
||||
};
|
||||
|
||||
#define to_block(k) container_of(k, struct threshold_block, kobj)
|
||||
#define to_attr(a) container_of(a, struct threshold_attr, attr)
|
||||
|
||||
static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
|
||||
{
|
||||
struct threshold_block *b = to_block(kobj);
|
||||
struct threshold_attr *a = to_attr(attr);
|
||||
ssize_t ret;
|
||||
|
||||
ret = a->show ? a->show(b, buf) : -EIO;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t store(struct kobject *kobj, struct attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct threshold_block *b = to_block(kobj);
|
||||
struct threshold_attr *a = to_attr(attr);
|
||||
ssize_t ret;
|
||||
|
||||
ret = a->store ? a->store(b, buf, count) : -EIO;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct sysfs_ops threshold_ops = {
|
||||
.show = show,
|
||||
.store = store,
|
||||
};
|
||||
|
||||
static struct kobj_type threshold_ktype = {
|
||||
.sysfs_ops = &threshold_ops,
|
||||
.default_attrs = default_attrs,
|
||||
};
|
||||
|
||||
static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
|
||||
unsigned int block, u32 address)
|
||||
{
|
||||
struct threshold_block *b = NULL;
|
||||
u32 low, high;
|
||||
int err;
|
||||
|
||||
if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
|
||||
return 0;
|
||||
|
||||
if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
|
||||
return 0;
|
||||
|
||||
if (!(high & MASK_VALID_HI)) {
|
||||
if (block)
|
||||
goto recurse;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!(high & MASK_CNTP_HI) ||
|
||||
(high & MASK_LOCKED_HI))
|
||||
goto recurse;
|
||||
|
||||
b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
|
||||
if (!b)
|
||||
return -ENOMEM;
|
||||
|
||||
b->block = block;
|
||||
b->bank = bank;
|
||||
b->cpu = cpu;
|
||||
b->address = address;
|
||||
b->interrupt_enable = 0;
|
||||
b->interrupt_capable = lvt_interrupt_supported(bank, high);
|
||||
b->threshold_limit = THRESHOLD_MAX;
|
||||
|
||||
if (b->interrupt_capable)
|
||||
threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
|
||||
else
|
||||
threshold_ktype.default_attrs[2] = NULL;
|
||||
|
||||
INIT_LIST_HEAD(&b->miscj);
|
||||
|
||||
if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
|
||||
list_add(&b->miscj,
|
||||
&per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
|
||||
} else {
|
||||
per_cpu(threshold_banks, cpu)[bank]->blocks = b;
|
||||
}
|
||||
|
||||
err = kobject_init_and_add(&b->kobj, &threshold_ktype,
|
||||
per_cpu(threshold_banks, cpu)[bank]->kobj,
|
||||
(bank == 4 ? bank4_names(b) : th_names[bank]));
|
||||
if (err)
|
||||
goto out_free;
|
||||
recurse:
|
||||
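	/* Walk on to the next block; the walk stops when no block pointer is present or NR_BLOCKS is reached, and invalid blocks are skipped. */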
if (!block) {
|
||||
address = (low & MASK_BLKPTR_LO) >> 21;
|
||||
if (!address)
|
||||
return 0;
|
||||
address += MCG_XBLK_ADDR;
|
||||
} else {
|
||||
++address;
|
||||
}
|
||||
|
||||
err = allocate_threshold_blocks(cpu, bank, ++block, address);
|
||||
if (err)
|
||||
goto out_free;
|
||||
|
||||
if (b)
|
||||
kobject_uevent(&b->kobj, KOBJ_ADD);
|
||||
|
||||
return err;
|
||||
|
||||
out_free:
|
||||
if (b) {
|
||||
kobject_put(&b->kobj);
|
||||
list_del(&b->miscj);
|
||||
kfree(b);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int __threshold_add_blocks(struct threshold_bank *b)
|
||||
{
|
||||
struct list_head *head = &b->blocks->miscj;
|
||||
struct threshold_block *pos = NULL;
|
||||
struct threshold_block *tmp = NULL;
|
||||
int err = 0;
|
||||
|
||||
err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
list_for_each_entry_safe(pos, tmp, head, miscj) {
|
||||
|
||||
err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
|
||||
if (err) {
|
||||
list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
|
||||
kobject_del(&pos->kobj);
|
||||
|
||||
return err;
|
||||
}
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int threshold_create_bank(unsigned int cpu, unsigned int bank)
|
||||
{
|
||||
struct device *dev = per_cpu(mce_device, cpu);
|
||||
struct amd_northbridge *nb = NULL;
|
||||
struct threshold_bank *b = NULL;
|
||||
const char *name = th_names[bank];
|
||||
int err = 0;
|
||||
|
||||
if (is_shared_bank(bank)) {
|
||||
nb = node_to_amd_nb(amd_get_nb_id(cpu));
|
||||
|
||||
/* threshold descriptor already initialized on this node? */
|
||||
if (nb && nb->bank4) {
|
||||
/* yes, use it */
|
||||
b = nb->bank4;
|
||||
err = kobject_add(b->kobj, &dev->kobj, name);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
per_cpu(threshold_banks, cpu)[bank] = b;
|
||||
atomic_inc(&b->cpus);
|
||||
|
||||
err = __threshold_add_blocks(b);
|
||||
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
|
||||
if (!b) {
|
||||
err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
b->kobj = kobject_create_and_add(name, &dev->kobj);
|
||||
if (!b->kobj) {
|
||||
err = -EINVAL;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
per_cpu(threshold_banks, cpu)[bank] = b;
|
||||
|
||||
if (is_shared_bank(bank)) {
|
||||
atomic_set(&b->cpus, 1);
|
||||
|
||||
/* nb is already initialized, see above */
|
||||
if (nb) {
|
||||
WARN_ON(nb->bank4);
|
||||
nb->bank4 = b;
|
||||
}
|
||||
}
|
||||
|
||||
err = allocate_threshold_blocks(cpu, bank, 0,
|
||||
MSR_IA32_MC0_MISC + bank * 4);
|
||||
if (!err)
|
||||
goto out;
|
||||
|
||||
out_free:
|
||||
kfree(b);
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
/* create dir/files for all valid threshold banks */
|
||||
static int threshold_create_device(unsigned int cpu)
|
||||
{
|
||||
unsigned int bank;
|
||||
struct threshold_bank **bp;
|
||||
int err = 0;
|
||||
|
||||
bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
|
||||
GFP_KERNEL);
|
||||
if (!bp)
|
||||
return -ENOMEM;
|
||||
|
||||
per_cpu(threshold_banks, cpu) = bp;
|
||||
|
||||
for (bank = 0; bank < mca_cfg.banks; ++bank) {
|
||||
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
|
||||
continue;
|
||||
err = threshold_create_bank(cpu, bank);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void deallocate_threshold_block(unsigned int cpu,
|
||||
unsigned int bank)
|
||||
{
|
||||
struct threshold_block *pos = NULL;
|
||||
struct threshold_block *tmp = NULL;
|
||||
struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];
|
||||
|
||||
if (!head)
|
||||
return;
|
||||
|
||||
list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
|
||||
kobject_put(&pos->kobj);
|
||||
list_del(&pos->miscj);
|
||||
kfree(pos);
|
||||
}
|
||||
|
||||
kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
|
||||
per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
|
||||
}
|
||||
|
||||
static void __threshold_remove_blocks(struct threshold_bank *b)
|
||||
{
|
||||
struct threshold_block *pos = NULL;
|
||||
struct threshold_block *tmp = NULL;
|
||||
|
||||
kobject_del(b->kobj);
|
||||
|
||||
list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
|
||||
kobject_del(&pos->kobj);
|
||||
}
|
||||
|
||||
static void threshold_remove_bank(unsigned int cpu, int bank)
|
||||
{
|
||||
struct amd_northbridge *nb;
|
||||
struct threshold_bank *b;
|
||||
|
||||
b = per_cpu(threshold_banks, cpu)[bank];
|
||||
if (!b)
|
||||
return;
|
||||
|
||||
if (!b->blocks)
|
||||
goto free_out;
|
||||
|
||||
if (is_shared_bank(bank)) {
|
||||
if (!atomic_dec_and_test(&b->cpus)) {
|
||||
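			/* Other CPUs on this node still use the shared bank: only remove this CPU's sysfs links and per-CPU pointer, keep the shared descriptor. */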
__threshold_remove_blocks(b);
|
||||
per_cpu(threshold_banks, cpu)[bank] = NULL;
|
||||
return;
|
||||
} else {
|
||||
/*
|
||||
* the last CPU on this node using the shared bank is
|
||||
* going away, remove that bank now.
|
||||
*/
|
||||
nb = node_to_amd_nb(amd_get_nb_id(cpu));
|
||||
nb->bank4 = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
deallocate_threshold_block(cpu, bank);
|
||||
|
||||
free_out:
|
||||
kobject_del(b->kobj);
|
||||
kobject_put(b->kobj);
|
||||
kfree(b);
|
||||
per_cpu(threshold_banks, cpu)[bank] = NULL;
|
||||
}
|
||||
|
||||
static void threshold_remove_device(unsigned int cpu)
|
||||
{
|
||||
unsigned int bank;
|
||||
|
||||
for (bank = 0; bank < mca_cfg.banks; ++bank) {
|
||||
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
|
||||
continue;
|
||||
threshold_remove_bank(cpu, bank);
|
||||
}
|
||||
kfree(per_cpu(threshold_banks, cpu));
|
||||
}
|
||||
|
||||
/* get notified when a cpu comes on/off */
|
||||
static void
|
||||
amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
|
||||
{
|
||||
switch (action) {
|
||||
case CPU_ONLINE:
|
||||
case CPU_ONLINE_FROZEN:
|
||||
threshold_create_device(cpu);
|
||||
break;
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
threshold_remove_device(cpu);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static __init int threshold_init_device(void)
|
||||
{
|
||||
unsigned lcpu = 0;
|
||||
|
||||
/* to hit CPUs online before the notifier is up */
|
||||
for_each_online_cpu(lcpu) {
|
||||
int err = threshold_create_device(lcpu);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
threshold_cpu_callback = amd_64_threshold_cpu_callback;
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* there are 3 funcs which need to be _initcalled in a logic sequence:
|
||||
* 1. xen_late_init_mcelog
|
||||
* 2. mcheck_init_device
|
||||
* 3. threshold_init_device
|
||||
*
|
||||
* xen_late_init_mcelog must register xen_mce_chrdev_device before
|
||||
* native mce_chrdev_device registration if running under xen platform;
|
||||
*
|
||||
* mcheck_init_device should be inited before threshold_init_device to
|
||||
* initialize mce_device, otherwise a NULL ptr dereference will cause panic.
|
||||
*
|
||||
* so we use following _initcalls
|
||||
* 1. device_initcall(xen_late_init_mcelog);
|
||||
* 2. device_initcall_sync(mcheck_init_device);
|
||||
* 3. late_initcall(threshold_init_device);
|
||||
*
|
||||
* when running under xen, the initcall order is 1,2,3;
|
||||
* on baremetal, we skip 1 and we do only 2 and 3.
|
||||
*/
|
||||
late_initcall(threshold_init_device);
|
||||
391
arch/x86/kernel/cpu/mcheck/mce_intel.c
Normal file
@@ -0,0 +1,391 @@
/*
|
||||
* Intel specific MCE features.
|
||||
* Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
|
||||
* Copyright (C) 2008, 2009 Intel Corporation
|
||||
* Author: Andi Kleen
|
||||
*/
|
||||
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
#include "mce-internal.h"
|
||||
|
||||
/*
|
||||
* Support for Intel Correct Machine Check Interrupts. This allows
|
||||
* the CPU to raise an interrupt when a corrected machine check happened.
|
||||
* Normally we pick those up using a regular polling timer.
|
||||
* Also supports reliable discovery of shared banks.
|
||||
*/
|
||||
|
||||
/*
|
||||
* CMCI can be delivered to multiple cpus that share a machine check bank
|
||||
* so we need to designate a single cpu to process errors logged in each bank
|
||||
* in the interrupt handler (otherwise we would have many races and potential
|
||||
* double reporting of the same error).
|
||||
* Note that this can change when a cpu is offlined or brought online since
|
||||
* some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
|
||||
* disables CMCI on all banks owned by the cpu and clears this bitfield. At
|
||||
* this point, cmci_rediscover() kicks in and a different cpu may end up
|
||||
* taking ownership of some of the shared MCA banks that were previously
|
||||
* owned by the offlined cpu.
|
||||
*/
|
||||
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
|
||||
|
||||
/*
|
||||
* cmci_discover_lock protects against parallel discovery attempts
|
||||
* which could race against each other.
|
||||
*/
|
||||
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
|
||||
|
||||
#define CMCI_THRESHOLD 1
|
||||
#define CMCI_POLL_INTERVAL (30 * HZ)
|
||||
#define CMCI_STORM_INTERVAL (1 * HZ)
|
||||
#define CMCI_STORM_THRESHOLD 15
|
||||
|
||||
static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
|
||||
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
|
||||
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
|
||||
|
||||
enum {
|
||||
CMCI_STORM_NONE,
|
||||
CMCI_STORM_ACTIVE,
|
||||
CMCI_STORM_SUBSIDED,
|
||||
};
|
||||
|
||||
static atomic_t cmci_storm_on_cpus;
|
||||
|
||||
static int cmci_supported(int *banks)
|
||||
{
|
||||
u64 cap;
|
||||
|
||||
if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Vendor check is not strictly needed, but the initial
|
||||
* initialization is vendor keyed and this
|
||||
* makes sure none of the backdoors are entered otherwise.
|
||||
*/
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
|
||||
return 0;
|
||||
if (!cpu_has_apic || lapic_get_maxlvt() < 6)
|
||||
return 0;
|
||||
rdmsrl(MSR_IA32_MCG_CAP, cap);
|
||||
*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
|
||||
return !!(cap & MCG_CMCI_P);
|
||||
}
|
||||
|
||||
void mce_intel_cmci_poll(void)
|
||||
{
|
||||
if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
|
||||
return;
|
||||
machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
|
||||
}
|
||||
|
||||
void mce_intel_hcpu_update(unsigned long cpu)
|
||||
{
|
||||
if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
|
||||
atomic_dec(&cmci_storm_on_cpus);
|
||||
|
||||
per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
|
||||
}
|
||||
|
||||
unsigned long mce_intel_adjust_timer(unsigned long interval)
|
||||
{
|
||||
int r;
|
||||
|
||||
if (interval < CMCI_POLL_INTERVAL)
|
||||
return interval;
|
||||
|
||||
switch (__this_cpu_read(cmci_storm_state)) {
|
||||
case CMCI_STORM_ACTIVE:
|
||||
/*
|
||||
* We switch back to interrupt mode once the poll timer has
|
||||
* silenced itself. That means no events recorded and the
|
||||
* timer interval is back to our poll interval.
|
||||
*/
|
||||
__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
|
||||
r = atomic_sub_return(1, &cmci_storm_on_cpus);
|
||||
if (r == 0)
|
||||
pr_notice("CMCI storm subsided: switching to interrupt mode\n");
|
||||
/* FALLTHROUGH */
|
||||
|
||||
case CMCI_STORM_SUBSIDED:
|
||||
/*
|
||||
* We wait for all cpus to go back to SUBSIDED
|
||||
* state. When that happens we switch back to
|
||||
* interrupt mode.
|
||||
*/
|
||||
if (!atomic_read(&cmci_storm_on_cpus)) {
|
||||
__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
|
||||
cmci_reenable();
|
||||
cmci_recheck();
|
||||
}
|
||||
return CMCI_POLL_INTERVAL;
|
||||
default:
|
||||
/*
|
||||
* We have shiny weather. Let the poll do whatever it
|
||||
* thinks.
|
||||
*/
|
||||
return interval;
|
||||
}
|
||||
}
|
||||
|
||||
static void cmci_storm_disable_banks(void)
|
||||
{
|
||||
unsigned long flags, *owned;
|
||||
int bank;
|
||||
u64 val;
|
||||
|
||||
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
|
||||
owned = this_cpu_ptr(mce_banks_owned);
|
||||
for_each_set_bit(bank, owned, MAX_NR_BANKS) {
|
||||
rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
|
||||
val &= ~MCI_CTL2_CMCI_EN;
|
||||
wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
||||
}
|
||||
|
||||
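/*
 * Count CMCIs arriving within a CMCI_STORM_INTERVAL window; once more than
 * CMCI_STORM_THRESHOLD are seen, disable CMCI on the banks this CPU owns and
 * fall back to polling until the storm subsides.
 */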
static bool cmci_storm_detect(void)
|
||||
{
|
||||
unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
|
||||
unsigned long ts = __this_cpu_read(cmci_time_stamp);
|
||||
unsigned long now = jiffies;
|
||||
int r;
|
||||
|
||||
if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
|
||||
return true;
|
||||
|
||||
if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
|
||||
cnt++;
|
||||
} else {
|
||||
cnt = 1;
|
||||
__this_cpu_write(cmci_time_stamp, now);
|
||||
}
|
||||
__this_cpu_write(cmci_storm_cnt, cnt);
|
||||
|
||||
if (cnt <= CMCI_STORM_THRESHOLD)
|
||||
return false;
|
||||
|
||||
cmci_storm_disable_banks();
|
||||
__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
|
||||
r = atomic_add_return(1, &cmci_storm_on_cpus);
|
||||
mce_timer_kick(CMCI_POLL_INTERVAL);
|
||||
|
||||
if (r == 1)
|
||||
pr_notice("CMCI storm detected: switching to poll mode\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* The interrupt handler. This is called on every event.
|
||||
* Just call the poller directly to log any events.
|
||||
* This could in theory increase the threshold under high load,
|
||||
* but doesn't for now.
|
||||
*/
|
||||
static void intel_threshold_interrupt(void)
|
||||
{
|
||||
if (cmci_storm_detect())
|
||||
return;
|
||||
machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
|
||||
mce_notify_irq();
|
||||
}
|
||||
|
||||
/*
|
||||
* Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
|
||||
* on this CPU. Use the algorithm recommended in the SDM to discover shared
|
||||
* banks.
|
||||
*/
|
||||
static void cmci_discover(int banks)
|
||||
{
|
||||
unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
|
||||
unsigned long flags;
|
||||
int i;
|
||||
int bios_wrong_thresh = 0;
|
||||
|
||||
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
|
||||
for (i = 0; i < banks; i++) {
|
||||
u64 val;
|
||||
int bios_zero_thresh = 0;
|
||||
|
||||
if (test_bit(i, owned))
|
||||
continue;
|
||||
|
||||
/* Skip banks in firmware first mode */
|
||||
if (test_bit(i, mce_banks_ce_disabled))
|
||||
continue;
|
||||
|
||||
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||
|
||||
/* Already owned by someone else? */
|
||||
if (val & MCI_CTL2_CMCI_EN) {
|
||||
clear_bit(i, owned);
|
||||
__clear_bit(i, this_cpu_ptr(mce_poll_banks));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!mca_cfg.bios_cmci_threshold) {
|
||||
val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
|
||||
val |= CMCI_THRESHOLD;
|
||||
} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
|
||||
/*
|
||||
* If bios_cmci_threshold boot option was specified
|
||||
* but the threshold is zero, we'll try to initialize
|
||||
* it to 1.
|
||||
*/
|
||||
bios_zero_thresh = 1;
|
||||
val |= CMCI_THRESHOLD;
|
||||
}
|
||||
|
||||
val |= MCI_CTL2_CMCI_EN;
|
||||
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||
|
||||
/* Did the enable bit stick? -- the bank supports CMCI */
|
||||
if (val & MCI_CTL2_CMCI_EN) {
|
||||
set_bit(i, owned);
|
||||
__clear_bit(i, this_cpu_ptr(mce_poll_banks));
|
||||
/*
|
||||
* We are able to set thresholds for some banks that
|
||||
* had a threshold of 0. This means the BIOS has not
|
||||
* set the thresholds properly or does not work with
|
||||
* this boot option. Note down now and report later.
|
||||
*/
|
||||
if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
|
||||
(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
|
||||
bios_wrong_thresh = 1;
|
||||
} else {
|
||||
WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
|
||||
}
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
||||
if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
|
||||
pr_info_once(
|
||||
"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
|
||||
pr_info_once(
|
||||
"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Just in case we missed an event during initialization check
|
||||
* all the CMCI owned banks.
|
||||
*/
|
||||
void cmci_recheck(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
int banks;
|
||||
|
||||
if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
|
||||
return;
|
||||
local_irq_save(flags);
|
||||
machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/* Caller must hold the lock on cmci_discover_lock */
|
||||
static void __cmci_disable_bank(int bank)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
|
||||
return;
|
||||
rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
|
||||
val &= ~MCI_CTL2_CMCI_EN;
|
||||
wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
|
||||
__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
|
||||
}
|
||||
|
||||
/*
|
||||
* Disable CMCI on this CPU for all banks it owns when it goes down.
|
||||
* This allows other CPUs to claim the banks on rediscovery.
|
||||
*/
|
||||
void cmci_clear(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
int i;
|
||||
int banks;
|
||||
|
||||
if (!cmci_supported(&banks))
|
||||
return;
|
||||
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
|
||||
for (i = 0; i < banks; i++)
|
||||
__cmci_disable_bank(i);
|
||||
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
||||
}
|
||||
|
||||
static void cmci_rediscover_work_func(void *arg)
|
||||
{
|
||||
int banks;
|
||||
|
||||
/* Recheck banks in case CPUs don't all have the same */
|
||||
if (cmci_supported(&banks))
|
||||
cmci_discover(banks);
|
||||
}
|
||||
|
||||
/* After a CPU went down cycle through all the others and rediscover */
|
||||
void cmci_rediscover(void)
|
||||
{
|
||||
int banks;
|
||||
|
||||
if (!cmci_supported(&banks))
|
||||
return;
|
||||
|
||||
on_each_cpu(cmci_rediscover_work_func, NULL, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reenable CMCI on this CPU in case a CPU down failed.
|
||||
*/
|
||||
void cmci_reenable(void)
|
||||
{
|
||||
int banks;
|
||||
if (cmci_supported(&banks))
|
||||
cmci_discover(banks);
|
||||
}
|
||||
|
||||
void cmci_disable_bank(int bank)
|
||||
{
|
||||
int banks;
|
||||
unsigned long flags;
|
||||
|
||||
if (!cmci_supported(&banks))
|
||||
return;
|
||||
|
||||
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
|
||||
__cmci_disable_bank(bank);
|
||||
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
||||
}
|
||||
|
||||
static void intel_init_cmci(void)
|
||||
{
|
||||
int banks;
|
||||
|
||||
if (!cmci_supported(&banks))
|
||||
return;
|
||||
|
||||
mce_threshold_vector = intel_threshold_interrupt;
|
||||
cmci_discover(banks);
|
||||
/*
|
||||
* For CPU #0 this runs with still disabled APIC, but that's
|
||||
* ok because only the vector is set up. We still do another
|
||||
* check for the banks later for CPU #0 just to make sure
|
||||
* to not miss any events.
|
||||
*/
|
||||
apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
|
||||
cmci_recheck();
|
||||
}
|
||||
|
||||
void mce_intel_feature_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
intel_init_thermal(c);
|
||||
intel_init_cmci();
|
||||
}
|
||||
66
arch/x86/kernel/cpu/mcheck/p5.c
Normal file
@@ -0,0 +1,66 @@
/*
|
||||
* P5 specific Machine Check Exception Reporting
|
||||
* (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
|
||||
*/
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
/* By default disabled */
|
||||
int mce_p5_enabled __read_mostly;
|
||||
|
||||
/* Machine check handler for Pentium class Intel CPUs: */
|
||||
static void pentium_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
u32 loaddr, hi, lotype;
|
||||
|
||||
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
|
||||
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
|
||||
|
||||
printk(KERN_EMERG
|
||||
"CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n",
|
||||
smp_processor_id(), loaddr, lotype);
|
||||
|
||||
if (lotype & (1<<5)) {
|
||||
printk(KERN_EMERG
|
||||
"CPU#%d: Possible thermal failure (CPU on fire ?).\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
|
||||
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
|
||||
}
|
||||
|
||||
/* Set up machine check reporting for processors with Intel style MCE: */
|
||||
void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 l, h;
|
||||
|
||||
/* Default P5 to off as its often misconnected: */
|
||||
if (!mce_p5_enabled)
|
||||
return;
|
||||
|
||||
/* Check for MCE support: */
|
||||
if (!cpu_has(c, X86_FEATURE_MCE))
|
||||
return;
|
||||
|
||||
machine_check_vector = pentium_machine_check;
|
||||
/* Make sure the vector pointer is visible before we enable MCEs: */
|
||||
wmb();
|
||||
|
||||
/* Read registers before enabling: */
|
||||
rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
|
||||
rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
|
||||
printk(KERN_INFO
|
||||
"Intel old style machine check architecture supported.\n");
|
||||
|
||||
/* Enable MCE: */
|
||||
set_in_cr4(X86_CR4_MCE);
|
||||
printk(KERN_INFO
|
||||
"Intel old style machine check reporting enabled on CPU#%d.\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
573
arch/x86/kernel/cpu/mcheck/therm_throt.c
Normal file
@@ -0,0 +1,573 @@
/*
|
||||
* Thermal throttle event support code (such as syslog messaging and rate
|
||||
* limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
|
||||
*
|
||||
* This allows consistent reporting of CPU thermal throttle events.
|
||||
*
|
||||
* Maintains a counter in /sys that keeps track of the number of thermal
|
||||
* events, such that the user knows how bad the thermal problem might be
|
||||
* (since the logging to syslog and mcelog is rate limited).
|
||||
*
|
||||
* Author: Dmitriy Zavin (dmitriyz@google.com)
|
||||
*
|
||||
* Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
|
||||
* Inspired by Ross Biro's and Al Borchers' counter code.
|
||||
*/
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/cpu.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/idle.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/trace/irq_vectors.h>
|
||||
|
||||
/* How long to wait between reporting thermal events */
|
||||
#define CHECK_INTERVAL (300 * HZ)
|
||||
|
||||
#define THERMAL_THROTTLING_EVENT 0
|
||||
#define POWER_LIMIT_EVENT 1
|
||||
|
||||
/*
|
||||
* Current thermal event state:
|
||||
*/
|
||||
struct _thermal_state {
|
||||
bool new_event;
|
||||
int event;
|
||||
u64 next_check;
|
||||
unsigned long count;
|
||||
unsigned long last_count;
|
||||
};
|
||||
|
||||
struct thermal_state {
|
||||
struct _thermal_state core_throttle;
|
||||
struct _thermal_state core_power_limit;
|
||||
struct _thermal_state package_throttle;
|
||||
struct _thermal_state package_power_limit;
|
||||
struct _thermal_state core_thresh0;
|
||||
struct _thermal_state core_thresh1;
|
||||
struct _thermal_state pkg_thresh0;
|
||||
struct _thermal_state pkg_thresh1;
|
||||
};
|
||||
|
||||
/* Callback to handle core threshold interrupts */
|
||||
int (*platform_thermal_notify)(__u64 msr_val);
|
||||
EXPORT_SYMBOL(platform_thermal_notify);
|
||||
|
||||
/* Callback to handle core package threshold_interrupts */
|
||||
int (*platform_thermal_package_notify)(__u64 msr_val);
|
||||
EXPORT_SYMBOL_GPL(platform_thermal_package_notify);
|
||||
|
||||
/* Callback support of rate control, return true, if
|
||||
* callback has rate control */
|
||||
bool (*platform_thermal_package_rate_control)(void);
|
||||
EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);
|
||||
|
||||
|
||||
static DEFINE_PER_CPU(struct thermal_state, thermal_state);
|
||||
|
||||
static atomic_t therm_throt_en = ATOMIC_INIT(0);
|
||||
|
||||
static u32 lvtthmr_init __read_mostly;
|
||||
|
||||
#ifdef CONFIG_SYSFS
|
||||
#define define_therm_throt_device_one_ro(_name) \
|
||||
static DEVICE_ATTR(_name, 0444, \
|
||||
therm_throt_device_show_##_name, \
|
||||
NULL) \
|
||||
|
||||
#define define_therm_throt_device_show_func(event, name) \
|
||||
\
|
||||
static ssize_t therm_throt_device_show_##event##_##name( \
|
||||
struct device *dev, \
|
||||
struct device_attribute *attr, \
|
||||
char *buf) \
|
||||
{ \
|
||||
unsigned int cpu = dev->id; \
|
||||
ssize_t ret; \
|
||||
\
|
||||
preempt_disable(); /* CPU hotplug */ \
|
||||
if (cpu_online(cpu)) { \
|
||||
ret = sprintf(buf, "%lu\n", \
|
||||
per_cpu(thermal_state, cpu).event.name); \
|
||||
} else \
|
||||
ret = 0; \
|
||||
preempt_enable(); \
|
||||
\
|
||||
return ret; \
|
||||
}
|
||||
|
||||
define_therm_throt_device_show_func(core_throttle, count);
|
||||
define_therm_throt_device_one_ro(core_throttle_count);
|
||||
|
||||
define_therm_throt_device_show_func(core_power_limit, count);
|
||||
define_therm_throt_device_one_ro(core_power_limit_count);
|
||||
|
||||
define_therm_throt_device_show_func(package_throttle, count);
|
||||
define_therm_throt_device_one_ro(package_throttle_count);
|
||||
|
||||
define_therm_throt_device_show_func(package_power_limit, count);
|
||||
define_therm_throt_device_one_ro(package_power_limit_count);
|
||||
|
||||
static struct attribute *thermal_throttle_attrs[] = {
|
||||
&dev_attr_core_throttle_count.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute_group thermal_attr_group = {
|
||||
.attrs = thermal_throttle_attrs,
|
||||
.name = "thermal_throttle"
|
||||
};
|
||||
#endif /* CONFIG_SYSFS */
|
||||
|
||||
#define CORE_LEVEL 0
|
||||
#define PACKAGE_LEVEL 1
|
||||
|
||||
/***
|
||||
* therm_throt_process - Process thermal throttling event from interrupt
|
||||
* @curr: Whether the condition is current or not (boolean), since the
|
||||
* thermal interrupt normally gets called both when the thermal
|
||||
* event begins and once the event has ended.
|
||||
*
|
||||
* This function is called by the thermal interrupt after the
|
||||
* IRQ has been acknowledged.
|
||||
*
|
||||
* It will take care of rate limiting and printing messages to the syslog.
|
||||
*
|
||||
* Returns: 0 : Event should NOT be further logged, i.e. still in
|
||||
* "timeout" from previous log message.
|
||||
* 1 : Event should be logged further, and a message has been
|
||||
* printed to the syslog.
|
||||
*/
|
||||
static int therm_throt_process(bool new_event, int event, int level)
|
||||
{
|
||||
struct _thermal_state *state;
|
||||
unsigned int this_cpu = smp_processor_id();
|
||||
bool old_event;
|
||||
u64 now;
|
||||
struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
|
||||
|
||||
now = get_jiffies_64();
|
||||
if (level == CORE_LEVEL) {
|
||||
if (event == THERMAL_THROTTLING_EVENT)
|
||||
state = &pstate->core_throttle;
|
||||
else if (event == POWER_LIMIT_EVENT)
|
||||
state = &pstate->core_power_limit;
|
||||
else
|
||||
return 0;
|
||||
} else if (level == PACKAGE_LEVEL) {
|
||||
if (event == THERMAL_THROTTLING_EVENT)
|
||||
state = &pstate->package_throttle;
|
||||
else if (event == POWER_LIMIT_EVENT)
|
||||
state = &pstate->package_power_limit;
|
||||
else
|
||||
return 0;
|
||||
} else
|
||||
return 0;
|
||||
|
||||
old_event = state->new_event;
|
||||
state->new_event = new_event;
|
||||
|
||||
if (new_event)
|
||||
state->count++;
|
||||
|
||||
if (time_before64(now, state->next_check) &&
|
||||
state->count != state->last_count)
|
||||
return 0;
|
||||
|
||||
state->next_check = now + CHECK_INTERVAL;
|
||||
state->last_count = state->count;
|
||||
|
||||
/* if we just entered the thermal event */
|
||||
if (new_event) {
|
||||
if (event == THERMAL_THROTTLING_EVENT)
|
||||
printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
|
||||
this_cpu,
|
||||
level == CORE_LEVEL ? "Core" : "Package",
|
||||
state->count);
|
||||
return 1;
|
||||
}
|
||||
if (old_event) {
|
||||
if (event == THERMAL_THROTTLING_EVENT)
|
||||
printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
|
||||
this_cpu,
|
||||
level == CORE_LEVEL ? "Core" : "Package");
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
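/*
 * Rate-limit threshold notifications: report at most one event per
 * CHECK_INTERVAL for each core/package threshold state.
 */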
static int thresh_event_valid(int level, int event)
|
||||
{
|
||||
struct _thermal_state *state;
|
||||
unsigned int this_cpu = smp_processor_id();
|
||||
struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
|
||||
u64 now = get_jiffies_64();
|
||||
|
||||
if (level == PACKAGE_LEVEL)
|
||||
state = (event == 0) ? &pstate->pkg_thresh0 :
|
||||
&pstate->pkg_thresh1;
|
||||
else
|
||||
state = (event == 0) ? &pstate->core_thresh0 :
|
||||
&pstate->core_thresh1;
|
||||
|
||||
if (time_before64(now, state->next_check))
|
||||
return 0;
|
||||
|
||||
state->next_check = now + CHECK_INTERVAL;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static bool int_pln_enable;
|
||||
static int __init int_pln_enable_setup(char *s)
|
||||
{
|
||||
int_pln_enable = true;
|
||||
|
||||
return 1;
|
||||
}
|
||||
__setup("int_pln_enable", int_pln_enable_setup);
|
||||
|
||||
#ifdef CONFIG_SYSFS
|
||||
/* Add/Remove thermal_throttle interface for CPU device: */
|
||||
static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
|
||||
{
|
||||
int err;
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
|
||||
err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
|
||||
err = sysfs_add_file_to_group(&dev->kobj,
|
||||
&dev_attr_core_power_limit_count.attr,
|
||||
thermal_attr_group.name);
|
||||
if (cpu_has(c, X86_FEATURE_PTS)) {
|
||||
err = sysfs_add_file_to_group(&dev->kobj,
|
||||
&dev_attr_package_throttle_count.attr,
|
||||
thermal_attr_group.name);
|
||||
if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
|
||||
err = sysfs_add_file_to_group(&dev->kobj,
|
||||
&dev_attr_package_power_limit_count.attr,
|
||||
thermal_attr_group.name);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void thermal_throttle_remove_dev(struct device *dev)
|
||||
{
|
||||
sysfs_remove_group(&dev->kobj, &thermal_attr_group);
|
||||
}
|
||||
|
||||
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
|
||||
static int
|
||||
thermal_throttle_cpu_callback(struct notifier_block *nfb,
|
||||
unsigned long action,
|
||||
void *hcpu)
|
||||
{
|
||||
unsigned int cpu = (unsigned long)hcpu;
|
||||
struct device *dev;
|
||||
int err = 0;
|
||||
|
||||
dev = get_cpu_device(cpu);
|
||||
|
||||
switch (action) {
|
||||
case CPU_UP_PREPARE:
|
||||
case CPU_UP_PREPARE_FROZEN:
|
||||
err = thermal_throttle_add_dev(dev, cpu);
|
||||
WARN_ON(err);
|
||||
break;
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_UP_CANCELED_FROZEN:
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
thermal_throttle_remove_dev(dev);
|
||||
break;
|
||||
}
|
||||
return notifier_from_errno(err);
|
||||
}
|
||||
|
||||
static struct notifier_block thermal_throttle_cpu_notifier =
|
||||
{
|
||||
.notifier_call = thermal_throttle_cpu_callback,
|
||||
};
|
||||
|
||||
static __init int thermal_throttle_init_device(void)
|
||||
{
|
||||
unsigned int cpu = 0;
|
||||
int err;
|
||||
|
||||
if (!atomic_read(&therm_throt_en))
|
||||
return 0;
|
||||
|
||||
cpu_notifier_register_begin();
|
||||
|
||||
/* connect live CPUs to sysfs */
|
||||
for_each_online_cpu(cpu) {
|
||||
err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu);
|
||||
WARN_ON(err);
|
||||
}
|
||||
|
||||
__register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
|
||||
cpu_notifier_register_done();
|
||||
|
||||
return 0;
|
||||
}
|
||||
device_initcall(thermal_throttle_init_device);
|
||||
|
||||
#endif /* CONFIG_SYSFS */
|
||||
|
||||
static void notify_package_thresholds(__u64 msr_val)
|
||||
{
|
||||
bool notify_thres_0 = false;
|
||||
bool notify_thres_1 = false;
|
||||
|
||||
if (!platform_thermal_package_notify)
|
||||
return;
|
||||
|
||||
/* lower threshold check */
|
||||
if (msr_val & THERM_LOG_THRESHOLD0)
|
||||
notify_thres_0 = true;
|
||||
/* higher threshold check */
|
||||
if (msr_val & THERM_LOG_THRESHOLD1)
|
||||
notify_thres_1 = true;
|
||||
|
||||
if (!notify_thres_0 && !notify_thres_1)
|
||||
return;
|
||||
|
||||
if (platform_thermal_package_rate_control &&
|
||||
platform_thermal_package_rate_control()) {
|
||||
/* Rate control is implemented in callback */
|
||||
platform_thermal_package_notify(msr_val);
|
||||
return;
|
||||
}
|
||||
|
||||
/* lower threshold reached */
|
||||
if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0))
|
||||
platform_thermal_package_notify(msr_val);
|
||||
/* higher threshold reached */
|
||||
if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1))
|
||||
platform_thermal_package_notify(msr_val);
|
||||
}
|
||||
|
||||
static void notify_thresholds(__u64 msr_val)
|
||||
{
|
||||
/* check whether the interrupt handler is defined;
|
||||
* otherwise simply return
|
||||
*/
|
||||
if (!platform_thermal_notify)
|
||||
return;
|
||||
|
||||
/* lower threshold reached */
|
||||
if ((msr_val & THERM_LOG_THRESHOLD0) &&
|
||||
thresh_event_valid(CORE_LEVEL, 0))
|
||||
platform_thermal_notify(msr_val);
|
||||
/* higher threshold reached */
|
||||
if ((msr_val & THERM_LOG_THRESHOLD1) &&
|
||||
thresh_event_valid(CORE_LEVEL, 1))
|
||||
platform_thermal_notify(msr_val);
|
||||
}
|
||||
|
||||
/* Thermal transition interrupt handler */
|
||||
static void intel_thermal_interrupt(void)
|
||||
{
|
||||
__u64 msr_val;
|
||||
|
||||
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
|
||||
|
||||
/* Check for violation of core thermal thresholds*/
|
||||
notify_thresholds(msr_val);
|
||||
|
||||
if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
|
||||
THERMAL_THROTTLING_EVENT,
|
||||
CORE_LEVEL) != 0)
|
||||
mce_log_therm_throt_event(msr_val);
|
||||
|
||||
if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
|
||||
therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
|
||||
POWER_LIMIT_EVENT,
|
||||
CORE_LEVEL);
|
||||
|
||||
if (this_cpu_has(X86_FEATURE_PTS)) {
|
||||
rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
|
||||
/* check violations of package thermal thresholds */
|
||||
notify_package_thresholds(msr_val);
|
||||
therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
|
||||
THERMAL_THROTTLING_EVENT,
|
||||
PACKAGE_LEVEL);
|
||||
if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
|
||||
therm_throt_process(msr_val &
|
||||
PACKAGE_THERM_STATUS_POWER_LIMIT,
|
||||
POWER_LIMIT_EVENT,
|
||||
PACKAGE_LEVEL);
|
||||
}
|
||||
}
|
||||
|
||||
static void unexpected_thermal_interrupt(void)
|
||||
{
|
||||
printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
|
||||
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
|
||||
|
||||
static inline void __smp_thermal_interrupt(void)
|
||||
{
|
||||
inc_irq_stat(irq_thermal_count);
|
||||
smp_thermal_vector();
|
||||
}
|
||||
|
||||
asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs)
|
||||
{
|
||||
entering_irq();
|
||||
__smp_thermal_interrupt();
|
||||
exiting_ack_irq();
|
||||
}
|
||||
|
||||
asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
|
||||
{
|
||||
entering_irq();
|
||||
trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
|
||||
__smp_thermal_interrupt();
|
||||
trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
|
||||
exiting_ack_irq();
|
||||
}
|
||||
|
||||
/* Thermal monitoring depends on APIC, ACPI and clock modulation */
|
||||
static int intel_thermal_supported(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (!cpu_has_apic)
|
||||
return 0;
|
||||
if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
void __init mcheck_intel_therm_init(void)
|
||||
{
|
||||
/*
|
||||
* This function is only called on boot CPU. Save the init thermal
|
||||
* LVT value on BSP and use that value to restore APs' thermal LVT
|
||||
* entry BIOS programmed later
|
||||
*/
|
||||
if (intel_thermal_supported(&boot_cpu_data))
|
||||
lvtthmr_init = apic_read(APIC_LVTTHMR);
|
||||
}
|
||||
|
||||
void intel_init_thermal(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
int tm2 = 0;
|
||||
u32 l, h;
|
||||
|
||||
if (!intel_thermal_supported(c))
|
||||
return;
|
||||
|
||||
/*
|
||||
* First check if its enabled already, in which case there might
|
||||
* be some SMM goo which handles it, so we can't even put a handler
|
||||
* since it might be delivered via SMI already:
|
||||
*/
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
|
||||
|
||||
h = lvtthmr_init;
|
||||
/*
|
||||
* The initial value of thermal LVT entries on all APs always reads
|
||||
* 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
|
||||
* sequence to them and LVT registers are reset to 0s except for
|
||||
* the mask bits which are set to 1s when APs receive INIT IPI.
|
||||
* If BIOS takes over the thermal interrupt and sets its interrupt
|
||||
* delivery mode to SMI (not fixed), it restores the value that the
|
||||
* BIOS has programmed on AP based on BSP's info we saved since BIOS
|
||||
* is always setting the same value for all threads/cores.
|
||||
*/
|
||||
if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
|
||||
apic_write(APIC_LVTTHMR, lvtthmr_init);
|
||||
|
||||
|
||||
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
|
||||
if (system_state == SYSTEM_BOOTING)
|
||||
printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check whether a vector already exists */
|
||||
if (h & APIC_VECTOR_MASK) {
|
||||
printk(KERN_DEBUG
|
||||
"CPU%d: Thermal LVT vector (%#x) already installed\n",
|
||||
cpu, (h & APIC_VECTOR_MASK));
|
||||
return;
|
||||
}
|
||||
|
||||
/* early Pentium M models use different method for enabling TM2 */
|
||||
if (cpu_has(c, X86_FEATURE_TM2)) {
|
||||
if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
|
||||
rdmsr(MSR_THERM2_CTL, l, h);
|
||||
if (l & MSR_THERM2_CTL_TM_SELECT)
|
||||
tm2 = 1;
|
||||
} else if (l & MSR_IA32_MISC_ENABLE_TM2)
|
||||
tm2 = 1;
|
||||
}
|
||||
|
||||
/* We'll mask the thermal vector in the lapic till we're ready: */
|
||||
h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
|
||||
apic_write(APIC_LVTTHMR, h);
|
||||
|
||||
rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
|
||||
if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
|
||||
wrmsr(MSR_IA32_THERM_INTERRUPT,
|
||||
(l | (THERM_INT_LOW_ENABLE
|
||||
| THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
|
||||
else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
|
||||
wrmsr(MSR_IA32_THERM_INTERRUPT,
|
||||
l | (THERM_INT_LOW_ENABLE
|
||||
| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
|
||||
else
|
||||
wrmsr(MSR_IA32_THERM_INTERRUPT,
|
||||
l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_PTS)) {
|
||||
rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
|
||||
if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
|
||||
wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
|
||||
(l | (PACKAGE_THERM_INT_LOW_ENABLE
|
||||
| PACKAGE_THERM_INT_HIGH_ENABLE))
|
||||
& ~PACKAGE_THERM_INT_PLN_ENABLE, h);
|
||||
else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
|
||||
wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
|
||||
l | (PACKAGE_THERM_INT_LOW_ENABLE
|
||||
| PACKAGE_THERM_INT_HIGH_ENABLE
|
||||
| PACKAGE_THERM_INT_PLN_ENABLE), h);
|
||||
else
|
||||
wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
|
||||
l | (PACKAGE_THERM_INT_LOW_ENABLE
|
||||
| PACKAGE_THERM_INT_HIGH_ENABLE), h);
|
||||
}
|
||||
|
||||
smp_thermal_vector = intel_thermal_interrupt;
|
||||
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
|
||||
wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
|
||||
|
||||
/* Unmask the thermal vector: */
|
||||
l = apic_read(APIC_LVTTHMR);
|
||||
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
|
||||
|
||||
printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
|
||||
tm2 ? "TM2" : "TM1");
|
||||
|
||||
/* enable thermal throttle processing */
|
||||
atomic_set(&therm_throt_en, 1);
|
||||
}
|
||||
41
arch/x86/kernel/cpu/mcheck/threshold.c
Normal file
@@ -0,0 +1,41 @@
/*
 * Common corrected MCE threshold handler code:
 */
#include <linux/interrupt.h>
#include <linux/kernel.h>

#include <asm/irq_vectors.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/trace/irq_vectors.h>

static void default_threshold_interrupt(void)
{
	printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n",
			 THRESHOLD_APIC_VECTOR);
}

void (*mce_threshold_vector)(void) = default_threshold_interrupt;

static inline void __smp_threshold_interrupt(void)
{
	inc_irq_stat(irq_threshold_count);
	mce_threshold_vector();
}

asmlinkage __visible void smp_threshold_interrupt(void)
{
	entering_irq();
	__smp_threshold_interrupt();
	exiting_ack_irq();
}

asmlinkage __visible void smp_trace_threshold_interrupt(void)
{
	entering_irq();
	trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
	__smp_threshold_interrupt();
	trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
	exiting_ack_irq();
}
38
arch/x86/kernel/cpu/mcheck/winchip.c
Normal file
@@ -0,0 +1,38 @@
/*
 * IDT Winchip specific Machine Check Exception Reporting
 * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
 */
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>

#include <asm/processor.h>
#include <asm/mce.h>
#include <asm/msr.h>

/* Machine check handler for WinChip C6: */
static void winchip_machine_check(struct pt_regs *regs, long error_code)
{
	printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
}

/* Set up machine check reporting on the Winchip C6 series */
void winchip_mcheck_init(struct cpuinfo_x86 *c)
{
	u32 lo, hi;

	machine_check_vector = winchip_machine_check;
	/* Make sure the vector pointer is visible before we enable MCEs: */
	wmb();

	rdmsr(MSR_IDT_FCR1, lo, hi);
	lo |= (1<<2);	/* Enable EIERRINT (int 18 MCE) */
	lo &= ~(1<<4);	/* Enable MCE */
	wrmsr(MSR_IDT_FCR1, lo, hi);

	set_in_cr4(X86_CR4_MCE);

	printk(KERN_INFO
	       "Winchip machine check reporting enabled on CPU#0.\n");
}
7
arch/x86/kernel/cpu/microcode/Makefile
Normal file
@ -0,0 +1,7 @@
|
|||
microcode-y := core.o
|
||||
obj-$(CONFIG_MICROCODE) += microcode.o
|
||||
microcode-$(CONFIG_MICROCODE_INTEL) += intel.o intel_lib.o
|
||||
microcode-$(CONFIG_MICROCODE_AMD) += amd.o
|
||||
obj-$(CONFIG_MICROCODE_EARLY) += core_early.o
|
||||
obj-$(CONFIG_MICROCODE_INTEL_EARLY) += intel_early.o
|
||||
obj-$(CONFIG_MICROCODE_AMD_EARLY) += amd_early.o
|
||||
492
arch/x86/kernel/cpu/microcode/amd.c
Normal file
@ -0,0 +1,492 @@
|
|||
/*
|
||||
* AMD CPU Microcode Update Driver for Linux
|
||||
* Copyright (C) 2008-2011 Advanced Micro Devices Inc.
|
||||
*
|
||||
* Author: Peter Oruba <peter.oruba@amd.com>
|
||||
*
|
||||
* Based on work by:
|
||||
* Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
|
||||
*
|
||||
* Maintainers:
|
||||
* Andreas Herrmann <herrmann.der.user@googlemail.com>
|
||||
* Borislav Petkov <bp@alien8.de>
|
||||
*
|
||||
* This driver allows to upgrade microcode on F10h AMD
|
||||
* CPUs and later.
|
||||
*
|
||||
* Licensed under the terms of the GNU General Public
|
||||
* License version 2. See file COPYING for details.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include <linux/firmware.h>
|
||||
#include <linux/pci_ids.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pci.h>
|
||||
|
||||
#include <asm/microcode.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/microcode_amd.h>
|
||||
|
||||
MODULE_DESCRIPTION("AMD Microcode Update Driver");
|
||||
MODULE_AUTHOR("Peter Oruba");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
||||
static struct equiv_cpu_entry *equiv_cpu_table;
|
||||
|
||||
struct ucode_patch {
|
||||
struct list_head plist;
|
||||
void *data;
|
||||
u32 patch_id;
|
||||
u16 equiv_cpu;
|
||||
};
|
||||
|
||||
static LIST_HEAD(pcache);
|
||||
|
||||
static u16 __find_equiv_id(unsigned int cpu)
|
||||
{
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
|
||||
return find_equiv_id(equiv_cpu_table, uci->cpu_sig.sig);
|
||||
}
|
||||
|
||||
static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
BUG_ON(!equiv_cpu_table);
|
||||
|
||||
while (equiv_cpu_table[i].equiv_cpu != 0) {
|
||||
if (equiv_cpu == equiv_cpu_table[i].equiv_cpu)
|
||||
return equiv_cpu_table[i].installed_cpu;
|
||||
i++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* a small, trivial cache of per-family ucode patches
|
||||
*/
|
||||
static struct ucode_patch *cache_find_patch(u16 equiv_cpu)
|
||||
{
|
||||
struct ucode_patch *p;
|
||||
|
||||
list_for_each_entry(p, &pcache, plist)
|
||||
if (p->equiv_cpu == equiv_cpu)
|
||||
return p;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void update_cache(struct ucode_patch *new_patch)
|
||||
{
|
||||
struct ucode_patch *p;
|
||||
|
||||
list_for_each_entry(p, &pcache, plist) {
|
||||
if (p->equiv_cpu == new_patch->equiv_cpu) {
|
||||
if (p->patch_id >= new_patch->patch_id)
|
||||
/* we already have the latest patch */
|
||||
return;
|
||||
|
||||
list_replace(&p->plist, &new_patch->plist);
|
||||
kfree(p->data);
|
||||
kfree(p);
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* no patch found, add it */
|
||||
list_add_tail(&new_patch->plist, &pcache);
|
||||
}
|
||||
|
||||
static void free_cache(void)
|
||||
{
|
||||
struct ucode_patch *p, *tmp;
|
||||
|
||||
list_for_each_entry_safe(p, tmp, &pcache, plist) {
|
||||
__list_del(p->plist.prev, p->plist.next);
|
||||
kfree(p->data);
|
||||
kfree(p);
|
||||
}
|
||||
}
|
||||
|
||||
static struct ucode_patch *find_patch(unsigned int cpu)
|
||||
{
|
||||
u16 equiv_id;
|
||||
|
||||
equiv_id = __find_equiv_id(cpu);
|
||||
if (!equiv_id)
|
||||
return NULL;
|
||||
|
||||
return cache_find_patch(equiv_id);
|
||||
}
|
||||
|
||||
static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
|
||||
{
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
|
||||
struct ucode_patch *p;
|
||||
|
||||
csig->sig = cpuid_eax(0x00000001);
|
||||
csig->rev = c->microcode;
|
||||
|
||||
/*
|
||||
* a patch could have been loaded early, set uci->mc so that
|
||||
* mc_bp_resume() can call apply_microcode()
|
||||
*/
|
||||
p = find_patch(cpu);
|
||||
if (p && (p->patch_id == csig->rev))
|
||||
uci->mc = p->data;
|
||||
|
||||
pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned int verify_patch_size(u8 family, u32 patch_size,
|
||||
unsigned int size)
|
||||
{
|
||||
u32 max_size;
|
||||
|
||||
#define F1XH_MPB_MAX_SIZE 2048
|
||||
#define F14H_MPB_MAX_SIZE 1824
|
||||
#define F15H_MPB_MAX_SIZE 4096
|
||||
#define F16H_MPB_MAX_SIZE 3458
|
||||
|
||||
switch (family) {
|
||||
case 0x14:
|
||||
max_size = F14H_MPB_MAX_SIZE;
|
||||
break;
|
||||
case 0x15:
|
||||
max_size = F15H_MPB_MAX_SIZE;
|
||||
break;
|
||||
case 0x16:
|
||||
max_size = F16H_MPB_MAX_SIZE;
|
||||
break;
|
||||
default:
|
||||
max_size = F1XH_MPB_MAX_SIZE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (patch_size > min_t(u32, size, max_size)) {
|
||||
pr_err("patch size mismatch\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
return patch_size;
|
||||
}
|
||||
|
||||
int __apply_microcode_amd(struct microcode_amd *mc_amd)
|
||||
{
|
||||
u32 rev, dummy;
|
||||
|
||||
native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
|
||||
|
||||
/* verify patch application was successful */
|
||||
native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
|
||||
if (rev != mc_amd->hdr.patch_id)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int apply_microcode_amd(int cpu)
|
||||
{
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
struct microcode_amd *mc_amd;
|
||||
struct ucode_cpu_info *uci;
|
||||
struct ucode_patch *p;
|
||||
u32 rev, dummy;
|
||||
|
||||
BUG_ON(raw_smp_processor_id() != cpu);
|
||||
|
||||
uci = ucode_cpu_info + cpu;
|
||||
|
||||
p = find_patch(cpu);
|
||||
if (!p)
|
||||
return 0;
|
||||
|
||||
mc_amd = p->data;
|
||||
uci->mc = p->data;
|
||||
|
||||
rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
|
||||
|
||||
/* need to apply patch? */
|
||||
if (rev >= mc_amd->hdr.patch_id) {
|
||||
c->microcode = rev;
|
||||
uci->cpu_sig.rev = rev;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (__apply_microcode_amd(mc_amd)) {
|
||||
pr_err("CPU%d: update failed for patch_level=0x%08x\n",
|
||||
cpu, mc_amd->hdr.patch_id);
|
||||
return -1;
|
||||
}
|
||||
pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
|
||||
mc_amd->hdr.patch_id);
|
||||
|
||||
uci->cpu_sig.rev = mc_amd->hdr.patch_id;
|
||||
c->microcode = mc_amd->hdr.patch_id;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int install_equiv_cpu_table(const u8 *buf)
|
||||
{
|
||||
unsigned int *ibuf = (unsigned int *)buf;
|
||||
unsigned int type = ibuf[1];
|
||||
unsigned int size = ibuf[2];
|
||||
|
||||
if (type != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
|
||||
pr_err("empty section/"
|
||||
"invalid type field in container file section header\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
equiv_cpu_table = vmalloc(size);
|
||||
if (!equiv_cpu_table) {
|
||||
pr_err("failed to allocate equivalent CPU table\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size);
|
||||
|
||||
/* add header length */
|
||||
return size + CONTAINER_HDR_SZ;
|
||||
}
|
||||
|
||||
static void free_equiv_cpu_table(void)
|
||||
{
|
||||
vfree(equiv_cpu_table);
|
||||
equiv_cpu_table = NULL;
|
||||
}
|
||||
|
||||
static void cleanup(void)
|
||||
{
|
||||
free_equiv_cpu_table();
|
||||
free_cache();
|
||||
}
|
||||
|
||||
/*
|
||||
* We return the current size even if some of the checks failed so that
|
||||
* we can skip over the next patch. If we return a negative value, we
|
||||
* signal a grave error like a memory allocation has failed and the
|
||||
* driver cannot continue functioning normally. In such cases, we tear
|
||||
* down everything we've used up so far and exit.
|
||||
*/
|
||||
static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover)
|
||||
{
|
||||
struct microcode_header_amd *mc_hdr;
|
||||
struct ucode_patch *patch;
|
||||
unsigned int patch_size, crnt_size, ret;
|
||||
u32 proc_fam;
|
||||
u16 proc_id;
|
||||
|
||||
patch_size = *(u32 *)(fw + 4);
|
||||
crnt_size = patch_size + SECTION_HDR_SIZE;
|
||||
mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE);
|
||||
proc_id = mc_hdr->processor_rev_id;
|
||||
|
||||
proc_fam = find_cpu_family_by_equiv_cpu(proc_id);
|
||||
if (!proc_fam) {
|
||||
pr_err("No patch family for equiv ID: 0x%04x\n", proc_id);
|
||||
return crnt_size;
|
||||
}
|
||||
|
||||
/* check if patch is for the current family */
|
||||
proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff);
|
||||
if (proc_fam != family)
|
||||
return crnt_size;
|
||||
|
||||
if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
|
||||
pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n",
|
||||
mc_hdr->patch_id);
|
||||
return crnt_size;
|
||||
}
|
||||
|
||||
ret = verify_patch_size(family, patch_size, leftover);
|
||||
if (!ret) {
|
||||
pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id);
|
||||
return crnt_size;
|
||||
}
|
||||
|
||||
patch = kzalloc(sizeof(*patch), GFP_KERNEL);
|
||||
if (!patch) {
|
||||
pr_err("Patch allocation failure.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
patch->data = kzalloc(patch_size, GFP_KERNEL);
|
||||
if (!patch->data) {
|
||||
pr_err("Patch data allocation failure.\n");
|
||||
kfree(patch);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* All looks ok, copy patch... */
|
||||
memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size);
|
||||
INIT_LIST_HEAD(&patch->plist);
|
||||
patch->patch_id = mc_hdr->patch_id;
|
||||
patch->equiv_cpu = proc_id;
|
||||
|
||||
pr_debug("%s: Added patch_id: 0x%08x, proc_id: 0x%04x\n",
|
||||
__func__, patch->patch_id, proc_id);
|
||||
|
||||
/* ... and add to cache. */
|
||||
update_cache(patch);
|
||||
|
||||
return crnt_size;
|
||||
}
|
||||
|
||||
static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
|
||||
size_t size)
|
||||
{
|
||||
enum ucode_state ret = UCODE_ERROR;
|
||||
unsigned int leftover;
|
||||
u8 *fw = (u8 *)data;
|
||||
int crnt_size = 0;
|
||||
int offset;
|
||||
|
||||
offset = install_equiv_cpu_table(data);
|
||||
if (offset < 0) {
|
||||
pr_err("failed to create equivalent cpu table\n");
|
||||
return ret;
|
||||
}
|
||||
fw += offset;
|
||||
leftover = size - offset;
|
||||
|
||||
if (*(u32 *)fw != UCODE_UCODE_TYPE) {
|
||||
pr_err("invalid type field in container file section header\n");
|
||||
free_equiv_cpu_table();
|
||||
return ret;
|
||||
}
|
||||
|
||||
while (leftover) {
|
||||
crnt_size = verify_and_add_patch(family, fw, leftover);
|
||||
if (crnt_size < 0)
|
||||
return ret;
|
||||
|
||||
fw += crnt_size;
|
||||
leftover -= crnt_size;
|
||||
}
|
||||
|
||||
return UCODE_OK;
|
||||
}
|
||||
|
||||
enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size)
|
||||
{
|
||||
enum ucode_state ret;
|
||||
|
||||
/* free old equiv table */
|
||||
free_equiv_cpu_table();
|
||||
|
||||
ret = __load_microcode_amd(family, data, size);
|
||||
|
||||
if (ret != UCODE_OK)
|
||||
cleanup();
|
||||
|
||||
#if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32)
|
||||
/* save BSP's matching patch for early load */
|
||||
if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) {
|
||||
struct ucode_patch *p = find_patch(cpu);
|
||||
if (p) {
|
||||
memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
|
||||
memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data),
|
||||
PATCH_MAX_SIZE));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* AMD microcode firmware naming convention, up to family 15h they are in
|
||||
* the legacy file:
|
||||
*
|
||||
* amd-ucode/microcode_amd.bin
|
||||
*
|
||||
* This legacy file is always smaller than 2K in size.
|
||||
*
|
||||
* Beginning with family 15h, they are in family-specific firmware files:
|
||||
*
|
||||
* amd-ucode/microcode_amd_fam15h.bin
|
||||
* amd-ucode/microcode_amd_fam16h.bin
|
||||
* ...
|
||||
*
|
||||
* These might be larger than 2K.
|
||||
*/
|
||||
static enum ucode_state request_microcode_amd(int cpu, struct device *device,
|
||||
bool refresh_fw)
|
||||
{
|
||||
char fw_name[36] = "amd-ucode/microcode_amd.bin";
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
enum ucode_state ret = UCODE_NFOUND;
|
||||
const struct firmware *fw;
|
||||
|
||||
/* reload ucode container only on the boot cpu */
|
||||
if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
|
||||
return UCODE_OK;
|
||||
|
||||
if (c->x86 >= 0x15)
|
||||
snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
|
||||
|
||||
if (request_firmware_direct(&fw, (const char *)fw_name, device)) {
|
||||
pr_debug("failed to load file %s\n", fw_name);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = UCODE_ERROR;
|
||||
if (*(u32 *)fw->data != UCODE_MAGIC) {
|
||||
pr_err("invalid magic value (0x%08x)\n", *(u32 *)fw->data);
|
||||
goto fw_release;
|
||||
}
|
||||
|
||||
ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size);
|
||||
|
||||
fw_release:
|
||||
release_firmware(fw);
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static enum ucode_state
|
||||
request_microcode_user(int cpu, const void __user *buf, size_t size)
|
||||
{
|
||||
return UCODE_ERROR;
|
||||
}
|
||||
|
||||
static void microcode_fini_cpu_amd(int cpu)
|
||||
{
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
|
||||
|
||||
uci->mc = NULL;
|
||||
}
|
||||
|
||||
static struct microcode_ops microcode_amd_ops = {
|
||||
.request_microcode_user = request_microcode_user,
|
||||
.request_microcode_fw = request_microcode_amd,
|
||||
.collect_cpu_info = collect_cpu_info_amd,
|
||||
.apply_microcode = apply_microcode_amd,
|
||||
.microcode_fini_cpu = microcode_fini_cpu_amd,
|
||||
};
|
||||
|
||||
struct microcode_ops * __init init_amd_microcode(void)
|
||||
{
|
||||
struct cpuinfo_x86 *c = &cpu_data(0);
|
||||
|
||||
if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
|
||||
pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return &microcode_amd_ops;
|
||||
}
|
||||
|
||||
void __exit exit_amd_microcode(void)
|
||||
{
|
||||
cleanup();
|
||||
}
|
||||
422
arch/x86/kernel/cpu/microcode/amd_early.c
Normal file
@ -0,0 +1,422 @@
|
|||
/*
|
||||
* Copyright (C) 2013 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Author: Jacob Shin <jacob.shin@amd.com>
|
||||
* Fixes: Borislav Petkov <bp@suse.de>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/earlycpio.h>
|
||||
#include <linux/initrd.h>
|
||||
|
||||
#include <asm/cpu.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/microcode_amd.h>
|
||||
|
||||
/*
|
||||
* This points to the current valid container of microcode patches which we will
|
||||
* save from the initrd before jettisoning its contents.
|
||||
*/
|
||||
static u8 *container;
|
||||
static size_t container_size;
|
||||
|
||||
static u32 ucode_new_rev;
|
||||
u8 amd_ucode_patch[PATCH_MAX_SIZE];
|
||||
static u16 this_equiv_id;
|
||||
|
||||
static struct cpio_data ucode_cpio;
|
||||
|
||||
/*
|
||||
* Microcode patch container file is prepended to the initrd in cpio format.
|
||||
* See Documentation/x86/early-microcode.txt
|
||||
*/
|
||||
static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin";
|
||||
|
||||
static struct cpio_data __init find_ucode_in_initrd(void)
|
||||
{
|
||||
long offset = 0;
|
||||
char *path;
|
||||
void *start;
|
||||
size_t size;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
struct boot_params *p;
|
||||
|
||||
/*
|
||||
* On 32-bit, early load occurs before paging is turned on so we need
|
||||
* to use physical addresses.
|
||||
*/
|
||||
p = (struct boot_params *)__pa_nodebug(&boot_params);
|
||||
path = (char *)__pa_nodebug(ucode_path);
|
||||
start = (void *)p->hdr.ramdisk_image;
|
||||
size = p->hdr.ramdisk_size;
|
||||
#else
|
||||
path = ucode_path;
|
||||
start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET);
|
||||
size = boot_params.hdr.ramdisk_size;
|
||||
#endif
|
||||
|
||||
return find_cpio_data(path, start, size, &offset);
|
||||
}
|
||||
|
||||
static size_t compute_container_size(u8 *data, u32 total_size)
|
||||
{
|
||||
size_t size = 0;
|
||||
u32 *header = (u32 *)data;
|
||||
|
||||
if (header[0] != UCODE_MAGIC ||
|
||||
header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
|
||||
header[2] == 0) /* size */
|
||||
return size;
|
||||
|
||||
size = header[2] + CONTAINER_HDR_SZ;
|
||||
total_size -= size;
|
||||
data += size;
|
||||
|
||||
while (total_size) {
|
||||
u16 patch_size;
|
||||
|
||||
header = (u32 *)data;
|
||||
|
||||
if (header[0] != UCODE_UCODE_TYPE)
|
||||
break;
|
||||
|
||||
/*
|
||||
* Sanity-check patch size.
|
||||
*/
|
||||
patch_size = header[1];
|
||||
if (patch_size > PATCH_MAX_SIZE)
|
||||
break;
|
||||
|
||||
size += patch_size + SECTION_HDR_SIZE;
|
||||
data += patch_size + SECTION_HDR_SIZE;
|
||||
total_size -= patch_size + SECTION_HDR_SIZE;
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Early load occurs before we can vmalloc(). So we look for the microcode
|
||||
* patch container file in initrd, traverse equivalent cpu table, look for a
|
||||
* matching microcode patch, and update, all in initrd memory in place.
|
||||
* When vmalloc() is available for use later -- on 64-bit during first AP load,
|
||||
* and on 32-bit during save_microcode_in_initrd_amd() -- we can call
|
||||
* load_microcode_amd() to save equivalent cpu table and microcode patches in
|
||||
* kernel heap memory.
|
||||
*/
|
||||
static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch)
|
||||
{
|
||||
struct equiv_cpu_entry *eq;
|
||||
size_t *cont_sz;
|
||||
u32 *header;
|
||||
u8 *data, **cont;
|
||||
u8 (*patch)[PATCH_MAX_SIZE];
|
||||
u16 eq_id = 0;
|
||||
int offset, left;
|
||||
u32 rev, eax, ebx, ecx, edx;
|
||||
u32 *new_rev;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
|
||||
cont_sz = (size_t *)__pa_nodebug(&container_size);
|
||||
cont = (u8 **)__pa_nodebug(&container);
|
||||
patch = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
|
||||
#else
|
||||
new_rev = &ucode_new_rev;
|
||||
cont_sz = &container_size;
|
||||
cont = &container;
|
||||
patch = &amd_ucode_patch;
|
||||
#endif
|
||||
|
||||
data = ucode;
|
||||
left = size;
|
||||
header = (u32 *)data;
|
||||
|
||||
/* find equiv cpu table */
|
||||
if (header[0] != UCODE_MAGIC ||
|
||||
header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
|
||||
header[2] == 0) /* size */
|
||||
return;
|
||||
|
||||
eax = 0x00000001;
|
||||
ecx = 0;
|
||||
native_cpuid(&eax, &ebx, &ecx, &edx);
|
||||
|
||||
while (left > 0) {
|
||||
eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ);
|
||||
|
||||
*cont = data;
|
||||
|
||||
/* Advance past the container header */
|
||||
offset = header[2] + CONTAINER_HDR_SZ;
|
||||
data += offset;
|
||||
left -= offset;
|
||||
|
||||
eq_id = find_equiv_id(eq, eax);
|
||||
if (eq_id) {
|
||||
this_equiv_id = eq_id;
|
||||
*cont_sz = compute_container_size(*cont, left + offset);
|
||||
|
||||
/*
|
||||
* truncate how much we need to iterate over in the
|
||||
* ucode update loop below
|
||||
*/
|
||||
left = *cont_sz - offset;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* support multiple container files appended together. if this
|
||||
* one does not have a matching equivalent cpu entry, we fast
|
||||
* forward to the next container file.
|
||||
*/
|
||||
while (left > 0) {
|
||||
header = (u32 *)data;
|
||||
if (header[0] == UCODE_MAGIC &&
|
||||
header[1] == UCODE_EQUIV_CPU_TABLE_TYPE)
|
||||
break;
|
||||
|
||||
offset = header[1] + SECTION_HDR_SIZE;
|
||||
data += offset;
|
||||
left -= offset;
|
||||
}
|
||||
|
||||
/* mark where the next microcode container file starts */
|
||||
offset = data - (u8 *)ucode;
|
||||
ucode = data;
|
||||
}
|
||||
|
||||
if (!eq_id) {
|
||||
*cont = NULL;
|
||||
*cont_sz = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* find ucode and update if needed */
|
||||
|
||||
native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
|
||||
|
||||
while (left > 0) {
|
||||
struct microcode_amd *mc;
|
||||
|
||||
header = (u32 *)data;
|
||||
if (header[0] != UCODE_UCODE_TYPE || /* type */
|
||||
header[1] == 0) /* size */
|
||||
break;
|
||||
|
||||
mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE);
|
||||
|
||||
if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) {
|
||||
|
||||
if (!__apply_microcode_amd(mc)) {
|
||||
rev = mc->hdr.patch_id;
|
||||
*new_rev = rev;
|
||||
|
||||
if (save_patch)
|
||||
memcpy(patch, mc,
|
||||
min_t(u32, header[1], PATCH_MAX_SIZE));
|
||||
}
|
||||
}
|
||||
|
||||
offset = header[1] + SECTION_HDR_SIZE;
|
||||
data += offset;
|
||||
left -= offset;
|
||||
}
|
||||
}
|
||||
|
||||
void __init load_ucode_amd_bsp(void)
|
||||
{
|
||||
struct cpio_data cp;
|
||||
void **data;
|
||||
size_t *size;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
data = (void **)__pa_nodebug(&ucode_cpio.data);
|
||||
size = (size_t *)__pa_nodebug(&ucode_cpio.size);
|
||||
#else
|
||||
data = &ucode_cpio.data;
|
||||
size = &ucode_cpio.size;
|
||||
#endif
|
||||
|
||||
cp = find_ucode_in_initrd();
|
||||
if (!cp.data)
|
||||
return;
|
||||
|
||||
*data = cp.data;
|
||||
*size = cp.size;
|
||||
|
||||
apply_ucode_in_initrd(cp.data, cp.size, true);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* On 32-bit, since AP's early load occurs before paging is turned on, we
|
||||
* cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during
|
||||
* cold boot, AP will apply_ucode_in_initrd() just like the BSP. During
|
||||
* save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch,
|
||||
* which is used upon resume from suspend.
|
||||
*/
|
||||
void load_ucode_amd_ap(void)
|
||||
{
|
||||
struct microcode_amd *mc;
|
||||
size_t *usize;
|
||||
void **ucode;
|
||||
|
||||
mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
|
||||
if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
|
||||
__apply_microcode_amd(mc);
|
||||
return;
|
||||
}
|
||||
|
||||
ucode = (void *)__pa_nodebug(&container);
|
||||
usize = (size_t *)__pa_nodebug(&container_size);
|
||||
|
||||
if (!*ucode || !*usize)
|
||||
return;
|
||||
|
||||
apply_ucode_in_initrd(*ucode, *usize, false);
|
||||
}
|
||||
|
||||
static void __init collect_cpu_sig_on_bsp(void *arg)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
|
||||
|
||||
uci->cpu_sig.sig = cpuid_eax(0x00000001);
|
||||
}
|
||||
|
||||
static void __init get_bsp_sig(void)
|
||||
{
|
||||
unsigned int bsp = boot_cpu_data.cpu_index;
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
|
||||
|
||||
if (!uci->cpu_sig.sig)
|
||||
smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
|
||||
}
|
||||
#else
|
||||
void load_ucode_amd_ap(void)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
|
||||
struct equiv_cpu_entry *eq;
|
||||
struct microcode_amd *mc;
|
||||
u32 rev, eax;
|
||||
u16 eq_id;
|
||||
|
||||
/* Exit if called on the BSP. */
|
||||
if (!cpu)
|
||||
return;
|
||||
|
||||
if (!container)
|
||||
return;
|
||||
|
||||
rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
|
||||
|
||||
uci->cpu_sig.rev = rev;
|
||||
uci->cpu_sig.sig = eax;
|
||||
|
||||
eax = cpuid_eax(0x00000001);
|
||||
eq = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ);
|
||||
|
||||
eq_id = find_equiv_id(eq, eax);
|
||||
if (!eq_id)
|
||||
return;
|
||||
|
||||
if (eq_id == this_equiv_id) {
|
||||
mc = (struct microcode_amd *)amd_ucode_patch;
|
||||
|
||||
if (mc && rev < mc->hdr.patch_id) {
|
||||
if (!__apply_microcode_amd(mc))
|
||||
ucode_new_rev = mc->hdr.patch_id;
|
||||
}
|
||||
|
||||
} else {
|
||||
if (!ucode_cpio.data)
|
||||
return;
|
||||
|
||||
/*
|
||||
* AP has a different equivalence ID than BSP, looks like
|
||||
* mixed-steppings silicon so go through the ucode blob anew.
|
||||
*/
|
||||
apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size, false);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
int __init save_microcode_in_initrd_amd(void)
|
||||
{
|
||||
unsigned long cont;
|
||||
int retval = 0;
|
||||
enum ucode_state ret;
|
||||
u8 *cont_va;
|
||||
u32 eax;
|
||||
|
||||
if (!container)
|
||||
return -EINVAL;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
get_bsp_sig();
|
||||
cont = (unsigned long)container;
|
||||
cont_va = __va(container);
|
||||
#else
|
||||
/*
|
||||
* We need the physical address of the container for both bitness since
|
||||
* boot_params.hdr.ramdisk_image is a physical address.
|
||||
*/
|
||||
cont = __pa(container);
|
||||
cont_va = container;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Take into account the fact that the ramdisk might get relocated and
|
||||
* therefore we need to recompute the container's position in virtual
|
||||
* memory space.
|
||||
*/
|
||||
if (relocated_ramdisk)
|
||||
container = (u8 *)(__va(relocated_ramdisk) +
|
||||
(cont - boot_params.hdr.ramdisk_image));
|
||||
else
|
||||
container = cont_va;
|
||||
|
||||
if (ucode_new_rev)
|
||||
pr_info("microcode: updated early to new patch_level=0x%08x\n",
|
||||
ucode_new_rev);
|
||||
|
||||
eax = cpuid_eax(0x00000001);
|
||||
eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
|
||||
|
||||
ret = load_microcode_amd(smp_processor_id(), eax, container, container_size);
|
||||
if (ret != UCODE_OK)
|
||||
retval = -EINVAL;
|
||||
|
||||
/*
|
||||
* This will be freed any msec now, stash patches for the current
|
||||
* family and switch to patch cache for cpu hotplug, etc later.
|
||||
*/
|
||||
container = NULL;
|
||||
container_size = 0;
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
void reload_ucode_amd(void)
|
||||
{
|
||||
struct microcode_amd *mc;
|
||||
u32 rev, eax;
|
||||
|
||||
rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
|
||||
|
||||
mc = (struct microcode_amd *)amd_ucode_patch;
|
||||
|
||||
if (mc && rev < mc->hdr.patch_id) {
|
||||
if (!__apply_microcode_amd(mc)) {
|
||||
ucode_new_rev = mc->hdr.patch_id;
|
||||
pr_info("microcode: reload patch_level=0x%08x\n",
|
||||
ucode_new_rev);
|
||||
}
|
||||
}
|
||||
}
|
||||
653
arch/x86/kernel/cpu/microcode/core.c
Normal file
@ -0,0 +1,653 @@
|
|||
/*
|
||||
* Intel CPU Microcode Update Driver for Linux
|
||||
*
|
||||
* Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
|
||||
* 2006 Shaohua Li <shaohua.li@intel.com>
|
||||
*
|
||||
* This driver allows to upgrade microcode on Intel processors
|
||||
* belonging to IA-32 family - PentiumPro, Pentium II,
|
||||
* Pentium III, Xeon, Pentium 4, etc.
|
||||
*
|
||||
* Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture
|
||||
* Software Developer's Manual
|
||||
* Order Number 253668 or free download from:
|
||||
*
|
||||
* http://developer.intel.com/Assets/PDF/manual/253668.pdf
|
||||
*
|
||||
* For more information, go to http://www.urbanmyth.org/microcode
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com>
|
||||
* Initial release.
|
||||
* 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com>
|
||||
* Added read() support + cleanups.
|
||||
* 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com>
|
||||
* Added 'device trimming' support. open(O_WRONLY) zeroes
|
||||
* and frees the saved copy of applied microcode.
|
||||
* 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com>
|
||||
* Made to use devfs (/dev/cpu/microcode) + cleanups.
|
||||
* 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com>
|
||||
* Added misc device support (now uses both devfs and misc).
|
||||
* Added MICROCODE_IOCFREE ioctl to clear memory.
|
||||
* 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com>
|
||||
* Messages for error cases (non Intel & no suitable microcode).
|
||||
* 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
|
||||
* Removed ->release(). Removed exclusive open and status bitmap.
|
||||
* Added microcode_rwsem to serialize read()/write()/ioctl().
|
||||
* Removed global kernel lock usage.
|
||||
* 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
|
||||
* Write 0 to 0x8B msr and then cpuid before reading revision,
|
||||
* so that it works even if there were no update done by the
|
||||
* BIOS. Otherwise, reading from 0x8B gives junk (which happened
|
||||
* to be 0 on my machine which is why it worked even when I
|
||||
* disabled update by the BIOS)
|
||||
* Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
|
||||
* 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
|
||||
* Tigran Aivazian <tigran@veritas.com>
|
||||
* Intel Pentium 4 processor support and bugfixes.
|
||||
* 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
|
||||
* Bugfix for HT (Hyper-Threading) enabled processors
|
||||
* whereby processor resources are shared by all logical processors
|
||||
* in a single CPU package.
|
||||
* 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
|
||||
* Tigran Aivazian <tigran@veritas.com>,
|
||||
* Serialize updates as required on HT processors due to
|
||||
* speculative nature of implementation.
|
||||
* 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
|
||||
* Fix the panic when writing zero-length microcode chunk.
|
||||
* 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
|
||||
* Jun Nakajima <jun.nakajima@intel.com>
|
||||
* Support for the microcode updates in the new format.
|
||||
* 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
|
||||
* Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
|
||||
* because we no longer hold a copy of applied microcode
|
||||
* in kernel memory.
|
||||
* 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
|
||||
* Fix sigmatch() macro to handle old CPUs with pf == 0.
|
||||
* Thanks to Stuart Swales for pointing out this bug.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/capability.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/syscore_ops.h>
|
||||
|
||||
#include <asm/microcode.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/perf_event.h>
|
||||
|
||||
MODULE_DESCRIPTION("Microcode Update Driver");
|
||||
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
#define MICROCODE_VERSION "2.00"
|
||||
|
||||
static struct microcode_ops *microcode_ops;
|
||||
|
||||
bool dis_ucode_ldr;
|
||||
module_param(dis_ucode_ldr, bool, 0);
|
||||
|
||||
/*
|
||||
* Synchronization.
|
||||
*
|
||||
* All non cpu-hotplug-callback call sites use:
|
||||
*
|
||||
* - microcode_mutex to synchronize with each other;
|
||||
* - get/put_online_cpus() to synchronize with
|
||||
* the cpu-hotplug-callback call sites.
|
||||
*
|
||||
* We guarantee that only a single cpu is being
|
||||
* updated at any particular moment of time.
|
||||
*/
|
||||
static DEFINE_MUTEX(microcode_mutex);
|
||||
|
||||
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
|
||||
EXPORT_SYMBOL_GPL(ucode_cpu_info);
|
||||
|
||||
/*
|
||||
* Operations that are run on a target cpu:
|
||||
*/
|
||||
|
||||
struct cpu_info_ctx {
|
||||
struct cpu_signature *cpu_sig;
|
||||
int err;
|
||||
};
|
||||
|
||||
static void collect_cpu_info_local(void *arg)
|
||||
{
|
||||
struct cpu_info_ctx *ctx = arg;
|
||||
|
||||
ctx->err = microcode_ops->collect_cpu_info(smp_processor_id(),
|
||||
ctx->cpu_sig);
|
||||
}
|
||||
|
||||
static int collect_cpu_info_on_target(int cpu, struct cpu_signature *cpu_sig)
|
||||
{
|
||||
struct cpu_info_ctx ctx = { .cpu_sig = cpu_sig, .err = 0 };
|
||||
int ret;
|
||||
|
||||
ret = smp_call_function_single(cpu, collect_cpu_info_local, &ctx, 1);
|
||||
if (!ret)
|
||||
ret = ctx.err;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int collect_cpu_info(int cpu)
|
||||
{
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
|
||||
int ret;
|
||||
|
||||
memset(uci, 0, sizeof(*uci));
|
||||
|
||||
ret = collect_cpu_info_on_target(cpu, &uci->cpu_sig);
|
||||
if (!ret)
|
||||
uci->valid = 1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct apply_microcode_ctx {
|
||||
int err;
|
||||
};
|
||||
|
||||
static void apply_microcode_local(void *arg)
|
||||
{
|
||||
struct apply_microcode_ctx *ctx = arg;
|
||||
|
||||
ctx->err = microcode_ops->apply_microcode(smp_processor_id());
|
||||
}
|
||||
|
||||
static int apply_microcode_on_target(int cpu)
|
||||
{
|
||||
struct apply_microcode_ctx ctx = { .err = 0 };
|
||||
int ret;
|
||||
|
||||
ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1);
|
||||
if (!ret)
|
||||
ret = ctx.err;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MICROCODE_OLD_INTERFACE
|
||||
static int do_microcode_update(const void __user *buf, size_t size)
|
||||
{
|
||||
int error = 0;
|
||||
int cpu;
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
|
||||
enum ucode_state ustate;
|
||||
|
||||
if (!uci->valid)
|
||||
continue;
|
||||
|
||||
ustate = microcode_ops->request_microcode_user(cpu, buf, size);
|
||||
if (ustate == UCODE_ERROR) {
|
||||
error = -1;
|
||||
break;
|
||||
} else if (ustate == UCODE_OK)
|
||||
apply_microcode_on_target(cpu);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static int microcode_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return capable(CAP_SYS_RAWIO) ? nonseekable_open(inode, file) : -EPERM;
|
||||
}
|
||||
|
||||
static ssize_t microcode_write(struct file *file, const char __user *buf,
|
||||
size_t len, loff_t *ppos)
|
||||
{
|
||||
ssize_t ret = -EINVAL;
|
||||
|
||||
if ((len >> PAGE_SHIFT) > totalram_pages) {
|
||||
pr_err("too much data (max %ld pages)\n", totalram_pages);
|
||||
return ret;
|
||||
}
|
||||
|
||||
get_online_cpus();
|
||||
mutex_lock(&microcode_mutex);
|
||||
|
||||
if (do_microcode_update(buf, len) == 0)
|
||||
ret = (ssize_t)len;
|
||||
|
||||
if (ret > 0)
|
||||
perf_check_microcode();
|
||||
|
||||
mutex_unlock(&microcode_mutex);
|
||||
put_online_cpus();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct file_operations microcode_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.write = microcode_write,
|
||||
.open = microcode_open,
|
||||
.llseek = no_llseek,
|
||||
};
|
||||
|
||||
static struct miscdevice microcode_dev = {
|
||||
.minor = MICROCODE_MINOR,
|
||||
.name = "microcode",
|
||||
.nodename = "cpu/microcode",
|
||||
.fops = &microcode_fops,
|
||||
};
|
||||
|
||||
static int __init microcode_dev_init(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
error = misc_register(&microcode_dev);
|
||||
if (error) {
|
||||
pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR);
|
||||
return error;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit microcode_dev_exit(void)
|
||||
{
|
||||
misc_deregister(&microcode_dev);
|
||||
}
|
||||
|
||||
MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
|
||||
MODULE_ALIAS("devname:cpu/microcode");
|
||||
#else
|
||||
#define microcode_dev_init() 0
|
||||
#define microcode_dev_exit() do { } while (0)
|
||||
#endif
|
||||
|
||||
/* fake device for request_firmware */
|
||||
static struct platform_device *microcode_pdev;
|
||||
|
||||
static int reload_for_cpu(int cpu)
|
||||
{
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
|
||||
enum ucode_state ustate;
|
||||
int err = 0;
|
||||
|
||||
if (!uci->valid)
|
||||
return err;
|
||||
|
||||
ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
|
||||
if (ustate == UCODE_OK)
|
||||
apply_microcode_on_target(cpu);
|
||||
else
|
||||
if (ustate == UCODE_ERROR)
|
||||
err = -EINVAL;
|
||||
return err;
|
||||
}
|
||||
|
||||
static ssize_t reload_store(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t size)
|
||||
{
|
||||
unsigned long val;
|
||||
int cpu;
|
||||
ssize_t ret = 0, tmp_ret;
|
||||
|
||||
ret = kstrtoul(buf, 0, &val);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (val != 1)
|
||||
return size;
|
||||
|
||||
get_online_cpus();
|
||||
mutex_lock(&microcode_mutex);
|
||||
for_each_online_cpu(cpu) {
|
||||
tmp_ret = reload_for_cpu(cpu);
|
||||
if (tmp_ret != 0)
|
||||
pr_warn("Error reloading microcode on CPU %d\n", cpu);
|
||||
|
||||
/* save retval of the first encountered reload error */
|
||||
if (!ret)
|
||||
ret = tmp_ret;
|
||||
}
|
||||
if (!ret)
|
||||
perf_check_microcode();
|
||||
mutex_unlock(&microcode_mutex);
|
||||
put_online_cpus();
|
||||
|
||||
if (!ret)
|
||||
ret = size;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t version_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
|
||||
|
||||
return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
|
||||
}
|
||||
|
||||
static ssize_t pf_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
|
||||
|
||||
return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(reload, 0200, NULL, reload_store);
|
||||
static DEVICE_ATTR(version, 0400, version_show, NULL);
|
||||
static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL);
|
||||
|
||||
static struct attribute *mc_default_attrs[] = {
|
||||
&dev_attr_version.attr,
|
||||
&dev_attr_processor_flags.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute_group mc_attr_group = {
|
||||
.attrs = mc_default_attrs,
|
||||
.name = "microcode",
|
||||
};
|
||||
|
||||
static void microcode_fini_cpu(int cpu)
|
||||
{
|
||||
microcode_ops->microcode_fini_cpu(cpu);
|
||||
}
|
||||
|
||||
static enum ucode_state microcode_resume_cpu(int cpu)
|
||||
{
|
||||
pr_debug("CPU%d updated upon resume\n", cpu);
|
||||
|
||||
if (apply_microcode_on_target(cpu))
|
||||
return UCODE_ERROR;
|
||||
|
||||
return UCODE_OK;
|
||||
}
|
||||
|
||||
static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
|
||||
{
|
||||
enum ucode_state ustate;
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
|
||||
|
||||
if (uci && uci->valid)
|
||||
return UCODE_OK;
|
||||
|
||||
if (collect_cpu_info(cpu))
|
||||
return UCODE_ERROR;
|
||||
|
||||
/* --dimm. Trigger a delayed update? */
|
||||
if (system_state != SYSTEM_RUNNING)
|
||||
return UCODE_NFOUND;
|
||||
|
||||
ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev,
|
||||
refresh_fw);
|
||||
|
||||
if (ustate == UCODE_OK) {
|
||||
pr_debug("CPU%d updated upon init\n", cpu);
|
||||
apply_microcode_on_target(cpu);
|
||||
}
|
||||
|
||||
return ustate;
|
||||
}
|
||||
|
||||
static enum ucode_state microcode_update_cpu(int cpu)
|
||||
{
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
|
||||
|
||||
if (uci->valid)
|
||||
return microcode_resume_cpu(cpu);
|
||||
|
||||
return microcode_init_cpu(cpu, false);
|
||||
}
|
||||
|
||||
static int mc_device_add(struct device *dev, struct subsys_interface *sif)
|
||||
{
|
||||
int err, cpu = dev->id;
|
||||
|
||||
if (!cpu_online(cpu))
|
||||
return 0;
|
||||
|
||||
pr_debug("CPU%d added\n", cpu);
|
||||
|
||||
err = sysfs_create_group(&dev->kobj, &mc_attr_group);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (microcode_init_cpu(cpu, true) == UCODE_ERROR)
|
||||
return -EINVAL;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int mc_device_remove(struct device *dev, struct subsys_interface *sif)
|
||||
{
|
||||
int cpu = dev->id;
|
||||
|
||||
if (!cpu_online(cpu))
|
||||
return 0;
|
||||
|
||||
pr_debug("CPU%d removed\n", cpu);
|
||||
microcode_fini_cpu(cpu);
|
||||
sysfs_remove_group(&dev->kobj, &mc_attr_group);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct subsys_interface mc_cpu_interface = {
|
||||
.name = "microcode",
|
||||
.subsys = &cpu_subsys,
|
||||
.add_dev = mc_device_add,
|
||||
.remove_dev = mc_device_remove,
|
||||
};
|
||||
|
||||
/**
|
||||
* mc_bp_resume - Update boot CPU microcode during resume.
|
||||
*/
|
||||
static void mc_bp_resume(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
|
||||
|
||||
if (uci->valid && uci->mc)
|
||||
microcode_ops->apply_microcode(cpu);
|
||||
else if (!uci->mc)
|
||||
reload_early_microcode();
|
||||
}
|
||||
|
||||
static struct syscore_ops mc_syscore_ops = {
|
||||
.resume = mc_bp_resume,
|
||||
};
|
||||
|
||||
static int
|
||||
mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
|
||||
{
|
||||
unsigned int cpu = (unsigned long)hcpu;
|
||||
struct device *dev;
|
||||
|
||||
dev = get_cpu_device(cpu);
|
||||
|
||||
switch (action & ~CPU_TASKS_FROZEN) {
|
||||
case CPU_ONLINE:
|
||||
microcode_update_cpu(cpu);
|
||||
pr_debug("CPU%d added\n", cpu);
|
||||
/*
|
||||
* "break" is missing on purpose here because we want to fall
|
||||
* through in order to create the sysfs group.
|
||||
*/
|
||||
|
||||
case CPU_DOWN_FAILED:
|
||||
if (sysfs_create_group(&dev->kobj, &mc_attr_group))
|
||||
pr_err("Failed to create group for CPU%d\n", cpu);
|
||||
break;
|
||||
|
||||
case CPU_DOWN_PREPARE:
|
||||
/* Suspend is in progress, only remove the interface */
|
||||
sysfs_remove_group(&dev->kobj, &mc_attr_group);
|
||||
pr_debug("CPU%d removed\n", cpu);
|
||||
break;
|
||||
|
||||
/*
|
||||
* case CPU_DEAD:
|
||||
*
|
||||
* When a CPU goes offline, don't free up or invalidate the copy of
|
||||
* the microcode in kernel memory, so that we can reuse it when the
|
||||
* CPU comes back online without unnecessarily requesting the userspace
|
||||
* for it again.
|
||||
*/
|
||||
}
|
||||
|
||||
/* The CPU refused to come up during a system resume */
|
||||
if (action == CPU_UP_CANCELED_FROZEN)
|
||||
microcode_fini_cpu(cpu);
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block __refdata mc_cpu_notifier = {
|
||||
.notifier_call = mc_cpu_callback,
|
||||
};
|
||||
|
||||
#ifdef MODULE
|
||||
/* Autoload on Intel and AMD systems */
|
||||
static const struct x86_cpu_id __initconst microcode_id[] = {
|
||||
#ifdef CONFIG_MICROCODE_INTEL
|
||||
{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, },
|
||||
#endif
|
||||
#ifdef CONFIG_MICROCODE_AMD
|
||||
{ X86_VENDOR_AMD, X86_FAMILY_ANY, X86_MODEL_ANY, },
|
||||
#endif
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, microcode_id);
|
||||
#endif
|
||||
|
||||
static struct attribute *cpu_root_microcode_attrs[] = {
|
||||
&dev_attr_reload.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute_group cpu_root_microcode_group = {
|
||||
.name = "microcode",
|
||||
.attrs = cpu_root_microcode_attrs,
|
||||
};
|
||||
|
||||
static int __init microcode_init(void)
|
||||
{
|
||||
struct cpuinfo_x86 *c = &cpu_data(0);
|
||||
int error;
|
||||
|
||||
if (paravirt_enabled() || dis_ucode_ldr)
|
||||
return -EINVAL;
|
||||
|
||||
if (c->x86_vendor == X86_VENDOR_INTEL)
|
||||
microcode_ops = init_intel_microcode();
|
||||
else if (c->x86_vendor == X86_VENDOR_AMD)
|
||||
microcode_ops = init_amd_microcode();
|
||||
else
|
||||
pr_err("no support for this CPU vendor\n");
|
||||
|
||||
if (!microcode_ops)
|
||||
return -ENODEV;
|
||||
|
||||
microcode_pdev = platform_device_register_simple("microcode", -1,
|
||||
NULL, 0);
|
||||
if (IS_ERR(microcode_pdev))
|
||||
return PTR_ERR(microcode_pdev);
|
||||
|
||||
get_online_cpus();
|
||||
mutex_lock(&microcode_mutex);
|
||||
|
||||
error = subsys_interface_register(&mc_cpu_interface);
|
||||
if (!error)
|
||||
perf_check_microcode();
|
||||
mutex_unlock(&microcode_mutex);
|
||||
put_online_cpus();
|
||||
|
||||
if (error)
|
||||
goto out_pdev;
|
||||
|
||||
error = sysfs_create_group(&cpu_subsys.dev_root->kobj,
|
||||
&cpu_root_microcode_group);
|
||||
|
||||
if (error) {
|
||||
pr_err("Error creating microcode group!\n");
|
||||
goto out_driver;
|
||||
}
|
||||
|
||||
error = microcode_dev_init();
|
||||
if (error)
|
||||
goto out_ucode_group;
|
||||
|
||||
register_syscore_ops(&mc_syscore_ops);
|
||||
register_hotcpu_notifier(&mc_cpu_notifier);
|
||||
|
||||
pr_info("Microcode Update Driver: v" MICROCODE_VERSION
|
||||
" <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n");
|
||||
|
||||
return 0;
|
||||
|
||||
out_ucode_group:
|
||||
sysfs_remove_group(&cpu_subsys.dev_root->kobj,
|
||||
&cpu_root_microcode_group);
|
||||
|
||||
out_driver:
|
||||
get_online_cpus();
|
||||
mutex_lock(&microcode_mutex);
|
||||
|
||||
subsys_interface_unregister(&mc_cpu_interface);
|
||||
|
||||
mutex_unlock(&microcode_mutex);
|
||||
put_online_cpus();
|
||||
|
||||
out_pdev:
|
||||
platform_device_unregister(microcode_pdev);
|
||||
return error;
|
||||
|
||||
}
|
||||
module_init(microcode_init);
|
||||
|
||||
static void __exit microcode_exit(void)
|
||||
{
|
||||
struct cpuinfo_x86 *c = &cpu_data(0);
|
||||
|
||||
microcode_dev_exit();
|
||||
|
||||
unregister_hotcpu_notifier(&mc_cpu_notifier);
|
||||
unregister_syscore_ops(&mc_syscore_ops);
|
||||
|
||||
sysfs_remove_group(&cpu_subsys.dev_root->kobj,
|
||||
&cpu_root_microcode_group);
|
||||
|
||||
get_online_cpus();
|
||||
mutex_lock(&microcode_mutex);
|
||||
|
||||
subsys_interface_unregister(&mc_cpu_interface);
|
||||
|
||||
mutex_unlock(&microcode_mutex);
|
||||
put_online_cpus();
|
||||
|
||||
platform_device_unregister(microcode_pdev);
|
||||
|
||||
microcode_ops = NULL;
|
||||
|
||||
if (c->x86_vendor == X86_VENDOR_AMD)
|
||||
exit_amd_microcode();
|
||||
|
||||
pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
|
||||
}
|
||||
module_exit(microcode_exit);
|
||||
199
arch/x86/kernel/cpu/microcode/core_early.c
Normal file
@ -0,0 +1,199 @@
|
|||
/*
|
||||
* X86 CPU microcode early update for Linux
|
||||
*
|
||||
* Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
|
||||
* H Peter Anvin" <hpa@zytor.com>
|
||||
*
|
||||
* This driver allows to early upgrade microcode on Intel processors
|
||||
* belonging to IA-32 family - PentiumPro, Pentium II,
|
||||
* Pentium III, Xeon, Pentium 4, etc.
|
||||
*
|
||||
* Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
|
||||
* Software Developer's Manual.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <asm/microcode.h>
|
||||
#include <asm/microcode_intel.h>
|
||||
#include <asm/microcode_amd.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/cmdline.h>
|
||||
|
||||
#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
|
||||
#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
|
||||
#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
|
||||
#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
|
||||
#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
|
||||
#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
|
||||
#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
|
||||
|
||||
#define CPUID_IS(a, b, c, ebx, ecx, edx) \
|
||||
(!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
|
||||
|
||||
/*
|
||||
* In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
|
||||
* x86_vendor() gets vendor id for BSP.
|
||||
*
|
||||
* In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
|
||||
* coding, we still use x86_vendor() to get vendor id for AP.
|
||||
*
|
||||
* x86_vendor() gets vendor information directly through cpuid.
|
||||
*/
|
||||
static int x86_vendor(void)
|
||||
{
|
||||
u32 eax = 0x00000000;
|
||||
u32 ebx, ecx = 0, edx;
|
||||
|
||||
native_cpuid(&eax, &ebx, &ecx, &edx);
|
||||
|
||||
if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
|
||||
return X86_VENDOR_INTEL;
|
||||
|
||||
if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
|
||||
return X86_VENDOR_AMD;
|
||||
|
||||
return X86_VENDOR_UNKNOWN;
|
||||
}
|
||||
|
||||
static int x86_family(void)
|
||||
{
|
||||
u32 eax = 0x00000001;
|
||||
u32 ebx, ecx = 0, edx;
|
||||
int x86;
|
||||
|
||||
native_cpuid(&eax, &ebx, &ecx, &edx);
|
||||
|
||||
x86 = (eax >> 8) & 0xf;
|
||||
if (x86 == 15)
|
||||
x86 += (eax >> 20) & 0xff;
|
||||
|
||||
return x86;
|
||||
}
|
||||
|
||||
static bool __init check_loader_disabled_bsp(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
|
||||
const char *opt = "dis_ucode_ldr";
|
||||
const char *option = (const char *)__pa_nodebug(opt);
|
||||
bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr);
|
||||
|
||||
#else /* CONFIG_X86_64 */
|
||||
const char *cmdline = boot_command_line;
|
||||
const char *option = "dis_ucode_ldr";
|
||||
bool *res = &dis_ucode_ldr;
|
||||
#endif
|
||||
|
||||
if (cmdline_find_option_bool(cmdline, option))
|
||||
*res = true;
|
||||
|
||||
return *res;
|
||||
}
|
||||
|
||||
void __init load_ucode_bsp(void)
|
||||
{
|
||||
int vendor, x86;
|
||||
|
||||
if (check_loader_disabled_bsp())
|
||||
return;
|
||||
|
||||
if (!have_cpuid_p())
|
||||
return;
|
||||
|
||||
vendor = x86_vendor();
|
||||
x86 = x86_family();
|
||||
|
||||
switch (vendor) {
|
||||
case X86_VENDOR_INTEL:
|
||||
if (x86 >= 6)
|
||||
load_ucode_intel_bsp();
|
||||
break;
|
||||
case X86_VENDOR_AMD:
|
||||
if (x86 >= 0x10)
|
||||
load_ucode_amd_bsp();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static bool check_loader_disabled_ap(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
return *((bool *)__pa_nodebug(&dis_ucode_ldr));
|
||||
#else
|
||||
return dis_ucode_ldr;
|
||||
#endif
|
||||
}
|
||||
|
||||
void load_ucode_ap(void)
|
||||
{
|
||||
int vendor, x86;
|
||||
|
||||
if (check_loader_disabled_ap())
|
||||
return;
|
||||
|
||||
if (!have_cpuid_p())
|
||||
return;
|
||||
|
||||
vendor = x86_vendor();
|
||||
x86 = x86_family();
|
||||
|
||||
switch (vendor) {
|
||||
case X86_VENDOR_INTEL:
|
||||
if (x86 >= 6)
|
||||
load_ucode_intel_ap();
|
||||
break;
|
||||
case X86_VENDOR_AMD:
|
||||
if (x86 >= 0x10)
|
||||
load_ucode_amd_ap();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int __init save_microcode_in_initrd(void)
|
||||
{
|
||||
struct cpuinfo_x86 *c = &boot_cpu_data;
|
||||
|
||||
switch (c->x86_vendor) {
|
||||
case X86_VENDOR_INTEL:
|
||||
if (c->x86 >= 6)
|
||||
save_microcode_in_initrd_intel();
|
||||
break;
|
||||
case X86_VENDOR_AMD:
|
||||
if (c->x86 >= 0x10)
|
||||
save_microcode_in_initrd_amd();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void reload_early_microcode(void)
|
||||
{
|
||||
int vendor, x86;
|
||||
|
||||
vendor = x86_vendor();
|
||||
x86 = x86_family();
|
||||
|
||||
switch (vendor) {
|
||||
case X86_VENDOR_INTEL:
|
||||
if (x86 >= 6)
|
||||
reload_ucode_intel();
|
||||
break;
|
||||
case X86_VENDOR_AMD:
|
||||
if (x86 >= 0x10)
|
||||
reload_ucode_amd();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
333
arch/x86/kernel/cpu/microcode/intel.c
Normal file
@ -0,0 +1,333 @@
|
|||
/*
|
||||
* Intel CPU Microcode Update Driver for Linux
|
||||
*
|
||||
* Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
|
||||
* 2006 Shaohua Li <shaohua.li@intel.com>
|
||||
*
|
||||
* This driver allows to upgrade microcode on Intel processors
|
||||
* belonging to IA-32 family - PentiumPro, Pentium II,
|
||||
* Pentium III, Xeon, Pentium 4, etc.
|
||||
*
|
||||
* Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture
|
||||
* Software Developer's Manual
|
||||
* Order Number 253668 or free download from:
|
||||
*
|
||||
* http://developer.intel.com/Assets/PDF/manual/253668.pdf
|
||||
*
|
||||
* For more information, go to http://www.urbanmyth.org/microcode
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com>
|
||||
* Initial release.
|
||||
* 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com>
|
||||
* Added read() support + cleanups.
|
||||
* 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com>
|
||||
* Added 'device trimming' support. open(O_WRONLY) zeroes
|
||||
* and frees the saved copy of applied microcode.
|
||||
* 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com>
|
||||
* Made to use devfs (/dev/cpu/microcode) + cleanups.
|
||||
* 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com>
|
||||
* Added misc device support (now uses both devfs and misc).
|
||||
* Added MICROCODE_IOCFREE ioctl to clear memory.
|
||||
* 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com>
|
||||
* Messages for error cases (non Intel & no suitable microcode).
|
||||
* 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
|
||||
* Removed ->release(). Removed exclusive open and status bitmap.
|
||||
* Added microcode_rwsem to serialize read()/write()/ioctl().
|
||||
* Removed global kernel lock usage.
|
||||
* 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
|
||||
* Write 0 to 0x8B msr and then cpuid before reading revision,
|
||||
* so that it works even if there were no update done by the
|
||||
* BIOS. Otherwise, reading from 0x8B gives junk (which happened
|
||||
* to be 0 on my machine which is why it worked even when I
|
||||
* disabled update by the BIOS)
|
||||
* Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
|
||||
* 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
|
||||
* Tigran Aivazian <tigran@veritas.com>
|
||||
* Intel Pentium 4 processor support and bugfixes.
|
||||
* 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
|
||||
* Bugfix for HT (Hyper-Threading) enabled processors
|
||||
* whereby processor resources are shared by all logical processors
|
||||
* in a single CPU package.
|
||||
* 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
|
||||
* Tigran Aivazian <tigran@veritas.com>,
|
||||
* Serialize updates as required on HT processors due to
|
||||
* speculative nature of implementation.
|
||||
* 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
|
||||
* Fix the panic when writing zero-length microcode chunk.
|
||||
* 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
|
||||
* Jun Nakajima <jun.nakajima@intel.com>
|
||||
* Support for the microcode updates in the new format.
|
||||
* 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
|
||||
* Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
|
||||
* because we no longer hold a copy of applied microcode
|
||||
* in kernel memory.
|
||||
* 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
|
||||
* Fix sigmatch() macro to handle old CPUs with pf == 0.
|
||||
* Thanks to Stuart Swales for pointing out this bug.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include <linux/firmware.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#include <asm/microcode_intel.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
MODULE_DESCRIPTION("Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
MODULE_LICENSE("GPL");

static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
{
	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
	unsigned int val[2];

	memset(csig, 0, sizeof(*csig));

	csig->sig = cpuid_eax(0x00000001);

	if ((c->x86_model >= 5) || (c->x86 > 6)) {
		/* get processor flags from MSR 0x17 */
		rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
		csig->pf = 1 << ((val[1] >> 18) & 7);
	}

	csig->rev = c->microcode;
	pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
		cpu_num, csig->sig, csig->pf, csig->rev);

	return 0;
}
|
||||
|
||||
/*
 * return 0 - no update found
 * return 1 - found update
 */
static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
{
	struct cpu_signature cpu_sig;
	unsigned int csig, cpf, crev;

	collect_cpu_info(cpu, &cpu_sig);

	csig = cpu_sig.sig;
	cpf = cpu_sig.pf;
	crev = cpu_sig.rev;

	return get_matching_microcode(csig, cpf, mc_intel, crev);
}
|
||||
|
||||
static int apply_microcode_intel(int cpu)
|
||||
{
|
||||
struct microcode_intel *mc_intel;
|
||||
struct ucode_cpu_info *uci;
|
||||
unsigned int val[2];
|
||||
int cpu_num = raw_smp_processor_id();
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu_num);
|
||||
|
||||
uci = ucode_cpu_info + cpu;
|
||||
mc_intel = uci->mc;
|
||||
|
||||
/* We should bind the task to the CPU */
|
||||
BUG_ON(cpu_num != cpu);
|
||||
|
||||
if (mc_intel == NULL)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Microcode on this CPU could be updated earlier. Only apply the
|
||||
* microcode patch in mc_intel when it is newer than the one on this
|
||||
* CPU.
|
||||
*/
|
||||
if (get_matching_mc(mc_intel, cpu) == 0)
|
||||
return 0;
|
||||
|
||||
/* write microcode via MSR 0x79 */
|
||||
wrmsr(MSR_IA32_UCODE_WRITE,
|
||||
(unsigned long) mc_intel->bits,
|
||||
(unsigned long) mc_intel->bits >> 16 >> 16);
|
||||
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
|
||||
|
||||
/* As documented in the SDM: Do a CPUID 1 here */
|
||||
sync_core();
|
||||
|
||||
/* get the current revision from MSR 0x8B */
|
||||
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
|
||||
|
||||
if (val[1] != mc_intel->hdr.rev) {
|
||||
pr_err("CPU%d update to revision 0x%x failed\n",
|
||||
cpu_num, mc_intel->hdr.rev);
|
||||
return -1;
|
||||
}
|
||||
pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n",
|
||||
cpu_num, val[1],
|
||||
mc_intel->hdr.date & 0xffff,
|
||||
mc_intel->hdr.date >> 24,
|
||||
(mc_intel->hdr.date >> 16) & 0xff);
|
||||
|
||||
uci->cpu_sig.rev = val[1];
|
||||
c->microcode = val[1];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
|
||||
int (*get_ucode_data)(void *, const void *, size_t))
|
||||
{
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
|
||||
u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL;
|
||||
int new_rev = uci->cpu_sig.rev;
|
||||
unsigned int leftover = size;
|
||||
enum ucode_state state = UCODE_OK;
|
||||
unsigned int curr_mc_size = 0;
|
||||
unsigned int csig, cpf;
|
||||
|
||||
while (leftover) {
|
||||
struct microcode_header_intel mc_header;
|
||||
unsigned int mc_size;
|
||||
|
||||
if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header)))
|
||||
break;
|
||||
|
||||
mc_size = get_totalsize(&mc_header);
|
||||
if (!mc_size || mc_size > leftover) {
|
||||
pr_err("error! Bad data in microcode data file\n");
|
||||
break;
|
||||
}
|
||||
|
||||
/* For performance reasons, reuse mc area when possible */
|
||||
if (!mc || mc_size > curr_mc_size) {
|
||||
vfree(mc);
|
||||
mc = vmalloc(mc_size);
|
||||
if (!mc)
|
||||
break;
|
||||
curr_mc_size = mc_size;
|
||||
}
|
||||
|
||||
if (get_ucode_data(mc, ucode_ptr, mc_size) ||
|
||||
microcode_sanity_check(mc, 1) < 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
csig = uci->cpu_sig.sig;
|
||||
cpf = uci->cpu_sig.pf;
|
||||
if (get_matching_microcode(csig, cpf, mc, new_rev)) {
|
||||
vfree(new_mc);
|
||||
new_rev = mc_header.rev;
|
||||
new_mc = mc;
|
||||
mc = NULL; /* trigger new vmalloc */
|
||||
}
|
||||
|
||||
ucode_ptr += mc_size;
|
||||
leftover -= mc_size;
|
||||
}
|
||||
|
||||
vfree(mc);
|
||||
|
||||
if (leftover) {
|
||||
vfree(new_mc);
|
||||
state = UCODE_ERROR;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!new_mc) {
|
||||
state = UCODE_NFOUND;
|
||||
goto out;
|
||||
}
|
||||
|
||||
vfree(uci->mc);
|
||||
uci->mc = (struct microcode_intel *)new_mc;
|
||||
|
||||
/*
|
||||
* If early loading microcode is supported, save this mc into
|
||||
* permanent memory. So it will be loaded early when a CPU is hot added
|
||||
* or resumes.
|
||||
*/
|
||||
save_mc_for_early(new_mc);
|
||||
|
||||
pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
|
||||
cpu, new_rev, uci->cpu_sig.rev);
|
||||
out:
|
||||
return state;
|
||||
}
|
||||
|
||||
static int get_ucode_fw(void *to, const void *from, size_t n)
|
||||
{
|
||||
memcpy(to, from, n);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static enum ucode_state request_microcode_fw(int cpu, struct device *device,
|
||||
bool refresh_fw)
|
||||
{
|
||||
char name[30];
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
const struct firmware *firmware;
|
||||
enum ucode_state ret;
|
||||
|
||||
sprintf(name, "intel-ucode/%02x-%02x-%02x",
|
||||
c->x86, c->x86_model, c->x86_mask);
|
||||
|
||||
if (request_firmware_direct(&firmware, name, device)) {
|
||||
pr_debug("data file %s load failed\n", name);
|
||||
return UCODE_NFOUND;
|
||||
}
|
||||
|
||||
ret = generic_load_microcode(cpu, (void *)firmware->data,
|
||||
firmware->size, &get_ucode_fw);
|
||||
|
||||
release_firmware(firmware);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int get_ucode_user(void *to, const void *from, size_t n)
|
||||
{
|
||||
return copy_from_user(to, from, n);
|
||||
}
|
||||
|
||||
static enum ucode_state
|
||||
request_microcode_user(int cpu, const void __user *buf, size_t size)
|
||||
{
|
||||
return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
|
||||
}
|
||||
|
||||
static void microcode_fini_cpu(int cpu)
|
||||
{
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
|
||||
|
||||
vfree(uci->mc);
|
||||
uci->mc = NULL;
|
||||
}
|
||||
|
||||
static struct microcode_ops microcode_intel_ops = {
|
||||
.request_microcode_user = request_microcode_user,
|
||||
.request_microcode_fw = request_microcode_fw,
|
||||
.collect_cpu_info = collect_cpu_info,
|
||||
.apply_microcode = apply_microcode_intel,
|
||||
.microcode_fini_cpu = microcode_fini_cpu,
|
||||
};
|
||||
|
||||
struct microcode_ops * __init init_intel_microcode(void)
{
	struct cpuinfo_x86 *c = &cpu_data(0);

	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
	    cpu_has(c, X86_FEATURE_IA64)) {
		pr_err("Intel CPU family 0x%x not supported\n", c->x86);
		return NULL;
	}

	return &microcode_intel_ops;
}
813
arch/x86/kernel/cpu/microcode/intel_early.c
Normal file
@ -0,0 +1,813 @@
/*
|
||||
* Intel CPU microcode early update for Linux
|
||||
*
|
||||
* Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
|
||||
* "H Peter Anvin" <hpa@zytor.com>
|
||||
*
|
||||
* This allows to early upgrade microcode on Intel processors
|
||||
* belonging to IA-32 family - PentiumPro, Pentium II,
|
||||
* Pentium III, Xeon, Pentium 4, etc.
|
||||
*
|
||||
* Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
|
||||
* Software Developer's Manual.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/earlycpio.h>
|
||||
#include <linux/initrd.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/microcode_intel.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
|
||||
static struct mc_saved_data {
|
||||
unsigned int mc_saved_count;
|
||||
struct microcode_intel **mc_saved;
|
||||
} mc_saved_data;
|
||||
|
||||
static enum ucode_state
|
||||
generic_load_microcode_early(struct microcode_intel **mc_saved_p,
|
||||
unsigned int mc_saved_count,
|
||||
struct ucode_cpu_info *uci)
|
||||
{
|
||||
struct microcode_intel *ucode_ptr, *new_mc = NULL;
|
||||
int new_rev = uci->cpu_sig.rev;
|
||||
enum ucode_state state = UCODE_OK;
|
||||
unsigned int mc_size;
|
||||
struct microcode_header_intel *mc_header;
|
||||
unsigned int csig = uci->cpu_sig.sig;
|
||||
unsigned int cpf = uci->cpu_sig.pf;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < mc_saved_count; i++) {
|
||||
ucode_ptr = mc_saved_p[i];
|
||||
|
||||
mc_header = (struct microcode_header_intel *)ucode_ptr;
|
||||
mc_size = get_totalsize(mc_header);
|
||||
if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) {
|
||||
new_rev = mc_header->rev;
|
||||
new_mc = ucode_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
if (!new_mc) {
|
||||
state = UCODE_NFOUND;
|
||||
goto out;
|
||||
}
|
||||
|
||||
uci->mc = (struct microcode_intel *)new_mc;
|
||||
out:
|
||||
return state;
|
||||
}
|
||||
|
||||
static void
|
||||
microcode_pointer(struct microcode_intel **mc_saved,
|
||||
unsigned long *mc_saved_in_initrd,
|
||||
unsigned long initrd_start, int mc_saved_count)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < mc_saved_count; i++)
|
||||
mc_saved[i] = (struct microcode_intel *)
|
||||
(mc_saved_in_initrd[i] + initrd_start);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
static void
|
||||
microcode_phys(struct microcode_intel **mc_saved_tmp,
|
||||
struct mc_saved_data *mc_saved_data)
|
||||
{
|
||||
int i;
|
||||
struct microcode_intel ***mc_saved;
|
||||
|
||||
mc_saved = (struct microcode_intel ***)
|
||||
__pa_nodebug(&mc_saved_data->mc_saved);
|
||||
for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
|
||||
struct microcode_intel *p;
|
||||
|
||||
p = *(struct microcode_intel **)
|
||||
__pa_nodebug(mc_saved_data->mc_saved + i);
|
||||
mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static enum ucode_state
|
||||
load_microcode(struct mc_saved_data *mc_saved_data,
|
||||
unsigned long *mc_saved_in_initrd,
|
||||
unsigned long initrd_start,
|
||||
struct ucode_cpu_info *uci)
|
||||
{
|
||||
struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
|
||||
unsigned int count = mc_saved_data->mc_saved_count;
|
||||
|
||||
if (!mc_saved_data->mc_saved) {
|
||||
microcode_pointer(mc_saved_tmp, mc_saved_in_initrd,
|
||||
initrd_start, count);
|
||||
|
||||
return generic_load_microcode_early(mc_saved_tmp, count, uci);
|
||||
} else {
|
||||
#ifdef CONFIG_X86_32
|
||||
microcode_phys(mc_saved_tmp, mc_saved_data);
|
||||
return generic_load_microcode_early(mc_saved_tmp, count, uci);
|
||||
#else
|
||||
return generic_load_microcode_early(mc_saved_data->mc_saved,
|
||||
count, uci);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static u8 get_x86_family(unsigned long sig)
|
||||
{
|
||||
u8 x86;
|
||||
|
||||
x86 = (sig >> 8) & 0xf;
|
||||
|
||||
if (x86 == 0xf)
|
||||
x86 += (sig >> 20) & 0xff;
|
||||
|
||||
return x86;
|
||||
}
|
||||
|
||||
static u8 get_x86_model(unsigned long sig)
|
||||
{
|
||||
u8 x86, x86_model;
|
||||
|
||||
x86 = get_x86_family(sig);
|
||||
x86_model = (sig >> 4) & 0xf;
|
||||
|
||||
if (x86 == 0x6 || x86 == 0xf)
|
||||
x86_model += ((sig >> 16) & 0xf) << 4;
|
||||
|
||||
return x86_model;
|
||||
}
|
||||
|
||||
/*
|
||||
* Given CPU signature and a microcode patch, this function finds if the
|
||||
* microcode patch has matching family and model with the CPU.
|
||||
*/
|
||||
static enum ucode_state
|
||||
matching_model_microcode(struct microcode_header_intel *mc_header,
|
||||
unsigned long sig)
|
||||
{
|
||||
u8 x86, x86_model;
|
||||
u8 x86_ucode, x86_model_ucode;
|
||||
struct extended_sigtable *ext_header;
|
||||
unsigned long total_size = get_totalsize(mc_header);
|
||||
unsigned long data_size = get_datasize(mc_header);
|
||||
int ext_sigcount, i;
|
||||
struct extended_signature *ext_sig;
|
||||
|
||||
x86 = get_x86_family(sig);
|
||||
x86_model = get_x86_model(sig);
|
||||
|
||||
x86_ucode = get_x86_family(mc_header->sig);
|
||||
x86_model_ucode = get_x86_model(mc_header->sig);
|
||||
|
||||
if (x86 == x86_ucode && x86_model == x86_model_ucode)
|
||||
return UCODE_OK;
|
||||
|
||||
/* Look for ext. headers: */
|
||||
if (total_size <= data_size + MC_HEADER_SIZE)
|
||||
return UCODE_NFOUND;
|
||||
|
||||
ext_header = (struct extended_sigtable *)
|
||||
mc_header + data_size + MC_HEADER_SIZE;
|
||||
ext_sigcount = ext_header->count;
|
||||
ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
|
||||
|
||||
for (i = 0; i < ext_sigcount; i++) {
|
||||
x86_ucode = get_x86_family(ext_sig->sig);
|
||||
x86_model_ucode = get_x86_model(ext_sig->sig);
|
||||
|
||||
if (x86 == x86_ucode && x86_model == x86_model_ucode)
|
||||
return UCODE_OK;
|
||||
|
||||
ext_sig++;
|
||||
}
|
||||
|
||||
return UCODE_NFOUND;
|
||||
}
|
||||
|
||||
static int
|
||||
save_microcode(struct mc_saved_data *mc_saved_data,
|
||||
struct microcode_intel **mc_saved_src,
|
||||
unsigned int mc_saved_count)
|
||||
{
|
||||
int i, j;
|
||||
struct microcode_intel **mc_saved_p;
|
||||
int ret;
|
||||
|
||||
if (!mc_saved_count)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Copy new microcode data.
|
||||
*/
|
||||
mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *),
|
||||
GFP_KERNEL);
|
||||
if (!mc_saved_p)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < mc_saved_count; i++) {
|
||||
struct microcode_intel *mc = mc_saved_src[i];
|
||||
struct microcode_header_intel *mc_header = &mc->hdr;
|
||||
unsigned long mc_size = get_totalsize(mc_header);
|
||||
mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL);
|
||||
if (!mc_saved_p[i]) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
if (!mc_saved_src[i]) {
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
memcpy(mc_saved_p[i], mc, mc_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Point to newly saved microcode.
|
||||
*/
|
||||
mc_saved_data->mc_saved = mc_saved_p;
|
||||
mc_saved_data->mc_saved_count = mc_saved_count;
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
for (j = 0; j <= i; j++)
|
||||
kfree(mc_saved_p[j]);
|
||||
kfree(mc_saved_p);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* A microcode patch in ucode_ptr is saved into mc_saved
|
||||
* - if it has matching signature and newer revision compared to an existing
|
||||
* patch mc_saved.
|
||||
* - or if it is a newly discovered microcode patch.
|
||||
*
|
||||
* The microcode patch should have matching model with CPU.
|
||||
*/
|
||||
static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr,
|
||||
unsigned int *mc_saved_count_p)
|
||||
{
|
||||
int i;
|
||||
int found = 0;
|
||||
unsigned int mc_saved_count = *mc_saved_count_p;
|
||||
struct microcode_header_intel *mc_header;
|
||||
|
||||
mc_header = (struct microcode_header_intel *)ucode_ptr;
|
||||
for (i = 0; i < mc_saved_count; i++) {
|
||||
unsigned int sig, pf;
|
||||
unsigned int new_rev;
|
||||
struct microcode_header_intel *mc_saved_header =
|
||||
(struct microcode_header_intel *)mc_saved[i];
|
||||
sig = mc_saved_header->sig;
|
||||
pf = mc_saved_header->pf;
|
||||
new_rev = mc_header->rev;
|
||||
|
||||
if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) {
|
||||
found = 1;
|
||||
if (update_match_revision(mc_header, new_rev)) {
|
||||
/*
|
||||
* Found an older ucode saved before.
|
||||
* Replace the older one with this newer
|
||||
* one.
|
||||
*/
|
||||
mc_saved[i] =
|
||||
(struct microcode_intel *)ucode_ptr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (i >= mc_saved_count && !found)
|
||||
/*
|
||||
* This ucode is first time discovered in ucode file.
|
||||
* Save it to memory.
|
||||
*/
|
||||
mc_saved[mc_saved_count++] =
|
||||
(struct microcode_intel *)ucode_ptr;
|
||||
|
||||
*mc_saved_count_p = mc_saved_count;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get microcode matching with BSP's model. Only CPUs with the same model as
|
||||
* BSP can stay in the platform.
|
||||
*/
|
||||
static enum ucode_state __init
|
||||
get_matching_model_microcode(int cpu, unsigned long start,
|
||||
void *data, size_t size,
|
||||
struct mc_saved_data *mc_saved_data,
|
||||
unsigned long *mc_saved_in_initrd,
|
||||
struct ucode_cpu_info *uci)
|
||||
{
|
||||
u8 *ucode_ptr = data;
|
||||
unsigned int leftover = size;
|
||||
enum ucode_state state = UCODE_OK;
|
||||
unsigned int mc_size;
|
||||
struct microcode_header_intel *mc_header;
|
||||
struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
|
||||
unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
|
||||
int i;
|
||||
|
||||
while (leftover) {
|
||||
mc_header = (struct microcode_header_intel *)ucode_ptr;
|
||||
|
||||
mc_size = get_totalsize(mc_header);
|
||||
if (!mc_size || mc_size > leftover ||
|
||||
microcode_sanity_check(ucode_ptr, 0) < 0)
|
||||
break;
|
||||
|
||||
leftover -= mc_size;
|
||||
|
||||
/*
|
||||
* Since APs with same family and model as the BSP may boot in
|
||||
* the platform, we need to find and save microcode patches
|
||||
* with the same family and model as the BSP.
|
||||
*/
|
||||
if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
|
||||
UCODE_OK) {
|
||||
ucode_ptr += mc_size;
|
||||
continue;
|
||||
}
|
||||
|
||||
_save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count);
|
||||
|
||||
ucode_ptr += mc_size;
|
||||
}
|
||||
|
||||
if (leftover) {
|
||||
state = UCODE_ERROR;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (mc_saved_count == 0) {
|
||||
state = UCODE_NFOUND;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < mc_saved_count; i++)
|
||||
mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start;
|
||||
|
||||
mc_saved_data->mc_saved_count = mc_saved_count;
|
||||
out:
|
||||
return state;
|
||||
}
|
||||
|
||||
static int collect_cpu_info_early(struct ucode_cpu_info *uci)
|
||||
{
|
||||
unsigned int val[2];
|
||||
u8 x86, x86_model;
|
||||
struct cpu_signature csig;
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
|
||||
csig.sig = 0;
|
||||
csig.pf = 0;
|
||||
csig.rev = 0;
|
||||
|
||||
memset(uci, 0, sizeof(*uci));
|
||||
|
||||
eax = 0x00000001;
|
||||
ecx = 0;
|
||||
native_cpuid(&eax, &ebx, &ecx, &edx);
|
||||
csig.sig = eax;
|
||||
|
||||
x86 = get_x86_family(csig.sig);
|
||||
x86_model = get_x86_model(csig.sig);
|
||||
|
||||
if ((x86_model >= 5) || (x86 > 6)) {
|
||||
/* get processor flags from MSR 0x17 */
|
||||
native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
|
||||
csig.pf = 1 << ((val[1] >> 18) & 7);
|
||||
}
|
||||
native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
|
||||
|
||||
/* As documented in the SDM: Do a CPUID 1 here */
|
||||
sync_core();
|
||||
|
||||
/* get the current revision from MSR 0x8B */
|
||||
native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
|
||||
|
||||
csig.rev = val[1];
|
||||
|
||||
uci->cpu_sig = csig;
|
||||
uci->valid = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
static void __ref show_saved_mc(void)
|
||||
{
|
||||
int i, j;
|
||||
unsigned int sig, pf, rev, total_size, data_size, date;
|
||||
struct ucode_cpu_info uci;
|
||||
|
||||
if (mc_saved_data.mc_saved_count == 0) {
|
||||
pr_debug("no microcode data saved.\n");
|
||||
return;
|
||||
}
|
||||
pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
|
||||
|
||||
collect_cpu_info_early(&uci);
|
||||
|
||||
sig = uci.cpu_sig.sig;
|
||||
pf = uci.cpu_sig.pf;
|
||||
rev = uci.cpu_sig.rev;
|
||||
pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n",
|
||||
smp_processor_id(), sig, pf, rev);
|
||||
|
||||
for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
|
||||
struct microcode_header_intel *mc_saved_header;
|
||||
struct extended_sigtable *ext_header;
|
||||
int ext_sigcount;
|
||||
struct extended_signature *ext_sig;
|
||||
|
||||
mc_saved_header = (struct microcode_header_intel *)
|
||||
mc_saved_data.mc_saved[i];
|
||||
sig = mc_saved_header->sig;
|
||||
pf = mc_saved_header->pf;
|
||||
rev = mc_saved_header->rev;
|
||||
total_size = get_totalsize(mc_saved_header);
|
||||
data_size = get_datasize(mc_saved_header);
|
||||
date = mc_saved_header->date;
|
||||
|
||||
pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n",
|
||||
i, sig, pf, rev, total_size,
|
||||
date & 0xffff,
|
||||
date >> 24,
|
||||
(date >> 16) & 0xff);
|
||||
|
||||
/* Look for ext. headers: */
|
||||
if (total_size <= data_size + MC_HEADER_SIZE)
|
||||
continue;
|
||||
|
||||
ext_header = (struct extended_sigtable *)
|
||||
mc_saved_header + data_size + MC_HEADER_SIZE;
|
||||
ext_sigcount = ext_header->count;
|
||||
ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
|
||||
|
||||
for (j = 0; j < ext_sigcount; j++) {
|
||||
sig = ext_sig->sig;
|
||||
pf = ext_sig->pf;
|
||||
|
||||
pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n",
|
||||
j, sig, pf);
|
||||
|
||||
ext_sig++;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
#else
|
||||
static inline void show_saved_mc(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
|
||||
static DEFINE_MUTEX(x86_cpu_microcode_mutex);
|
||||
/*
|
||||
* Save this mc into mc_saved_data. So it will be loaded early when a CPU is
|
||||
* hot added or resumes.
|
||||
*
|
||||
* Please make sure this mc should be a valid microcode patch before calling
|
||||
* this function.
|
||||
*/
|
||||
int save_mc_for_early(u8 *mc)
|
||||
{
|
||||
struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
|
||||
unsigned int mc_saved_count_init;
|
||||
unsigned int mc_saved_count;
|
||||
struct microcode_intel **mc_saved;
|
||||
int ret = 0;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Hold hotplug lock so mc_saved_data is not accessed by a CPU in
|
||||
* hotplug.
|
||||
*/
|
||||
mutex_lock(&x86_cpu_microcode_mutex);
|
||||
|
||||
mc_saved_count_init = mc_saved_data.mc_saved_count;
|
||||
mc_saved_count = mc_saved_data.mc_saved_count;
|
||||
mc_saved = mc_saved_data.mc_saved;
|
||||
|
||||
if (mc_saved && mc_saved_count)
|
||||
memcpy(mc_saved_tmp, mc_saved,
|
||||
mc_saved_count * sizeof(struct microcode_intel *));
|
||||
/*
|
||||
* Save the microcode patch mc in mc_save_tmp structure if it's a newer
|
||||
* version.
|
||||
*/
|
||||
|
||||
_save_mc(mc_saved_tmp, mc, &mc_saved_count);
|
||||
|
||||
/*
|
||||
* Save the mc_save_tmp in global mc_saved_data.
|
||||
*/
|
||||
ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
|
||||
if (ret) {
|
||||
pr_err("Cannot save microcode patch.\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
show_saved_mc();
|
||||
|
||||
/*
|
||||
* Free old saved microcode data.
|
||||
*/
|
||||
if (mc_saved) {
|
||||
for (i = 0; i < mc_saved_count_init; i++)
|
||||
kfree(mc_saved[i]);
|
||||
kfree(mc_saved);
|
||||
}
|
||||
|
||||
out:
|
||||
mutex_unlock(&x86_cpu_microcode_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(save_mc_for_early);
|
||||
#endif
|
||||
|
||||
static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
|
||||
static __init enum ucode_state
|
||||
scan_microcode(unsigned long start, unsigned long end,
|
||||
struct mc_saved_data *mc_saved_data,
|
||||
unsigned long *mc_saved_in_initrd,
|
||||
struct ucode_cpu_info *uci)
|
||||
{
|
||||
unsigned int size = end - start + 1;
|
||||
struct cpio_data cd;
|
||||
long offset = 0;
|
||||
#ifdef CONFIG_X86_32
|
||||
char *p = (char *)__pa_nodebug(ucode_name);
|
||||
#else
|
||||
char *p = ucode_name;
|
||||
#endif
|
||||
|
||||
cd.data = NULL;
|
||||
cd.size = 0;
|
||||
|
||||
cd = find_cpio_data(p, (void *)start, size, &offset);
|
||||
if (!cd.data)
|
||||
return UCODE_ERROR;
|
||||
|
||||
|
||||
return get_matching_model_microcode(0, start, cd.data, cd.size,
|
||||
mc_saved_data, mc_saved_in_initrd,
|
||||
uci);
|
||||
}
|
||||
|
||||
/*
|
||||
* Print ucode update info.
|
||||
*/
|
||||
static void
|
||||
print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
|
||||
cpu,
|
||||
uci->cpu_sig.rev,
|
||||
date & 0xffff,
|
||||
date >> 24,
|
||||
(date >> 16) & 0xff);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
|
||||
static int delay_ucode_info;
|
||||
static int current_mc_date;
|
||||
|
||||
/*
|
||||
* Print early updated ucode info after printk works. This is delayed info dump.
|
||||
*/
|
||||
void show_ucode_info_early(void)
|
||||
{
|
||||
struct ucode_cpu_info uci;
|
||||
|
||||
if (delay_ucode_info) {
|
||||
collect_cpu_info_early(&uci);
|
||||
print_ucode_info(&uci, current_mc_date);
|
||||
delay_ucode_info = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* At this point, we can not call printk() yet. Keep microcode patch number in
|
||||
* mc_saved_data.mc_saved and delay printing microcode info in
|
||||
* show_ucode_info_early() until printk() works.
|
||||
*/
|
||||
static void print_ucode(struct ucode_cpu_info *uci)
{
	struct microcode_intel *mc_intel;
	int *delay_ucode_info_p;
	int *current_mc_date_p;

	mc_intel = uci->mc;
	if (mc_intel == NULL)
		return;

	delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info);
	current_mc_date_p = (int *)__pa_nodebug(&current_mc_date);

	*delay_ucode_info_p = 1;
	*current_mc_date_p = mc_intel->hdr.date;
}
|
||||
#else
|
||||
|
||||
/*
|
||||
* Flush global tlb. We only do this in x86_64 where paging has been enabled
|
||||
* already and PGE should be enabled as well.
|
||||
*/
|
||||
static inline void flush_tlb_early(void)
|
||||
{
|
||||
__native_flush_tlb_global_irq_disabled();
|
||||
}
|
||||
|
||||
static inline void print_ucode(struct ucode_cpu_info *uci)
|
||||
{
|
||||
struct microcode_intel *mc_intel;
|
||||
|
||||
mc_intel = uci->mc;
|
||||
if (mc_intel == NULL)
|
||||
return;
|
||||
|
||||
print_ucode_info(uci, mc_intel->hdr.date);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
|
||||
{
|
||||
struct microcode_intel *mc_intel;
|
||||
unsigned int val[2];
|
||||
|
||||
mc_intel = uci->mc;
|
||||
if (mc_intel == NULL)
|
||||
return 0;
|
||||
|
||||
/* write microcode via MSR 0x79 */
|
||||
native_wrmsr(MSR_IA32_UCODE_WRITE,
|
||||
(unsigned long) mc_intel->bits,
|
||||
(unsigned long) mc_intel->bits >> 16 >> 16);
|
||||
native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
|
||||
|
||||
/* As documented in the SDM: Do a CPUID 1 here */
|
||||
sync_core();
|
||||
|
||||
/* get the current revision from MSR 0x8B */
|
||||
native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
|
||||
if (val[1] != mc_intel->hdr.rev)
|
||||
return -1;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Flush global tlb. This is precaution. */
|
||||
flush_tlb_early();
|
||||
#endif
|
||||
uci->cpu_sig.rev = val[1];
|
||||
|
||||
if (early)
|
||||
print_ucode(uci);
|
||||
else
|
||||
print_ucode_info(uci, mc_intel->hdr.date);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function converts microcode patch offsets previously stored in
|
||||
* mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
|
||||
*/
|
||||
int __init save_microcode_in_initrd_intel(void)
|
||||
{
|
||||
unsigned int count = mc_saved_data.mc_saved_count;
|
||||
struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
|
||||
int ret = 0;
|
||||
|
||||
if (count == 0)
|
||||
return ret;
|
||||
|
||||
microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count);
|
||||
ret = save_microcode(&mc_saved_data, mc_saved, count);
|
||||
if (ret)
|
||||
pr_err("Cannot save microcode patches from initrd.\n");
|
||||
|
||||
show_saved_mc();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __init
|
||||
_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
|
||||
unsigned long *mc_saved_in_initrd,
|
||||
unsigned long initrd_start_early,
|
||||
unsigned long initrd_end_early,
|
||||
struct ucode_cpu_info *uci)
|
||||
{
|
||||
enum ucode_state ret;
|
||||
|
||||
collect_cpu_info_early(uci);
|
||||
scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data,
|
||||
mc_saved_in_initrd, uci);
|
||||
|
||||
ret = load_microcode(mc_saved_data, mc_saved_in_initrd,
|
||||
initrd_start_early, uci);
|
||||
|
||||
if (ret == UCODE_OK)
|
||||
apply_microcode_early(uci, true);
|
||||
}
|
||||
|
||||
void __init
|
||||
load_ucode_intel_bsp(void)
|
||||
{
|
||||
u64 ramdisk_image, ramdisk_size;
|
||||
unsigned long initrd_start_early, initrd_end_early;
|
||||
struct ucode_cpu_info uci;
|
||||
#ifdef CONFIG_X86_32
|
||||
struct boot_params *boot_params_p;
|
||||
|
||||
boot_params_p = (struct boot_params *)__pa_nodebug(&boot_params);
|
||||
ramdisk_image = boot_params_p->hdr.ramdisk_image;
|
||||
ramdisk_size = boot_params_p->hdr.ramdisk_size;
|
||||
initrd_start_early = ramdisk_image;
|
||||
initrd_end_early = initrd_start_early + ramdisk_size;
|
||||
|
||||
_load_ucode_intel_bsp(
|
||||
(struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
|
||||
(unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
|
||||
initrd_start_early, initrd_end_early, &uci);
|
||||
#else
|
||||
ramdisk_image = boot_params.hdr.ramdisk_image;
|
||||
ramdisk_size = boot_params.hdr.ramdisk_size;
|
||||
initrd_start_early = ramdisk_image + PAGE_OFFSET;
|
||||
initrd_end_early = initrd_start_early + ramdisk_size;
|
||||
|
||||
_load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd,
|
||||
initrd_start_early, initrd_end_early,
|
||||
&uci);
|
||||
#endif
|
||||
}
|
||||
|
||||
void load_ucode_intel_ap(void)
|
||||
{
|
||||
struct mc_saved_data *mc_saved_data_p;
|
||||
struct ucode_cpu_info uci;
|
||||
unsigned long *mc_saved_in_initrd_p;
|
||||
unsigned long initrd_start_addr;
|
||||
#ifdef CONFIG_X86_32
|
||||
unsigned long *initrd_start_p;
|
||||
|
||||
mc_saved_in_initrd_p =
|
||||
(unsigned long *)__pa_nodebug(mc_saved_in_initrd);
|
||||
mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
|
||||
initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
|
||||
initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p);
|
||||
#else
|
||||
mc_saved_data_p = &mc_saved_data;
|
||||
mc_saved_in_initrd_p = mc_saved_in_initrd;
|
||||
initrd_start_addr = initrd_start;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If there is no valid ucode previously saved in memory, no need to
|
||||
* update ucode on this AP.
|
||||
*/
|
||||
if (mc_saved_data_p->mc_saved_count == 0)
|
||||
return;
|
||||
|
||||
collect_cpu_info_early(&uci);
|
||||
load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
|
||||
initrd_start_addr, &uci);
|
||||
apply_microcode_early(&uci, true);
|
||||
}
|
||||
|
||||
void reload_ucode_intel(void)
|
||||
{
|
||||
struct ucode_cpu_info uci;
|
||||
enum ucode_state ret;
|
||||
|
||||
if (!mc_saved_data.mc_saved_count)
|
||||
return;
|
||||
|
||||
collect_cpu_info_early(&uci);
|
||||
|
||||
ret = generic_load_microcode_early(mc_saved_data.mc_saved,
|
||||
mc_saved_data.mc_saved_count, &uci);
|
||||
if (ret != UCODE_OK)
|
||||
return;
|
||||
|
||||
apply_microcode_early(&uci, false);
|
||||
}
174
arch/x86/kernel/cpu/microcode/intel_lib.c
Normal file
@ -0,0 +1,174 @@
/*
|
||||
* Intel CPU Microcode Update Driver for Linux
|
||||
*
|
||||
* Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
|
||||
* "H Peter Anvin" <hpa@zytor.com>
|
||||
*
|
||||
* This driver allows to upgrade microcode on Intel processors
|
||||
* belonging to IA-32 family - PentiumPro, Pentium II,
|
||||
* Pentium III, Xeon, Pentium 4, etc.
|
||||
*
|
||||
* Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture
|
||||
* Software Developer's Manual
|
||||
* Order Number 253668 or free download from:
|
||||
*
|
||||
* http://developer.intel.com/Assets/PDF/manual/253668.pdf
|
||||
*
|
||||
* For more information, go to http://www.urbanmyth.org/microcode
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
*/
|
||||
#include <linux/firmware.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <asm/microcode_intel.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
static inline int
|
||||
update_match_cpu(unsigned int csig, unsigned int cpf,
|
||||
unsigned int sig, unsigned int pf)
|
||||
{
|
||||
return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1;
|
||||
}
|
||||
|
||||
int
|
||||
update_match_revision(struct microcode_header_intel *mc_header, int rev)
|
||||
{
|
||||
return (mc_header->rev <= rev) ? 0 : 1;
|
||||
}
|
||||
|
||||
int microcode_sanity_check(void *mc, int print_err)
|
||||
{
|
||||
unsigned long total_size, data_size, ext_table_size;
|
||||
struct microcode_header_intel *mc_header = mc;
|
||||
struct extended_sigtable *ext_header = NULL;
|
||||
int sum, orig_sum, ext_sigcount = 0, i;
|
||||
struct extended_signature *ext_sig;
|
||||
|
||||
total_size = get_totalsize(mc_header);
|
||||
data_size = get_datasize(mc_header);
|
||||
|
||||
if (data_size + MC_HEADER_SIZE > total_size) {
|
||||
if (print_err)
|
||||
pr_err("error! Bad data size in microcode data file\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
|
||||
if (print_err)
|
||||
pr_err("error! Unknown microcode update format\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
|
||||
if (ext_table_size) {
|
||||
if ((ext_table_size < EXT_HEADER_SIZE)
|
||||
|| ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
|
||||
if (print_err)
|
||||
pr_err("error! Small exttable size in microcode data file\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
ext_header = mc + MC_HEADER_SIZE + data_size;
|
||||
if (ext_table_size != exttable_size(ext_header)) {
|
||||
if (print_err)
|
||||
pr_err("error! Bad exttable size in microcode data file\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
ext_sigcount = ext_header->count;
|
||||
}
|
||||
|
||||
/* check extended table checksum */
|
||||
if (ext_table_size) {
|
||||
int ext_table_sum = 0;
|
||||
int *ext_tablep = (int *)ext_header;
|
||||
|
||||
i = ext_table_size / DWSIZE;
|
||||
while (i--)
|
||||
ext_table_sum += ext_tablep[i];
|
||||
if (ext_table_sum) {
|
||||
if (print_err)
|
||||
pr_warn("aborting, bad extended signature table checksum\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
/* calculate the checksum */
|
||||
orig_sum = 0;
|
||||
i = (MC_HEADER_SIZE + data_size) / DWSIZE;
|
||||
while (i--)
|
||||
orig_sum += ((int *)mc)[i];
|
||||
if (orig_sum) {
|
||||
if (print_err)
|
||||
pr_err("aborting, bad checksum\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (!ext_table_size)
|
||||
return 0;
|
||||
/* check extended signature checksum */
|
||||
for (i = 0; i < ext_sigcount; i++) {
|
||||
ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
|
||||
EXT_SIGNATURE_SIZE * i;
|
||||
sum = orig_sum
|
||||
- (mc_header->sig + mc_header->pf + mc_header->cksum)
|
||||
+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
|
||||
if (sum) {
|
||||
if (print_err)
|
||||
pr_err("aborting, bad checksum\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(microcode_sanity_check);
|
||||
|
||||
/*
|
||||
* return 0 - no update found
|
||||
* return 1 - found update
|
||||
*/
|
||||
int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev)
|
||||
{
|
||||
struct microcode_header_intel *mc_header = mc;
|
||||
struct extended_sigtable *ext_header;
|
||||
unsigned long total_size = get_totalsize(mc_header);
|
||||
int ext_sigcount, i;
|
||||
struct extended_signature *ext_sig;
|
||||
|
||||
if (update_match_cpu(csig, cpf, mc_header->sig, mc_header->pf))
|
||||
return 1;
|
||||
|
||||
/* Look for ext. headers: */
|
||||
if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
|
||||
return 0;
|
||||
|
||||
ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
|
||||
ext_sigcount = ext_header->count;
|
||||
ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
|
||||
|
||||
for (i = 0; i < ext_sigcount; i++) {
|
||||
if (update_match_cpu(csig, cpf, ext_sig->sig, ext_sig->pf))
|
||||
return 1;
|
||||
ext_sig++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* return 0 - no update found
|
||||
* return 1 - found update
|
||||
*/
|
||||
int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev)
|
||||
{
|
||||
struct microcode_header_intel *mc_header = mc;
|
||||
|
||||
if (!update_match_revision(mc_header, rev))
|
||||
return 0;
|
||||
|
||||
return get_matching_sig(csig, cpf, mc, rev);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(get_matching_microcode);
64
arch/x86/kernel/cpu/mkcapflags.sh
Normal file
@ -0,0 +1,64 @@
#!/bin/sh
#
# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h
#

IN=$1
OUT=$2

function dump_array()
{
	ARRAY=$1
	SIZE=$2
	PFX=$3
	POSTFIX=$4

	PFX_SZ=$(echo $PFX | wc -c)
	TABS="$(printf '\t\t\t\t\t')"

	echo "const char * const $ARRAY[$SIZE] = {"

	# Iterate through any input lines starting with #define $PFX
	sed -n -e 's/\t/ /g' -e "s/^ *# *define *$PFX//p" $IN |
	while read i
	do
		# Name is everything up to the first whitespace
		NAME="$(echo "$i" | sed 's/ .*//')"

		# If the /* comment */ starts with a quote string, grab that.
		VALUE="$(echo "$i" | sed -n 's@.*/\* *\("[^"]*"\).*\*/@\1@p')"
		[ -z "$VALUE" ] && VALUE="\"$NAME\""
		[ "$VALUE" == '""' ] && continue

		# Name is uppercase, VALUE is all lowercase
		VALUE="$(echo "$VALUE" | tr A-Z a-z)"

		if [ -n "$POSTFIX" ]; then
			T=$(( $PFX_SZ + $(echo $POSTFIX | wc -c) + 2 ))
			TABS="$(printf '\t\t\t\t\t\t')"
			TABCOUNT=$(( ( 6*8 - ($T + 1) - $(echo "$NAME" | wc -c) ) / 8 ))
			printf "\t[%s - %s]%.*s = %s,\n" "$PFX$NAME" "$POSTFIX" "$TABCOUNT" "$TABS" "$VALUE"
		else
			TABCOUNT=$(( ( 5*8 - ($PFX_SZ + 1) - $(echo "$NAME" | wc -c) ) / 8 ))
			printf "\t[%s]%.*s = %s,\n" "$PFX$NAME" "$TABCOUNT" "$TABS" "$VALUE"
		fi
	done
	echo "};"
}

trap 'rm "$OUT"' EXIT

(
	echo "#ifndef _ASM_X86_CPUFEATURE_H"
	echo "#include <asm/cpufeature.h>"
	echo "#endif"
	echo ""

	dump_array "x86_cap_flags" "NCAPINTS*32" "X86_FEATURE_" ""
	echo ""

	dump_array "x86_bug_flags" "NBUGINTS*32" "X86_BUG_" "NCAPINTS*32"

) > $OUT

trap - EXIT
153
arch/x86/kernel/cpu/mshyperv.c
Normal file
@ -0,0 +1,153 @@
/*
|
||||
* HyperV Detection code.
|
||||
*
|
||||
* Copyright (C) 2010, Novell, Inc.
|
||||
* Author : K. Y. Srinivasan <ksrinivasan@novell.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; version 2 of the License.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/efi.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/irq.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/hypervisor.h>
|
||||
#include <asm/hyperv.h>
|
||||
#include <asm/mshyperv.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/idle.h>
|
||||
#include <asm/irq_regs.h>
|
||||
#include <asm/i8259.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/timer.h>
|
||||
|
||||
struct ms_hyperv_info ms_hyperv;
|
||||
EXPORT_SYMBOL_GPL(ms_hyperv);
|
||||
|
||||
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
static void (*vmbus_handler)(void);
|
||||
|
||||
void hyperv_vector_handler(struct pt_regs *regs)
|
||||
{
|
||||
struct pt_regs *old_regs = set_irq_regs(regs);
|
||||
|
||||
irq_enter();
|
||||
exit_idle();
|
||||
|
||||
inc_irq_stat(irq_hv_callback_count);
|
||||
if (vmbus_handler)
|
||||
vmbus_handler();
|
||||
|
||||
irq_exit();
|
||||
set_irq_regs(old_regs);
|
||||
}
|
||||
|
||||
void hv_setup_vmbus_irq(void (*handler)(void))
|
||||
{
|
||||
vmbus_handler = handler;
|
||||
/*
|
||||
* Setup the IDT for hypervisor callback. Prevent reallocation
|
||||
* at module reload.
|
||||
*/
|
||||
if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors))
|
||||
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
|
||||
hyperv_callback_vector);
|
||||
}
|
||||
|
||||
void hv_remove_vmbus_irq(void)
|
||||
{
|
||||
/* We have no way to deallocate the interrupt gate */
|
||||
vmbus_handler = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq);
|
||||
EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq);
|
||||
#endif
|
||||
|
||||
static uint32_t __init ms_hyperv_platform(void)
|
||||
{
|
||||
u32 eax;
|
||||
u32 hyp_signature[3];
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
|
||||
return 0;
|
||||
|
||||
cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
|
||||
&eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
|
||||
|
||||
if (eax >= HYPERV_CPUID_MIN &&
|
||||
eax <= HYPERV_CPUID_MAX &&
|
||||
!memcmp("Microsoft Hv", hyp_signature, 12))
|
||||
return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static cycle_t read_hv_clock(struct clocksource *arg)
|
||||
{
|
||||
cycle_t current_tick;
|
||||
/*
|
||||
* Read the partition counter to get the current tick count. This count
|
||||
* is set to 0 when the partition is created and is incremented in
|
||||
* 100 nanosecond units.
|
||||
*/
|
||||
rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
|
||||
return current_tick;
|
||||
}
|
||||
|
||||
static struct clocksource hyperv_cs = {
|
||||
.name = "hyperv_clocksource",
|
||||
.rating = 400, /* use this when running on Hyperv*/
|
||||
.read = read_hv_clock,
|
||||
.mask = CLOCKSOURCE_MASK(64),
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
static void __init ms_hyperv_init_platform(void)
|
||||
{
|
||||
/*
|
||||
* Extract the features and hints
|
||||
*/
|
||||
ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
|
||||
ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
|
||||
|
||||
printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
|
||||
ms_hyperv.features, ms_hyperv.hints);
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
|
||||
/*
|
||||
* Get the APIC frequency.
|
||||
*/
|
||||
u64 hv_lapic_frequency;
|
||||
|
||||
rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
|
||||
hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
|
||||
lapic_timer_frequency = hv_lapic_frequency;
|
||||
printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n",
|
||||
lapic_timer_frequency);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
|
||||
clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
|
||||
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
no_timer_check = 1;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
|
||||
.name = "Microsoft HyperV",
|
||||
.detect = ms_hyperv_platform,
|
||||
.init_platform = ms_hyperv_init_platform,
|
||||
};
|
||||
EXPORT_SYMBOL(x86_hyper_ms_hyperv);
3
arch/x86/kernel/cpu/mtrr/Makefile
Normal file
@ -0,0 +1,3 @@
obj-y := main.o if.o generic.o cleanup.o
obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
124
arch/x86/kernel/cpu/mtrr/amd.c
Normal file
@ -0,0 +1,124 @@
#include <linux/init.h>
|
||||
#include <linux/mm.h>
|
||||
#include <asm/mtrr.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
#include "mtrr.h"
|
||||
|
||||
static void
|
||||
amd_get_mtrr(unsigned int reg, unsigned long *base,
|
||||
unsigned long *size, mtrr_type *type)
|
||||
{
|
||||
unsigned long low, high;
|
||||
|
||||
rdmsr(MSR_K6_UWCCR, low, high);
|
||||
/* Upper dword is region 1, lower is region 0 */
|
||||
if (reg == 1)
|
||||
low = high;
|
||||
/* The base masks off on the right alignment */
|
||||
*base = (low & 0xFFFE0000) >> PAGE_SHIFT;
|
||||
*type = 0;
|
||||
if (low & 1)
|
||||
*type = MTRR_TYPE_UNCACHABLE;
|
||||
if (low & 2)
|
||||
*type = MTRR_TYPE_WRCOMB;
|
||||
if (!(low & 3)) {
|
||||
*size = 0;
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* This needs a little explaining. The size is stored as an
|
||||
* inverted mask of bits of 128K granularity 15 bits long offset
|
||||
* 2 bits.
|
||||
*
|
||||
* So to get a size we do invert the mask and add 1 to the lowest
|
||||
* mask bit (4 as its 2 bits in). This gives us a size we then shift
|
||||
* to turn into 128K blocks.
|
||||
*
|
||||
* eg 111 1111 1111 1100 is 512K
|
||||
*
|
||||
* invert 000 0000 0000 0011
|
||||
* +1 000 0000 0000 0100
|
||||
* *128K ...
|
||||
*/
|
||||
low = (~low) & 0x1FFFC;
|
||||
*size = (low + 4) << (15 - PAGE_SHIFT);
|
||||
}
|
||||
|
||||
/**
|
||||
* amd_set_mtrr - Set variable MTRR register on the local CPU.
|
||||
*
|
||||
* @reg The register to set.
|
||||
* @base The base address of the region.
|
||||
* @size The size of the region. If this is 0 the region is disabled.
|
||||
* @type The type of the region.
|
||||
*
|
||||
* Returns nothing.
|
||||
*/
|
||||
static void
|
||||
amd_set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
|
||||
{
|
||||
u32 regs[2];
|
||||
|
||||
/*
|
||||
* Low is MTRR0, High MTRR 1
|
||||
*/
|
||||
rdmsr(MSR_K6_UWCCR, regs[0], regs[1]);
|
||||
/*
|
||||
* Blank to disable
|
||||
*/
|
||||
if (size == 0) {
|
||||
regs[reg] = 0;
|
||||
} else {
|
||||
/*
|
||||
* Set the register to the base, the type (off by one) and an
|
||||
* inverted bitmask of the size The size is the only odd
|
||||
* bit. We are fed say 512K We invert this and we get 111 1111
|
||||
* 1111 1011 but if you subtract one and invert you get the
|
||||
* desired 111 1111 1111 1100 mask
|
||||
*
|
||||
* But ~(x - 1) == ~x + 1 == -x. Two's complement rocks!
|
||||
*/
|
||||
regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC)
|
||||
| (base << PAGE_SHIFT) | (type + 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* The writeback rule is quite specific. See the manual. Its
|
||||
* disable local interrupts, write back the cache, set the mtrr
|
||||
*/
|
||||
wbinvd();
|
||||
wrmsr(MSR_K6_UWCCR, regs[0], regs[1]);
|
||||
}
|
||||
|
||||
static int
|
||||
amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
|
||||
{
|
||||
/*
|
||||
* Apply the K6 block alignment and size rules
|
||||
* In order
|
||||
* o Uncached or gathering only
|
||||
* o 128K or bigger block
|
||||
* o Power of 2 block
|
||||
* o base suitably aligned to the power
|
||||
*/
|
||||
if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT))
|
||||
|| (size & ~(size - 1)) - size || (base & (size - 1)))
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct mtrr_ops amd_mtrr_ops = {
|
||||
.vendor = X86_VENDOR_AMD,
|
||||
.set = amd_set_mtrr,
|
||||
.get = amd_get_mtrr,
|
||||
.get_free_region = generic_get_free_region,
|
||||
.validate_add_page = amd_validate_add_page,
|
||||
.have_wrcomb = positive_have_wrcomb,
|
||||
};
|
||||
|
||||
int __init amd_init_mtrr(void)
|
||||
{
|
||||
set_mtrr_ops(&amd_mtrr_ops);
|
||||
return 0;
|
||||
}
126
arch/x86/kernel/cpu/mtrr/centaur.c
Normal file
@ -0,0 +1,126 @@
#include <linux/init.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
#include <asm/mtrr.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
#include "mtrr.h"
|
||||
|
||||
static struct {
|
||||
unsigned long high;
|
||||
unsigned long low;
|
||||
} centaur_mcr[8];
|
||||
|
||||
static u8 centaur_mcr_reserved;
|
||||
static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */
|
||||
|
||||
/**
|
||||
* centaur_get_free_region - Get a free MTRR.
|
||||
*
|
||||
* @base: The starting (base) address of the region.
|
||||
* @size: The size (in bytes) of the region.
|
||||
*
|
||||
* Returns: the index of the region on success, else -1 on error.
|
||||
*/
|
||||
static int
centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg)
{
	unsigned long lbase, lsize;
	mtrr_type ltype;
	int i, max;

	max = num_var_ranges;
	if (replace_reg >= 0 && replace_reg < max)
		return replace_reg;

	for (i = 0; i < max; ++i) {
		if (centaur_mcr_reserved & (1 << i))
			continue;
		mtrr_if->get(i, &lbase, &lsize, &ltype);
		if (lsize == 0)
			return i;
	}

	return -ENOSPC;
}
|
||||
|
||||
/*
|
||||
* Report boot time MCR setups
|
||||
*/
|
||||
void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
|
||||
{
|
||||
centaur_mcr[mcr].low = lo;
|
||||
centaur_mcr[mcr].high = hi;
|
||||
}
|
||||
|
||||
static void
|
||||
centaur_get_mcr(unsigned int reg, unsigned long *base,
|
||||
unsigned long *size, mtrr_type * type)
|
||||
{
|
||||
*base = centaur_mcr[reg].high >> PAGE_SHIFT;
|
||||
*size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT;
|
||||
*type = MTRR_TYPE_WRCOMB; /* write-combining */
|
||||
|
||||
if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2))
|
||||
*type = MTRR_TYPE_UNCACHABLE;
|
||||
if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25)
|
||||
*type = MTRR_TYPE_WRBACK;
|
||||
if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31)
|
||||
*type = MTRR_TYPE_WRBACK;
|
||||
}
|
||||
|
||||
static void
|
||||
centaur_set_mcr(unsigned int reg, unsigned long base,
|
||||
unsigned long size, mtrr_type type)
|
||||
{
|
||||
unsigned long low, high;
|
||||
|
||||
if (size == 0) {
|
||||
/* Disable */
|
||||
high = low = 0;
|
||||
} else {
|
||||
high = base << PAGE_SHIFT;
|
||||
if (centaur_mcr_type == 0) {
|
||||
/* Only support write-combining... */
|
||||
low = -size << PAGE_SHIFT | 0x1f;
|
||||
} else {
|
||||
if (type == MTRR_TYPE_UNCACHABLE)
|
||||
low = -size << PAGE_SHIFT | 0x02; /* NC */
|
||||
else
|
||||
low = -size << PAGE_SHIFT | 0x09; /* WWO, WC */
|
||||
}
|
||||
}
|
||||
centaur_mcr[reg].high = high;
|
||||
centaur_mcr[reg].low = low;
|
||||
wrmsr(MSR_IDT_MCR0 + reg, low, high);
|
||||
}
|
||||
|
||||
static int
|
||||
centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
|
||||
{
|
||||
/*
|
||||
* FIXME: Winchip2 supports uncached
|
||||
*/
|
||||
if (type != MTRR_TYPE_WRCOMB &&
|
||||
(centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
|
||||
pr_warning("mtrr: only write-combining%s supported\n",
|
||||
centaur_mcr_type ? " and uncacheable are" : " is");
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct mtrr_ops centaur_mtrr_ops = {
|
||||
.vendor = X86_VENDOR_CENTAUR,
|
||||
.set = centaur_set_mcr,
|
||||
.get = centaur_get_mcr,
|
||||
.get_free_region = centaur_get_free_region,
|
||||
.validate_add_page = centaur_validate_add_page,
|
||||
.have_wrcomb = positive_have_wrcomb,
|
||||
};
|
||||
|
||||
int __init centaur_init_mtrr(void)
{
	set_mtrr_ops(&centaur_mtrr_ops);
	return 0;
}
980
arch/x86/kernel/cpu/mtrr/cleanup.c
Normal file
@ -0,0 +1,980 @@
/*
|
||||
* MTRR (Memory Type Range Register) cleanup
|
||||
*
|
||||
* Copyright (C) 2009 Yinghai Lu
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the Free
|
||||
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/kvm_para.h>
|
||||
#include <linux/range.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/e820.h>
|
||||
#include <asm/mtrr.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
#include "mtrr.h"
|
||||
|
||||
struct var_mtrr_range_state {
|
||||
unsigned long base_pfn;
|
||||
unsigned long size_pfn;
|
||||
mtrr_type type;
|
||||
};
|
||||
|
||||
struct var_mtrr_state {
|
||||
unsigned long range_startk;
|
||||
unsigned long range_sizek;
|
||||
unsigned long chunk_sizek;
|
||||
unsigned long gran_sizek;
|
||||
unsigned int reg;
|
||||
};
|
||||
|
||||
/* Should be related to MTRR_VAR_RANGES nums */
|
||||
#define RANGE_NUM 256
|
||||
|
||||
static struct range __initdata range[RANGE_NUM];
|
||||
static int __initdata nr_range;
|
||||
|
||||
static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
|
||||
|
||||
static int __initdata debug_print;
|
||||
#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
|
||||
|
||||
#define BIOS_BUG_MSG KERN_WARNING \
|
||||
"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
|
||||
|
||||
static int __init
|
||||
x86_get_mtrr_mem_range(struct range *range, int nr_range,
|
||||
unsigned long extra_remove_base,
|
||||
unsigned long extra_remove_size)
|
||||
{
|
||||
unsigned long base, size;
|
||||
mtrr_type type;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
type = range_state[i].type;
|
||||
if (type != MTRR_TYPE_WRBACK)
|
||||
continue;
|
||||
base = range_state[i].base_pfn;
|
||||
size = range_state[i].size_pfn;
|
||||
nr_range = add_range_with_merge(range, RANGE_NUM, nr_range,
|
||||
base, base + size);
|
||||
}
|
||||
if (debug_print) {
|
||||
printk(KERN_DEBUG "After WB checking\n");
|
||||
for (i = 0; i < nr_range; i++)
|
||||
printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
|
||||
range[i].start, range[i].end);
|
||||
}
|
||||
|
||||
/* Take out UC ranges: */
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
type = range_state[i].type;
|
||||
if (type != MTRR_TYPE_UNCACHABLE &&
|
||||
type != MTRR_TYPE_WRPROT)
|
||||
continue;
|
||||
size = range_state[i].size_pfn;
|
||||
if (!size)
|
||||
continue;
|
||||
base = range_state[i].base_pfn;
|
||||
if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
|
||||
(mtrr_state.enabled & 1)) {
|
||||
/* Var MTRR contains UC entry below 1M? Skip it: */
|
||||
printk(BIOS_BUG_MSG, i);
|
||||
if (base + size <= (1<<(20-PAGE_SHIFT)))
|
||||
continue;
|
||||
size -= (1<<(20-PAGE_SHIFT)) - base;
|
||||
base = 1<<(20-PAGE_SHIFT);
|
||||
}
|
||||
subtract_range(range, RANGE_NUM, base, base + size);
|
||||
}
|
||||
if (extra_remove_size)
|
||||
subtract_range(range, RANGE_NUM, extra_remove_base,
|
||||
extra_remove_base + extra_remove_size);
|
||||
|
||||
if (debug_print) {
|
||||
printk(KERN_DEBUG "After UC checking\n");
|
||||
for (i = 0; i < RANGE_NUM; i++) {
|
||||
if (!range[i].end)
|
||||
continue;
|
||||
printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
|
||||
range[i].start, range[i].end);
|
||||
}
|
||||
}
|
||||
|
||||
/* sort the ranges */
|
||||
nr_range = clean_sort_range(range, RANGE_NUM);
|
||||
if (debug_print) {
|
||||
printk(KERN_DEBUG "After sorting\n");
|
||||
for (i = 0; i < nr_range; i++)
|
||||
printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
|
||||
range[i].start, range[i].end);
|
||||
}
|
||||
|
||||
return nr_range;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MTRR_SANITIZER
|
||||
|
||||
static unsigned long __init sum_ranges(struct range *range, int nr_range)
|
||||
{
|
||||
unsigned long sum = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nr_range; i++)
|
||||
sum += range[i].end - range[i].start;
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int enable_mtrr_cleanup __initdata =
|
||||
CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
|
||||
|
||||
static int __init disable_mtrr_cleanup_setup(char *str)
|
||||
{
|
||||
enable_mtrr_cleanup = 0;
|
||||
return 0;
|
||||
}
|
||||
early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
|
||||
|
||||
static int __init enable_mtrr_cleanup_setup(char *str)
|
||||
{
|
||||
enable_mtrr_cleanup = 1;
|
||||
return 0;
|
||||
}
|
||||
early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
|
||||
|
||||
static int __init mtrr_cleanup_debug_setup(char *str)
|
||||
{
|
||||
debug_print = 1;
|
||||
return 0;
|
||||
}
|
||||
early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
|
||||
|
||||
static void __init
|
||||
set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
|
||||
unsigned char type, unsigned int address_bits)
|
||||
{
|
||||
u32 base_lo, base_hi, mask_lo, mask_hi;
|
||||
u64 base, mask;
|
||||
|
||||
if (!sizek) {
|
||||
fill_mtrr_var_range(reg, 0, 0, 0, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
mask = (1ULL << address_bits) - 1;
|
||||
mask &= ~((((u64)sizek) << 10) - 1);
|
||||
|
||||
base = ((u64)basek) << 10;
|
||||
|
||||
base |= type;
|
||||
mask |= 0x800;
|
||||
|
||||
base_lo = base & ((1ULL<<32) - 1);
|
||||
base_hi = base >> 32;
|
||||
|
||||
mask_lo = mask & ((1ULL<<32) - 1);
|
||||
mask_hi = mask >> 32;
|
||||
|
||||
fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
|
||||
}
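A minimal user-space sketch of the PHYSBASE/PHYSMASK encoding performed by set_var_mtrr() above; the 36-bit address width and the 2GB base / 1GB write-back region are assumed example values, and the program only prints the two register words instead of writing MSRs.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int address_bits = 36;       /* assumed physical address width */
	uint64_t basek = 2097152;             /* 2 GB, expressed in KB */
	uint64_t sizek = 1048576;             /* 1 GB, expressed in KB */
	uint8_t  type  = 6;                   /* MTRR_TYPE_WRBACK */

	uint64_t mask = (1ULL << address_bits) - 1;
	mask &= ~((sizek << 10) - 1);         /* clear address bits below the region size */
	uint64_t base = (basek << 10) | type; /* physical base plus memory type */
	mask |= 0x800;                        /* bit 11: region valid */

	printf("PHYSBASE lo=%08x hi=%08x\n",
	       (unsigned int)(base & 0xffffffffULL), (unsigned int)(base >> 32));
	printf("PHYSMASK lo=%08x hi=%08x\n",
	       (unsigned int)(mask & 0xffffffffULL), (unsigned int)(mask >> 32));
	return 0;
}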
|
||||
|
||||
static void __init
|
||||
save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
|
||||
unsigned char type)
|
||||
{
|
||||
range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
|
||||
range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
|
||||
range_state[reg].type = type;
|
||||
}
|
||||
|
||||
static void __init set_var_mtrr_all(unsigned int address_bits)
|
||||
{
|
||||
unsigned long basek, sizek;
|
||||
unsigned char type;
|
||||
unsigned int reg;
|
||||
|
||||
for (reg = 0; reg < num_var_ranges; reg++) {
|
||||
basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
|
||||
sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
|
||||
type = range_state[reg].type;
|
||||
|
||||
set_var_mtrr(reg, basek, sizek, type, address_bits);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long to_size_factor(unsigned long sizek, char *factorp)
|
||||
{
|
||||
unsigned long base = sizek;
|
||||
char factor;
|
||||
|
||||
if (base & ((1<<10) - 1)) {
|
||||
/* Not MB-aligned: */
|
||||
factor = 'K';
|
||||
} else if (base & ((1<<20) - 1)) {
|
||||
factor = 'M';
|
||||
base >>= 10;
|
||||
} else {
|
||||
factor = 'G';
|
||||
base >>= 20;
|
||||
}
|
||||
|
||||
*factorp = factor;
|
||||
|
||||
return base;
|
||||
}
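A small stand-alone sketch of the K/M/G reduction done by to_size_factor() above; the sample sizes are made up.

#include <stdio.h>

static unsigned long size_factor(unsigned long sizek, char *factorp)
{
	unsigned long base = sizek;
	char factor;

	if (base & ((1 << 10) - 1)) {
		factor = 'K';            /* not MB-aligned: keep KB */
	} else if (base & ((1 << 20) - 1)) {
		factor = 'M';
		base >>= 10;             /* MB-aligned: report in MB */
	} else {
		factor = 'G';
		base >>= 20;             /* GB-aligned: report in GB */
	}
	*factorp = factor;
	return base;
}

int main(void)
{
	unsigned long samples[] = { 640, 524288, 2097152 };  /* 640K, 512M, 2G in KB */

	for (int i = 0; i < 3; i++) {
		char f;
		unsigned long v = size_factor(samples[i], &f);
		printf("%lu KB -> %lu%cB\n", samples[i], v, f);
	}
	return 0;
}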
|
||||
|
||||
static unsigned int __init
|
||||
range_to_mtrr(unsigned int reg, unsigned long range_startk,
|
||||
unsigned long range_sizek, unsigned char type)
|
||||
{
|
||||
if (!range_sizek || (reg >= num_var_ranges))
|
||||
return reg;
|
||||
|
||||
while (range_sizek) {
|
||||
unsigned long max_align, align;
|
||||
unsigned long sizek;
|
||||
|
||||
/* Compute the maximum size with which we can make a range: */
|
||||
if (range_startk)
|
||||
max_align = __ffs(range_startk);
|
||||
else
|
||||
max_align = BITS_PER_LONG - 1;
|
||||
|
||||
align = __fls(range_sizek);
|
||||
if (align > max_align)
|
||||
align = max_align;
|
||||
|
||||
sizek = 1UL << align;
|
||||
if (debug_print) {
|
||||
char start_factor = 'K', size_factor = 'K';
|
||||
unsigned long start_base, size_base;
|
||||
|
||||
start_base = to_size_factor(range_startk, &start_factor);
|
||||
size_base = to_size_factor(sizek, &size_factor);
|
||||
|
||||
Dprintk("Setting variable MTRR %d, "
|
||||
"base: %ld%cB, range: %ld%cB, type %s\n",
|
||||
reg, start_base, start_factor,
|
||||
size_base, size_factor,
|
||||
(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
|
||||
((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")
|
||||
);
|
||||
}
|
||||
save_var_mtrr(reg++, range_startk, sizek, type);
|
||||
range_startk += sizek;
|
||||
range_sizek -= sizek;
|
||||
if (reg >= num_var_ranges)
|
||||
break;
|
||||
}
|
||||
return reg;
|
||||
}
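A user-space sketch of the chunking loop in range_to_mtrr() above: each step takes the largest power-of-two block that is both aligned to the current start and no larger than what remains. It assumes a 64-bit long so that GCC's __builtin_ctzl/__builtin_clzl can stand in for __ffs/__fls, and the 3840 MB range is an arbitrary example.

#include <stdio.h>

int main(void)
{
	unsigned long startk = 0;            /* range start in KB */
	unsigned long sizek  = 3840UL << 10; /* 3840 MB left to cover */

	while (sizek) {
		/* largest alignment the current start allows */
		unsigned long max_align = startk ? __builtin_ctzl(startk) : 63;
		/* largest power of two not exceeding the remaining size */
		unsigned long align = 63 - __builtin_clzl(sizek);

		if (align > max_align)
			align = max_align;

		unsigned long chunk = 1UL << align;
		printf("block at %8luK size %8luK\n", startk, chunk);
		startk += chunk;
		sizek  -= chunk;
	}
	return 0;   /* prints 2G, 1G, 512M and 256M blocks */
}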
|
||||
|
||||
static unsigned __init
|
||||
range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
|
||||
unsigned long sizek)
|
||||
{
|
||||
unsigned long hole_basek, hole_sizek;
|
||||
unsigned long second_basek, second_sizek;
|
||||
unsigned long range0_basek, range0_sizek;
|
||||
unsigned long range_basek, range_sizek;
|
||||
unsigned long chunk_sizek;
|
||||
unsigned long gran_sizek;
|
||||
|
||||
hole_basek = 0;
|
||||
hole_sizek = 0;
|
||||
second_basek = 0;
|
||||
second_sizek = 0;
|
||||
chunk_sizek = state->chunk_sizek;
|
||||
gran_sizek = state->gran_sizek;
|
||||
|
||||
/* Align with gran size, to prevent small blocks from using up MTRRs: */
|
||||
range_basek = ALIGN(state->range_startk, gran_sizek);
|
||||
if ((range_basek > basek) && basek)
|
||||
return second_sizek;
|
||||
|
||||
state->range_sizek -= (range_basek - state->range_startk);
|
||||
range_sizek = ALIGN(state->range_sizek, gran_sizek);
|
||||
|
||||
while (range_sizek > state->range_sizek) {
|
||||
range_sizek -= gran_sizek;
|
||||
if (!range_sizek)
|
||||
return 0;
|
||||
}
|
||||
state->range_sizek = range_sizek;
|
||||
|
||||
/* Try to append some small hole: */
|
||||
range0_basek = state->range_startk;
|
||||
range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
|
||||
|
||||
/* No increase: */
|
||||
if (range0_sizek == state->range_sizek) {
|
||||
Dprintk("rangeX: %016lx - %016lx\n",
|
||||
range0_basek<<10,
|
||||
(range0_basek + state->range_sizek)<<10);
|
||||
state->reg = range_to_mtrr(state->reg, range0_basek,
|
||||
state->range_sizek, MTRR_TYPE_WRBACK);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Only cut back when it is not the last: */
|
||||
if (sizek) {
|
||||
while (range0_basek + range0_sizek > (basek + sizek)) {
|
||||
if (range0_sizek >= chunk_sizek)
|
||||
range0_sizek -= chunk_sizek;
|
||||
else
|
||||
range0_sizek = 0;
|
||||
|
||||
if (!range0_sizek)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
second_try:
|
||||
range_basek = range0_basek + range0_sizek;
|
||||
|
||||
/* One hole in the middle: */
|
||||
if (range_basek > basek && range_basek <= (basek + sizek))
|
||||
second_sizek = range_basek - basek;
|
||||
|
||||
if (range0_sizek > state->range_sizek) {
|
||||
|
||||
/* One hole in middle or at the end: */
|
||||
hole_sizek = range0_sizek - state->range_sizek - second_sizek;
|
||||
|
||||
/* Hole size should be less than half of range0 size: */
|
||||
if (hole_sizek >= (range0_sizek >> 1) &&
|
||||
range0_sizek >= chunk_sizek) {
|
||||
range0_sizek -= chunk_sizek;
|
||||
second_sizek = 0;
|
||||
hole_sizek = 0;
|
||||
|
||||
goto second_try;
|
||||
}
|
||||
}
|
||||
|
||||
if (range0_sizek) {
|
||||
Dprintk("range0: %016lx - %016lx\n",
|
||||
range0_basek<<10,
|
||||
(range0_basek + range0_sizek)<<10);
|
||||
state->reg = range_to_mtrr(state->reg, range0_basek,
|
||||
range0_sizek, MTRR_TYPE_WRBACK);
|
||||
}
|
||||
|
||||
if (range0_sizek < state->range_sizek) {
|
||||
/* Need to handle left over range: */
|
||||
range_sizek = state->range_sizek - range0_sizek;
|
||||
|
||||
Dprintk("range: %016lx - %016lx\n",
|
||||
range_basek<<10,
|
||||
(range_basek + range_sizek)<<10);
|
||||
|
||||
state->reg = range_to_mtrr(state->reg, range_basek,
|
||||
range_sizek, MTRR_TYPE_WRBACK);
|
||||
}
|
||||
|
||||
if (hole_sizek) {
|
||||
hole_basek = range_basek - hole_sizek - second_sizek;
|
||||
Dprintk("hole: %016lx - %016lx\n",
|
||||
hole_basek<<10,
|
||||
(hole_basek + hole_sizek)<<10);
|
||||
state->reg = range_to_mtrr(state->reg, hole_basek,
|
||||
hole_sizek, MTRR_TYPE_UNCACHABLE);
|
||||
}
|
||||
|
||||
return second_sizek;
|
||||
}
|
||||
|
||||
static void __init
|
||||
set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
|
||||
unsigned long size_pfn)
|
||||
{
|
||||
unsigned long basek, sizek;
|
||||
unsigned long second_sizek = 0;
|
||||
|
||||
if (state->reg >= num_var_ranges)
|
||||
return;
|
||||
|
||||
basek = base_pfn << (PAGE_SHIFT - 10);
|
||||
sizek = size_pfn << (PAGE_SHIFT - 10);
|
||||
|
||||
/* See if I can merge with the last range: */
|
||||
if ((basek <= 1024) ||
|
||||
(state->range_startk + state->range_sizek == basek)) {
|
||||
unsigned long endk = basek + sizek;
|
||||
state->range_sizek = endk - state->range_startk;
|
||||
return;
|
||||
}
|
||||
/* Write the range mtrrs: */
|
||||
if (state->range_sizek != 0)
|
||||
second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
|
||||
|
||||
/* Allocate an msr: */
|
||||
state->range_startk = basek + second_sizek;
|
||||
state->range_sizek = sizek - second_sizek;
|
||||
}
|
||||
|
||||
/* Minimum size of mtrr block that can take a hole: */
|
||||
static u64 mtrr_chunk_size __initdata = (256ULL<<20);
|
||||
|
||||
static int __init parse_mtrr_chunk_size_opt(char *p)
|
||||
{
|
||||
if (!p)
|
||||
return -EINVAL;
|
||||
mtrr_chunk_size = memparse(p, &p);
|
||||
return 0;
|
||||
}
|
||||
early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
|
||||
|
||||
/* Granularity of mtrr of block: */
|
||||
static u64 mtrr_gran_size __initdata;
|
||||
|
||||
static int __init parse_mtrr_gran_size_opt(char *p)
|
||||
{
|
||||
if (!p)
|
||||
return -EINVAL;
|
||||
mtrr_gran_size = memparse(p, &p);
|
||||
return 0;
|
||||
}
|
||||
early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
|
||||
|
||||
static unsigned long nr_mtrr_spare_reg __initdata =
|
||||
CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
|
||||
|
||||
static int __init parse_mtrr_spare_reg(char *arg)
|
||||
{
|
||||
if (arg)
|
||||
nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
|
||||
|
||||
static int __init
|
||||
x86_setup_var_mtrrs(struct range *range, int nr_range,
|
||||
u64 chunk_size, u64 gran_size)
|
||||
{
|
||||
struct var_mtrr_state var_state;
|
||||
int num_reg;
|
||||
int i;
|
||||
|
||||
var_state.range_startk = 0;
|
||||
var_state.range_sizek = 0;
|
||||
var_state.reg = 0;
|
||||
var_state.chunk_sizek = chunk_size >> 10;
|
||||
var_state.gran_sizek = gran_size >> 10;
|
||||
|
||||
memset(range_state, 0, sizeof(range_state));
|
||||
|
||||
/* Write the range: */
|
||||
for (i = 0; i < nr_range; i++) {
|
||||
set_var_mtrr_range(&var_state, range[i].start,
|
||||
range[i].end - range[i].start);
|
||||
}
|
||||
|
||||
/* Write the last range: */
|
||||
if (var_state.range_sizek != 0)
|
||||
range_to_mtrr_with_hole(&var_state, 0, 0);
|
||||
|
||||
num_reg = var_state.reg;
|
||||
/* Clear out the extra MTRR's: */
|
||||
while (var_state.reg < num_var_ranges) {
|
||||
save_var_mtrr(var_state.reg, 0, 0, 0);
|
||||
var_state.reg++;
|
||||
}
|
||||
|
||||
return num_reg;
|
||||
}
|
||||
|
||||
struct mtrr_cleanup_result {
|
||||
unsigned long gran_sizek;
|
||||
unsigned long chunk_sizek;
|
||||
unsigned long lose_cover_sizek;
|
||||
unsigned int num_reg;
|
||||
int bad;
|
||||
};
|
||||
|
||||
/*
|
||||
* gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
|
||||
* chunk size: gran_size, ..., 2G
|
||||
* so we need (1+16)*8
|
||||
*/
|
||||
#define NUM_RESULT 136
|
||||
#define PSHIFT (PAGE_SHIFT - 10)
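A quick stand-alone check of the "(1+16)*8" arithmetic in the comment above: with gran_size running over the 16 powers of two from 64K to 2G, and chunk_size running from gran_size up to 2G, the number of (gran, chunk) pairs is 16 + 15 + ... + 1 = 136, which is what NUM_RESULT encodes.

#include <stdio.h>

int main(void)
{
	unsigned long long gran, chunk;
	int n = 0;

	/* same bounds as the search loops in mtrr_cleanup() below */
	for (gran = 1ULL << 16; gran < (1ULL << 32); gran <<= 1)
		for (chunk = gran; chunk < (1ULL << 32); chunk <<= 1)
			n++;

	printf("%d (gran, chunk) combinations\n", n);   /* prints 136 */
	return 0;
}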
|
||||
|
||||
static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
|
||||
static unsigned long __initdata min_loss_pfn[RANGE_NUM];
|
||||
|
||||
static void __init print_out_mtrr_range_state(void)
|
||||
{
|
||||
char start_factor = 'K', size_factor = 'K';
|
||||
unsigned long start_base, size_base;
|
||||
mtrr_type type;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
|
||||
size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
|
||||
if (!size_base)
|
||||
continue;
|
||||
|
||||
size_base = to_size_factor(size_base, &size_factor),
|
||||
start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
|
||||
start_base = to_size_factor(start_base, &start_factor),
|
||||
type = range_state[i].type;
|
||||
|
||||
printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
|
||||
i, start_base, start_factor,
|
||||
size_base, size_factor,
|
||||
(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
|
||||
((type == MTRR_TYPE_WRPROT) ? "WP" :
|
||||
((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
static int __init mtrr_need_cleanup(void)
|
||||
{
|
||||
int i;
|
||||
mtrr_type type;
|
||||
unsigned long size;
|
||||
/* Extra one for all 0: */
|
||||
int num[MTRR_NUM_TYPES + 1];
|
||||
|
||||
/* Check entries number: */
|
||||
memset(num, 0, sizeof(num));
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
type = range_state[i].type;
|
||||
size = range_state[i].size_pfn;
|
||||
if (type >= MTRR_NUM_TYPES)
|
||||
continue;
|
||||
if (!size)
|
||||
type = MTRR_NUM_TYPES;
|
||||
num[type]++;
|
||||
}
|
||||
|
||||
/* Check if we got UC entries: */
|
||||
if (!num[MTRR_TYPE_UNCACHABLE])
|
||||
return 0;
|
||||
|
||||
/* Check if we only had WB and UC */
|
||||
if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
|
||||
num_var_ranges - num[MTRR_NUM_TYPES])
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static unsigned long __initdata range_sums;
|
||||
|
||||
static void __init
|
||||
mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
|
||||
unsigned long x_remove_base,
|
||||
unsigned long x_remove_size, int i)
|
||||
{
|
||||
static struct range range_new[RANGE_NUM];
|
||||
unsigned long range_sums_new;
|
||||
static int nr_range_new;
|
||||
int num_reg;
|
||||
|
||||
/* Convert ranges to var ranges state: */
|
||||
num_reg = x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
|
||||
|
||||
/* We got new setting in range_state, check it: */
|
||||
memset(range_new, 0, sizeof(range_new));
|
||||
nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
|
||||
x_remove_base, x_remove_size);
|
||||
range_sums_new = sum_ranges(range_new, nr_range_new);
|
||||
|
||||
result[i].chunk_sizek = chunk_size >> 10;
|
||||
result[i].gran_sizek = gran_size >> 10;
|
||||
result[i].num_reg = num_reg;
|
||||
|
||||
if (range_sums < range_sums_new) {
|
||||
result[i].lose_cover_sizek = (range_sums_new - range_sums) << PSHIFT;
|
||||
result[i].bad = 1;
|
||||
} else {
|
||||
result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT;
|
||||
}
|
||||
|
||||
/* Double check it: */
|
||||
if (!result[i].bad && !result[i].lose_cover_sizek) {
|
||||
if (nr_range_new != nr_range || memcmp(range, range_new, sizeof(range)))
|
||||
result[i].bad = 1;
|
||||
}
|
||||
|
||||
if (!result[i].bad && (range_sums - range_sums_new < min_loss_pfn[num_reg]))
|
||||
min_loss_pfn[num_reg] = range_sums - range_sums_new;
|
||||
}
|
||||
|
||||
static void __init mtrr_print_out_one_result(int i)
|
||||
{
|
||||
unsigned long gran_base, chunk_base, lose_base;
|
||||
char gran_factor, chunk_factor, lose_factor;
|
||||
|
||||
gran_base = to_size_factor(result[i].gran_sizek, &gran_factor);
|
||||
chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor);
|
||||
lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor);
|
||||
|
||||
pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t",
|
||||
result[i].bad ? "*BAD*" : " ",
|
||||
gran_base, gran_factor, chunk_base, chunk_factor);
|
||||
pr_cont("num_reg: %d \tlose cover RAM: %s%ld%c\n",
|
||||
result[i].num_reg, result[i].bad ? "-" : "",
|
||||
lose_base, lose_factor);
|
||||
}
|
||||
|
||||
static int __init mtrr_search_optimal_index(void)
|
||||
{
|
||||
int num_reg_good;
|
||||
int index_good;
|
||||
int i;
|
||||
|
||||
if (nr_mtrr_spare_reg >= num_var_ranges)
|
||||
nr_mtrr_spare_reg = num_var_ranges - 1;
|
||||
|
||||
num_reg_good = -1;
|
||||
for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
|
||||
if (!min_loss_pfn[i])
|
||||
num_reg_good = i;
|
||||
}
|
||||
|
||||
index_good = -1;
|
||||
if (num_reg_good != -1) {
|
||||
for (i = 0; i < NUM_RESULT; i++) {
|
||||
if (!result[i].bad &&
|
||||
result[i].num_reg == num_reg_good &&
|
||||
!result[i].lose_cover_sizek) {
|
||||
index_good = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return index_good;
|
||||
}
|
||||
|
||||
int __init mtrr_cleanup(unsigned address_bits)
|
||||
{
|
||||
unsigned long x_remove_base, x_remove_size;
|
||||
unsigned long base, size, def, dummy;
|
||||
u64 chunk_size, gran_size;
|
||||
mtrr_type type;
|
||||
int index_good;
|
||||
int i;
|
||||
|
||||
if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
|
||||
return 0;
|
||||
|
||||
rdmsr(MSR_MTRRdefType, def, dummy);
|
||||
def &= 0xff;
|
||||
if (def != MTRR_TYPE_UNCACHABLE)
|
||||
return 0;
|
||||
|
||||
/* Get it and store it aside: */
|
||||
memset(range_state, 0, sizeof(range_state));
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
mtrr_if->get(i, &base, &size, &type);
|
||||
range_state[i].base_pfn = base;
|
||||
range_state[i].size_pfn = size;
|
||||
range_state[i].type = type;
|
||||
}
|
||||
|
||||
/* Check if we need handle it and can handle it: */
|
||||
if (!mtrr_need_cleanup())
|
||||
return 0;
|
||||
|
||||
/* Print original var MTRRs at first, for debugging: */
|
||||
printk(KERN_DEBUG "original variable MTRRs\n");
|
||||
print_out_mtrr_range_state();
|
||||
|
||||
memset(range, 0, sizeof(range));
|
||||
x_remove_size = 0;
|
||||
x_remove_base = 1 << (32 - PAGE_SHIFT);
|
||||
if (mtrr_tom2)
|
||||
x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base;
|
||||
|
||||
/*
|
||||
* [0, 1M) should always be covered by var mtrr with WB
|
||||
* and fixed mtrrs should take effect before var mtrr for it:
|
||||
*/
|
||||
nr_range = add_range_with_merge(range, RANGE_NUM, 0, 0,
|
||||
1ULL<<(20 - PAGE_SHIFT));
|
||||
/* add from var mtrr at last */
|
||||
nr_range = x86_get_mtrr_mem_range(range, nr_range,
|
||||
x_remove_base, x_remove_size);
|
||||
|
||||
range_sums = sum_ranges(range, nr_range);
|
||||
printk(KERN_INFO "total RAM covered: %ldM\n",
|
||||
range_sums >> (20 - PAGE_SHIFT));
|
||||
|
||||
if (mtrr_chunk_size && mtrr_gran_size) {
|
||||
i = 0;
|
||||
mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
|
||||
x_remove_base, x_remove_size, i);
|
||||
|
||||
mtrr_print_out_one_result(i);
|
||||
|
||||
if (!result[i].bad) {
|
||||
set_var_mtrr_all(address_bits);
|
||||
printk(KERN_DEBUG "New variable MTRRs\n");
|
||||
print_out_mtrr_range_state();
|
||||
return 1;
|
||||
}
|
||||
printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
|
||||
"will find optimal one\n");
|
||||
}
|
||||
|
||||
i = 0;
|
||||
memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
|
||||
memset(result, 0, sizeof(result));
|
||||
for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
|
||||
|
||||
for (chunk_size = gran_size; chunk_size < (1ULL<<32);
|
||||
chunk_size <<= 1) {
|
||||
|
||||
if (i >= NUM_RESULT)
|
||||
continue;
|
||||
|
||||
mtrr_calc_range_state(chunk_size, gran_size,
|
||||
x_remove_base, x_remove_size, i);
|
||||
if (debug_print) {
|
||||
mtrr_print_out_one_result(i);
|
||||
printk(KERN_INFO "\n");
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Try to find the optimal index: */
|
||||
index_good = mtrr_search_optimal_index();
|
||||
|
||||
if (index_good != -1) {
|
||||
printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
|
||||
i = index_good;
|
||||
mtrr_print_out_one_result(i);
|
||||
|
||||
/* Convert ranges to var ranges state: */
|
||||
chunk_size = result[i].chunk_sizek;
|
||||
chunk_size <<= 10;
|
||||
gran_size = result[i].gran_sizek;
|
||||
gran_size <<= 10;
|
||||
x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
|
||||
set_var_mtrr_all(address_bits);
|
||||
printk(KERN_DEBUG "New variable MTRRs\n");
|
||||
print_out_mtrr_range_state();
|
||||
return 1;
|
||||
} else {
|
||||
/* print out all */
|
||||
for (i = 0; i < NUM_RESULT; i++)
|
||||
mtrr_print_out_one_result(i);
|
||||
}
|
||||
|
||||
printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
|
||||
printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
int __init mtrr_cleanup(unsigned address_bits)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int disable_mtrr_trim;
|
||||
|
||||
static int __init disable_mtrr_trim_setup(char *str)
|
||||
{
|
||||
disable_mtrr_trim = 1;
|
||||
return 0;
|
||||
}
|
||||
early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
|
||||
|
||||
/*
|
||||
* Newer AMD K8s and later CPUs have a special magic MSR way to force WB
|
||||
* for memory >4GB. Check for that here.
|
||||
* Note this won't check whether the MTRRs below 4GB (where the magic bit
* doesn't apply) are wrong, but so far we don't know of any such case in the wild.
|
||||
*/
|
||||
#define Tom2Enabled (1U << 21)
|
||||
#define Tom2ForceMemTypeWB (1U << 22)
|
||||
|
||||
int __init amd_special_default_mtrr(void)
|
||||
{
|
||||
u32 l, h;
|
||||
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
|
||||
return 0;
|
||||
if (boot_cpu_data.x86 < 0xf)
|
||||
return 0;
|
||||
/* In case some hypervisor doesn't pass SYSCFG through: */
|
||||
if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
|
||||
return 0;
|
||||
/*
|
||||
* Memory between 4GB and top of mem is forced WB by this magic bit.
|
||||
* Reserved before K8RevF, but should be zero there.
|
||||
*/
|
||||
if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
|
||||
(Tom2Enabled | Tom2ForceMemTypeWB))
|
||||
return 1;
|
||||
return 0;
|
||||
}
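A user-space sketch of the SYSCFG bit test in amd_special_default_mtrr() above; the register value is a made-up example rather than an MSR read, and the macro names are local stand-ins for Tom2Enabled/Tom2ForceMemTypeWB.

#include <stdio.h>
#include <stdint.h>

#define TOM2_ENABLED           (1U << 21)
#define TOM2_FORCE_MEM_TYPE_WB (1U << 22)

int main(void)
{
	uint32_t syscfg = 0x00620601;   /* made-up SYSCFG low word with bits 21 and 22 set */
	int forced_wb = (syscfg & (TOM2_ENABLED | TOM2_FORCE_MEM_TYPE_WB)) ==
			(TOM2_ENABLED | TOM2_FORCE_MEM_TYPE_WB);

	printf("memory above 4GB forced to WB: %s\n", forced_wb ? "yes" : "no");
	return 0;
}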
|
||||
|
||||
static u64 __init
|
||||
real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
|
||||
{
|
||||
u64 trim_start, trim_size;
|
||||
|
||||
trim_start = start_pfn;
|
||||
trim_start <<= PAGE_SHIFT;
|
||||
|
||||
trim_size = limit_pfn;
|
||||
trim_size <<= PAGE_SHIFT;
|
||||
trim_size -= trim_start;
|
||||
|
||||
return e820_update_range(trim_start, trim_size, E820_RAM, E820_RESERVED);
|
||||
}
|
||||
|
||||
/**
|
||||
* mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
|
||||
* @end_pfn: ending page frame number
|
||||
*
|
||||
* Some buggy BIOSes don't setup the MTRRs properly for systems with certain
|
||||
* memory configurations. This routine checks that the highest MTRR matches
|
||||
* the end of memory, to make sure the MTRRs having a write back type cover
|
||||
* all of the memory the kernel is intending to use. If not, it'll trim any
|
||||
* memory off the end by adjusting end_pfn, removing it from the kernel's
|
||||
* allocation pools, warning the user with an obnoxious message.
|
||||
*/
|
||||
int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
|
||||
{
|
||||
unsigned long i, base, size, highest_pfn = 0, def, dummy;
|
||||
mtrr_type type;
|
||||
u64 total_trim_size;
|
||||
/* extra one for all 0 */
|
||||
int num[MTRR_NUM_TYPES + 1];
|
||||
|
||||
/*
|
||||
* Make sure we only trim uncachable memory on machines that
|
||||
* support the Intel MTRR architecture:
|
||||
*/
|
||||
if (!is_cpu(INTEL) || disable_mtrr_trim)
|
||||
return 0;
|
||||
|
||||
rdmsr(MSR_MTRRdefType, def, dummy);
|
||||
def &= 0xff;
|
||||
if (def != MTRR_TYPE_UNCACHABLE)
|
||||
return 0;
|
||||
|
||||
/* Get it and store it aside: */
|
||||
memset(range_state, 0, sizeof(range_state));
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
mtrr_if->get(i, &base, &size, &type);
|
||||
range_state[i].base_pfn = base;
|
||||
range_state[i].size_pfn = size;
|
||||
range_state[i].type = type;
|
||||
}
|
||||
|
||||
/* Find highest cached pfn: */
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
type = range_state[i].type;
|
||||
if (type != MTRR_TYPE_WRBACK)
|
||||
continue;
|
||||
base = range_state[i].base_pfn;
|
||||
size = range_state[i].size_pfn;
|
||||
if (highest_pfn < base + size)
|
||||
highest_pfn = base + size;
|
||||
}
|
||||
|
||||
/* kvm/qemu doesn't have mtrr set right, don't trim them all: */
|
||||
if (!highest_pfn) {
|
||||
printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check entries number: */
|
||||
memset(num, 0, sizeof(num));
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
type = range_state[i].type;
|
||||
if (type >= MTRR_NUM_TYPES)
|
||||
continue;
|
||||
size = range_state[i].size_pfn;
|
||||
if (!size)
|
||||
type = MTRR_NUM_TYPES;
|
||||
num[type]++;
|
||||
}
|
||||
|
||||
/* No entry for WB? */
|
||||
if (!num[MTRR_TYPE_WRBACK])
|
||||
return 0;
|
||||
|
||||
/* Check if we only had WB and UC: */
|
||||
if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
|
||||
num_var_ranges - num[MTRR_NUM_TYPES])
|
||||
return 0;
|
||||
|
||||
memset(range, 0, sizeof(range));
|
||||
nr_range = 0;
|
||||
if (mtrr_tom2) {
|
||||
range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
|
||||
range[nr_range].end = mtrr_tom2 >> PAGE_SHIFT;
|
||||
if (highest_pfn < range[nr_range].end)
|
||||
highest_pfn = range[nr_range].end;
|
||||
nr_range++;
|
||||
}
|
||||
nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
|
||||
|
||||
/* Check the head: */
|
||||
total_trim_size = 0;
|
||||
if (range[0].start)
|
||||
total_trim_size += real_trim_memory(0, range[0].start);
|
||||
|
||||
/* Check the holes: */
|
||||
for (i = 0; i < nr_range - 1; i++) {
|
||||
if (range[i].end < range[i+1].start)
|
||||
total_trim_size += real_trim_memory(range[i].end,
|
||||
range[i+1].start);
|
||||
}
|
||||
|
||||
/* Check the top: */
|
||||
i = nr_range - 1;
|
||||
if (range[i].end < end_pfn)
|
||||
total_trim_size += real_trim_memory(range[i].end,
|
||||
end_pfn);
|
||||
|
||||
if (total_trim_size) {
|
||||
pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20);
|
||||
|
||||
if (!changed_by_mtrr_cleanup)
|
||||
WARN_ON(1);
|
||||
|
||||
pr_info("update e820 for mtrr\n");
|
||||
update_e820();
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
282
arch/x86/kernel/cpu/mtrr/cyrix.c
Normal file
|
|
@ -0,0 +1,282 @@
|
|||
#include <linux/init.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
#include <asm/processor-cyrix.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/mtrr.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
#include "mtrr.h"
|
||||
|
||||
static void
|
||||
cyrix_get_arr(unsigned int reg, unsigned long *base,
|
||||
unsigned long *size, mtrr_type * type)
|
||||
{
|
||||
unsigned char arr, ccr3, rcr, shift;
|
||||
unsigned long flags;
|
||||
|
||||
arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
ccr3 = getCx86(CX86_CCR3);
|
||||
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
|
||||
((unsigned char *)base)[3] = getCx86(arr);
|
||||
((unsigned char *)base)[2] = getCx86(arr + 1);
|
||||
((unsigned char *)base)[1] = getCx86(arr + 2);
|
||||
rcr = getCx86(CX86_RCR_BASE + reg);
|
||||
setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
shift = ((unsigned char *) base)[1] & 0x0f;
|
||||
*base >>= PAGE_SHIFT;
|
||||
|
||||
/*
|
||||
* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7
|
||||
* Note: shift==0xf means 4G, this is unsupported.
|
||||
*/
|
||||
if (shift)
|
||||
*size = (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1);
|
||||
else
|
||||
*size = 0;
|
||||
|
||||
/* Bit 0 is Cache Enable on ARR7, Cache Disable on ARR0-ARR6 */
|
||||
if (reg < 7) {
|
||||
switch (rcr) {
|
||||
case 1:
|
||||
*type = MTRR_TYPE_UNCACHABLE;
|
||||
break;
|
||||
case 8:
|
||||
*type = MTRR_TYPE_WRBACK;
|
||||
break;
|
||||
case 9:
|
||||
*type = MTRR_TYPE_WRCOMB;
|
||||
break;
|
||||
case 24:
|
||||
default:
|
||||
*type = MTRR_TYPE_WRTHROUGH;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (rcr) {
|
||||
case 0:
|
||||
*type = MTRR_TYPE_UNCACHABLE;
|
||||
break;
|
||||
case 8:
|
||||
*type = MTRR_TYPE_WRCOMB;
|
||||
break;
|
||||
case 9:
|
||||
*type = MTRR_TYPE_WRBACK;
|
||||
break;
|
||||
case 25:
|
||||
default:
|
||||
*type = MTRR_TYPE_WRTHROUGH;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
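A stand-alone sketch of the size decoding in cyrix_get_arr() above: the 4-bit size field selects a power of two counted in 4K pages, with ARR7 using 256K granularity instead of 4K. The shift value below is an arbitrary example.

#include <stdio.h>

int main(void)
{
	unsigned char shift = 0x9;      /* assumed size nibble read from an ARR */

	for (int reg = 3; reg <= 7; reg += 4) {
		unsigned long pages = shift ?
			(reg < 7 ? 0x1UL : 0x40UL) << (shift - 1) : 0;
		printf("ARR%d: %lu pages = %lu KB\n", reg, pages, pages * 4);
	}
	return 0;   /* ARR3: 1 MB, ARR7: 64 MB for the same nibble */
}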
|
||||
|
||||
/*
|
||||
* cyrix_get_free_region - get a free ARR.
|
||||
*
|
||||
* @base: the starting (base) address of the region.
|
||||
* @size: the size (in bytes) of the region.
|
||||
*
|
||||
* Returns: the index of the region on success, else -1 on error.
|
||||
*/
|
||||
static int
|
||||
cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
|
||||
{
|
||||
unsigned long lbase, lsize;
|
||||
mtrr_type ltype;
|
||||
int i;
|
||||
|
||||
switch (replace_reg) {
|
||||
case 7:
|
||||
if (size < 0x40)
|
||||
break;
|
||||
case 6:
|
||||
case 5:
|
||||
case 4:
|
||||
return replace_reg;
|
||||
case 3:
|
||||
case 2:
|
||||
case 1:
|
||||
case 0:
|
||||
return replace_reg;
|
||||
}
|
||||
/* If we are to set up a region >32M then look at ARR7 immediately */
|
||||
if (size > 0x2000) {
|
||||
cyrix_get_arr(7, &lbase, &lsize, &ltype);
|
||||
if (lsize == 0)
|
||||
return 7;
|
||||
/* Else try ARR0-ARR6 first */
|
||||
} else {
|
||||
for (i = 0; i < 7; i++) {
|
||||
cyrix_get_arr(i, &lbase, &lsize, &ltype);
|
||||
if (lsize == 0)
|
||||
return i;
|
||||
}
|
||||
/*
|
||||
* ARR0-ARR6 aren't free;
|
||||
* try ARR7 but its size must be at least 256K
|
||||
*/
|
||||
cyrix_get_arr(i, &lbase, &lsize, &ltype);
|
||||
if ((lsize == 0) && (size >= 0x40))
|
||||
return i;
|
||||
}
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
static u32 cr4, ccr3;
|
||||
|
||||
static void prepare_set(void)
|
||||
{
|
||||
u32 cr0;
|
||||
|
||||
/* Save value of CR4 and clear Page Global Enable (bit 7) */
|
||||
if (cpu_has_pge) {
|
||||
cr4 = read_cr4();
|
||||
write_cr4(cr4 & ~X86_CR4_PGE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Disable and flush caches.
|
||||
* Note that wbinvd flushes the TLBs as a side-effect
|
||||
*/
|
||||
cr0 = read_cr0() | X86_CR0_CD;
|
||||
wbinvd();
|
||||
write_cr0(cr0);
|
||||
wbinvd();
|
||||
|
||||
/* Cyrix ARRs - everything else was excluded at the top */
|
||||
ccr3 = getCx86(CX86_CCR3);
|
||||
|
||||
/* Cyrix ARRs - everything else was excluded at the top */
|
||||
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
|
||||
}
|
||||
|
||||
static void post_set(void)
|
||||
{
|
||||
/* Flush caches and TLBs */
|
||||
wbinvd();
|
||||
|
||||
/* Cyrix ARRs - everything else was excluded at the top */
|
||||
setCx86(CX86_CCR3, ccr3);
|
||||
|
||||
/* Enable caches */
|
||||
write_cr0(read_cr0() & ~X86_CR0_CD);
|
||||
|
||||
/* Restore value of CR4 */
|
||||
if (cpu_has_pge)
|
||||
write_cr4(cr4);
|
||||
}
|
||||
|
||||
static void cyrix_set_arr(unsigned int reg, unsigned long base,
|
||||
unsigned long size, mtrr_type type)
|
||||
{
|
||||
unsigned char arr, arr_type, arr_size;
|
||||
|
||||
arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
|
||||
|
||||
/* count down from 32M (ARR0-ARR6) or from 2G (ARR7) */
|
||||
if (reg >= 7)
|
||||
size >>= 6;
|
||||
|
||||
size &= 0x7fff; /* make sure arr_size <= 14 */
|
||||
for (arr_size = 0; size; arr_size++, size >>= 1)
|
||||
;
|
||||
|
||||
if (reg < 7) {
|
||||
switch (type) {
|
||||
case MTRR_TYPE_UNCACHABLE:
|
||||
arr_type = 1;
|
||||
break;
|
||||
case MTRR_TYPE_WRCOMB:
|
||||
arr_type = 9;
|
||||
break;
|
||||
case MTRR_TYPE_WRTHROUGH:
|
||||
arr_type = 24;
|
||||
break;
|
||||
default:
|
||||
arr_type = 8;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (type) {
|
||||
case MTRR_TYPE_UNCACHABLE:
|
||||
arr_type = 0;
|
||||
break;
|
||||
case MTRR_TYPE_WRCOMB:
|
||||
arr_type = 8;
|
||||
break;
|
||||
case MTRR_TYPE_WRTHROUGH:
|
||||
arr_type = 25;
|
||||
break;
|
||||
default:
|
||||
arr_type = 9;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
prepare_set();
|
||||
|
||||
base <<= PAGE_SHIFT;
|
||||
setCx86(arr + 0, ((unsigned char *)&base)[3]);
|
||||
setCx86(arr + 1, ((unsigned char *)&base)[2]);
|
||||
setCx86(arr + 2, (((unsigned char *)&base)[1]) | arr_size);
|
||||
setCx86(CX86_RCR_BASE + reg, arr_type);
|
||||
|
||||
post_set();
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
unsigned long base;
|
||||
unsigned long size;
|
||||
mtrr_type type;
|
||||
} arr_state_t;
|
||||
|
||||
static arr_state_t arr_state[8] = {
|
||||
{0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL},
|
||||
{0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}
|
||||
};
|
||||
|
||||
static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 };
|
||||
|
||||
static void cyrix_set_all(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
prepare_set();
|
||||
|
||||
/* the CCRs are not contiguous */
|
||||
for (i = 0; i < 4; i++)
|
||||
setCx86(CX86_CCR0 + i, ccr_state[i]);
|
||||
for (; i < 7; i++)
|
||||
setCx86(CX86_CCR4 + i, ccr_state[i]);
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
cyrix_set_arr(i, arr_state[i].base,
|
||||
arr_state[i].size, arr_state[i].type);
|
||||
}
|
||||
|
||||
post_set();
|
||||
}
|
||||
|
||||
static const struct mtrr_ops cyrix_mtrr_ops = {
|
||||
.vendor = X86_VENDOR_CYRIX,
|
||||
.set_all = cyrix_set_all,
|
||||
.set = cyrix_set_arr,
|
||||
.get = cyrix_get_arr,
|
||||
.get_free_region = cyrix_get_free_region,
|
||||
.validate_add_page = generic_validate_add_page,
|
||||
.have_wrcomb = positive_have_wrcomb,
|
||||
};
|
||||
|
||||
int __init cyrix_init_mtrr(void)
|
||||
{
|
||||
set_mtrr_ops(&cyrix_mtrr_ops);
|
||||
return 0;
|
||||
}
|
||||
845
arch/x86/kernel/cpu/mtrr/generic.c
Normal file
|
|
@ -0,0 +1,845 @@
|
|||
/*
|
||||
* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
|
||||
* because MTRRs can span up to 40 bits (36bits on most modern x86)
|
||||
*/
|
||||
#define DEBUG
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/mtrr.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/pat.h>
|
||||
|
||||
#include "mtrr.h"
|
||||
|
||||
struct fixed_range_block {
|
||||
int base_msr; /* start address of an MTRR block */
|
||||
int ranges; /* number of MTRRs in this block */
|
||||
};
|
||||
|
||||
static struct fixed_range_block fixed_range_blocks[] = {
|
||||
{ MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */
|
||||
{ MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */
|
||||
{ MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */
|
||||
{}
|
||||
};
|
||||
|
||||
static unsigned long smp_changes_mask;
|
||||
static int mtrr_state_set;
|
||||
u64 mtrr_tom2;
|
||||
|
||||
struct mtrr_state_type mtrr_state;
|
||||
EXPORT_SYMBOL_GPL(mtrr_state);
|
||||
|
||||
/*
|
||||
* BIOS is expected to clear MtrrFixDramModEn bit, see for example
|
||||
* "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
|
||||
* Opteron Processors" (26094 Rev. 3.30 February 2006), section
|
||||
* "13.2.1.2 SYSCFG Register": "The MtrrFixDramModEn bit should be set
|
||||
* to 1 during BIOS initialization of the fixed MTRRs, then cleared to
|
||||
* 0 for operation."
|
||||
*/
|
||||
static inline void k8_check_syscfg_dram_mod_en(void)
|
||||
{
|
||||
u32 lo, hi;
|
||||
|
||||
if (!((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
|
||||
(boot_cpu_data.x86 >= 0x0f)))
|
||||
return;
|
||||
|
||||
rdmsr(MSR_K8_SYSCFG, lo, hi);
|
||||
if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
|
||||
printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
|
||||
" not cleared by BIOS, clearing this bit\n",
|
||||
smp_processor_id());
|
||||
lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
|
||||
mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi);
|
||||
}
|
||||
}
|
||||
|
||||
/* Get the size of contiguous MTRR range */
|
||||
static u64 get_mtrr_size(u64 mask)
|
||||
{
|
||||
u64 size;
|
||||
|
||||
mask >>= PAGE_SHIFT;
|
||||
mask |= size_or_mask;
|
||||
size = -mask;
|
||||
size <<= PAGE_SHIFT;
|
||||
return size;
|
||||
}
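A user-space rework of the mask-to-size trick in get_mtrr_size() above, assuming a 36-bit physical address width and 4K pages; the sample PHYSMASK describing a 2GB region is made up.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PHYS_BITS  36

int main(void)
{
	/* size_or_mask: all page-frame bits above the supported address width */
	uint64_t size_or_mask = ~((1ULL << (PHYS_BITS - PAGE_SHIFT)) - 1);
	uint64_t mask = 0xF80000000ULL;   /* assumed PHYSMASK for a 2GB range */

	uint64_t size = mask >> PAGE_SHIFT;
	size |= size_or_mask;             /* extend the mask to all 64 bits */
	size = -size;                     /* two's complement isolates the low zero bits */
	size <<= PAGE_SHIFT;

	printf("decoded size: %llu MB\n",
	       (unsigned long long)(size >> 20));   /* prints 2048 MB */
	return 0;
}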
|
||||
|
||||
/*
|
||||
* Check and return the effective type for MTRR-MTRR type overlap.
|
||||
* Returns 1 if the effective type is UNCACHEABLE, else returns 0
|
||||
*/
|
||||
static int check_type_overlap(u8 *prev, u8 *curr)
|
||||
{
|
||||
if (*prev == MTRR_TYPE_UNCACHABLE || *curr == MTRR_TYPE_UNCACHABLE) {
|
||||
*prev = MTRR_TYPE_UNCACHABLE;
|
||||
*curr = MTRR_TYPE_UNCACHABLE;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ((*prev == MTRR_TYPE_WRBACK && *curr == MTRR_TYPE_WRTHROUGH) ||
|
||||
(*prev == MTRR_TYPE_WRTHROUGH && *curr == MTRR_TYPE_WRBACK)) {
|
||||
*prev = MTRR_TYPE_WRTHROUGH;
|
||||
*curr = MTRR_TYPE_WRTHROUGH;
|
||||
}
|
||||
|
||||
if (*prev != *curr) {
|
||||
*prev = MTRR_TYPE_UNCACHABLE;
|
||||
*curr = MTRR_TYPE_UNCACHABLE;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
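A small stand-alone table of the precedence rules implemented by check_type_overlap() above: UC always wins, WB combined with WT degrades to WT, and any other disagreement falls back to UC. The enum values mirror the architectural MTRR type encodings.

#include <stdio.h>

enum { UC = 0, WC = 1, WT = 4, WP = 5, WB = 6 };

static int effective(int prev, int curr)
{
	if (prev == UC || curr == UC)
		return UC;
	if ((prev == WB && curr == WT) || (prev == WT && curr == WB))
		return WT;
	if (prev != curr)
		return UC;
	return prev;
}

int main(void)
{
	printf("WB + WT -> %d (WT)\n", effective(WB, WT));
	printf("WB + UC -> %d (UC)\n", effective(WB, UC));
	printf("WB + WC -> %d (UC)\n", effective(WB, WC));
	printf("WB + WB -> %d (WB)\n", effective(WB, WB));
	return 0;
}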
|
||||
|
||||
/*
|
||||
* Error/Semi-error returns:
|
||||
* 0xFF - when MTRR is not enabled
|
||||
* *repeat == 1 implies [start:end] spanned across MTRR range and type returned
|
||||
* corresponds only to [start:*partial_end].
|
||||
* Caller has to lookup again for [*partial_end:end].
|
||||
*/
|
||||
static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat)
|
||||
{
|
||||
int i;
|
||||
u64 base, mask;
|
||||
u8 prev_match, curr_match;
|
||||
|
||||
*repeat = 0;
|
||||
if (!mtrr_state_set)
|
||||
return 0xFF;
|
||||
|
||||
if (!mtrr_state.enabled)
|
||||
return 0xFF;
|
||||
|
||||
/* Make end inclusive, instead of exclusive */
|
||||
end--;
|
||||
|
||||
/* Look in fixed ranges. Just return the type as per start */
|
||||
if (mtrr_state.have_fixed && (start < 0x100000)) {
|
||||
int idx;
|
||||
|
||||
if (start < 0x80000) {
|
||||
idx = 0;
|
||||
idx += (start >> 16);
|
||||
return mtrr_state.fixed_ranges[idx];
|
||||
} else if (start < 0xC0000) {
|
||||
idx = 1 * 8;
|
||||
idx += ((start - 0x80000) >> 14);
|
||||
return mtrr_state.fixed_ranges[idx];
|
||||
} else if (start < 0x1000000) {
|
||||
idx = 3 * 8;
|
||||
idx += ((start - 0xC0000) >> 12);
|
||||
return mtrr_state.fixed_ranges[idx];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Look in variable ranges
|
||||
* Look for multiple ranges matching this address and pick the type
|
||||
* as per MTRR precedence
|
||||
*/
|
||||
if (!(mtrr_state.enabled & 2))
|
||||
return mtrr_state.def_type;
|
||||
|
||||
prev_match = 0xFF;
|
||||
for (i = 0; i < num_var_ranges; ++i) {
|
||||
unsigned short start_state, end_state;
|
||||
|
||||
if (!(mtrr_state.var_ranges[i].mask_lo & (1 << 11)))
|
||||
continue;
|
||||
|
||||
base = (((u64)mtrr_state.var_ranges[i].base_hi) << 32) +
|
||||
(mtrr_state.var_ranges[i].base_lo & PAGE_MASK);
|
||||
mask = (((u64)mtrr_state.var_ranges[i].mask_hi) << 32) +
|
||||
(mtrr_state.var_ranges[i].mask_lo & PAGE_MASK);
|
||||
|
||||
start_state = ((start & mask) == (base & mask));
|
||||
end_state = ((end & mask) == (base & mask));
|
||||
|
||||
if (start_state != end_state) {
|
||||
/*
|
||||
* We have start:end spanning across an MTRR.
|
||||
* We split the region into
|
||||
* either
|
||||
* (start:mtrr_end) (mtrr_end:end)
|
||||
* or
|
||||
* (start:mtrr_start) (mtrr_start:end)
|
||||
* depending on kind of overlap.
|
||||
* Return the type for first region and a pointer to
|
||||
* the start of second region so that caller will
|
||||
* lookup again on the second region.
|
||||
* Note: This way we handle multiple overlaps as well.
|
||||
*/
|
||||
if (start_state)
|
||||
*partial_end = base + get_mtrr_size(mask);
|
||||
else
|
||||
*partial_end = base;
|
||||
|
||||
if (unlikely(*partial_end <= start)) {
|
||||
WARN_ON(1);
|
||||
*partial_end = start + PAGE_SIZE;
|
||||
}
|
||||
|
||||
end = *partial_end - 1; /* end is inclusive */
|
||||
*repeat = 1;
|
||||
}
|
||||
|
||||
if ((start & mask) != (base & mask))
|
||||
continue;
|
||||
|
||||
curr_match = mtrr_state.var_ranges[i].base_lo & 0xff;
|
||||
if (prev_match == 0xFF) {
|
||||
prev_match = curr_match;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check_type_overlap(&prev_match, &curr_match))
|
||||
return curr_match;
|
||||
}
|
||||
|
||||
if (mtrr_tom2) {
|
||||
if (start >= (1ULL<<32) && (end < mtrr_tom2))
|
||||
return MTRR_TYPE_WRBACK;
|
||||
}
|
||||
|
||||
if (prev_match != 0xFF)
|
||||
return prev_match;
|
||||
|
||||
return mtrr_state.def_type;
|
||||
}
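A user-space sketch of the fixed-range indexing used by __mtrr_type_lookup() above: 8 entries of 64K below 512K, 16 entries of 16K up to 768K, and 64 entries of 4K up to 1M. The probe addresses are arbitrary examples.

#include <stdio.h>

static int fixed_range_index(unsigned long addr)
{
	if (addr < 0x80000)
		return addr >> 16;                       /* 64K granularity */
	if (addr < 0xC0000)
		return 1 * 8 + ((addr - 0x80000) >> 14); /* 16K granularity */
	if (addr < 0x100000)
		return 3 * 8 + ((addr - 0xC0000) >> 12); /* 4K granularity */
	return -1;                                       /* not a fixed range */
}

int main(void)
{
	unsigned long probes[] = { 0x0, 0x7FFFF, 0xA0000, 0xC8000, 0xFF000 };

	for (int i = 0; i < 5; i++)
		printf("0x%05lx -> fixed_ranges[%d]\n",
		       probes[i], fixed_range_index(probes[i]));
	return 0;   /* indices 0, 7, 16, 32 and 87 of the 88 fixed entries */
}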
|
||||
|
||||
/*
|
||||
* Returns the effective MTRR type for the region
|
||||
* Error return:
|
||||
* 0xFF - when MTRR is not enabled
|
||||
*/
|
||||
u8 mtrr_type_lookup(u64 start, u64 end)
|
||||
{
|
||||
u8 type, prev_type;
|
||||
int repeat;
|
||||
u64 partial_end;
|
||||
|
||||
type = __mtrr_type_lookup(start, end, &partial_end, &repeat);
|
||||
|
||||
/*
|
||||
* Common path is with repeat = 0.
|
||||
* However, we can have cases where [start:end] spans across some
|
||||
* MTRR range. Do repeated lookups for that case here.
|
||||
*/
|
||||
while (repeat) {
|
||||
prev_type = type;
|
||||
start = partial_end;
|
||||
type = __mtrr_type_lookup(start, end, &partial_end, &repeat);
|
||||
|
||||
if (check_type_overlap(&prev_type, &type))
|
||||
return type;
|
||||
}
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
/* Get the MSR pair relating to a var range */
|
||||
static void
|
||||
get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
|
||||
{
|
||||
rdmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
|
||||
rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
|
||||
}
|
||||
|
||||
/* Fill the MSR pair relating to a var range */
|
||||
void fill_mtrr_var_range(unsigned int index,
|
||||
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
|
||||
{
|
||||
struct mtrr_var_range *vr;
|
||||
|
||||
vr = mtrr_state.var_ranges;
|
||||
|
||||
vr[index].base_lo = base_lo;
|
||||
vr[index].base_hi = base_hi;
|
||||
vr[index].mask_lo = mask_lo;
|
||||
vr[index].mask_hi = mask_hi;
|
||||
}
|
||||
|
||||
static void get_fixed_ranges(mtrr_type *frs)
|
||||
{
|
||||
unsigned int *p = (unsigned int *)frs;
|
||||
int i;
|
||||
|
||||
k8_check_syscfg_dram_mod_en();
|
||||
|
||||
rdmsr(MSR_MTRRfix64K_00000, p[0], p[1]);
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
rdmsr(MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]);
|
||||
for (i = 0; i < 8; i++)
|
||||
rdmsr(MSR_MTRRfix4K_C0000 + i, p[6 + i * 2], p[7 + i * 2]);
|
||||
}
|
||||
|
||||
void mtrr_save_fixed_ranges(void *info)
|
||||
{
|
||||
if (cpu_has_mtrr)
|
||||
get_fixed_ranges(mtrr_state.fixed_ranges);
|
||||
}
|
||||
|
||||
static unsigned __initdata last_fixed_start;
|
||||
static unsigned __initdata last_fixed_end;
|
||||
static mtrr_type __initdata last_fixed_type;
|
||||
|
||||
static void __init print_fixed_last(void)
|
||||
{
|
||||
if (!last_fixed_end)
|
||||
return;
|
||||
|
||||
pr_debug(" %05X-%05X %s\n", last_fixed_start,
|
||||
last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type));
|
||||
|
||||
last_fixed_end = 0;
|
||||
}
|
||||
|
||||
static void __init update_fixed_last(unsigned base, unsigned end,
|
||||
mtrr_type type)
|
||||
{
|
||||
last_fixed_start = base;
|
||||
last_fixed_end = end;
|
||||
last_fixed_type = type;
|
||||
}
|
||||
|
||||
static void __init
|
||||
print_fixed(unsigned base, unsigned step, const mtrr_type *types)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < 8; ++i, ++types, base += step) {
|
||||
if (last_fixed_end == 0) {
|
||||
update_fixed_last(base, base + step, *types);
|
||||
continue;
|
||||
}
|
||||
if (last_fixed_end == base && last_fixed_type == *types) {
|
||||
last_fixed_end = base + step;
|
||||
continue;
|
||||
}
|
||||
/* new segments: gap or different type */
|
||||
print_fixed_last();
|
||||
update_fixed_last(base, base + step, *types);
|
||||
}
|
||||
}
|
||||
|
||||
static void prepare_set(void);
|
||||
static void post_set(void);
|
||||
|
||||
static void __init print_mtrr_state(void)
|
||||
{
|
||||
unsigned int i;
|
||||
int high_width;
|
||||
|
||||
pr_debug("MTRR default type: %s\n",
|
||||
mtrr_attrib_to_str(mtrr_state.def_type));
|
||||
if (mtrr_state.have_fixed) {
|
||||
pr_debug("MTRR fixed ranges %sabled:\n",
|
||||
mtrr_state.enabled & 1 ? "en" : "dis");
|
||||
print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
|
||||
for (i = 0; i < 2; ++i)
|
||||
print_fixed(0x80000 + i * 0x20000, 0x04000,
|
||||
mtrr_state.fixed_ranges + (i + 1) * 8);
|
||||
for (i = 0; i < 8; ++i)
|
||||
print_fixed(0xC0000 + i * 0x08000, 0x01000,
|
||||
mtrr_state.fixed_ranges + (i + 3) * 8);
|
||||
|
||||
/* tail */
|
||||
print_fixed_last();
|
||||
}
|
||||
pr_debug("MTRR variable ranges %sabled:\n",
|
||||
mtrr_state.enabled & 2 ? "en" : "dis");
|
||||
high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4;
|
||||
|
||||
for (i = 0; i < num_var_ranges; ++i) {
|
||||
if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
|
||||
pr_debug(" %u base %0*X%05X000 mask %0*X%05X000 %s\n",
|
||||
i,
|
||||
high_width,
|
||||
mtrr_state.var_ranges[i].base_hi,
|
||||
mtrr_state.var_ranges[i].base_lo >> 12,
|
||||
high_width,
|
||||
mtrr_state.var_ranges[i].mask_hi,
|
||||
mtrr_state.var_ranges[i].mask_lo >> 12,
|
||||
mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
|
||||
else
|
||||
pr_debug(" %u disabled\n", i);
|
||||
}
|
||||
if (mtrr_tom2)
|
||||
pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
|
||||
}
|
||||
|
||||
/* Grab all of the MTRR state for this CPU into *state */
|
||||
void __init get_mtrr_state(void)
|
||||
{
|
||||
struct mtrr_var_range *vrs;
|
||||
unsigned long flags;
|
||||
unsigned lo, dummy;
|
||||
unsigned int i;
|
||||
|
||||
vrs = mtrr_state.var_ranges;
|
||||
|
||||
rdmsr(MSR_MTRRcap, lo, dummy);
|
||||
mtrr_state.have_fixed = (lo >> 8) & 1;
|
||||
|
||||
for (i = 0; i < num_var_ranges; i++)
|
||||
get_mtrr_var_range(i, &vrs[i]);
|
||||
if (mtrr_state.have_fixed)
|
||||
get_fixed_ranges(mtrr_state.fixed_ranges);
|
||||
|
||||
rdmsr(MSR_MTRRdefType, lo, dummy);
|
||||
mtrr_state.def_type = (lo & 0xff);
|
||||
mtrr_state.enabled = (lo & 0xc00) >> 10;
|
||||
|
||||
if (amd_special_default_mtrr()) {
|
||||
unsigned low, high;
|
||||
|
||||
/* TOP_MEM2 */
|
||||
rdmsr(MSR_K8_TOP_MEM2, low, high);
|
||||
mtrr_tom2 = high;
|
||||
mtrr_tom2 <<= 32;
|
||||
mtrr_tom2 |= low;
|
||||
mtrr_tom2 &= 0xffffff800000ULL;
|
||||
}
|
||||
|
||||
print_mtrr_state();
|
||||
|
||||
mtrr_state_set = 1;
|
||||
|
||||
/* PAT setup for BP. We need to go through sync steps here */
|
||||
local_irq_save(flags);
|
||||
prepare_set();
|
||||
|
||||
pat_init();
|
||||
|
||||
post_set();
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/* Some BIOS's are messed up and don't set all MTRRs the same! */
|
||||
void __init mtrr_state_warn(void)
|
||||
{
|
||||
unsigned long mask = smp_changes_mask;
|
||||
|
||||
if (!mask)
|
||||
return;
|
||||
if (mask & MTRR_CHANGE_MASK_FIXED)
|
||||
pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
|
||||
if (mask & MTRR_CHANGE_MASK_VARIABLE)
|
||||
pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n");
|
||||
if (mask & MTRR_CHANGE_MASK_DEFTYPE)
|
||||
pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
|
||||
|
||||
printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
|
||||
printk(KERN_INFO "mtrr: corrected configuration.\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Doesn't attempt to pass an error out to MTRR users
|
||||
* because it's quite complicated in some cases and probably not
|
||||
* worth it because the best error handling is to ignore it.
|
||||
*/
|
||||
void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
|
||||
{
|
||||
if (wrmsr_safe(msr, a, b) < 0) {
|
||||
printk(KERN_ERR
|
||||
"MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
|
||||
smp_processor_id(), msr, a, b);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* set_fixed_range - checks & updates a fixed-range MTRR if it
|
||||
* differs from the value it should have
|
||||
* @msr: MSR address of the MTTR which should be checked and updated
|
||||
* @changed: pointer which indicates whether the MTRR needed to be changed
|
||||
* @msrwords: pointer to the MSR values which the MSR should have
|
||||
*/
|
||||
static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
|
||||
{
|
||||
unsigned lo, hi;
|
||||
|
||||
rdmsr(msr, lo, hi);
|
||||
|
||||
if (lo != msrwords[0] || hi != msrwords[1]) {
|
||||
mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
|
||||
*changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* generic_get_free_region - Get a free MTRR.
|
||||
* @base: The starting (base) address of the region.
|
||||
* @size: The size (in bytes) of the region.
|
||||
* @replace_reg: mtrr index to be replaced; set to invalid value if none.
|
||||
*
|
||||
* Returns: The index of the region on success, else negative on error.
|
||||
*/
|
||||
int
|
||||
generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
|
||||
{
|
||||
unsigned long lbase, lsize;
|
||||
mtrr_type ltype;
|
||||
int i, max;
|
||||
|
||||
max = num_var_ranges;
|
||||
if (replace_reg >= 0 && replace_reg < max)
|
||||
return replace_reg;
|
||||
|
||||
for (i = 0; i < max; ++i) {
|
||||
mtrr_if->get(i, &lbase, &lsize, &ltype);
|
||||
if (lsize == 0)
|
||||
return i;
|
||||
}
|
||||
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
static void generic_get_mtrr(unsigned int reg, unsigned long *base,
|
||||
unsigned long *size, mtrr_type *type)
|
||||
{
|
||||
u32 mask_lo, mask_hi, base_lo, base_hi;
|
||||
unsigned int hi;
|
||||
u64 tmp, mask;
|
||||
|
||||
/*
|
||||
* get_mtrr doesn't need to update mtrr_state, also it could be called
|
||||
* from any cpu, so try to print it out directly.
|
||||
*/
|
||||
get_cpu();
|
||||
|
||||
rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
|
||||
|
||||
if ((mask_lo & 0x800) == 0) {
|
||||
/* Invalid (i.e. free) range */
|
||||
*base = 0;
|
||||
*size = 0;
|
||||
*type = 0;
|
||||
goto out_put_cpu;
|
||||
}
|
||||
|
||||
rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
|
||||
|
||||
/* Work out the shifted address mask: */
|
||||
tmp = (u64)mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
|
||||
mask = size_or_mask | tmp;
|
||||
|
||||
/* Expand tmp with high bits to all 1s: */
|
||||
hi = fls64(tmp);
|
||||
if (hi > 0) {
|
||||
tmp |= ~((1ULL<<(hi - 1)) - 1);
|
||||
|
||||
if (tmp != mask) {
|
||||
printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
|
||||
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
|
||||
mask = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This works correctly if size is a power of two, i.e. a
|
||||
* contiguous range:
|
||||
*/
|
||||
*size = -mask;
|
||||
*base = (u64)base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
|
||||
*type = base_lo & 0xff;
|
||||
|
||||
out_put_cpu:
|
||||
put_cpu();
|
||||
}
|
||||
|
||||
/**
|
||||
* set_fixed_ranges - checks & updates the fixed-range MTRRs if they
|
||||
* differ from the saved set
|
||||
* @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges()
|
||||
*/
|
||||
static int set_fixed_ranges(mtrr_type *frs)
|
||||
{
|
||||
unsigned long long *saved = (unsigned long long *)frs;
|
||||
bool changed = false;
|
||||
int block = -1, range;
|
||||
|
||||
k8_check_syscfg_dram_mod_en();
|
||||
|
||||
while (fixed_range_blocks[++block].ranges) {
|
||||
for (range = 0; range < fixed_range_blocks[block].ranges; range++)
|
||||
set_fixed_range(fixed_range_blocks[block].base_msr + range,
|
||||
&changed, (unsigned int *)saved++);
|
||||
}
|
||||
|
||||
return changed;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the MSR pair relating to a var range.
|
||||
* Returns true if changes are made.
|
||||
*/
|
||||
static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
|
||||
{
|
||||
unsigned int lo, hi;
|
||||
bool changed = false;
|
||||
|
||||
rdmsr(MTRRphysBase_MSR(index), lo, hi);
|
||||
if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL)
|
||||
|| (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
|
||||
(hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
|
||||
|
||||
mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
|
||||
changed = true;
|
||||
}
|
||||
|
||||
rdmsr(MTRRphysMask_MSR(index), lo, hi);
|
||||
|
||||
if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL)
|
||||
|| (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
|
||||
(hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
|
||||
mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
|
||||
changed = true;
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
static u32 deftype_lo, deftype_hi;
|
||||
|
||||
/**
|
||||
* set_mtrr_state - Set the MTRR state for this CPU.
|
||||
*
|
||||
* NOTE: The CPU must already be in a safe state for MTRR changes.
|
||||
* RETURNS: 0 if no changes made, else a mask indicating what was changed.
|
||||
*/
|
||||
static unsigned long set_mtrr_state(void)
|
||||
{
|
||||
unsigned long change_mask = 0;
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i]))
|
||||
change_mask |= MTRR_CHANGE_MASK_VARIABLE;
|
||||
}
|
||||
|
||||
if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges))
|
||||
change_mask |= MTRR_CHANGE_MASK_FIXED;
|
||||
|
||||
/*
|
||||
* Set_mtrr_restore restores the old value of MTRRdefType,
|
||||
* so to set it we fiddle with the saved value:
|
||||
*/
|
||||
if ((deftype_lo & 0xff) != mtrr_state.def_type
|
||||
|| ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
|
||||
|
||||
deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type |
|
||||
(mtrr_state.enabled << 10);
|
||||
change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
|
||||
}
|
||||
|
||||
return change_mask;
|
||||
}
|
||||
|
||||
|
||||
static unsigned long cr4;
|
||||
static DEFINE_RAW_SPINLOCK(set_atomicity_lock);
|
||||
|
||||
/*
|
||||
* Since we are disabling the cache don't allow any interrupts,
|
||||
* they would run extremely slow and would only increase the pain.
|
||||
*
|
||||
* The caller must ensure that local interrupts are disabled and
|
||||
* are reenabled after post_set() has been called.
|
||||
*/
|
||||
static void prepare_set(void) __acquires(set_atomicity_lock)
|
||||
{
|
||||
unsigned long cr0;
|
||||
|
||||
/*
|
||||
* Note that this is not ideal
|
||||
* since the cache is only flushed/disabled for this CPU while the
|
||||
* MTRRs are changed, but changing this requires more invasive
|
||||
* changes to the way the kernel boots
|
||||
*/
|
||||
|
||||
raw_spin_lock(&set_atomicity_lock);
|
||||
|
||||
/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
|
||||
cr0 = read_cr0() | X86_CR0_CD;
|
||||
write_cr0(cr0);
|
||||
wbinvd();
|
||||
|
||||
/* Save value of CR4 and clear Page Global Enable (bit 7) */
|
||||
if (cpu_has_pge) {
|
||||
cr4 = read_cr4();
|
||||
write_cr4(cr4 & ~X86_CR4_PGE);
|
||||
}
|
||||
|
||||
/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
|
||||
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
|
||||
__flush_tlb();
|
||||
|
||||
/* Save MTRR state */
|
||||
rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
|
||||
|
||||
/* Disable MTRRs, and set the default type to uncached */
|
||||
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
|
||||
wbinvd();
|
||||
}
|
||||
|
||||
static void post_set(void) __releases(set_atomicity_lock)
|
||||
{
|
||||
/* Flush TLBs (no need to flush caches - they are disabled) */
|
||||
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
|
||||
__flush_tlb();
|
||||
|
||||
/* Intel (P6) standard MTRRs */
|
||||
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
|
||||
|
||||
/* Enable caches */
|
||||
write_cr0(read_cr0() & ~X86_CR0_CD);
|
||||
|
||||
/* Restore value of CR4 */
|
||||
if (cpu_has_pge)
|
||||
write_cr4(cr4);
|
||||
raw_spin_unlock(&set_atomicity_lock);
|
||||
}
|
||||
|
||||
static void generic_set_all(void)
|
||||
{
|
||||
unsigned long mask, count;
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
prepare_set();
|
||||
|
||||
/* Actually set the state */
|
||||
mask = set_mtrr_state();
|
||||
|
||||
/* also set PAT */
|
||||
pat_init();
|
||||
|
||||
post_set();
|
||||
local_irq_restore(flags);
|
||||
|
||||
/* Use the atomic bitops to update the global mask */
|
||||
for (count = 0; count < sizeof mask * 8; ++count) {
|
||||
if (mask & 0x01)
|
||||
set_bit(count, &smp_changes_mask);
|
||||
mask >>= 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* generic_set_mtrr - set variable MTRR register on the local CPU.
|
||||
*
|
||||
* @reg: The register to set.
|
||||
* @base: The base address of the region.
|
||||
* @size: The size of the region. If this is 0 the region is disabled.
|
||||
* @type: The type of the region.
|
||||
*
|
||||
* Returns nothing.
|
||||
*/
|
||||
static void generic_set_mtrr(unsigned int reg, unsigned long base,
|
||||
unsigned long size, mtrr_type type)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct mtrr_var_range *vr;
|
||||
|
||||
vr = &mtrr_state.var_ranges[reg];
|
||||
|
||||
local_irq_save(flags);
|
||||
prepare_set();
|
||||
|
||||
if (size == 0) {
|
||||
/*
|
||||
* The invalid bit is kept in the mask, so we simply
|
||||
* clear the relevant mask register to disable a range.
|
||||
*/
|
||||
mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0);
|
||||
memset(vr, 0, sizeof(struct mtrr_var_range));
|
||||
} else {
|
||||
vr->base_lo = base << PAGE_SHIFT | type;
|
||||
vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
|
||||
vr->mask_lo = -size << PAGE_SHIFT | 0x800;
|
||||
vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);
|
||||
|
||||
mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi);
|
||||
mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi);
|
||||
}
|
||||
|
||||
post_set();
|
||||
local_irq_restore(flags);
|
||||
}
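/*
 * A worked example of the base/mask encoding above (hypothetical values,
 * 4 KiB pages, and a 36-bit physical address width so that
 * size_and_mask == 0x00f00000):
 *
 *   request: 64 MiB write-combining range at 0xd0000000
 *            base = 0xd0000 pages, size = 0x4000 pages, type = 1 (WC)
 *
 *   MTRRphysBase: base_lo = (0xd0000 << 12) | 1     = 0xd0000001
 *                 base_hi = 0                         (bits 35:32 clear)
 *   MTRRphysMask: mask_lo = (-0x4000 << 12) | 0x800 = 0xfc000800
 *                 mask_hi = 0xf                       (bits 35:32 set)
 *
 * The mask selects every address bit above the region size, and bit 11
 * (0x800) marks the variable range as valid.
 */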
|
||||
|
||||
int generic_validate_add_page(unsigned long base, unsigned long size,
|
||||
unsigned int type)
|
||||
{
|
||||
unsigned long lbase, last;
|
||||
|
||||
/*
|
||||
* For Intel PPro stepping <= 7
|
||||
* must be 4 MiB aligned and not touch 0x70000000 -> 0x7003FFFF
|
||||
*/
|
||||
if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
|
||||
boot_cpu_data.x86_model == 1 &&
|
||||
boot_cpu_data.x86_mask <= 7) {
|
||||
if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
|
||||
pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (!(base + size < 0x70000 || base > 0x7003F) &&
|
||||
(type == MTRR_TYPE_WRCOMB
|
||||
|| type == MTRR_TYPE_WRBACK)) {
|
||||
pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check upper bits of base and last are equal and lower bits are 0
|
||||
* for base and 1 for last
|
||||
*/
|
||||
last = base + size - 1;
|
||||
for (lbase = base; !(lbase & 1) && (last & 1);
|
||||
lbase = lbase >> 1, last = last >> 1)
|
||||
;
|
||||
if (lbase != last) {
|
||||
pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
}
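/*
 * A short worked example of the alignment loop above, using hypothetical
 * page numbers: base = 0x40000 with size = 0x10000 (a 256 MiB range on a
 * 1 GiB boundary) shifts down to lbase == last == 0x4 and is accepted;
 * base = 0x48000 with the same size stops at lbase = 0x9, last = 0xa and
 * is rejected, because the base is not aligned on a size boundary.
 */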
|
||||
|
||||
static int generic_have_wrcomb(void)
|
||||
{
|
||||
unsigned long config, dummy;
|
||||
rdmsr(MSR_MTRRcap, config, dummy);
|
||||
return config & (1 << 10);
|
||||
}
|
||||
|
||||
int positive_have_wrcomb(void)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Generic structure...
|
||||
*/
|
||||
const struct mtrr_ops generic_mtrr_ops = {
|
||||
.use_intel_if = 1,
|
||||
.set_all = generic_set_all,
|
||||
.get = generic_get_mtrr,
|
||||
.get_free_region = generic_get_free_region,
|
||||
.set = generic_set_mtrr,
|
||||
.validate_add_page = generic_validate_add_page,
|
||||
.have_wrcomb = generic_have_wrcomb,
|
||||
};
|
||||
451
arch/x86/kernel/cpu/mtrr/if.c
Normal file
@ -0,0 +1,451 @@
#include <linux/capability.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/init.h>
|
||||
|
||||
#define LINE_SIZE 80
|
||||
|
||||
#include <asm/mtrr.h>
|
||||
|
||||
#include "mtrr.h"
|
||||
|
||||
#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
|
||||
|
||||
static const char *const mtrr_strings[MTRR_NUM_TYPES] =
|
||||
{
|
||||
"uncachable", /* 0 */
|
||||
"write-combining", /* 1 */
|
||||
"?", /* 2 */
|
||||
"?", /* 3 */
|
||||
"write-through", /* 4 */
|
||||
"write-protect", /* 5 */
|
||||
"write-back", /* 6 */
|
||||
};
|
||||
|
||||
const char *mtrr_attrib_to_str(int x)
|
||||
{
|
||||
return (x <= 6) ? mtrr_strings[x] : "?";
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
|
||||
static int
|
||||
mtrr_file_add(unsigned long base, unsigned long size,
|
||||
unsigned int type, bool increment, struct file *file, int page)
|
||||
{
|
||||
unsigned int *fcount = FILE_FCOUNT(file);
|
||||
int reg, max;
|
||||
|
||||
max = num_var_ranges;
|
||||
if (fcount == NULL) {
|
||||
fcount = kzalloc(max * sizeof *fcount, GFP_KERNEL);
|
||||
if (!fcount)
|
||||
return -ENOMEM;
|
||||
FILE_FCOUNT(file) = fcount;
|
||||
}
|
||||
if (!page) {
|
||||
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)))
|
||||
return -EINVAL;
|
||||
base >>= PAGE_SHIFT;
|
||||
size >>= PAGE_SHIFT;
|
||||
}
|
||||
reg = mtrr_add_page(base, size, type, true);
|
||||
if (reg >= 0)
|
||||
++fcount[reg];
|
||||
return reg;
|
||||
}
|
||||
|
||||
static int
|
||||
mtrr_file_del(unsigned long base, unsigned long size,
|
||||
struct file *file, int page)
|
||||
{
|
||||
unsigned int *fcount = FILE_FCOUNT(file);
|
||||
int reg;
|
||||
|
||||
if (!page) {
|
||||
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)))
|
||||
return -EINVAL;
|
||||
base >>= PAGE_SHIFT;
|
||||
size >>= PAGE_SHIFT;
|
||||
}
|
||||
reg = mtrr_del_page(-1, base, size);
|
||||
if (reg < 0)
|
||||
return reg;
|
||||
if (fcount == NULL)
|
||||
return reg;
|
||||
if (fcount[reg] < 1)
|
||||
return -EINVAL;
|
||||
--fcount[reg];
|
||||
return reg;
|
||||
}
|
||||
|
||||
/*
|
||||
* seq_file can seek but we ignore it.
|
||||
*
|
||||
* Format of control line:
|
||||
* "base=%Lx size=%Lx type=%s" or "disable=%d"
|
||||
*/
|
||||
static ssize_t
|
||||
mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
|
||||
{
|
||||
int i, err;
|
||||
unsigned long reg;
|
||||
unsigned long long base, size;
|
||||
char *ptr;
|
||||
char line[LINE_SIZE];
|
||||
int length;
|
||||
size_t linelen;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
memset(line, 0, LINE_SIZE);
|
||||
|
||||
length = len;
|
||||
length--;
|
||||
|
||||
if (length > LINE_SIZE - 1)
|
||||
length = LINE_SIZE - 1;
|
||||
|
||||
if (length < 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(line, buf, length))
|
||||
return -EFAULT;
|
||||
|
||||
linelen = strlen(line);
|
||||
ptr = line + linelen - 1;
|
||||
if (linelen && *ptr == '\n')
|
||||
*ptr = '\0';
|
||||
|
||||
if (!strncmp(line, "disable=", 8)) {
|
||||
reg = simple_strtoul(line + 8, &ptr, 0);
|
||||
err = mtrr_del_page(reg, 0, 0);
|
||||
if (err < 0)
|
||||
return err;
|
||||
return len;
|
||||
}
|
||||
|
||||
if (strncmp(line, "base=", 5))
|
||||
return -EINVAL;
|
||||
|
||||
base = simple_strtoull(line + 5, &ptr, 0);
|
||||
ptr = skip_spaces(ptr);
|
||||
|
||||
if (strncmp(ptr, "size=", 5))
|
||||
return -EINVAL;
|
||||
|
||||
size = simple_strtoull(ptr + 5, &ptr, 0);
|
||||
if ((base & 0xfff) || (size & 0xfff))
|
||||
return -EINVAL;
|
||||
ptr = skip_spaces(ptr);
|
||||
|
||||
if (strncmp(ptr, "type=", 5))
|
||||
return -EINVAL;
|
||||
ptr = skip_spaces(ptr + 5);
|
||||
|
||||
for (i = 0; i < MTRR_NUM_TYPES; ++i) {
|
||||
if (strcmp(ptr, mtrr_strings[i]))
|
||||
continue;
|
||||
base >>= PAGE_SHIFT;
|
||||
size >>= PAGE_SHIFT;
|
||||
err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true);
|
||||
if (err < 0)
|
||||
return err;
|
||||
return len;
|
||||
}
|
||||
return -EINVAL;
|
||||
}
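/*
 * A usage sketch for the write interface above; paths and values are
 * illustrative only. From a root shell, a range can be added or removed
 * through /proc/mtrr with the control-line format parsed here:
 *
 *   echo "base=0xd0000000 size=0x4000000 type=write-combining" > /proc/mtrr
 *   echo "disable=2" > /proc/mtrr
 *
 * The first command ends up in mtrr_add_page() after base and size are
 * converted to pages; the second maps to mtrr_del_page() for register 2.
 */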
|
||||
|
||||
static long
|
||||
mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
|
||||
{
|
||||
int err = 0;
|
||||
mtrr_type type;
|
||||
unsigned long base;
|
||||
unsigned long size;
|
||||
struct mtrr_sentry sentry;
|
||||
struct mtrr_gentry gentry;
|
||||
void __user *arg = (void __user *) __arg;
|
||||
|
||||
switch (cmd) {
|
||||
case MTRRIOC_ADD_ENTRY:
|
||||
case MTRRIOC_SET_ENTRY:
|
||||
case MTRRIOC_DEL_ENTRY:
|
||||
case MTRRIOC_KILL_ENTRY:
|
||||
case MTRRIOC_ADD_PAGE_ENTRY:
|
||||
case MTRRIOC_SET_PAGE_ENTRY:
|
||||
case MTRRIOC_DEL_PAGE_ENTRY:
|
||||
case MTRRIOC_KILL_PAGE_ENTRY:
|
||||
if (copy_from_user(&sentry, arg, sizeof sentry))
|
||||
return -EFAULT;
|
||||
break;
|
||||
case MTRRIOC_GET_ENTRY:
|
||||
case MTRRIOC_GET_PAGE_ENTRY:
|
||||
if (copy_from_user(&gentry, arg, sizeof gentry))
|
||||
return -EFAULT;
|
||||
break;
|
||||
#ifdef CONFIG_COMPAT
|
||||
case MTRRIOC32_ADD_ENTRY:
|
||||
case MTRRIOC32_SET_ENTRY:
|
||||
case MTRRIOC32_DEL_ENTRY:
|
||||
case MTRRIOC32_KILL_ENTRY:
|
||||
case MTRRIOC32_ADD_PAGE_ENTRY:
|
||||
case MTRRIOC32_SET_PAGE_ENTRY:
|
||||
case MTRRIOC32_DEL_PAGE_ENTRY:
|
||||
case MTRRIOC32_KILL_PAGE_ENTRY: {
|
||||
struct mtrr_sentry32 __user *s32;
|
||||
|
||||
s32 = (struct mtrr_sentry32 __user *)__arg;
|
||||
err = get_user(sentry.base, &s32->base);
|
||||
err |= get_user(sentry.size, &s32->size);
|
||||
err |= get_user(sentry.type, &s32->type);
|
||||
if (err)
|
||||
return err;
|
||||
break;
|
||||
}
|
||||
case MTRRIOC32_GET_ENTRY:
|
||||
case MTRRIOC32_GET_PAGE_ENTRY: {
|
||||
struct mtrr_gentry32 __user *g32;
|
||||
|
||||
g32 = (struct mtrr_gentry32 __user *)__arg;
|
||||
err = get_user(gentry.regnum, &g32->regnum);
|
||||
err |= get_user(gentry.base, &g32->base);
|
||||
err |= get_user(gentry.size, &g32->size);
|
||||
err |= get_user(gentry.type, &g32->type);
|
||||
if (err)
|
||||
return err;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
switch (cmd) {
|
||||
default:
|
||||
return -ENOTTY;
|
||||
case MTRRIOC_ADD_ENTRY:
|
||||
#ifdef CONFIG_COMPAT
|
||||
case MTRRIOC32_ADD_ENTRY:
|
||||
#endif
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
err =
|
||||
mtrr_file_add(sentry.base, sentry.size, sentry.type, true,
|
||||
file, 0);
|
||||
break;
|
||||
case MTRRIOC_SET_ENTRY:
|
||||
#ifdef CONFIG_COMPAT
|
||||
case MTRRIOC32_SET_ENTRY:
|
||||
#endif
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
err = mtrr_add(sentry.base, sentry.size, sentry.type, false);
|
||||
break;
|
||||
case MTRRIOC_DEL_ENTRY:
|
||||
#ifdef CONFIG_COMPAT
|
||||
case MTRRIOC32_DEL_ENTRY:
|
||||
#endif
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
err = mtrr_file_del(sentry.base, sentry.size, file, 0);
|
||||
break;
|
||||
case MTRRIOC_KILL_ENTRY:
|
||||
#ifdef CONFIG_COMPAT
|
||||
case MTRRIOC32_KILL_ENTRY:
|
||||
#endif
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
err = mtrr_del(-1, sentry.base, sentry.size);
|
||||
break;
|
||||
case MTRRIOC_GET_ENTRY:
|
||||
#ifdef CONFIG_COMPAT
|
||||
case MTRRIOC32_GET_ENTRY:
|
||||
#endif
|
||||
if (gentry.regnum >= num_var_ranges)
|
||||
return -EINVAL;
|
||||
mtrr_if->get(gentry.regnum, &base, &size, &type);
|
||||
|
||||
/* Hide entries that go above 4GB */
|
||||
if (base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))
|
||||
|| size >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)))
|
||||
gentry.base = gentry.size = gentry.type = 0;
|
||||
else {
|
||||
gentry.base = base << PAGE_SHIFT;
|
||||
gentry.size = size << PAGE_SHIFT;
|
||||
gentry.type = type;
|
||||
}
|
||||
|
||||
break;
|
||||
case MTRRIOC_ADD_PAGE_ENTRY:
|
||||
#ifdef CONFIG_COMPAT
|
||||
case MTRRIOC32_ADD_PAGE_ENTRY:
|
||||
#endif
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
err =
|
||||
mtrr_file_add(sentry.base, sentry.size, sentry.type, true,
|
||||
file, 1);
|
||||
break;
|
||||
case MTRRIOC_SET_PAGE_ENTRY:
|
||||
#ifdef CONFIG_COMPAT
|
||||
case MTRRIOC32_SET_PAGE_ENTRY:
|
||||
#endif
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
err =
|
||||
mtrr_add_page(sentry.base, sentry.size, sentry.type, false);
|
||||
break;
|
||||
case MTRRIOC_DEL_PAGE_ENTRY:
|
||||
#ifdef CONFIG_COMPAT
|
||||
case MTRRIOC32_DEL_PAGE_ENTRY:
|
||||
#endif
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
err = mtrr_file_del(sentry.base, sentry.size, file, 1);
|
||||
break;
|
||||
case MTRRIOC_KILL_PAGE_ENTRY:
|
||||
#ifdef CONFIG_COMPAT
|
||||
case MTRRIOC32_KILL_PAGE_ENTRY:
|
||||
#endif
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
err = mtrr_del_page(-1, sentry.base, sentry.size);
|
||||
break;
|
||||
case MTRRIOC_GET_PAGE_ENTRY:
|
||||
#ifdef CONFIG_COMPAT
|
||||
case MTRRIOC32_GET_PAGE_ENTRY:
|
||||
#endif
|
||||
if (gentry.regnum >= num_var_ranges)
|
||||
return -EINVAL;
|
||||
mtrr_if->get(gentry.regnum, &base, &size, &type);
|
||||
/* Hide entries that would overflow */
|
||||
if (size != (__typeof__(gentry.size))size)
|
||||
gentry.base = gentry.size = gentry.type = 0;
|
||||
else {
|
||||
gentry.base = base;
|
||||
gentry.size = size;
|
||||
gentry.type = type;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
switch (cmd) {
|
||||
case MTRRIOC_GET_ENTRY:
|
||||
case MTRRIOC_GET_PAGE_ENTRY:
|
||||
if (copy_to_user(arg, &gentry, sizeof gentry))
|
||||
err = -EFAULT;
|
||||
break;
|
||||
#ifdef CONFIG_COMPAT
|
||||
case MTRRIOC32_GET_ENTRY:
|
||||
case MTRRIOC32_GET_PAGE_ENTRY: {
|
||||
struct mtrr_gentry32 __user *g32;
|
||||
|
||||
g32 = (struct mtrr_gentry32 __user *)__arg;
|
||||
err = put_user(gentry.base, &g32->base);
|
||||
err |= put_user(gentry.size, &g32->size);
|
||||
err |= put_user(gentry.regnum, &g32->regnum);
|
||||
err |= put_user(gentry.type, &g32->type);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
return err;
|
||||
}
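/*
 * A user-space sketch of the ioctl path above (hypothetical program, no
 * error handling). The same /proc/mtrr file descriptor serves both the
 * write interface and the ioctls:
 *
 *   struct mtrr_sentry sentry = {
 *           .base = 0xd0000000,
 *           .size = 0x4000000,
 *           .type = MTRR_TYPE_WRCOMB,
 *   };
 *   int fd = open("/proc/mtrr", O_RDWR);
 *
 *   ioctl(fd, MTRRIOC_ADD_ENTRY, &sentry);   (add or refcount a region)
 *   ioctl(fd, MTRRIOC_DEL_ENTRY, &sentry);   (drop it again)
 *   close(fd);
 */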
|
||||
|
||||
static int mtrr_close(struct inode *ino, struct file *file)
|
||||
{
|
||||
unsigned int *fcount = FILE_FCOUNT(file);
|
||||
int i, max;
|
||||
|
||||
if (fcount != NULL) {
|
||||
max = num_var_ranges;
|
||||
for (i = 0; i < max; ++i) {
|
||||
while (fcount[i] > 0) {
|
||||
mtrr_del(i, 0, 0);
|
||||
--fcount[i];
|
||||
}
|
||||
}
|
||||
kfree(fcount);
|
||||
FILE_FCOUNT(file) = NULL;
|
||||
}
|
||||
return single_release(ino, file);
|
||||
}
|
||||
|
||||
static int mtrr_seq_show(struct seq_file *seq, void *offset);
|
||||
|
||||
static int mtrr_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
if (!mtrr_if)
|
||||
return -EIO;
|
||||
if (!mtrr_if->get)
|
||||
return -ENXIO;
|
||||
return single_open(file, mtrr_seq_show, NULL);
|
||||
}
|
||||
|
||||
static const struct file_operations mtrr_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = mtrr_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.write = mtrr_write,
|
||||
.unlocked_ioctl = mtrr_ioctl,
|
||||
.compat_ioctl = mtrr_ioctl,
|
||||
.release = mtrr_close,
|
||||
};
|
||||
|
||||
static int mtrr_seq_show(struct seq_file *seq, void *offset)
|
||||
{
|
||||
char factor;
|
||||
int i, max, len;
|
||||
mtrr_type type;
|
||||
unsigned long base, size;
|
||||
|
||||
len = 0;
|
||||
max = num_var_ranges;
|
||||
for (i = 0; i < max; i++) {
|
||||
mtrr_if->get(i, &base, &size, &type);
|
||||
if (size == 0) {
|
||||
mtrr_usage_table[i] = 0;
|
||||
continue;
|
||||
}
|
||||
if (size < (0x100000 >> PAGE_SHIFT)) {
|
||||
/* less than 1MB */
|
||||
factor = 'K';
|
||||
size <<= PAGE_SHIFT - 10;
|
||||
} else {
|
||||
factor = 'M';
|
||||
size >>= 20 - PAGE_SHIFT;
|
||||
}
|
||||
/* Base can be > 32bit */
|
||||
len += seq_printf(seq, "reg%02i: base=0x%06lx000 "
|
||||
"(%5luMB), size=%5lu%cB, count=%d: %s\n",
|
||||
i, base, base >> (20 - PAGE_SHIFT), size,
|
||||
factor, mtrr_usage_table[i],
|
||||
mtrr_attrib_to_str(type));
|
||||
}
|
||||
return 0;
|
||||
}
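/*
 * For reference, the hypothetical 64 MiB write-combining range used in
 * the sketches above would show up in /proc/mtrr roughly as (spacing
 * approximate):
 *
 *   reg00: base=0x0d0000000 ( 3328MB), size=   64MB, count=1: write-combining
 */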
|
||||
|
||||
static int __init mtrr_if_init(void)
|
||||
{
|
||||
struct cpuinfo_x86 *c = &boot_cpu_data;
|
||||
|
||||
if ((!cpu_has(c, X86_FEATURE_MTRR)) &&
|
||||
(!cpu_has(c, X86_FEATURE_K6_MTRR)) &&
|
||||
(!cpu_has(c, X86_FEATURE_CYRIX_ARR)) &&
|
||||
(!cpu_has(c, X86_FEATURE_CENTAUR_MCR)))
|
||||
return -ENODEV;
|
||||
|
||||
proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops);
|
||||
return 0;
|
||||
}
|
||||
arch_initcall(mtrr_if_init);
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
842
arch/x86/kernel/cpu/mtrr/main.c
Normal file
@ -0,0 +1,842 @@
/* Generic MTRR (Memory Type Range Register) driver.
|
||||
|
||||
Copyright (C) 1997-2000 Richard Gooch
|
||||
Copyright (c) 2002 Patrick Mochel
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with this library; if not, write to the Free
|
||||
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
Richard Gooch may be reached by email at rgooch@atnf.csiro.au
|
||||
The postal address is:
|
||||
Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
|
||||
|
||||
Source: "Pentium Pro Family Developer's Manual, Volume 3:
|
||||
Operating System Writer's Guide" (Intel document number 242692),
|
||||
section 11.11.7
|
||||
|
||||
This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
|
||||
on 6-7 March 2002.
|
||||
Source: Intel Architecture Software Developers Manual, Volume 3:
|
||||
System Programming Guide; Section 9.11. (1997 edition - PPro).
|
||||
*/
|
||||
|
||||
#define DEBUG
|
||||
|
||||
#include <linux/types.h> /* FIXME: kvm_para.h needs this */
|
||||
|
||||
#include <linux/stop_machine.h>
|
||||
#include <linux/kvm_para.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/syscore_ops.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/e820.h>
|
||||
#include <asm/mtrr.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/pat.h>
|
||||
|
||||
#include "mtrr.h"
|
||||
|
||||
/* arch_phys_wc_add returns an MTRR register index plus this offset. */
|
||||
#define MTRR_TO_PHYS_WC_OFFSET 1000
|
||||
|
||||
u32 num_var_ranges;
|
||||
|
||||
unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
|
||||
static DEFINE_MUTEX(mtrr_mutex);
|
||||
|
||||
u64 size_or_mask, size_and_mask;
|
||||
static bool mtrr_aps_delayed_init;
|
||||
|
||||
static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
|
||||
|
||||
const struct mtrr_ops *mtrr_if;
|
||||
|
||||
static void set_mtrr(unsigned int reg, unsigned long base,
|
||||
unsigned long size, mtrr_type type);
|
||||
|
||||
void set_mtrr_ops(const struct mtrr_ops *ops)
|
||||
{
|
||||
if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
|
||||
mtrr_ops[ops->vendor] = ops;
|
||||
}
|
||||
|
||||
/* Returns non-zero if we have the write-combining memory type */
|
||||
static int have_wrcomb(void)
|
||||
{
|
||||
struct pci_dev *dev;
|
||||
|
||||
dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
|
||||
if (dev != NULL) {
|
||||
/*
|
||||
* ServerWorks LE chipsets < rev 6 have problems with
|
||||
* write-combining. Don't allow it and leave room for other
|
||||
* chipsets to be tagged
|
||||
*/
|
||||
if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
|
||||
dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
|
||||
dev->revision <= 5) {
|
||||
pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
|
||||
pci_dev_put(dev);
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* Intel 450NX errata # 23. Non ascending cacheline evictions to
|
||||
* write-combining memory may result in data corruption
|
||||
*/
|
||||
if (dev->vendor == PCI_VENDOR_ID_INTEL &&
|
||||
dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
|
||||
pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
|
||||
pci_dev_put(dev);
|
||||
return 0;
|
||||
}
|
||||
pci_dev_put(dev);
|
||||
}
|
||||
return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
|
||||
}
|
||||
|
||||
/* This function determines the number of variable MTRRs */
|
||||
static void __init set_num_var_ranges(void)
|
||||
{
|
||||
unsigned long config = 0, dummy;
|
||||
|
||||
if (use_intel())
|
||||
rdmsr(MSR_MTRRcap, config, dummy);
|
||||
else if (is_cpu(AMD))
|
||||
config = 2;
|
||||
else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
|
||||
config = 8;
|
||||
|
||||
num_var_ranges = config & 0xff;
|
||||
}
|
||||
|
||||
static void __init init_table(void)
|
||||
{
|
||||
int i, max;
|
||||
|
||||
max = num_var_ranges;
|
||||
for (i = 0; i < max; i++)
|
||||
mtrr_usage_table[i] = 1;
|
||||
}
|
||||
|
||||
struct set_mtrr_data {
|
||||
unsigned long smp_base;
|
||||
unsigned long smp_size;
|
||||
unsigned int smp_reg;
|
||||
mtrr_type smp_type;
|
||||
};
|
||||
|
||||
/**
|
||||
* mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
|
||||
* by all the CPUs.
|
||||
* @info: pointer to mtrr configuration data
|
||||
*
|
||||
* Returns nothing.
|
||||
*/
|
||||
static int mtrr_rendezvous_handler(void *info)
|
||||
{
|
||||
struct set_mtrr_data *data = info;
|
||||
|
||||
/*
|
||||
* We use this same function to initialize the mtrrs during boot,
|
||||
* resume, runtime cpu online and on an explicit request to set a
|
||||
* specific MTRR.
|
||||
*
|
||||
* During boot or suspend, the state of the boot cpu's mtrrs has been
|
||||
* saved, and we want to replicate that across all the cpus that come
|
||||
* online (either at the end of boot or resume or during a runtime cpu
|
||||
* online). If we're doing that, @reg is set to something special and on
|
||||
* all the cpu's we do mtrr_if->set_all() (On the logical cpu that
|
||||
* started the boot/resume sequence, this might be a duplicate
|
||||
* set_all()).
|
||||
*/
|
||||
if (data->smp_reg != ~0U) {
|
||||
mtrr_if->set(data->smp_reg, data->smp_base,
|
||||
data->smp_size, data->smp_type);
|
||||
} else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
|
||||
mtrr_if->set_all();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int types_compatible(mtrr_type type1, mtrr_type type2)
|
||||
{
|
||||
return type1 == MTRR_TYPE_UNCACHABLE ||
|
||||
type2 == MTRR_TYPE_UNCACHABLE ||
|
||||
(type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
|
||||
(type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
|
||||
}
|
||||
|
||||
/**
|
||||
* set_mtrr - update mtrrs on all processors
|
||||
* @reg: mtrr in question
|
||||
* @base: mtrr base
|
||||
* @size: mtrr size
|
||||
* @type: mtrr type
|
||||
*
|
||||
* This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
|
||||
*
|
||||
* 1. Queue work to do the following on all processors:
|
||||
* 2. Disable Interrupts
|
||||
* 3. Wait for all procs to do so
|
||||
* 4. Enter no-fill cache mode
|
||||
* 5. Flush caches
|
||||
* 6. Clear PGE bit
|
||||
* 7. Flush all TLBs
|
||||
* 8. Disable all range registers
|
||||
* 9. Update the MTRRs
|
||||
* 10. Enable all range registers
|
||||
* 11. Flush all TLBs and caches again
|
||||
* 12. Enter normal cache mode and reenable caching
|
||||
* 13. Set PGE
|
||||
* 14. Wait for buddies to catch up
|
||||
* 15. Enable interrupts.
|
||||
*
|
||||
* What does that mean for us? Well, stop_machine() will ensure that
|
||||
* the rendezvous handler is started on each CPU. And in lockstep they
|
||||
* do the state transition of disabling interrupts, updating MTRR's
|
||||
* (the CPU vendors may each do it differently, so we call mtrr_if->set()
|
||||
* callback and let them take care of it.) and enabling interrupts.
|
||||
*
|
||||
* Note that the mechanism is the same for UP systems, too; all the SMP stuff
|
||||
* becomes nops.
|
||||
*/
|
||||
static void
|
||||
set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
|
||||
{
|
||||
struct set_mtrr_data data = { .smp_reg = reg,
|
||||
.smp_base = base,
|
||||
.smp_size = size,
|
||||
.smp_type = type
|
||||
};
|
||||
|
||||
stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask);
|
||||
}
|
||||
|
||||
static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base,
|
||||
unsigned long size, mtrr_type type)
|
||||
{
|
||||
struct set_mtrr_data data = { .smp_reg = reg,
|
||||
.smp_base = base,
|
||||
.smp_size = size,
|
||||
.smp_type = type
|
||||
};
|
||||
|
||||
stop_machine_from_inactive_cpu(mtrr_rendezvous_handler, &data,
|
||||
cpu_callout_mask);
|
||||
}
|
||||
|
||||
/**
|
||||
* mtrr_add_page - Add a memory type region
|
||||
* @base: Physical base address of region in pages (in units of 4 kB!)
|
||||
* @size: Physical size of region in pages (4 kB)
|
||||
* @type: Type of MTRR desired
|
||||
* @increment: If this is true do usage counting on the region
|
||||
*
|
||||
* Memory type region registers control the caching on newer Intel and
|
||||
* non Intel processors. This function allows drivers to request an
|
||||
* MTRR is added. The details and hardware specifics of each processor's
|
||||
* implementation are hidden from the caller, but nevertheless the
|
||||
* caller should expect to need to provide a power of two size on an
|
||||
* equivalent power of two boundary.
|
||||
*
|
||||
* If the region cannot be added either because all regions are in use
|
||||
* or the CPU cannot support it a negative value is returned. On success
|
||||
* the register number for this entry is returned, but should be treated
|
||||
* as a cookie only.
|
||||
*
|
||||
* On a multiprocessor machine the changes are made to all processors.
|
||||
* This is required on x86 by the Intel processors.
|
||||
*
|
||||
* The available types are
|
||||
*
|
||||
* %MTRR_TYPE_UNCACHABLE - No caching
|
||||
*
|
||||
* %MTRR_TYPE_WRBACK - Write data back in bursts whenever
|
||||
*
|
||||
* %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
|
||||
*
|
||||
* %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
|
||||
*
|
||||
* BUGS: Needs a quiet flag for the cases where drivers do not mind
|
||||
* failures and do not wish system log messages to be sent.
|
||||
*/
|
||||
int mtrr_add_page(unsigned long base, unsigned long size,
|
||||
unsigned int type, bool increment)
|
||||
{
|
||||
unsigned long lbase, lsize;
|
||||
int i, replace, error;
|
||||
mtrr_type ltype;
|
||||
|
||||
if (!mtrr_if)
|
||||
return -ENXIO;
|
||||
|
||||
error = mtrr_if->validate_add_page(base, size, type);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
if (type >= MTRR_NUM_TYPES) {
|
||||
pr_warning("mtrr: type: %u invalid\n", type);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* If the type is WC, check that this processor supports it */
|
||||
if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
|
||||
pr_warning("mtrr: your processor doesn't support write-combining\n");
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
if (!size) {
|
||||
pr_warning("mtrr: zero sized request\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if ((base | (base + size - 1)) >>
|
||||
(boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
|
||||
pr_warning("mtrr: base or size exceeds the MTRR width\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
error = -EINVAL;
|
||||
replace = -1;
|
||||
|
||||
/* No CPU hotplug when we change MTRR entries */
|
||||
get_online_cpus();
|
||||
|
||||
/* Search for existing MTRR */
|
||||
mutex_lock(&mtrr_mutex);
|
||||
for (i = 0; i < num_var_ranges; ++i) {
|
||||
mtrr_if->get(i, &lbase, &lsize, &ltype);
|
||||
if (!lsize || base > lbase + lsize - 1 ||
|
||||
base + size - 1 < lbase)
|
||||
continue;
|
||||
/*
|
||||
* At this point we know there is some kind of
|
||||
* overlap/enclosure
|
||||
*/
|
||||
if (base < lbase || base + size - 1 > lbase + lsize - 1) {
|
||||
if (base <= lbase &&
|
||||
base + size - 1 >= lbase + lsize - 1) {
|
||||
/* New region encloses an existing region */
|
||||
if (type == ltype) {
|
||||
replace = replace == -1 ? i : -2;
|
||||
continue;
|
||||
} else if (types_compatible(type, ltype))
|
||||
continue;
|
||||
}
|
||||
pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing"
|
||||
" 0x%lx000,0x%lx000\n", base, size, lbase,
|
||||
lsize);
|
||||
goto out;
|
||||
}
|
||||
/* New region is enclosed by an existing region */
|
||||
if (ltype != type) {
|
||||
if (types_compatible(type, ltype))
|
||||
continue;
|
||||
pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
|
||||
base, size, mtrr_attrib_to_str(ltype),
|
||||
mtrr_attrib_to_str(type));
|
||||
goto out;
|
||||
}
|
||||
if (increment)
|
||||
++mtrr_usage_table[i];
|
||||
error = i;
|
||||
goto out;
|
||||
}
|
||||
/* Search for an empty MTRR */
|
||||
i = mtrr_if->get_free_region(base, size, replace);
|
||||
if (i >= 0) {
|
||||
set_mtrr(i, base, size, type);
|
||||
if (likely(replace < 0)) {
|
||||
mtrr_usage_table[i] = 1;
|
||||
} else {
|
||||
mtrr_usage_table[i] = mtrr_usage_table[replace];
|
||||
if (increment)
|
||||
mtrr_usage_table[i]++;
|
||||
if (unlikely(replace != i)) {
|
||||
set_mtrr(replace, 0, 0, 0);
|
||||
mtrr_usage_table[replace] = 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
pr_info("mtrr: no more MTRRs available\n");
|
||||
}
|
||||
error = i;
|
||||
out:
|
||||
mutex_unlock(&mtrr_mutex);
|
||||
put_online_cpus();
|
||||
return error;
|
||||
}
|
||||
|
||||
static int mtrr_check(unsigned long base, unsigned long size)
|
||||
{
|
||||
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
|
||||
pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
|
||||
pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base);
|
||||
dump_stack();
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* mtrr_add - Add a memory type region
|
||||
* @base: Physical base address of region
|
||||
* @size: Physical size of region
|
||||
* @type: Type of MTRR desired
|
||||
* @increment: If this is true do usage counting on the region
|
||||
*
|
||||
* Memory type region registers control the caching on newer Intel and
|
||||
* non Intel processors. This function allows drivers to request an
|
||||
* MTRR is added. The details and hardware specifics of each processor's
|
||||
* implementation are hidden from the caller, but nevertheless the
|
||||
* caller should expect to need to provide a power of two size on an
|
||||
* equivalent power of two boundary.
|
||||
*
|
||||
* If the region cannot be added either because all regions are in use
|
||||
* or the CPU cannot support it a negative value is returned. On success
|
||||
* the register number for this entry is returned, but should be treated
|
||||
* as a cookie only.
|
||||
*
|
||||
* On a multiprocessor machine the changes are made to all processors.
|
||||
* This is required on x86 by the Intel processors.
|
||||
*
|
||||
* The available types are
|
||||
*
|
||||
* %MTRR_TYPE_UNCACHABLE - No caching
|
||||
*
|
||||
* %MTRR_TYPE_WRBACK - Write data back in bursts whenever
|
||||
*
|
||||
* %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
|
||||
*
|
||||
* %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
|
||||
*
|
||||
* BUGS: Needs a quiet flag for the cases where drivers do not mind
|
||||
* failures and do not wish system log messages to be sent.
|
||||
*/
|
||||
int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
|
||||
bool increment)
|
||||
{
|
||||
if (mtrr_check(base, size))
|
||||
return -EINVAL;
|
||||
return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
|
||||
increment);
|
||||
}
|
||||
EXPORT_SYMBOL(mtrr_add);
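/*
 * A minimal driver-side usage sketch for mtrr_add()/mtrr_del(); the
 * addresses and size are hypothetical. The return value is only kept as
 * a cookie for the matching delete:
 *
 *   int mtrr;
 *
 *   mtrr = mtrr_add(0xd0000000, 0x4000000, MTRR_TYPE_WRCOMB, true);
 *   if (mtrr < 0)
 *           pr_warn("no WC MTRR, framebuffer stays uncached\n");
 *   ...
 *   if (mtrr >= 0)
 *           mtrr_del(mtrr, 0xd0000000, 0x4000000);
 */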
|
||||
|
||||
/**
|
||||
* mtrr_del_page - delete a memory type region
|
||||
* @reg: Register returned by mtrr_add
|
||||
* @base: Physical base address
|
||||
* @size: Size of region
|
||||
*
|
||||
* If register is supplied then base and size are ignored. This is
|
||||
* how drivers should call it.
|
||||
*
|
||||
* Releases an MTRR region. If the usage count drops to zero the
|
||||
* register is freed and the region returns to default state.
|
||||
* On success the register is returned, on failure a negative error
|
||||
* code.
|
||||
*/
|
||||
int mtrr_del_page(int reg, unsigned long base, unsigned long size)
|
||||
{
|
||||
int i, max;
|
||||
mtrr_type ltype;
|
||||
unsigned long lbase, lsize;
|
||||
int error = -EINVAL;
|
||||
|
||||
if (!mtrr_if)
|
||||
return -ENXIO;
|
||||
|
||||
max = num_var_ranges;
|
||||
/* No CPU hotplug when we change MTRR entries */
|
||||
get_online_cpus();
|
||||
mutex_lock(&mtrr_mutex);
|
||||
if (reg < 0) {
|
||||
/* Search for existing MTRR */
|
||||
for (i = 0; i < max; ++i) {
|
||||
mtrr_if->get(i, &lbase, &lsize, &ltype);
|
||||
if (lbase == base && lsize == size) {
|
||||
reg = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (reg < 0) {
|
||||
pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
|
||||
base, size);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
if (reg >= max) {
|
||||
pr_warning("mtrr: register: %d too big\n", reg);
|
||||
goto out;
|
||||
}
|
||||
mtrr_if->get(reg, &lbase, &lsize, &ltype);
|
||||
if (lsize < 1) {
|
||||
pr_warning("mtrr: MTRR %d not used\n", reg);
|
||||
goto out;
|
||||
}
|
||||
if (mtrr_usage_table[reg] < 1) {
|
||||
pr_warning("mtrr: reg: %d has count=0\n", reg);
|
||||
goto out;
|
||||
}
|
||||
if (--mtrr_usage_table[reg] < 1)
|
||||
set_mtrr(reg, 0, 0, 0);
|
||||
error = reg;
|
||||
out:
|
||||
mutex_unlock(&mtrr_mutex);
|
||||
put_online_cpus();
|
||||
return error;
|
||||
}
|
||||
|
||||
/**
|
||||
* mtrr_del - delete a memory type region
|
||||
* @reg: Register returned by mtrr_add
|
||||
* @base: Physical base address
|
||||
* @size: Size of region
|
||||
*
|
||||
* If register is supplied then base and size are ignored. This is
|
||||
* how drivers should call it.
|
||||
*
|
||||
* Releases an MTRR region. If the usage count drops to zero the
|
||||
* register is freed and the region returns to default state.
|
||||
* On success the register is returned, on failure a negative error
|
||||
* code.
|
||||
*/
|
||||
int mtrr_del(int reg, unsigned long base, unsigned long size)
|
||||
{
|
||||
if (mtrr_check(base, size))
|
||||
return -EINVAL;
|
||||
return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
|
||||
}
|
||||
EXPORT_SYMBOL(mtrr_del);
|
||||
|
||||
/**
|
||||
* arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
|
||||
* @base: Physical base address
|
||||
* @size: Size of region
|
||||
*
|
||||
* If PAT is available, this does nothing. If PAT is unavailable, it
|
||||
* attempts to add a WC MTRR covering size bytes starting at base and
|
||||
* logs an error if this fails.
|
||||
*
|
||||
* Drivers must store the return value to pass to mtrr_del_wc_if_needed,
|
||||
* but drivers should not try to interpret that return value.
|
||||
*/
|
||||
int arch_phys_wc_add(unsigned long base, unsigned long size)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (pat_enabled)
|
||||
return 0; /* Success! (We don't need to do anything.) */
|
||||
|
||||
ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
|
||||
if (ret < 0) {
|
||||
pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.",
|
||||
(void *)base, (void *)(base + size - 1));
|
||||
return ret;
|
||||
}
|
||||
return ret + MTRR_TO_PHYS_WC_OFFSET;
|
||||
}
|
||||
EXPORT_SYMBOL(arch_phys_wc_add);
|
||||
|
||||
/*
|
||||
* arch_phys_wc_del - undoes arch_phys_wc_add
|
||||
* @handle: Return value from arch_phys_wc_add
|
||||
*
|
||||
* This cleans up after mtrr_add_wc_if_needed.
|
||||
*
|
||||
* The API guarantees that mtrr_del_wc_if_needed(error code) and
|
||||
* mtrr_del_wc_if_needed(0) do nothing.
|
||||
*/
|
||||
void arch_phys_wc_del(int handle)
|
||||
{
|
||||
if (handle >= 1) {
|
||||
WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
|
||||
mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(arch_phys_wc_del);
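/*
 * The usual pairing of the two helpers above, sketched for a hypothetical
 * PCI framebuffer BAR. The handle is opaque and is only ever handed back
 * to arch_phys_wc_del():
 *
 *   int wc_handle;
 *
 *   wc_handle = arch_phys_wc_add(pci_resource_start(pdev, 0),
 *                                pci_resource_len(pdev, 0));
 *   ...
 *   arch_phys_wc_del(wc_handle);
 *
 * arch_phys_wc_del() ignores error codes and 0, so the caller does not
 * need to check the return value before tearing down.
 */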
|
||||
|
||||
/*
|
||||
* phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value
|
||||
* @handle: Return value from arch_phys_wc_add
|
||||
*
|
||||
* This will turn the return value from arch_phys_wc_add into an mtrr
|
||||
* index suitable for debugging.
|
||||
*
|
||||
* Note: There is no legitimate use for this function, except possibly
|
||||
* in printk line. Alas there is an illegitimate use in some ancient
|
||||
* drm ioctls.
|
||||
*/
|
||||
int phys_wc_to_mtrr_index(int handle)
|
||||
{
|
||||
if (handle < MTRR_TO_PHYS_WC_OFFSET)
|
||||
return -1;
|
||||
else
|
||||
return handle - MTRR_TO_PHYS_WC_OFFSET;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index);
|
||||
|
||||
/*
|
||||
* HACK ALERT!
|
||||
* These should be called implicitly, but we can't yet until all the initcall
|
||||
* stuff is done...
|
||||
*/
|
||||
static void __init init_ifs(void)
|
||||
{
|
||||
#ifndef CONFIG_X86_64
|
||||
amd_init_mtrr();
|
||||
cyrix_init_mtrr();
|
||||
centaur_init_mtrr();
|
||||
#endif
|
||||
}
|
||||
|
||||
/* The suspend/resume methods are only for CPUs without MTRRs. CPUs using the generic
|
||||
* MTRR driver don't require this.
|
||||
*/
|
||||
struct mtrr_value {
|
||||
mtrr_type ltype;
|
||||
unsigned long lbase;
|
||||
unsigned long lsize;
|
||||
};
|
||||
|
||||
static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
|
||||
|
||||
static int mtrr_save(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
mtrr_if->get(i, &mtrr_value[i].lbase,
|
||||
&mtrr_value[i].lsize,
|
||||
&mtrr_value[i].ltype);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void mtrr_restore(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_var_ranges; i++) {
|
||||
if (mtrr_value[i].lsize) {
|
||||
set_mtrr(i, mtrr_value[i].lbase,
|
||||
mtrr_value[i].lsize,
|
||||
mtrr_value[i].ltype);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
static struct syscore_ops mtrr_syscore_ops = {
|
||||
.suspend = mtrr_save,
|
||||
.resume = mtrr_restore,
|
||||
};
|
||||
|
||||
int __initdata changed_by_mtrr_cleanup;
|
||||
|
||||
#define SIZE_OR_MASK_BITS(n) (~((1ULL << ((n) - PAGE_SHIFT)) - 1))
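/*
 * Worked example for the macro above, assuming 4 KiB pages: for a 36-bit
 * physical address width, SIZE_OR_MASK_BITS(36) == ~((1ULL << 24) - 1)
 * == 0xffffffffff000000, and the matching
 * size_and_mask = ~size_or_mask & 0xfffff00000ULL == 0x00f00000,
 * the same constants mtrr_bp_init() installs for the generic driver.
 */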
|
||||
/**
|
||||
* mtrr_bp_init - initialize mtrrs on the boot CPU
|
||||
*
|
||||
* This needs to be called early; before any of the other CPUs are
|
||||
* initialized (i.e. before smp_init()).
|
||||
*
|
||||
*/
|
||||
void __init mtrr_bp_init(void)
|
||||
{
|
||||
u32 phys_addr;
|
||||
|
||||
init_ifs();
|
||||
|
||||
phys_addr = 32;
|
||||
|
||||
if (cpu_has_mtrr) {
|
||||
mtrr_if = &generic_mtrr_ops;
|
||||
size_or_mask = SIZE_OR_MASK_BITS(36);
|
||||
size_and_mask = 0x00f00000;
|
||||
phys_addr = 36;
|
||||
|
||||
/*
|
||||
* This is an AMD specific MSR, but we assume(hope?) that
|
||||
* Intel will implement it too when they extend the address
|
||||
* bus of the Xeon.
|
||||
*/
|
||||
if (cpuid_eax(0x80000000) >= 0x80000008) {
|
||||
phys_addr = cpuid_eax(0x80000008) & 0xff;
|
||||
/* CPUID workaround for Intel 0F33/0F34 CPU */
|
||||
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
|
||||
boot_cpu_data.x86 == 0xF &&
|
||||
boot_cpu_data.x86_model == 0x3 &&
|
||||
(boot_cpu_data.x86_mask == 0x3 ||
|
||||
boot_cpu_data.x86_mask == 0x4))
|
||||
phys_addr = 36;
|
||||
|
||||
size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
|
||||
size_and_mask = ~size_or_mask & 0xfffff00000ULL;
|
||||
} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
|
||||
boot_cpu_data.x86 == 6) {
|
||||
/*
|
||||
* VIA C* family have Intel style MTRRs,
|
||||
* but don't support PAE
|
||||
*/
|
||||
size_or_mask = SIZE_OR_MASK_BITS(32);
|
||||
size_and_mask = 0;
|
||||
phys_addr = 32;
|
||||
}
|
||||
} else {
|
||||
switch (boot_cpu_data.x86_vendor) {
|
||||
case X86_VENDOR_AMD:
|
||||
if (cpu_feature_enabled(X86_FEATURE_K6_MTRR)) {
|
||||
/* Pre-Athlon (K6) AMD CPU MTRRs */
|
||||
mtrr_if = mtrr_ops[X86_VENDOR_AMD];
|
||||
size_or_mask = SIZE_OR_MASK_BITS(32);
|
||||
size_and_mask = 0;
|
||||
}
|
||||
break;
|
||||
case X86_VENDOR_CENTAUR:
|
||||
if (cpu_feature_enabled(X86_FEATURE_CENTAUR_MCR)) {
|
||||
mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
|
||||
size_or_mask = SIZE_OR_MASK_BITS(32);
|
||||
size_and_mask = 0;
|
||||
}
|
||||
break;
|
||||
case X86_VENDOR_CYRIX:
|
||||
if (cpu_feature_enabled(X86_FEATURE_CYRIX_ARR)) {
|
||||
mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
|
||||
size_or_mask = SIZE_OR_MASK_BITS(32);
|
||||
size_and_mask = 0;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (mtrr_if) {
|
||||
set_num_var_ranges();
|
||||
init_table();
|
||||
if (use_intel()) {
|
||||
get_mtrr_state();
|
||||
|
||||
if (mtrr_cleanup(phys_addr)) {
|
||||
changed_by_mtrr_cleanup = 1;
|
||||
mtrr_if->set_all();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void mtrr_ap_init(void)
|
||||
{
|
||||
if (!use_intel() || mtrr_aps_delayed_init)
|
||||
return;
|
||||
/*
|
||||
* Ideally we should hold mtrr_mutex here to avoid mtrr entries
|
||||
* changed, but this routine will be called in cpu boot time,
|
||||
* holding the lock breaks it.
|
||||
*
|
||||
* This routine is called in two cases:
|
||||
*
|
||||
* 1. very early in software resume, when there absolutely
|
||||
* are no MTRR entry changes;
|
||||
*
|
||||
* 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
|
||||
* lock to prevent mtrr entry changes
|
||||
*/
|
||||
set_mtrr_from_inactive_cpu(~0U, 0, 0, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Save current fixed-range MTRR state of the first cpu in cpu_online_mask.
|
||||
*/
|
||||
void mtrr_save_state(void)
|
||||
{
|
||||
int first_cpu;
|
||||
|
||||
get_online_cpus();
|
||||
first_cpu = cpumask_first(cpu_online_mask);
|
||||
smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
|
||||
put_online_cpus();
|
||||
}
|
||||
|
||||
void set_mtrr_aps_delayed_init(void)
|
||||
{
|
||||
if (!use_intel())
|
||||
return;
|
||||
|
||||
mtrr_aps_delayed_init = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Delayed MTRR initialization for all AP's
|
||||
*/
|
||||
void mtrr_aps_init(void)
|
||||
{
|
||||
if (!use_intel())
|
||||
return;
|
||||
|
||||
/*
|
||||
* Check if someone has requested the delay of AP MTRR initialization,
|
||||
* by doing set_mtrr_aps_delayed_init(), prior to this point. If not,
|
||||
* then we are done.
|
||||
*/
|
||||
if (!mtrr_aps_delayed_init)
|
||||
return;
|
||||
|
||||
set_mtrr(~0U, 0, 0, 0);
|
||||
mtrr_aps_delayed_init = false;
|
||||
}
|
||||
|
||||
void mtrr_bp_restore(void)
|
||||
{
|
||||
if (!use_intel())
|
||||
return;
|
||||
|
||||
mtrr_if->set_all();
|
||||
}
|
||||
|
||||
static int __init mtrr_init_finialize(void)
|
||||
{
|
||||
if (!mtrr_if)
|
||||
return 0;
|
||||
|
||||
if (use_intel()) {
|
||||
if (!changed_by_mtrr_cleanup)
|
||||
mtrr_state_warn();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The CPU has no MTRR and seems to not support SMP. They have
|
||||
* specific drivers, we use a tricky method to support
|
||||
* suspend/resume for them.
|
||||
*
|
||||
* TBD: is there any system with such CPU which supports
|
||||
* suspend/resume? If no, we should remove the code.
|
||||
*/
|
||||
register_syscore_ops(&mtrr_syscore_ops);
|
||||
|
||||
return 0;
|
||||
}
|
||||
subsys_initcall(mtrr_init_finialize);
|
||||
78
arch/x86/kernel/cpu/mtrr/mtrr.h
Normal file
@ -0,0 +1,78 @@
/*
|
||||
* local MTRR defines.
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/stddef.h>
|
||||
|
||||
#define MTRR_CHANGE_MASK_FIXED 0x01
|
||||
#define MTRR_CHANGE_MASK_VARIABLE 0x02
|
||||
#define MTRR_CHANGE_MASK_DEFTYPE 0x04
|
||||
|
||||
extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
|
||||
|
||||
struct mtrr_ops {
|
||||
u32 vendor;
|
||||
u32 use_intel_if;
|
||||
void (*set)(unsigned int reg, unsigned long base,
|
||||
unsigned long size, mtrr_type type);
|
||||
void (*set_all)(void);
|
||||
|
||||
void (*get)(unsigned int reg, unsigned long *base,
|
||||
unsigned long *size, mtrr_type *type);
|
||||
int (*get_free_region)(unsigned long base, unsigned long size,
|
||||
int replace_reg);
|
||||
int (*validate_add_page)(unsigned long base, unsigned long size,
|
||||
unsigned int type);
|
||||
int (*have_wrcomb)(void);
|
||||
};
|
||||
|
||||
extern int generic_get_free_region(unsigned long base, unsigned long size,
|
||||
int replace_reg);
|
||||
extern int generic_validate_add_page(unsigned long base, unsigned long size,
|
||||
unsigned int type);
|
||||
|
||||
extern const struct mtrr_ops generic_mtrr_ops;
|
||||
|
||||
extern int positive_have_wrcomb(void);
|
||||
|
||||
/* library functions for processor-specific routines */
|
||||
struct set_mtrr_context {
|
||||
unsigned long flags;
|
||||
unsigned long cr4val;
|
||||
u32 deftype_lo;
|
||||
u32 deftype_hi;
|
||||
u32 ccr3;
|
||||
};
|
||||
|
||||
void set_mtrr_done(struct set_mtrr_context *ctxt);
|
||||
void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
|
||||
void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
|
||||
|
||||
void fill_mtrr_var_range(unsigned int index,
|
||||
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
|
||||
void get_mtrr_state(void);
|
||||
|
||||
extern void set_mtrr_ops(const struct mtrr_ops *ops);
|
||||
|
||||
extern u64 size_or_mask, size_and_mask;
|
||||
extern const struct mtrr_ops *mtrr_if;
|
||||
|
||||
#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
|
||||
#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
|
||||
|
||||
extern unsigned int num_var_ranges;
|
||||
extern u64 mtrr_tom2;
|
||||
extern struct mtrr_state_type mtrr_state;
|
||||
|
||||
void mtrr_state_warn(void);
|
||||
const char *mtrr_attrib_to_str(int x);
|
||||
void mtrr_wrmsr(unsigned, unsigned, unsigned);
|
||||
|
||||
/* CPU specific mtrr init functions */
|
||||
int amd_init_mtrr(void);
|
||||
int cyrix_init_mtrr(void);
|
||||
int centaur_init_mtrr(void);
|
||||
|
||||
extern int changed_by_mtrr_cleanup;
|
||||
extern int mtrr_cleanup(unsigned address_bits);
|
||||
2172
arch/x86/kernel/cpu/perf_event.c
Normal file
File diff suppressed because it is too large
775
arch/x86/kernel/cpu/perf_event.h
Normal file
@ -0,0 +1,775 @@
/*
|
||||
* Performance events x86 architecture header
|
||||
*
|
||||
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
|
||||
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
|
||||
* Copyright (C) 2009 Jaswinder Singh Rajput
|
||||
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
|
||||
* Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
|
||||
* Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
|
||||
* Copyright (C) 2009 Google, Inc., Stephane Eranian
|
||||
*
|
||||
* For licencing details see kernel-base/COPYING
|
||||
*/
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
#if 0
|
||||
#undef wrmsrl
|
||||
#define wrmsrl(msr, val) \
|
||||
do { \
|
||||
unsigned int _msr = (msr); \
|
||||
u64 _val = (val); \
|
||||
trace_printk("wrmsrl(%x, %Lx)\n", (unsigned int)(_msr), \
|
||||
(unsigned long long)(_val)); \
|
||||
native_write_msr((_msr), (u32)(_val), (u32)(_val >> 32)); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* | NHM/WSM | SNB |
|
||||
* register -------------------------------
|
||||
* | HT | no HT | HT | no HT |
|
||||
*-----------------------------------------
|
||||
* offcore | core | core | cpu | core |
|
||||
* lbr_sel | core | core | cpu | core |
|
||||
* ld_lat | cpu | core | cpu | core |
|
||||
*-----------------------------------------
|
||||
*
|
||||
* Given that there is a small number of shared regs,
|
||||
* we can pre-allocate their slot in the per-cpu
|
||||
* per-core reg tables.
|
||||
*/
|
||||
enum extra_reg_type {
|
||||
EXTRA_REG_NONE = -1, /* not used */
|
||||
|
||||
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
|
||||
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
|
||||
EXTRA_REG_LBR = 2, /* lbr_select */
|
||||
EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
|
||||
|
||||
EXTRA_REG_MAX /* number of entries needed */
|
||||
};
|
||||
|
||||
struct event_constraint {
|
||||
union {
|
||||
unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
||||
u64 idxmsk64;
|
||||
};
|
||||
u64 code;
|
||||
u64 cmask;
|
||||
int weight;
|
||||
int overlap;
|
||||
int flags;
|
||||
};
|
||||
/*
|
||||
* struct hw_perf_event.flags flags
|
||||
*/
|
||||
#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
|
||||
#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
|
||||
#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style datala, store */
|
||||
#define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
|
||||
#define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */
|
||||
#define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */
|
||||
|
||||
struct amd_nb {
|
||||
int nb_id; /* NorthBridge id */
|
||||
int refcnt; /* reference count */
|
||||
struct perf_event *owners[X86_PMC_IDX_MAX];
|
||||
struct event_constraint event_constraints[X86_PMC_IDX_MAX];
|
||||
};
|
||||
|
||||
/* The maximal number of PEBS events: */
|
||||
#define MAX_PEBS_EVENTS 8
|
||||
|
||||
/*
|
||||
* A debug store configuration.
|
||||
*
|
||||
* We only support architectures that use 64bit fields.
|
||||
*/
|
||||
struct debug_store {
|
||||
u64 bts_buffer_base;
|
||||
u64 bts_index;
|
||||
u64 bts_absolute_maximum;
|
||||
u64 bts_interrupt_threshold;
|
||||
u64 pebs_buffer_base;
|
||||
u64 pebs_index;
|
||||
u64 pebs_absolute_maximum;
|
||||
u64 pebs_interrupt_threshold;
|
||||
u64 pebs_event_reset[MAX_PEBS_EVENTS];
|
||||
};
|
||||
|
||||
/*
|
||||
* Per register state.
|
||||
*/
|
||||
struct er_account {
|
||||
raw_spinlock_t lock; /* per-core: protect structure */
|
||||
u64 config; /* extra MSR config */
|
||||
u64 reg; /* extra MSR number */
|
||||
atomic_t ref; /* reference count */
|
||||
};
|
||||
|
||||
/*
|
||||
* Per core/cpu state
|
||||
*
|
||||
* Used to coordinate shared registers between HT threads or
|
||||
* among events on a single PMU.
|
||||
*/
|
||||
struct intel_shared_regs {
|
||||
struct er_account regs[EXTRA_REG_MAX];
|
||||
int refcnt; /* per-core: #HT threads */
|
||||
unsigned core_id; /* per-core: core id */
|
||||
};
|
||||
|
||||
#define MAX_LBR_ENTRIES 16
|
||||
|
||||
struct cpu_hw_events {
|
||||
/*
|
||||
* Generic x86 PMC bits
|
||||
*/
|
||||
struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
|
||||
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
||||
unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
||||
int enabled;
|
||||
|
||||
int n_events; /* the # of events in the below arrays */
|
||||
int n_added; /* the # last events in the below arrays;
|
||||
they've never been enabled yet */
|
||||
int n_txn; /* the # last events in the below arrays;
|
||||
added in the current transaction */
|
||||
int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
|
||||
u64 tags[X86_PMC_IDX_MAX];
|
||||
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
|
||||
|
||||
unsigned int group_flag;
|
||||
int is_fake;
|
||||
|
||||
/*
|
||||
* Intel DebugStore bits
|
||||
*/
|
||||
struct debug_store *ds;
|
||||
u64 pebs_enabled;
|
||||
|
||||
/*
|
||||
* Intel LBR bits
|
||||
*/
|
||||
int lbr_users;
|
||||
void *lbr_context;
|
||||
struct perf_branch_stack lbr_stack;
|
||||
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
|
||||
struct er_account *lbr_sel;
|
||||
u64 br_sel;
|
||||
|
||||
/*
|
||||
* Intel host/guest exclude bits
|
||||
*/
|
||||
u64 intel_ctrl_guest_mask;
|
||||
u64 intel_ctrl_host_mask;
|
||||
struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX];
|
||||
|
||||
/*
|
||||
* Intel checkpoint mask
|
||||
*/
|
||||
u64 intel_cp_status;
|
||||
|
||||
/*
|
||||
* manage shared (per-core, per-cpu) registers
|
||||
* used on Intel NHM/WSM/SNB
|
||||
*/
|
||||
struct intel_shared_regs *shared_regs;
|
||||
|
||||
/*
|
||||
* AMD specific bits
|
||||
*/
|
||||
struct amd_nb *amd_nb;
|
||||
/* Inverted mask of bits to clear in the perf_ctr ctrl registers */
|
||||
u64 perf_ctr_virt_mask;
|
||||
|
||||
void *kfree_on_online;
|
||||
};
|
||||
|
||||
#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
|
||||
{ .idxmsk64 = (n) }, \
|
||||
.code = (c), \
|
||||
.cmask = (m), \
|
||||
.weight = (w), \
|
||||
.overlap = (o), \
|
||||
.flags = f, \
|
||||
}
|
||||
|
||||
#define EVENT_CONSTRAINT(c, n, m) \
|
||||
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
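/*
 * A small illustration of the encoding (hypothetical event code):
 * EVENT_CONSTRAINT(0xc0, 0x3, ARCH_PERFMON_EVENTSEL_EVENT) describes an
 * event with selector 0xc0 that may only run on generic counters 0 and 1
 * (idxmsk 0x3), weight HWEIGHT(0x3) == 2, and no overlap flag.
 */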
|
||||
|
||||
/*
|
||||
* The overlap flag marks event constraints with overlapping counter
|
||||
* masks. This is the case if the counter mask of such an event is not
|
||||
* a subset of any other counter mask of a constraint with an equal or
|
||||
* higher weight, e.g.:
|
||||
*
|
||||
* c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
|
||||
* c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
|
||||
* c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
|
||||
*
|
||||
* The event scheduler may not select the correct counter in the first
|
||||
* cycle because it needs to know which subsequent events will be
|
||||
* scheduled. It may fail to schedule the events then. So we set the
|
||||
* overlap flag for such constraints to give the scheduler a hint which
|
||||
* events to select for counter rescheduling.
|
||||
*
|
||||
* Care must be taken as the rescheduling algorithm is O(n!) which
|
||||
* will increase scheduling cycles for an over-commited system
|
||||
* dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
|
||||
* and its counter masks must be kept at a minimum.
|
||||
*/
|
||||
#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
|
||||
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
|
||||
|
||||
/*
|
||||
* Constraint on the Event code.
|
||||
*/
|
||||
#define INTEL_EVENT_CONSTRAINT(c, n) \
|
||||
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
|
||||
|
||||
/*
|
||||
* Constraint on the Event code + UMask + fixed-mask
|
||||
*
|
||||
* filter mask to validate fixed counter events.
|
||||
* the following filters disqualify for fixed counters:
|
||||
* - inv
|
||||
* - edge
|
||||
* - cnt-mask
|
||||
* - in_tx
|
||||
* - in_tx_checkpointed
|
||||
* The other filters are supported by fixed counters.
|
||||
* The any-thread option is supported starting with v3.
|
||||
*/
|
||||
#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
|
||||
#define FIXED_EVENT_CONSTRAINT(c, n) \
|
||||
EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
|
||||
|
||||
/*
|
||||
* Constraint on the Event code + UMask
|
||||
*/
|
||||
#define INTEL_UEVENT_CONSTRAINT(c, n) \
|
||||
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
|
||||
|
||||
#define INTEL_PLD_CONSTRAINT(c, n) \
|
||||
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
|
||||
|
||||
#define INTEL_PST_CONSTRAINT(c, n) \
|
||||
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
|
||||
|
||||
/* Event constraint, but match on all event flags too. */
|
||||
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
|
||||
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
|
||||
|
||||
/* Check only flags, but allow all event/umask */
|
||||
#define INTEL_ALL_EVENT_CONSTRAINT(code, n) \
|
||||
EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
|
||||
|
||||
/* Check flags and event code, and set the HSW store flag */
|
||||
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \
|
||||
__EVENT_CONSTRAINT(code, n, \
|
||||
ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
|
||||
|
||||
/* Check flags and event code, and set the HSW load flag */
|
||||
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \
|
||||
__EVENT_CONSTRAINT(code, n, \
|
||||
ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
|
||||
|
||||
/* Check flags and event code/umask, and set the HSW store flag */
|
||||
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
|
||||
__EVENT_CONSTRAINT(code, n, \
|
||||
INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
|
||||
|
||||
/* Check flags and event code/umask, and set the HSW load flag */
|
||||
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
|
||||
__EVENT_CONSTRAINT(code, n, \
|
||||
INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
|
||||
|
||||
/* Check flags and event code/umask, and set the HSW N/A flag */
|
||||
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
|
||||
__EVENT_CONSTRAINT(code, n, \
|
||||
INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)
|
||||
|
||||
|
||||
/*
|
||||
* We define the end marker as having a weight of -1
|
||||
* to enable blacklisting of events using a counter bitmask
|
||||
* of zero and thus a weight of zero.
|
||||
* The end marker has a weight that cannot possibly be
|
||||
* obtained from counting the bits in the bitmask.
|
||||
*/
|
||||
#define EVENT_CONSTRAINT_END { .weight = -1 }
|
||||
|
||||
/*
|
||||
* Check for end marker with weight == -1
|
||||
*/
|
||||
#define for_each_event_constraint(e, c) \
|
||||
for ((e) = (c); (e)->weight != -1; (e)++)
|
||||
|
||||
/*
|
||||
* Extra registers for specific events.
|
||||
*
|
||||
* Some events need large masks and require external MSRs.
|
||||
* Those extra MSRs end up being shared for all events on
|
||||
* a PMU and sometimes between PMUs of sibling HT threads.
|
||||
* In either case, the kernel needs to handle conflicting
|
||||
* accesses to those extra, shared, regs. The data structure
|
||||
* to manage those registers is stored in cpu_hw_event.
|
||||
*/
|
||||
struct extra_reg {
|
||||
unsigned int event;
|
||||
unsigned int msr;
|
||||
u64 config_mask;
|
||||
u64 valid_mask;
|
||||
int idx; /* per_xxx->regs[] reg index */
|
||||
bool extra_msr_access;
|
||||
};
|
||||
|
||||
#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \
|
||||
.event = (e), \
|
||||
.msr = (ms), \
|
||||
.config_mask = (m), \
|
||||
.valid_mask = (vm), \
|
||||
.idx = EXTRA_REG_##i, \
|
||||
.extra_msr_access = true, \
|
||||
}
|
||||
|
||||
#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
|
||||
EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
|
||||
|
||||
#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
|
||||
EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
|
||||
ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
|
||||
|
||||
#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
|
||||
INTEL_UEVENT_EXTRA_REG(c, \
|
||||
MSR_PEBS_LD_LAT_THRESHOLD, \
|
||||
0xffff, \
|
||||
LDLAT)
|
||||
|
||||
#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
|
||||
|
||||
union perf_capabilities {
|
||||
struct {
|
||||
u64 lbr_format:6;
|
||||
u64 pebs_trap:1;
|
||||
u64 pebs_arch_reg:1;
|
||||
u64 pebs_format:4;
|
||||
u64 smm_freeze:1;
|
||||
/*
|
||||
* PMU supports separate counter range for writing
|
||||
* values > 32bit.
|
||||
*/
|
||||
u64 full_width_write:1;
|
||||
};
|
||||
u64 capabilities;
|
||||
};
|
||||
|
||||
struct x86_pmu_quirk {
|
||||
struct x86_pmu_quirk *next;
|
||||
void (*func)(void);
|
||||
};
|
||||
|
||||
union x86_pmu_config {
|
||||
struct {
|
||||
u64 event:8,
|
||||
umask:8,
|
||||
usr:1,
|
||||
os:1,
|
||||
edge:1,
|
||||
pc:1,
|
||||
interrupt:1,
|
||||
__reserved1:1,
|
||||
en:1,
|
||||
inv:1,
|
||||
cmask:8,
|
||||
event2:4,
|
||||
__reserved2:4,
|
||||
go:1,
|
||||
ho:1;
|
||||
} bits;
|
||||
u64 value;
|
||||
};
|
||||
|
||||
#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
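/*
* Illustrative use (not taken from this file): X86_CONFIG() packs
* designated initializers for the bit fields above into a raw config
* value, e.g.
*
*	u64 cfg = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=1);
*/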
|
||||
|
||||
/*
|
||||
* struct x86_pmu - generic x86 pmu
|
||||
*/
|
||||
struct x86_pmu {
|
||||
/*
|
||||
* Generic x86 PMC bits
|
||||
*/
|
||||
const char *name;
|
||||
int version;
|
||||
int (*handle_irq)(struct pt_regs *);
|
||||
void (*disable_all)(void);
|
||||
void (*enable_all)(int added);
|
||||
void (*enable)(struct perf_event *);
|
||||
void (*disable)(struct perf_event *);
|
||||
int (*hw_config)(struct perf_event *event);
|
||||
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
|
||||
unsigned eventsel;
|
||||
unsigned perfctr;
|
||||
int (*addr_offset)(int index, bool eventsel);
|
||||
int (*rdpmc_index)(int index);
|
||||
u64 (*event_map)(int);
|
||||
int max_events;
|
||||
int num_counters;
|
||||
int num_counters_fixed;
|
||||
int cntval_bits;
|
||||
u64 cntval_mask;
|
||||
union {
|
||||
unsigned long events_maskl;
|
||||
unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
|
||||
};
|
||||
int events_mask_len;
|
||||
int apic;
|
||||
u64 max_period;
|
||||
struct event_constraint *
|
||||
(*get_event_constraints)(struct cpu_hw_events *cpuc,
|
||||
struct perf_event *event);
|
||||
|
||||
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
|
||||
struct perf_event *event);
|
||||
struct event_constraint *event_constraints;
|
||||
struct x86_pmu_quirk *quirks;
|
||||
int perfctr_second_write;
|
||||
bool late_ack;
|
||||
|
||||
/*
|
||||
* sysfs attrs
|
||||
*/
|
||||
int attr_rdpmc_broken;
|
||||
int attr_rdpmc;
|
||||
struct attribute **format_attrs;
|
||||
struct attribute **event_attrs;
|
||||
|
||||
ssize_t (*events_sysfs_show)(char *page, u64 config);
|
||||
struct attribute **cpu_events;
|
||||
|
||||
/*
|
||||
* CPU Hotplug hooks
|
||||
*/
|
||||
int (*cpu_prepare)(int cpu);
|
||||
void (*cpu_starting)(int cpu);
|
||||
void (*cpu_dying)(int cpu);
|
||||
void (*cpu_dead)(int cpu);
|
||||
|
||||
void (*check_microcode)(void);
|
||||
void (*flush_branch_stack)(void);
|
||||
|
||||
/*
|
||||
* Intel Arch Perfmon v2+
|
||||
*/
|
||||
u64 intel_ctrl;
|
||||
union perf_capabilities intel_cap;
|
||||
|
||||
/*
|
||||
* Intel DebugStore bits
|
||||
*/
|
||||
unsigned int bts :1,
|
||||
bts_active :1,
|
||||
pebs :1,
|
||||
pebs_active :1,
|
||||
pebs_broken :1;
|
||||
int pebs_record_size;
|
||||
void (*drain_pebs)(struct pt_regs *regs);
|
||||
struct event_constraint *pebs_constraints;
|
||||
void (*pebs_aliases)(struct perf_event *event);
|
||||
int max_pebs_events;
|
||||
|
||||
/*
|
||||
* Intel LBR
|
||||
*/
|
||||
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
|
||||
int lbr_nr; /* hardware stack size */
|
||||
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
|
||||
const int *lbr_sel_map; /* lbr_select mappings */
|
||||
bool lbr_double_abort; /* duplicated lbr aborts */
|
||||
|
||||
/*
|
||||
* Extra registers for events
|
||||
*/
|
||||
struct extra_reg *extra_regs;
|
||||
unsigned int er_flags;
|
||||
|
||||
/*
|
||||
* Intel host/guest support (KVM)
|
||||
*/
|
||||
struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
|
||||
};
|
||||
|
||||
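/*
* Prepend a boot-time quirk to the x86_pmu.quirks list; the registered
* callbacks are run once during PMU initialization.
*/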
#define x86_add_quirk(func_) \
|
||||
do { \
|
||||
static struct x86_pmu_quirk __quirk __initdata = { \
|
||||
.func = func_, \
|
||||
}; \
|
||||
__quirk.next = x86_pmu.quirks; \
|
||||
x86_pmu.quirks = &__quirk; \
|
||||
} while (0)
|
||||
|
||||
#define ERF_NO_HT_SHARING 1
|
||||
#define ERF_HAS_RSP_1 2
|
||||
|
||||
#define EVENT_VAR(_id) event_attr_##_id
|
||||
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
|
||||
|
||||
#define EVENT_ATTR(_name, _id) \
|
||||
static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
|
||||
.attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
|
||||
.id = PERF_COUNT_HW_##_id, \
|
||||
.event_str = NULL, \
|
||||
};
|
||||
|
||||
#define EVENT_ATTR_STR(_name, v, str) \
|
||||
static struct perf_pmu_events_attr event_attr_##v = { \
|
||||
.attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
|
||||
.id = 0, \
|
||||
.event_str = str, \
|
||||
};
|
||||
|
||||
extern struct x86_pmu x86_pmu __read_mostly;
|
||||
|
||||
DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
|
||||
|
||||
int x86_perf_event_set_period(struct perf_event *event);
|
||||
|
||||
/*
|
||||
* Generalized hw caching related hw_event table, filled
|
||||
* in on a per model basis. A value of 0 means
|
||||
* 'not supported', -1 means 'hw_event makes no sense on
|
||||
* this CPU', any other value means the raw hw_event
|
||||
* ID.
|
||||
*/
|
||||
|
||||
#define C(x) PERF_COUNT_HW_CACHE_##x
|
||||
|
||||
extern u64 __read_mostly hw_cache_event_ids
|
||||
[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX];
|
||||
extern u64 __read_mostly hw_cache_extra_regs
|
||||
[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX];
|
||||
|
||||
u64 x86_perf_event_update(struct perf_event *event);
|
||||
|
||||
static inline unsigned int x86_pmu_config_addr(int index)
|
||||
{
|
||||
return x86_pmu.eventsel + (x86_pmu.addr_offset ?
|
||||
x86_pmu.addr_offset(index, true) : index);
|
||||
}
|
||||
|
||||
static inline unsigned int x86_pmu_event_addr(int index)
|
||||
{
|
||||
return x86_pmu.perfctr + (x86_pmu.addr_offset ?
|
||||
x86_pmu.addr_offset(index, false) : index);
|
||||
}
|
||||
|
||||
static inline int x86_pmu_rdpmc_index(int index)
|
||||
{
|
||||
return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
|
||||
}
|
||||
|
||||
int x86_setup_perfctr(struct perf_event *event);
|
||||
|
||||
int x86_pmu_hw_config(struct perf_event *event);
|
||||
|
||||
void x86_pmu_disable_all(void);
|
||||
|
||||
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
|
||||
u64 enable_mask)
|
||||
{
|
||||
u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
|
||||
|
||||
if (hwc->extra_reg.reg)
|
||||
wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
|
||||
wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
|
||||
}
|
||||
|
||||
void x86_pmu_enable_all(int added);
|
||||
|
||||
int perf_assign_events(struct perf_event **events, int n,
|
||||
int wmin, int wmax, int *assign);
|
||||
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
|
||||
|
||||
void x86_pmu_stop(struct perf_event *event, int flags);
|
||||
|
||||
static inline void x86_pmu_disable_event(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
wrmsrl(hwc->config_base, hwc->config);
|
||||
}
|
||||
|
||||
void x86_pmu_enable_event(struct perf_event *event);
|
||||
|
||||
int x86_pmu_handle_irq(struct pt_regs *regs);
|
||||
|
||||
extern struct event_constraint emptyconstraint;
|
||||
|
||||
extern struct event_constraint unconstrained;
|
||||
|
||||
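/*
* Heuristic: on 64-bit kernels the kernel text has the sign bit set,
* on 32-bit it lies above PAGE_OFFSET.
*/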
static inline bool kernel_ip(unsigned long ip)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
return ip > PAGE_OFFSET;
|
||||
#else
|
||||
return (long)ip < 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Not all PMUs provide the right context information to place the reported IP
|
||||
* into full context. Specifically segment registers are typically not
|
||||
* supplied.
|
||||
*
|
||||
* Assuming the address is a linear address (it is for IBS), we fake the CS and
|
||||
* vm86 mode using the known zero-based code segment and 'fix up' the registers
|
||||
* to reflect this.
|
||||
*
|
||||
* Intel PEBS/LBR appear to typically provide the effective address, nothing
|
||||
* much we can do about that but pray and treat it like a linear address.
|
||||
*/
|
||||
static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
|
||||
{
|
||||
regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
|
||||
if (regs->flags & X86_VM_MASK)
|
||||
regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
|
||||
regs->ip = ip;
|
||||
}
|
||||
|
||||
ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
|
||||
ssize_t intel_event_sysfs_show(char *page, u64 config);
|
||||
|
||||
#ifdef CONFIG_CPU_SUP_AMD
|
||||
|
||||
int amd_pmu_init(void);
|
||||
|
||||
#else /* CONFIG_CPU_SUP_AMD */
|
||||
|
||||
static inline int amd_pmu_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_CPU_SUP_AMD */
|
||||
|
||||
#ifdef CONFIG_CPU_SUP_INTEL
|
||||
|
||||
int intel_pmu_save_and_restart(struct perf_event *event);
|
||||
|
||||
struct event_constraint *
|
||||
x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event);
|
||||
|
||||
struct intel_shared_regs *allocate_shared_regs(int cpu);
|
||||
|
||||
int intel_pmu_init(void);
|
||||
|
||||
void init_debug_store_on_cpu(int cpu);
|
||||
|
||||
void fini_debug_store_on_cpu(int cpu);
|
||||
|
||||
void release_ds_buffers(void);
|
||||
|
||||
void reserve_ds_buffers(void);
|
||||
|
||||
extern struct event_constraint bts_constraint;
|
||||
|
||||
void intel_pmu_enable_bts(u64 config);
|
||||
|
||||
void intel_pmu_disable_bts(void);
|
||||
|
||||
int intel_pmu_drain_bts_buffer(void);
|
||||
|
||||
extern struct event_constraint intel_core2_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_atom_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_slm_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_nehalem_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_westmere_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_snb_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_ivb_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_hsw_pebs_event_constraints[];
|
||||
|
||||
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
|
||||
|
||||
void intel_pmu_pebs_enable(struct perf_event *event);
|
||||
|
||||
void intel_pmu_pebs_disable(struct perf_event *event);
|
||||
|
||||
void intel_pmu_pebs_enable_all(void);
|
||||
|
||||
void intel_pmu_pebs_disable_all(void);
|
||||
|
||||
void intel_ds_init(void);
|
||||
|
||||
void intel_pmu_lbr_reset(void);
|
||||
|
||||
void intel_pmu_lbr_enable(struct perf_event *event);
|
||||
|
||||
void intel_pmu_lbr_disable(struct perf_event *event);
|
||||
|
||||
void intel_pmu_lbr_enable_all(void);
|
||||
|
||||
void intel_pmu_lbr_disable_all(void);
|
||||
|
||||
void intel_pmu_lbr_read(void);
|
||||
|
||||
void intel_pmu_lbr_init_core(void);
|
||||
|
||||
void intel_pmu_lbr_init_nhm(void);
|
||||
|
||||
void intel_pmu_lbr_init_atom(void);
|
||||
|
||||
void intel_pmu_lbr_init_snb(void);
|
||||
|
||||
int intel_pmu_setup_lbr_filter(struct perf_event *event);
|
||||
|
||||
int p4_pmu_init(void);
|
||||
|
||||
int p6_pmu_init(void);
|
||||
|
||||
int knc_pmu_init(void);
|
||||
|
||||
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
|
||||
char *page);
|
||||
|
||||
#else /* CONFIG_CPU_SUP_INTEL */
|
||||
|
||||
static inline void reserve_ds_buffers(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void release_ds_buffers(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int intel_pmu_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_CPU_SUP_INTEL */
|
||||
728
arch/x86/kernel/cpu/perf_event_amd.c
Normal file
|
|
@ -0,0 +1,728 @@
|
|||
#include <linux/perf_event.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/slab.h>
|
||||
#include <asm/apicdef.h>
|
||||
|
||||
#include "perf_event.h"
|
||||
|
||||
static __initconst const u64 amd_hw_cache_event_ids
|
||||
[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
|
||||
{
|
||||
[ C(L1D) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
|
||||
[ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
|
||||
[ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
|
||||
},
|
||||
},
|
||||
[ C(L1I ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
|
||||
[ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
},
|
||||
[ C(LL ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
|
||||
[ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
},
|
||||
[ C(DTLB) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
|
||||
[ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
},
|
||||
[ C(ITLB) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
|
||||
[ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
[ C(BPU ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
|
||||
[ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
[ C(NODE) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
|
||||
[ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/*
|
||||
* AMD Performance Monitor K7 and later.
|
||||
*/
|
||||
static const u64 amd_perfmon_event_map[] =
|
||||
{
|
||||
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
|
||||
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
|
||||
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
|
||||
[PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
|
||||
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
|
||||
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
|
||||
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
|
||||
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
|
||||
};
|
||||
|
||||
static u64 amd_pmu_event_map(int hw_event)
|
||||
{
|
||||
return amd_perfmon_event_map[hw_event];
|
||||
}
|
||||
|
||||
/*
|
||||
* Previously calculated offsets
|
||||
*/
|
||||
static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
|
||||
static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
|
||||
|
||||
/*
|
||||
* Legacy CPUs:
|
||||
* 4 counters starting at 0xc0010000 each offset by 1
|
||||
*
|
||||
* CPUs with core performance counter extensions:
|
||||
* 6 counters starting at 0xc0010200 each offset by 2
|
||||
*/
|
||||
static inline int amd_pmu_addr_offset(int index, bool eventsel)
|
||||
{
|
||||
int offset;
|
||||
|
||||
if (!index)
|
||||
return index;
|
||||
|
||||
if (eventsel)
|
||||
offset = event_offsets[index];
|
||||
else
|
||||
offset = count_offsets[index];
|
||||
|
||||
if (offset)
|
||||
return offset;
|
||||
|
||||
if (!cpu_has_perfctr_core)
|
||||
offset = index;
|
||||
else
|
||||
offset = index << 1;
|
||||
|
||||
if (eventsel)
|
||||
event_offsets[index] = offset;
|
||||
else
|
||||
count_offsets[index] = offset;
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
static int amd_core_hw_config(struct perf_event *event)
|
||||
{
|
||||
if (event->attr.exclude_host && event->attr.exclude_guest)
|
||||
/*
|
||||
* When HO == GO == 1 the hardware treats that as GO == HO == 0
|
||||
* and will count in both modes. We don't want to count in that
|
||||
* case so we emulate no-counting by setting US = OS = 0.
|
||||
*/
|
||||
event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
|
||||
ARCH_PERFMON_EVENTSEL_OS);
|
||||
else if (event->attr.exclude_host)
|
||||
event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
|
||||
else if (event->attr.exclude_guest)
|
||||
event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* AMD64 events are detected based on their event codes.
|
||||
*/
|
||||
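/*
* The 12-bit event select is split across EVENTSEL bits [35:32] and
* [7:0]; fold bits [35:32] down to bits [11:8] to form the event code.
*/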
static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
|
||||
{
|
||||
return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
|
||||
}
|
||||
|
||||
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
|
||||
{
|
||||
return (hwc->config & 0xe0) == 0xe0;
|
||||
}
|
||||
|
||||
static inline int amd_has_nb(struct cpu_hw_events *cpuc)
|
||||
{
|
||||
struct amd_nb *nb = cpuc->amd_nb;
|
||||
|
||||
return nb && nb->nb_id != -1;
|
||||
}
|
||||
|
||||
static int amd_pmu_hw_config(struct perf_event *event)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* pass precise event sampling to ibs: */
|
||||
if (event->attr.precise_ip && get_ibs_caps())
|
||||
return -ENOENT;
|
||||
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
ret = x86_pmu_hw_config(event);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (event->attr.type == PERF_TYPE_RAW)
|
||||
event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
|
||||
|
||||
return amd_core_hw_config(event);
|
||||
}
|
||||
|
||||
static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
|
||||
struct perf_event *event)
|
||||
{
|
||||
struct amd_nb *nb = cpuc->amd_nb;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* need to scan whole list because event may not have
|
||||
* been assigned during scheduling
|
||||
*
|
||||
* no race condition possible because event can only
|
||||
* be removed on one CPU at a time AND PMU is disabled
|
||||
* when we come here
|
||||
*/
|
||||
for (i = 0; i < x86_pmu.num_counters; i++) {
|
||||
if (cmpxchg(nb->owners + i, event, NULL) == event)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* AMD64 NorthBridge events need special treatment because
|
||||
* counter access needs to be synchronized across all cores
|
||||
* of a package. Refer to BKDG section 3.12
|
||||
*
|
||||
* NB events are events measuring L3 cache and HyperTransport
|
||||
* traffic. They are identified by an event code >= 0xe00.
|
||||
* They measure events on the NorthBridge which is shared
|
||||
* by all cores on a package. NB events are counted on a
|
||||
* shared set of counters. When a NB event is programmed
|
||||
* in a counter, the data actually comes from a shared
|
||||
* counter. Thus, access to those counters needs to be
|
||||
* synchronized.
|
||||
*
|
||||
* We implement the synchronization such that no two cores
|
||||
* can be measuring NB events using the same counters. Thus,
|
||||
* we maintain a per-NB allocation table. The available slot
|
||||
* is propagated using the event_constraint structure.
|
||||
*
|
||||
* We provide only one choice for each NB event based on
|
||||
* the fact that only NB events have restrictions. Consequently,
|
||||
* if a counter is available, there is a guarantee the NB event
|
||||
* will be assigned to it. If no slot is available, an empty
|
||||
* constraint is returned and scheduling will eventually fail
|
||||
* for this event.
|
||||
*
|
||||
* Note that all cores attached to the same NB compete for the same
|
||||
* counters to host NB events; this is why we use atomic ops. Some
|
||||
* multi-chip CPUs may have more than one NB.
|
||||
*
|
||||
* Given that resources are allocated (cmpxchg), they must be
|
||||
* eventually freed for others to use. This is accomplished by
|
||||
* calling __amd_put_nb_event_constraints()
|
||||
*
|
||||
* Non NB events are not impacted by this restriction.
|
||||
*/
|
||||
static struct event_constraint *
|
||||
__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
|
||||
struct event_constraint *c)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct amd_nb *nb = cpuc->amd_nb;
|
||||
struct perf_event *old;
|
||||
int idx, new = -1;
|
||||
|
||||
if (!c)
|
||||
c = &unconstrained;
|
||||
|
||||
if (cpuc->is_fake)
|
||||
return c;
|
||||
|
||||
/*
|
||||
* detect if already present, if so reuse
|
||||
*
|
||||
* cannot merge with actual allocation
|
||||
* because of possible holes
|
||||
*
|
||||
* event can already be present yet not assigned (in hwc->idx)
|
||||
* because of successive calls to x86_schedule_events() from
|
||||
* hw_perf_group_sched_in() without hw_perf_enable()
|
||||
*/
|
||||
for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
|
||||
if (new == -1 || hwc->idx == idx)
|
||||
/* assign free slot, prefer hwc->idx */
|
||||
old = cmpxchg(nb->owners + idx, NULL, event);
|
||||
else if (nb->owners[idx] == event)
|
||||
/* event already present */
|
||||
old = event;
|
||||
else
|
||||
continue;
|
||||
|
||||
if (old && old != event)
|
||||
continue;
|
||||
|
||||
/* reassign to this slot */
|
||||
if (new != -1)
|
||||
cmpxchg(nb->owners + new, event, NULL);
|
||||
new = idx;
|
||||
|
||||
/* already present, reuse */
|
||||
if (old == event)
|
||||
break;
|
||||
}
|
||||
|
||||
if (new == -1)
|
||||
return &emptyconstraint;
|
||||
|
||||
return &nb->event_constraints[new];
|
||||
}
|
||||
|
||||
static struct amd_nb *amd_alloc_nb(int cpu)
|
||||
{
|
||||
struct amd_nb *nb;
|
||||
int i;
|
||||
|
||||
nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
|
||||
if (!nb)
|
||||
return NULL;
|
||||
|
||||
nb->nb_id = -1;
|
||||
|
||||
/*
|
||||
* initialize all possible NB constraints
|
||||
*/
|
||||
for (i = 0; i < x86_pmu.num_counters; i++) {
|
||||
__set_bit(i, nb->event_constraints[i].idxmsk);
|
||||
nb->event_constraints[i].weight = 1;
|
||||
}
|
||||
return nb;
|
||||
}
|
||||
|
||||
static int amd_pmu_cpu_prepare(int cpu)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
|
||||
|
||||
WARN_ON_ONCE(cpuc->amd_nb);
|
||||
|
||||
if (boot_cpu_data.x86_max_cores < 2)
|
||||
return NOTIFY_OK;
|
||||
|
||||
cpuc->amd_nb = amd_alloc_nb(cpu);
|
||||
if (!cpuc->amd_nb)
|
||||
return NOTIFY_BAD;
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static void amd_pmu_cpu_starting(int cpu)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
|
||||
struct amd_nb *nb;
|
||||
int i, nb_id;
|
||||
|
||||
cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
|
||||
|
||||
if (boot_cpu_data.x86_max_cores < 2)
|
||||
return;
|
||||
|
||||
nb_id = amd_get_nb_id(cpu);
|
||||
WARN_ON_ONCE(nb_id == BAD_APICID);
|
||||
|
||||
for_each_online_cpu(i) {
|
||||
nb = per_cpu(cpu_hw_events, i).amd_nb;
|
||||
if (WARN_ON_ONCE(!nb))
|
||||
continue;
|
||||
|
||||
if (nb->nb_id == nb_id) {
|
||||
cpuc->kfree_on_online = cpuc->amd_nb;
|
||||
cpuc->amd_nb = nb;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cpuc->amd_nb->nb_id = nb_id;
|
||||
cpuc->amd_nb->refcnt++;
|
||||
}
|
||||
|
||||
static void amd_pmu_cpu_dead(int cpu)
|
||||
{
|
||||
struct cpu_hw_events *cpuhw;
|
||||
|
||||
if (boot_cpu_data.x86_max_cores < 2)
|
||||
return;
|
||||
|
||||
cpuhw = &per_cpu(cpu_hw_events, cpu);
|
||||
|
||||
if (cpuhw->amd_nb) {
|
||||
struct amd_nb *nb = cpuhw->amd_nb;
|
||||
|
||||
if (nb->nb_id == -1 || --nb->refcnt == 0)
|
||||
kfree(nb);
|
||||
|
||||
cpuhw->amd_nb = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static struct event_constraint *
|
||||
amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
|
||||
{
|
||||
/*
|
||||
* if not NB event or no NB, then no constraints
|
||||
*/
|
||||
if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
|
||||
return &unconstrained;
|
||||
|
||||
return __amd_get_nb_event_constraints(cpuc, event, NULL);
|
||||
}
|
||||
|
||||
static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
|
||||
struct perf_event *event)
|
||||
{
|
||||
if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
|
||||
__amd_put_nb_event_constraints(cpuc, event);
|
||||
}
|
||||
|
||||
PMU_FORMAT_ATTR(event, "config:0-7,32-35");
|
||||
PMU_FORMAT_ATTR(umask, "config:8-15" );
|
||||
PMU_FORMAT_ATTR(edge, "config:18" );
|
||||
PMU_FORMAT_ATTR(inv, "config:23" );
|
||||
PMU_FORMAT_ATTR(cmask, "config:24-31" );
|
||||
|
||||
static struct attribute *amd_format_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
&format_attr_umask.attr,
|
||||
&format_attr_edge.attr,
|
||||
&format_attr_inv.attr,
|
||||
&format_attr_cmask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
/* AMD Family 15h */
|
||||
|
||||
#define AMD_EVENT_TYPE_MASK 0x000000F0ULL
|
||||
|
||||
#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL
|
||||
#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL
|
||||
#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL
|
||||
#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL
|
||||
#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL
|
||||
#define AMD_EVENT_EX_LS 0x000000C0ULL
|
||||
#define AMD_EVENT_DE 0x000000D0ULL
|
||||
#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL
|
||||
|
||||
/*
|
||||
* AMD family 15h event code/PMC mappings:
|
||||
*
|
||||
* type = event_code & 0x0F0:
|
||||
*
|
||||
* 0x000 FP PERF_CTL[5:3]
|
||||
* 0x010 FP PERF_CTL[5:3]
|
||||
* 0x020 LS PERF_CTL[5:0]
|
||||
* 0x030 LS PERF_CTL[5:0]
|
||||
* 0x040 DC PERF_CTL[5:0]
|
||||
* 0x050 DC PERF_CTL[5:0]
|
||||
* 0x060 CU PERF_CTL[2:0]
|
||||
* 0x070 CU PERF_CTL[2:0]
|
||||
* 0x080 IC/DE PERF_CTL[2:0]
|
||||
* 0x090 IC/DE PERF_CTL[2:0]
|
||||
* 0x0A0 ---
|
||||
* 0x0B0 ---
|
||||
* 0x0C0 EX/LS PERF_CTL[5:0]
|
||||
* 0x0D0 DE PERF_CTL[2:0]
|
||||
* 0x0E0 NB NB_PERF_CTL[3:0]
|
||||
* 0x0F0 NB NB_PERF_CTL[3:0]
|
||||
*
|
||||
* Exceptions:
|
||||
*
|
||||
* 0x000 FP PERF_CTL[3], PERF_CTL[5:3] (*)
|
||||
* 0x003 FP PERF_CTL[3]
|
||||
* 0x004 FP PERF_CTL[3], PERF_CTL[5:3] (*)
|
||||
* 0x00B FP PERF_CTL[3]
|
||||
* 0x00D FP PERF_CTL[3]
|
||||
* 0x023 DE PERF_CTL[2:0]
|
||||
* 0x02D LS PERF_CTL[3]
|
||||
* 0x02E LS PERF_CTL[3,0]
|
||||
* 0x031 LS PERF_CTL[2:0] (**)
|
||||
* 0x043 CU PERF_CTL[2:0]
|
||||
* 0x045 CU PERF_CTL[2:0]
|
||||
* 0x046 CU PERF_CTL[2:0]
|
||||
* 0x054 CU PERF_CTL[2:0]
|
||||
* 0x055 CU PERF_CTL[2:0]
|
||||
* 0x08F IC PERF_CTL[0]
|
||||
* 0x187 DE PERF_CTL[0]
|
||||
* 0x188 DE PERF_CTL[0]
|
||||
* 0x0DB EX PERF_CTL[5:0]
|
||||
* 0x0DC LS PERF_CTL[5:0]
|
||||
* 0x0DD LS PERF_CTL[5:0]
|
||||
* 0x0DE LS PERF_CTL[5:0]
|
||||
* 0x0DF LS PERF_CTL[5:0]
|
||||
* 0x1C0 EX PERF_CTL[5:3]
|
||||
* 0x1D6 EX PERF_CTL[5:0]
|
||||
* 0x1D8 EX PERF_CTL[5:0]
|
||||
*
|
||||
* (*) depending on the umask all FPU counters may be used
|
||||
* (**) only one unitmask enabled at a time
|
||||
*/
|
||||
|
||||
static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
|
||||
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
|
||||
static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
|
||||
static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
|
||||
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
|
||||
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
|
||||
|
||||
static struct event_constraint *
|
||||
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
unsigned int event_code = amd_get_event_code(hwc);
|
||||
|
||||
switch (event_code & AMD_EVENT_TYPE_MASK) {
|
||||
case AMD_EVENT_FP:
|
||||
switch (event_code) {
|
||||
case 0x000:
|
||||
if (!(hwc->config & 0x0000F000ULL))
|
||||
break;
|
||||
if (!(hwc->config & 0x00000F00ULL))
|
||||
break;
|
||||
return &amd_f15_PMC3;
|
||||
case 0x004:
|
||||
if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
|
||||
break;
|
||||
return &amd_f15_PMC3;
|
||||
case 0x003:
|
||||
case 0x00B:
|
||||
case 0x00D:
|
||||
return &amd_f15_PMC3;
|
||||
}
|
||||
return &amd_f15_PMC53;
|
||||
case AMD_EVENT_LS:
|
||||
case AMD_EVENT_DC:
|
||||
case AMD_EVENT_EX_LS:
|
||||
switch (event_code) {
|
||||
case 0x023:
|
||||
case 0x043:
|
||||
case 0x045:
|
||||
case 0x046:
|
||||
case 0x054:
|
||||
case 0x055:
|
||||
return &amd_f15_PMC20;
|
||||
case 0x02D:
|
||||
return &amd_f15_PMC3;
|
||||
case 0x02E:
|
||||
return &amd_f15_PMC30;
|
||||
case 0x031:
|
||||
if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
|
||||
return &amd_f15_PMC20;
|
||||
return &emptyconstraint;
|
||||
case 0x1C0:
|
||||
return &amd_f15_PMC53;
|
||||
default:
|
||||
return &amd_f15_PMC50;
|
||||
}
|
||||
case AMD_EVENT_CU:
|
||||
case AMD_EVENT_IC_DE:
|
||||
case AMD_EVENT_DE:
|
||||
switch (event_code) {
|
||||
case 0x08F:
|
||||
case 0x187:
|
||||
case 0x188:
|
||||
return &amd_f15_PMC0;
|
||||
case 0x0DB ... 0x0DF:
|
||||
case 0x1D6:
|
||||
case 0x1D8:
|
||||
return &amd_f15_PMC50;
|
||||
default:
|
||||
return &amd_f15_PMC20;
|
||||
}
|
||||
case AMD_EVENT_NB:
|
||||
/* moved to perf_event_amd_uncore.c */
|
||||
return &emptyconstraint;
|
||||
default:
|
||||
return &emptyconstraint;
|
||||
}
|
||||
}
|
||||
|
||||
static ssize_t amd_event_sysfs_show(char *page, u64 config)
|
||||
{
|
||||
u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
|
||||
(config & AMD64_EVENTSEL_EVENT) >> 24;
|
||||
|
||||
return x86_event_sysfs_show(page, config, event);
|
||||
}
|
||||
|
||||
static __initconst const struct x86_pmu amd_pmu = {
|
||||
.name = "AMD",
|
||||
.handle_irq = x86_pmu_handle_irq,
|
||||
.disable_all = x86_pmu_disable_all,
|
||||
.enable_all = x86_pmu_enable_all,
|
||||
.enable = x86_pmu_enable_event,
|
||||
.disable = x86_pmu_disable_event,
|
||||
.hw_config = amd_pmu_hw_config,
|
||||
.schedule_events = x86_schedule_events,
|
||||
.eventsel = MSR_K7_EVNTSEL0,
|
||||
.perfctr = MSR_K7_PERFCTR0,
|
||||
.addr_offset = amd_pmu_addr_offset,
|
||||
.event_map = amd_pmu_event_map,
|
||||
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
|
||||
.num_counters = AMD64_NUM_COUNTERS,
|
||||
.cntval_bits = 48,
|
||||
.cntval_mask = (1ULL << 48) - 1,
|
||||
.apic = 1,
|
||||
/* use highest bit to detect overflow */
|
||||
.max_period = (1ULL << 47) - 1,
|
||||
.get_event_constraints = amd_get_event_constraints,
|
||||
.put_event_constraints = amd_put_event_constraints,
|
||||
|
||||
.format_attrs = amd_format_attr,
|
||||
.events_sysfs_show = amd_event_sysfs_show,
|
||||
|
||||
.cpu_prepare = amd_pmu_cpu_prepare,
|
||||
.cpu_starting = amd_pmu_cpu_starting,
|
||||
.cpu_dead = amd_pmu_cpu_dead,
|
||||
};
|
||||
|
||||
static int __init amd_core_pmu_init(void)
|
||||
{
|
||||
if (!cpu_has_perfctr_core)
|
||||
return 0;
|
||||
|
||||
switch (boot_cpu_data.x86) {
|
||||
case 0x15:
|
||||
pr_cont("Fam15h ");
|
||||
x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
|
||||
break;
|
||||
|
||||
default:
|
||||
pr_err("core perfctr but no constraints; unknown hardware!\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/*
|
||||
* If core performance counter extensions exist, we must use
|
||||
* MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
|
||||
* amd_pmu_addr_offset().
|
||||
*/
|
||||
x86_pmu.eventsel = MSR_F15H_PERF_CTL;
|
||||
x86_pmu.perfctr = MSR_F15H_PERF_CTR;
|
||||
x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
|
||||
|
||||
pr_cont("core perfctr, ");
|
||||
return 0;
|
||||
}
|
||||
|
||||
__init int amd_pmu_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* Performance-monitoring supported from K7 and later: */
|
||||
if (boot_cpu_data.x86 < 6)
|
||||
return -ENODEV;
|
||||
|
||||
x86_pmu = amd_pmu;
|
||||
|
||||
ret = amd_core_pmu_init();
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Events are common for all AMDs */
|
||||
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
|
||||
sizeof(hw_cache_event_ids));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amd_pmu_enable_virt(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
cpuc->perf_ctr_virt_mask = 0;
|
||||
|
||||
/* Reload all events */
|
||||
x86_pmu_disable_all();
|
||||
x86_pmu_enable_all(0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
|
||||
|
||||
void amd_pmu_disable_virt(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
/*
|
||||
* We only mask out the Host-only bit so that host-only counting works
|
||||
* when SVM is disabled. If someone sets up a guest-only counter when
|
||||
* SVM is disabled the Guest-only bit still gets set and the counter
|
||||
* will not count anything.
|
||||
*/
|
||||
cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
|
||||
|
||||
/* Reload all events */
|
||||
x86_pmu_disable_all();
|
||||
x86_pmu_enable_all(0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
|
||||
946
arch/x86/kernel/cpu/perf_event_amd_ibs.c
Normal file
|
|
@ -0,0 +1,946 @@
|
|||
/*
|
||||
* Performance events - AMD IBS
|
||||
*
|
||||
* Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
|
||||
*
|
||||
* For licencing details see kernel-base/COPYING
|
||||
*/
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/syscore_ops.h>
|
||||
|
||||
#include <asm/apic.h>
|
||||
|
||||
#include "perf_event.h"
|
||||
|
||||
static u32 ibs_caps;
|
||||
|
||||
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
|
||||
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/hardirq.h>
|
||||
|
||||
#include <asm/nmi.h>
|
||||
|
||||
#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
|
||||
#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
|
||||
|
||||
enum ibs_states {
|
||||
IBS_ENABLED = 0,
|
||||
IBS_STARTED = 1,
|
||||
IBS_STOPPING = 2,
|
||||
|
||||
IBS_MAX_STATES,
|
||||
};
|
||||
|
||||
struct cpu_perf_ibs {
|
||||
struct perf_event *event;
|
||||
unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
|
||||
};
|
||||
|
||||
struct perf_ibs {
|
||||
struct pmu pmu;
|
||||
unsigned int msr;
|
||||
u64 config_mask;
|
||||
u64 cnt_mask;
|
||||
u64 enable_mask;
|
||||
u64 valid_mask;
|
||||
u64 max_period;
|
||||
unsigned long offset_mask[1];
|
||||
int offset_max;
|
||||
struct cpu_perf_ibs __percpu *pcpu;
|
||||
|
||||
struct attribute **format_attrs;
|
||||
struct attribute_group format_group;
|
||||
const struct attribute_group *attr_groups[2];
|
||||
|
||||
u64 (*get_count)(u64 config);
|
||||
};
|
||||
|
||||
struct perf_ibs_data {
|
||||
u32 size;
|
||||
union {
|
||||
u32 data[0]; /* data buffer starts here */
|
||||
u32 caps;
|
||||
};
|
||||
u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
|
||||
};
|
||||
|
||||
static int
|
||||
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
|
||||
{
|
||||
s64 left = local64_read(&hwc->period_left);
|
||||
s64 period = hwc->sample_period;
|
||||
int overflow = 0;
|
||||
|
||||
/*
|
||||
* If we are way outside a reasonable range then just skip forward:
|
||||
*/
|
||||
if (unlikely(left <= -period)) {
|
||||
left = period;
|
||||
local64_set(&hwc->period_left, left);
|
||||
hwc->last_period = period;
|
||||
overflow = 1;
|
||||
}
|
||||
|
||||
if (unlikely(left < (s64)min)) {
|
||||
left += period;
|
||||
local64_set(&hwc->period_left, left);
|
||||
hwc->last_period = period;
|
||||
overflow = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the hw period that triggers the sw overflow is too short
|
||||
* we might hit the irq handler. This biases the results.
|
||||
* Thus we shorten the next-to-last period and set the last
|
||||
* period to the max period.
|
||||
*/
|
||||
if (left > max) {
|
||||
left -= max;
|
||||
if (left > max)
|
||||
left = max;
|
||||
else if (left < min)
|
||||
left = min;
|
||||
}
|
||||
|
||||
*hw_period = (u64)left;
|
||||
|
||||
return overflow;
|
||||
}
|
||||
|
||||
static int
|
||||
perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int shift = 64 - width;
|
||||
u64 prev_raw_count;
|
||||
u64 delta;
|
||||
|
||||
/*
|
||||
* Careful: an NMI might modify the previous event value.
|
||||
*
|
||||
* Our tactic to handle this is to first atomically read and
|
||||
* exchange a new raw count - then add that new-prev delta
|
||||
* count to the generic event atomically:
|
||||
*/
|
||||
prev_raw_count = local64_read(&hwc->prev_count);
|
||||
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
|
||||
new_raw_count) != prev_raw_count)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Now we have the new raw value and have updated the prev
|
||||
* timestamp already. We can now calculate the elapsed delta
|
||||
* (event-)time and add that to the generic event.
|
||||
*
|
||||
* Careful, not all hw sign-extends above the physical width
|
||||
* of the count.
|
||||
*/
|
||||
delta = (new_raw_count << shift) - (prev_raw_count << shift);
|
||||
delta >>= shift;
|
||||
|
||||
local64_add(delta, &event->count);
|
||||
local64_sub(delta, &hwc->period_left);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct perf_ibs perf_ibs_fetch;
|
||||
static struct perf_ibs perf_ibs_op;
|
||||
|
||||
static struct perf_ibs *get_ibs_pmu(int type)
|
||||
{
|
||||
if (perf_ibs_fetch.pmu.type == type)
|
||||
return &perf_ibs_fetch;
|
||||
if (perf_ibs_op.pmu.type == type)
|
||||
return &perf_ibs_op;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use IBS for precise event sampling:
|
||||
*
|
||||
* perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
|
||||
* perf record -a -e r076:p ... # same as -e cpu-cycles:p
|
||||
* perf record -a -e r0C1:p ... # use ibs op counting micro-ops
|
||||
*
|
||||
* IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
|
||||
* MSRC001_1033) is used to select either cycle or micro-ops counting
|
||||
* mode.
|
||||
*
|
||||
* The rip of IBS samples has skid 0. Thus, IBS supports precise
|
||||
* levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
|
||||
* rip is invalid when IBS was not able to record the rip correctly.
|
||||
* We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
|
||||
*
|
||||
*/
|
||||
static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
|
||||
{
|
||||
switch (event->attr.precise_ip) {
|
||||
case 0:
|
||||
return -ENOENT;
|
||||
case 1:
|
||||
case 2:
|
||||
break;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
switch (event->attr.type) {
|
||||
case PERF_TYPE_HARDWARE:
|
||||
switch (event->attr.config) {
|
||||
case PERF_COUNT_HW_CPU_CYCLES:
|
||||
*config = 0;
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
case PERF_TYPE_RAW:
|
||||
switch (event->attr.config) {
|
||||
case 0x0076:
|
||||
*config = 0;
|
||||
return 0;
|
||||
case 0x00C1:
|
||||
*config = IBS_OP_CNT_CTL;
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static const struct perf_event_attr ibs_notsupp = {
|
||||
.exclude_user = 1,
|
||||
.exclude_kernel = 1,
|
||||
.exclude_hv = 1,
|
||||
.exclude_idle = 1,
|
||||
.exclude_host = 1,
|
||||
.exclude_guest = 1,
|
||||
};
|
||||
|
||||
static int perf_ibs_init(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct perf_ibs *perf_ibs;
|
||||
u64 max_cnt, config;
|
||||
int ret;
|
||||
|
||||
perf_ibs = get_ibs_pmu(event->attr.type);
|
||||
if (perf_ibs) {
|
||||
config = event->attr.config;
|
||||
} else {
|
||||
perf_ibs = &perf_ibs_op;
|
||||
ret = perf_ibs_precise_event(event, &config);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (event->pmu != &perf_ibs->pmu)
|
||||
return -ENOENT;
|
||||
|
||||
if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
|
||||
return -EINVAL;
|
||||
|
||||
if (config & ~perf_ibs->config_mask)
|
||||
return -EINVAL;
|
||||
|
||||
if (hwc->sample_period) {
|
||||
if (config & perf_ibs->cnt_mask)
|
||||
/* raw max_cnt may not be set */
|
||||
return -EINVAL;
|
||||
if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
|
||||
/*
|
||||
* lower 4 bits cannot be set in ibs max cnt,
|
||||
* but allowing it in case we adjust the
|
||||
* sample period to set a frequency.
|
||||
*/
|
||||
return -EINVAL;
|
||||
hwc->sample_period &= ~0x0FULL;
|
||||
if (!hwc->sample_period)
|
||||
hwc->sample_period = 0x10;
|
||||
} else {
|
||||
max_cnt = config & perf_ibs->cnt_mask;
|
||||
config &= ~perf_ibs->cnt_mask;
|
||||
event->attr.sample_period = max_cnt << 4;
|
||||
hwc->sample_period = event->attr.sample_period;
|
||||
}
|
||||
|
||||
if (!hwc->sample_period)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* If we modify hwc->sample_period, we also need to update
|
||||
* hwc->last_period and hwc->period_left.
|
||||
*/
|
||||
hwc->last_period = hwc->sample_period;
|
||||
local64_set(&hwc->period_left, hwc->sample_period);
|
||||
|
||||
hwc->config_base = perf_ibs->msr;
|
||||
hwc->config = config;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
|
||||
struct hw_perf_event *hwc, u64 *period)
|
||||
{
|
||||
int overflow;
|
||||
|
||||
/* ignore lower 4 bits in min count: */
|
||||
overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
|
||||
local64_set(&hwc->prev_count, 0);
|
||||
|
||||
return overflow;
|
||||
}
|
||||
|
||||
static u64 get_ibs_fetch_count(u64 config)
|
||||
{
|
||||
return (config & IBS_FETCH_CNT) >> 12;
|
||||
}
|
||||
|
||||
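/*
* When IBS_OP_VAL is set, the programmed max count (kept scaled by 16
* in IBS_OP_MAX_CNT) has rolled over; CPUs with IBS_CAPS_RDWROPCNT
* additionally expose the current count in the upper half of the MSR.
*/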
static u64 get_ibs_op_count(u64 config)
|
||||
{
|
||||
u64 count = 0;
|
||||
|
||||
if (config & IBS_OP_VAL)
|
||||
count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
|
||||
|
||||
if (ibs_caps & IBS_CAPS_RDWROPCNT)
|
||||
count += (config & IBS_OP_CUR_CNT) >> 32;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static void
|
||||
perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
|
||||
u64 *config)
|
||||
{
|
||||
u64 count = perf_ibs->get_count(*config);
|
||||
|
||||
/*
|
||||
* Set width to 64 since we do not overflow on max width but
|
||||
* instead on max count. In perf_ibs_set_period() we clear
|
||||
* prev count manually on overflow.
|
||||
*/
|
||||
while (!perf_event_try_update(event, count, 64)) {
|
||||
rdmsrl(event->hw.config_base, *config);
|
||||
count = perf_ibs->get_count(*config);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
|
||||
struct hw_perf_event *hwc, u64 config)
|
||||
{
|
||||
wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
* Erratum #420 Instruction-Based Sampling Engine May Generate
|
||||
* Interrupt that Cannot Be Cleared:
|
||||
*
|
||||
* Must clear counter mask first, then clear the enable bit. See
|
||||
* Revision Guide for AMD Family 10h Processors, Publication #41322.
|
||||
*/
|
||||
static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
|
||||
struct hw_perf_event *hwc, u64 config)
|
||||
{
|
||||
config &= ~perf_ibs->cnt_mask;
|
||||
wrmsrl(hwc->config_base, config);
|
||||
config &= ~perf_ibs->enable_mask;
|
||||
wrmsrl(hwc->config_base, config);
|
||||
}
|
||||
|
||||
/*
|
||||
* We cannot restore the ibs pmu state, so we always need to update
|
||||
* the event while stopping it and then reset the state when starting
|
||||
* again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in
|
||||
* perf_ibs_start()/perf_ibs_stop() and instead always do it.
|
||||
*/
|
||||
static void perf_ibs_start(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
|
||||
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
|
||||
u64 period;
|
||||
|
||||
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
|
||||
hwc->state = 0;
|
||||
|
||||
perf_ibs_set_period(perf_ibs, hwc, &period);
|
||||
set_bit(IBS_STARTED, pcpu->state);
|
||||
perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
}
|
||||
|
||||
static void perf_ibs_stop(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
|
||||
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
|
||||
u64 config;
|
||||
int stopping;
|
||||
|
||||
stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
|
||||
|
||||
if (!stopping && (hwc->state & PERF_HES_UPTODATE))
|
||||
return;
|
||||
|
||||
rdmsrl(hwc->config_base, config);
|
||||
|
||||
if (stopping) {
|
||||
set_bit(IBS_STOPPING, pcpu->state);
|
||||
perf_ibs_disable_event(perf_ibs, hwc, config);
|
||||
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
|
||||
hwc->state |= PERF_HES_STOPPED;
|
||||
}
|
||||
|
||||
if (hwc->state & PERF_HES_UPTODATE)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Clear valid bit to not count rollovers on update, rollovers
|
||||
* are only updated in the irq handler.
|
||||
*/
|
||||
config &= ~perf_ibs->valid_mask;
|
||||
|
||||
perf_ibs_event_update(perf_ibs, event, &config);
|
||||
hwc->state |= PERF_HES_UPTODATE;
|
||||
}
|
||||
|
||||
static int perf_ibs_add(struct perf_event *event, int flags)
|
||||
{
|
||||
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
|
||||
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
|
||||
|
||||
if (test_and_set_bit(IBS_ENABLED, pcpu->state))
|
||||
return -ENOSPC;
|
||||
|
||||
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
|
||||
|
||||
pcpu->event = event;
|
||||
|
||||
if (flags & PERF_EF_START)
|
||||
perf_ibs_start(event, PERF_EF_RELOAD);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void perf_ibs_del(struct perf_event *event, int flags)
|
||||
{
|
||||
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
|
||||
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
|
||||
|
||||
if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
|
||||
return;
|
||||
|
||||
perf_ibs_stop(event, PERF_EF_UPDATE);
|
||||
|
||||
pcpu->event = NULL;
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
}
|
||||
|
||||
static void perf_ibs_read(struct perf_event *event) { }
|
||||
|
||||
PMU_FORMAT_ATTR(rand_en, "config:57");
|
||||
PMU_FORMAT_ATTR(cnt_ctl, "config:19");
|
||||
|
||||
static struct attribute *ibs_fetch_format_attrs[] = {
|
||||
&format_attr_rand_en.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *ibs_op_format_attrs[] = {
|
||||
NULL, /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct perf_ibs perf_ibs_fetch = {
|
||||
.pmu = {
|
||||
.task_ctx_nr = perf_invalid_context,
|
||||
|
||||
.event_init = perf_ibs_init,
|
||||
.add = perf_ibs_add,
|
||||
.del = perf_ibs_del,
|
||||
.start = perf_ibs_start,
|
||||
.stop = perf_ibs_stop,
|
||||
.read = perf_ibs_read,
|
||||
},
|
||||
.msr = MSR_AMD64_IBSFETCHCTL,
|
||||
.config_mask = IBS_FETCH_CONFIG_MASK,
|
||||
.cnt_mask = IBS_FETCH_MAX_CNT,
|
||||
.enable_mask = IBS_FETCH_ENABLE,
|
||||
.valid_mask = IBS_FETCH_VAL,
|
||||
.max_period = IBS_FETCH_MAX_CNT << 4,
|
||||
.offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
|
||||
.offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
|
||||
.format_attrs = ibs_fetch_format_attrs,
|
||||
|
||||
.get_count = get_ibs_fetch_count,
|
||||
};
|
||||
|
||||
static struct perf_ibs perf_ibs_op = {
|
||||
.pmu = {
|
||||
.task_ctx_nr = perf_invalid_context,
|
||||
|
||||
.event_init = perf_ibs_init,
|
||||
.add = perf_ibs_add,
|
||||
.del = perf_ibs_del,
|
||||
.start = perf_ibs_start,
|
||||
.stop = perf_ibs_stop,
|
||||
.read = perf_ibs_read,
|
||||
},
|
||||
.msr = MSR_AMD64_IBSOPCTL,
|
||||
.config_mask = IBS_OP_CONFIG_MASK,
|
||||
.cnt_mask = IBS_OP_MAX_CNT,
|
||||
.enable_mask = IBS_OP_ENABLE,
|
||||
.valid_mask = IBS_OP_VAL,
|
||||
.max_period = IBS_OP_MAX_CNT << 4,
|
||||
.offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
|
||||
.offset_max = MSR_AMD64_IBSOP_REG_COUNT,
|
||||
.format_attrs = ibs_op_format_attrs,
|
||||
|
||||
.get_count = get_ibs_op_count,
|
||||
};
|
||||
|
||||
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
|
||||
{
|
||||
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
|
||||
struct perf_event *event = pcpu->event;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct perf_sample_data data;
|
||||
struct perf_raw_record raw;
|
||||
struct pt_regs regs;
|
||||
struct perf_ibs_data ibs_data;
|
||||
int offset, size, check_rip, offset_max, throttle = 0;
|
||||
unsigned int msr;
|
||||
u64 *buf, *config, period;
|
||||
|
||||
if (!test_bit(IBS_STARTED, pcpu->state)) {
|
||||
/*
|
||||
* Catch spurious interrupts after stopping IBS: After
|
||||
* disabling IBS there could still be incoming NMIs
|
||||
* with samples that even have the valid bit cleared.
|
||||
* Mark all these NMIs as handled.
|
||||
*/
|
||||
return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
|
||||
}
|
||||
|
||||
msr = hwc->config_base;
|
||||
buf = ibs_data.regs;
|
||||
rdmsrl(msr, *buf);
|
||||
if (!(*buf++ & perf_ibs->valid_mask))
|
||||
return 0;
|
||||
|
||||
config = &ibs_data.regs[0];
|
||||
perf_ibs_event_update(perf_ibs, event, config);
|
||||
perf_sample_data_init(&data, 0, hwc->last_period);
|
||||
if (!perf_ibs_set_period(perf_ibs, hwc, &period))
|
||||
goto out; /* no sw counter overflow */
|
||||
|
||||
ibs_data.caps = ibs_caps;
|
||||
size = 1;
|
||||
offset = 1;
|
||||
check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
|
||||
if (event->attr.sample_type & PERF_SAMPLE_RAW)
|
||||
offset_max = perf_ibs->offset_max;
|
||||
else if (check_rip)
|
||||
offset_max = 2;
|
||||
else
|
||||
offset_max = 1;
|
||||
do {
|
||||
rdmsrl(msr + offset, *buf++);
|
||||
size++;
|
||||
offset = find_next_bit(perf_ibs->offset_mask,
|
||||
perf_ibs->offset_max,
|
||||
offset + 1);
|
||||
} while (offset < offset_max);
|
||||
ibs_data.size = sizeof(u64) * size;
|
||||
|
||||
regs = *iregs;
|
||||
if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
|
||||
regs.flags &= ~PERF_EFLAGS_EXACT;
|
||||
} else {
|
||||
set_linear_ip(®s, ibs_data.regs[1]);
|
||||
regs.flags |= PERF_EFLAGS_EXACT;
|
||||
}
|
||||
|
||||
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
|
||||
raw.size = sizeof(u32) + ibs_data.size;
|
||||
raw.data = ibs_data.data;
|
||||
data.raw = &raw;
|
||||
}
|
||||
|
||||
throttle = perf_event_overflow(event, &data, ®s);
|
||||
out:
|
||||
if (throttle)
|
||||
perf_ibs_disable_event(perf_ibs, hwc, *config);
|
||||
else
|
||||
perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
|
||||
{
|
||||
int handled = 0;
|
||||
|
||||
handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
|
||||
handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
|
||||
|
||||
if (handled)
|
||||
inc_irq_stat(apic_perf_irqs);
|
||||
|
||||
return handled;
|
||||
}
|
||||
NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
|
||||
|
||||
static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
|
||||
{
|
||||
struct cpu_perf_ibs __percpu *pcpu;
|
||||
int ret;
|
||||
|
||||
pcpu = alloc_percpu(struct cpu_perf_ibs);
|
||||
if (!pcpu)
|
||||
return -ENOMEM;
|
||||
|
||||
perf_ibs->pcpu = pcpu;
|
||||
|
||||
/* register attributes */
|
||||
if (perf_ibs->format_attrs[0]) {
|
||||
memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
|
||||
perf_ibs->format_group.name = "format";
|
||||
perf_ibs->format_group.attrs = perf_ibs->format_attrs;
|
||||
|
||||
memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
|
||||
perf_ibs->attr_groups[0] = &perf_ibs->format_group;
|
||||
perf_ibs->pmu.attr_groups = perf_ibs->attr_groups;
|
||||
}
|
||||
|
||||
ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
|
||||
if (ret) {
|
||||
perf_ibs->pcpu = NULL;
|
||||
free_percpu(pcpu);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __init int perf_event_ibs_init(void)
|
||||
{
|
||||
struct attribute **attr = ibs_op_format_attrs;
|
||||
|
||||
if (!ibs_caps)
|
||||
return -ENODEV; /* ibs not supported by the cpu */
|
||||
|
||||
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
|
||||
|
||||
if (ibs_caps & IBS_CAPS_OPCNT) {
|
||||
perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
|
||||
*attr++ = &format_attr_cnt_ctl.attr;
|
||||
}
|
||||
perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
|
||||
|
||||
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
|
||||
printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
|
||||
|
||||
static __init int perf_event_ibs_init(void) { return 0; }
|
||||
|
||||
#endif
|
||||
|
||||
/* IBS - apic initialization, for perf and oprofile */
|
||||
|
||||
static __init u32 __get_ibs_caps(void)
|
||||
{
|
||||
u32 caps;
|
||||
unsigned int max_level;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_IBS))
|
||||
return 0;
|
||||
|
||||
/* check IBS cpuid feature flags */
|
||||
max_level = cpuid_eax(0x80000000);
|
||||
if (max_level < IBS_CPUID_FEATURES)
|
||||
return IBS_CAPS_DEFAULT;
|
||||
|
||||
caps = cpuid_eax(IBS_CPUID_FEATURES);
|
||||
if (!(caps & IBS_CAPS_AVAIL))
|
||||
/* cpuid flags not valid */
|
||||
return IBS_CAPS_DEFAULT;
|
||||
|
||||
return caps;
|
||||
}
|
||||
|
||||
u32 get_ibs_caps(void)
|
||||
{
|
||||
return ibs_caps;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(get_ibs_caps);
|
||||
|
||||
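/*
 * get_eilvt() tries to reserve the given extended APIC LVT offset for
 * IBS NMIs; put_eilvt() releases it again.
 */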
static inline int get_eilvt(int offset)
|
||||
{
|
||||
return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
|
||||
}
|
||||
|
||||
static inline int put_eilvt(int offset)
|
||||
{
|
||||
return !setup_APIC_eilvt(offset, 0, 0, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check and reserve APIC extended interrupt LVT offset for IBS if available.
|
||||
*/
|
||||
static inline int ibs_eilvt_valid(void)
|
||||
{
|
||||
int offset;
|
||||
u64 val;
|
||||
int valid = 0;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
rdmsrl(MSR_AMD64_IBSCTL, val);
|
||||
offset = val & IBSCTL_LVT_OFFSET_MASK;
|
||||
|
||||
if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
|
||||
pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
|
||||
smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!get_eilvt(offset)) {
|
||||
pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
|
||||
smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
|
||||
goto out;
|
||||
}
|
||||
|
||||
valid = 1;
|
||||
out:
|
||||
preempt_enable();
|
||||
|
||||
return valid;
|
||||
}
|
||||
|
||||
static int setup_ibs_ctl(int ibs_eilvt_off)
|
||||
{
|
||||
struct pci_dev *cpu_cfg;
|
||||
int nodes;
|
||||
u32 value = 0;
|
||||
|
||||
nodes = 0;
|
||||
cpu_cfg = NULL;
|
||||
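/*
 * Write the chosen LVT offset into the IBSCTL register of each node's
 * northbridge (one NB misc PCI device per node).
 */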
do {
|
||||
cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
|
||||
PCI_DEVICE_ID_AMD_10H_NB_MISC,
|
||||
cpu_cfg);
|
||||
if (!cpu_cfg)
|
||||
break;
|
||||
++nodes;
|
||||
pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
|
||||
| IBSCTL_LVT_OFFSET_VALID);
|
||||
pci_read_config_dword(cpu_cfg, IBSCTL, &value);
|
||||
if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
|
||||
pci_dev_put(cpu_cfg);
|
||||
printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
|
||||
"IBSCTL = 0x%08x\n", value);
|
||||
return -EINVAL;
|
||||
}
|
||||
} while (1);
|
||||
|
||||
if (!nodes) {
|
||||
printk(KERN_DEBUG "No CPU node configured for IBS\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * This runs only on the current cpu. We try to find an LVT offset and
 * set up the local APIC. For this we must disable preemption. On
 * success we initialize all nodes with this offset. This then updates
 * the offset in the per-node IBS_CTL MSR. The per-core APIC setup of
 * the IBS interrupt vector is handled by perf_ibs_cpu_notifier, which
 * uses the new offset.
 */
|
||||
static int force_ibs_eilvt_setup(void)
|
||||
{
|
||||
int offset;
|
||||
int ret;
|
||||
|
||||
preempt_disable();
|
||||
/* find the next free available EILVT entry, skip offset 0 */
|
||||
for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
|
||||
if (get_eilvt(offset))
|
||||
break;
|
||||
}
|
||||
preempt_enable();
|
||||
|
||||
if (offset == APIC_EILVT_NR_MAX) {
|
||||
printk(KERN_DEBUG "No EILVT entry available\n");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
ret = setup_ibs_ctl(offset);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (!ibs_eilvt_valid()) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pr_info("IBS: LVT offset %d assigned\n", offset);
|
||||
|
||||
return 0;
|
||||
out:
|
||||
preempt_disable();
|
||||
put_eilvt(offset);
|
||||
preempt_enable();
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void ibs_eilvt_setup(void)
|
||||
{
|
||||
/*
 * Force LVT offset assignment for family 10h: The offsets are
 * not assigned by the BIOS for this family, so the OS is
 * responsible for doing it. If the OS assignment fails, fall
 * back to the BIOS settings and try to set it up from there.
 */
|
||||
if (boot_cpu_data.x86 == 0x10)
|
||||
force_ibs_eilvt_setup();
|
||||
}
|
||||
|
||||
static inline int get_ibs_lvt_offset(void)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
rdmsrl(MSR_AMD64_IBSCTL, val);
|
||||
if (!(val & IBSCTL_LVT_OFFSET_VALID))
|
||||
return -EINVAL;
|
||||
|
||||
return val & IBSCTL_LVT_OFFSET_MASK;
|
||||
}
|
||||
|
||||
static void setup_APIC_ibs(void *dummy)
|
||||
{
|
||||
int offset;
|
||||
|
||||
offset = get_ibs_lvt_offset();
|
||||
if (offset < 0)
|
||||
goto failed;
|
||||
|
||||
if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
|
||||
return;
|
||||
failed:
|
||||
pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
|
||||
static void clear_APIC_ibs(void *dummy)
|
||||
{
|
||||
int offset;
|
||||
|
||||
offset = get_ibs_lvt_offset();
|
||||
if (offset >= 0)
|
||||
setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PM
|
||||
|
||||
static int perf_ibs_suspend(void)
|
||||
{
|
||||
clear_APIC_ibs(NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void perf_ibs_resume(void)
|
||||
{
|
||||
ibs_eilvt_setup();
|
||||
setup_APIC_ibs(NULL);
|
||||
}
|
||||
|
||||
static struct syscore_ops perf_ibs_syscore_ops = {
|
||||
.resume = perf_ibs_resume,
|
||||
.suspend = perf_ibs_suspend,
|
||||
};
|
||||
|
||||
static void perf_ibs_pm_init(void)
|
||||
{
|
||||
register_syscore_ops(&perf_ibs_syscore_ops);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline void perf_ibs_pm_init(void) { }
|
||||
|
||||
#endif
|
||||
|
||||
static int
|
||||
perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
|
||||
{
|
||||
switch (action & ~CPU_TASKS_FROZEN) {
|
||||
case CPU_STARTING:
|
||||
setup_APIC_ibs(NULL);
|
||||
break;
|
||||
case CPU_DYING:
|
||||
clear_APIC_ibs(NULL);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static __init int amd_ibs_init(void)
|
||||
{
|
||||
u32 caps;
|
||||
int ret = -EINVAL;
|
||||
|
||||
caps = __get_ibs_caps();
|
||||
if (!caps)
|
||||
return -ENODEV; /* ibs not supported by the cpu */
|
||||
|
||||
ibs_eilvt_setup();
|
||||
|
||||
if (!ibs_eilvt_valid())
|
||||
goto out;
|
||||
|
||||
perf_ibs_pm_init();
|
||||
cpu_notifier_register_begin();
|
||||
ibs_caps = caps;
|
||||
/* make ibs_caps visible to other cpus: */
|
||||
smp_mb();
|
||||
smp_call_function(setup_APIC_ibs, NULL, 1);
|
||||
__perf_cpu_notifier(perf_ibs_cpu_notifier);
|
||||
cpu_notifier_register_done();
|
||||
|
||||
ret = perf_event_ibs_init();
|
||||
out:
|
||||
if (ret)
|
||||
pr_err("Failed to setup IBS, %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Since we need the pci subsystem to init ibs we can't do this earlier: */
|
||||
device_initcall(amd_ibs_init);
|
||||
502
arch/x86/kernel/cpu/perf_event_amd_iommu.c
Normal file
@ -0,0 +1,502 @@
/*
|
||||
* Copyright (C) 2013 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Author: Steven Kinney <Steven.Kinney@amd.com>
|
||||
* Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
|
||||
*
|
||||
* Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include "perf_event.h"
|
||||
#include "perf_event_amd_iommu.h"
|
||||
|
||||
#define COUNTER_SHIFT 16
|
||||
|
||||
#define _GET_BANK(ev) ((u8)(ev->hw.extra_reg.reg >> 8))
|
||||
#define _GET_CNTR(ev) ((u8)(ev->hw.extra_reg.reg))
|
||||
|
||||
/* iommu pmu config masks */
|
||||
#define _GET_CSOURCE(ev) ((ev->hw.config & 0xFFULL))
|
||||
#define _GET_DEVID(ev) ((ev->hw.config >> 8) & 0xFFFFULL)
|
||||
#define _GET_PASID(ev) ((ev->hw.config >> 24) & 0xFFFFULL)
|
||||
#define _GET_DOMID(ev) ((ev->hw.config >> 40) & 0xFFFFULL)
|
||||
#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config) & 0xFFFFULL)
|
||||
#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
|
||||
#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
|
||||
|
||||
static struct perf_amd_iommu __perf_iommu;
|
||||
|
||||
struct perf_amd_iommu {
|
||||
struct pmu pmu;
|
||||
u8 max_banks;
|
||||
u8 max_counters;
|
||||
u64 cntr_assign_mask;
|
||||
raw_spinlock_t lock;
|
||||
const struct attribute_group *attr_groups[4];
|
||||
};
|
||||
|
||||
#define format_group attr_groups[0]
|
||||
#define cpumask_group attr_groups[1]
|
||||
#define events_group attr_groups[2]
|
||||
#define null_group attr_groups[3]
|
||||
|
||||
/*---------------------------------------------
|
||||
* sysfs format attributes
|
||||
*---------------------------------------------*/
|
||||
PMU_FORMAT_ATTR(csource, "config:0-7");
|
||||
PMU_FORMAT_ATTR(devid, "config:8-23");
|
||||
PMU_FORMAT_ATTR(pasid, "config:24-39");
|
||||
PMU_FORMAT_ATTR(domid, "config:40-55");
|
||||
PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
|
||||
PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
|
||||
PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
|
||||
|
||||
static struct attribute *iommu_format_attrs[] = {
|
||||
&format_attr_csource.attr,
|
||||
&format_attr_devid.attr,
|
||||
&format_attr_pasid.attr,
|
||||
&format_attr_domid.attr,
|
||||
&format_attr_devid_mask.attr,
|
||||
&format_attr_pasid_mask.attr,
|
||||
&format_attr_domid_mask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group amd_iommu_format_group = {
|
||||
.name = "format",
|
||||
.attrs = iommu_format_attrs,
|
||||
};
|
||||
|
||||
/*---------------------------------------------
|
||||
* sysfs events attributes
|
||||
*---------------------------------------------*/
|
||||
struct amd_iommu_event_desc {
|
||||
struct kobj_attribute attr;
|
||||
const char *event;
|
||||
};
|
||||
|
||||
static ssize_t _iommu_event_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
struct amd_iommu_event_desc *event =
|
||||
container_of(attr, struct amd_iommu_event_desc, attr);
|
||||
return sprintf(buf, "%s\n", event->event);
|
||||
}
|
||||
|
||||
#define AMD_IOMMU_EVENT_DESC(_name, _event) \
|
||||
{ \
|
||||
.attr = __ATTR(_name, 0444, _iommu_event_show, NULL), \
|
||||
.event = _event, \
|
||||
}
|
||||
|
||||
static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
|
||||
AMD_IOMMU_EVENT_DESC(mem_pass_untrans, "csource=0x01"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_pass_pretrans, "csource=0x02"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_pass_excl, "csource=0x03"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_target_abort, "csource=0x04"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_trans_total, "csource=0x05"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit, "csource=0x06"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis, "csource=0x07"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit, "csource=0x08"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis, "csource=0x09"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_dte_hit, "csource=0x0a"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_dte_mis, "csource=0x0b"),
|
||||
AMD_IOMMU_EVENT_DESC(page_tbl_read_tot, "csource=0x0c"),
|
||||
AMD_IOMMU_EVENT_DESC(page_tbl_read_nst, "csource=0x0d"),
|
||||
AMD_IOMMU_EVENT_DESC(page_tbl_read_gst, "csource=0x0e"),
|
||||
AMD_IOMMU_EVENT_DESC(int_dte_hit, "csource=0x0f"),
|
||||
AMD_IOMMU_EVENT_DESC(int_dte_mis, "csource=0x10"),
|
||||
AMD_IOMMU_EVENT_DESC(cmd_processed, "csource=0x11"),
|
||||
AMD_IOMMU_EVENT_DESC(cmd_processed_inv, "csource=0x12"),
|
||||
AMD_IOMMU_EVENT_DESC(tlb_inv, "csource=0x13"),
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
||||
/*---------------------------------------------
|
||||
* sysfs cpumask attributes
|
||||
*---------------------------------------------*/
|
||||
static cpumask_t iommu_cpumask;
|
||||
|
||||
static ssize_t _iommu_cpumask_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &iommu_cpumask);
|
||||
buf[n++] = '\n';
|
||||
buf[n] = '\0';
|
||||
return n;
|
||||
}
|
||||
static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);
|
||||
|
||||
static struct attribute *iommu_cpumask_attrs[] = {
|
||||
&dev_attr_cpumask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group amd_iommu_cpumask_group = {
|
||||
.attrs = iommu_cpumask_attrs,
|
||||
};
|
||||
|
||||
/*---------------------------------------------*/
|
||||
|
||||
static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
|
||||
{
|
||||
unsigned long flags;
|
||||
int shift, bank, cntr, retval;
|
||||
int max_banks = perf_iommu->max_banks;
|
||||
int max_cntrs = perf_iommu->max_counters;
|
||||
|
||||
raw_spin_lock_irqsave(&perf_iommu->lock, flags);
|
||||
|
||||
for (bank = 0, shift = 0; bank < max_banks; bank++) {
|
||||
for (cntr = 0; cntr < max_cntrs; cntr++) {
|
||||
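/* cntr_assign_mask reserves four counter slots per bank: bit index = bank * 4 + cntr */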
shift = bank + (bank*3) + cntr;
|
||||
if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
|
||||
continue;
|
||||
} else {
|
||||
perf_iommu->cntr_assign_mask |= (1ULL<<shift);
|
||||
retval = ((u16)((u16)bank<<8) | (u8)(cntr));
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
retval = -ENOSPC;
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
|
||||
u8 bank, u8 cntr)
|
||||
{
|
||||
unsigned long flags;
|
||||
int max_banks, max_cntrs;
|
||||
int shift = 0;
|
||||
|
||||
max_banks = perf_iommu->max_banks;
|
||||
max_cntrs = perf_iommu->max_counters;
|
||||
|
||||
if ((bank > max_banks) || (cntr > max_cntrs))
|
||||
return -EINVAL;
|
||||
|
||||
shift = bank + cntr + (bank*3);
|
||||
|
||||
raw_spin_lock_irqsave(&perf_iommu->lock, flags);
|
||||
perf_iommu->cntr_assign_mask &= ~(1ULL<<shift);
|
||||
raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int perf_iommu_event_init(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct perf_amd_iommu *perf_iommu;
|
||||
u64 config, config1;
|
||||
|
||||
/* test the event attr type check for PMU enumeration */
|
||||
if (event->attr.type != event->pmu->type)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* IOMMU counters are shared across all cores.
|
||||
* Therefore, it does not support per-process mode.
|
||||
* Also, it does not support event sampling mode.
|
||||
*/
|
||||
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
|
||||
return -EINVAL;
|
||||
|
||||
/* IOMMU counters do not have usr/os/guest/host bits */
|
||||
if (event->attr.exclude_user || event->attr.exclude_kernel ||
|
||||
event->attr.exclude_host || event->attr.exclude_guest)
|
||||
return -EINVAL;
|
||||
|
||||
if (event->cpu < 0)
|
||||
return -EINVAL;
|
||||
|
||||
perf_iommu = &__perf_iommu;
|
||||
|
||||
if (event->pmu != &perf_iommu->pmu)
|
||||
return -ENOENT;
|
||||
|
||||
if (perf_iommu) {
|
||||
config = event->attr.config;
|
||||
config1 = event->attr.config1;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* integrate with iommu base devid (0000), assume one iommu */
|
||||
perf_iommu->max_banks =
|
||||
amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
|
||||
perf_iommu->max_counters =
|
||||
amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
|
||||
if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
|
||||
return -EINVAL;
|
||||
|
||||
/* update the hw_perf_event struct with the iommu config data */
|
||||
hwc->config = config;
|
||||
hwc->extra_reg.config = config1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void perf_iommu_enable_event(struct perf_event *ev)
|
||||
{
|
||||
u8 csource = _GET_CSOURCE(ev);
|
||||
u16 devid = _GET_DEVID(ev);
|
||||
u64 reg = 0ULL;
|
||||
|
||||
reg = csource;
|
||||
amd_iommu_pc_get_set_reg_val(devid,
|
||||
_GET_BANK(ev), _GET_CNTR(ev) ,
|
||||
IOMMU_PC_COUNTER_SRC_REG, ®, true);
|
||||
|
||||
reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
|
||||
if (reg)
|
||||
reg |= (1UL << 31);
|
||||
amd_iommu_pc_get_set_reg_val(devid,
|
||||
_GET_BANK(ev), _GET_CNTR(ev) ,
|
||||
IOMMU_PC_DEVID_MATCH_REG, ®, true);
|
||||
|
||||
reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
|
||||
if (reg)
|
||||
reg |= (1UL << 31);
|
||||
amd_iommu_pc_get_set_reg_val(devid,
|
||||
_GET_BANK(ev), _GET_CNTR(ev) ,
|
||||
IOMMU_PC_PASID_MATCH_REG, ®, true);
|
||||
|
||||
reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
|
||||
if (reg)
|
||||
reg |= (1UL << 31);
|
||||
amd_iommu_pc_get_set_reg_val(devid,
|
||||
_GET_BANK(ev), _GET_CNTR(ev) ,
|
||||
IOMMU_PC_DOMID_MATCH_REG, ®, true);
|
||||
}
|
||||
|
||||
static void perf_iommu_disable_event(struct perf_event *event)
|
||||
{
|
||||
u64 reg = 0ULL;
|
||||
|
||||
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
|
||||
_GET_BANK(event), _GET_CNTR(event),
|
||||
IOMMU_PC_COUNTER_SRC_REG, ®, true);
|
||||
}
|
||||
|
||||
static void perf_iommu_start(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
pr_debug("perf: amd_iommu:perf_iommu_start\n");
|
||||
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
|
||||
hwc->state = 0;
|
||||
|
||||
if (flags & PERF_EF_RELOAD) {
|
||||
u64 prev_raw_count = local64_read(&hwc->prev_count);
|
||||
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
|
||||
_GET_BANK(event), _GET_CNTR(event),
|
||||
IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
|
||||
}
|
||||
|
||||
perf_iommu_enable_event(event);
|
||||
perf_event_update_userpage(event);
|
||||
|
||||
}
|
||||
|
||||
static void perf_iommu_read(struct perf_event *event)
|
||||
{
|
||||
u64 count = 0ULL;
|
||||
u64 prev_raw_count = 0ULL;
|
||||
u64 delta = 0ULL;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
pr_debug("perf: amd_iommu:perf_iommu_read\n");
|
||||
|
||||
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
|
||||
_GET_BANK(event), _GET_CNTR(event),
|
||||
IOMMU_PC_COUNTER_REG, &count, false);
|
||||
|
||||
/* IOMMU pc counter register is only 48 bits */
|
||||
count &= 0xFFFFFFFFFFFFULL;
|
||||
|
||||
prev_raw_count = local64_read(&hwc->prev_count);
|
||||
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
|
||||
count) != prev_raw_count)
|
||||
return;
|
||||
|
||||
/* Handling 48-bit counter overflowing */
|
||||
delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
|
||||
delta >>= COUNTER_SHIFT;
|
||||
local64_add(delta, &event->count);
|
||||
|
||||
}
|
||||
|
||||
static void perf_iommu_stop(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 config;
|
||||
|
||||
pr_debug("perf: amd_iommu:perf_iommu_stop\n");
|
||||
|
||||
if (hwc->state & PERF_HES_UPTODATE)
|
||||
return;
|
||||
|
||||
perf_iommu_disable_event(event);
|
||||
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
|
||||
hwc->state |= PERF_HES_STOPPED;
|
||||
|
||||
if (hwc->state & PERF_HES_UPTODATE)
|
||||
return;
|
||||
|
||||
config = hwc->config;
|
||||
perf_iommu_read(event);
|
||||
hwc->state |= PERF_HES_UPTODATE;
|
||||
}
|
||||
|
||||
static int perf_iommu_add(struct perf_event *event, int flags)
|
||||
{
|
||||
int retval;
|
||||
struct perf_amd_iommu *perf_iommu =
|
||||
container_of(event->pmu, struct perf_amd_iommu, pmu);
|
||||
|
||||
pr_debug("perf: amd_iommu:perf_iommu_add\n");
|
||||
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
|
||||
|
||||
/* request an iommu bank/counter */
|
||||
retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
|
||||
if (retval != -ENOSPC)
|
||||
event->hw.extra_reg.reg = (u16)retval;
|
||||
else
|
||||
return retval;
|
||||
|
||||
if (flags & PERF_EF_START)
|
||||
perf_iommu_start(event, PERF_EF_RELOAD);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void perf_iommu_del(struct perf_event *event, int flags)
|
||||
{
|
||||
struct perf_amd_iommu *perf_iommu =
|
||||
container_of(event->pmu, struct perf_amd_iommu, pmu);
|
||||
|
||||
pr_debug("perf: amd_iommu:perf_iommu_del\n");
|
||||
perf_iommu_stop(event, PERF_EF_UPDATE);
|
||||
|
||||
/* clear the assigned iommu bank/counter */
|
||||
clear_avail_iommu_bnk_cntr(perf_iommu,
|
||||
_GET_BANK(event),
|
||||
_GET_CNTR(event));
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
}
|
||||
|
||||
static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
|
||||
{
|
||||
struct attribute **attrs;
|
||||
struct attribute_group *attr_group;
|
||||
int i = 0, j;
|
||||
|
||||
while (amd_iommu_v2_event_descs[i].attr.attr.name)
|
||||
i++;
|
||||
|
||||
attr_group = kzalloc(sizeof(struct attribute *)
|
||||
* (i + 1) + sizeof(*attr_group), GFP_KERNEL);
|
||||
if (!attr_group)
|
||||
return -ENOMEM;
|
||||
|
||||
attrs = (struct attribute **)(attr_group + 1);
|
||||
for (j = 0; j < i; j++)
|
||||
attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
|
||||
|
||||
attr_group->name = "events";
|
||||
attr_group->attrs = attrs;
|
||||
perf_iommu->events_group = attr_group;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __init void amd_iommu_pc_exit(void)
|
||||
{
|
||||
if (__perf_iommu.events_group != NULL) {
|
||||
kfree(__perf_iommu.events_group);
|
||||
__perf_iommu.events_group = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static __init int _init_perf_amd_iommu(
|
||||
struct perf_amd_iommu *perf_iommu, char *name)
|
||||
{
|
||||
int ret;
|
||||
|
||||
raw_spin_lock_init(&perf_iommu->lock);
|
||||
|
||||
/* Init format attributes */
|
||||
perf_iommu->format_group = &amd_iommu_format_group;
|
||||
|
||||
/* Init cpumask attributes to only core 0 */
|
||||
cpumask_set_cpu(0, &iommu_cpumask);
|
||||
perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
|
||||
|
||||
/* Init events attributes */
|
||||
if (_init_events_attrs(perf_iommu) != 0)
|
||||
pr_err("perf: amd_iommu: Only support raw events.\n");
|
||||
|
||||
/* Init null attributes */
|
||||
perf_iommu->null_group = NULL;
|
||||
perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
|
||||
|
||||
ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
|
||||
if (ret) {
|
||||
pr_err("perf: amd_iommu: Failed to initialized.\n");
|
||||
amd_iommu_pc_exit();
|
||||
} else {
|
||||
pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
|
||||
amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
|
||||
amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct perf_amd_iommu __perf_iommu = {
|
||||
.pmu = {
|
||||
.event_init = perf_iommu_event_init,
|
||||
.add = perf_iommu_add,
|
||||
.del = perf_iommu_del,
|
||||
.start = perf_iommu_start,
|
||||
.stop = perf_iommu_stop,
|
||||
.read = perf_iommu_read,
|
||||
},
|
||||
.max_banks = 0x00,
|
||||
.max_counters = 0x00,
|
||||
.cntr_assign_mask = 0ULL,
|
||||
.format_group = NULL,
|
||||
.cpumask_group = NULL,
|
||||
.events_group = NULL,
|
||||
.null_group = NULL,
|
||||
};
|
||||
|
||||
static __init int amd_iommu_pc_init(void)
|
||||
{
|
||||
/* Make sure the IOMMU PC resource is available */
|
||||
if (!amd_iommu_pc_supported())
|
||||
return -ENODEV;
|
||||
|
||||
_init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
device_initcall(amd_iommu_pc_init);
|
||||
40
arch/x86/kernel/cpu/perf_event_amd_iommu.h
Normal file
@ -0,0 +1,40 @@
/*
|
||||
* Copyright (C) 2013 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Author: Steven Kinney <Steven.Kinney@amd.com>
|
||||
* Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#ifndef _PERF_EVENT_AMD_IOMMU_H_
|
||||
#define _PERF_EVENT_AMD_IOMMU_H_
|
||||
|
||||
/* iommu pc mmio region register indexes */
|
||||
#define IOMMU_PC_COUNTER_REG 0x00
|
||||
#define IOMMU_PC_COUNTER_SRC_REG 0x08
|
||||
#define IOMMU_PC_PASID_MATCH_REG 0x10
|
||||
#define IOMMU_PC_DOMID_MATCH_REG 0x18
|
||||
#define IOMMU_PC_DEVID_MATCH_REG 0x20
|
||||
#define IOMMU_PC_COUNTER_REPORT_REG 0x28
|
||||
|
||||
/* maximum specified banks/counters */
|
||||
#define PC_MAX_SPEC_BNKS 64
|
||||
#define PC_MAX_SPEC_CNTRS 16
|
||||
|
||||
/* iommu pc reg masks */
|
||||
#define IOMMU_BASE_DEVID 0x0000
|
||||
|
||||
/* amd_iommu_init.c external support functions */
|
||||
extern bool amd_iommu_pc_supported(void);
|
||||
|
||||
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
|
||||
|
||||
extern u8 amd_iommu_pc_get_max_counters(u16 devid);
|
||||
|
||||
extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
|
||||
u8 fxn, u64 *value, bool is_write);
|
||||
|
||||
#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
|
||||
604
arch/x86/kernel/cpu/perf_event_amd_uncore.c
Normal file
@ -0,0 +1,604 @@
/*
|
||||
* Copyright (C) 2013 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Author: Jacob Shin <jacob.shin@amd.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/cpumask.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/perf_event.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
#define NUM_COUNTERS_NB 4
|
||||
#define NUM_COUNTERS_L2 4
|
||||
#define MAX_COUNTERS NUM_COUNTERS_NB
|
||||
|
||||
#define RDPMC_BASE_NB 6
|
||||
#define RDPMC_BASE_L2 10
|
||||
|
||||
#define COUNTER_SHIFT 16
|
||||
|
||||
struct amd_uncore {
|
||||
int id;
|
||||
int refcnt;
|
||||
int cpu;
|
||||
int num_counters;
|
||||
int rdpmc_base;
|
||||
u32 msr_base;
|
||||
cpumask_t *active_mask;
|
||||
struct pmu *pmu;
|
||||
struct perf_event *events[MAX_COUNTERS];
|
||||
struct amd_uncore *free_when_cpu_online;
|
||||
};
|
||||
|
||||
static struct amd_uncore * __percpu *amd_uncore_nb;
|
||||
static struct amd_uncore * __percpu *amd_uncore_l2;
|
||||
|
||||
static struct pmu amd_nb_pmu;
|
||||
static struct pmu amd_l2_pmu;
|
||||
|
||||
static cpumask_t amd_nb_active_mask;
|
||||
static cpumask_t amd_l2_active_mask;
|
||||
|
||||
static bool is_nb_event(struct perf_event *event)
|
||||
{
|
||||
return event->pmu->type == amd_nb_pmu.type;
|
||||
}
|
||||
|
||||
static bool is_l2_event(struct perf_event *event)
|
||||
{
|
||||
return event->pmu->type == amd_l2_pmu.type;
|
||||
}
|
||||
|
||||
static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
|
||||
{
|
||||
if (is_nb_event(event) && amd_uncore_nb)
|
||||
return *per_cpu_ptr(amd_uncore_nb, event->cpu);
|
||||
else if (is_l2_event(event) && amd_uncore_l2)
|
||||
return *per_cpu_ptr(amd_uncore_l2, event->cpu);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void amd_uncore_read(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 prev, new;
|
||||
s64 delta;
|
||||
|
||||
/*
|
||||
* since we do not enable counter overflow interrupts,
|
||||
* we do not have to worry about prev_count changing on us
|
||||
*/
|
||||
|
||||
prev = local64_read(&hwc->prev_count);
|
||||
rdpmcl(hwc->event_base_rdpmc, new);
|
||||
local64_set(&hwc->prev_count, new);
|
||||
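/*
 * The hardware counter is 48 bits wide; shift both values up by 16 so
 * the subtraction wraps correctly, then shift the signed delta back down.
 */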
delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
|
||||
delta >>= COUNTER_SHIFT;
|
||||
local64_add(delta, &event->count);
|
||||
}
|
||||
|
||||
static void amd_uncore_start(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
if (flags & PERF_EF_RELOAD)
|
||||
wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
|
||||
|
||||
hwc->state = 0;
|
||||
wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
|
||||
perf_event_update_userpage(event);
|
||||
}
|
||||
|
||||
static void amd_uncore_stop(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
wrmsrl(hwc->config_base, hwc->config);
|
||||
hwc->state |= PERF_HES_STOPPED;
|
||||
|
||||
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
|
||||
amd_uncore_read(event);
|
||||
hwc->state |= PERF_HES_UPTODATE;
|
||||
}
|
||||
}
|
||||
|
||||
static int amd_uncore_add(struct perf_event *event, int flags)
|
||||
{
|
||||
int i;
|
||||
struct amd_uncore *uncore = event_to_amd_uncore(event);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
/* are we already assigned? */
|
||||
if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
|
||||
goto out;
|
||||
|
||||
for (i = 0; i < uncore->num_counters; i++) {
|
||||
if (uncore->events[i] == event) {
|
||||
hwc->idx = i;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* if not, take the first available counter */
|
||||
hwc->idx = -1;
|
||||
for (i = 0; i < uncore->num_counters; i++) {
|
||||
if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
|
||||
hwc->idx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
if (hwc->idx == -1)
|
||||
return -EBUSY;
|
||||
|
||||
hwc->config_base = uncore->msr_base + (2 * hwc->idx);
|
||||
hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
|
||||
hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
|
||||
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
|
||||
|
||||
if (flags & PERF_EF_START)
|
||||
amd_uncore_start(event, PERF_EF_RELOAD);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void amd_uncore_del(struct perf_event *event, int flags)
|
||||
{
|
||||
int i;
|
||||
struct amd_uncore *uncore = event_to_amd_uncore(event);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
amd_uncore_stop(event, PERF_EF_UPDATE);
|
||||
|
||||
for (i = 0; i < uncore->num_counters; i++) {
|
||||
if (cmpxchg(&uncore->events[i], event, NULL) == event)
|
||||
break;
|
||||
}
|
||||
|
||||
hwc->idx = -1;
|
||||
}
|
||||
|
||||
static int amd_uncore_event_init(struct perf_event *event)
|
||||
{
|
||||
struct amd_uncore *uncore;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
if (event->attr.type != event->pmu->type)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* NB and L2 counters (MSRs) are shared across all cores that share the
|
||||
* same NB / L2 cache. Interrupts can be directed to a single target
|
||||
* core, however, event counts generated by processes running on other
|
||||
* cores cannot be masked out. So we do not support sampling and
|
||||
* per-thread events.
|
||||
*/
|
||||
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
|
||||
return -EINVAL;
|
||||
|
||||
/* NB and L2 counters do not have usr/os/guest/host bits */
|
||||
if (event->attr.exclude_user || event->attr.exclude_kernel ||
|
||||
event->attr.exclude_host || event->attr.exclude_guest)
|
||||
return -EINVAL;
|
||||
|
||||
/* and we do not enable counter overflow interrupts */
|
||||
hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
|
||||
hwc->idx = -1;
|
||||
|
||||
if (event->cpu < 0)
|
||||
return -EINVAL;
|
||||
|
||||
uncore = event_to_amd_uncore(event);
|
||||
if (!uncore)
|
||||
return -ENODEV;
|
||||
|
||||
/*
|
||||
* since request can come in to any of the shared cores, we will remap
|
||||
* to a single common cpu.
|
||||
*/
|
||||
event->cpu = uncore->cpu;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
int n;
|
||||
cpumask_t *active_mask;
|
||||
struct pmu *pmu = dev_get_drvdata(dev);
|
||||
|
||||
if (pmu->type == amd_nb_pmu.type)
|
||||
active_mask = &amd_nb_active_mask;
|
||||
else if (pmu->type == amd_l2_pmu.type)
|
||||
active_mask = &amd_l2_active_mask;
|
||||
else
|
||||
return 0;
|
||||
|
||||
n = cpulist_scnprintf(buf, PAGE_SIZE - 2, active_mask);
|
||||
buf[n++] = '\n';
|
||||
buf[n] = '\0';
|
||||
return n;
|
||||
}
|
||||
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
|
||||
|
||||
static struct attribute *amd_uncore_attrs[] = {
|
||||
&dev_attr_cpumask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group amd_uncore_attr_group = {
|
||||
.attrs = amd_uncore_attrs,
|
||||
};
|
||||
|
||||
PMU_FORMAT_ATTR(event, "config:0-7,32-35");
|
||||
PMU_FORMAT_ATTR(umask, "config:8-15");
|
||||
|
||||
static struct attribute *amd_uncore_format_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
&format_attr_umask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group amd_uncore_format_group = {
|
||||
.name = "format",
|
||||
.attrs = amd_uncore_format_attr,
|
||||
};
|
||||
|
||||
static const struct attribute_group *amd_uncore_attr_groups[] = {
|
||||
&amd_uncore_attr_group,
|
||||
&amd_uncore_format_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct pmu amd_nb_pmu = {
|
||||
.attr_groups = amd_uncore_attr_groups,
|
||||
.name = "amd_nb",
|
||||
.event_init = amd_uncore_event_init,
|
||||
.add = amd_uncore_add,
|
||||
.del = amd_uncore_del,
|
||||
.start = amd_uncore_start,
|
||||
.stop = amd_uncore_stop,
|
||||
.read = amd_uncore_read,
|
||||
};
|
||||
|
||||
static struct pmu amd_l2_pmu = {
|
||||
.attr_groups = amd_uncore_attr_groups,
|
||||
.name = "amd_l2",
|
||||
.event_init = amd_uncore_event_init,
|
||||
.add = amd_uncore_add,
|
||||
.del = amd_uncore_del,
|
||||
.start = amd_uncore_start,
|
||||
.stop = amd_uncore_stop,
|
||||
.read = amd_uncore_read,
|
||||
};
|
||||
|
||||
static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
|
||||
{
|
||||
return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
|
||||
cpu_to_node(cpu));
|
||||
}
|
||||
|
||||
static int amd_uncore_cpu_up_prepare(unsigned int cpu)
|
||||
{
|
||||
struct amd_uncore *uncore_nb = NULL, *uncore_l2;
|
||||
|
||||
if (amd_uncore_nb) {
|
||||
uncore_nb = amd_uncore_alloc(cpu);
|
||||
if (!uncore_nb)
|
||||
goto fail;
|
||||
uncore_nb->cpu = cpu;
|
||||
uncore_nb->num_counters = NUM_COUNTERS_NB;
|
||||
uncore_nb->rdpmc_base = RDPMC_BASE_NB;
|
||||
uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
|
||||
uncore_nb->active_mask = &amd_nb_active_mask;
|
||||
uncore_nb->pmu = &amd_nb_pmu;
|
||||
*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
|
||||
}
|
||||
|
||||
if (amd_uncore_l2) {
|
||||
uncore_l2 = amd_uncore_alloc(cpu);
|
||||
if (!uncore_l2)
|
||||
goto fail;
|
||||
uncore_l2->cpu = cpu;
|
||||
uncore_l2->num_counters = NUM_COUNTERS_L2;
|
||||
uncore_l2->rdpmc_base = RDPMC_BASE_L2;
|
||||
uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
|
||||
uncore_l2->active_mask = &amd_l2_active_mask;
|
||||
uncore_l2->pmu = &amd_l2_pmu;
|
||||
*per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
kfree(uncore_nb);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static struct amd_uncore *
|
||||
amd_uncore_find_online_sibling(struct amd_uncore *this,
|
||||
struct amd_uncore * __percpu *uncores)
|
||||
{
|
||||
unsigned int cpu;
|
||||
struct amd_uncore *that;
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
that = *per_cpu_ptr(uncores, cpu);
|
||||
|
||||
if (!that)
|
||||
continue;
|
||||
|
||||
if (this == that)
|
||||
continue;
|
||||
|
||||
if (this->id == that->id) {
|
||||
that->free_when_cpu_online = this;
|
||||
this = that;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
this->refcnt++;
|
||||
return this;
|
||||
}
|
||||
|
||||
static void amd_uncore_cpu_starting(unsigned int cpu)
|
||||
{
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
struct amd_uncore *uncore;
|
||||
|
||||
if (amd_uncore_nb) {
|
||||
uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
|
||||
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
|
||||
uncore->id = ecx & 0xff;
|
||||
|
||||
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
|
||||
*per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
|
||||
}
|
||||
|
||||
if (amd_uncore_l2) {
|
||||
unsigned int apicid = cpu_data(cpu).apicid;
|
||||
unsigned int nshared;
|
||||
|
||||
uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
|
||||
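/*
 * CPUID leaf 0x8000001d, index 2 describes the L2 cache; EAX[25:14]
 * holds the number of logical CPUs sharing it, minus one.
 */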
cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
|
||||
nshared = ((eax >> 14) & 0xfff) + 1;
|
||||
uncore->id = apicid - (apicid % nshared);
|
||||
|
||||
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
|
||||
*per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
|
||||
}
|
||||
}
|
||||
|
||||
static void uncore_online(unsigned int cpu,
|
||||
struct amd_uncore * __percpu *uncores)
|
||||
{
|
||||
struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
|
||||
|
||||
kfree(uncore->free_when_cpu_online);
|
||||
uncore->free_when_cpu_online = NULL;
|
||||
|
||||
if (cpu == uncore->cpu)
|
||||
cpumask_set_cpu(cpu, uncore->active_mask);
|
||||
}
|
||||
|
||||
static void amd_uncore_cpu_online(unsigned int cpu)
|
||||
{
|
||||
if (amd_uncore_nb)
|
||||
uncore_online(cpu, amd_uncore_nb);
|
||||
|
||||
if (amd_uncore_l2)
|
||||
uncore_online(cpu, amd_uncore_l2);
|
||||
}
|
||||
|
||||
static void uncore_down_prepare(unsigned int cpu,
|
||||
struct amd_uncore * __percpu *uncores)
|
||||
{
|
||||
unsigned int i;
|
||||
struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
|
||||
|
||||
if (this->cpu != cpu)
|
||||
return;
|
||||
|
||||
/* this cpu is going down, migrate to a shared sibling if possible */
|
||||
for_each_online_cpu(i) {
|
||||
struct amd_uncore *that = *per_cpu_ptr(uncores, i);
|
||||
|
||||
if (cpu == i)
|
||||
continue;
|
||||
|
||||
if (this == that) {
|
||||
perf_pmu_migrate_context(this->pmu, cpu, i);
|
||||
cpumask_clear_cpu(cpu, that->active_mask);
|
||||
cpumask_set_cpu(i, that->active_mask);
|
||||
that->cpu = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void amd_uncore_cpu_down_prepare(unsigned int cpu)
|
||||
{
|
||||
if (amd_uncore_nb)
|
||||
uncore_down_prepare(cpu, amd_uncore_nb);
|
||||
|
||||
if (amd_uncore_l2)
|
||||
uncore_down_prepare(cpu, amd_uncore_l2);
|
||||
}
|
||||
|
||||
static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
|
||||
{
|
||||
struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
|
||||
|
||||
if (cpu == uncore->cpu)
|
||||
cpumask_clear_cpu(cpu, uncore->active_mask);
|
||||
|
||||
if (!--uncore->refcnt)
|
||||
kfree(uncore);
|
||||
*per_cpu_ptr(uncores, cpu) = NULL;
|
||||
}
|
||||
|
||||
static void amd_uncore_cpu_dead(unsigned int cpu)
|
||||
{
|
||||
if (amd_uncore_nb)
|
||||
uncore_dead(cpu, amd_uncore_nb);
|
||||
|
||||
if (amd_uncore_l2)
|
||||
uncore_dead(cpu, amd_uncore_l2);
|
||||
}
|
||||
|
||||
static int
|
||||
amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
|
||||
void *hcpu)
|
||||
{
|
||||
unsigned int cpu = (long)hcpu;
|
||||
|
||||
switch (action & ~CPU_TASKS_FROZEN) {
|
||||
case CPU_UP_PREPARE:
|
||||
if (amd_uncore_cpu_up_prepare(cpu))
|
||||
return notifier_from_errno(-ENOMEM);
|
||||
break;
|
||||
|
||||
case CPU_STARTING:
|
||||
amd_uncore_cpu_starting(cpu);
|
||||
break;
|
||||
|
||||
case CPU_ONLINE:
|
||||
amd_uncore_cpu_online(cpu);
|
||||
break;
|
||||
|
||||
case CPU_DOWN_PREPARE:
|
||||
amd_uncore_cpu_down_prepare(cpu);
|
||||
break;
|
||||
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_DEAD:
|
||||
amd_uncore_cpu_dead(cpu);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block amd_uncore_cpu_notifier_block = {
|
||||
.notifier_call = amd_uncore_cpu_notifier,
|
||||
.priority = CPU_PRI_PERF + 1,
|
||||
};
|
||||
|
||||
static void __init init_cpu_already_online(void *dummy)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
|
||||
amd_uncore_cpu_starting(cpu);
|
||||
amd_uncore_cpu_online(cpu);
|
||||
}
|
||||
|
||||
static void cleanup_cpu_online(void *dummy)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
|
||||
amd_uncore_cpu_dead(cpu);
|
||||
}
|
||||
|
||||
static int __init amd_uncore_init(void)
|
||||
{
|
||||
unsigned int cpu, cpu2;
|
||||
int ret = -ENODEV;
|
||||
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
|
||||
goto fail_nodev;
|
||||
|
||||
if (!cpu_has_topoext)
|
||||
goto fail_nodev;
|
||||
|
||||
if (cpu_has_perfctr_nb) {
|
||||
amd_uncore_nb = alloc_percpu(struct amd_uncore *);
|
||||
if (!amd_uncore_nb) {
|
||||
ret = -ENOMEM;
|
||||
goto fail_nb;
|
||||
}
|
||||
ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
|
||||
if (ret)
|
||||
goto fail_nb;
|
||||
|
||||
printk(KERN_INFO "perf: AMD NB counters detected\n");
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
if (cpu_has_perfctr_l2) {
|
||||
amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
|
||||
if (!amd_uncore_l2) {
|
||||
ret = -ENOMEM;
|
||||
goto fail_l2;
|
||||
}
|
||||
ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
|
||||
if (ret)
|
||||
goto fail_l2;
|
||||
|
||||
printk(KERN_INFO "perf: AMD L2I counters detected\n");
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
goto fail_nodev;
|
||||
|
||||
cpu_notifier_register_begin();
|
||||
|
||||
/* init cpus already online before registering for hotplug notifier */
|
||||
for_each_online_cpu(cpu) {
|
||||
ret = amd_uncore_cpu_up_prepare(cpu);
|
||||
if (ret)
|
||||
goto fail_online;
|
||||
smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
|
||||
}
|
||||
|
||||
__register_cpu_notifier(&amd_uncore_cpu_notifier_block);
|
||||
cpu_notifier_register_done();
|
||||
|
||||
return 0;
|
||||
|
||||
|
||||
fail_online:
|
||||
for_each_online_cpu(cpu2) {
|
||||
if (cpu2 == cpu)
|
||||
break;
|
||||
smp_call_function_single(cpu2, cleanup_cpu_online, NULL, 1);
|
||||
}
|
||||
cpu_notifier_register_done();
|
||||
|
||||
/* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
|
||||
amd_uncore_nb = amd_uncore_l2 = NULL;
|
||||
if (cpu_has_perfctr_l2)
|
||||
perf_pmu_unregister(&amd_l2_pmu);
|
||||
fail_l2:
|
||||
if (cpu_has_perfctr_nb)
|
||||
perf_pmu_unregister(&amd_nb_pmu);
|
||||
if (amd_uncore_l2)
|
||||
free_percpu(amd_uncore_l2);
|
||||
fail_nb:
|
||||
if (amd_uncore_nb)
|
||||
free_percpu(amd_uncore_nb);
|
||||
|
||||
fail_nodev:
|
||||
return ret;
|
||||
}
|
||||
device_initcall(amd_uncore_init);
|
||||
2653
arch/x86/kernel/cpu/perf_event_intel.c
Normal file
File diff suppressed because it is too large
1065
arch/x86/kernel/cpu/perf_event_intel_ds.c
Normal file
File diff suppressed because it is too large
779
arch/x86/kernel/cpu/perf_event_intel_lbr.c
Normal file
@ -0,0 +1,779 @@
#include <linux/perf_event.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <asm/perf_event.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/insn.h>
|
||||
|
||||
#include "perf_event.h"
|
||||
|
||||
enum {
|
||||
LBR_FORMAT_32 = 0x00,
|
||||
LBR_FORMAT_LIP = 0x01,
|
||||
LBR_FORMAT_EIP = 0x02,
|
||||
LBR_FORMAT_EIP_FLAGS = 0x03,
|
||||
LBR_FORMAT_EIP_FLAGS2 = 0x04,
|
||||
LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_EIP_FLAGS2,
|
||||
};
|
||||
|
||||
static enum {
|
||||
LBR_EIP_FLAGS = 1,
|
||||
LBR_TSX = 2,
|
||||
} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
|
||||
[LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
|
||||
[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
|
||||
};
|
||||
|
||||
/*
|
||||
* Intel LBR_SELECT bits
|
||||
* Intel Vol3a, April 2011, Section 16.7 Table 16-10
|
||||
*
|
||||
* Hardware branch filter (not available on all CPUs)
|
||||
*/
|
||||
#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */
|
||||
#define LBR_USER_BIT 1 /* do not capture at ring > 0 */
|
||||
#define LBR_JCC_BIT 2 /* do not capture conditional branches */
|
||||
#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */
|
||||
#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */
|
||||
#define LBR_RETURN_BIT 5 /* do not capture near returns */
|
||||
#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */
|
||||
#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */
|
||||
#define LBR_FAR_BIT 8 /* do not capture far branches */
|
||||
|
||||
#define LBR_KERNEL (1 << LBR_KERNEL_BIT)
|
||||
#define LBR_USER (1 << LBR_USER_BIT)
|
||||
#define LBR_JCC (1 << LBR_JCC_BIT)
|
||||
#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT)
|
||||
#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT)
|
||||
#define LBR_RETURN (1 << LBR_RETURN_BIT)
|
||||
#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT)
|
||||
#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
|
||||
#define LBR_FAR (1 << LBR_FAR_BIT)
|
||||
|
||||
#define LBR_PLM (LBR_KERNEL | LBR_USER)
|
||||
|
||||
#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */
|
||||
#define LBR_NOT_SUPP -1 /* LBR filter not supported */
|
||||
#define LBR_IGN 0 /* ignored */
|
||||
|
||||
#define LBR_ANY \
|
||||
(LBR_JCC |\
|
||||
LBR_REL_CALL |\
|
||||
LBR_IND_CALL |\
|
||||
LBR_RETURN |\
|
||||
LBR_REL_JMP |\
|
||||
LBR_IND_JMP |\
|
||||
LBR_FAR)
|
||||
|
||||
#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
|
||||
#define LBR_FROM_FLAG_IN_TX (1ULL << 62)
|
||||
#define LBR_FROM_FLAG_ABORT (1ULL << 61)
|
||||
|
||||
#define for_each_branch_sample_type(x) \
|
||||
for ((x) = PERF_SAMPLE_BRANCH_USER; \
|
||||
(x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
|
||||
|
||||
/*
 * x86 control flow change classification
 * x86 control flow changes include branches, interrupts, traps, faults
 */
|
||||
enum {
|
||||
X86_BR_NONE = 0, /* unknown */
|
||||
|
||||
X86_BR_USER = 1 << 0, /* branch target is user */
|
||||
X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
|
||||
|
||||
X86_BR_CALL = 1 << 2, /* call */
|
||||
X86_BR_RET = 1 << 3, /* return */
|
||||
X86_BR_SYSCALL = 1 << 4, /* syscall */
|
||||
X86_BR_SYSRET = 1 << 5, /* syscall return */
|
||||
X86_BR_INT = 1 << 6, /* sw interrupt */
|
||||
X86_BR_IRET = 1 << 7, /* return from interrupt */
|
||||
X86_BR_JCC = 1 << 8, /* conditional */
|
||||
X86_BR_JMP = 1 << 9, /* jump */
|
||||
X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
|
||||
X86_BR_IND_CALL = 1 << 11,/* indirect calls */
|
||||
X86_BR_ABORT = 1 << 12,/* transaction abort */
|
||||
X86_BR_IN_TX = 1 << 13,/* in transaction */
|
||||
X86_BR_NO_TX = 1 << 14,/* not in transaction */
|
||||
};
|
||||
|
||||
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
|
||||
#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
|
||||
|
||||
#define X86_BR_ANY \
|
||||
(X86_BR_CALL |\
|
||||
X86_BR_RET |\
|
||||
X86_BR_SYSCALL |\
|
||||
X86_BR_SYSRET |\
|
||||
X86_BR_INT |\
|
||||
X86_BR_IRET |\
|
||||
X86_BR_JCC |\
|
||||
X86_BR_JMP |\
|
||||
X86_BR_IRQ |\
|
||||
X86_BR_ABORT |\
|
||||
X86_BR_IND_CALL)
|
||||
|
||||
#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
|
||||
|
||||
#define X86_BR_ANY_CALL \
|
||||
(X86_BR_CALL |\
|
||||
X86_BR_IND_CALL |\
|
||||
X86_BR_SYSCALL |\
|
||||
X86_BR_IRQ |\
|
||||
X86_BR_INT)
|
||||
|
||||
static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
|
||||
|
||||
/*
|
||||
* We only support LBR implementations that have FREEZE_LBRS_ON_PMI
|
||||
* otherwise it becomes near impossible to get a reliable stack.
|
||||
*/
|
||||
|
||||
static void __intel_pmu_lbr_enable(void)
|
||||
{
|
||||
u64 debugctl;
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
if (cpuc->lbr_sel)
|
||||
wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
|
||||
|
||||
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
|
||||
debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
|
||||
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
|
||||
}
|
||||
|
||||
static void __intel_pmu_lbr_disable(void)
|
||||
{
|
||||
u64 debugctl;
|
||||
|
||||
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
|
||||
debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
|
||||
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
|
||||
}
|
||||
|
||||
static void intel_pmu_lbr_reset_32(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < x86_pmu.lbr_nr; i++)
|
||||
wrmsrl(x86_pmu.lbr_from + i, 0);
|
||||
}
|
||||
|
||||
static void intel_pmu_lbr_reset_64(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < x86_pmu.lbr_nr; i++) {
|
||||
wrmsrl(x86_pmu.lbr_from + i, 0);
|
||||
wrmsrl(x86_pmu.lbr_to + i, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void intel_pmu_lbr_reset(void)
|
||||
{
|
||||
if (!x86_pmu.lbr_nr)
|
||||
return;
|
||||
|
||||
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
|
||||
intel_pmu_lbr_reset_32();
|
||||
else
|
||||
intel_pmu_lbr_reset_64();
|
||||
}
|
||||
|
||||
void intel_pmu_lbr_enable(struct perf_event *event)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
if (!x86_pmu.lbr_nr)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Reset the LBR stack if we changed task context to
|
||||
* avoid data leaks.
|
||||
*/
|
||||
if (event->ctx->task && cpuc->lbr_context != event->ctx) {
|
||||
intel_pmu_lbr_reset();
|
||||
cpuc->lbr_context = event->ctx;
|
||||
}
|
||||
cpuc->br_sel = event->hw.branch_reg.reg;
|
||||
|
||||
cpuc->lbr_users++;
|
||||
}
|
||||
|
||||
void intel_pmu_lbr_disable(struct perf_event *event)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
if (!x86_pmu.lbr_nr)
|
||||
return;
|
||||
|
||||
cpuc->lbr_users--;
|
||||
WARN_ON_ONCE(cpuc->lbr_users < 0);
|
||||
|
||||
if (cpuc->enabled && !cpuc->lbr_users) {
|
||||
__intel_pmu_lbr_disable();
|
||||
/* avoid stale pointer */
|
||||
cpuc->lbr_context = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void intel_pmu_lbr_enable_all(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
if (cpuc->lbr_users)
|
||||
__intel_pmu_lbr_enable();
|
||||
}
|
||||
|
||||
void intel_pmu_lbr_disable_all(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
if (cpuc->lbr_users)
|
||||
__intel_pmu_lbr_disable();
|
||||
}
|
||||
|
||||
/*
|
||||
* TOS = most recently recorded branch
|
||||
*/
|
||||
static inline u64 intel_pmu_lbr_tos(void)
|
||||
{
|
||||
u64 tos;
|
||||
|
||||
rdmsrl(x86_pmu.lbr_tos, tos);
|
||||
|
||||
return tos;
|
||||
}
|
||||
|
||||
static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
|
||||
{
|
||||
unsigned long mask = x86_pmu.lbr_nr - 1;
|
||||
u64 tos = intel_pmu_lbr_tos();
|
||||
int i;
|
||||
|
||||
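/* Walk the LBR ring buffer backwards, starting from the most recent entry (TOS). */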
for (i = 0; i < x86_pmu.lbr_nr; i++) {
|
||||
unsigned long lbr_idx = (tos - i) & mask;
|
||||
union {
|
||||
struct {
|
||||
u32 from;
|
||||
u32 to;
|
||||
};
|
||||
u64 lbr;
|
||||
} msr_lastbranch;
|
||||
|
||||
rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
|
||||
|
||||
cpuc->lbr_entries[i].from = msr_lastbranch.from;
|
||||
cpuc->lbr_entries[i].to = msr_lastbranch.to;
|
||||
cpuc->lbr_entries[i].mispred = 0;
|
||||
cpuc->lbr_entries[i].predicted = 0;
|
||||
cpuc->lbr_entries[i].reserved = 0;
|
||||
}
|
||||
cpuc->lbr_stack.nr = i;
|
||||
}
|
||||
|
||||
/*
|
||||
* Due to lack of segmentation in Linux the effective address (offset)
|
||||
* is the same as the linear address, allowing us to merge the LIP and EIP
|
||||
* LBR formats.
|
||||
*/
|
||||
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
|
||||
{
|
||||
unsigned long mask = x86_pmu.lbr_nr - 1;
|
||||
int lbr_format = x86_pmu.intel_cap.lbr_format;
|
||||
u64 tos = intel_pmu_lbr_tos();
|
||||
int i;
|
||||
int out = 0;
|
||||
|
||||
for (i = 0; i < x86_pmu.lbr_nr; i++) {
|
||||
unsigned long lbr_idx = (tos - i) & mask;
|
||||
u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
|
||||
int skip = 0;
|
||||
int lbr_flags = lbr_desc[lbr_format];
|
||||
|
||||
rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
|
||||
rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
|
||||
|
||||
if (lbr_flags & LBR_EIP_FLAGS) {
|
||||
mis = !!(from & LBR_FROM_FLAG_MISPRED);
|
||||
pred = !mis;
|
||||
skip = 1;
|
||||
}
|
||||
if (lbr_flags & LBR_TSX) {
|
||||
in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
|
||||
abort = !!(from & LBR_FROM_FLAG_ABORT);
|
||||
skip = 3;
|
||||
}
|
||||
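/* Strip the flag bits consumed above by sign-extending the remaining address bits. */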
from = (u64)((((s64)from) << skip) >> skip);
|
||||
|
||||
/*
|
||||
* Some CPUs report duplicated abort records,
|
||||
* with the second entry not having an abort bit set.
|
||||
* Skip them here. This loop runs backwards,
|
||||
* so we need to undo the previous record.
|
||||
* If the abort just happened outside the window
|
||||
* the extra entry cannot be removed.
|
||||
*/
|
||||
if (abort && x86_pmu.lbr_double_abort && out > 0)
|
||||
out--;
|
||||
|
||||
cpuc->lbr_entries[out].from = from;
|
||||
cpuc->lbr_entries[out].to = to;
|
||||
cpuc->lbr_entries[out].mispred = mis;
|
||||
cpuc->lbr_entries[out].predicted = pred;
|
||||
cpuc->lbr_entries[out].in_tx = in_tx;
|
||||
cpuc->lbr_entries[out].abort = abort;
|
||||
cpuc->lbr_entries[out].reserved = 0;
|
||||
out++;
|
||||
}
|
||||
cpuc->lbr_stack.nr = out;
|
||||
}
|
||||
|
||||
void intel_pmu_lbr_read(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
if (!cpuc->lbr_users)
|
||||
return;
|
||||
|
||||
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
|
||||
intel_pmu_lbr_read_32(cpuc);
|
||||
else
|
||||
intel_pmu_lbr_read_64(cpuc);
|
||||
|
||||
intel_pmu_lbr_filter(cpuc);
|
||||
}
|
||||
|
||||
/*
|
||||
* SW filter is used:
|
||||
* - in case there is no HW filter
|
||||
* - in case the HW filter has errata or limitations
|
||||
*/
|
||||
static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
|
||||
{
|
||||
u64 br_type = event->attr.branch_sample_type;
|
||||
int mask = 0;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_USER)
|
||||
mask |= X86_BR_USER;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
|
||||
mask |= X86_BR_KERNEL;
|
||||
|
||||
/* we ignore BRANCH_HV here */
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_ANY)
|
||||
mask |= X86_BR_ANY;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
|
||||
mask |= X86_BR_ANY_CALL;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
|
||||
mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
|
||||
mask |= X86_BR_IND_CALL;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
|
||||
mask |= X86_BR_ABORT;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
|
||||
mask |= X86_BR_IN_TX;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
|
||||
mask |= X86_BR_NO_TX;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_COND)
|
||||
mask |= X86_BR_JCC;
|
||||
|
||||
/*
|
||||
* stash actual user request into reg, it may
|
||||
* be used by fixup code for some CPU
|
||||
*/
|
||||
event->hw.branch_reg.reg = mask;
|
||||
}
|
||||
|
||||
/*
|
||||
* setup the HW LBR filter
|
||||
* Used only when available, may not be enough to disambiguate
|
||||
* all branches, may need the help of the SW filter
|
||||
*/
|
||||
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event_extra *reg;
|
||||
u64 br_type = event->attr.branch_sample_type;
|
||||
u64 mask = 0, m;
|
||||
u64 v;
|
||||
|
||||
for_each_branch_sample_type(m) {
|
||||
if (!(br_type & m))
|
||||
continue;
|
||||
|
||||
v = x86_pmu.lbr_sel_map[m];
|
||||
if (v == LBR_NOT_SUPP)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (v != LBR_IGN)
|
||||
mask |= v;
|
||||
}
|
||||
reg = &event->hw.branch_reg;
|
||||
reg->idx = EXTRA_REG_LBR;
|
||||
|
||||
/* LBR_SELECT operates in suppress mode so invert mask */
|
||||
reg->config = ~mask & x86_pmu.lbr_sel_mask;
|
||||
|
||||
return 0;
|
||||
}
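/*
 * Editorial illustration, not part of the original file: LBR_SELECT
 * works in suppress mode, so a set bit in the MSR *disables* that
 * branch class. The function above therefore ORs together the
 * requested classes and writes the inverted result. The bit values
 * below are hypothetical and only show the inversion; the real layout
 * is in the LBR_* defines earlier in this file.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long lbr_sel_mask = 0x3f;	/* hypothetical: 6 filter bits */
	unsigned long long wanted = 0x05;	/* hypothetical: two classes requested */

	/* suppress everything except the requested classes: prints 0x3a */
	printf("LBR_SELECT = %#llx\n", ~wanted & lbr_sel_mask);
	return 0;
}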
|
||||
|
||||
int intel_pmu_setup_lbr_filter(struct perf_event *event)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* no LBR on this PMU
|
||||
*/
|
||||
if (!x86_pmu.lbr_nr)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/*
|
||||
* setup SW LBR filter
|
||||
*/
|
||||
intel_pmu_setup_sw_lbr_filter(event);
|
||||
|
||||
/*
|
||||
* setup HW LBR filter, if any
|
||||
*/
|
||||
if (x86_pmu.lbr_sel_map)
|
||||
ret = intel_pmu_setup_hw_lbr_filter(event);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* return the type of control flow change at address "from"
|
||||
* instruction is not necessarily a branch (in case of interrupt).
|
||||
*
|
||||
* The branch type returned also includes the priv level of the
|
||||
* target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
|
||||
*
|
||||
* If a branch type is unknown OR the instruction cannot be
|
||||
* decoded (e.g., text page not present), then X86_BR_NONE is
|
||||
* returned.
|
||||
*/
|
||||
static int branch_type(unsigned long from, unsigned long to, int abort)
|
||||
{
|
||||
struct insn insn;
|
||||
void *addr;
|
||||
int bytes, size = MAX_INSN_SIZE;
|
||||
int ret = X86_BR_NONE;
|
||||
int ext, to_plm, from_plm;
|
||||
u8 buf[MAX_INSN_SIZE];
|
||||
int is64 = 0;
|
||||
|
||||
to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
|
||||
from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
|
||||
|
||||
/*
|
||||
* may be zero if the LBR did not fill up after a reset by the time
|
||||
* we get a PMU interrupt
|
||||
*/
|
||||
if (from == 0 || to == 0)
|
||||
return X86_BR_NONE;
|
||||
|
||||
if (abort)
|
||||
return X86_BR_ABORT | to_plm;
|
||||
|
||||
if (from_plm == X86_BR_USER) {
|
||||
/*
|
||||
* can happen if measuring at the user level only
|
||||
* and we interrupt in a kernel thread, e.g., idle.
|
||||
*/
|
||||
if (!current->mm)
|
||||
return X86_BR_NONE;
|
||||
|
||||
/* may fail if text not present */
|
||||
bytes = copy_from_user_nmi(buf, (void __user *)from, size);
|
||||
if (bytes != 0)
|
||||
return X86_BR_NONE;
|
||||
|
||||
addr = buf;
|
||||
} else {
|
||||
/*
|
||||
* The LBR logs any address in the IP, even if the IP just
|
||||
* faulted. This means userspace can control the from address.
|
||||
* Ensure we don't blindly read any address by validating it is
|
||||
* a known text address.
|
||||
*/
|
||||
if (kernel_text_address(from))
|
||||
addr = (void *)from;
|
||||
else
|
||||
return X86_BR_NONE;
|
||||
}
|
||||
|
||||
/*
|
||||
* decoder needs to know the ABI especially
|
||||
* on 64-bit systems running 32-bit apps
|
||||
*/
|
||||
#ifdef CONFIG_X86_64
|
||||
is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
|
||||
#endif
|
||||
insn_init(&insn, addr, is64);
|
||||
insn_get_opcode(&insn);
|
||||
|
||||
switch (insn.opcode.bytes[0]) {
|
||||
case 0xf:
|
||||
switch (insn.opcode.bytes[1]) {
|
||||
case 0x05: /* syscall */
|
||||
case 0x34: /* sysenter */
|
||||
ret = X86_BR_SYSCALL;
|
||||
break;
|
||||
case 0x07: /* sysret */
|
||||
case 0x35: /* sysexit */
|
||||
ret = X86_BR_SYSRET;
|
||||
break;
|
||||
case 0x80 ... 0x8f: /* conditional */
|
||||
ret = X86_BR_JCC;
|
||||
break;
|
||||
default:
|
||||
ret = X86_BR_NONE;
|
||||
}
|
||||
break;
|
||||
case 0x70 ... 0x7f: /* conditional */
|
||||
ret = X86_BR_JCC;
|
||||
break;
|
||||
case 0xc2: /* near ret */
|
||||
case 0xc3: /* near ret */
|
||||
case 0xca: /* far ret */
|
||||
case 0xcb: /* far ret */
|
||||
ret = X86_BR_RET;
|
||||
break;
|
||||
case 0xcf: /* iret */
|
||||
ret = X86_BR_IRET;
|
||||
break;
|
||||
case 0xcc ... 0xce: /* int */
|
||||
ret = X86_BR_INT;
|
||||
break;
|
||||
case 0xe8: /* call near rel */
|
||||
case 0x9a: /* call far absolute */
|
||||
ret = X86_BR_CALL;
|
||||
break;
|
||||
case 0xe0 ... 0xe3: /* loop jmp */
|
||||
ret = X86_BR_JCC;
|
||||
break;
|
||||
case 0xe9 ... 0xeb: /* jmp */
|
||||
ret = X86_BR_JMP;
|
||||
break;
|
||||
case 0xff: /* call near absolute, call far absolute ind */
|
||||
insn_get_modrm(&insn);
|
||||
ext = (insn.modrm.bytes[0] >> 3) & 0x7;
|
||||
switch (ext) {
|
||||
case 2: /* near ind call */
|
||||
case 3: /* far ind call */
|
||||
ret = X86_BR_IND_CALL;
|
||||
break;
|
||||
case 4:
|
||||
case 5:
|
||||
ret = X86_BR_JMP;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ret = X86_BR_NONE;
|
||||
}
|
||||
/*
|
||||
* interrupts, traps, faults (and thus ring transition) may
|
||||
* occur on any instruction. Thus, to classify them correctly,
|
||||
* we need to first look at the from and to priv levels. If they
|
||||
* are different and to is in the kernel, then it indicates
|
||||
* a ring transition. If the from instruction is not a ring
|
||||
* transition instr (syscall, sysenter, int), then it means
|
||||
* it was an irq, trap or fault.
|
||||
*
|
||||
* we have no way of detecting kernel to kernel faults.
|
||||
*/
|
||||
if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
|
||||
&& ret != X86_BR_SYSCALL && ret != X86_BR_INT)
|
||||
ret = X86_BR_IRQ;
|
||||
|
||||
/*
|
||||
* branch priv level determined by target as
|
||||
* is done by HW when LBR_SELECT is implemented
|
||||
*/
|
||||
if (ret != X86_BR_NONE)
|
||||
ret |= to_plm;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* implement actual branch filter based on user demand.
|
||||
* Hardware may not exactly satisfy that request, thus
|
||||
* we need to inspect opcodes. Mismatched branches are
|
||||
* discarded. Therefore, the number of branches returned
|
||||
* in PERF_SAMPLE_BRANCH_STACK sample may vary.
|
||||
*/
|
||||
static void
|
||||
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
|
||||
{
|
||||
u64 from, to;
|
||||
int br_sel = cpuc->br_sel;
|
||||
int i, j, type;
|
||||
bool compress = false;
|
||||
|
||||
/* if sampling all branches, then nothing to filter */
|
||||
if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
|
||||
return;
|
||||
|
||||
for (i = 0; i < cpuc->lbr_stack.nr; i++) {
|
||||
|
||||
from = cpuc->lbr_entries[i].from;
|
||||
to = cpuc->lbr_entries[i].to;
|
||||
|
||||
type = branch_type(from, to, cpuc->lbr_entries[i].abort);
|
||||
if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
|
||||
if (cpuc->lbr_entries[i].in_tx)
|
||||
type |= X86_BR_IN_TX;
|
||||
else
|
||||
type |= X86_BR_NO_TX;
|
||||
}
|
||||
|
||||
/* if type does not correspond, then discard */
|
||||
if (type == X86_BR_NONE || (br_sel & type) != type) {
|
||||
cpuc->lbr_entries[i].from = 0;
|
||||
compress = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!compress)
|
||||
return;
|
||||
|
||||
/* remove all entries with from=0 */
|
||||
for (i = 0; i < cpuc->lbr_stack.nr; ) {
|
||||
if (!cpuc->lbr_entries[i].from) {
|
||||
j = i;
|
||||
while (++j < cpuc->lbr_stack.nr)
|
||||
cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
|
||||
cpuc->lbr_stack.nr--;
|
||||
if (!cpuc->lbr_entries[i].from)
|
||||
continue;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Map interface branch filters onto LBR filters
|
||||
*/
|
||||
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
|
||||
[PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
|
||||
[PERF_SAMPLE_BRANCH_USER] = LBR_USER,
|
||||
[PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
|
||||
[PERF_SAMPLE_BRANCH_HV] = LBR_IGN,
|
||||
[PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP
|
||||
| LBR_IND_JMP | LBR_FAR,
|
||||
/*
|
||||
* NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
|
||||
*/
|
||||
[PERF_SAMPLE_BRANCH_ANY_CALL] =
|
||||
LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
|
||||
/*
|
||||
* NHM/WSM erratum: must include IND_JMP to capture IND_CALL
|
||||
*/
|
||||
[PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
|
||||
[PERF_SAMPLE_BRANCH_COND] = LBR_JCC,
|
||||
};
|
||||
|
||||
static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
|
||||
[PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
|
||||
[PERF_SAMPLE_BRANCH_USER] = LBR_USER,
|
||||
[PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
|
||||
[PERF_SAMPLE_BRANCH_HV] = LBR_IGN,
|
||||
[PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR,
|
||||
[PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL
|
||||
| LBR_FAR,
|
||||
[PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL,
|
||||
[PERF_SAMPLE_BRANCH_COND] = LBR_JCC,
|
||||
};
|
||||
|
||||
/* core */
|
||||
void __init intel_pmu_lbr_init_core(void)
|
||||
{
|
||||
x86_pmu.lbr_nr = 4;
|
||||
x86_pmu.lbr_tos = MSR_LBR_TOS;
|
||||
x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
|
||||
x86_pmu.lbr_to = MSR_LBR_CORE_TO;
|
||||
|
||||
/*
|
||||
* SW branch filter usage:
|
||||
* - compensate for lack of HW filter
|
||||
*/
|
||||
pr_cont("4-deep LBR, ");
|
||||
}
|
||||
|
||||
/* nehalem/westmere */
|
||||
void __init intel_pmu_lbr_init_nhm(void)
|
||||
{
|
||||
x86_pmu.lbr_nr = 16;
|
||||
x86_pmu.lbr_tos = MSR_LBR_TOS;
|
||||
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
|
||||
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
|
||||
|
||||
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
|
||||
x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
|
||||
|
||||
/*
|
||||
* SW branch filter usage:
|
||||
* - workaround LBR_SEL errata (see above)
|
||||
* - support syscall, sysret capture.
|
||||
* That requires LBR_FAR but that means far
|
||||
* jmps need to be filtered out
|
||||
*/
|
||||
pr_cont("16-deep LBR, ");
|
||||
}
|
||||
|
||||
/* sandy bridge */
|
||||
void __init intel_pmu_lbr_init_snb(void)
|
||||
{
|
||||
x86_pmu.lbr_nr = 16;
|
||||
x86_pmu.lbr_tos = MSR_LBR_TOS;
|
||||
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
|
||||
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
|
||||
|
||||
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
|
||||
x86_pmu.lbr_sel_map = snb_lbr_sel_map;
|
||||
|
||||
/*
|
||||
* SW branch filter usage:
|
||||
* - support syscall, sysret capture.
|
||||
* That requires LBR_FAR but that means far
|
||||
* jmps need to be filtered out
|
||||
*/
|
||||
pr_cont("16-deep LBR, ");
|
||||
}
|
||||
|
||||
/* atom */
|
||||
void __init intel_pmu_lbr_init_atom(void)
|
||||
{
|
||||
/*
|
||||
* only models starting at stepping 10 seem
|
||||
* to have an operational LBR which can freeze
|
||||
* on PMU interrupt
|
||||
*/
|
||||
if (boot_cpu_data.x86_model == 28
|
||||
&& boot_cpu_data.x86_mask < 10) {
|
||||
pr_cont("LBR disabled due to erratum");
|
||||
return;
|
||||
}
|
||||
|
||||
x86_pmu.lbr_nr = 8;
|
||||
x86_pmu.lbr_tos = MSR_LBR_TOS;
|
||||
x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
|
||||
x86_pmu.lbr_to = MSR_LBR_CORE_TO;
|
||||
|
||||
/*
|
||||
* SW branch filter usage:
|
||||
* - compensate for lack of HW filter
|
||||
*/
|
||||
pr_cont("8-deep LBR, ");
|
||||
}
|
||||
714
arch/x86/kernel/cpu/perf_event_intel_rapl.c
Normal file
@ -0,0 +1,714 @@
/*
|
||||
* perf_event_intel_rapl.c: support Intel RAPL energy consumption counters
|
||||
* Copyright (C) 2013 Google, Inc., Stephane Eranian
|
||||
*
|
||||
* Intel RAPL interface is specified in the IA-32 Manual Vol3b
|
||||
* section 14.7.1 (September 2013)
|
||||
*
|
||||
* RAPL provides more controls than just reporting energy consumption
|
||||
* however here we only expose the 3 energy consumption free running
|
||||
* counters (pp0, pkg, dram).
|
||||
*
|
||||
* Each of those counters increments in a power unit defined by the
|
||||
* RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules
|
||||
* but it can vary.
|
||||
*
|
||||
* Counter to rapl events mappings:
|
||||
*
|
||||
* pp0 counter: consumption of all physical cores (power plane 0)
|
||||
* event: rapl_energy_cores
|
||||
* perf code: 0x1
|
||||
*
|
||||
* pkg counter: consumption of the whole processor package
|
||||
* event: rapl_energy_pkg
|
||||
* perf code: 0x2
|
||||
*
|
||||
* dram counter: consumption of the dram domain (servers only)
|
||||
* event: rapl_energy_dram
|
||||
* perf code: 0x3
|
||||
*
|
||||
* gpu counter: consumption of the builtin-gpu domain (client only)
|
||||
* event: rapl_energy_gpu
|
||||
* perf code: 0x4
|
||||
*
|
||||
* We manage those counters as free running (read-only). They may be
|
||||
* used simultaneously by other tools, such as turbostat.
|
||||
*
|
||||
* The events only support system-wide mode counting. There is no
|
||||
* sampling support because it does not make sense and is not
|
||||
* supported by the RAPL hardware.
|
||||
*
|
||||
* Because we want to avoid floating-point operations in the kernel,
|
||||
* the events are all reported in fixed point arithmetic (32.32).
|
||||
* Tools must adjust the counts to convert them to Watts using
|
||||
* the duration of the measurement. Tools may use a function such as
|
||||
* ldexp(raw_count, -32);
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include "perf_event.h"
|
||||
|
||||
/*
|
||||
* RAPL energy status counters
|
||||
*/
|
||||
#define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */
|
||||
#define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */
|
||||
#define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */
|
||||
#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */
|
||||
#define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */
|
||||
#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */
|
||||
#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
|
||||
#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
|
||||
|
||||
/* Clients have PP0, PKG, PP1 (GPU) */
|
||||
#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
|
||||
1<<RAPL_IDX_PKG_NRG_STAT|\
|
||||
1<<RAPL_IDX_PP1_NRG_STAT)
|
||||
|
||||
/* Servers have PP0, PKG, RAM */
|
||||
#define RAPL_IDX_SRV (1<<RAPL_IDX_PP0_NRG_STAT|\
|
||||
1<<RAPL_IDX_PKG_NRG_STAT|\
|
||||
1<<RAPL_IDX_RAM_NRG_STAT)
|
||||
|
||||
/* Haswell clients have PP0, PKG, RAM, PP1 */
|
||||
#define RAPL_IDX_HSW (1<<RAPL_IDX_PP0_NRG_STAT|\
|
||||
1<<RAPL_IDX_PKG_NRG_STAT|\
|
||||
1<<RAPL_IDX_RAM_NRG_STAT|\
|
||||
1<<RAPL_IDX_PP1_NRG_STAT)
|
||||
|
||||
/*
|
||||
* event code: LSB 8 bits, passed in attr->config
|
||||
* any other bit is reserved
|
||||
*/
|
||||
#define RAPL_EVENT_MASK 0xFFULL
|
||||
|
||||
#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \
|
||||
static ssize_t __rapl_##_var##_show(struct kobject *kobj, \
|
||||
struct kobj_attribute *attr, \
|
||||
char *page) \
|
||||
{ \
|
||||
BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
|
||||
return sprintf(page, _format "\n"); \
|
||||
} \
|
||||
static struct kobj_attribute format_attr_##_var = \
|
||||
__ATTR(_name, 0444, __rapl_##_var##_show, NULL)
|
||||
|
||||
#define RAPL_EVENT_DESC(_name, _config) \
|
||||
{ \
|
||||
.attr = __ATTR(_name, 0444, rapl_event_show, NULL), \
|
||||
.config = _config, \
|
||||
}
|
||||
|
||||
#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */
|
||||
|
||||
struct rapl_pmu {
|
||||
spinlock_t lock;
|
||||
int hw_unit; /* 1/2^hw_unit Joule */
|
||||
int n_active; /* number of active events */
|
||||
struct list_head active_list;
|
||||
struct pmu *pmu; /* pointer to rapl_pmu_class */
|
||||
ktime_t timer_interval; /* in ktime_t unit */
|
||||
struct hrtimer hrtimer;
|
||||
};
|
||||
|
||||
static struct pmu rapl_pmu_class;
|
||||
static cpumask_t rapl_cpu_mask;
|
||||
static int rapl_cntr_mask;
|
||||
|
||||
static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
|
||||
static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
|
||||
|
||||
static inline u64 rapl_read_counter(struct perf_event *event)
|
||||
{
|
||||
u64 raw;
|
||||
rdmsrl(event->hw.event_base, raw);
|
||||
return raw;
|
||||
}
|
||||
|
||||
static inline u64 rapl_scale(u64 v)
|
||||
{
|
||||
/*
|
||||
* scale delta to smallest unit (1/2^32)
|
||||
* users must then scale back: count * 1/2^32 to get Joules,
|
||||
* or use ldexp(count, -32).
|
||||
* Watts = Joules/Time delta
|
||||
*/
|
||||
return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit);
|
||||
}
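/*
 * Editorial illustration, not part of the original file: counts
 * emitted by this PMU are in 32.32 fixed point, i.e. units of 2^-32
 * Joules, regardless of the hardware unit cached in hw_unit (that is
 * what rapl_scale() above normalizes). A hypothetical user-space
 * consumer converts a raw count like this (link with -lm):
 */
#include <math.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t raw_count = 429496729600ULL;	/* sample value: 100 * 2^32 */
	double joules = ldexp((double)raw_count, -32);	/* 100.0 J */
	double seconds = 2.0;			/* sample measurement window */

	printf("%.3f J over %.1f s = %.3f W\n", joules, seconds, joules / seconds);
	return 0;
}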
|
||||
|
||||
static u64 rapl_event_update(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 prev_raw_count, new_raw_count;
|
||||
s64 delta, sdelta;
|
||||
int shift = RAPL_CNTR_WIDTH;
|
||||
|
||||
again:
|
||||
prev_raw_count = local64_read(&hwc->prev_count);
|
||||
rdmsrl(event->hw.event_base, new_raw_count);
|
||||
|
||||
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
|
||||
new_raw_count) != prev_raw_count) {
|
||||
cpu_relax();
|
||||
goto again;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now we have the new raw value and have updated the prev
|
||||
* timestamp already. We can now calculate the elapsed delta
|
||||
* (event-)time and add that to the generic event.
|
||||
*
|
||||
* Careful, not all hw sign-extends above the physical width
|
||||
* of the count.
|
||||
*/
|
||||
delta = (new_raw_count << shift) - (prev_raw_count << shift);
|
||||
delta >>= shift;
|
||||
|
||||
sdelta = rapl_scale(delta);
|
||||
|
||||
local64_add(sdelta, &event->count);
|
||||
|
||||
return new_raw_count;
|
||||
}
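/*
 * Editorial illustration, not part of the original file: the shift by
 * (64 - RAPL_CNTR_WIDTH) in rapl_event_update() above makes the
 * subtraction safe across a 32-bit counter wrap. A sketch with
 * made-up values:
 */
#include <stdint.h>
#include <stdio.h>

static int64_t delta32(uint64_t prev, uint64_t now)
{
	const int shift = 32;	/* RAPL_CNTR_WIDTH */
	uint64_t diff = (now << shift) - (prev << shift);

	/* both values are left-aligned, so the wrap cancels out */
	return (int64_t)diff >> shift;
}

int main(void)
{
	/* counter wrapped from near the top of 32 bits back to a small value */
	printf("%lld\n", (long long)delta32(0xfffffff0ULL, 0x10ULL));	/* 32 */
	return 0;
}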
|
||||
|
||||
static void rapl_start_hrtimer(struct rapl_pmu *pmu)
|
||||
{
|
||||
__hrtimer_start_range_ns(&pmu->hrtimer,
|
||||
pmu->timer_interval, 0,
|
||||
HRTIMER_MODE_REL_PINNED, 0);
|
||||
}
|
||||
|
||||
static void rapl_stop_hrtimer(struct rapl_pmu *pmu)
|
||||
{
|
||||
hrtimer_cancel(&pmu->hrtimer);
|
||||
}
|
||||
|
||||
static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
|
||||
{
|
||||
struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
|
||||
struct perf_event *event;
|
||||
unsigned long flags;
|
||||
|
||||
if (!pmu->n_active)
|
||||
return HRTIMER_NORESTART;
|
||||
|
||||
spin_lock_irqsave(&pmu->lock, flags);
|
||||
|
||||
list_for_each_entry(event, &pmu->active_list, active_entry) {
|
||||
rapl_event_update(event);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&pmu->lock, flags);
|
||||
|
||||
hrtimer_forward_now(hrtimer, pmu->timer_interval);
|
||||
|
||||
return HRTIMER_RESTART;
|
||||
}
|
||||
|
||||
static void rapl_hrtimer_init(struct rapl_pmu *pmu)
|
||||
{
|
||||
struct hrtimer *hr = &pmu->hrtimer;
|
||||
|
||||
hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
hr->function = rapl_hrtimer_handle;
|
||||
}
|
||||
|
||||
static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
|
||||
struct perf_event *event)
|
||||
{
|
||||
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
|
||||
return;
|
||||
|
||||
event->hw.state = 0;
|
||||
|
||||
list_add_tail(&event->active_entry, &pmu->active_list);
|
||||
|
||||
local64_set(&event->hw.prev_count, rapl_read_counter(event));
|
||||
|
||||
pmu->n_active++;
|
||||
if (pmu->n_active == 1)
|
||||
rapl_start_hrtimer(pmu);
|
||||
}
|
||||
|
||||
static void rapl_pmu_event_start(struct perf_event *event, int mode)
|
||||
{
|
||||
struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&pmu->lock, flags);
|
||||
__rapl_pmu_event_start(pmu, event);
|
||||
spin_unlock_irqrestore(&pmu->lock, flags);
|
||||
}
|
||||
|
||||
static void rapl_pmu_event_stop(struct perf_event *event, int mode)
|
||||
{
|
||||
struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&pmu->lock, flags);
|
||||
|
||||
/* mark event as deactivated and stopped */
|
||||
if (!(hwc->state & PERF_HES_STOPPED)) {
|
||||
WARN_ON_ONCE(pmu->n_active <= 0);
|
||||
pmu->n_active--;
|
||||
if (pmu->n_active == 0)
|
||||
rapl_stop_hrtimer(pmu);
|
||||
|
||||
list_del(&event->active_entry);
|
||||
|
||||
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
|
||||
hwc->state |= PERF_HES_STOPPED;
|
||||
}
|
||||
|
||||
/* check if update of sw counter is necessary */
|
||||
if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
|
||||
/*
|
||||
* Drain the remaining delta count out of an event
|
||||
* that we are disabling:
|
||||
*/
|
||||
rapl_event_update(event);
|
||||
hwc->state |= PERF_HES_UPTODATE;
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&pmu->lock, flags);
|
||||
}
|
||||
|
||||
static int rapl_pmu_event_add(struct perf_event *event, int mode)
|
||||
{
|
||||
struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&pmu->lock, flags);
|
||||
|
||||
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
|
||||
|
||||
if (mode & PERF_EF_START)
|
||||
__rapl_pmu_event_start(pmu, event);
|
||||
|
||||
spin_unlock_irqrestore(&pmu->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void rapl_pmu_event_del(struct perf_event *event, int flags)
|
||||
{
|
||||
rapl_pmu_event_stop(event, PERF_EF_UPDATE);
|
||||
}
|
||||
|
||||
static int rapl_pmu_event_init(struct perf_event *event)
|
||||
{
|
||||
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
|
||||
int bit, msr, ret = 0;
|
||||
|
||||
/* only look at RAPL events */
|
||||
if (event->attr.type != rapl_pmu_class.type)
|
||||
return -ENOENT;
|
||||
|
||||
/* check only supported bits are set */
|
||||
if (event->attr.config & ~RAPL_EVENT_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* check event is known (determines counter)
|
||||
*/
|
||||
switch (cfg) {
|
||||
case INTEL_RAPL_PP0:
|
||||
bit = RAPL_IDX_PP0_NRG_STAT;
|
||||
msr = MSR_PP0_ENERGY_STATUS;
|
||||
break;
|
||||
case INTEL_RAPL_PKG:
|
||||
bit = RAPL_IDX_PKG_NRG_STAT;
|
||||
msr = MSR_PKG_ENERGY_STATUS;
|
||||
break;
|
||||
case INTEL_RAPL_RAM:
|
||||
bit = RAPL_IDX_RAM_NRG_STAT;
|
||||
msr = MSR_DRAM_ENERGY_STATUS;
|
||||
break;
|
||||
case INTEL_RAPL_PP1:
|
||||
bit = RAPL_IDX_PP1_NRG_STAT;
|
||||
msr = MSR_PP1_ENERGY_STATUS;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
/* check event supported */
|
||||
if (!(rapl_cntr_mask & (1 << bit)))
|
||||
return -EINVAL;
|
||||
|
||||
/* unsupported modes and filters */
|
||||
if (event->attr.exclude_user ||
|
||||
event->attr.exclude_kernel ||
|
||||
event->attr.exclude_hv ||
|
||||
event->attr.exclude_idle ||
|
||||
event->attr.exclude_host ||
|
||||
event->attr.exclude_guest ||
|
||||
event->attr.sample_period) /* no sampling */
|
||||
return -EINVAL;
|
||||
|
||||
/* must be done before validate_group */
|
||||
event->hw.event_base = msr;
|
||||
event->hw.config = cfg;
|
||||
event->hw.idx = bit;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void rapl_pmu_event_read(struct perf_event *event)
|
||||
{
|
||||
rapl_event_update(event);
|
||||
}
|
||||
|
||||
static ssize_t rapl_get_attr_cpumask(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &rapl_cpu_mask);
|
||||
|
||||
buf[n++] = '\n';
|
||||
buf[n] = '\0';
|
||||
return n;
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
|
||||
|
||||
static struct attribute *rapl_pmu_attrs[] = {
|
||||
&dev_attr_cpumask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group rapl_pmu_attr_group = {
|
||||
.attrs = rapl_pmu_attrs,
|
||||
};
|
||||
|
||||
EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
|
||||
EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
|
||||
EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
|
||||
EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04");
|
||||
|
||||
EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
|
||||
EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules");
|
||||
EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules");
|
||||
EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules");
|
||||
|
||||
/*
|
||||
* we compute in 0.23 nJ increments regardless of MSR
|
||||
*/
|
||||
EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
|
||||
EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10");
|
||||
EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
|
||||
EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
|
||||
|
||||
static struct attribute *rapl_events_srv_attr[] = {
|
||||
EVENT_PTR(rapl_cores),
|
||||
EVENT_PTR(rapl_pkg),
|
||||
EVENT_PTR(rapl_ram),
|
||||
|
||||
EVENT_PTR(rapl_cores_unit),
|
||||
EVENT_PTR(rapl_pkg_unit),
|
||||
EVENT_PTR(rapl_ram_unit),
|
||||
|
||||
EVENT_PTR(rapl_cores_scale),
|
||||
EVENT_PTR(rapl_pkg_scale),
|
||||
EVENT_PTR(rapl_ram_scale),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *rapl_events_cln_attr[] = {
|
||||
EVENT_PTR(rapl_cores),
|
||||
EVENT_PTR(rapl_pkg),
|
||||
EVENT_PTR(rapl_gpu),
|
||||
|
||||
EVENT_PTR(rapl_cores_unit),
|
||||
EVENT_PTR(rapl_pkg_unit),
|
||||
EVENT_PTR(rapl_gpu_unit),
|
||||
|
||||
EVENT_PTR(rapl_cores_scale),
|
||||
EVENT_PTR(rapl_pkg_scale),
|
||||
EVENT_PTR(rapl_gpu_scale),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *rapl_events_hsw_attr[] = {
|
||||
EVENT_PTR(rapl_cores),
|
||||
EVENT_PTR(rapl_pkg),
|
||||
EVENT_PTR(rapl_gpu),
|
||||
EVENT_PTR(rapl_ram),
|
||||
|
||||
EVENT_PTR(rapl_cores_unit),
|
||||
EVENT_PTR(rapl_pkg_unit),
|
||||
EVENT_PTR(rapl_gpu_unit),
|
||||
EVENT_PTR(rapl_ram_unit),
|
||||
|
||||
EVENT_PTR(rapl_cores_scale),
|
||||
EVENT_PTR(rapl_pkg_scale),
|
||||
EVENT_PTR(rapl_gpu_scale),
|
||||
EVENT_PTR(rapl_ram_scale),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group rapl_pmu_events_group = {
|
||||
.name = "events",
|
||||
.attrs = NULL, /* patched at runtime */
|
||||
};
|
||||
|
||||
DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
|
||||
static struct attribute *rapl_formats_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group rapl_pmu_format_group = {
|
||||
.name = "format",
|
||||
.attrs = rapl_formats_attr,
|
||||
};
|
||||
|
||||
const struct attribute_group *rapl_attr_groups[] = {
|
||||
&rapl_pmu_attr_group,
|
||||
&rapl_pmu_format_group,
|
||||
&rapl_pmu_events_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct pmu rapl_pmu_class = {
|
||||
.attr_groups = rapl_attr_groups,
|
||||
.task_ctx_nr = perf_invalid_context, /* system-wide only */
|
||||
.event_init = rapl_pmu_event_init,
|
||||
.add = rapl_pmu_event_add, /* must have */
|
||||
.del = rapl_pmu_event_del, /* must have */
|
||||
.start = rapl_pmu_event_start,
|
||||
.stop = rapl_pmu_event_stop,
|
||||
.read = rapl_pmu_event_read,
|
||||
};
|
||||
|
||||
static void rapl_cpu_exit(int cpu)
|
||||
{
|
||||
struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
|
||||
int i, phys_id = topology_physical_package_id(cpu);
|
||||
int target = -1;
|
||||
|
||||
/* find a new cpu on same package */
|
||||
for_each_online_cpu(i) {
|
||||
if (i == cpu)
|
||||
continue;
|
||||
if (phys_id == topology_physical_package_id(i)) {
|
||||
target = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* clear cpu from cpumask
|
||||
* if it was set in the cpumask and another cpu on the package is online,
|
||||
* then move to new cpu
|
||||
*/
|
||||
if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0)
|
||||
cpumask_set_cpu(target, &rapl_cpu_mask);
|
||||
|
||||
WARN_ON(cpumask_empty(&rapl_cpu_mask));
|
||||
/*
|
||||
* migrate events and context to new cpu
|
||||
*/
|
||||
if (target >= 0)
|
||||
perf_pmu_migrate_context(pmu->pmu, cpu, target);
|
||||
|
||||
/* cancel overflow polling timer for CPU */
|
||||
rapl_stop_hrtimer(pmu);
|
||||
}
|
||||
|
||||
static void rapl_cpu_init(int cpu)
|
||||
{
|
||||
int i, phys_id = topology_physical_package_id(cpu);
|
||||
|
||||
/* check if phys_id is already covered */
|
||||
for_each_cpu(i, &rapl_cpu_mask) {
|
||||
if (phys_id == topology_physical_package_id(i))
|
||||
return;
|
||||
}
|
||||
/* was not found, so add it */
|
||||
cpumask_set_cpu(cpu, &rapl_cpu_mask);
|
||||
}
|
||||
|
||||
static int rapl_cpu_prepare(int cpu)
|
||||
{
|
||||
struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
|
||||
int phys_id = topology_physical_package_id(cpu);
|
||||
u64 ms;
|
||||
u64 msr_rapl_power_unit_bits;
|
||||
|
||||
if (pmu)
|
||||
return 0;
|
||||
|
||||
if (phys_id < 0)
|
||||
return -1;
|
||||
|
||||
/* protect rdmsrl() to handle virtualization */
|
||||
if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
|
||||
return -1;
|
||||
|
||||
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
|
||||
if (!pmu)
|
||||
return -1;
|
||||
|
||||
spin_lock_init(&pmu->lock);
|
||||
|
||||
INIT_LIST_HEAD(&pmu->active_list);
|
||||
|
||||
/*
|
||||
* grab power unit as: 1/2^unit Joules
|
||||
*
|
||||
* we cache in local PMU instance
|
||||
*/
|
||||
pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
|
||||
pmu->pmu = &rapl_pmu_class;
|
||||
|
||||
/*
|
||||
* use reference of 200W for scaling the timeout
|
||||
* to avoid missing counter overflows.
|
||||
* 200W = 200 Joules/sec
|
||||
* divide interval by 2 to avoid lockstep (2 * 100)
|
||||
* if hw unit is 32, then we use 2 ms 1/200/2
|
||||
*/
|
||||
if (pmu->hw_unit < 32)
|
||||
ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1));
|
||||
else
|
||||
ms = 2;
|
||||
|
||||
pmu->timer_interval = ms_to_ktime(ms);
|
||||
|
||||
rapl_hrtimer_init(pmu);
|
||||
|
||||
/* set RAPL pmu for this cpu for now */
|
||||
per_cpu(rapl_pmu, cpu) = pmu;
|
||||
per_cpu(rapl_pmu_to_free, cpu) = NULL;
|
||||
|
||||
return 0;
|
||||
}
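/*
 * Editorial illustration, not part of the original file: a worked
 * example of the overflow-timeout formula above. With the common
 * SandyBridge energy unit of 2^-16 J (hw_unit == 16), the 32-bit
 * counter wraps after 2^32 * 2^-16 J = 65536 J, i.e. roughly 327 s at
 * the assumed 200 W reference; halving that gives the polling period.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int hw_unit = 16;	/* sample value, normally read from MSR_RAPL_POWER_UNIT */
	uint64_t ms;

	if (hw_unit < 32)
		ms = (1000 / (2 * 100)) * (1ULL << (32 - hw_unit - 1));
	else
		ms = 2;

	printf("%llu ms (~%.1f s)\n", (unsigned long long)ms, ms / 1000.0);	/* 163840 ms */
	return 0;
}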
|
||||
|
||||
static void rapl_cpu_kfree(int cpu)
|
||||
{
|
||||
struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu);
|
||||
|
||||
kfree(pmu);
|
||||
|
||||
per_cpu(rapl_pmu_to_free, cpu) = NULL;
|
||||
}
|
||||
|
||||
static int rapl_cpu_dying(int cpu)
|
||||
{
|
||||
struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
|
||||
|
||||
if (!pmu)
|
||||
return 0;
|
||||
|
||||
per_cpu(rapl_pmu, cpu) = NULL;
|
||||
|
||||
per_cpu(rapl_pmu_to_free, cpu) = pmu;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rapl_cpu_notifier(struct notifier_block *self,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
unsigned int cpu = (long)hcpu;
|
||||
|
||||
switch (action & ~CPU_TASKS_FROZEN) {
|
||||
case CPU_UP_PREPARE:
|
||||
rapl_cpu_prepare(cpu);
|
||||
break;
|
||||
case CPU_STARTING:
|
||||
rapl_cpu_init(cpu);
|
||||
break;
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_DYING:
|
||||
rapl_cpu_dying(cpu);
|
||||
break;
|
||||
case CPU_ONLINE:
|
||||
case CPU_DEAD:
|
||||
rapl_cpu_kfree(cpu);
|
||||
break;
|
||||
case CPU_DOWN_PREPARE:
|
||||
rapl_cpu_exit(cpu);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static const struct x86_cpu_id rapl_cpu_match[] = {
|
||||
[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
|
||||
[1] = {},
|
||||
};
|
||||
|
||||
static int __init rapl_pmu_init(void)
|
||||
{
|
||||
struct rapl_pmu *pmu;
|
||||
int cpu, ret;
|
||||
|
||||
/*
|
||||
* check for Intel processor family 6
|
||||
*/
|
||||
if (!x86_match_cpu(rapl_cpu_match))
|
||||
return 0;
|
||||
|
||||
/* check supported CPU */
|
||||
switch (boot_cpu_data.x86_model) {
|
||||
case 42: /* Sandy Bridge */
|
||||
case 58: /* Ivy Bridge */
|
||||
rapl_cntr_mask = RAPL_IDX_CLN;
|
||||
rapl_pmu_events_group.attrs = rapl_events_cln_attr;
|
||||
break;
|
||||
case 60: /* Haswell */
|
||||
case 69: /* Haswell ULT */
|
||||
rapl_cntr_mask = RAPL_IDX_HSW;
|
||||
rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
|
||||
break;
|
||||
case 45: /* Sandy Bridge-EP */
|
||||
case 62: /* IvyTown */
|
||||
rapl_cntr_mask = RAPL_IDX_SRV;
|
||||
rapl_pmu_events_group.attrs = rapl_events_srv_attr;
|
||||
break;
|
||||
|
||||
default:
|
||||
/* unsupported */
|
||||
return 0;
|
||||
}
|
||||
|
||||
cpu_notifier_register_begin();
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
ret = rapl_cpu_prepare(cpu);
|
||||
if (ret)
|
||||
goto out;
|
||||
rapl_cpu_init(cpu);
|
||||
}
|
||||
|
||||
__perf_cpu_notifier(rapl_cpu_notifier);
|
||||
|
||||
ret = perf_pmu_register(&rapl_pmu_class, "power", -1);
|
||||
if (WARN_ON(ret)) {
|
||||
pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret);
|
||||
cpu_notifier_register_done();
|
||||
return -1;
|
||||
}
|
||||
|
||||
pmu = __this_cpu_read(rapl_pmu);
|
||||
|
||||
pr_info("RAPL PMU detected, hw unit 2^-%d Joules,"
|
||||
" API unit is 2^-32 Joules,"
|
||||
" %d fixed counters"
|
||||
" %llu ms ovfl timer\n",
|
||||
pmu->hw_unit,
|
||||
hweight32(rapl_cntr_mask),
|
||||
ktime_to_ms(pmu->timer_interval));
|
||||
|
||||
out:
|
||||
cpu_notifier_register_done();
|
||||
|
||||
return 0;
|
||||
}
|
||||
device_initcall(rapl_pmu_init);
|
||||
1318
arch/x86/kernel/cpu/perf_event_intel_uncore.c
Normal file
File diff suppressed because it is too large
339
arch/x86/kernel/cpu/perf_event_intel_uncore.h
Normal file
@ -0,0 +1,339 @@
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include "perf_event.h"
|
||||
|
||||
#define UNCORE_PMU_NAME_LEN 32
|
||||
#define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC)
|
||||
#define UNCORE_SNB_IMC_HRTIMER_INTERVAL (5ULL * NSEC_PER_SEC)
|
||||
|
||||
#define UNCORE_FIXED_EVENT 0xff
|
||||
#define UNCORE_PMC_IDX_MAX_GENERIC 8
|
||||
#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
|
||||
#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
|
||||
|
||||
#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
|
||||
#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
|
||||
#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
|
||||
#define UNCORE_EXTRA_PCI_DEV 0xff
|
||||
#define UNCORE_EXTRA_PCI_DEV_MAX 3
|
||||
|
||||
/* support up to 8 sockets */
|
||||
#define UNCORE_SOCKET_MAX 8
|
||||
|
||||
#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
|
||||
|
||||
struct intel_uncore_ops;
|
||||
struct intel_uncore_pmu;
|
||||
struct intel_uncore_box;
|
||||
struct uncore_event_desc;
|
||||
|
||||
struct intel_uncore_type {
|
||||
const char *name;
|
||||
int num_counters;
|
||||
int num_boxes;
|
||||
int perf_ctr_bits;
|
||||
int fixed_ctr_bits;
|
||||
unsigned perf_ctr;
|
||||
unsigned event_ctl;
|
||||
unsigned event_mask;
|
||||
unsigned fixed_ctr;
|
||||
unsigned fixed_ctl;
|
||||
unsigned box_ctl;
|
||||
unsigned msr_offset;
|
||||
unsigned num_shared_regs:8;
|
||||
unsigned single_fixed:1;
|
||||
unsigned pair_ctr_ctl:1;
|
||||
unsigned *msr_offsets;
|
||||
struct event_constraint unconstrainted;
|
||||
struct event_constraint *constraints;
|
||||
struct intel_uncore_pmu *pmus;
|
||||
struct intel_uncore_ops *ops;
|
||||
struct uncore_event_desc *event_descs;
|
||||
const struct attribute_group *attr_groups[4];
|
||||
struct pmu *pmu; /* for custom pmu ops */
|
||||
};
|
||||
|
||||
#define pmu_group attr_groups[0]
|
||||
#define format_group attr_groups[1]
|
||||
#define events_group attr_groups[2]
|
||||
|
||||
struct intel_uncore_ops {
|
||||
void (*init_box)(struct intel_uncore_box *);
|
||||
void (*disable_box)(struct intel_uncore_box *);
|
||||
void (*enable_box)(struct intel_uncore_box *);
|
||||
void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
|
||||
void (*enable_event)(struct intel_uncore_box *, struct perf_event *);
|
||||
u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *);
|
||||
int (*hw_config)(struct intel_uncore_box *, struct perf_event *);
|
||||
struct event_constraint *(*get_constraint)(struct intel_uncore_box *,
|
||||
struct perf_event *);
|
||||
void (*put_constraint)(struct intel_uncore_box *, struct perf_event *);
|
||||
};
|
||||
|
||||
struct intel_uncore_pmu {
|
||||
struct pmu pmu;
|
||||
char name[UNCORE_PMU_NAME_LEN];
|
||||
int pmu_idx;
|
||||
int func_id;
|
||||
struct intel_uncore_type *type;
|
||||
struct intel_uncore_box ** __percpu box;
|
||||
struct list_head box_list;
|
||||
};
|
||||
|
||||
struct intel_uncore_extra_reg {
|
||||
raw_spinlock_t lock;
|
||||
u64 config, config1, config2;
|
||||
atomic_t ref;
|
||||
};
|
||||
|
||||
struct intel_uncore_box {
|
||||
int phys_id;
|
||||
int n_active; /* number of active events */
|
||||
int n_events;
|
||||
int cpu; /* cpu to collect events */
|
||||
unsigned long flags;
|
||||
atomic_t refcnt;
|
||||
struct perf_event *events[UNCORE_PMC_IDX_MAX];
|
||||
struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
|
||||
unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
|
||||
u64 tags[UNCORE_PMC_IDX_MAX];
|
||||
struct pci_dev *pci_dev;
|
||||
struct intel_uncore_pmu *pmu;
|
||||
u64 hrtimer_duration; /* hrtimer timeout for this box */
|
||||
struct hrtimer hrtimer;
|
||||
struct list_head list;
|
||||
struct list_head active_list;
|
||||
void *io_addr;
|
||||
struct intel_uncore_extra_reg shared_regs[0];
|
||||
};
|
||||
|
||||
#define UNCORE_BOX_FLAG_INITIATED 0
|
||||
|
||||
struct uncore_event_desc {
|
||||
struct kobj_attribute attr;
|
||||
const char *config;
|
||||
};
|
||||
|
||||
ssize_t uncore_event_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf);
|
||||
|
||||
#define INTEL_UNCORE_EVENT_DESC(_name, _config) \
|
||||
{ \
|
||||
.attr = __ATTR(_name, 0444, uncore_event_show, NULL), \
|
||||
.config = _config, \
|
||||
}
|
||||
|
||||
#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \
|
||||
static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
|
||||
struct kobj_attribute *attr, \
|
||||
char *page) \
|
||||
{ \
|
||||
BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
|
||||
return sprintf(page, _format "\n"); \
|
||||
} \
|
||||
static struct kobj_attribute format_attr_##_var = \
|
||||
__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
|
||||
|
||||
static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
|
||||
{
|
||||
return box->pmu->type->box_ctl;
|
||||
}
|
||||
|
||||
static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box)
|
||||
{
|
||||
return box->pmu->type->fixed_ctl;
|
||||
}
|
||||
|
||||
static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box)
|
||||
{
|
||||
return box->pmu->type->fixed_ctr;
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx)
|
||||
{
|
||||
return idx * 4 + box->pmu->type->event_ctl;
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx)
|
||||
{
|
||||
return idx * 8 + box->pmu->type->perf_ctr;
|
||||
}
|
||||
|
||||
static inline unsigned uncore_msr_box_offset(struct intel_uncore_box *box)
|
||||
{
|
||||
struct intel_uncore_pmu *pmu = box->pmu;
|
||||
return pmu->type->msr_offsets ?
|
||||
pmu->type->msr_offsets[pmu->pmu_idx] :
|
||||
pmu->type->msr_offset * pmu->pmu_idx;
|
||||
}
|
||||
|
||||
static inline unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
|
||||
{
|
||||
if (!box->pmu->type->box_ctl)
|
||||
return 0;
|
||||
return box->pmu->type->box_ctl + uncore_msr_box_offset(box);
|
||||
}
|
||||
|
||||
static inline unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
|
||||
{
|
||||
if (!box->pmu->type->fixed_ctl)
|
||||
return 0;
|
||||
return box->pmu->type->fixed_ctl + uncore_msr_box_offset(box);
|
||||
}
|
||||
|
||||
static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
|
||||
{
|
||||
return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box);
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
|
||||
{
|
||||
return box->pmu->type->event_ctl +
|
||||
(box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
|
||||
uncore_msr_box_offset(box);
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
|
||||
{
|
||||
return box->pmu->type->perf_ctr +
|
||||
(box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
|
||||
uncore_msr_box_offset(box);
|
||||
}
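/*
 * Editorial illustration, not part of the original file: how the
 * helpers above compose the MSR address of an uncore counter from the
 * per-type base, the counter index and the per-box offset. The
 * constants are the SandyBridge C-box values defined in
 * perf_event_intel_uncore_snb.c later in this commit.
 */
#include <stdio.h>

int main(void)
{
	unsigned perf_ctr = 0x706;	/* SNB_UNC_CBO_0_PER_CTR0 */
	unsigned msr_offset = 0x10;	/* SNB_UNC_CBO_MSR_OFFSET */
	int pmu_idx = 2, idx = 1;	/* third C-box, second counter */

	/* pair_ctr_ctl is not set for the SNB C-box, so idx is used as-is */
	printf("MSR %#x\n", perf_ctr + idx + msr_offset * pmu_idx);	/* 0x727 */
	return 0;
}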
|
||||
|
||||
static inline
|
||||
unsigned uncore_fixed_ctl(struct intel_uncore_box *box)
|
||||
{
|
||||
if (box->pci_dev)
|
||||
return uncore_pci_fixed_ctl(box);
|
||||
else
|
||||
return uncore_msr_fixed_ctl(box);
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned uncore_fixed_ctr(struct intel_uncore_box *box)
|
||||
{
|
||||
if (box->pci_dev)
|
||||
return uncore_pci_fixed_ctr(box);
|
||||
else
|
||||
return uncore_msr_fixed_ctr(box);
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx)
|
||||
{
|
||||
if (box->pci_dev)
|
||||
return uncore_pci_event_ctl(box, idx);
|
||||
else
|
||||
return uncore_msr_event_ctl(box, idx);
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx)
|
||||
{
|
||||
if (box->pci_dev)
|
||||
return uncore_pci_perf_ctr(box, idx);
|
||||
else
|
||||
return uncore_msr_perf_ctr(box, idx);
|
||||
}
|
||||
|
||||
static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box)
|
||||
{
|
||||
return box->pmu->type->perf_ctr_bits;
|
||||
}
|
||||
|
||||
static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
|
||||
{
|
||||
return box->pmu->type->fixed_ctr_bits;
|
||||
}
|
||||
|
||||
static inline int uncore_num_counters(struct intel_uncore_box *box)
|
||||
{
|
||||
return box->pmu->type->num_counters;
|
||||
}
|
||||
|
||||
static inline void uncore_disable_box(struct intel_uncore_box *box)
|
||||
{
|
||||
if (box->pmu->type->ops->disable_box)
|
||||
box->pmu->type->ops->disable_box(box);
|
||||
}
|
||||
|
||||
static inline void uncore_enable_box(struct intel_uncore_box *box)
|
||||
{
|
||||
if (box->pmu->type->ops->enable_box)
|
||||
box->pmu->type->ops->enable_box(box);
|
||||
}
|
||||
|
||||
static inline void uncore_disable_event(struct intel_uncore_box *box,
|
||||
struct perf_event *event)
|
||||
{
|
||||
box->pmu->type->ops->disable_event(box, event);
|
||||
}
|
||||
|
||||
static inline void uncore_enable_event(struct intel_uncore_box *box,
|
||||
struct perf_event *event)
|
||||
{
|
||||
box->pmu->type->ops->enable_event(box, event);
|
||||
}
|
||||
|
||||
static inline u64 uncore_read_counter(struct intel_uncore_box *box,
|
||||
struct perf_event *event)
|
||||
{
|
||||
return box->pmu->type->ops->read_counter(box, event);
|
||||
}
|
||||
|
||||
static inline void uncore_box_init(struct intel_uncore_box *box)
|
||||
{
|
||||
if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
|
||||
if (box->pmu->type->ops->init_box)
|
||||
box->pmu->type->ops->init_box(box);
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
|
||||
{
|
||||
return (box->phys_id < 0);
|
||||
}
|
||||
|
||||
struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event);
|
||||
struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu);
|
||||
struct intel_uncore_box *uncore_event_to_box(struct perf_event *event);
|
||||
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
|
||||
void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
|
||||
void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
|
||||
void uncore_pmu_event_read(struct perf_event *event);
|
||||
void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
|
||||
struct event_constraint *
|
||||
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event);
|
||||
void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event);
|
||||
u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
|
||||
|
||||
extern struct intel_uncore_type **uncore_msr_uncores;
|
||||
extern struct intel_uncore_type **uncore_pci_uncores;
|
||||
extern struct pci_driver *uncore_pci_driver;
|
||||
extern int uncore_pcibus_to_physid[256];
|
||||
extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
|
||||
extern struct event_constraint uncore_constraint_empty;
|
||||
|
||||
/* perf_event_intel_uncore_snb.c */
|
||||
int snb_uncore_pci_init(void);
|
||||
int ivb_uncore_pci_init(void);
|
||||
int hsw_uncore_pci_init(void);
|
||||
void snb_uncore_cpu_init(void);
|
||||
void nhm_uncore_cpu_init(void);
|
||||
|
||||
/* perf_event_intel_uncore_snbep.c */
|
||||
int snbep_uncore_pci_init(void);
|
||||
void snbep_uncore_cpu_init(void);
|
||||
int ivbep_uncore_pci_init(void);
|
||||
void ivbep_uncore_cpu_init(void);
|
||||
int hswep_uncore_pci_init(void);
|
||||
void hswep_uncore_cpu_init(void);
|
||||
|
||||
/* perf_event_intel_uncore_nhmex.c */
|
||||
void nhmex_uncore_cpu_init(void);
|
||||
1221
arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
Normal file
File diff suppressed because it is too large
636
arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
Normal file
@ -0,0 +1,636 @@
/* Nehalem/SandyBridge/Haswell uncore support */
|
||||
#include "perf_event_intel_uncore.h"
|
||||
|
||||
/* SNB event control */
|
||||
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
|
||||
#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00
|
||||
#define SNB_UNC_CTL_EDGE_DET (1 << 18)
|
||||
#define SNB_UNC_CTL_EN (1 << 22)
|
||||
#define SNB_UNC_CTL_INVERT (1 << 23)
|
||||
#define SNB_UNC_CTL_CMASK_MASK 0x1f000000
|
||||
#define NHM_UNC_CTL_CMASK_MASK 0xff000000
|
||||
#define NHM_UNC_FIXED_CTR_CTL_EN (1 << 0)
|
||||
|
||||
#define SNB_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
|
||||
SNB_UNC_CTL_UMASK_MASK | \
|
||||
SNB_UNC_CTL_EDGE_DET | \
|
||||
SNB_UNC_CTL_INVERT | \
|
||||
SNB_UNC_CTL_CMASK_MASK)
|
||||
|
||||
#define NHM_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
|
||||
SNB_UNC_CTL_UMASK_MASK | \
|
||||
SNB_UNC_CTL_EDGE_DET | \
|
||||
SNB_UNC_CTL_INVERT | \
|
||||
NHM_UNC_CTL_CMASK_MASK)
|
||||
|
||||
/* SNB global control register */
|
||||
#define SNB_UNC_PERF_GLOBAL_CTL 0x391
|
||||
#define SNB_UNC_FIXED_CTR_CTRL 0x394
|
||||
#define SNB_UNC_FIXED_CTR 0x395
|
||||
|
||||
/* SNB uncore global control */
|
||||
#define SNB_UNC_GLOBAL_CTL_CORE_ALL ((1 << 4) - 1)
|
||||
#define SNB_UNC_GLOBAL_CTL_EN (1 << 29)
|
||||
|
||||
/* SNB Cbo register */
|
||||
#define SNB_UNC_CBO_0_PERFEVTSEL0 0x700
|
||||
#define SNB_UNC_CBO_0_PER_CTR0 0x706
|
||||
#define SNB_UNC_CBO_MSR_OFFSET 0x10
|
||||
|
||||
/* NHM global control register */
|
||||
#define NHM_UNC_PERF_GLOBAL_CTL 0x391
|
||||
#define NHM_UNC_FIXED_CTR 0x394
|
||||
#define NHM_UNC_FIXED_CTR_CTRL 0x395
|
||||
|
||||
/* NHM uncore global control */
|
||||
#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 8) - 1)
|
||||
#define NHM_UNC_GLOBAL_CTL_EN_FC (1ULL << 32)
|
||||
|
||||
/* NHM uncore register */
|
||||
#define NHM_UNC_PERFEVTSEL0 0x3c0
|
||||
#define NHM_UNC_UNCORE_PMC0 0x3b0
|
||||
|
||||
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
|
||||
|
||||
/* Sandy Bridge uncore support */
|
||||
static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
if (hwc->idx < UNCORE_PMC_IDX_FIXED)
|
||||
wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
|
||||
else
|
||||
wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
|
||||
}
|
||||
|
||||
static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
|
||||
{
|
||||
wrmsrl(event->hw.config_base, 0);
|
||||
}
|
||||
|
||||
static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
|
||||
{
|
||||
if (box->pmu->pmu_idx == 0) {
|
||||
wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
|
||||
SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
|
||||
}
|
||||
}
|
||||
|
||||
static struct uncore_event_desc snb_uncore_events[] = {
|
||||
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
||||
static struct attribute *snb_uncore_formats_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
&format_attr_umask.attr,
|
||||
&format_attr_edge.attr,
|
||||
&format_attr_inv.attr,
|
||||
&format_attr_cmask5.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group snb_uncore_format_group = {
|
||||
.name = "format",
|
||||
.attrs = snb_uncore_formats_attr,
|
||||
};
|
||||
|
||||
static struct intel_uncore_ops snb_uncore_msr_ops = {
|
||||
.init_box = snb_uncore_msr_init_box,
|
||||
.disable_event = snb_uncore_msr_disable_event,
|
||||
.enable_event = snb_uncore_msr_enable_event,
|
||||
.read_counter = uncore_msr_read_counter,
|
||||
};
|
||||
|
||||
static struct event_constraint snb_uncore_cbox_constraints[] = {
|
||||
UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
|
||||
UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
static struct intel_uncore_type snb_uncore_cbox = {
|
||||
.name = "cbox",
|
||||
.num_counters = 2,
|
||||
.num_boxes = 4,
|
||||
.perf_ctr_bits = 44,
|
||||
.fixed_ctr_bits = 48,
|
||||
.perf_ctr = SNB_UNC_CBO_0_PER_CTR0,
|
||||
.event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
|
||||
.fixed_ctr = SNB_UNC_FIXED_CTR,
|
||||
.fixed_ctl = SNB_UNC_FIXED_CTR_CTRL,
|
||||
.single_fixed = 1,
|
||||
.event_mask = SNB_UNC_RAW_EVENT_MASK,
|
||||
.msr_offset = SNB_UNC_CBO_MSR_OFFSET,
|
||||
.constraints = snb_uncore_cbox_constraints,
|
||||
.ops = &snb_uncore_msr_ops,
|
||||
.format_group = &snb_uncore_format_group,
|
||||
.event_descs = snb_uncore_events,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type *snb_msr_uncores[] = {
|
||||
&snb_uncore_cbox,
|
||||
NULL,
|
||||
};
|
||||
|
||||
void snb_uncore_cpu_init(void)
|
||||
{
|
||||
uncore_msr_uncores = snb_msr_uncores;
|
||||
if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
|
||||
snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
|
||||
}
|
||||
|
||||
enum {
|
||||
SNB_PCI_UNCORE_IMC,
|
||||
};
|
||||
|
||||
static struct uncore_event_desc snb_uncore_imc_events[] = {
|
||||
INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"),
|
||||
INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"),
|
||||
INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"),
|
||||
|
||||
INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"),
|
||||
INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"),
|
||||
INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"),
|
||||
|
||||
{ /* end: all zeroes */ },
|
||||
};
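/*
 * Editorial illustration, not part of the original file: the
 * "6.103515625e-5" scale above equals 64 / 2^20, which suggests each
 * free-running IMC count corresponds to one 64-byte cache line and
 * the scale turns counts into MiB. Sketch with a made-up count:
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t counts = 1048576;	/* sample: 2^20 increments */
	double mib = counts * (64.0 / (1 << 20));	/* 64.000000 MiB */

	printf("%.6f MiB\n", mib);
	return 0;
}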
|
||||
|
||||
#define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff
|
||||
#define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48
|
||||
|
||||
/* page size multiple covering all config regs */
|
||||
#define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000
|
||||
|
||||
#define SNB_UNCORE_PCI_IMC_DATA_READS 0x1
|
||||
#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050
|
||||
#define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2
|
||||
#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054
|
||||
#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE
|
||||
|
||||
static struct attribute *snb_uncore_imc_formats_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group snb_uncore_imc_format_group = {
|
||||
.name = "format",
|
||||
.attrs = snb_uncore_imc_formats_attr,
|
||||
};
|
||||
|
||||
static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
|
||||
{
|
||||
struct pci_dev *pdev = box->pci_dev;
|
||||
int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET;
|
||||
resource_size_t addr;
|
||||
u32 pci_dword;
|
||||
|
||||
pci_read_config_dword(pdev, where, &pci_dword);
|
||||
addr = pci_dword;
|
||||
|
||||
#ifdef CONFIG_PHYS_ADDR_T_64BIT
|
||||
pci_read_config_dword(pdev, where + 4, &pci_dword);
|
||||
addr |= ((resource_size_t)pci_dword << 32);
|
||||
#endif
|
||||
|
||||
addr &= ~(PAGE_SIZE - 1);
|
||||
|
||||
box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
|
||||
box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
|
||||
}
|
||||
|
||||
static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
|
||||
{}
|
||||
|
||||
static void snb_uncore_imc_disable_box(struct intel_uncore_box *box)
|
||||
{}
|
||||
|
||||
static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event)
|
||||
{}
|
||||
|
||||
static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event)
|
||||
{}
|
||||
|
||||
static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
return (u64)*(unsigned int *)(box->io_addr + hwc->event_base);
|
||||
}
|
||||
|
||||
/*
|
||||
* custom event_init() function because we define our own fixed, free
|
||||
* running counters, so we do not want to conflict with generic uncore
|
||||
* logic. Also simplifies processing
|
||||
*/
|
||||
static int snb_uncore_imc_event_init(struct perf_event *event)
|
||||
{
|
||||
struct intel_uncore_pmu *pmu;
|
||||
struct intel_uncore_box *box;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK;
|
||||
int idx, base;
|
||||
|
||||
if (event->attr.type != event->pmu->type)
|
||||
return -ENOENT;
|
||||
|
||||
pmu = uncore_event_to_pmu(event);
|
||||
/* no device found for this pmu */
|
||||
if (pmu->func_id < 0)
|
||||
return -ENOENT;
|
||||
|
||||
/* Sampling not supported yet */
|
||||
if (hwc->sample_period)
|
||||
return -EINVAL;
|
||||
|
||||
/* unsupported modes and filters */
|
||||
if (event->attr.exclude_user ||
|
||||
event->attr.exclude_kernel ||
|
||||
event->attr.exclude_hv ||
|
||||
event->attr.exclude_idle ||
|
||||
event->attr.exclude_host ||
|
||||
event->attr.exclude_guest ||
|
||||
event->attr.sample_period) /* no sampling */
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Place all uncore events for a particular physical package
|
||||
* onto a single cpu
|
||||
*/
|
||||
if (event->cpu < 0)
|
||||
return -EINVAL;
|
||||
|
||||
/* check only supported bits are set */
|
||||
if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
box = uncore_pmu_to_box(pmu, event->cpu);
|
||||
if (!box || box->cpu < 0)
|
||||
return -EINVAL;
|
||||
|
||||
event->cpu = box->cpu;
|
||||
|
||||
event->hw.idx = -1;
|
||||
event->hw.last_tag = ~0ULL;
|
||||
event->hw.extra_reg.idx = EXTRA_REG_NONE;
|
||||
event->hw.branch_reg.idx = EXTRA_REG_NONE;
|
||||
/*
|
||||
* check event is known (whitelist, determines counter)
|
||||
*/
|
||||
switch (cfg) {
|
||||
case SNB_UNCORE_PCI_IMC_DATA_READS:
|
||||
base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
|
||||
idx = UNCORE_PMC_IDX_FIXED;
|
||||
break;
|
||||
case SNB_UNCORE_PCI_IMC_DATA_WRITES:
|
||||
base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
|
||||
idx = UNCORE_PMC_IDX_FIXED + 1;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* must be done before validate_group */
|
||||
event->hw.event_base = base;
|
||||
event->hw.config = cfg;
|
||||
event->hw.idx = idx;
|
||||
|
||||
/* no group validation needed, we have free running counters */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
|
||||
{
|
||||
struct intel_uncore_box *box = uncore_event_to_box(event);
|
||||
u64 count;
|
||||
|
||||
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
|
||||
return;
|
||||
|
||||
event->hw.state = 0;
|
||||
box->n_active++;
|
||||
|
||||
list_add_tail(&event->active_entry, &box->active_list);
|
||||
|
||||
count = snb_uncore_imc_read_counter(box, event);
|
||||
local64_set(&event->hw.prev_count, count);
|
||||
|
||||
if (box->n_active == 1)
|
||||
uncore_pmu_start_hrtimer(box);
|
||||
}

static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		box->n_active--;

		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;

		list_del(&event->active_entry);

		if (box->n_active == 0)
			uncore_pmu_cancel_hrtimer(box);
	}

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/*
		 * Drain the remaining delta count out of an event
		 * that we are disabling:
		 */
		uncore_perf_event_update(box, event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;

	if (!box)
		return -ENODEV;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (!(flags & PERF_EF_START))
		hwc->state |= PERF_HES_ARCH;

	snb_uncore_imc_event_start(event, 0);

	box->n_events++;

	return 0;
}

static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int i;

	snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);

	for (i = 0; i < box->n_events; i++) {
		if (event == box->event_list[i]) {
			--box->n_events;
			break;
		}
	}
}

static int snb_pci2phy_map_init(int devid)
{
	struct pci_dev *dev = NULL;
	int bus;

	dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
	if (!dev)
		return -ENOTTY;

	bus = dev->bus->number;

	uncore_pcibus_to_physid[bus] = 0;

	pci_dev_put(dev);

	return 0;
}

static struct pmu snb_uncore_imc_pmu = {
	.task_ctx_nr	= perf_invalid_context,
	.event_init	= snb_uncore_imc_event_init,
	.add		= snb_uncore_imc_event_add,
	.del		= snb_uncore_imc_event_del,
	.start		= snb_uncore_imc_event_start,
	.stop		= snb_uncore_imc_event_stop,
	.read		= uncore_pmu_event_read,
};

static struct intel_uncore_ops snb_uncore_imc_ops = {
	.init_box	= snb_uncore_imc_init_box,
	.enable_box	= snb_uncore_imc_enable_box,
	.disable_box	= snb_uncore_imc_disable_box,
	.disable_event	= snb_uncore_imc_disable_event,
	.enable_event	= snb_uncore_imc_enable_event,
	.hw_config	= snb_uncore_imc_hw_config,
	.read_counter	= snb_uncore_imc_read_counter,
};

static struct intel_uncore_type snb_uncore_imc = {
	.name		= "imc",
	.num_counters   = 2,
	.num_boxes	= 1,
	.fixed_ctr_bits	= 32,
	.fixed_ctr	= SNB_UNCORE_PCI_IMC_CTR_BASE,
	.event_descs	= snb_uncore_imc_events,
	.format_group	= &snb_uncore_imc_format_group,
	.perf_ctr	= SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
	.event_mask	= SNB_UNCORE_PCI_IMC_EVENT_MASK,
	.ops		= &snb_uncore_imc_ops,
	.pmu		= &snb_uncore_imc_pmu,
};

static struct intel_uncore_type *snb_pci_uncores[] = {
	[SNB_PCI_UNCORE_IMC]	= &snb_uncore_imc,
	NULL,
};

static const struct pci_device_id snb_uncore_pci_ids[] = {
	{ /* IMC */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
	},
	{ /* end: all zeroes */ },
};

static const struct pci_device_id ivb_uncore_pci_ids[] = {
	{ /* IMC */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
	},
	{ /* IMC */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_E3_IMC),
		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
	},
	{ /* end: all zeroes */ },
};

static const struct pci_device_id hsw_uncore_pci_ids[] = {
	{ /* IMC */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
	},
	{ /* end: all zeroes */ },
};

static struct pci_driver snb_uncore_pci_driver = {
	.name		= "snb_uncore",
	.id_table	= snb_uncore_pci_ids,
};

static struct pci_driver ivb_uncore_pci_driver = {
	.name		= "ivb_uncore",
	.id_table	= ivb_uncore_pci_ids,
};

static struct pci_driver hsw_uncore_pci_driver = {
	.name		= "hsw_uncore",
	.id_table	= hsw_uncore_pci_ids,
};

struct imc_uncore_pci_dev {
	__u32 pci_id;
	struct pci_driver *driver;
};
#define IMC_DEV(a, d) \
	{ .pci_id = PCI_DEVICE_ID_INTEL_##a, .driver = (d) }

static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
	IMC_DEV(SNB_IMC, &snb_uncore_pci_driver),
	IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver),    /* 3rd Gen Core processor */
	IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
	IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
	{  /* end marker */ }
};


#define for_each_imc_pci_id(x, t) \
	for (x = (t); (x)->pci_id; x++)

static struct pci_driver *imc_uncore_find_dev(void)
{
	const struct imc_uncore_pci_dev *p;
	int ret;

	for_each_imc_pci_id(p, desktop_imc_pci_ids) {
		ret = snb_pci2phy_map_init(p->pci_id);
		if (ret == 0)
			return p->driver;
	}
	return NULL;
}

static int imc_uncore_pci_init(void)
{
	struct pci_driver *imc_drv = imc_uncore_find_dev();

	if (!imc_drv)
		return -ENODEV;

	uncore_pci_uncores = snb_pci_uncores;
	uncore_pci_driver = imc_drv;

	return 0;
}

int snb_uncore_pci_init(void)
{
	return imc_uncore_pci_init();
}

int ivb_uncore_pci_init(void)
{
	return imc_uncore_pci_init();
}
int hsw_uncore_pci_init(void)
{
	return imc_uncore_pci_init();
}

/* end of Sandy Bridge uncore support */

/* Nehalem uncore support */
static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
{
	wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
}

static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
{
	wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
}

static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (hwc->idx < UNCORE_PMC_IDX_FIXED)
		wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
	else
		wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
}

static struct attribute *nhm_uncore_formats_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_inv.attr,
	&format_attr_cmask8.attr,
	NULL,
};

static struct attribute_group nhm_uncore_format_group = {
	.name = "format",
	.attrs = nhm_uncore_formats_attr,
};

static struct uncore_event_desc nhm_uncore_events[] = {
	INTEL_UNCORE_EVENT_DESC(clockticks,                "event=0xff,umask=0x00"),
	INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any,       "event=0x2f,umask=0x0f"),
	INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any,      "event=0x2c,umask=0x0f"),
	INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads,     "event=0x20,umask=0x01"),
	INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes,    "event=0x20,umask=0x02"),
	INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads,  "event=0x20,umask=0x04"),
	INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"),
	INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads,   "event=0x20,umask=0x10"),
	INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes,  "event=0x20,umask=0x20"),
	{ /* end: all zeroes */ },
};

static struct intel_uncore_ops nhm_uncore_msr_ops = {
	.disable_box	= nhm_uncore_msr_disable_box,
	.enable_box	= nhm_uncore_msr_enable_box,
	.disable_event	= snb_uncore_msr_disable_event,
	.enable_event	= nhm_uncore_msr_enable_event,
	.read_counter	= uncore_msr_read_counter,
};

static struct intel_uncore_type nhm_uncore = {
	.name		= "",
	.num_counters   = 8,
	.num_boxes	= 1,
	.perf_ctr_bits	= 48,
	.fixed_ctr_bits	= 48,
	.event_ctl	= NHM_UNC_PERFEVTSEL0,
	.perf_ctr	= NHM_UNC_UNCORE_PMC0,
	.fixed_ctr	= NHM_UNC_FIXED_CTR,
	.fixed_ctl	= NHM_UNC_FIXED_CTR_CTRL,
	.event_mask	= NHM_UNC_RAW_EVENT_MASK,
	.event_descs	= nhm_uncore_events,
	.ops		= &nhm_uncore_msr_ops,
	.format_group	= &nhm_uncore_format_group,
};

static struct intel_uncore_type *nhm_msr_uncores[] = {
	&nhm_uncore,
	NULL,
};

void nhm_uncore_cpu_init(void)
{
	uncore_msr_uncores = nhm_msr_uncores;
}

/* end of Nehalem uncore support */
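The Nehalem uncore events above are defined as format strings; with the format attributes exported by this PMU (event in config:0-7 and umask in config:8-15, as in the PMU_FORMAT_ATTR definitions later in this commit), they reduce to plain raw config values. An illustrative encoding helper:

/* Illustrative only; field layout per the event/umask format attributes. */
#include <stdint.h>
#include <stdio.h>

static uint64_t nhm_uncore_config(uint8_t event, uint8_t umask)
{
	return (uint64_t)event | ((uint64_t)umask << 8);
}

int main(void)
{
	/* qhl_request_ioh_reads: "event=0x20,umask=0x01" -> 0x120 */
	printf("config=0x%llx\n",
	       (unsigned long long)nhm_uncore_config(0x20, 0x01));
	return 0;
}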
2316
arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
Normal file
File diff suppressed because it is too large
319
arch/x86/kernel/cpu/perf_event_knc.c
Normal file
@ -0,0 +1,319 @@
/* Driver for Intel Xeon Phi "Knights Corner" PMU */
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <asm/hardirq.h>
|
||||
|
||||
#include "perf_event.h"
|
||||
|
||||
static const u64 knc_perfmon_event_map[] =
|
||||
{
|
||||
[PERF_COUNT_HW_CPU_CYCLES] = 0x002a,
|
||||
[PERF_COUNT_HW_INSTRUCTIONS] = 0x0016,
|
||||
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028,
|
||||
[PERF_COUNT_HW_CACHE_MISSES] = 0x0029,
|
||||
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012,
|
||||
[PERF_COUNT_HW_BRANCH_MISSES] = 0x002b,
|
||||
};
|
||||
|
||||
static const u64 __initconst knc_hw_cache_event_ids
|
||||
[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
|
||||
{
|
||||
[ C(L1D) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
/* On Xeon Phi event "0" is a valid DATA_READ */
|
||||
/* (L1 Data Cache Reads) Instruction. */
|
||||
/* We code this as ARCH_PERFMON_EVENTSEL_INT as this */
|
||||
/* bit will always be set in x86_pmu_hw_config(). */
|
||||
[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
|
||||
/* DATA_READ */
|
||||
[ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
|
||||
[ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */
|
||||
[ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */
|
||||
},
|
||||
},
|
||||
[ C(L1I ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
|
||||
[ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0,
|
||||
[ C(RESULT_MISS) ] = 0x0,
|
||||
},
|
||||
},
|
||||
[ C(LL ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */
|
||||
[ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */
|
||||
},
|
||||
},
|
||||
[ C(DTLB) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
|
||||
/* DATA_READ */
|
||||
/* see note on L1 OP_READ */
|
||||
[ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
|
||||
[ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0,
|
||||
[ C(RESULT_MISS) ] = 0x0,
|
||||
},
|
||||
},
|
||||
[ C(ITLB) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
|
||||
[ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
[ C(BPU ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */
|
||||
[ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
static u64 knc_pmu_event_map(int hw_event)
|
||||
{
|
||||
return knc_perfmon_event_map[hw_event];
|
||||
}
|
||||
|
||||
static struct event_constraint knc_event_constraints[] =
|
||||
{
|
||||
INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */
|
||||
INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */
|
||||
INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */
|
||||
INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */
|
||||
INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */
|
||||
INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */
|
||||
INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */
|
||||
INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
|
||||
INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
|
||||
INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */
|
||||
INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */
|
||||
INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */
|
||||
INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */
|
||||
INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */
|
||||
INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */
|
||||
INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */
|
||||
INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */
|
||||
INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */
|
||||
INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */
|
||||
INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */
|
||||
INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
#define MSR_KNC_IA32_PERF_GLOBAL_STATUS 0x0000002d
|
||||
#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL 0x0000002e
|
||||
#define MSR_KNC_IA32_PERF_GLOBAL_CTRL 0x0000002f
|
||||
|
||||
#define KNC_ENABLE_COUNTER0 0x00000001
|
||||
#define KNC_ENABLE_COUNTER1 0x00000002
|
||||
|
||||
static void knc_pmu_disable_all(void)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
|
||||
val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
|
||||
wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
|
||||
}
|
||||
|
||||
static void knc_pmu_enable_all(int added)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
|
||||
val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
|
||||
wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
|
||||
}
|
||||
|
||||
static inline void
|
||||
knc_pmu_disable_event(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 val;
|
||||
|
||||
val = hwc->config;
|
||||
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
|
||||
|
||||
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
|
||||
}
|
||||
|
||||
static void knc_pmu_enable_event(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 val;
|
||||
|
||||
val = hwc->config;
|
||||
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
|
||||
|
||||
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
|
||||
}
|
||||
|
||||
static inline u64 knc_pmu_get_status(void)
|
||||
{
|
||||
u64 status;
|
||||
|
||||
rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static inline void knc_pmu_ack_status(u64 ack)
|
||||
{
|
||||
wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
|
||||
}
|
||||
|
||||
static int knc_pmu_handle_irq(struct pt_regs *regs)
|
||||
{
|
||||
struct perf_sample_data data;
|
||||
struct cpu_hw_events *cpuc;
|
||||
int handled = 0;
|
||||
int bit, loops;
|
||||
u64 status;
|
||||
|
||||
cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
knc_pmu_disable_all();
|
||||
|
||||
status = knc_pmu_get_status();
|
||||
if (!status) {
|
||||
knc_pmu_enable_all(0);
|
||||
return handled;
|
||||
}
|
||||
|
||||
loops = 0;
|
||||
again:
|
||||
knc_pmu_ack_status(status);
|
||||
if (++loops > 100) {
|
||||
WARN_ONCE(1, "perf: irq loop stuck!\n");
|
||||
perf_event_print_debug();
|
||||
goto done;
|
||||
}
|
||||
|
||||
inc_irq_stat(apic_perf_irqs);
|
||||
|
||||
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
|
||||
struct perf_event *event = cpuc->events[bit];
|
||||
|
||||
handled++;
|
||||
|
||||
if (!test_bit(bit, cpuc->active_mask))
|
||||
continue;
|
||||
|
||||
if (!intel_pmu_save_and_restart(event))
|
||||
continue;
|
||||
|
||||
perf_sample_data_init(&data, 0, event->hw.last_period);
|
||||
|
||||
if (perf_event_overflow(event, &data, regs))
|
||||
x86_pmu_stop(event, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Repeat if there is more work to be done:
|
||||
*/
|
||||
status = knc_pmu_get_status();
|
||||
if (status)
|
||||
goto again;
|
||||
|
||||
done:
|
||||
knc_pmu_enable_all(0);
|
||||
|
||||
return handled;
|
||||
}
|
||||
|
||||
|
||||
PMU_FORMAT_ATTR(event, "config:0-7" );
|
||||
PMU_FORMAT_ATTR(umask, "config:8-15" );
|
||||
PMU_FORMAT_ATTR(edge, "config:18" );
|
||||
PMU_FORMAT_ATTR(inv, "config:23" );
|
||||
PMU_FORMAT_ATTR(cmask, "config:24-31" );
|
||||
|
||||
static struct attribute *intel_knc_formats_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
&format_attr_umask.attr,
|
||||
&format_attr_edge.attr,
|
||||
&format_attr_inv.attr,
|
||||
&format_attr_cmask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct x86_pmu knc_pmu __initconst = {
|
||||
.name = "knc",
|
||||
.handle_irq = knc_pmu_handle_irq,
|
||||
.disable_all = knc_pmu_disable_all,
|
||||
.enable_all = knc_pmu_enable_all,
|
||||
.enable = knc_pmu_enable_event,
|
||||
.disable = knc_pmu_disable_event,
|
||||
.hw_config = x86_pmu_hw_config,
|
||||
.schedule_events = x86_schedule_events,
|
||||
.eventsel = MSR_KNC_EVNTSEL0,
|
||||
.perfctr = MSR_KNC_PERFCTR0,
|
||||
.event_map = knc_pmu_event_map,
|
||||
.max_events = ARRAY_SIZE(knc_perfmon_event_map),
|
||||
.apic = 1,
|
||||
.max_period = (1ULL << 39) - 1,
|
||||
.version = 0,
|
||||
.num_counters = 2,
|
||||
.cntval_bits = 40,
|
||||
.cntval_mask = (1ULL << 40) - 1,
|
||||
.get_event_constraints = x86_get_event_constraints,
|
||||
.event_constraints = knc_event_constraints,
|
||||
.format_attrs = intel_knc_formats_attr,
|
||||
};
|
||||
|
||||
__init int knc_pmu_init(void)
|
||||
{
|
||||
x86_pmu = knc_pmu;
|
||||
|
||||
memcpy(hw_cache_event_ids, knc_hw_cache_event_ids,
|
||||
sizeof(hw_cache_event_ids));
|
||||
|
||||
return 0;
|
||||
}
|
||||
1376
arch/x86/kernel/cpu/perf_event_p4.c
Normal file
File diff suppressed because it is too large
279
arch/x86/kernel/cpu/perf_event_p6.c
Normal file
@ -0,0 +1,279 @@
#include <linux/perf_event.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include "perf_event.h"
|
||||
|
||||
/*
|
||||
* Not sure about some of these
|
||||
*/
|
||||
static const u64 p6_perfmon_event_map[] =
|
||||
{
|
||||
[PERF_COUNT_HW_CPU_CYCLES] = 0x0079, /* CPU_CLK_UNHALTED */
|
||||
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, /* INST_RETIRED */
|
||||
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, /* L2_RQSTS:M:E:S:I */
|
||||
[PERF_COUNT_HW_CACHE_MISSES] = 0x012e, /* L2_RQSTS:I */
|
||||
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, /* BR_INST_RETIRED */
|
||||
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, /* BR_MISS_PRED_RETIRED */
|
||||
[PERF_COUNT_HW_BUS_CYCLES] = 0x0062, /* BUS_DRDY_CLOCKS */
|
||||
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a2, /* RESOURCE_STALLS */
|
||||
|
||||
};
|
||||
|
||||
static const u64 __initconst p6_hw_cache_event_ids
|
||||
[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
|
||||
{
|
||||
[ C(L1D) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */
|
||||
[ C(RESULT_MISS) ] = 0x0045, /* DCU_LINES_IN */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0x0f29, /* L2_LD:M:E:S:I */
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
},
|
||||
[ C(L1I ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */
|
||||
[ C(RESULT_MISS) ] = 0x0f28, /* L2_IFETCH:M:E:S:I */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
},
|
||||
[ C(LL ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0x0025, /* L2_M_LINES_INM */
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
},
|
||||
[ C(DTLB) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
},
|
||||
[ C(ITLB) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */
|
||||
[ C(RESULT_MISS) ] = 0x0085, /* ITLB_MISS */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
[ C(BPU ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED */
|
||||
[ C(RESULT_MISS) ] = 0x00c5, /* BR_MISS_PRED_RETIRED */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
static u64 p6_pmu_event_map(int hw_event)
|
||||
{
|
||||
return p6_perfmon_event_map[hw_event];
|
||||
}
|
||||
|
||||
/*
|
||||
* Event setting that is specified not to count anything.
|
||||
* We use this to effectively disable a counter.
|
||||
*
|
||||
* L2_RQSTS with 0 MESI unit mask.
|
||||
*/
|
||||
#define P6_NOP_EVENT 0x0000002EULL
|
||||
|
||||
static struct event_constraint p6_event_constraints[] =
|
||||
{
|
||||
INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
|
||||
INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
|
||||
INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
|
||||
INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
|
||||
INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
|
||||
INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
static void p6_pmu_disable_all(void)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
/* p6 only has one enable register */
|
||||
rdmsrl(MSR_P6_EVNTSEL0, val);
|
||||
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
|
||||
wrmsrl(MSR_P6_EVNTSEL0, val);
|
||||
}
|
||||
|
||||
static void p6_pmu_enable_all(int added)
|
||||
{
|
||||
unsigned long val;
|
||||
|
||||
/* p6 only has one enable register */
|
||||
rdmsrl(MSR_P6_EVNTSEL0, val);
|
||||
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
|
||||
wrmsrl(MSR_P6_EVNTSEL0, val);
|
||||
}
|
||||
|
||||
static inline void
|
||||
p6_pmu_disable_event(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 val = P6_NOP_EVENT;
|
||||
|
||||
(void)wrmsrl_safe(hwc->config_base, val);
|
||||
}
|
||||
|
||||
static void p6_pmu_enable_event(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 val;
|
||||
|
||||
val = hwc->config;
|
||||
|
||||
/*
|
||||
* p6 only has a global event enable, set on PerfEvtSel0
|
||||
* We "disable" events by programming P6_NOP_EVENT
|
||||
* and we rely on p6_pmu_enable_all() being called
|
||||
* to actually enable the events.
|
||||
*/
|
||||
|
||||
(void)wrmsrl_safe(hwc->config_base, val);
|
||||
}
|
||||
|
||||
PMU_FORMAT_ATTR(event, "config:0-7" );
|
||||
PMU_FORMAT_ATTR(umask, "config:8-15" );
|
||||
PMU_FORMAT_ATTR(edge, "config:18" );
|
||||
PMU_FORMAT_ATTR(pc, "config:19" );
|
||||
PMU_FORMAT_ATTR(inv, "config:23" );
|
||||
PMU_FORMAT_ATTR(cmask, "config:24-31" );
|
||||
|
||||
static struct attribute *intel_p6_formats_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
&format_attr_umask.attr,
|
||||
&format_attr_edge.attr,
|
||||
&format_attr_pc.attr,
|
||||
&format_attr_inv.attr,
|
||||
&format_attr_cmask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static __initconst const struct x86_pmu p6_pmu = {
|
||||
.name = "p6",
|
||||
.handle_irq = x86_pmu_handle_irq,
|
||||
.disable_all = p6_pmu_disable_all,
|
||||
.enable_all = p6_pmu_enable_all,
|
||||
.enable = p6_pmu_enable_event,
|
||||
.disable = p6_pmu_disable_event,
|
||||
.hw_config = x86_pmu_hw_config,
|
||||
.schedule_events = x86_schedule_events,
|
||||
.eventsel = MSR_P6_EVNTSEL0,
|
||||
.perfctr = MSR_P6_PERFCTR0,
|
||||
.event_map = p6_pmu_event_map,
|
||||
.max_events = ARRAY_SIZE(p6_perfmon_event_map),
|
||||
.apic = 1,
|
||||
.max_period = (1ULL << 31) - 1,
|
||||
.version = 0,
|
||||
.num_counters = 2,
|
||||
/*
|
||||
* Events have 40 bits implemented. However they are designed such
|
||||
* that bits [32-39] are sign extensions of bit 31. As such the
|
||||
* effective width of a event for P6-like PMU is 32 bits only.
|
||||
*
|
||||
* See IA-32 Intel Architecture Software developer manual Vol 3B
|
||||
*/
|
||||
.cntval_bits = 32,
|
||||
.cntval_mask = (1ULL << 32) - 1,
|
||||
.get_event_constraints = x86_get_event_constraints,
|
||||
.event_constraints = p6_event_constraints,
|
||||
|
||||
.format_attrs = intel_p6_formats_attr,
|
||||
.events_sysfs_show = intel_event_sysfs_show,
|
||||
|
||||
};
|
||||
|
||||
static __init void p6_pmu_rdpmc_quirk(void)
|
||||
{
|
||||
if (boot_cpu_data.x86_mask < 9) {
|
||||
/*
|
||||
* PPro erratum 26; fixed in stepping 9 and above.
|
||||
*/
|
||||
pr_warn("Userspace RDPMC support disabled due to a CPU erratum\n");
|
||||
x86_pmu.attr_rdpmc_broken = 1;
|
||||
x86_pmu.attr_rdpmc = 0;
|
||||
}
|
||||
}
|
||||
|
||||
__init int p6_pmu_init(void)
|
||||
{
|
||||
x86_pmu = p6_pmu;
|
||||
|
||||
switch (boot_cpu_data.x86_model) {
|
||||
case 1: /* Pentium Pro */
|
||||
x86_add_quirk(p6_pmu_rdpmc_quirk);
|
||||
break;
|
||||
|
||||
case 3: /* Pentium II - Klamath */
|
||||
case 5: /* Pentium II - Deschutes */
|
||||
case 6: /* Pentium II - Mendocino */
|
||||
break;
|
||||
|
||||
case 7: /* Pentium III - Katmai */
|
||||
case 8: /* Pentium III - Coppermine */
|
||||
case 10: /* Pentium III Xeon */
|
||||
case 11: /* Pentium III - Tualatin */
|
||||
break;
|
||||
|
||||
case 9: /* Pentium M - Banias */
|
||||
case 13: /* Pentium M - Dothan */
|
||||
break;
|
||||
|
||||
default:
|
||||
pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
|
||||
sizeof(hw_cache_event_ids));
|
||||
|
||||
return 0;
|
||||
}
|
||||
160
arch/x86/kernel/cpu/perfctr-watchdog.c
Normal file
@ -0,0 +1,160 @@
/*
|
||||
* local apic based NMI watchdog for various CPUs.
|
||||
*
|
||||
* This file also handles reservation of performance counters for coordination
|
||||
* with other users (like oprofile).
|
||||
*
|
||||
* Note that these events normally don't tick when the CPU idles. This means
|
||||
* the frequency varies with CPU load.
|
||||
*
|
||||
* Original code for K7/P6 written by Keith Owens
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/smp.h>
|
||||
#include <asm/nmi.h>
|
||||
#include <linux/kprobes.h>
|
||||
|
||||
#include <asm/apic.h>
|
||||
#include <asm/perf_event.h>
|
||||
|
||||
/*
|
||||
* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
|
||||
* offset from MSR_P4_BSU_ESCR0.
|
||||
*
|
||||
* It will be the max for all platforms (for now)
|
||||
*/
|
||||
#define NMI_MAX_COUNTER_BITS 66
|
||||
|
||||
/*
|
||||
* perfctr_nmi_owner tracks the ownership of the perfctr registers:
|
||||
* evtsel_nmi_owner tracks the ownership of the event selection
|
||||
* - different performance counters/ event selection may be reserved for
|
||||
* different subsystems this reservation system just tries to coordinate
|
||||
* things a little
|
||||
*/
|
||||
static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
|
||||
static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
|
||||
|
||||
/* converts an msr to an appropriate reservation bit */
|
||||
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
|
||||
{
|
||||
/* returns the bit offset of the performance counter register */
|
||||
switch (boot_cpu_data.x86_vendor) {
|
||||
case X86_VENDOR_AMD:
|
||||
if (msr >= MSR_F15H_PERF_CTR)
|
||||
return (msr - MSR_F15H_PERF_CTR) >> 1;
|
||||
return msr - MSR_K7_PERFCTR0;
|
||||
case X86_VENDOR_INTEL:
|
||||
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
|
||||
return msr - MSR_ARCH_PERFMON_PERFCTR0;
|
||||
|
||||
switch (boot_cpu_data.x86) {
|
||||
case 6:
|
||||
return msr - MSR_P6_PERFCTR0;
|
||||
case 11:
|
||||
return msr - MSR_KNC_PERFCTR0;
|
||||
case 15:
|
||||
return msr - MSR_P4_BPU_PERFCTR0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* converts an msr to an appropriate reservation bit
|
||||
* returns the bit offset of the event selection register
|
||||
*/
|
||||
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
|
||||
{
|
||||
/* returns the bit offset of the event selection register */
|
||||
switch (boot_cpu_data.x86_vendor) {
|
||||
case X86_VENDOR_AMD:
|
||||
if (msr >= MSR_F15H_PERF_CTL)
|
||||
return (msr - MSR_F15H_PERF_CTL) >> 1;
|
||||
return msr - MSR_K7_EVNTSEL0;
|
||||
case X86_VENDOR_INTEL:
|
||||
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
|
||||
return msr - MSR_ARCH_PERFMON_EVENTSEL0;
|
||||
|
||||
switch (boot_cpu_data.x86) {
|
||||
case 6:
|
||||
return msr - MSR_P6_EVNTSEL0;
|
||||
case 11:
|
||||
return msr - MSR_KNC_EVNTSEL0;
|
||||
case 15:
|
||||
return msr - MSR_P4_BSU_ESCR0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
/* checks for a bit availability (hack for oprofile) */
|
||||
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
|
||||
{
|
||||
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
|
||||
|
||||
return !test_bit(counter, perfctr_nmi_owner);
|
||||
}
|
||||
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
|
||||
|
||||
int reserve_perfctr_nmi(unsigned int msr)
|
||||
{
|
||||
unsigned int counter;
|
||||
|
||||
counter = nmi_perfctr_msr_to_bit(msr);
|
||||
/* register not managed by the allocator? */
|
||||
if (counter > NMI_MAX_COUNTER_BITS)
|
||||
return 1;
|
||||
|
||||
if (!test_and_set_bit(counter, perfctr_nmi_owner))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(reserve_perfctr_nmi);
|
||||
|
||||
void release_perfctr_nmi(unsigned int msr)
|
||||
{
|
||||
unsigned int counter;
|
||||
|
||||
counter = nmi_perfctr_msr_to_bit(msr);
|
||||
/* register not managed by the allocator? */
|
||||
if (counter > NMI_MAX_COUNTER_BITS)
|
||||
return;
|
||||
|
||||
clear_bit(counter, perfctr_nmi_owner);
|
||||
}
|
||||
EXPORT_SYMBOL(release_perfctr_nmi);
|
||||
|
||||
int reserve_evntsel_nmi(unsigned int msr)
|
||||
{
|
||||
unsigned int counter;
|
||||
|
||||
counter = nmi_evntsel_msr_to_bit(msr);
|
||||
/* register not managed by the allocator? */
|
||||
if (counter > NMI_MAX_COUNTER_BITS)
|
||||
return 1;
|
||||
|
||||
if (!test_and_set_bit(counter, evntsel_nmi_owner))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(reserve_evntsel_nmi);
|
||||
|
||||
void release_evntsel_nmi(unsigned int msr)
|
||||
{
|
||||
unsigned int counter;
|
||||
|
||||
counter = nmi_evntsel_msr_to_bit(msr);
|
||||
/* register not managed by the allocator? */
|
||||
if (counter > NMI_MAX_COUNTER_BITS)
|
||||
return;
|
||||
|
||||
clear_bit(counter, evntsel_nmi_owner);
|
||||
}
|
||||
EXPORT_SYMBOL(release_evntsel_nmi);
|
||||
21
arch/x86/kernel/cpu/powerflags.c
Normal file
@ -0,0 +1,21 @@
/*
 * Strings for the various x86 power flags
 *
 * This file must not contain any executable code.
 */

#include <asm/cpufeature.h>

const char *const x86_power_flags[32] = {
	"ts",		/* temperature sensor */
	"fid",		/* frequency id control */
	"vid",		/* voltage id control */
	"ttp",		/* thermal trip */
	"tm",		/* hardware thermal control */
	"stc",		/* software thermal control */
	"100mhzsteps",	/* 100 MHz multiplier control */
	"hwpstate",	/* hardware P-state control */
	"",		/* tsc invariant mapped to constant_tsc */
	"cpb",		/* core performance boost */
	"eff_freq_ro",	/* Readonly aperf/mperf */
};
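The bits indexed by this table come from CPUID leaf 0x80000007 (EDX), which the kernel stores in c->x86_power and proc.c prints below. A hedged user-space equivalent using GCC's <cpuid.h> helpers:

/* Illustrative only; mirrors the table above for the low bits. */
#include <cpuid.h>
#include <stdio.h>

static const char *const power_flags[] = {
	"ts", "fid", "vid", "ttp", "tm", "stc", "100mhzsteps", "hwpstate",
	"", "cpb", "eff_freq_ro",
};

int main(void)
{
	unsigned int eax, ebx, ecx, edx, i;

	if (__get_cpuid_max(0x80000000, NULL) < 0x80000007)
		return 1;
	__cpuid(0x80000007, eax, ebx, ecx, edx);

	for (i = 0; i < sizeof(power_flags) / sizeof(power_flags[0]); i++)
		if ((edx & (1u << i)) && power_flags[i][0])
			printf(" %s", power_flags[i]);
	printf("\n");
	return 0;
}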
162
arch/x86/kernel/cpu/proc.c
Normal file
@ -0,0 +1,162 @@
#include <linux/smp.h>
|
||||
#include <linux/timex.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/cpufreq.h>
|
||||
|
||||
/*
|
||||
* Get CPU information for use by the procfs.
|
||||
*/
|
||||
static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
|
||||
unsigned int cpu)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
|
||||
seq_printf(m, "siblings\t: %d\n", cpumask_weight(cpu_core_mask(cpu)));
|
||||
seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
|
||||
seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
|
||||
seq_printf(m, "apicid\t\t: %d\n", c->apicid);
|
||||
seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
|
||||
{
|
||||
seq_printf(m,
|
||||
"fdiv_bug\t: %s\n"
|
||||
"f00f_bug\t: %s\n"
|
||||
"coma_bug\t: %s\n"
|
||||
"fpu\t\t: %s\n"
|
||||
"fpu_exception\t: %s\n"
|
||||
"cpuid level\t: %d\n"
|
||||
"wp\t\t: %s\n",
|
||||
static_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no",
|
||||
static_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no",
|
||||
static_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no",
|
||||
static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
|
||||
static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
|
||||
c->cpuid_level,
|
||||
c->wp_works_ok ? "yes" : "no");
|
||||
}
|
||||
#else
|
||||
static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
|
||||
{
|
||||
seq_printf(m,
|
||||
"fpu\t\t: yes\n"
|
||||
"fpu_exception\t: yes\n"
|
||||
"cpuid level\t: %d\n"
|
||||
"wp\t\t: yes\n",
|
||||
c->cpuid_level);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int show_cpuinfo(struct seq_file *m, void *v)
|
||||
{
|
||||
struct cpuinfo_x86 *c = v;
|
||||
unsigned int cpu;
|
||||
int i;
|
||||
|
||||
cpu = c->cpu_index;
|
||||
seq_printf(m, "processor\t: %u\n"
|
||||
"vendor_id\t: %s\n"
|
||||
"cpu family\t: %d\n"
|
||||
"model\t\t: %u\n"
|
||||
"model name\t: %s\n",
|
||||
cpu,
|
||||
c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
|
||||
c->x86,
|
||||
c->x86_model,
|
||||
c->x86_model_id[0] ? c->x86_model_id : "unknown");
|
||||
|
||||
if (c->x86_mask || c->cpuid_level >= 0)
|
||||
seq_printf(m, "stepping\t: %d\n", c->x86_mask);
|
||||
else
|
||||
seq_printf(m, "stepping\t: unknown\n");
|
||||
if (c->microcode)
|
||||
seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_TSC)) {
|
||||
unsigned int freq = cpufreq_quick_get(cpu);
|
||||
|
||||
if (!freq)
|
||||
freq = cpu_khz;
|
||||
seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
|
||||
freq / 1000, (freq % 1000));
|
||||
}
|
||||
|
||||
/* Cache size */
|
||||
if (c->x86_cache_size >= 0)
|
||||
seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
|
||||
|
||||
show_cpuinfo_core(m, c, cpu);
|
||||
show_cpuinfo_misc(m, c);
|
||||
|
||||
seq_printf(m, "flags\t\t:");
|
||||
for (i = 0; i < 32*NCAPINTS; i++)
|
||||
if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
|
||||
seq_printf(m, " %s", x86_cap_flags[i]);
|
||||
|
||||
seq_printf(m, "\nbugs\t\t:");
|
||||
for (i = 0; i < 32*NBUGINTS; i++) {
|
||||
unsigned int bug_bit = 32*NCAPINTS + i;
|
||||
|
||||
if (cpu_has_bug(c, bug_bit) && x86_bug_flags[i])
|
||||
seq_printf(m, " %s", x86_bug_flags[i]);
|
||||
}
|
||||
|
||||
seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
|
||||
c->loops_per_jiffy/(500000/HZ),
|
||||
(c->loops_per_jiffy/(5000/HZ)) % 100);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (c->x86_tlbsize > 0)
|
||||
seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
|
||||
#endif
|
||||
seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size);
|
||||
seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
|
||||
seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
|
||||
c->x86_phys_bits, c->x86_virt_bits);
|
||||
|
||||
seq_printf(m, "power management:");
|
||||
for (i = 0; i < 32; i++) {
|
||||
if (c->x86_power & (1 << i)) {
|
||||
if (i < ARRAY_SIZE(x86_power_flags) &&
|
||||
x86_power_flags[i])
|
||||
seq_printf(m, "%s%s",
|
||||
x86_power_flags[i][0] ? " " : "",
|
||||
x86_power_flags[i]);
|
||||
else
|
||||
seq_printf(m, " [%d]", i);
|
||||
}
|
||||
}
|
||||
|
||||
seq_printf(m, "\n\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void *c_start(struct seq_file *m, loff_t *pos)
|
||||
{
|
||||
*pos = cpumask_next(*pos - 1, cpu_online_mask);
|
||||
if ((*pos) < nr_cpu_ids)
|
||||
return &cpu_data(*pos);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
|
||||
{
|
||||
(*pos)++;
|
||||
return c_start(m, pos);
|
||||
}
|
||||
|
||||
static void c_stop(struct seq_file *m, void *v)
|
||||
{
|
||||
}
|
||||
|
||||
const struct seq_operations cpuinfo_op = {
|
||||
.start = c_start,
|
||||
.next = c_next,
|
||||
.stop = c_stop,
|
||||
.show = show_cpuinfo,
|
||||
};
|
||||
60
arch/x86/kernel/cpu/rdrand.c
Normal file
@ -0,0 +1,60 @@
/*
 * This file is part of the Linux kernel.
 *
 * Copyright (c) 2011, Intel Corporation
 * Authors: Fenghua Yu <fenghua.yu@intel.com>,
 *          H. Peter Anvin <hpa@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <asm/processor.h>
#include <asm/archrandom.h>
#include <asm/sections.h>

static int __init x86_rdrand_setup(char *s)
{
	setup_clear_cpu_cap(X86_FEATURE_RDRAND);
	setup_clear_cpu_cap(X86_FEATURE_RDSEED);
	return 1;
}
__setup("nordrand", x86_rdrand_setup);

/*
 * Force a reseed cycle; we are architecturally guaranteed a reseed
 * after no more than 512 128-bit chunks of random data. This also
 * acts as a test of the CPU capability.
 */
#define RESEED_LOOP ((512*128)/sizeof(unsigned long))

void x86_init_rdrand(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_ARCH_RANDOM
	unsigned long tmp;
	int i, count, ok;

	if (!cpu_has(c, X86_FEATURE_RDRAND))
		return;		/* Nothing to do */

	for (count = i = 0; i < RESEED_LOOP; i++) {
		ok = rdrand_long(&tmp);
		if (ok)
			count++;
	}

	if (count != RESEED_LOOP)
		clear_cpu_cap(c, X86_FEATURE_RDRAND);
#endif
}
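rdrand_long() wraps the RDRAND instruction, whose success is signalled through the carry flag; the reseed loop above doubles as a sanity check of that protocol. An illustrative stand-alone version of a single retried read (GCC inline asm, x86 only):

/* Illustrative sketch, not the kernel helper. */
#include <stdint.h>

static inline int rdrand_u64(uint64_t *v)
{
	unsigned char ok;
	int retries = 10;	/* a commonly used retry bound; an assumption */

	do {
		asm volatile("rdrand %0; setc %1"
			     : "=r" (*v), "=qm" (ok));
	} while (!ok && --retries);

	return ok;	/* 1 on success, 0 if no random data was returned */
}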
71
arch/x86/kernel/cpu/scattered.c
Normal file
@ -0,0 +1,71 @@
/*
 *	Routines to identify additional cpu features that are scattered in
 *	cpuid space.
 */
#include <linux/cpu.h>

#include <asm/pat.h>
#include <asm/processor.h>

#include <asm/apic.h>

struct cpuid_bit {
	u16 feature;
	u8 reg;
	u8 bit;
	u32 level;
	u32 sub_leaf;
};

enum cpuid_regs {
	CR_EAX = 0,
	CR_ECX,
	CR_EDX,
	CR_EBX
};

void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
{
	u32 max_level;
	u32 regs[4];
	const struct cpuid_bit *cb;

	static const struct cpuid_bit cpuid_bits[] = {
		{ X86_FEATURE_DTHERM,		CR_EAX, 0, 0x00000006, 0 },
		{ X86_FEATURE_IDA,		CR_EAX, 1, 0x00000006, 0 },
		{ X86_FEATURE_ARAT,		CR_EAX, 2, 0x00000006, 0 },
		{ X86_FEATURE_PLN,		CR_EAX, 4, 0x00000006, 0 },
		{ X86_FEATURE_PTS,		CR_EAX, 6, 0x00000006, 0 },
		{ X86_FEATURE_APERFMPERF,	CR_ECX, 0, 0x00000006, 0 },
		{ X86_FEATURE_EPB,		CR_ECX, 3, 0x00000006, 0 },
		{ X86_FEATURE_HW_PSTATE,	CR_EDX, 7, 0x80000007, 0 },
		{ X86_FEATURE_CPB,		CR_EDX, 9, 0x80000007, 0 },
		{ X86_FEATURE_PROC_FEEDBACK,	CR_EDX, 11, 0x80000007, 0 },
		{ X86_FEATURE_NPT,		CR_EDX, 0, 0x8000000a, 0 },
		{ X86_FEATURE_LBRV,		CR_EDX, 1, 0x8000000a, 0 },
		{ X86_FEATURE_SVML,		CR_EDX, 2, 0x8000000a, 0 },
		{ X86_FEATURE_NRIPS,		CR_EDX, 3, 0x8000000a, 0 },
		{ X86_FEATURE_TSCRATEMSR,	CR_EDX, 4, 0x8000000a, 0 },
		{ X86_FEATURE_VMCBCLEAN,	CR_EDX, 5, 0x8000000a, 0 },
		{ X86_FEATURE_FLUSHBYASID,	CR_EDX, 6, 0x8000000a, 0 },
		{ X86_FEATURE_DECODEASSISTS,	CR_EDX, 7, 0x8000000a, 0 },
		{ X86_FEATURE_PAUSEFILTER,	CR_EDX, 10, 0x8000000a, 0 },
		{ X86_FEATURE_PFTHRESHOLD,	CR_EDX, 12, 0x8000000a, 0 },
		{ 0, 0, 0, 0, 0 }
	};

	for (cb = cpuid_bits; cb->feature; cb++) {

		/* Verify that the level is valid */
		max_level = cpuid_eax(cb->level & 0xffff0000);
		if (max_level < cb->level ||
		    max_level > (cb->level | 0xffff))
			continue;

		cpuid_count(cb->level, cb->sub_leaf, &regs[CR_EAX],
			    &regs[CR_EBX], &regs[CR_ECX], &regs[CR_EDX]);

		if (regs[cb->reg] & (1 << cb->bit))
			set_cpu_cap(c, cb->feature);
	}
}
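Each table entry is just (leaf, sub-leaf, register, bit), so the same probe can be reproduced outside the kernel. An illustrative user-space check for one of the scattered bits, X86_FEATURE_ARAT (leaf 0x6, EAX bit 2 above), using GCC's <cpuid.h>:

/* Illustrative only. */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (__get_cpuid_max(0, NULL) < 0x6)
		return 1;	/* leaf 0x6 not implemented */

	__cpuid_count(0x6, 0, eax, ebx, ecx, edx);
	printf("ARAT (always-running APIC timer): %s\n",
	       (eax & (1 << 2)) ? "yes" : "no");
	return 0;
}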
99
arch/x86/kernel/cpu/topology.c
Normal file
@ -0,0 +1,99 @@
/*
 * Check for extended topology enumeration cpuid leaf 0xb and if it
 * exists, use it for populating initial_apicid and cpu topology
 * detection.
 */

#include <linux/cpu.h>
#include <asm/apic.h>
#include <asm/pat.h>
#include <asm/processor.h>

/* leaf 0xb SMT level */
#define SMT_LEVEL	0

/* leaf 0xb sub-leaf types */
#define INVALID_TYPE	0
#define SMT_TYPE	1
#define CORE_TYPE	2

#define LEAFB_SUBTYPE(ecx)		(((ecx) >> 8) & 0xff)
#define BITS_SHIFT_NEXT_LEVEL(eax)	((eax) & 0x1f)
#define LEVEL_MAX_SIBLINGS(ebx)		((ebx) & 0xffff)

/*
 * Check for extended topology enumeration cpuid leaf 0xb and if it
 * exists, use it for populating initial_apicid and cpu topology
 * detection.
 */
void detect_extended_topology(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
	unsigned int eax, ebx, ecx, edx, sub_index;
	unsigned int ht_mask_width, core_plus_mask_width;
	unsigned int core_select_mask, core_level_siblings;
	static bool printed;

	if (c->cpuid_level < 0xb)
		return;

	cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);

	/*
	 * check if the cpuid leaf 0xb is actually implemented.
	 */
	if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
		return;

	set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);

	/*
	 * initial apic id, which also represents 32-bit extended x2apic id.
	 */
	c->initial_apicid = edx;

	/*
	 * Populate HT related information from sub-leaf level 0.
	 */
	core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
	core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);

	sub_index = 1;
	do {
		cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);

		/*
		 * Check for the Core type in the implemented sub leaves.
		 */
		if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
			core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
			core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
			break;
		}

		sub_index++;
	} while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);

	core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;

	c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width)
						 & core_select_mask;
	c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width);
	/*
	 * Reinit the apicid, now that we have extended initial_apicid.
	 */
	c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);

	c->x86_max_cores = (core_level_siblings / smp_num_siblings);

	if (!printed) {
		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
		       c->phys_proc_id);
		if (c->x86_max_cores > 1)
			printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
			       c->cpu_core_id);
		printed = 1;
	}
	return;
#endif
}
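The mask arithmetic above slices the x2APIC ID into SMT, core and package fields. A small illustrative calculation with assumed widths (2 SMT siblings, so ht_mask_width = 1, and core_plus_mask_width = 3):

/* Illustrative only; widths are assumptions, the kernel derives them from leaf 0xb. */
#include <stdio.h>

int main(void)
{
	unsigned int ht_mask_width = 1, core_plus_mask_width = 3;
	unsigned int core_select_mask =
		(~(-1 << core_plus_mask_width)) >> ht_mask_width;
	unsigned int apicid = 5;	/* package 0, core 2, SMT thread 1 */

	printf("core_select_mask=0x%x core_id=%u pkg_id=%u\n",
	       core_select_mask,
	       (apicid >> ht_mask_width) & core_select_mask,
	       apicid >> core_plus_mask_width);
	return 0;
}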
108
arch/x86/kernel/cpu/transmeta.c
Normal file
@ -0,0 +1,108 @@
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/msr.h>
|
||||
#include "cpu.h"
|
||||
|
||||
static void early_init_transmeta(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 xlvl;
|
||||
|
||||
/* Transmeta-defined flags: level 0x80860001 */
|
||||
xlvl = cpuid_eax(0x80860000);
|
||||
if ((xlvl & 0xffff0000) == 0x80860000) {
|
||||
if (xlvl >= 0x80860001)
|
||||
c->x86_capability[2] = cpuid_edx(0x80860001);
|
||||
}
|
||||
}
|
||||
|
||||
static void init_transmeta(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned int cap_mask, uk, max, dummy;
|
||||
unsigned int cms_rev1, cms_rev2;
|
||||
unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev;
|
||||
char cpu_info[65];
|
||||
|
||||
early_init_transmeta(c);
|
||||
|
||||
cpu_detect_cache_sizes(c);
|
||||
|
||||
/* Print CMS and CPU revision */
|
||||
max = cpuid_eax(0x80860000);
|
||||
cpu_rev = 0;
|
||||
if (max >= 0x80860001) {
|
||||
cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags);
|
||||
if (cpu_rev != 0x02000000) {
|
||||
printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
|
||||
(cpu_rev >> 24) & 0xff,
|
||||
(cpu_rev >> 16) & 0xff,
|
||||
(cpu_rev >> 8) & 0xff,
|
||||
cpu_rev & 0xff,
|
||||
cpu_freq);
|
||||
}
|
||||
}
|
||||
if (max >= 0x80860002) {
|
||||
cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy);
|
||||
if (cpu_rev == 0x02000000) {
|
||||
printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n",
|
||||
new_cpu_rev, cpu_freq);
|
||||
}
|
||||
printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
|
||||
(cms_rev1 >> 24) & 0xff,
|
||||
(cms_rev1 >> 16) & 0xff,
|
||||
(cms_rev1 >> 8) & 0xff,
|
||||
cms_rev1 & 0xff,
|
||||
cms_rev2);
|
||||
}
|
||||
if (max >= 0x80860006) {
|
||||
cpuid(0x80860003,
|
||||
(void *)&cpu_info[0],
|
||||
(void *)&cpu_info[4],
|
||||
(void *)&cpu_info[8],
|
||||
(void *)&cpu_info[12]);
|
||||
cpuid(0x80860004,
|
||||
(void *)&cpu_info[16],
|
||||
(void *)&cpu_info[20],
|
||||
(void *)&cpu_info[24],
|
||||
(void *)&cpu_info[28]);
|
||||
cpuid(0x80860005,
|
||||
(void *)&cpu_info[32],
|
||||
(void *)&cpu_info[36],
|
||||
(void *)&cpu_info[40],
|
||||
(void *)&cpu_info[44]);
|
||||
cpuid(0x80860006,
|
||||
(void *)&cpu_info[48],
|
||||
(void *)&cpu_info[52],
|
||||
(void *)&cpu_info[56],
|
||||
(void *)&cpu_info[60]);
|
||||
cpu_info[64] = '\0';
|
||||
printk(KERN_INFO "CPU: %s\n", cpu_info);
|
||||
}
|
||||
|
||||
/* Unhide possibly hidden capability flags */
|
||||
rdmsr(0x80860004, cap_mask, uk);
|
||||
wrmsr(0x80860004, ~0, uk);
|
||||
c->x86_capability[0] = cpuid_edx(0x00000001);
|
||||
wrmsr(0x80860004, cap_mask, uk);
|
||||
|
||||
/* All Transmeta CPUs have a constant TSC */
|
||||
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
/*
|
||||
* randomize_va_space slows us down enormously;
|
||||
* it probably triggers retranslation of x86->native bytecode
|
||||
*/
|
||||
randomize_va_space = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static const struct cpu_dev transmeta_cpu_dev = {
|
||||
.c_vendor = "Transmeta",
|
||||
.c_ident = { "GenuineTMx86", "TransmetaCPU" },
|
||||
.c_early_init = early_init_transmeta,
|
||||
.c_init = init_transmeta,
|
||||
.c_x86_vendor = X86_VENDOR_TRANSMETA,
|
||||
};
|
||||
|
||||
cpu_dev_register(transmeta_cpu_dev);
|
||||
25
arch/x86/kernel/cpu/umc.c
Normal file
@ -0,0 +1,25 @@
#include <linux/kernel.h>
#include <asm/processor.h>
#include "cpu.h"

/*
 * UMC chips appear to be only either 386 or 486,
 * so no special init takes place.
 */

static const struct cpu_dev umc_cpu_dev = {
	.c_vendor	= "UMC",
	.c_ident	= { "UMC UMC UMC" },
	.legacy_models	= {
		{ .family = 4, .model_names =
		  {
			  [1] = "U5D",
			  [2] = "U5S",
		  }
		},
	},
	.c_x86_vendor	= X86_VENDOR_UMC,
};

cpu_dev_register(umc_cpu_dev);

147
arch/x86/kernel/cpu/vmware.c
Normal file
@ -0,0 +1,147 @@
/*
|
||||
* VMware Detection code.
|
||||
*
|
||||
* Copyright (C) 2008, VMware, Inc.
|
||||
* Author : Alok N Kataria <akataria@vmware.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
|
||||
* NON INFRINGEMENT. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/dmi.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/div64.h>
|
||||
#include <asm/x86_init.h>
|
||||
#include <asm/hypervisor.h>
|
||||
|
||||
#define CPUID_VMWARE_INFO_LEAF 0x40000000
|
||||
#define VMWARE_HYPERVISOR_MAGIC 0x564D5868
|
||||
#define VMWARE_HYPERVISOR_PORT 0x5658
|
||||
|
||||
#define VMWARE_PORT_CMD_GETVERSION 10
|
||||
#define VMWARE_PORT_CMD_GETHZ 45
|
||||
#define VMWARE_PORT_CMD_GETVCPU_INFO 68
|
||||
#define VMWARE_PORT_CMD_LEGACY_X2APIC 3
|
||||
#define VMWARE_PORT_CMD_VCPU_RESERVED 31
|
||||
|
||||
#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
|
||||
__asm__("inl (%%dx)" : \
|
||||
"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \
|
||||
"0"(VMWARE_HYPERVISOR_MAGIC), \
|
||||
"1"(VMWARE_PORT_CMD_##cmd), \
|
||||
"2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) : \
|
||||
"memory");
|
||||
|
||||
static inline int __vmware_platform(void)
|
||||
{
|
||||
uint32_t eax, ebx, ecx, edx;
|
||||
VMWARE_PORT(GETVERSION, eax, ebx, ecx, edx);
|
||||
return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
|
||||
}
|
||||
|
||||
static unsigned long vmware_get_tsc_khz(void)
|
||||
{
|
||||
uint64_t tsc_hz, lpj;
|
||||
uint32_t eax, ebx, ecx, edx;
|
||||
|
||||
VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
|
||||
|
||||
tsc_hz = eax | (((uint64_t)ebx) << 32);
|
||||
do_div(tsc_hz, 1000);
|
||||
BUG_ON(tsc_hz >> 32);
|
||||
printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n",
|
||||
(unsigned long) tsc_hz / 1000,
|
||||
(unsigned long) tsc_hz % 1000);
|
||||
|
||||
if (!preset_lpj) {
|
||||
lpj = ((u64)tsc_hz * 1000);
|
||||
do_div(lpj, HZ);
|
||||
preset_lpj = lpj;
|
||||
}
|
||||
|
||||
return tsc_hz;
|
||||
}
|
||||
|
||||
static void __init vmware_platform_setup(void)
|
||||
{
|
||||
uint32_t eax, ebx, ecx, edx;
|
||||
|
||||
VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
|
||||
|
||||
if (ebx != UINT_MAX)
|
||||
x86_platform.calibrate_tsc = vmware_get_tsc_khz;
|
||||
else
|
||||
printk(KERN_WARNING
|
||||
"Failed to get TSC freq from the hypervisor\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* While checking the dmi string information, just checking the product
|
||||
* serial key should be enough, as this will always have a VMware
|
||||
* specific string when running under VMware hypervisor.
|
||||
*/
|
||||
static uint32_t __init vmware_platform(void)
|
||||
{
|
||||
if (cpu_has_hypervisor) {
|
||||
unsigned int eax;
|
||||
unsigned int hyper_vendor_id[3];
|
||||
|
||||
cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0],
|
||||
&hyper_vendor_id[1], &hyper_vendor_id[2]);
|
||||
if (!memcmp(hyper_vendor_id, "VMwareVMware", 12))
|
||||
return CPUID_VMWARE_INFO_LEAF;
|
||||
} else if (dmi_available && dmi_name_in_serial("VMware") &&
|
||||
__vmware_platform())
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* VMware hypervisor takes care of exporting a reliable TSC to the guest.
|
||||
* Still, due to timing difference when running on virtual cpus, the TSC can
|
||||
* be marked as unstable in some cases. For example, the TSC sync check at
|
||||
* bootup can fail due to a marginal offset between vcpus' TSCs (though the
|
||||
* TSCs do not drift from each other). Also, the ACPI PM timer clocksource
|
||||
* is not suitable as a watchdog when running on a hypervisor because the
|
||||
* kernel may miss a wrap of the counter if the vcpu is descheduled for a
|
||||
* long time. To skip these checks at runtime we set these capability bits,
|
||||
* so that the kernel could just trust the hypervisor with providing a
|
||||
* reliable virtual TSC that is suitable for timekeeping.
|
||||
*/
|
||||
static void vmware_set_cpu_features(struct cpuinfo_x86 *c)
|
||||
{
|
||||
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
||||
set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
|
||||
}
|
||||
|
||||
/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
|
||||
static bool __init vmware_legacy_x2apic_available(void)
|
||||
{
|
||||
uint32_t eax, ebx, ecx, edx;
|
||||
VMWARE_PORT(GETVCPU_INFO, eax, ebx, ecx, edx);
|
||||
return (eax & (1 << VMWARE_PORT_CMD_VCPU_RESERVED)) == 0 &&
|
||||
(eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
|
||||
}
|
||||
|
||||
const __refconst struct hypervisor_x86 x86_hyper_vmware = {
|
||||
.name = "VMware",
|
||||
.detect = vmware_platform,
|
||||
.set_cpu_features = vmware_set_cpu_features,
|
||||
.init_platform = vmware_platform_setup,
|
||||
.x2apic_available = vmware_legacy_x2apic_available,
|
||||
};
|
||||
EXPORT_SYMBOL(x86_hyper_vmware);
|
||||