Mirror of https://github.com/AetherDroid/android_kernel_samsung_on5xelte.git
Synced 2025-10-30 07:38:52 +01:00

Commit f6dfaef42e: Fixed MTP to work with TWRP
50820 changed files with 20846062 additions and 0 deletions
11	arch/x86/kernel/cpu/mcheck/Makefile	Normal file
@@ -0,0 +1,11 @@
obj-y				=  mce.o mce-severity.o

obj-$(CONFIG_X86_ANCIENT_MCE)	+= winchip.o p5.o
obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd.o
obj-$(CONFIG_X86_MCE_THRESHOLD)	+= threshold.o
obj-$(CONFIG_X86_MCE_INJECT)	+= mce-inject.o

obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o

obj-$(CONFIG_ACPI_APEI)		+= mce-apei.o
155	arch/x86/kernel/cpu/mcheck/mce-apei.c	Normal file
@@ -0,0 +1,155 @@
/*
 * Bridge between MCE and APEI
 *
 * On some machines, corrected memory errors are reported via APEI
 * generic hardware error source (GHES) instead of corrected Machine
 * Check. These corrected memory errors can be reported to user space
 * through /dev/mcelog by faking a corrected Machine Check, so that
 * the error memory page can be offlined by /sbin/mcelog if the error
 * count for one page is beyond the threshold.
 *
 * For fatal MCE, save MCE record into persistent storage via ERST, so
 * that the MCE record can be logged after reboot via ERST.
 *
 * Copyright 2010 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/acpi.h>
#include <linux/cper.h>
#include <acpi/apei.h>
#include <acpi/ghes.h>
#include <asm/mce.h>

#include "mce-internal.h"

void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
{
	struct mce m;

	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
		return;

	mce_setup(&m);
	m.bank = 1;
	/* Fake a memory read error with unknown channel */
	m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;

	if (severity >= GHES_SEV_RECOVERABLE)
		m.status |= MCI_STATUS_UC;
	if (severity >= GHES_SEV_PANIC)
		m.status |= MCI_STATUS_PCC;

	m.addr = mem_err->physical_addr;
	mce_log(&m);
	mce_notify_irq();
}
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);

#define CPER_CREATOR_MCE						\
	UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c,	\
		0x64, 0x90, 0xb8, 0x9d)
#define CPER_SECTION_TYPE_MCE						\
	UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96,	\
		0x04, 0x4a, 0x38, 0xfc)

/*
 * CPER specification (in UEFI specification 2.3 appendix N) requires
 * byte-packed.
 */
struct cper_mce_record {
	struct cper_record_header hdr;
	struct cper_section_descriptor sec_hdr;
	struct mce mce;
} __packed;

int apei_write_mce(struct mce *m)
{
	struct cper_mce_record rcd;

	memset(&rcd, 0, sizeof(rcd));
	memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
	rcd.hdr.revision = CPER_RECORD_REV;
	rcd.hdr.signature_end = CPER_SIG_END;
	rcd.hdr.section_count = 1;
	rcd.hdr.error_severity = CPER_SEV_FATAL;
	/* timestamp, platform_id, partition_id are all invalid */
	rcd.hdr.validation_bits = 0;
	rcd.hdr.record_length = sizeof(rcd);
	rcd.hdr.creator_id = CPER_CREATOR_MCE;
	rcd.hdr.notification_type = CPER_NOTIFY_MCE;
	rcd.hdr.record_id = cper_next_record_id();
	rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;

	rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd;
	rcd.sec_hdr.section_length = sizeof(rcd.mce);
	rcd.sec_hdr.revision = CPER_SEC_REV;
	/* fru_id and fru_text are invalid */
	rcd.sec_hdr.validation_bits = 0;
	rcd.sec_hdr.flags = CPER_SEC_PRIMARY;
	rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
	rcd.sec_hdr.section_severity = CPER_SEV_FATAL;

	memcpy(&rcd.mce, m, sizeof(*m));

	return erst_write(&rcd.hdr);
}

ssize_t apei_read_mce(struct mce *m, u64 *record_id)
{
	struct cper_mce_record rcd;
	int rc, pos;

	rc = erst_get_record_id_begin(&pos);
	if (rc)
		return rc;
retry:
	rc = erst_get_record_id_next(&pos, record_id);
	if (rc)
		goto out;
	/* no more record */
	if (*record_id == APEI_ERST_INVALID_RECORD_ID)
		goto out;
	rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd));
	/* someone else has cleared the record, try next one */
	if (rc == -ENOENT)
		goto retry;
	else if (rc < 0)
		goto out;
	/* try to skip other type records in storage */
	else if (rc != sizeof(rcd) ||
		 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
		goto retry;
	memcpy(m, &rcd.mce, sizeof(*m));
	rc = sizeof(*m);
out:
	erst_get_record_id_end();

	return rc;
}

/* Check whether there is record in ERST */
int apei_check_mce(void)
{
	return erst_get_record_count();
}

int apei_clear_mce(u64 record_id)
{
	return erst_clear(record_id);
}
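A note on the status word this file fabricates: the low 16 bits of an MCA
status register hold the architectural error code, and 0x9f is the compound
memory-error encoding 1MMM_CCCC with MMM = 001 (memory read) and CCCC = 1111
(channel unknown), which is why the comment calls it a fake memory read error
with unknown channel. A minimal user-space sketch decoding the same word (bit
positions follow the Intel SDM; the defines restate asm/mce.h values purely
for illustration):

	#include <stdio.h>
	#include <stdint.h>

	#define MCI_STATUS_VAL   (1ULL << 63)	/* record is valid */
	#define MCI_STATUS_EN    (1ULL << 60)	/* error reporting enabled */
	#define MCI_STATUS_ADDRV (1ULL << 58)	/* m.addr holds a valid address */

	int main(void)
	{
		uint64_t status = MCI_STATUS_VAL | MCI_STATUS_EN |
				  MCI_STATUS_ADDRV | 0x9f;
		uint64_t mca_code = status & 0xffff;	/* architectural error code */

		printf("status   = %#018llx\n", (unsigned long long)status);
		printf("mca code = %#06llx (memory read, channel unknown)\n",
		       (unsigned long long)mca_code);
		return 0;
	}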
256	arch/x86/kernel/cpu/mcheck/mce-inject.c	Normal file
@@ -0,0 +1,256 @@
/*
 * Machine check injection support.
 * Copyright 2008 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 *
 * Authors:
 * Andi Kleen
 * Ying Huang
 */
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/preempt.h>
#include <linux/smp.h>
#include <linux/notifier.h>
#include <linux/kdebug.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/gfp.h>
#include <asm/mce.h>
#include <asm/apic.h>
#include <asm/nmi.h>

/* Update fake mce registers on current CPU. */
static void inject_mce(struct mce *m)
{
	struct mce *i = &per_cpu(injectm, m->extcpu);

	/* Make sure no one reads partially written injectm */
	i->finished = 0;
	mb();
	m->finished = 0;
	/* First set the fields after finished */
	i->extcpu = m->extcpu;
	mb();
	/* Now write record in order, finished last (except above) */
	memcpy(i, m, sizeof(struct mce));
	/* Finally activate it */
	mb();
	i->finished = 1;
}

static void raise_poll(struct mce *m)
{
	unsigned long flags;
	mce_banks_t b;

	memset(&b, 0xff, sizeof(mce_banks_t));
	local_irq_save(flags);
	machine_check_poll(0, &b);
	local_irq_restore(flags);
	m->finished = 0;
}

static void raise_exception(struct mce *m, struct pt_regs *pregs)
{
	struct pt_regs regs;
	unsigned long flags;

	if (!pregs) {
		memset(&regs, 0, sizeof(struct pt_regs));
		regs.ip = m->ip;
		regs.cs = m->cs;
		pregs = &regs;
	}
	/* in the mcheck exception handler, irqs will be disabled */
	local_irq_save(flags);
	do_machine_check(pregs, 0);
	local_irq_restore(flags);
	m->finished = 0;
}

static cpumask_var_t mce_inject_cpumask;
static DEFINE_MUTEX(mce_inject_mutex);

static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
{
	int cpu = smp_processor_id();
	struct mce *m = this_cpu_ptr(&injectm);
	if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
		return NMI_DONE;
	cpumask_clear_cpu(cpu, mce_inject_cpumask);
	if (m->inject_flags & MCJ_EXCEPTION)
		raise_exception(m, regs);
	else if (m->status)
		raise_poll(m);
	return NMI_HANDLED;
}

static void mce_irq_ipi(void *info)
{
	int cpu = smp_processor_id();
	struct mce *m = this_cpu_ptr(&injectm);

	if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
	    m->inject_flags & MCJ_EXCEPTION) {
		cpumask_clear_cpu(cpu, mce_inject_cpumask);
		raise_exception(m, NULL);
	}
}

/* Inject mce on current CPU */
static int raise_local(void)
{
	struct mce *m = this_cpu_ptr(&injectm);
	int context = MCJ_CTX(m->inject_flags);
	int ret = 0;
	int cpu = m->extcpu;

	if (m->inject_flags & MCJ_EXCEPTION) {
		printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
		switch (context) {
		case MCJ_CTX_IRQ:
			/*
			 * Could do more to fake interrupts like
			 * calling irq_enter, but the necessary
			 * machinery isn't exported currently.
			 */
			/*FALL THROUGH*/
		case MCJ_CTX_PROCESS:
			raise_exception(m, NULL);
			break;
		default:
			printk(KERN_INFO "Invalid MCE context\n");
			ret = -EINVAL;
		}
		printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
	} else if (m->status) {
		printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
		raise_poll(m);
		mce_notify_irq();
		printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
	} else
		m->finished = 0;

	return ret;
}

static void raise_mce(struct mce *m)
{
	int context = MCJ_CTX(m->inject_flags);

	inject_mce(m);

	if (context == MCJ_CTX_RANDOM)
		return;

#ifdef CONFIG_X86_LOCAL_APIC
	if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
		unsigned long start;
		int cpu;

		get_online_cpus();
		cpumask_copy(mce_inject_cpumask, cpu_online_mask);
		cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
		for_each_online_cpu(cpu) {
			struct mce *mcpu = &per_cpu(injectm, cpu);
			if (!mcpu->finished ||
			    MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
				cpumask_clear_cpu(cpu, mce_inject_cpumask);
		}
		if (!cpumask_empty(mce_inject_cpumask)) {
			if (m->inject_flags & MCJ_IRQ_BROADCAST) {
				/*
				 * don't wait: mce_irq_ipi needs to stay
				 * synchronized with the following raise_local
				 */
				preempt_disable();
				smp_call_function_many(mce_inject_cpumask,
					mce_irq_ipi, NULL, 0);
				preempt_enable();
			} else if (m->inject_flags & MCJ_NMI_BROADCAST)
				apic->send_IPI_mask(mce_inject_cpumask,
						NMI_VECTOR);
		}
		start = jiffies;
		while (!cpumask_empty(mce_inject_cpumask)) {
			if (!time_before(jiffies, start + 2*HZ)) {
				printk(KERN_ERR
				"Timeout waiting for mce inject %lx\n",
					*cpumask_bits(mce_inject_cpumask));
				break;
			}
			cpu_relax();
		}
		raise_local();
		put_cpu();
		put_online_cpus();
	} else
#endif
	{
		preempt_disable();
		raise_local();
		preempt_enable();
	}
}

/* Error injection interface */
static ssize_t mce_write(struct file *filp, const char __user *ubuf,
			 size_t usize, loff_t *off)
{
	struct mce m;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	/*
	 * There are some cases where real MSR reads could slip
	 * through.
	 */
	if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
		return -EIO;

	if ((unsigned long)usize > sizeof(struct mce))
		usize = sizeof(struct mce);
	if (copy_from_user(&m, ubuf, usize))
		return -EFAULT;

	if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
		return -EINVAL;

	/*
	 * Need to give user space some time to set everything up,
	 * so do it a jiffie or two later everywhere.
	 */
	schedule_timeout(2);

	mutex_lock(&mce_inject_mutex);
	raise_mce(&m);
	mutex_unlock(&mce_inject_mutex);
	return usize;
}

static int inject_init(void)
{
	if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
		return -ENOMEM;
	printk(KERN_INFO "Machine check injector initialized\n");
	register_mce_write_callback(mce_write);
	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
				"mce_notify");
	return 0;
}

module_init(inject_init);
/*
 * Cannot tolerate unloading currently because we cannot
 * guarantee all openers of mce_chrdev will get a reference to us.
 */
MODULE_LICENSE("GPL");
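For orientation: inject_init() hooks mce_write() into writes on /dev/mcelog
via register_mce_write_callback(), so injection is driven by writing a binary
struct mce from user space; in practice the mce-inject tool from the mcelog
project does this for you. A rough sketch of the raw interface, assuming the
running kernel exports struct mce and the MCJ_*/MCI_STATUS_* constants through
its headers (field names are taken from this file; the struct layout is not a
stable ABI, so this is illustrative only):

	#include <asm/mce.h>		/* struct mce, MCJ_*, MCI_STATUS_* */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		struct mce m;
		int fd = open("/dev/mcelog", O_RDWR);

		if (fd < 0) {
			perror("open /dev/mcelog");
			return 1;
		}
		memset(&m, 0, sizeof(m));
		m.extcpu = 0;					/* target CPU */
		m.inject_flags = MCJ_EXCEPTION | MCJ_CTX_PROCESS;
		m.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN;
		if (write(fd, &m, sizeof(m)) < 0)		/* lands in mce_write() */
			perror("write");
		close(fd);
		return 0;
	}

Note the CAP_SYS_ADMIN check in mce_write(): this must run as root, on a
kernel built with CONFIG_X86_MCE_INJECT.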
66	arch/x86/kernel/cpu/mcheck/mce-internal.h	Normal file
@@ -0,0 +1,66 @@
#include <linux/device.h>
#include <asm/mce.h>

enum severity_level {
	MCE_NO_SEVERITY,
	MCE_KEEP_SEVERITY,
	MCE_SOME_SEVERITY,
	MCE_AO_SEVERITY,
	MCE_UC_SEVERITY,
	MCE_AR_SEVERITY,
	MCE_PANIC_SEVERITY,
};

#define ATTR_LEN		16

/* One object for each MCE bank, shared by all CPUs */
struct mce_bank {
	u64			ctl;			/* subevents to enable */
	unsigned char		init;			/* initialise bank? */
	struct device_attribute	attr;			/* device attribute */
	char			attrname[ATTR_LEN];	/* attribute name */
};

int mce_severity(struct mce *a, int tolerant, char **msg);
struct dentry *mce_get_debugfs_dir(void);

extern struct mce_bank *mce_banks;
extern mce_banks_t mce_banks_ce_disabled;

#ifdef CONFIG_X86_MCE_INTEL
unsigned long mce_intel_adjust_timer(unsigned long interval);
void mce_intel_cmci_poll(void);
void mce_intel_hcpu_update(unsigned long cpu);
void cmci_disable_bank(int bank);
#else
# define mce_intel_adjust_timer mce_adjust_timer_default
static inline void mce_intel_cmci_poll(void) { }
static inline void mce_intel_hcpu_update(unsigned long cpu) { }
static inline void cmci_disable_bank(int bank) { }
#endif

void mce_timer_kick(unsigned long interval);

#ifdef CONFIG_ACPI_APEI
int apei_write_mce(struct mce *m);
ssize_t apei_read_mce(struct mce *m, u64 *record_id);
int apei_check_mce(void);
int apei_clear_mce(u64 record_id);
#else
static inline int apei_write_mce(struct mce *m)
{
	return -EINVAL;
}
static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id)
{
	return 0;
}
static inline int apei_check_mce(void)
{
	return 0;
}
static inline int apei_clear_mce(u64 record_id)
{
	return -EINVAL;
}
#endif
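The #else branches above are the usual kernel stub pattern: callers in mce.c
invoke apei_write_mce() or mce_intel_cmci_poll() unconditionally, and when the
config option is off the static inline stub (or constant error return)
compiles away, so no #ifdef is needed at the call site. The same idea as a
stand-alone sketch (CONFIG_FEATURE and feature_write are hypothetical names
for illustration):

	#include <stdio.h>

	/* #define CONFIG_FEATURE 1 */

	#ifdef CONFIG_FEATURE
	int feature_write(int v);	/* real implementation lives elsewhere */
	#else
	static inline int feature_write(int v) { return -22; /* -EINVAL */ }
	#endif

	int main(void)
	{
		/* The call site stays ifdef-free either way. */
		printf("feature_write() = %d\n", feature_write(1));
		return 0;
	}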
283	arch/x86/kernel/cpu/mcheck/mce-severity.c	Normal file
@@ -0,0 +1,283 @@
/*
 * MCE grading rules.
 * Copyright 2008, 2009 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 *
 * Author: Andi Kleen
 */
#include <linux/kernel.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/debugfs.h>
#include <asm/mce.h>

#include "mce-internal.h"

/*
 * Grade an mce by severity. In general the most severe ones are processed
 * first. Since there are quite a lot of combinations, test the bits in a
 * table-driven way. The rules are simply processed in order; first
 * match wins.
 *
 * Note this is only used for machine check exceptions, the corrected
 * errors use much simpler rules. The exceptions still check for the corrected
 * errors, but only to leave them alone for the CMCI handler (except for
 * panic situations)
 */

enum context { IN_KERNEL = 1, IN_USER = 2 };
enum ser { SER_REQUIRED = 1, NO_SER = 2 };

static struct severity {
	u64 mask;
	u64 result;
	unsigned char sev;
	unsigned char mcgmask;
	unsigned char mcgres;
	unsigned char ser;
	unsigned char context;
	unsigned char covered;
	char *msg;
} severities[] = {
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
#define KERNEL		.context = IN_KERNEL
#define USER		.context = IN_USER
#define SER		.ser = SER_REQUIRED
#define NOSER		.ser = NO_SER
#define BITCLR(x)	.mask = x, .result = 0
#define BITSET(x)	.mask = x, .result = x
#define MCGMASK(x, y)	.mcgmask = x, .mcgres = y
#define MASK(x, y)	.mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define	MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)

	MCESEV(
		NO, "Invalid",
		BITCLR(MCI_STATUS_VAL)
		),
	MCESEV(
		NO, "Not enabled",
		BITCLR(MCI_STATUS_EN)
		),
	MCESEV(
		PANIC, "Processor context corrupt",
		BITSET(MCI_STATUS_PCC)
		),
	/* When MCIP is not set something is very confused */
	MCESEV(
		PANIC, "MCIP not set in MCA handler",
		MCGMASK(MCG_STATUS_MCIP, 0)
		),
	/* Neither return nor error IP -- no chance to recover -> PANIC */
	MCESEV(
		PANIC, "Neither restart nor error IP",
		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
		),
	MCESEV(
		PANIC, "In kernel and no restart IP",
		KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
		),
	MCESEV(
		KEEP, "Corrected error",
		NOSER, BITCLR(MCI_STATUS_UC)
		),

	/* ignore OVER for UCNA */
	MCESEV(
		KEEP, "Uncorrected no action required",
		SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
		),
	MCESEV(
		PANIC, "Illegal combination (UCNA with AR=1)",
		SER,
		MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
		),
	MCESEV(
		KEEP, "Non signalled machine check",
		SER, BITCLR(MCI_STATUS_S)
		),

	MCESEV(
		PANIC, "Action required with lost events",
		SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
		),

	/* known AR MCACODs: */
#ifdef	CONFIG_MEMORY_FAILURE
	MCESEV(
		KEEP, "Action required but unaffected thread is continuable",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
		),
	MCESEV(
		AR, "Action required: data load error in a user process",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		USER
		),
	MCESEV(
		AR, "Action required: instruction fetch error in a user process",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
		USER
		),
#endif
	MCESEV(
		PANIC, "Action required: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
		),

	/* known AO MCACODs: */
	MCESEV(
		AO, "Action optional: memory scrubbing error",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB)
		),
	MCESEV(
		AO, "Action optional: last level cache writeback error",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB)
		),
	MCESEV(
		SOME, "Action optional: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
		),
	MCESEV(
		SOME, "Action optional with lost events",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
		),

	MCESEV(
		PANIC, "Overflowed uncorrected",
		BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
		),
	MCESEV(
		UC, "Uncorrected",
		BITSET(MCI_STATUS_UC)
		),
	MCESEV(
		SOME, "No match",
		BITSET(0)
		)	/* always matches. keep at end */
};

/*
 * If mcgstatus indicated that ip/cs on the stack were
 * no good, then "m->cs" will be zero and we will have
 * to assume the worst case (IN_KERNEL) as we actually
 * have no idea what we were executing when the machine
 * check hit.
 * If we do have a good "m->cs" (or a faked one in the
 * case we were executing in VM86 mode) we can use it to
 * distinguish an exception taken in user from one
 * taken in the kernel.
 */
static int error_context(struct mce *m)
{
	return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
}

int mce_severity(struct mce *m, int tolerant, char **msg)
{
	enum context ctx = error_context(m);
	struct severity *s;

	for (s = severities;; s++) {
		if ((m->status & s->mask) != s->result)
			continue;
		if ((m->mcgstatus & s->mcgmask) != s->mcgres)
			continue;
		if (s->ser == SER_REQUIRED && !mca_cfg.ser)
			continue;
		if (s->ser == NO_SER && mca_cfg.ser)
			continue;
		if (s->context && ctx != s->context)
			continue;
		if (msg)
			*msg = s->msg;
		s->covered = 1;
		if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
			if (panic_on_oops || tolerant < 1)
				return MCE_PANIC_SEVERITY;
		}
		return s->sev;
	}
}

#ifdef CONFIG_DEBUG_FS
static void *s_start(struct seq_file *f, loff_t *pos)
{
	if (*pos >= ARRAY_SIZE(severities))
		return NULL;

	return &severities[*pos];
}

static void *s_next(struct seq_file *f, void *data, loff_t *pos)
{
	if (++(*pos) >= ARRAY_SIZE(severities))
		return NULL;

	return &severities[*pos];
}

static void s_stop(struct seq_file *f, void *data)
{
}

static int s_show(struct seq_file *f, void *data)
{
	struct severity *ser = data;
	seq_printf(f, "%d\t%s\n", ser->covered, ser->msg);
	return 0;
}

static const struct seq_operations severities_seq_ops = {
	.start	= s_start,
	.next	= s_next,
	.stop	= s_stop,
	.show	= s_show,
};

static int severities_coverage_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &severities_seq_ops);
}

static ssize_t severities_coverage_write(struct file *file,
					 const char __user *ubuf,
					 size_t count, loff_t *ppos)
{
	int i;
	for (i = 0; i < ARRAY_SIZE(severities); i++)
		severities[i].covered = 0;
	return count;
}

static const struct file_operations severities_coverage_fops = {
	.open		= severities_coverage_open,
	.release	= seq_release,
	.read		= seq_read,
	.write		= severities_coverage_write,
	.llseek		= seq_lseek,
};

static int __init severities_debugfs_init(void)
{
	struct dentry *dmce, *fsev;

	dmce = mce_get_debugfs_dir();
	if (!dmce)
		goto err_out;

	fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL,
				   &severities_coverage_fops);
	if (!fsev)
		goto err_out;

	return 0;

err_out:
	return -ENOMEM;
}
late_initcall(severities_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
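To make the first-match rule concrete: a status of VAL|EN|UC|OVER fails the
"Invalid" and "Not enabled" rules (their masked bits are set, not clear),
skips the SER-only rules on a machine without the SER capability, and first
matches "Overflowed uncorrected", yielding MCE_PANIC_SEVERITY. A stand-alone
sketch of the same table walk with just three of the rules (bit positions per
the Intel SDM; this mirrors the loop in mce_severity(), nothing more):

	#include <stdio.h>
	#include <stdint.h>

	#define VAL  (1ULL << 63)
	#define OVER (1ULL << 62)
	#define UC   (1ULL << 61)
	#define EN   (1ULL << 60)

	struct rule { uint64_t mask, result; const char *msg; };

	static const struct rule rules[] = {
		{ VAL,       0,         "Invalid" },		/* BITCLR(VAL) */
		{ OVER | UC, OVER | UC, "Overflowed uncorrected" },
		{ 0,         0,         "No match" },		/* always matches */
	};

	int main(void)
	{
		uint64_t status = VAL | EN | UC | OVER;
		const struct rule *r;

		for (r = rules; ; r++)				/* first match wins */
			if ((status & r->mask) == r->result)
				break;
		printf("graded as: %s\n", r->msg);		/* Overflowed uncorrected */
		return 0;
	}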
2560	arch/x86/kernel/cpu/mcheck/mce.c	Normal file
(File diff suppressed because it is too large.)
789	arch/x86/kernel/cpu/mcheck/mce_amd.c	Normal file
@@ -0,0 +1,789 @@
/*
 * (c) 2005-2012 Advanced Micro Devices, Inc.
 * Your use of this code is subject to the terms and conditions of the
 * GNU general public license version 2. See "COPYING" or
 * http://www.gnu.org/licenses/gpl.html
 *
 * Written by Jacob Shin - AMD, Inc.
 *
 * Maintained by: Borislav Petkov <bp@alien8.de>
 *
 * April 2006
 *     - added support for AMD Family 0x10 processors
 * May 2012
 *     - major scrubbing
 *
 * All MC4_MISCi registers are shared between multi-cores
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/kobject.h>
#include <linux/percpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>

#include <asm/amd_nb.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>

#define NR_BLOCKS         9
#define THRESHOLD_MAX     0xFFF
#define INT_TYPE_APIC     0x00020000
#define MASK_VALID_HI     0x80000000
#define MASK_CNTP_HI      0x40000000
#define MASK_LOCKED_HI    0x20000000
#define MASK_LVTOFF_HI    0x00F00000
#define MASK_COUNT_EN_HI  0x00080000
#define MASK_INT_TYPE_HI  0x00060000
#define MASK_OVERFLOW_HI  0x00010000
#define MASK_ERR_COUNT_HI 0x00000FFF
#define MASK_BLKPTR_LO    0xFF000000
#define MCG_XBLK_ADDR     0xC0000400

static const char * const th_names[] = {
	"load_store",
	"insn_fetch",
	"combined_unit",
	"",
	"northbridge",
	"execution_unit",
};

static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */

static void amd_threshold_interrupt(void);

/*
 * CPU Initialization
 */

struct thresh_restart {
	struct threshold_block	*b;
	int			reset;
	int			set_lvt_off;
	int			lvt_off;
	u16			old_limit;
};

static inline bool is_shared_bank(int bank)
{
	/* Bank 4 is for northbridge reporting and is thus shared */
	return (bank == 4);
}

static const char * const bank4_names(struct threshold_block *b)
{
	switch (b->address) {
	/* MSR4_MISC0 */
	case 0x00000413:
		return "dram";

	case 0xc0000408:
		return "ht_links";

	case 0xc0000409:
		return "l3_cache";

	default:
		WARN(1, "Funny MSR: 0x%08x\n", b->address);
		return "";
	}
};


static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
{
	/*
	 * bank 4 supports APIC LVT interrupts implicitly since forever.
	 */
	if (bank == 4)
		return true;

	/*
	 * IntP: interrupt present; if this bit is set, the thresholding
	 * bank can generate APIC LVT interrupts
	 */
	return msr_high_bits & BIT(28);
}

static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{
	int msr = (hi & MASK_LVTOFF_HI) >> 20;

	if (apic < 0) {
		pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
		       b->bank, b->block, b->address, hi, lo);
		return 0;
	}

	if (apic != msr) {
		pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
		       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
		return 0;
	}

	return 1;
};

/*
 * Called via smp_call_function_single(), must be called with correct
 * cpu affinity.
 */
static void threshold_restart_bank(void *_tr)
{
	struct thresh_restart *tr = _tr;
	u32 hi, lo;

	rdmsr(tr->b->address, lo, hi);

	if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
		tr->reset = 1;	/* limit cannot be lower than err count */

	if (tr->reset) {		/* reset err count and overflow bit */
		hi =
		    (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
		    (THRESHOLD_MAX - tr->b->threshold_limit);
	} else if (tr->old_limit) {	/* change limit w/o reset */
		int new_count = (hi & THRESHOLD_MAX) +
		    (tr->old_limit - tr->b->threshold_limit);

		hi = (hi & ~MASK_ERR_COUNT_HI) |
		    (new_count & THRESHOLD_MAX);
	}

	/* clear IntType */
	hi &= ~MASK_INT_TYPE_HI;

	if (!tr->b->interrupt_capable)
		goto done;

	if (tr->set_lvt_off) {
		if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
			/* set new lvt offset */
			hi &= ~MASK_LVTOFF_HI;
			hi |= tr->lvt_off << 20;
		}
	}

	if (tr->b->interrupt_enable)
		hi |= INT_TYPE_APIC;

 done:

	hi |= MASK_COUNT_EN_HI;
	wrmsr(tr->b->address, lo, hi);
}

static void mce_threshold_block_init(struct threshold_block *b, int offset)
{
	struct thresh_restart tr = {
		.b			= b,
		.set_lvt_off		= 1,
		.lvt_off		= offset,
	};

	b->threshold_limit		= THRESHOLD_MAX;
	threshold_restart_bank(&tr);
};

static int setup_APIC_mce(int reserved, int new)
{
	if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
					      APIC_EILVT_MSG_FIX, 0))
		return new;

	return reserved;
}

/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
	struct threshold_block b;
	unsigned int cpu = smp_processor_id();
	u32 low = 0, high = 0, address = 0;
	unsigned int bank, block;
	int offset = -1;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		for (block = 0; block < NR_BLOCKS; ++block) {
			if (block == 0)
				address = MSR_IA32_MC0_MISC + bank * 4;
			else if (block == 1) {
				address = (low & MASK_BLKPTR_LO) >> 21;
				if (!address)
					break;

				address += MCG_XBLK_ADDR;
			} else
				++address;

			if (rdmsr_safe(address, &low, &high))
				break;

			if (!(high & MASK_VALID_HI))
				continue;

			if (!(high & MASK_CNTP_HI)  ||
			     (high & MASK_LOCKED_HI))
				continue;

			if (!block)
				per_cpu(bank_map, cpu) |= (1 << bank);

			memset(&b, 0, sizeof(b));
			b.cpu			= cpu;
			b.bank			= bank;
			b.block			= block;
			b.address		= address;
			b.interrupt_capable	= lvt_interrupt_supported(bank, high);

			if (b.interrupt_capable) {
				int new = (high & MASK_LVTOFF_HI) >> 20;
				offset  = setup_APIC_mce(offset, new);
			}

			mce_threshold_block_init(&b, offset);
			mce_threshold_vector = amd_threshold_interrupt;
		}
	}
}

/*
 * APIC Interrupt Handler
 */

/*
 * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
 * the interrupt goes off when error_count reaches threshold_limit.
 * the handler will simply log mcelog w/ software defined bank number.
 */
static void amd_threshold_interrupt(void)
{
	u32 low = 0, high = 0, address = 0;
	unsigned int bank, block;
	struct mce m;

	mce_setup(&m);

	/* assume first bank caused it */
	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, m.cpu) & (1 << bank)))
			continue;
		for (block = 0; block < NR_BLOCKS; ++block) {
			if (block == 0) {
				address = MSR_IA32_MC0_MISC + bank * 4;
			} else if (block == 1) {
				address = (low & MASK_BLKPTR_LO) >> 21;
				if (!address)
					break;
				address += MCG_XBLK_ADDR;
			} else {
				++address;
			}

			if (rdmsr_safe(address, &low, &high))
				break;

			if (!(high & MASK_VALID_HI)) {
				if (block)
					continue;
				else
					break;
			}

			if (!(high & MASK_CNTP_HI)  ||
			     (high & MASK_LOCKED_HI))
				continue;

			/*
			 * Log the machine check that caused the threshold
			 * event.
			 */
			machine_check_poll(MCP_TIMESTAMP,
					this_cpu_ptr(&mce_poll_banks));

			if (high & MASK_OVERFLOW_HI) {
				rdmsrl(address, m.misc);
				rdmsrl(MSR_IA32_MC0_STATUS + bank * 4,
				       m.status);
				m.bank = K8_MCE_THRESHOLD_BASE
				       + bank * NR_BLOCKS
				       + block;
				mce_log(&m);
				return;
			}
		}
	}
}

/*
 * Sysfs Interface
 */

struct threshold_attr {
	struct attribute attr;
	ssize_t (*show) (struct threshold_block *, char *);
	ssize_t (*store) (struct threshold_block *, const char *, size_t count);
};

#define SHOW_FIELDS(name)						\
static ssize_t show_ ## name(struct threshold_block *b, char *buf)	\
{									\
	return sprintf(buf, "%lu\n", (unsigned long) b->name);		\
}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)

static ssize_t
store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (!b->interrupt_capable)
		return -EINVAL;

	if (kstrtoul(buf, 0, &new) < 0)
		return -EINVAL;

	b->interrupt_enable = !!new;

	memset(&tr, 0, sizeof(tr));
	tr.b		= b;

	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

	return size;
}

static ssize_t
store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (kstrtoul(buf, 0, &new) < 0)
		return -EINVAL;

	if (new > THRESHOLD_MAX)
		new = THRESHOLD_MAX;
	if (new < 1)
		new = 1;

	memset(&tr, 0, sizeof(tr));
	tr.old_limit = b->threshold_limit;
	b->threshold_limit = new;
	tr.b = b;

	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

	return size;
}

static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
	u32 lo, hi;

	rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);

	return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
				     (THRESHOLD_MAX - b->threshold_limit)));
}

static struct threshold_attr error_count = {
	.attr = {.name = __stringify(error_count), .mode = 0444 },
	.show = show_error_count,
};

#define RW_ATTR(val)							\
static struct threshold_attr val = {					\
	.attr	= {.name = __stringify(val), .mode = 0644 },		\
	.show	= show_## val,						\
	.store	= store_## val,						\
};

RW_ATTR(interrupt_enable);
RW_ATTR(threshold_limit);

static struct attribute *default_attrs[] = {
	&threshold_limit.attr,
	&error_count.attr,
	NULL,	/* possibly interrupt_enable if supported, see below */
	NULL,
};

#define to_block(k)	container_of(k, struct threshold_block, kobj)
#define to_attr(a)	container_of(a, struct threshold_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->show ? a->show(b, buf) : -EIO;

	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->store ? a->store(b, buf, count) : -EIO;

	return ret;
}

static const struct sysfs_ops threshold_ops = {
	.show			= show,
	.store			= store,
};

static struct kobj_type threshold_ktype = {
	.sysfs_ops		= &threshold_ops,
	.default_attrs		= default_attrs,
};

static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
				     unsigned int block, u32 address)
{
	struct threshold_block *b = NULL;
	u32 low, high;
	int err;

	if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
		return 0;

	if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
		return 0;

	if (!(high & MASK_VALID_HI)) {
		if (block)
			goto recurse;
		else
			return 0;
	}

	if (!(high & MASK_CNTP_HI)  ||
	     (high & MASK_LOCKED_HI))
		goto recurse;

	b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
	if (!b)
		return -ENOMEM;

	b->block		= block;
	b->bank			= bank;
	b->cpu			= cpu;
	b->address		= address;
	b->interrupt_enable	= 0;
	b->interrupt_capable	= lvt_interrupt_supported(bank, high);
	b->threshold_limit	= THRESHOLD_MAX;

	if (b->interrupt_capable)
		threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
	else
		threshold_ktype.default_attrs[2] = NULL;

	INIT_LIST_HEAD(&b->miscj);

	if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
		list_add(&b->miscj,
			 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
	} else {
		per_cpu(threshold_banks, cpu)[bank]->blocks = b;
	}

	err = kobject_init_and_add(&b->kobj, &threshold_ktype,
				   per_cpu(threshold_banks, cpu)[bank]->kobj,
				   (bank == 4 ? bank4_names(b) : th_names[bank]));
	if (err)
		goto out_free;
recurse:
	if (!block) {
		address = (low & MASK_BLKPTR_LO) >> 21;
		if (!address)
			return 0;
		address += MCG_XBLK_ADDR;
	} else {
		++address;
	}

	err = allocate_threshold_blocks(cpu, bank, ++block, address);
	if (err)
		goto out_free;

	if (b)
		kobject_uevent(&b->kobj, KOBJ_ADD);

	return err;

out_free:
	if (b) {
		kobject_put(&b->kobj);
		list_del(&b->miscj);
		kfree(b);
	}
	return err;
}

static int __threshold_add_blocks(struct threshold_bank *b)
{
	struct list_head *head = &b->blocks->miscj;
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;
	int err = 0;

	err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
	if (err)
		return err;

	list_for_each_entry_safe(pos, tmp, head, miscj) {

		err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
		if (err) {
			list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
				kobject_del(&pos->kobj);

			return err;
		}
	}
	return err;
}

static int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
	struct device *dev = per_cpu(mce_device, cpu);
	struct amd_northbridge *nb = NULL;
	struct threshold_bank *b = NULL;
	const char *name = th_names[bank];
	int err = 0;

	if (is_shared_bank(bank)) {
		nb = node_to_amd_nb(amd_get_nb_id(cpu));

		/* threshold descriptor already initialized on this node? */
		if (nb && nb->bank4) {
			/* yes, use it */
			b = nb->bank4;
			err = kobject_add(b->kobj, &dev->kobj, name);
			if (err)
				goto out;

			per_cpu(threshold_banks, cpu)[bank] = b;
			atomic_inc(&b->cpus);

			err = __threshold_add_blocks(b);

			goto out;
		}
	}

	b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
	if (!b) {
		err = -ENOMEM;
		goto out;
	}

	b->kobj = kobject_create_and_add(name, &dev->kobj);
	if (!b->kobj) {
		err = -EINVAL;
		goto out_free;
	}

	per_cpu(threshold_banks, cpu)[bank] = b;

	if (is_shared_bank(bank)) {
		atomic_set(&b->cpus, 1);

		/* nb is already initialized, see above */
		if (nb) {
			WARN_ON(nb->bank4);
			nb->bank4 = b;
		}
	}

	err = allocate_threshold_blocks(cpu, bank, 0,
					MSR_IA32_MC0_MISC + bank * 4);
	if (!err)
		goto out;

 out_free:
	kfree(b);

 out:
	return err;
}

/* create dir/files for all valid threshold banks */
static int threshold_create_device(unsigned int cpu)
{
	unsigned int bank;
	struct threshold_bank **bp;
	int err = 0;

	bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
		     GFP_KERNEL);
	if (!bp)
		return -ENOMEM;

	per_cpu(threshold_banks, cpu) = bp;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		err = threshold_create_bank(cpu, bank);
		if (err)
			return err;
	}

	return err;
}

static void deallocate_threshold_block(unsigned int cpu,
						 unsigned int bank)
{
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;
	struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];

	if (!head)
		return;

	list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
		kobject_put(&pos->kobj);
		list_del(&pos->miscj);
		kfree(pos);
	}

	kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
	per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
}

static void __threshold_remove_blocks(struct threshold_bank *b)
{
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;

	kobject_del(b->kobj);

	list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
		kobject_del(&pos->kobj);
}

static void threshold_remove_bank(unsigned int cpu, int bank)
{
	struct amd_northbridge *nb;
	struct threshold_bank *b;

	b = per_cpu(threshold_banks, cpu)[bank];
	if (!b)
		return;

	if (!b->blocks)
		goto free_out;

	if (is_shared_bank(bank)) {
		if (!atomic_dec_and_test(&b->cpus)) {
			__threshold_remove_blocks(b);
			per_cpu(threshold_banks, cpu)[bank] = NULL;
			return;
		} else {
			/*
			 * the last CPU on this node using the shared bank is
			 * going away, remove that bank now.
			 */
			nb = node_to_amd_nb(amd_get_nb_id(cpu));
			nb->bank4 = NULL;
		}
	}

	deallocate_threshold_block(cpu, bank);

free_out:
	kobject_del(b->kobj);
	kobject_put(b->kobj);
	kfree(b);
	per_cpu(threshold_banks, cpu)[bank] = NULL;
}

static void threshold_remove_device(unsigned int cpu)
{
	unsigned int bank;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		threshold_remove_bank(cpu, bank);
	}
	kfree(per_cpu(threshold_banks, cpu));
}

/* get notified when a cpu comes on/off */
static void
amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
{
	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		threshold_create_device(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		threshold_remove_device(cpu);
		break;
	default:
		break;
	}
}

static __init int threshold_init_device(void)
{
	unsigned lcpu = 0;

	/* to hit CPUs online before the notifier is up */
	for_each_online_cpu(lcpu) {
		int err = threshold_create_device(lcpu);

		if (err)
			return err;
	}
	threshold_cpu_callback = amd_64_threshold_cpu_callback;

	return 0;
}
/*
 * there are 3 funcs which need to be _initcalled in a logic sequence:
 * 1. xen_late_init_mcelog
 * 2. mcheck_init_device
 * 3. threshold_init_device
 *
 * xen_late_init_mcelog must register xen_mce_chrdev_device before
 * native mce_chrdev_device registration if running under xen platform;
 *
 * mcheck_init_device should be inited before threshold_init_device to
 * initialize mce_device, otherwise a NULL ptr dereference will cause panic.
 *
 * so we use following _initcalls
 * 1. device_initcall(xen_late_init_mcelog);
 * 2. device_initcall_sync(mcheck_init_device);
 * 3. late_initcall(threshold_init_device);
 *
 * when running under xen, the initcall order is 1,2,3;
 * on baremetal, we skip 1 and we do only 2 and 3.
 */
late_initcall(threshold_init_device);
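The counter arithmetic in threshold_restart_bank() and show_error_count() is
worth spelling out: the 12-bit hardware counter is seeded with THRESHOLD_MAX -
threshold_limit and counts up, so it reaches THRESHOLD_MAX (raising the APIC
interrupt) after exactly threshold_limit errors, and the visible error count
is the raw counter minus that seed. A stand-alone sketch of both directions
(0xFFF follows this file's define; the example values are arbitrary):

	#include <stdio.h>

	#define THRESHOLD_MAX 0xFFF

	int main(void)
	{
		unsigned int limit = 25;	/* threshold_limit set via sysfs */
		unsigned int seed = THRESHOLD_MAX - limit;
		unsigned int raw = seed + 7;	/* counter after 7 errors */

		/* show_error_count() in reverse: raw - (THRESHOLD_MAX - limit) */
		printf("errors seen: %u\n", raw - (THRESHOLD_MAX - limit));
		/* interrupt fires when the count reaches the limit */
		printf("interrupt after %u more errors\n", THRESHOLD_MAX - raw);
		return 0;
	}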
391	arch/x86/kernel/cpu/mcheck/mce_intel.c	Normal file
@@ -0,0 +1,391 @@
|
|||
/*
|
||||
* Intel specific MCE features.
|
||||
* Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
|
||||
* Copyright (C) 2008, 2009 Intel Corporation
|
||||
* Author: Andi Kleen
|
||||
*/
|
||||
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
#include "mce-internal.h"
|
||||
|
||||
/*
|
||||
* Support for Intel Correct Machine Check Interrupts. This allows
|
||||
* the CPU to raise an interrupt when a corrected machine check happened.
|
||||
* Normally we pick those up using a regular polling timer.
|
||||
* Also supports reliable discovery of shared banks.
|
||||
*/
|
||||
|
||||
/*
|
||||
* CMCI can be delivered to multiple cpus that share a machine check bank
|
||||
* so we need to designate a single cpu to process errors logged in each bank
|
||||
* in the interrupt handler (otherwise we would have many races and potential
|
||||
* double reporting of the same error).
|
||||
* Note that this can change when a cpu is offlined or brought online since
|
||||
* some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
|
||||
* disables CMCI on all banks owned by the cpu and clears this bitfield. At
|
||||
* this point, cmci_rediscover() kicks in and a different cpu may end up
|
||||
* taking ownership of some of the shared MCA banks that were previously
|
||||
* owned by the offlined cpu.
|
||||
*/
|
||||
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
|
||||
|
||||
/*
|
||||
* cmci_discover_lock protects against parallel discovery attempts
|
||||
* which could race against each other.
|
||||
*/
|
||||
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
|
||||
|
||||
#define CMCI_THRESHOLD 1
|
||||
#define CMCI_POLL_INTERVAL (30 * HZ)
|
||||
#define CMCI_STORM_INTERVAL (1 * HZ)
|
||||
#define CMCI_STORM_THRESHOLD 15
|
||||
|
||||
static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
|
||||
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
|
||||
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
|
||||
|
||||
enum {
|
||||
CMCI_STORM_NONE,
|
||||
CMCI_STORM_ACTIVE,
|
||||
CMCI_STORM_SUBSIDED,
|
||||
};
|
||||
|
||||
static atomic_t cmci_storm_on_cpus;
|
||||
|
||||
static int cmci_supported(int *banks)
|
||||
{
|
||||
u64 cap;
|
||||
|
||||
if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Vendor check is not strictly needed, but the initial
|
||||
* initialization is vendor keyed and this
|
||||
* makes sure none of the backdoors are entered otherwise.
|
||||
*/
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
|
||||
return 0;
|
||||
if (!cpu_has_apic || lapic_get_maxlvt() < 6)
|
||||
return 0;
|
||||
rdmsrl(MSR_IA32_MCG_CAP, cap);
|
||||
*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
|
||||
return !!(cap & MCG_CMCI_P);
|
||||
}
|
||||
|
||||
void mce_intel_cmci_poll(void)
|
||||
{
|
||||
if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
|
||||
return;
|
||||
machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
|
||||
}
|
||||
|
||||
void mce_intel_hcpu_update(unsigned long cpu)
|
||||
{
|
||||
if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
|
||||
atomic_dec(&cmci_storm_on_cpus);
|
||||
|
||||
per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
|
||||
}
|
||||
|
||||
unsigned long mce_intel_adjust_timer(unsigned long interval)
|
||||
{
|
||||
int r;
|
||||
|
||||
if (interval < CMCI_POLL_INTERVAL)
|
||||
return interval;
|
||||
|
||||
switch (__this_cpu_read(cmci_storm_state)) {
|
||||
case CMCI_STORM_ACTIVE:
|
||||
/*
|
||||
* We switch back to interrupt mode once the poll timer has
|
||||
* silenced itself. That means no events recorded and the
|
||||
* timer interval is back to our poll interval.
|
||||
*/
|
||||
__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
|
||||
r = atomic_sub_return(1, &cmci_storm_on_cpus);
|
||||
if (r == 0)
|
||||
pr_notice("CMCI storm subsided: switching to interrupt mode\n");
|
||||
/* FALLTHROUGH */
|
||||
|
||||
case CMCI_STORM_SUBSIDED:
|
||||
/*
|
||||
* We wait for all cpus to go back to SUBSIDED
|
||||
* state. When that happens we switch back to
|
||||
* interrupt mode.
|
||||
*/
|
||||
if (!atomic_read(&cmci_storm_on_cpus)) {
|
||||
__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
|
||||
cmci_reenable();
|
||||
cmci_recheck();
|
||||
}
|
||||
return CMCI_POLL_INTERVAL;
|
||||
default:
|
||||
/*
|
||||
* We have shiny weather. Let the poll do whatever it
|
||||
* thinks.
|
||||
*/
|
||||
return interval;
|
||||
}
|
||||
}
|
||||
|
||||
static void cmci_storm_disable_banks(void)
|
||||
{
|
||||
unsigned long flags, *owned;
|
||||
int bank;
|
||||
u64 val;
|
||||
|
||||
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
|
||||
owned = this_cpu_ptr(mce_banks_owned);
|
||||
for_each_set_bit(bank, owned, MAX_NR_BANKS) {
|
||||
rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
|
||||
val &= ~MCI_CTL2_CMCI_EN;
|
||||
wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
||||
}
|
||||
|
||||
static bool cmci_storm_detect(void)
|
||||
{
|
||||
unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
|
||||
unsigned long ts = __this_cpu_read(cmci_time_stamp);
|
||||
unsigned long now = jiffies;
|
||||
int r;
|
||||
|
||||
if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
|
||||
return true;
|
||||
|
||||
if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
|
||||
cnt++;
|
||||
} else {
|
||||
cnt = 1;
|
||||
__this_cpu_write(cmci_time_stamp, now);
|
||||
}
|
||||
__this_cpu_write(cmci_storm_cnt, cnt);
|
||||
|
||||
if (cnt <= CMCI_STORM_THRESHOLD)
|
||||
return false;
|
||||
|
||||
cmci_storm_disable_banks();
|
||||
__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
|
||||
r = atomic_add_return(1, &cmci_storm_on_cpus);
|
||||
	mce_timer_kick(CMCI_POLL_INTERVAL);

	if (r == 1)
		pr_notice("CMCI storm detected: switching to poll mode\n");
	return true;
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
	if (cmci_storm_detect())
		return;
	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
	mce_notify_irq();
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks)
{
	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
	unsigned long flags;
	int i;
	int bios_wrong_thresh = 0;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;
		int bios_zero_thresh = 0;

		if (test_bit(i, owned))
			continue;

		/* Skip banks in firmware first mode */
		if (test_bit(i, mce_banks_ce_disabled))
			continue;

		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Already owned by someone else? */
		if (val & MCI_CTL2_CMCI_EN) {
			clear_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			continue;
		}

		if (!mca_cfg.bios_cmci_threshold) {
			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
			val |= CMCI_THRESHOLD;
		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
			/*
			 * If bios_cmci_threshold boot option was specified
			 * but the threshold is zero, we'll try to initialize
			 * it to 1.
			 */
			bios_zero_thresh = 1;
			val |= CMCI_THRESHOLD;
		}

		val |= MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Did the enable bit stick? -- the bank supports CMCI */
		if (val & MCI_CTL2_CMCI_EN) {
			set_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			/*
			 * We are able to set thresholds for some banks that
			 * had a threshold of 0. This means the BIOS has not
			 * set the thresholds properly or does not work with
			 * this boot option. Note down now and report later.
			 */
			if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
					(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
				bios_wrong_thresh = 1;
		} else {
			WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
		}
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
		pr_info_once(
			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
		pr_info_once(
			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
	}
}

/*
 * Just in case we missed an event during initialization check
 * all the CMCI owned banks.
 */
void cmci_recheck(void)
{
	unsigned long flags;
	int banks;

	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
		return;
	local_irq_save(flags);
	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
	local_irq_restore(flags);
}

/* Caller must hold the lock on cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
	u64 val;

	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
		return;
	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
	val &= ~MCI_CTL2_CMCI_EN;
	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
	unsigned long flags;
	int i;
	int banks;

	if (!cmci_supported(&banks))
		return;
	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++)
		__cmci_disable_bank(i);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

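/* Runs on each CPU via on_each_cpu() when cmci_rediscover() is called. */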
static void cmci_rediscover_work_func(void *arg)
{
	int banks;

	/* Recheck banks in case CPUs don't all have the same */
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

/* After a CPU went down cycle through all the others and rediscover */
void cmci_rediscover(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
	int banks;
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

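/* Disable CMCI on a single bank on this CPU (e.g. so firmware can handle it). */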
void cmci_disable_bank(int bank)
{
	int banks;
	unsigned long flags;

	if (!cmci_supported(&banks))
		return;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	__cmci_disable_bank(bank);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

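/* Install the CMCI threshold vector and claim this CPU's share of the MCE banks. */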
static void intel_init_cmci(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	mce_threshold_vector = intel_threshold_interrupt;
	cmci_discover(banks);
	/*
	 * For CPU #0 this runs with still disabled APIC, but that's
	 * ok because only the vector is set up. We still do another
	 * check for the banks later for CPU #0 just to make sure
	 * to not miss any events.
	 */
	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
	cmci_recheck();
}

void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_thermal(c);
	intel_init_cmci();
}
66
arch/x86/kernel/cpu/mcheck/p5.c
Normal file
@@ -0,0 +1,66 @@
/*
 * P5 specific Machine Check Exception Reporting
 * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
 */
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>

#include <asm/processor.h>
#include <asm/mce.h>
#include <asm/msr.h>

/* By default disabled */
int mce_p5_enabled __read_mostly;

/* Machine check handler for Pentium class Intel CPUs: */
static void pentium_machine_check(struct pt_regs *regs, long error_code)
{
	u32 loaddr, hi, lotype;

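	/* Read the logged machine check address and type from the P5 MSRs: */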
	rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
	rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);

	printk(KERN_EMERG
		"CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n",
		smp_processor_id(), loaddr, lotype);

	if (lotype & (1<<5)) {
		printk(KERN_EMERG
			"CPU#%d: Possible thermal failure (CPU on fire ?).\n",
			smp_processor_id());
	}

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
}

/* Set up machine check reporting for processors with Intel style MCE: */
void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
{
	u32 l, h;

	/* Default P5 to off as it's often misconnected: */
	if (!mce_p5_enabled)
		return;

	/* Check for MCE support: */
	if (!cpu_has(c, X86_FEATURE_MCE))
		return;

	machine_check_vector = pentium_machine_check;
	/* Make sure the vector pointer is visible before we enable MCEs: */
	wmb();

	/* Read registers before enabling: */
	rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
	rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
	printk(KERN_INFO
	       "Intel old style machine check architecture supported.\n");

	/* Enable MCE: */
	set_in_cr4(X86_CR4_MCE);
	printk(KERN_INFO
	       "Intel old style machine check reporting enabled on CPU#%d.\n",
	       smp_processor_id());
}
573
arch/x86/kernel/cpu/mcheck/therm_throt.c
Normal file
@@ -0,0 +1,573 @@
/*
 * Thermal throttle event support code (such as syslog messaging and rate
 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
 *
 * This allows consistent reporting of CPU thermal throttle events.
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
 * events, such that the user knows how bad the thermal problem might be
 * (since the logging to syslog and mcelog is rate limited).
 *
 * Author: Dmitriy Zavin (dmitriyz@google.com)
 *
 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
 *          Inspired by Ross Biro's and Al Borchers' counter code.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpu.h>

#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>

/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL		(300 * HZ)

#define THERMAL_THROTTLING_EVENT	0
#define POWER_LIMIT_EVENT		1

/*
 * Current thermal event state:
 */
struct _thermal_state {
	bool			new_event;
	int			event;
	u64			next_check;
	unsigned long		count;
	unsigned long		last_count;
};

struct thermal_state {
	struct _thermal_state core_throttle;
	struct _thermal_state core_power_limit;
	struct _thermal_state package_throttle;
	struct _thermal_state package_power_limit;
	struct _thermal_state core_thresh0;
	struct _thermal_state core_thresh1;
	struct _thermal_state pkg_thresh0;
	struct _thermal_state pkg_thresh1;
};

/* Callback to handle core threshold interrupts */
int (*platform_thermal_notify)(__u64 msr_val);
EXPORT_SYMBOL(platform_thermal_notify);

/* Callback to handle core package threshold interrupts */
int (*platform_thermal_package_notify)(__u64 msr_val);
EXPORT_SYMBOL_GPL(platform_thermal_package_notify);

/*
 * Callback support of rate control: return true if the
 * callback implements its own rate control.
 */
bool (*platform_thermal_package_rate_control)(void);
EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);


static DEFINE_PER_CPU(struct thermal_state, thermal_state);

static atomic_t therm_throt_en	= ATOMIC_INIT(0);

static u32 lvtthmr_init __read_mostly;

#ifdef CONFIG_SYSFS
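/* Generate sysfs show functions and read-only attributes for the per-CPU event counters: */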
#define define_therm_throt_device_one_ro(_name)				\
	static DEVICE_ATTR(_name, 0444,					\
			   therm_throt_device_show_##_name,		\
				   NULL)				\

#define define_therm_throt_device_show_func(event, name)		\
									\
static ssize_t therm_throt_device_show_##event##_##name(		\
			struct device *dev,				\
			struct device_attribute *attr,			\
			char *buf)					\
{									\
	unsigned int cpu = dev->id;					\
	ssize_t ret;							\
									\
	preempt_disable();	/* CPU hotplug */			\
	if (cpu_online(cpu)) {						\
		ret = sprintf(buf, "%lu\n",				\
			      per_cpu(thermal_state, cpu).event.name);	\
	} else								\
		ret = 0;						\
	preempt_enable();						\
									\
	return ret;							\
}

define_therm_throt_device_show_func(core_throttle, count);
define_therm_throt_device_one_ro(core_throttle_count);

define_therm_throt_device_show_func(core_power_limit, count);
define_therm_throt_device_one_ro(core_power_limit_count);

define_therm_throt_device_show_func(package_throttle, count);
define_therm_throt_device_one_ro(package_throttle_count);

define_therm_throt_device_show_func(package_power_limit, count);
define_therm_throt_device_one_ro(package_power_limit_count);

static struct attribute *thermal_throttle_attrs[] = {
	&dev_attr_core_throttle_count.attr,
	NULL
};

static struct attribute_group thermal_attr_group = {
	.attrs	= thermal_throttle_attrs,
	.name	= "thermal_throttle"
};
#endif /* CONFIG_SYSFS */

#define CORE_LEVEL	0
#define PACKAGE_LEVEL	1

/***
 * therm_throt_process - Process thermal throttling event from interrupt
 * @new_event: Whether the condition is current or not (boolean), since the
 *             thermal interrupt normally gets called both when the thermal
 *             event begins and once the event has ended.
 *
 * This function is called by the thermal interrupt after the
 * IRQ has been acknowledged.
 *
 * It will take care of rate limiting and printing messages to the syslog.
 *
 * Returns: 0 : Event should NOT be further logged, i.e. still in
 *              "timeout" from previous log message.
 *          1 : Event should be logged further, and a message has been
 *              printed to the syslog.
 */
static int therm_throt_process(bool new_event, int event, int level)
{
	struct _thermal_state *state;
	unsigned int this_cpu = smp_processor_id();
	bool old_event;
	u64 now;
	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);

	now = get_jiffies_64();
	if (level == CORE_LEVEL) {
		if (event == THERMAL_THROTTLING_EVENT)
			state = &pstate->core_throttle;
		else if (event == POWER_LIMIT_EVENT)
			state = &pstate->core_power_limit;
		else
			return 0;
	} else if (level == PACKAGE_LEVEL) {
		if (event == THERMAL_THROTTLING_EVENT)
			state = &pstate->package_throttle;
		else if (event == POWER_LIMIT_EVENT)
			state = &pstate->package_power_limit;
		else
			return 0;
	} else
		return 0;

	old_event = state->new_event;
	state->new_event = new_event;

	if (new_event)
		state->count++;

	if (time_before64(now, state->next_check) &&
			state->count != state->last_count)
		return 0;

	state->next_check = now + CHECK_INTERVAL;
	state->last_count = state->count;

	/* if we just entered the thermal event */
	if (new_event) {
		if (event == THERMAL_THROTTLING_EVENT)
			printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package",
				state->count);
		return 1;
	}
	if (old_event) {
		if (event == THERMAL_THROTTLING_EVENT)
			printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package");
		return 1;
	}

	return 0;
}

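/* Rate-limit threshold notifications: allow at most one per CHECK_INTERVAL for each threshold's state. */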
static int thresh_event_valid(int level, int event)
{
	struct _thermal_state *state;
	unsigned int this_cpu = smp_processor_id();
	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
	u64 now = get_jiffies_64();

	if (level == PACKAGE_LEVEL)
		state = (event == 0) ? &pstate->pkg_thresh0 :
						&pstate->pkg_thresh1;
	else
		state = (event == 0) ? &pstate->core_thresh0 :
						&pstate->core_thresh1;

	if (time_before64(now, state->next_check))
		return 0;

	state->next_check = now + CHECK_INTERVAL;

	return 1;
}

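/* "int_pln_enable" boot parameter: enable power limit notification interrupts. */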
static bool int_pln_enable;
static int __init int_pln_enable_setup(char *s)
{
	int_pln_enable = true;

	return 1;
}
__setup("int_pln_enable", int_pln_enable_setup);

#ifdef CONFIG_SYSFS
/* Add/Remove thermal_throttle interface for CPU device: */
static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
{
	int err;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
	if (err)
		return err;

	if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
		err = sysfs_add_file_to_group(&dev->kobj,
					      &dev_attr_core_power_limit_count.attr,
					      thermal_attr_group.name);
	if (cpu_has(c, X86_FEATURE_PTS)) {
		err = sysfs_add_file_to_group(&dev->kobj,
					      &dev_attr_package_throttle_count.attr,
					      thermal_attr_group.name);
		if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
			err = sysfs_add_file_to_group(&dev->kobj,
					&dev_attr_package_power_limit_count.attr,
					thermal_attr_group.name);
	}

	return err;
}

static void thermal_throttle_remove_dev(struct device *dev)
{
	sysfs_remove_group(&dev->kobj, &thermal_attr_group);
}

/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static int
thermal_throttle_cpu_callback(struct notifier_block *nfb,
			      unsigned long action,
			      void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct device *dev;
	int err = 0;

	dev = get_cpu_device(cpu);

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		err = thermal_throttle_add_dev(dev, cpu);
		WARN_ON(err);
		break;
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		thermal_throttle_remove_dev(dev);
		break;
	}
	return notifier_from_errno(err);
}

static struct notifier_block thermal_throttle_cpu_notifier =
{
	.notifier_call = thermal_throttle_cpu_callback,
};

static __init int thermal_throttle_init_device(void)
{
	unsigned int cpu = 0;
	int err;

	if (!atomic_read(&therm_throt_en))
		return 0;

	cpu_notifier_register_begin();

	/* connect live CPUs to sysfs */
	for_each_online_cpu(cpu) {
		err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu);
		WARN_ON(err);
	}

	__register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
	cpu_notifier_register_done();

	return 0;
}
device_initcall(thermal_throttle_init_device);

#endif /* CONFIG_SYSFS */

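/* Forward package threshold events to the platform callback, unless the callback does its own rate control. */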
static void notify_package_thresholds(__u64 msr_val)
{
	bool notify_thres_0 = false;
	bool notify_thres_1 = false;

	if (!platform_thermal_package_notify)
		return;

	/* lower threshold check */
	if (msr_val & THERM_LOG_THRESHOLD0)
		notify_thres_0 = true;
	/* higher threshold check */
	if (msr_val & THERM_LOG_THRESHOLD1)
		notify_thres_1 = true;

	if (!notify_thres_0 && !notify_thres_1)
		return;

	if (platform_thermal_package_rate_control &&
		platform_thermal_package_rate_control()) {
		/* Rate control is implemented in callback */
		platform_thermal_package_notify(msr_val);
		return;
	}

	/* lower threshold reached */
	if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0))
		platform_thermal_package_notify(msr_val);
	/* higher threshold reached */
	if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1))
		platform_thermal_package_notify(msr_val);
}

static void notify_thresholds(__u64 msr_val)
{
	/*
	 * Check whether the interrupt handler is defined;
	 * otherwise simply return.
	 */
	if (!platform_thermal_notify)
		return;

	/* lower threshold reached */
	if ((msr_val & THERM_LOG_THRESHOLD0) &&
			thresh_event_valid(CORE_LEVEL, 0))
		platform_thermal_notify(msr_val);
	/* higher threshold reached */
	if ((msr_val & THERM_LOG_THRESHOLD1) &&
			thresh_event_valid(CORE_LEVEL, 1))
		platform_thermal_notify(msr_val);
}

/* Thermal transition interrupt handler */
static void intel_thermal_interrupt(void)
{
	__u64 msr_val;

	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);

	/* Check for violation of core thermal thresholds */
	notify_thresholds(msr_val);

	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
				THERMAL_THROTTLING_EVENT,
				CORE_LEVEL) != 0)
		mce_log_therm_throt_event(msr_val);

	if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
		therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
					POWER_LIMIT_EVENT,
					CORE_LEVEL);

	if (this_cpu_has(X86_FEATURE_PTS)) {
		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
		/* check violations of package thermal thresholds */
		notify_package_thresholds(msr_val);
		therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
					THERMAL_THROTTLING_EVENT,
					PACKAGE_LEVEL);
		if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
			therm_throt_process(msr_val &
					PACKAGE_THERM_STATUS_POWER_LIMIT,
					POWER_LIMIT_EVENT,
					PACKAGE_LEVEL);
	}
}

static void unexpected_thermal_interrupt(void)
{
	printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
		smp_processor_id());
}

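/* Points at unexpected_thermal_interrupt until intel_init_thermal() installs the real handler. */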
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;

static inline void __smp_thermal_interrupt(void)
{
	inc_irq_stat(irq_thermal_count);
	smp_thermal_vector();
}

asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs)
{
	entering_irq();
	__smp_thermal_interrupt();
	exiting_ack_irq();
}

asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
{
	entering_irq();
	trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
	__smp_thermal_interrupt();
	trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
	exiting_ack_irq();
}

/* Thermal monitoring depends on APIC, ACPI and clock modulation */
static int intel_thermal_supported(struct cpuinfo_x86 *c)
{
	if (!cpu_has_apic)
		return 0;
	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
		return 0;
	return 1;
}

void __init mcheck_intel_therm_init(void)
{
	/*
	 * This function is only called on boot CPU. Save the init thermal
	 * LVT value on BSP and use that value to restore APs' thermal LVT
	 * entry that BIOS programmed later.
	 */
	if (intel_thermal_supported(&boot_cpu_data))
		lvtthmr_init = apic_read(APIC_LVTTHMR);
}

void intel_init_thermal(struct cpuinfo_x86 *c)
{
	unsigned int cpu = smp_processor_id();
	int tm2 = 0;
	u32 l, h;

	if (!intel_thermal_supported(c))
		return;

	/*
	 * First check if it's enabled already, in which case there might
	 * be some SMM goo which handles it, so we can't even put a handler
	 * since it might be delivered via SMI already:
	 */
	rdmsr(MSR_IA32_MISC_ENABLE, l, h);

	h = lvtthmr_init;
	/*
	 * The initial value of thermal LVT entries on all APs always reads
	 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
	 * sequence to them and LVT registers are reset to 0s except for
	 * the mask bits which are set to 1s when APs receive INIT IPI.
	 * If BIOS takes over the thermal interrupt and sets its interrupt
	 * delivery mode to SMI (not fixed), it restores the value that the
	 * BIOS has programmed on AP based on BSP's info we saved since BIOS
	 * is always setting the same value for all threads/cores.
	 */
	if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
		apic_write(APIC_LVTTHMR, lvtthmr_init);


	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
		if (system_state == SYSTEM_BOOTING)
			printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu);
		return;
	}

	/* Check whether a vector already exists */
	if (h & APIC_VECTOR_MASK) {
		printk(KERN_DEBUG
		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
		       cpu, (h & APIC_VECTOR_MASK));
		return;
	}

	/* early Pentium M models use different method for enabling TM2 */
	if (cpu_has(c, X86_FEATURE_TM2)) {
		if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
			rdmsr(MSR_THERM2_CTL, l, h);
			if (l & MSR_THERM2_CTL_TM_SELECT)
				tm2 = 1;
		} else if (l & MSR_IA32_MISC_ENABLE_TM2)
			tm2 = 1;
	}

	/* We'll mask the thermal vector in the lapic till we're ready: */
	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
	apic_write(APIC_LVTTHMR, h);

	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
	if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
		wrmsr(MSR_IA32_THERM_INTERRUPT,
			(l | (THERM_INT_LOW_ENABLE
			| THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
	else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
		wrmsr(MSR_IA32_THERM_INTERRUPT,
			l | (THERM_INT_LOW_ENABLE
			| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
	else
		wrmsr(MSR_IA32_THERM_INTERRUPT,
			l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);

	if (cpu_has(c, X86_FEATURE_PTS)) {
		rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
		if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
				(l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE))
				& ~PACKAGE_THERM_INT_PLN_ENABLE, h);
		else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
				l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE
				| PACKAGE_THERM_INT_PLN_ENABLE), h);
		else
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
				l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE), h);
	}

	smp_thermal_vector = intel_thermal_interrupt;

	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);

	/* Unmask the thermal vector: */
	l = apic_read(APIC_LVTTHMR);
	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

	printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
		    tm2 ? "TM2" : "TM1");

	/* enable thermal throttle processing */
	atomic_set(&therm_throt_en, 1);
}
41
arch/x86/kernel/cpu/mcheck/threshold.c
Normal file
@@ -0,0 +1,41 @@
/*
 * Common corrected MCE threshold handler code:
 */
#include <linux/interrupt.h>
#include <linux/kernel.h>

#include <asm/irq_vectors.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/trace/irq_vectors.h>

static void default_threshold_interrupt(void)
{
	printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n",
			 THRESHOLD_APIC_VECTOR);
}

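/* Replaced with a real handler, e.g. intel_threshold_interrupt(), by the CPU-specific init code. */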
void (*mce_threshold_vector)(void) = default_threshold_interrupt;

static inline void __smp_threshold_interrupt(void)
{
	inc_irq_stat(irq_threshold_count);
	mce_threshold_vector();
}

asmlinkage __visible void smp_threshold_interrupt(void)
{
	entering_irq();
	__smp_threshold_interrupt();
	exiting_ack_irq();
}

asmlinkage __visible void smp_trace_threshold_interrupt(void)
{
	entering_irq();
	trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
	__smp_threshold_interrupt();
	trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
	exiting_ack_irq();
}
38
arch/x86/kernel/cpu/mcheck/winchip.c
Normal file
@@ -0,0 +1,38 @@
/*
 * IDT Winchip specific Machine Check Exception Reporting
 * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
 */
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>

#include <asm/processor.h>
#include <asm/mce.h>
#include <asm/msr.h>

/* Machine check handler for WinChip C6: */
static void winchip_machine_check(struct pt_regs *regs, long error_code)
{
	printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
}

/* Set up machine check reporting on the Winchip C6 series */
void winchip_mcheck_init(struct cpuinfo_x86 *c)
{
	u32 lo, hi;

	machine_check_vector = winchip_machine_check;
	/* Make sure the vector pointer is visible before we enable MCEs: */
	wmb();

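	/* Enable machine check reporting via the IDT Feature Control Register: */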
	rdmsr(MSR_IDT_FCR1, lo, hi);
	lo |= (1<<2);	/* Enable EIERRINT (int 18 MCE) */
	lo &= ~(1<<4);	/* Enable MCE */
	wrmsr(MSR_IDT_FCR1, lo, hi);

	set_in_cr4(X86_CR4_MCE);

	printk(KERN_INFO
	       "Winchip machine check reporting enabled on CPU#0.\n");
}