Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

388
drivers/edac/Kconfig Normal file
View file

@ -0,0 +1,388 @@
#
# EDAC Kconfig
# Copyright (c) 2008 Doug Thompson www.softwarebitmaker.com
# Licensed and distributed under the GPL
#
config EDAC_SUPPORT
bool
menuconfig EDAC
bool "EDAC (Error Detection And Correction) reporting"
depends on HAS_IOMEM
depends on X86 || PPC || TILE || ARM || EDAC_SUPPORT
help
EDAC is designed to report errors in the core system.
These are low-level errors that are reported in the CPU or
supporting chipset or other subsystems:
memory errors, cache errors, PCI errors, thermal throttling, etc..
If unsure, select 'Y'.
If this code is reporting problems on your system, please
see the EDAC project web pages for more information at:
<http://bluesmoke.sourceforge.net/>
and:
<http://buttersideup.com/edacwiki>
There is also a mailing list for the EDAC project, which can
be found via the sourceforge page.
if EDAC
config EDAC_LEGACY_SYSFS
bool "EDAC legacy sysfs"
default y
help
Enable the compatibility sysfs nodes.
Use 'Y' if your edac utilities aren't ported to work with the newer
structures.
config EDAC_DEBUG
bool "Debugging"
help
This turns on debugging information for the entire EDAC subsystem.
You do so by inserting edac_module with "edac_debug_level=x." Valid
levels are 0-4 (from low to high) and by default it is set to 2.
Usually you should select 'N' here.
config EDAC_DECODE_MCE
tristate "Decode MCEs in human-readable form (only on AMD for now)"
depends on CPU_SUP_AMD && X86_MCE_AMD
default y
---help---
Enable this option if you want to decode Machine Check Exceptions
occurring on your machine in human-readable form.
You should definitely say Y here in case you want to decode MCEs
which occur really early upon boot, before the module infrastructure
has been initialized.
config EDAC_MCE_INJ
tristate "Simple MCE injection interface over /sysfs"
depends on EDAC_DECODE_MCE
default n
help
This is a simple interface to inject MCEs over /sysfs and test
the MCE decoding code in EDAC.
This is currently AMD-only.
config EDAC_MM_EDAC
tristate "Main Memory EDAC (Error Detection And Correction) reporting"
select RAS
help
Some systems are able to detect and correct errors in main
memory. EDAC can report statistics on memory error
detection and correction (EDAC - or commonly referred to ECC
errors). EDAC will also try to decode where these errors
occurred so that a particular failing memory module can be
replaced. If unsure, select 'Y'.
config EDAC_GHES
bool "Output ACPI APEI/GHES BIOS detected errors via EDAC"
depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y)
default y
help
Not all machines support hardware-driven error report. Some of those
provide a BIOS-driven error report mechanism via ACPI, using the
APEI/GHES driver. By enabling this option, the error reports provided
by GHES are sent to userspace via the EDAC API.
When this option is enabled, it will disable the hardware-driven
mechanisms, if a GHES BIOS is detected, entering into the
"Firmware First" mode.
It should be noticed that keeping both GHES and a hardware-driven
error mechanism won't work well, as BIOS will race with OS, while
reading the error registers. So, if you want to not use "Firmware
first" GHES error mechanism, you should disable GHES either at
compilation time or by passing "ghes.disable=1" Kernel parameter
at boot time.
In doubt, say 'Y'.
config EDAC_AMD64
tristate "AMD64 (Opteron, Athlon64) K8, F10h"
depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE
help
Support for error detection and correction of DRAM ECC errors on
the AMD64 families of memory controllers (K8 and F10h)
config EDAC_AMD64_ERROR_INJECTION
bool "Sysfs HW Error injection facilities"
depends on EDAC_AMD64
help
Recent Opterons (Family 10h and later) provide for Memory Error
Injection into the ECC detection circuits. The amd64_edac module
allows the operator/user to inject Uncorrectable and Correctable
errors into DRAM.
When enabled, in each of the respective memory controller directories
(/sys/devices/system/edac/mc/mcX), there are 3 input files:
- inject_section (0..3, 16-byte section of 64-byte cacheline),
- inject_word (0..8, 16-bit word of 16-byte section),
- inject_ecc_vector (hex ecc vector: select bits of inject word)
In addition, there are two control files, inject_read and inject_write,
which trigger the DRAM ECC Read and Write respectively.
config EDAC_AMD76X
tristate "AMD 76x (760, 762, 768)"
depends on EDAC_MM_EDAC && PCI && X86_32
help
Support for error detection and correction on the AMD 76x
series of chipsets used with the Athlon processor.
config EDAC_E7XXX
tristate "Intel e7xxx (e7205, e7500, e7501, e7505)"
depends on EDAC_MM_EDAC && PCI && X86_32
help
Support for error detection and correction on the Intel
E7205, E7500, E7501 and E7505 server chipsets.
config EDAC_E752X
tristate "Intel e752x (e7520, e7525, e7320) and 3100"
depends on EDAC_MM_EDAC && PCI && X86
help
Support for error detection and correction on the Intel
E7520, E7525, E7320 server chipsets.
config EDAC_I82443BXGX
tristate "Intel 82443BX/GX (440BX/GX)"
depends on EDAC_MM_EDAC && PCI && X86_32
depends on BROKEN
help
Support for error detection and correction on the Intel
82443BX/GX memory controllers (440BX/GX chipsets).
config EDAC_I82875P
tristate "Intel 82875p (D82875P, E7210)"
depends on EDAC_MM_EDAC && PCI && X86_32
help
Support for error detection and correction on the Intel
DP82785P and E7210 server chipsets.
config EDAC_I82975X
tristate "Intel 82975x (D82975x)"
depends on EDAC_MM_EDAC && PCI && X86
help
Support for error detection and correction on the Intel
DP82975x server chipsets.
config EDAC_I3000
tristate "Intel 3000/3010"
depends on EDAC_MM_EDAC && PCI && X86
help
Support for error detection and correction on the Intel
3000 and 3010 server chipsets.
config EDAC_I3200
tristate "Intel 3200"
depends on EDAC_MM_EDAC && PCI && X86
help
Support for error detection and correction on the Intel
3200 and 3210 server chipsets.
config EDAC_IE31200
tristate "Intel e312xx"
depends on EDAC_MM_EDAC && PCI && X86
help
Support for error detection and correction on the Intel
E3-1200 based DRAM controllers.
config EDAC_X38
tristate "Intel X38"
depends on EDAC_MM_EDAC && PCI && X86
help
Support for error detection and correction on the Intel
X38 server chipsets.
config EDAC_I5400
tristate "Intel 5400 (Seaburg) chipsets"
depends on EDAC_MM_EDAC && PCI && X86
help
Support for error detection and correction the Intel
i5400 MCH chipset (Seaburg).
config EDAC_I7CORE
tristate "Intel i7 Core (Nehalem) processors"
depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL
help
Support for error detection and correction the Intel
i7 Core (Nehalem) Integrated Memory Controller that exists on
newer processors like i7 Core, i7 Core Extreme, Xeon 35xx
and Xeon 55xx processors.
config EDAC_I82860
tristate "Intel 82860"
depends on EDAC_MM_EDAC && PCI && X86_32
help
Support for error detection and correction on the Intel
82860 chipset.
config EDAC_R82600
tristate "Radisys 82600 embedded chipset"
depends on EDAC_MM_EDAC && PCI && X86_32
help
Support for error detection and correction on the Radisys
82600 embedded chipset.
config EDAC_I5000
tristate "Intel Greencreek/Blackford chipset"
depends on EDAC_MM_EDAC && X86 && PCI
help
Support for error detection and correction the Intel
Greekcreek/Blackford chipsets.
config EDAC_I5100
tristate "Intel San Clemente MCH"
depends on EDAC_MM_EDAC && X86 && PCI
help
Support for error detection and correction the Intel
San Clemente MCH.
config EDAC_I7300
tristate "Intel Clarksboro MCH"
depends on EDAC_MM_EDAC && X86 && PCI
help
Support for error detection and correction the Intel
Clarksboro MCH (Intel 7300 chipset).
config EDAC_SBRIDGE
tristate "Intel Sandy-Bridge/Ivy-Bridge/Haswell Integrated MC"
depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
depends on PCI_MMCONFIG
help
Support for error detection and correction the Intel
Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers.
config EDAC_MPC85XX
tristate "Freescale MPC83xx / MPC85xx"
depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx)
help
Support for error detection and correction on the Freescale
MPC8349, MPC8560, MPC8540, MPC8548
config EDAC_MV64X60
tristate "Marvell MV64x60"
depends on EDAC_MM_EDAC && MV64X60
help
Support for error detection and correction on the Marvell
MV64360 and MV64460 chipsets.
config EDAC_PASEMI
tristate "PA Semi PWRficient"
depends on EDAC_MM_EDAC && PCI
depends on PPC_PASEMI
help
Support for error detection and correction on PA Semi
PWRficient.
config EDAC_CELL
tristate "Cell Broadband Engine memory controller"
depends on EDAC_MM_EDAC && PPC_CELL_COMMON
help
Support for error detection and correction on the
Cell Broadband Engine internal memory controller
on platform without a hypervisor
config EDAC_PPC4XX
tristate "PPC4xx IBM DDR2 Memory Controller"
depends on EDAC_MM_EDAC && 4xx
help
This enables support for EDAC on the ECC memory used
with the IBM DDR2 memory controller found in various
PowerPC 4xx embedded processors such as the 405EX[r],
440SP, 440SPe, 460EX, 460GT and 460SX.
config EDAC_AMD8131
tristate "AMD8131 HyperTransport PCI-X Tunnel"
depends on EDAC_MM_EDAC && PCI && PPC_MAPLE
help
Support for error detection and correction on the
AMD8131 HyperTransport PCI-X Tunnel chip.
Note, add more Kconfig dependency if it's adopted
on some machine other than Maple.
config EDAC_AMD8111
tristate "AMD8111 HyperTransport I/O Hub"
depends on EDAC_MM_EDAC && PCI && PPC_MAPLE
help
Support for error detection and correction on the
AMD8111 HyperTransport I/O Hub chip.
Note, add more Kconfig dependency if it's adopted
on some machine other than Maple.
config EDAC_CPC925
tristate "IBM CPC925 Memory Controller (PPC970FX)"
depends on EDAC_MM_EDAC && PPC64
help
Support for error detection and correction on the
IBM CPC925 Bridge and Memory Controller, which is
a companion chip to the PowerPC 970 family of
processors.
config EDAC_TILE
tristate "Tilera Memory Controller"
depends on EDAC_MM_EDAC && TILE
default y
help
Support for error detection and correction on the
Tilera memory controller.
config EDAC_HIGHBANK_MC
tristate "Highbank Memory Controller"
depends on EDAC_MM_EDAC && ARCH_HIGHBANK
help
Support for error detection and correction on the
Calxeda Highbank memory controller.
config EDAC_HIGHBANK_L2
tristate "Highbank L2 Cache"
depends on EDAC_MM_EDAC && ARCH_HIGHBANK
help
Support for error detection and correction on the
Calxeda Highbank memory controller.
config EDAC_OCTEON_PC
tristate "Cavium Octeon Primary Caches"
depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON
help
Support for error detection and correction on the primary caches of
the cnMIPS cores of Cavium Octeon family SOCs.
config EDAC_OCTEON_L2C
tristate "Cavium Octeon Secondary Caches (L2C)"
depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC
help
Support for error detection and correction on the
Cavium Octeon family of SOCs.
config EDAC_OCTEON_LMC
tristate "Cavium Octeon DRAM Memory Controller (LMC)"
depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC
help
Support for error detection and correction on the
Cavium Octeon family of SOCs.
config EDAC_OCTEON_PCI
tristate "Cavium Octeon PCI Controller"
depends on EDAC_MM_EDAC && PCI && CAVIUM_OCTEON_SOC
help
Support for error detection and correction on the
Cavium Octeon family of SOCs.
config EDAC_ALTERA_MC
tristate "Altera SDRAM Memory Controller EDAC"
depends on EDAC_MM_EDAC && ARCH_SOCFPGA
help
Support for error detection and correction on the
Altera SDRAM memory controller. Note that the
preloader must initialize the SDRAM before loading
the kernel.
endif # EDAC

69
drivers/edac/Makefile Normal file
View file

@ -0,0 +1,69 @@
#
# Makefile for the Linux kernel EDAC drivers.
#
# Copyright 02 Jul 2003, Linux Networx (http://lnxi.com)
# This file may be distributed under the terms of the
# GNU General Public License.
#
obj-$(CONFIG_EDAC) := edac_stub.o
obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o
edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o
edac_core-y += edac_module.o edac_device_sysfs.o
ifdef CONFIG_PCI
edac_core-y += edac_pci.o edac_pci_sysfs.o
endif
obj-$(CONFIG_EDAC_GHES) += ghes_edac.o
obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o
edac_mce_amd-y := mce_amd.o
obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o
obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o
obj-$(CONFIG_EDAC_I5000) += i5000_edac.o
obj-$(CONFIG_EDAC_I5100) += i5100_edac.o
obj-$(CONFIG_EDAC_I5400) += i5400_edac.o
obj-$(CONFIG_EDAC_I7300) += i7300_edac.o
obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o
obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o
obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o
obj-$(CONFIG_EDAC_E752X) += e752x_edac.o
obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o
obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o
obj-$(CONFIG_EDAC_I82975X) += i82975x_edac.o
obj-$(CONFIG_EDAC_I3000) += i3000_edac.o
obj-$(CONFIG_EDAC_I3200) += i3200_edac.o
obj-$(CONFIG_EDAC_IE31200) += ie31200_edac.o
obj-$(CONFIG_EDAC_X38) += x38_edac.o
obj-$(CONFIG_EDAC_I82860) += i82860_edac.o
obj-$(CONFIG_EDAC_R82600) += r82600_edac.o
amd64_edac_mod-y := amd64_edac.o
amd64_edac_mod-$(CONFIG_EDAC_DEBUG) += amd64_edac_dbg.o
amd64_edac_mod-$(CONFIG_EDAC_AMD64_ERROR_INJECTION) += amd64_edac_inj.o
obj-$(CONFIG_EDAC_AMD64) += amd64_edac_mod.o
obj-$(CONFIG_EDAC_PASEMI) += pasemi_edac.o
obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac.o
obj-$(CONFIG_EDAC_MV64X60) += mv64x60_edac.o
obj-$(CONFIG_EDAC_CELL) += cell_edac.o
obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o
obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o
obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o
obj-$(CONFIG_EDAC_TILE) += tile_edac.o
obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o
obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o
obj-$(CONFIG_EDAC_OCTEON_PC) += octeon_edac-pc.o
obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o
obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o
obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o
obj-$(CONFIG_EDAC_ALTERA_MC) += altera_edac.o

410
drivers/edac/altera_edac.c Normal file
View file

@ -0,0 +1,410 @@
/*
* Copyright Altera Corporation (C) 2014. All rights reserved.
* Copyright 2011-2012 Calxeda, Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
* Adapted from the highbank_mc_edac driver.
*/
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/mfd/syscon.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include "edac_core.h"
#include "edac_module.h"
#define EDAC_MOD_STR "altera_edac"
#define EDAC_VERSION "1"
/* SDRAM Controller CtrlCfg Register */
#define CTLCFG_OFST 0x00
/* SDRAM Controller CtrlCfg Register Bit Masks */
#define CTLCFG_ECC_EN 0x400
#define CTLCFG_ECC_CORR_EN 0x800
#define CTLCFG_GEN_SB_ERR 0x2000
#define CTLCFG_GEN_DB_ERR 0x4000
#define CTLCFG_ECC_AUTO_EN (CTLCFG_ECC_EN | \
CTLCFG_ECC_CORR_EN)
/* SDRAM Controller Address Width Register */
#define DRAMADDRW_OFST 0x2C
/* SDRAM Controller Address Widths Field Register */
#define DRAMADDRW_COLBIT_MASK 0x001F
#define DRAMADDRW_COLBIT_SHIFT 0
#define DRAMADDRW_ROWBIT_MASK 0x03E0
#define DRAMADDRW_ROWBIT_SHIFT 5
#define DRAMADDRW_BANKBIT_MASK 0x1C00
#define DRAMADDRW_BANKBIT_SHIFT 10
#define DRAMADDRW_CSBIT_MASK 0xE000
#define DRAMADDRW_CSBIT_SHIFT 13
/* SDRAM Controller Interface Data Width Register */
#define DRAMIFWIDTH_OFST 0x30
/* SDRAM Controller Interface Data Width Defines */
#define DRAMIFWIDTH_16B_ECC 24
#define DRAMIFWIDTH_32B_ECC 40
/* SDRAM Controller DRAM Status Register */
#define DRAMSTS_OFST 0x38
/* SDRAM Controller DRAM Status Register Bit Masks */
#define DRAMSTS_SBEERR 0x04
#define DRAMSTS_DBEERR 0x08
#define DRAMSTS_CORR_DROP 0x10
/* SDRAM Controller DRAM IRQ Register */
#define DRAMINTR_OFST 0x3C
/* SDRAM Controller DRAM IRQ Register Bit Masks */
#define DRAMINTR_INTREN 0x01
#define DRAMINTR_SBEMASK 0x02
#define DRAMINTR_DBEMASK 0x04
#define DRAMINTR_CORRDROPMASK 0x08
#define DRAMINTR_INTRCLR 0x10
/* SDRAM Controller Single Bit Error Count Register */
#define SBECOUNT_OFST 0x40
/* SDRAM Controller Single Bit Error Count Register Bit Masks */
#define SBECOUNT_MASK 0x0F
/* SDRAM Controller Double Bit Error Count Register */
#define DBECOUNT_OFST 0x44
/* SDRAM Controller Double Bit Error Count Register Bit Masks */
#define DBECOUNT_MASK 0x0F
/* SDRAM Controller ECC Error Address Register */
#define ERRADDR_OFST 0x48
/* SDRAM Controller ECC Error Address Register Bit Masks */
#define ERRADDR_MASK 0xFFFFFFFF
/* Altera SDRAM Memory Controller data */
struct altr_sdram_mc_data {
struct regmap *mc_vbase;
};
static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id)
{
struct mem_ctl_info *mci = dev_id;
struct altr_sdram_mc_data *drvdata = mci->pvt_info;
u32 status, err_count, err_addr;
/* Error Address is shared by both SBE & DBE */
regmap_read(drvdata->mc_vbase, ERRADDR_OFST, &err_addr);
regmap_read(drvdata->mc_vbase, DRAMSTS_OFST, &status);
if (status & DRAMSTS_DBEERR) {
regmap_read(drvdata->mc_vbase, DBECOUNT_OFST, &err_count);
panic("\nEDAC: [%d Uncorrectable errors @ 0x%08X]\n",
err_count, err_addr);
}
if (status & DRAMSTS_SBEERR) {
regmap_read(drvdata->mc_vbase, SBECOUNT_OFST, &err_count);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count,
err_addr >> PAGE_SHIFT,
err_addr & ~PAGE_MASK, 0,
0, 0, -1, mci->ctl_name, "");
}
regmap_write(drvdata->mc_vbase, DRAMINTR_OFST,
(DRAMINTR_INTRCLR | DRAMINTR_INTREN));
return IRQ_HANDLED;
}
#ifdef CONFIG_EDAC_DEBUG
static ssize_t altr_sdr_mc_err_inject_write(struct file *file,
const char __user *data,
size_t count, loff_t *ppos)
{
struct mem_ctl_info *mci = file->private_data;
struct altr_sdram_mc_data *drvdata = mci->pvt_info;
u32 *ptemp;
dma_addr_t dma_handle;
u32 reg, read_reg;
ptemp = dma_alloc_coherent(mci->pdev, 16, &dma_handle, GFP_KERNEL);
if (!ptemp) {
dma_free_coherent(mci->pdev, 16, ptemp, dma_handle);
edac_printk(KERN_ERR, EDAC_MC,
"Inject: Buffer Allocation error\n");
return -ENOMEM;
}
regmap_read(drvdata->mc_vbase, CTLCFG_OFST, &read_reg);
read_reg &= ~(CTLCFG_GEN_SB_ERR | CTLCFG_GEN_DB_ERR);
/* Error are injected by writing a word while the SBE or DBE
* bit in the CTLCFG register is set. Reading the word will
* trigger the SBE or DBE error and the corresponding IRQ.
*/
if (count == 3) {
edac_printk(KERN_ALERT, EDAC_MC,
"Inject Double bit error\n");
regmap_write(drvdata->mc_vbase, CTLCFG_OFST,
(read_reg | CTLCFG_GEN_DB_ERR));
} else {
edac_printk(KERN_ALERT, EDAC_MC,
"Inject Single bit error\n");
regmap_write(drvdata->mc_vbase, CTLCFG_OFST,
(read_reg | CTLCFG_GEN_SB_ERR));
}
ptemp[0] = 0x5A5A5A5A;
ptemp[1] = 0xA5A5A5A5;
/* Clear the error injection bits */
regmap_write(drvdata->mc_vbase, CTLCFG_OFST, read_reg);
/* Ensure it has been written out */
wmb();
/*
* To trigger the error, we need to read the data back
* (the data was written with errors above).
* The ACCESS_ONCE macros and printk are used to prevent the
* the compiler optimizing these reads out.
*/
reg = ACCESS_ONCE(ptemp[0]);
read_reg = ACCESS_ONCE(ptemp[1]);
/* Force Read */
rmb();
edac_printk(KERN_ALERT, EDAC_MC, "Read Data [0x%X, 0x%X]\n",
reg, read_reg);
dma_free_coherent(mci->pdev, 16, ptemp, dma_handle);
return count;
}
static const struct file_operations altr_sdr_mc_debug_inject_fops = {
.open = simple_open,
.write = altr_sdr_mc_err_inject_write,
.llseek = generic_file_llseek,
};
static void altr_sdr_mc_create_debugfs_nodes(struct mem_ctl_info *mci)
{
if (mci->debugfs)
debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci,
&altr_sdr_mc_debug_inject_fops);
}
#else
static void altr_sdr_mc_create_debugfs_nodes(struct mem_ctl_info *mci)
{}
#endif
/* Get total memory size in bytes */
static u32 altr_sdram_get_total_mem_size(struct regmap *mc_vbase)
{
u32 size, read_reg, row, bank, col, cs, width;
if (regmap_read(mc_vbase, DRAMADDRW_OFST, &read_reg) < 0)
return 0;
if (regmap_read(mc_vbase, DRAMIFWIDTH_OFST, &width) < 0)
return 0;
col = (read_reg & DRAMADDRW_COLBIT_MASK) >>
DRAMADDRW_COLBIT_SHIFT;
row = (read_reg & DRAMADDRW_ROWBIT_MASK) >>
DRAMADDRW_ROWBIT_SHIFT;
bank = (read_reg & DRAMADDRW_BANKBIT_MASK) >>
DRAMADDRW_BANKBIT_SHIFT;
cs = (read_reg & DRAMADDRW_CSBIT_MASK) >>
DRAMADDRW_CSBIT_SHIFT;
/* Correct for ECC as its not addressible */
if (width == DRAMIFWIDTH_32B_ECC)
width = 32;
if (width == DRAMIFWIDTH_16B_ECC)
width = 16;
/* calculate the SDRAM size base on this info */
size = 1 << (row + bank + col);
size = size * cs * (width / 8);
return size;
}
static int altr_sdram_probe(struct platform_device *pdev)
{
struct edac_mc_layer layers[2];
struct mem_ctl_info *mci;
struct altr_sdram_mc_data *drvdata;
struct regmap *mc_vbase;
struct dimm_info *dimm;
u32 read_reg, mem_size;
int irq;
int res = 0;
/* Validate the SDRAM controller has ECC enabled */
/* Grab the register range from the sdr controller in device tree */
mc_vbase = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
"altr,sdr-syscon");
if (IS_ERR(mc_vbase)) {
edac_printk(KERN_ERR, EDAC_MC,
"regmap for altr,sdr-syscon lookup failed.\n");
return -ENODEV;
}
if (regmap_read(mc_vbase, CTLCFG_OFST, &read_reg) ||
((read_reg & CTLCFG_ECC_AUTO_EN) != CTLCFG_ECC_AUTO_EN)) {
edac_printk(KERN_ERR, EDAC_MC,
"No ECC/ECC disabled [0x%08X]\n", read_reg);
return -ENODEV;
}
/* Grab memory size from device tree. */
mem_size = altr_sdram_get_total_mem_size(mc_vbase);
if (!mem_size) {
edac_printk(KERN_ERR, EDAC_MC,
"Unable to calculate memory size\n");
return -ENODEV;
}
/* Ensure the SDRAM Interrupt is disabled and cleared */
if (regmap_write(mc_vbase, DRAMINTR_OFST, DRAMINTR_INTRCLR)) {
edac_printk(KERN_ERR, EDAC_MC,
"Error clearing SDRAM ECC IRQ\n");
return -ENODEV;
}
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
edac_printk(KERN_ERR, EDAC_MC,
"No irq %d in DT\n", irq);
return -ENODEV;
}
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = 1;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = 1;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
sizeof(struct altr_sdram_mc_data));
if (!mci)
return -ENOMEM;
mci->pdev = &pdev->dev;
drvdata = mci->pvt_info;
drvdata->mc_vbase = mc_vbase;
platform_set_drvdata(pdev, mci);
if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) {
res = -ENOMEM;
goto free;
}
mci->mtype_cap = MEM_FLAG_DDR3;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = EDAC_VERSION;
mci->ctl_name = dev_name(&pdev->dev);
mci->scrub_mode = SCRUB_SW_SRC;
mci->dev_name = dev_name(&pdev->dev);
dimm = *mci->dimms;
dimm->nr_pages = ((mem_size - 1) >> PAGE_SHIFT) + 1;
dimm->grain = 8;
dimm->dtype = DEV_X8;
dimm->mtype = MEM_DDR3;
dimm->edac_mode = EDAC_SECDED;
res = edac_mc_add_mc(mci);
if (res < 0)
goto err;
res = devm_request_irq(&pdev->dev, irq, altr_sdram_mc_err_handler,
0, dev_name(&pdev->dev), mci);
if (res < 0) {
edac_mc_printk(mci, KERN_ERR,
"Unable to request irq %d\n", irq);
res = -ENODEV;
goto err2;
}
if (regmap_write(drvdata->mc_vbase, DRAMINTR_OFST,
(DRAMINTR_INTRCLR | DRAMINTR_INTREN))) {
edac_mc_printk(mci, KERN_ERR,
"Error enabling SDRAM ECC IRQ\n");
res = -ENODEV;
goto err2;
}
altr_sdr_mc_create_debugfs_nodes(mci);
devres_close_group(&pdev->dev, NULL);
return 0;
err2:
edac_mc_del_mc(&pdev->dev);
err:
devres_release_group(&pdev->dev, NULL);
free:
edac_mc_free(mci);
edac_printk(KERN_ERR, EDAC_MC,
"EDAC Probe Failed; Error %d\n", res);
return res;
}
static int altr_sdram_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
platform_set_drvdata(pdev, NULL);
return 0;
}
static const struct of_device_id altr_sdram_ctrl_of_match[] = {
{ .compatible = "altr,sdram-edac", },
{},
};
MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match);
static struct platform_driver altr_sdram_edac_driver = {
.probe = altr_sdram_probe,
.remove = altr_sdram_remove,
.driver = {
.name = "altr_sdram_edac",
.of_match_table = altr_sdram_ctrl_of_match,
},
};
module_platform_driver(altr_sdram_edac_driver);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Thor Thayer");
MODULE_DESCRIPTION("EDAC Driver for Altera SDRAM Controller");

2994
drivers/edac/amd64_edac.c Normal file

File diff suppressed because it is too large Load diff

548
drivers/edac/amd64_edac.h Normal file
View file

@ -0,0 +1,548 @@
/*
* AMD64 class Memory Controller kernel module
*
* Copyright (c) 2009 SoftwareBitMaker.
* Copyright (c) 2009 Advanced Micro Devices, Inc.
*
* This file may be distributed under the terms of the
* GNU General Public License.
*
* Originally Written by Thayne Harbaugh
*
* Changes by Douglas "norsk" Thompson <dougthompson@xmission.com>:
* - K8 CPU Revision D and greater support
*
* Changes by Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>:
* - Module largely rewritten, with new (and hopefully correct)
* code for dealing with node and chip select interleaving,
* various code cleanup, and bug fixes
* - Added support for memory hoisting using DRAM hole address
* register
*
* Changes by Douglas "norsk" Thompson <dougthompson@xmission.com>:
* -K8 Rev (1207) revision support added, required Revision
* specific mini-driver code to support Rev F as well as
* prior revisions
*
* Changes by Douglas "norsk" Thompson <dougthompson@xmission.com>:
* -Family 10h revision support added. New PCI Device IDs,
* indicating new changes. Actual registers modified
* were slight, less than the Rev E to Rev F transition
* but changing the PCI Device ID was the proper thing to
* do, as it provides for almost automactic family
* detection. The mods to Rev F required more family
* information detection.
*
* Changes/Fixes by Borislav Petkov <bp@alien8.de>:
* - misc fixes and code cleanups
*
* This module is based on the following documents
* (available from http://www.amd.com/):
*
* Title: BIOS and Kernel Developer's Guide for AMD Athlon 64 and AMD
* Opteron Processors
* AMD publication #: 26094
*` Revision: 3.26
*
* Title: BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh
* Processors
* AMD publication #: 32559
* Revision: 3.00
* Issue Date: May 2006
*
* Title: BIOS and Kernel Developer's Guide (BKDG) For AMD Family 10h
* Processors
* AMD publication #: 31116
* Revision: 3.00
* Issue Date: September 07, 2007
*
* Sections in the first 2 documents are no longer in sync with each other.
* The Family 10h BKDG was totally re-written from scratch with a new
* presentation model.
* Therefore, comments that refer to a Document section might be off.
*/
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/mmzone.h>
#include <linux/edac.h>
#include <asm/msr.h>
#include "edac_core.h"
#include "mce_amd.h"
#define amd64_debug(fmt, arg...) \
edac_printk(KERN_DEBUG, "amd64", fmt, ##arg)
#define amd64_info(fmt, arg...) \
edac_printk(KERN_INFO, "amd64", fmt, ##arg)
#define amd64_notice(fmt, arg...) \
edac_printk(KERN_NOTICE, "amd64", fmt, ##arg)
#define amd64_warn(fmt, arg...) \
edac_printk(KERN_WARNING, "amd64", fmt, ##arg)
#define amd64_err(fmt, arg...) \
edac_printk(KERN_ERR, "amd64", fmt, ##arg)
#define amd64_mc_warn(mci, fmt, arg...) \
edac_mc_chipset_printk(mci, KERN_WARNING, "amd64", fmt, ##arg)
#define amd64_mc_err(mci, fmt, arg...) \
edac_mc_chipset_printk(mci, KERN_ERR, "amd64", fmt, ##arg)
/*
* Throughout the comments in this code, the following terms are used:
*
* SysAddr, DramAddr, and InputAddr
*
* These terms come directly from the amd64 documentation
* (AMD publication #26094). They are defined as follows:
*
* SysAddr:
* This is a physical address generated by a CPU core or a device
* doing DMA. If generated by a CPU core, a SysAddr is the result of
* a virtual to physical address translation by the CPU core's address
* translation mechanism (MMU).
*
* DramAddr:
* A DramAddr is derived from a SysAddr by subtracting an offset that
* depends on which node the SysAddr maps to and whether the SysAddr
* is within a range affected by memory hoisting. The DRAM Base
* (section 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers
* determine which node a SysAddr maps to.
*
* If the DRAM Hole Address Register (DHAR) is enabled and the SysAddr
* is within the range of addresses specified by this register, then
* a value x from the DHAR is subtracted from the SysAddr to produce a
* DramAddr. Here, x represents the base address for the node that
* the SysAddr maps to plus an offset due to memory hoisting. See
* section 3.4.8 and the comments in amd64_get_dram_hole_info() and
* sys_addr_to_dram_addr() below for more information.
*
* If the SysAddr is not affected by the DHAR then a value y is
* subtracted from the SysAddr to produce a DramAddr. Here, y is the
* base address for the node that the SysAddr maps to. See section
* 3.4.4 and the comments in sys_addr_to_dram_addr() below for more
* information.
*
* InputAddr:
* A DramAddr is translated to an InputAddr before being passed to the
* memory controller for the node that the DramAddr is associated
* with. The memory controller then maps the InputAddr to a csrow.
* If node interleaving is not in use, then the InputAddr has the same
* value as the DramAddr. Otherwise, the InputAddr is produced by
* discarding the bits used for node interleaving from the DramAddr.
* See section 3.4.4 for more information.
*
* The memory controller for a given node uses its DRAM CS Base and
* DRAM CS Mask registers to map an InputAddr to a csrow. See
* sections 3.5.4 and 3.5.5 for more information.
*/
#define EDAC_AMD64_VERSION "3.4.0"
#define EDAC_MOD_STR "amd64_edac"
/* Extended Model from CPUID, for CPU Revision numbers */
#define K8_REV_D 1
#define K8_REV_E 2
#define K8_REV_F 4
/* Hardware limit on ChipSelect rows per MC and processors per system */
#define NUM_CHIPSELECTS 8
#define DRAM_RANGES 8
#define ON true
#define OFF false
/*
* PCI-defined configuration space registers
*/
#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F1 0x141b
#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F2 0x141c
#define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601
#define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602
#define PCI_DEVICE_ID_AMD_16H_NB_F1 0x1531
#define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532
#define PCI_DEVICE_ID_AMD_16H_M30H_NB_F1 0x1581
#define PCI_DEVICE_ID_AMD_16H_M30H_NB_F2 0x1582
/*
* Function 1 - Address Map
*/
#define DRAM_BASE_LO 0x40
#define DRAM_LIMIT_LO 0x44
/*
* F15 M30h D18F1x2[1C:00]
*/
#define DRAM_CONT_BASE 0x200
#define DRAM_CONT_LIMIT 0x204
/*
* F15 M30h D18F1x2[4C:40]
*/
#define DRAM_CONT_HIGH_OFF 0x240
#define dram_rw(pvt, i) ((u8)(pvt->ranges[i].base.lo & 0x3))
#define dram_intlv_sel(pvt, i) ((u8)((pvt->ranges[i].lim.lo >> 8) & 0x7))
#define dram_dst_node(pvt, i) ((u8)(pvt->ranges[i].lim.lo & 0x7))
#define DHAR 0xf0
#define dhar_mem_hoist_valid(pvt) ((pvt)->dhar & BIT(1))
#define dhar_base(pvt) ((pvt)->dhar & 0xff000000)
#define k8_dhar_offset(pvt) (((pvt)->dhar & 0x0000ff00) << 16)
/* NOTE: Extra mask bit vs K8 */
#define f10_dhar_offset(pvt) (((pvt)->dhar & 0x0000ff80) << 16)
#define DCT_CFG_SEL 0x10C
#define DRAM_LOCAL_NODE_BASE 0x120
#define DRAM_LOCAL_NODE_LIM 0x124
#define DRAM_BASE_HI 0x140
#define DRAM_LIMIT_HI 0x144
/*
* Function 2 - DRAM controller
*/
#define DCSB0 0x40
#define DCSB1 0x140
#define DCSB_CS_ENABLE BIT(0)
#define DCSM0 0x60
#define DCSM1 0x160
#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE)
#define DBAM0 0x80
#define DBAM1 0x180
/* Extract the DIMM 'type' on the i'th DIMM from the DBAM reg value passed */
#define DBAM_DIMM(i, reg) ((((reg) >> (4*(i)))) & 0xF)
#define DBAM_MAX_VALUE 11
#define DCLR0 0x90
#define DCLR1 0x190
#define REVE_WIDTH_128 BIT(16)
#define WIDTH_128 BIT(11)
#define DCHR0 0x94
#define DCHR1 0x194
#define DDR3_MODE BIT(8)
#define DCT_SEL_LO 0x110
#define dct_high_range_enabled(pvt) ((pvt)->dct_sel_lo & BIT(0))
#define dct_interleave_enabled(pvt) ((pvt)->dct_sel_lo & BIT(2))
#define dct_ganging_enabled(pvt) ((boot_cpu_data.x86 == 0x10) && ((pvt)->dct_sel_lo & BIT(4)))
#define dct_data_intlv_enabled(pvt) ((pvt)->dct_sel_lo & BIT(5))
#define dct_memory_cleared(pvt) ((pvt)->dct_sel_lo & BIT(10))
#define SWAP_INTLV_REG 0x10c
#define DCT_SEL_HI 0x114
/*
* Function 3 - Misc Control
*/
#define NBCTL 0x40
#define NBCFG 0x44
#define NBCFG_CHIPKILL BIT(23)
#define NBCFG_ECC_ENABLE BIT(22)
/* F3x48: NBSL */
#define F10_NBSL_EXT_ERR_ECC 0x8
#define NBSL_PP_OBS 0x2
#define SCRCTRL 0x58
#define F10_ONLINE_SPARE 0xB0
#define online_spare_swap_done(pvt, c) (((pvt)->online_spare >> (1 + 2 * (c))) & 0x1)
#define online_spare_bad_dramcs(pvt, c) (((pvt)->online_spare >> (4 + 4 * (c))) & 0x7)
#define F10_NB_ARRAY_ADDR 0xB8
#define F10_NB_ARRAY_DRAM BIT(31)
/* Bits [2:1] are used to select 16-byte section within a 64-byte cacheline */
#define SET_NB_ARRAY_ADDR(section) (((section) & 0x3) << 1)
#define F10_NB_ARRAY_DATA 0xBC
#define F10_NB_ARR_ECC_WR_REQ BIT(17)
#define SET_NB_DRAM_INJECTION_WRITE(inj) \
(BIT(((inj.word) & 0xF) + 20) | \
F10_NB_ARR_ECC_WR_REQ | inj.bit_map)
#define SET_NB_DRAM_INJECTION_READ(inj) \
(BIT(((inj.word) & 0xF) + 20) | \
BIT(16) | inj.bit_map)
#define NBCAP 0xE8
#define NBCAP_CHIPKILL BIT(4)
#define NBCAP_SECDED BIT(3)
#define NBCAP_DCT_DUAL BIT(0)
#define EXT_NB_MCA_CFG 0x180
/* MSRs */
#define MSR_MCGCTL_NBE BIT(4)
enum amd_families {
K8_CPUS = 0,
F10_CPUS,
F15_CPUS,
F15_M30H_CPUS,
F16_CPUS,
F16_M30H_CPUS,
NUM_FAMILIES,
};
/* Error injection control structure */
struct error_injection {
u32 section;
u32 word;
u32 bit_map;
};
/* low and high part of PCI config space regs */
struct reg_pair {
u32 lo, hi;
};
/*
* See F1x[1, 0][7C:40] DRAM Base/Limit Registers
*/
struct dram_range {
struct reg_pair base;
struct reg_pair lim;
};
/* A DCT chip selects collection */
struct chip_select {
u32 csbases[NUM_CHIPSELECTS];
u8 b_cnt;
u32 csmasks[NUM_CHIPSELECTS];
u8 m_cnt;
};
struct amd64_pvt {
struct low_ops *ops;
/* pci_device handles which we utilize */
struct pci_dev *F1, *F2, *F3;
u16 mc_node_id; /* MC index of this MC node */
u8 fam; /* CPU family */
u8 model; /* ... model */
u8 stepping; /* ... stepping */
int ext_model; /* extended model value of this node */
int channel_count;
/* Raw registers */
u32 dclr0; /* DRAM Configuration Low DCT0 reg */
u32 dclr1; /* DRAM Configuration Low DCT1 reg */
u32 dchr0; /* DRAM Configuration High DCT0 reg */
u32 dchr1; /* DRAM Configuration High DCT1 reg */
u32 nbcap; /* North Bridge Capabilities */
u32 nbcfg; /* F10 North Bridge Configuration */
u32 ext_nbcfg; /* Extended F10 North Bridge Configuration */
u32 dhar; /* DRAM Hoist reg */
u32 dbam0; /* DRAM Base Address Mapping reg for DCT0 */
u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */
/* one for each DCT */
struct chip_select csels[2];
/* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */
struct dram_range ranges[DRAM_RANGES];
u64 top_mem; /* top of memory below 4GB */
u64 top_mem2; /* top of memory above 4GB */
u32 dct_sel_lo; /* DRAM Controller Select Low */
u32 dct_sel_hi; /* DRAM Controller Select High */
u32 online_spare; /* On-Line spare Reg */
/* x4 or x8 syndromes in use */
u8 ecc_sym_sz;
/* place to store error injection parameters prior to issue */
struct error_injection injection;
};
enum err_codes {
DECODE_OK = 0,
ERR_NODE = -1,
ERR_CSROW = -2,
ERR_CHANNEL = -3,
};
struct err_info {
int err_code;
struct mem_ctl_info *src_mci;
int csrow;
int channel;
u16 syndrome;
u32 page;
u32 offset;
};
static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i)
{
u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8;
if (boot_cpu_data.x86 == 0xf)
return addr;
return (((u64)pvt->ranges[i].base.hi & 0x000000ff) << 40) | addr;
}
static inline u64 get_dram_limit(struct amd64_pvt *pvt, u8 i)
{
u64 lim = (((u64)pvt->ranges[i].lim.lo & 0xffff0000) << 8) | 0x00ffffff;
if (boot_cpu_data.x86 == 0xf)
return lim;
return (((u64)pvt->ranges[i].lim.hi & 0x000000ff) << 40) | lim;
}
static inline u16 extract_syndrome(u64 status)
{
return ((status >> 47) & 0xff) | ((status >> 16) & 0xff00);
}
static inline u8 dct_sel_interleave_addr(struct amd64_pvt *pvt)
{
if (pvt->fam == 0x15 && pvt->model >= 0x30)
return (((pvt->dct_sel_hi >> 9) & 0x1) << 2) |
((pvt->dct_sel_lo >> 6) & 0x3);
return ((pvt)->dct_sel_lo >> 6) & 0x3;
}
/*
* per-node ECC settings descriptor
*/
struct ecc_settings {
u32 old_nbctl;
bool nbctl_valid;
struct flags {
unsigned long nb_mce_enable:1;
unsigned long nb_ecc_prev:1;
} flags;
};
#ifdef CONFIG_EDAC_DEBUG
int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci);
void amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci);
#else
static inline int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci)
{
return 0;
}
static void inline amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci)
{
}
#endif
#ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci);
void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci);
#else
static inline int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci)
{
return 0;
}
static inline void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci)
{
}
#endif
/*
* Each of the PCI Device IDs types have their own set of hardware accessor
* functions and per device encoding/decoding logic.
*/
struct low_ops {
int (*early_channel_count) (struct amd64_pvt *pvt);
void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr,
struct err_info *);
int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, unsigned cs_mode);
};
struct amd64_family_type {
const char *ctl_name;
u16 f1_id, f3_id;
struct low_ops ops;
};
int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
u32 *val, const char *func);
int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
u32 val, const char *func);
#define amd64_read_pci_cfg(pdev, offset, val) \
__amd64_read_pci_cfg_dword(pdev, offset, val, __func__)
#define amd64_write_pci_cfg(pdev, offset, val) \
__amd64_write_pci_cfg_dword(pdev, offset, val, __func__)
int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
u64 *hole_offset, u64 *hole_size);
#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
/* Injection helpers */
static inline void disable_caches(void *dummy)
{
write_cr0(read_cr0() | X86_CR0_CD);
wbinvd();
}
static inline void enable_caches(void *dummy)
{
write_cr0(read_cr0() & ~X86_CR0_CD);
}
static inline u8 dram_intlv_en(struct amd64_pvt *pvt, unsigned int i)
{
if (pvt->fam == 0x15 && pvt->model >= 0x30) {
u32 tmp;
amd64_read_pci_cfg(pvt->F1, DRAM_CONT_LIMIT, &tmp);
return (u8) tmp & 0xF;
}
return (u8) (pvt->ranges[i].base.lo >> 8) & 0x7;
}
static inline u8 dhar_valid(struct amd64_pvt *pvt)
{
if (pvt->fam == 0x15 && pvt->model >= 0x30) {
u32 tmp;
amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &tmp);
return (tmp >> 1) & BIT(0);
}
return (pvt)->dhar & BIT(0);
}
static inline u32 dct_sel_baseaddr(struct amd64_pvt *pvt)
{
if (pvt->fam == 0x15 && pvt->model >= 0x30) {
u32 tmp;
amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &tmp);
return (tmp >> 11) & 0x1FFF;
}
return (pvt)->dct_sel_lo & 0xFFFFF800;
}

View file

@ -0,0 +1,73 @@
#include "amd64_edac.h"
#define EDAC_DCT_ATTR_SHOW(reg) \
static ssize_t amd64_##reg##_show(struct device *dev, \
struct device_attribute *mattr, \
char *data) \
{ \
struct mem_ctl_info *mci = to_mci(dev); \
struct amd64_pvt *pvt = mci->pvt_info; \
return sprintf(data, "0x%016llx\n", (u64)pvt->reg); \
}
EDAC_DCT_ATTR_SHOW(dhar);
EDAC_DCT_ATTR_SHOW(dbam0);
EDAC_DCT_ATTR_SHOW(top_mem);
EDAC_DCT_ATTR_SHOW(top_mem2);
static ssize_t amd64_hole_show(struct device *dev,
struct device_attribute *mattr,
char *data)
{
struct mem_ctl_info *mci = to_mci(dev);
u64 hole_base = 0;
u64 hole_offset = 0;
u64 hole_size = 0;
amd64_get_dram_hole_info(mci, &hole_base, &hole_offset, &hole_size);
return sprintf(data, "%llx %llx %llx\n", hole_base, hole_offset,
hole_size);
}
/*
* update NUM_DBG_ATTRS in case you add new members
*/
static DEVICE_ATTR(dhar, S_IRUGO, amd64_dhar_show, NULL);
static DEVICE_ATTR(dbam, S_IRUGO, amd64_dbam0_show, NULL);
static DEVICE_ATTR(topmem, S_IRUGO, amd64_top_mem_show, NULL);
static DEVICE_ATTR(topmem2, S_IRUGO, amd64_top_mem2_show, NULL);
static DEVICE_ATTR(dram_hole, S_IRUGO, amd64_hole_show, NULL);
int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci)
{
int rc;
rc = device_create_file(&mci->dev, &dev_attr_dhar);
if (rc < 0)
return rc;
rc = device_create_file(&mci->dev, &dev_attr_dbam);
if (rc < 0)
return rc;
rc = device_create_file(&mci->dev, &dev_attr_topmem);
if (rc < 0)
return rc;
rc = device_create_file(&mci->dev, &dev_attr_topmem2);
if (rc < 0)
return rc;
rc = device_create_file(&mci->dev, &dev_attr_dram_hole);
if (rc < 0)
return rc;
return 0;
}
void amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci)
{
device_remove_file(&mci->dev, &dev_attr_dhar);
device_remove_file(&mci->dev, &dev_attr_dbam);
device_remove_file(&mci->dev, &dev_attr_topmem);
device_remove_file(&mci->dev, &dev_attr_topmem2);
device_remove_file(&mci->dev, &dev_attr_dram_hole);
}

View file

@ -0,0 +1,241 @@
#include "amd64_edac.h"
static ssize_t amd64_inject_section_show(struct device *dev,
struct device_attribute *mattr,
char *buf)
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
return sprintf(buf, "0x%x\n", pvt->injection.section);
}
/*
* store error injection section value which refers to one of 4 16-byte sections
* within a 64-byte cacheline
*
* range: 0..3
*/
static ssize_t amd64_inject_section_store(struct device *dev,
struct device_attribute *mattr,
const char *data, size_t count)
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
int ret;
ret = kstrtoul(data, 10, &value);
if (ret < 0)
return ret;
if (value > 3) {
amd64_warn("%s: invalid section 0x%lx\n", __func__, value);
return -EINVAL;
}
pvt->injection.section = (u32) value;
return count;
}
static ssize_t amd64_inject_word_show(struct device *dev,
struct device_attribute *mattr,
char *buf)
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
return sprintf(buf, "0x%x\n", pvt->injection.word);
}
/*
* store error injection word value which refers to one of 9 16-bit word of the
* 16-byte (128-bit + ECC bits) section
*
* range: 0..8
*/
static ssize_t amd64_inject_word_store(struct device *dev,
struct device_attribute *mattr,
const char *data, size_t count)
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
int ret;
ret = kstrtoul(data, 10, &value);
if (ret < 0)
return ret;
if (value > 8) {
amd64_warn("%s: invalid word 0x%lx\n", __func__, value);
return -EINVAL;
}
pvt->injection.word = (u32) value;
return count;
}
static ssize_t amd64_inject_ecc_vector_show(struct device *dev,
struct device_attribute *mattr,
char *buf)
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
return sprintf(buf, "0x%x\n", pvt->injection.bit_map);
}
/*
* store 16 bit error injection vector which enables injecting errors to the
* corresponding bit within the error injection word above. When used during a
* DRAM ECC read, it holds the contents of the of the DRAM ECC bits.
*/
static ssize_t amd64_inject_ecc_vector_store(struct device *dev,
struct device_attribute *mattr,
const char *data, size_t count)
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
int ret;
ret = kstrtoul(data, 16, &value);
if (ret < 0)
return ret;
if (value & 0xFFFF0000) {
amd64_warn("%s: invalid EccVector: 0x%lx\n", __func__, value);
return -EINVAL;
}
pvt->injection.bit_map = (u32) value;
return count;
}
/*
* Do a DRAM ECC read. Assemble staged values in the pvt area, format into
* fields needed by the injection registers and read the NB Array Data Port.
*/
static ssize_t amd64_inject_read_store(struct device *dev,
struct device_attribute *mattr,
const char *data, size_t count)
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
u32 section, word_bits;
int ret;
ret = kstrtoul(data, 10, &value);
if (ret < 0)
return ret;
/* Form value to choose 16-byte section of cacheline */
section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection);
/* Issue 'word' and 'bit' along with the READ request */
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
return count;
}
/*
* Do a DRAM ECC write. Assemble staged values in the pvt area and format into
* fields needed by the injection registers.
*/
static ssize_t amd64_inject_write_store(struct device *dev,
struct device_attribute *mattr,
const char *data, size_t count)
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
u32 section, word_bits, tmp;
unsigned long value;
int ret;
ret = kstrtoul(data, 10, &value);
if (ret < 0)
return ret;
/* Form value to choose 16-byte section of cacheline */
section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection);
pr_notice_once("Don't forget to decrease MCE polling interval in\n"
"/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n"
"so that you can get the error report faster.\n");
on_each_cpu(disable_caches, NULL, 1);
/* Issue 'word' and 'bit' along with the READ request */
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
retry:
/* wait until injection happens */
amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp);
if (tmp & F10_NB_ARR_ECC_WR_REQ) {
cpu_relax();
goto retry;
}
on_each_cpu(enable_caches, NULL, 1);
edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
return count;
}
/*
* update NUM_INJ_ATTRS in case you add new members
*/
static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR,
amd64_inject_section_show, amd64_inject_section_store);
static DEVICE_ATTR(inject_word, S_IRUGO | S_IWUSR,
amd64_inject_word_show, amd64_inject_word_store);
static DEVICE_ATTR(inject_ecc_vector, S_IRUGO | S_IWUSR,
amd64_inject_ecc_vector_show, amd64_inject_ecc_vector_store);
static DEVICE_ATTR(inject_write, S_IWUSR,
NULL, amd64_inject_write_store);
static DEVICE_ATTR(inject_read, S_IWUSR,
NULL, amd64_inject_read_store);
int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci)
{
int rc;
rc = device_create_file(&mci->dev, &dev_attr_inject_section);
if (rc < 0)
return rc;
rc = device_create_file(&mci->dev, &dev_attr_inject_word);
if (rc < 0)
return rc;
rc = device_create_file(&mci->dev, &dev_attr_inject_ecc_vector);
if (rc < 0)
return rc;
rc = device_create_file(&mci->dev, &dev_attr_inject_write);
if (rc < 0)
return rc;
rc = device_create_file(&mci->dev, &dev_attr_inject_read);
if (rc < 0)
return rc;
return 0;
}
void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci)
{
device_remove_file(&mci->dev, &dev_attr_inject_section);
device_remove_file(&mci->dev, &dev_attr_inject_word);
device_remove_file(&mci->dev, &dev_attr_inject_ecc_vector);
device_remove_file(&mci->dev, &dev_attr_inject_write);
device_remove_file(&mci->dev, &dev_attr_inject_read);
}

378
drivers/edac/amd76x_edac.c Normal file
View file

@ -0,0 +1,378 @@
/*
* AMD 76x Memory Controller kernel module
* (C) 2003 Linux Networx (http://lnxi.com)
* This file may be distributed under the terms of the
* GNU General Public License.
*
* Written by Thayne Harbaugh
* Based on work by Dan Hollis <goemon at anime dot net> and others.
* http://www.anime.net/~goemon/linux-ecc/
*
* $Id: edac_amd76x.c,v 1.4.2.5 2005/10/05 00:43:44 dsp_llnl Exp $
*
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/edac.h>
#include "edac_core.h"
#define AMD76X_REVISION " Ver: 2.0.2"
#define EDAC_MOD_STR "amd76x_edac"
#define amd76x_printk(level, fmt, arg...) \
edac_printk(level, "amd76x", fmt, ##arg)
#define amd76x_mc_printk(mci, level, fmt, arg...) \
edac_mc_chipset_printk(mci, level, "amd76x", fmt, ##arg)
#define AMD76X_NR_CSROWS 8
#define AMD76X_NR_DIMMS 4
/* AMD 76x register addresses - device 0 function 0 - PCI bridge */
#define AMD76X_ECC_MODE_STATUS 0x48 /* Mode and status of ECC (32b)
*
* 31:16 reserved
* 15:14 SERR enabled: x1=ue 1x=ce
* 13 reserved
* 12 diag: disabled, enabled
* 11:10 mode: dis, EC, ECC, ECC+scrub
* 9:8 status: x1=ue 1x=ce
* 7:4 UE cs row
* 3:0 CE cs row
*/
#define AMD76X_DRAM_MODE_STATUS 0x58 /* DRAM Mode and status (32b)
*
* 31:26 clock disable 5 - 0
* 25 SDRAM init
* 24 reserved
* 23 mode register service
* 22:21 suspend to RAM
* 20 burst refresh enable
* 19 refresh disable
* 18 reserved
* 17:16 cycles-per-refresh
* 15:8 reserved
* 7:0 x4 mode enable 7 - 0
*/
#define AMD76X_MEM_BASE_ADDR 0xC0 /* Memory base address (8 x 32b)
*
* 31:23 chip-select base
* 22:16 reserved
* 15:7 chip-select mask
* 6:3 reserved
* 2:1 address mode
* 0 chip-select enable
*/
struct amd76x_error_info {
u32 ecc_mode_status;
};
enum amd76x_chips {
AMD761 = 0,
AMD762
};
struct amd76x_dev_info {
const char *ctl_name;
};
static const struct amd76x_dev_info amd76x_devs[] = {
[AMD761] = {
.ctl_name = "AMD761"},
[AMD762] = {
.ctl_name = "AMD762"},
};
static struct edac_pci_ctl_info *amd76x_pci;
/**
* amd76x_get_error_info - fetch error information
* @mci: Memory controller
* @info: Info to fill in
*
* Fetch and store the AMD76x ECC status. Clear pending status
* on the chip so that further errors will be reported
*/
static void amd76x_get_error_info(struct mem_ctl_info *mci,
struct amd76x_error_info *info)
{
struct pci_dev *pdev;
pdev = to_pci_dev(mci->pdev);
pci_read_config_dword(pdev, AMD76X_ECC_MODE_STATUS,
&info->ecc_mode_status);
if (info->ecc_mode_status & BIT(8))
pci_write_bits32(pdev, AMD76X_ECC_MODE_STATUS,
(u32) BIT(8), (u32) BIT(8));
if (info->ecc_mode_status & BIT(9))
pci_write_bits32(pdev, AMD76X_ECC_MODE_STATUS,
(u32) BIT(9), (u32) BIT(9));
}
/**
* amd76x_process_error_info - Error check
* @mci: Memory controller
* @info: Previously fetched information from chip
* @handle_errors: 1 if we should do recovery
*
* Process the chip state and decide if an error has occurred.
* A return of 1 indicates an error. Also if handle_errors is true
* then attempt to handle and clean up after the error
*/
static int amd76x_process_error_info(struct mem_ctl_info *mci,
struct amd76x_error_info *info,
int handle_errors)
{
int error_found;
u32 row;
error_found = 0;
/*
* Check for an uncorrectable error
*/
if (info->ecc_mode_status & BIT(8)) {
error_found = 1;
if (handle_errors) {
row = (info->ecc_mode_status >> 4) & 0xf;
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
mci->csrows[row]->first_page, 0, 0,
row, 0, -1,
mci->ctl_name, "");
}
}
/*
* Check for a correctable error
*/
if (info->ecc_mode_status & BIT(9)) {
error_found = 1;
if (handle_errors) {
row = info->ecc_mode_status & 0xf;
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
mci->csrows[row]->first_page, 0, 0,
row, 0, -1,
mci->ctl_name, "");
}
}
return error_found;
}
/**
* amd76x_check - Poll the controller
* @mci: Memory controller
*
* Called by the poll handlers this function reads the status
* from the controller and checks for errors.
*/
static void amd76x_check(struct mem_ctl_info *mci)
{
struct amd76x_error_info info;
edac_dbg(3, "\n");
amd76x_get_error_info(mci, &info);
amd76x_process_error_info(mci, &info, 1);
}
static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
enum edac_type edac_mode)
{
struct csrow_info *csrow;
struct dimm_info *dimm;
u32 mba, mba_base, mba_mask, dms;
int index;
for (index = 0; index < mci->nr_csrows; index++) {
csrow = mci->csrows[index];
dimm = csrow->channels[0]->dimm;
/* find the DRAM Chip Select Base address and mask */
pci_read_config_dword(pdev,
AMD76X_MEM_BASE_ADDR + (index * 4), &mba);
if (!(mba & BIT(0)))
continue;
mba_base = mba & 0xff800000UL;
mba_mask = ((mba & 0xff80) << 16) | 0x7fffffUL;
pci_read_config_dword(pdev, AMD76X_DRAM_MODE_STATUS, &dms);
csrow->first_page = mba_base >> PAGE_SHIFT;
dimm->nr_pages = (mba_mask + 1) >> PAGE_SHIFT;
csrow->last_page = csrow->first_page + dimm->nr_pages - 1;
csrow->page_mask = mba_mask >> PAGE_SHIFT;
dimm->grain = dimm->nr_pages << PAGE_SHIFT;
dimm->mtype = MEM_RDDR;
dimm->dtype = ((dms >> index) & 0x1) ? DEV_X4 : DEV_UNKNOWN;
dimm->edac_mode = edac_mode;
}
}
/**
* amd76x_probe1 - Perform set up for detected device
* @pdev; PCI device detected
* @dev_idx: Device type index
*
* We have found an AMD76x and now need to set up the memory
* controller status reporting. We configure and set up the
* memory controller reporting and claim the device.
*/
static int amd76x_probe1(struct pci_dev *pdev, int dev_idx)
{
static const enum edac_type ems_modes[] = {
EDAC_NONE,
EDAC_EC,
EDAC_SECDED,
EDAC_SECDED
};
struct mem_ctl_info *mci;
struct edac_mc_layer layers[2];
u32 ems;
u32 ems_mode;
struct amd76x_error_info discard;
edac_dbg(0, "\n");
pci_read_config_dword(pdev, AMD76X_ECC_MODE_STATUS, &ems);
ems_mode = (ems >> 10) & 0x3;
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = AMD76X_NR_CSROWS;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = 1;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (mci == NULL)
return -ENOMEM;
edac_dbg(0, "mci = %p\n", mci);
mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_RDDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED;
mci->edac_cap = ems_mode ?
(EDAC_FLAG_EC | EDAC_FLAG_SECDED) : EDAC_FLAG_NONE;
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = AMD76X_REVISION;
mci->ctl_name = amd76x_devs[dev_idx].ctl_name;
mci->dev_name = pci_name(pdev);
mci->edac_check = amd76x_check;
mci->ctl_page_to_phys = NULL;
amd76x_init_csrows(mci, pdev, ems_modes[ems_mode]);
amd76x_get_error_info(mci, &discard); /* clear counters */
/* Here we assume that we will never see multiple instances of this
* type of memory controller. The ID is therefore hardcoded to 0.
*/
if (edac_mc_add_mc(mci)) {
edac_dbg(3, "failed edac_mc_add_mc()\n");
goto fail;
}
/* allocating generic PCI control info */
amd76x_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
if (!amd76x_pci) {
printk(KERN_WARNING
"%s(): Unable to create PCI control\n",
__func__);
printk(KERN_WARNING
"%s(): PCI error report via EDAC not setup\n",
__func__);
}
/* get this far and it's successful */
edac_dbg(3, "success\n");
return 0;
fail:
edac_mc_free(mci);
return -ENODEV;
}
/* returns count (>= 0), or negative on error */
static int amd76x_init_one(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
edac_dbg(0, "\n");
/* don't need to call pci_enable_device() */
return amd76x_probe1(pdev, ent->driver_data);
}
/**
* amd76x_remove_one - driver shutdown
* @pdev: PCI device being handed back
*
* Called when the driver is unloaded. Find the matching mci
* structure for the device then delete the mci and free the
* resources.
*/
static void amd76x_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
edac_dbg(0, "\n");
if (amd76x_pci)
edac_pci_release_generic_ctl(amd76x_pci);
if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
return;
edac_mc_free(mci);
}
static const struct pci_device_id amd76x_pci_tbl[] = {
{
PCI_VEND_DEV(AMD, FE_GATE_700C), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
AMD762},
{
PCI_VEND_DEV(AMD, FE_GATE_700E), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
AMD761},
{
0,
} /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, amd76x_pci_tbl);
static struct pci_driver amd76x_driver = {
.name = EDAC_MOD_STR,
.probe = amd76x_init_one,
.remove = amd76x_remove_one,
.id_table = amd76x_pci_tbl,
};
static int __init amd76x_init(void)
{
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
return pci_register_driver(&amd76x_driver);
}
static void __exit amd76x_exit(void)
{
pci_unregister_driver(&amd76x_driver);
}
module_init(amd76x_init);
module_exit(amd76x_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh");
MODULE_DESCRIPTION("MC support for AMD 76x memory controllers");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");

610
drivers/edac/amd8111_edac.c Normal file
View file

@ -0,0 +1,610 @@
/*
* amd8111_edac.c, AMD8111 Hyper Transport chip EDAC kernel module
*
* Copyright (c) 2008 Wind River Systems, Inc.
*
* Authors: Cao Qingtao <qingtao.cao@windriver.com>
* Benjamin Walsh <benjamin.walsh@windriver.com>
* Hu Yongqi <yongqi.hu@windriver.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/edac.h>
#include <linux/pci_ids.h>
#include <asm/io.h>
#include "edac_core.h"
#include "edac_module.h"
#include "amd8111_edac.h"
#define AMD8111_EDAC_REVISION " Ver: 1.0.0"
#define AMD8111_EDAC_MOD_STR "amd8111_edac"
#define PCI_DEVICE_ID_AMD_8111_PCI 0x7460
enum amd8111_edac_devs {
LPC_BRIDGE = 0,
};
enum amd8111_edac_pcis {
PCI_BRIDGE = 0,
};
/* Wrapper functions for accessing PCI configuration space */
static int edac_pci_read_dword(struct pci_dev *dev, int reg, u32 *val32)
{
int ret;
ret = pci_read_config_dword(dev, reg, val32);
if (ret != 0)
printk(KERN_ERR AMD8111_EDAC_MOD_STR
" PCI Access Read Error at 0x%x\n", reg);
return ret;
}
static void edac_pci_read_byte(struct pci_dev *dev, int reg, u8 *val8)
{
int ret;
ret = pci_read_config_byte(dev, reg, val8);
if (ret != 0)
printk(KERN_ERR AMD8111_EDAC_MOD_STR
" PCI Access Read Error at 0x%x\n", reg);
}
static void edac_pci_write_dword(struct pci_dev *dev, int reg, u32 val32)
{
int ret;
ret = pci_write_config_dword(dev, reg, val32);
if (ret != 0)
printk(KERN_ERR AMD8111_EDAC_MOD_STR
" PCI Access Write Error at 0x%x\n", reg);
}
static void edac_pci_write_byte(struct pci_dev *dev, int reg, u8 val8)
{
int ret;
ret = pci_write_config_byte(dev, reg, val8);
if (ret != 0)
printk(KERN_ERR AMD8111_EDAC_MOD_STR
" PCI Access Write Error at 0x%x\n", reg);
}
/*
* device-specific methods for amd8111 PCI Bridge Controller
*
* Error Reporting and Handling for amd8111 chipset could be found
* in its datasheet 3.1.2 section, P37
*/
static void amd8111_pci_bridge_init(struct amd8111_pci_info *pci_info)
{
u32 val32;
struct pci_dev *dev = pci_info->dev;
/* First clear error detection flags on the host interface */
/* Clear SSE/SMA/STA flags in the global status register*/
edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32);
if (val32 & PCI_STSCMD_CLEAR_MASK)
edac_pci_write_dword(dev, REG_PCI_STSCMD, val32);
/* Clear CRC and Link Fail flags in HT Link Control reg */
edac_pci_read_dword(dev, REG_HT_LINK, &val32);
if (val32 & HT_LINK_CLEAR_MASK)
edac_pci_write_dword(dev, REG_HT_LINK, val32);
/* Second clear all fault on the secondary interface */
/* Clear error flags in the memory-base limit reg. */
edac_pci_read_dword(dev, REG_MEM_LIM, &val32);
if (val32 & MEM_LIMIT_CLEAR_MASK)
edac_pci_write_dword(dev, REG_MEM_LIM, val32);
/* Clear Discard Timer Expired flag in Interrupt/Bridge Control reg */
edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32);
if (val32 & PCI_INTBRG_CTRL_CLEAR_MASK)
edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32);
/* Last enable error detections */
if (edac_op_state == EDAC_OPSTATE_POLL) {
/* Enable System Error reporting in global status register */
edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32);
val32 |= PCI_STSCMD_SERREN;
edac_pci_write_dword(dev, REG_PCI_STSCMD, val32);
/* Enable CRC Sync flood packets to HyperTransport Link */
edac_pci_read_dword(dev, REG_HT_LINK, &val32);
val32 |= HT_LINK_CRCFEN;
edac_pci_write_dword(dev, REG_HT_LINK, val32);
/* Enable SSE reporting etc in Interrupt control reg */
edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32);
val32 |= PCI_INTBRG_CTRL_POLL_MASK;
edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32);
}
}
static void amd8111_pci_bridge_exit(struct amd8111_pci_info *pci_info)
{
u32 val32;
struct pci_dev *dev = pci_info->dev;
if (edac_op_state == EDAC_OPSTATE_POLL) {
/* Disable System Error reporting */
edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32);
val32 &= ~PCI_STSCMD_SERREN;
edac_pci_write_dword(dev, REG_PCI_STSCMD, val32);
/* Disable CRC flood packets */
edac_pci_read_dword(dev, REG_HT_LINK, &val32);
val32 &= ~HT_LINK_CRCFEN;
edac_pci_write_dword(dev, REG_HT_LINK, val32);
/* Disable DTSERREN/MARSP/SERREN in Interrupt Control reg */
edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32);
val32 &= ~PCI_INTBRG_CTRL_POLL_MASK;
edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32);
}
}
static void amd8111_pci_bridge_check(struct edac_pci_ctl_info *edac_dev)
{
struct amd8111_pci_info *pci_info = edac_dev->pvt_info;
struct pci_dev *dev = pci_info->dev;
u32 val32;
/* Check out PCI Bridge Status and Command Register */
edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32);
if (val32 & PCI_STSCMD_CLEAR_MASK) {
printk(KERN_INFO "Error(s) in PCI bridge status and command"
"register on device %s\n", pci_info->ctl_name);
printk(KERN_INFO "SSE: %d, RMA: %d, RTA: %d\n",
(val32 & PCI_STSCMD_SSE) != 0,
(val32 & PCI_STSCMD_RMA) != 0,
(val32 & PCI_STSCMD_RTA) != 0);
val32 |= PCI_STSCMD_CLEAR_MASK;
edac_pci_write_dword(dev, REG_PCI_STSCMD, val32);
edac_pci_handle_npe(edac_dev, edac_dev->ctl_name);
}
/* Check out HyperTransport Link Control Register */
edac_pci_read_dword(dev, REG_HT_LINK, &val32);
if (val32 & HT_LINK_LKFAIL) {
printk(KERN_INFO "Error(s) in hypertransport link control"
"register on device %s\n", pci_info->ctl_name);
printk(KERN_INFO "LKFAIL: %d\n",
(val32 & HT_LINK_LKFAIL) != 0);
val32 |= HT_LINK_LKFAIL;
edac_pci_write_dword(dev, REG_HT_LINK, val32);
edac_pci_handle_npe(edac_dev, edac_dev->ctl_name);
}
/* Check out PCI Interrupt and Bridge Control Register */
edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32);
if (val32 & PCI_INTBRG_CTRL_DTSTAT) {
printk(KERN_INFO "Error(s) in PCI interrupt and bridge control"
"register on device %s\n", pci_info->ctl_name);
printk(KERN_INFO "DTSTAT: %d\n",
(val32 & PCI_INTBRG_CTRL_DTSTAT) != 0);
val32 |= PCI_INTBRG_CTRL_DTSTAT;
edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32);
edac_pci_handle_npe(edac_dev, edac_dev->ctl_name);
}
/* Check out PCI Bridge Memory Base-Limit Register */
edac_pci_read_dword(dev, REG_MEM_LIM, &val32);
if (val32 & MEM_LIMIT_CLEAR_MASK) {
printk(KERN_INFO
"Error(s) in mem limit register on %s device\n",
pci_info->ctl_name);
printk(KERN_INFO "DPE: %d, RSE: %d, RMA: %d\n"
"RTA: %d, STA: %d, MDPE: %d\n",
(val32 & MEM_LIMIT_DPE) != 0,
(val32 & MEM_LIMIT_RSE) != 0,
(val32 & MEM_LIMIT_RMA) != 0,
(val32 & MEM_LIMIT_RTA) != 0,
(val32 & MEM_LIMIT_STA) != 0,
(val32 & MEM_LIMIT_MDPE) != 0);
val32 |= MEM_LIMIT_CLEAR_MASK;
edac_pci_write_dword(dev, REG_MEM_LIM, val32);
edac_pci_handle_npe(edac_dev, edac_dev->ctl_name);
}
}
static struct resource *legacy_io_res;
static int at_compat_reg_broken;
#define LEGACY_NR_PORTS 1
/* device-specific methods for amd8111 LPC Bridge device */
static void amd8111_lpc_bridge_init(struct amd8111_dev_info *dev_info)
{
u8 val8;
struct pci_dev *dev = dev_info->dev;
/* First clear REG_AT_COMPAT[SERR, IOCHK] if necessary */
legacy_io_res = request_region(REG_AT_COMPAT, LEGACY_NR_PORTS,
AMD8111_EDAC_MOD_STR);
if (!legacy_io_res)
printk(KERN_INFO "%s: failed to request legacy I/O region "
"start %d, len %d\n", __func__,
REG_AT_COMPAT, LEGACY_NR_PORTS);
else {
val8 = __do_inb(REG_AT_COMPAT);
if (val8 == 0xff) { /* buggy port */
printk(KERN_INFO "%s: port %d is buggy, not supported"
" by hardware?\n", __func__, REG_AT_COMPAT);
at_compat_reg_broken = 1;
release_region(REG_AT_COMPAT, LEGACY_NR_PORTS);
legacy_io_res = NULL;
} else {
u8 out8 = 0;
if (val8 & AT_COMPAT_SERR)
out8 = AT_COMPAT_CLRSERR;
if (val8 & AT_COMPAT_IOCHK)
out8 |= AT_COMPAT_CLRIOCHK;
if (out8 > 0)
__do_outb(out8, REG_AT_COMPAT);
}
}
/* Second clear error flags on LPC bridge */
edac_pci_read_byte(dev, REG_IO_CTRL_1, &val8);
if (val8 & IO_CTRL_1_CLEAR_MASK)
edac_pci_write_byte(dev, REG_IO_CTRL_1, val8);
}
static void amd8111_lpc_bridge_exit(struct amd8111_dev_info *dev_info)
{
if (legacy_io_res)
release_region(REG_AT_COMPAT, LEGACY_NR_PORTS);
}
static void amd8111_lpc_bridge_check(struct edac_device_ctl_info *edac_dev)
{
struct amd8111_dev_info *dev_info = edac_dev->pvt_info;
struct pci_dev *dev = dev_info->dev;
u8 val8;
edac_pci_read_byte(dev, REG_IO_CTRL_1, &val8);
if (val8 & IO_CTRL_1_CLEAR_MASK) {
printk(KERN_INFO
"Error(s) in IO control register on %s device\n",
dev_info->ctl_name);
printk(KERN_INFO "LPC ERR: %d, PW2LPC: %d\n",
(val8 & IO_CTRL_1_LPC_ERR) != 0,
(val8 & IO_CTRL_1_PW2LPC) != 0);
val8 |= IO_CTRL_1_CLEAR_MASK;
edac_pci_write_byte(dev, REG_IO_CTRL_1, val8);
edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
}
if (at_compat_reg_broken == 0) {
u8 out8 = 0;
val8 = __do_inb(REG_AT_COMPAT);
if (val8 & AT_COMPAT_SERR)
out8 = AT_COMPAT_CLRSERR;
if (val8 & AT_COMPAT_IOCHK)
out8 |= AT_COMPAT_CLRIOCHK;
if (out8 > 0) {
__do_outb(out8, REG_AT_COMPAT);
edac_device_handle_ue(edac_dev, 0, 0,
edac_dev->ctl_name);
}
}
}
/* General devices represented by edac_device_ctl_info */
static struct amd8111_dev_info amd8111_devices[] = {
[LPC_BRIDGE] = {
.err_dev = PCI_DEVICE_ID_AMD_8111_LPC,
.ctl_name = "lpc",
.init = amd8111_lpc_bridge_init,
.exit = amd8111_lpc_bridge_exit,
.check = amd8111_lpc_bridge_check,
},
{0},
};
/* PCI controllers represented by edac_pci_ctl_info */
static struct amd8111_pci_info amd8111_pcis[] = {
[PCI_BRIDGE] = {
.err_dev = PCI_DEVICE_ID_AMD_8111_PCI,
.ctl_name = "AMD8111_PCI_Controller",
.init = amd8111_pci_bridge_init,
.exit = amd8111_pci_bridge_exit,
.check = amd8111_pci_bridge_check,
},
{0},
};
static int amd8111_dev_probe(struct pci_dev *dev,
const struct pci_device_id *id)
{
struct amd8111_dev_info *dev_info = &amd8111_devices[id->driver_data];
int ret = -ENODEV;
dev_info->dev = pci_get_device(PCI_VENDOR_ID_AMD,
dev_info->err_dev, NULL);
if (!dev_info->dev) {
printk(KERN_ERR "EDAC device not found:"
"vendor %x, device %x, name %s\n",
PCI_VENDOR_ID_AMD, dev_info->err_dev,
dev_info->ctl_name);
goto err;
}
if (pci_enable_device(dev_info->dev)) {
printk(KERN_ERR "failed to enable:"
"vendor %x, device %x, name %s\n",
PCI_VENDOR_ID_AMD, dev_info->err_dev,
dev_info->ctl_name);
goto err_dev_put;
}
/*
* we do not allocate extra private structure for
* edac_device_ctl_info, but make use of existing
* one instead.
*/
dev_info->edac_idx = edac_device_alloc_index();
dev_info->edac_dev =
edac_device_alloc_ctl_info(0, dev_info->ctl_name, 1,
NULL, 0, 0,
NULL, 0, dev_info->edac_idx);
if (!dev_info->edac_dev) {
ret = -ENOMEM;
goto err_dev_put;
}
dev_info->edac_dev->pvt_info = dev_info;
dev_info->edac_dev->dev = &dev_info->dev->dev;
dev_info->edac_dev->mod_name = AMD8111_EDAC_MOD_STR;
dev_info->edac_dev->ctl_name = dev_info->ctl_name;
dev_info->edac_dev->dev_name = dev_name(&dev_info->dev->dev);
if (edac_op_state == EDAC_OPSTATE_POLL)
dev_info->edac_dev->edac_check = dev_info->check;
if (dev_info->init)
dev_info->init(dev_info);
if (edac_device_add_device(dev_info->edac_dev) > 0) {
printk(KERN_ERR "failed to add edac_dev for %s\n",
dev_info->ctl_name);
goto err_edac_free_ctl;
}
printk(KERN_INFO "added one edac_dev on AMD8111 "
"vendor %x, device %x, name %s\n",
PCI_VENDOR_ID_AMD, dev_info->err_dev,
dev_info->ctl_name);
return 0;
err_edac_free_ctl:
edac_device_free_ctl_info(dev_info->edac_dev);
err_dev_put:
pci_dev_put(dev_info->dev);
err:
return ret;
}
static void amd8111_dev_remove(struct pci_dev *dev)
{
struct amd8111_dev_info *dev_info;
for (dev_info = amd8111_devices; dev_info->err_dev; dev_info++)
if (dev_info->dev->device == dev->device)
break;
if (!dev_info->err_dev) /* should never happen */
return;
if (dev_info->edac_dev) {
edac_device_del_device(dev_info->edac_dev->dev);
edac_device_free_ctl_info(dev_info->edac_dev);
}
if (dev_info->exit)
dev_info->exit(dev_info);
pci_dev_put(dev_info->dev);
}
static int amd8111_pci_probe(struct pci_dev *dev,
const struct pci_device_id *id)
{
struct amd8111_pci_info *pci_info = &amd8111_pcis[id->driver_data];
int ret = -ENODEV;
pci_info->dev = pci_get_device(PCI_VENDOR_ID_AMD,
pci_info->err_dev, NULL);
if (!pci_info->dev) {
printk(KERN_ERR "EDAC device not found:"
"vendor %x, device %x, name %s\n",
PCI_VENDOR_ID_AMD, pci_info->err_dev,
pci_info->ctl_name);
goto err;
}
if (pci_enable_device(pci_info->dev)) {
printk(KERN_ERR "failed to enable:"
"vendor %x, device %x, name %s\n",
PCI_VENDOR_ID_AMD, pci_info->err_dev,
pci_info->ctl_name);
goto err_dev_put;
}
/*
* we do not allocate extra private structure for
* edac_pci_ctl_info, but make use of existing
* one instead.
*/
pci_info->edac_idx = edac_pci_alloc_index();
pci_info->edac_dev = edac_pci_alloc_ctl_info(0, pci_info->ctl_name);
if (!pci_info->edac_dev) {
ret = -ENOMEM;
goto err_dev_put;
}
pci_info->edac_dev->pvt_info = pci_info;
pci_info->edac_dev->dev = &pci_info->dev->dev;
pci_info->edac_dev->mod_name = AMD8111_EDAC_MOD_STR;
pci_info->edac_dev->ctl_name = pci_info->ctl_name;
pci_info->edac_dev->dev_name = dev_name(&pci_info->dev->dev);
if (edac_op_state == EDAC_OPSTATE_POLL)
pci_info->edac_dev->edac_check = pci_info->check;
if (pci_info->init)
pci_info->init(pci_info);
if (edac_pci_add_device(pci_info->edac_dev, pci_info->edac_idx) > 0) {
printk(KERN_ERR "failed to add edac_pci for %s\n",
pci_info->ctl_name);
goto err_edac_free_ctl;
}
printk(KERN_INFO "added one edac_pci on AMD8111 "
"vendor %x, device %x, name %s\n",
PCI_VENDOR_ID_AMD, pci_info->err_dev,
pci_info->ctl_name);
return 0;
err_edac_free_ctl:
edac_pci_free_ctl_info(pci_info->edac_dev);
err_dev_put:
pci_dev_put(pci_info->dev);
err:
return ret;
}
static void amd8111_pci_remove(struct pci_dev *dev)
{
struct amd8111_pci_info *pci_info;
for (pci_info = amd8111_pcis; pci_info->err_dev; pci_info++)
if (pci_info->dev->device == dev->device)
break;
if (!pci_info->err_dev) /* should never happen */
return;
if (pci_info->edac_dev) {
edac_pci_del_device(pci_info->edac_dev->dev);
edac_pci_free_ctl_info(pci_info->edac_dev);
}
if (pci_info->exit)
pci_info->exit(pci_info);
pci_dev_put(pci_info->dev);
}
/* PCI Device ID talbe for general EDAC device */
static const struct pci_device_id amd8111_edac_dev_tbl[] = {
{
PCI_VEND_DEV(AMD, 8111_LPC),
.subvendor = PCI_ANY_ID,
.subdevice = PCI_ANY_ID,
.class = 0,
.class_mask = 0,
.driver_data = LPC_BRIDGE,
},
{
0,
} /* table is NULL-terminated */
};
MODULE_DEVICE_TABLE(pci, amd8111_edac_dev_tbl);
static struct pci_driver amd8111_edac_dev_driver = {
.name = "AMD8111_EDAC_DEV",
.probe = amd8111_dev_probe,
.remove = amd8111_dev_remove,
.id_table = amd8111_edac_dev_tbl,
};
/* PCI Device ID table for EDAC PCI controller */
static const struct pci_device_id amd8111_edac_pci_tbl[] = {
{
PCI_VEND_DEV(AMD, 8111_PCI),
.subvendor = PCI_ANY_ID,
.subdevice = PCI_ANY_ID,
.class = 0,
.class_mask = 0,
.driver_data = PCI_BRIDGE,
},
{
0,
} /* table is NULL-terminated */
};
MODULE_DEVICE_TABLE(pci, amd8111_edac_pci_tbl);
static struct pci_driver amd8111_edac_pci_driver = {
.name = "AMD8111_EDAC_PCI",
.probe = amd8111_pci_probe,
.remove = amd8111_pci_remove,
.id_table = amd8111_edac_pci_tbl,
};
static int __init amd8111_edac_init(void)
{
int val;
printk(KERN_INFO "AMD8111 EDAC driver " AMD8111_EDAC_REVISION "\n");
printk(KERN_INFO "\t(c) 2008 Wind River Systems, Inc.\n");
/* Only POLL mode supported so far */
edac_op_state = EDAC_OPSTATE_POLL;
val = pci_register_driver(&amd8111_edac_dev_driver);
val |= pci_register_driver(&amd8111_edac_pci_driver);
return val;
}
static void __exit amd8111_edac_exit(void)
{
pci_unregister_driver(&amd8111_edac_pci_driver);
pci_unregister_driver(&amd8111_edac_dev_driver);
}
module_init(amd8111_edac_init);
module_exit(amd8111_edac_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Cao Qingtao <qingtao.cao@windriver.com>\n");
MODULE_DESCRIPTION("AMD8111 HyperTransport I/O Hub EDAC kernel module");

130
drivers/edac/amd8111_edac.h Normal file
View file

@ -0,0 +1,130 @@
/*
* amd8111_edac.h, EDAC defs for AMD8111 hypertransport chip
*
* Copyright (c) 2008 Wind River Systems, Inc.
*
* Authors: Cao Qingtao <qingtao.cao@windriver.com>
* Benjamin Walsh <benjamin.walsh@windriver.com>
* Hu Yongqi <yongqi.hu@windriver.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _AMD8111_EDAC_H_
#define _AMD8111_EDAC_H_
/************************************************************
* PCI Bridge Status and Command Register, DevA:0x04
************************************************************/
#define REG_PCI_STSCMD 0x04
enum pci_stscmd_bits {
PCI_STSCMD_SSE = BIT(30),
PCI_STSCMD_RMA = BIT(29),
PCI_STSCMD_RTA = BIT(28),
PCI_STSCMD_SERREN = BIT(8),
PCI_STSCMD_CLEAR_MASK = (PCI_STSCMD_SSE |
PCI_STSCMD_RMA |
PCI_STSCMD_RTA)
};
/************************************************************
* PCI Bridge Memory Base-Limit Register, DevA:0x1c
************************************************************/
#define REG_MEM_LIM 0x1c
enum mem_limit_bits {
MEM_LIMIT_DPE = BIT(31),
MEM_LIMIT_RSE = BIT(30),
MEM_LIMIT_RMA = BIT(29),
MEM_LIMIT_RTA = BIT(28),
MEM_LIMIT_STA = BIT(27),
MEM_LIMIT_MDPE = BIT(24),
MEM_LIMIT_CLEAR_MASK = (MEM_LIMIT_DPE |
MEM_LIMIT_RSE |
MEM_LIMIT_RMA |
MEM_LIMIT_RTA |
MEM_LIMIT_STA |
MEM_LIMIT_MDPE)
};
/************************************************************
* HyperTransport Link Control Register, DevA:0xc4
************************************************************/
#define REG_HT_LINK 0xc4
enum ht_link_bits {
HT_LINK_LKFAIL = BIT(4),
HT_LINK_CRCFEN = BIT(1),
HT_LINK_CLEAR_MASK = (HT_LINK_LKFAIL)
};
/************************************************************
* PCI Bridge Interrupt and Bridge Control, DevA:0x3c
************************************************************/
#define REG_PCI_INTBRG_CTRL 0x3c
enum pci_intbrg_ctrl_bits {
PCI_INTBRG_CTRL_DTSERREN = BIT(27),
PCI_INTBRG_CTRL_DTSTAT = BIT(26),
PCI_INTBRG_CTRL_MARSP = BIT(21),
PCI_INTBRG_CTRL_SERREN = BIT(17),
PCI_INTBRG_CTRL_PEREN = BIT(16),
PCI_INTBRG_CTRL_CLEAR_MASK = (PCI_INTBRG_CTRL_DTSTAT),
PCI_INTBRG_CTRL_POLL_MASK = (PCI_INTBRG_CTRL_DTSERREN |
PCI_INTBRG_CTRL_MARSP |
PCI_INTBRG_CTRL_SERREN)
};
/************************************************************
* I/O Control 1 Register, DevB:0x40
************************************************************/
#define REG_IO_CTRL_1 0x40
enum io_ctrl_1_bits {
IO_CTRL_1_NMIONERR = BIT(7),
IO_CTRL_1_LPC_ERR = BIT(6),
IO_CTRL_1_PW2LPC = BIT(1),
IO_CTRL_1_CLEAR_MASK = (IO_CTRL_1_LPC_ERR | IO_CTRL_1_PW2LPC)
};
/************************************************************
* Legacy I/O Space Registers
************************************************************/
#define REG_AT_COMPAT 0x61
enum at_compat_bits {
AT_COMPAT_SERR = BIT(7),
AT_COMPAT_IOCHK = BIT(6),
AT_COMPAT_CLRIOCHK = BIT(3),
AT_COMPAT_CLRSERR = BIT(2),
};
struct amd8111_dev_info {
u16 err_dev; /* PCI Device ID */
struct pci_dev *dev;
int edac_idx; /* device index */
char *ctl_name;
struct edac_device_ctl_info *edac_dev;
void (*init)(struct amd8111_dev_info *dev_info);
void (*exit)(struct amd8111_dev_info *dev_info);
void (*check)(struct edac_device_ctl_info *edac_dev);
};
struct amd8111_pci_info {
u16 err_dev; /* PCI Device ID */
struct pci_dev *dev;
int edac_idx; /* pci index */
const char *ctl_name;
struct edac_pci_ctl_info *edac_dev;
void (*init)(struct amd8111_pci_info *dev_info);
void (*exit)(struct amd8111_pci_info *dev_info);
void (*check)(struct edac_pci_ctl_info *edac_dev);
};
#endif /* _AMD8111_EDAC_H_ */

379
drivers/edac/amd8131_edac.c Normal file
View file

@ -0,0 +1,379 @@
/*
* amd8131_edac.c, AMD8131 hypertransport chip EDAC kernel module
*
* Copyright (c) 2008 Wind River Systems, Inc.
*
* Authors: Cao Qingtao <qingtao.cao@windriver.com>
* Benjamin Walsh <benjamin.walsh@windriver.com>
* Hu Yongqi <yongqi.hu@windriver.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/bitops.h>
#include <linux/edac.h>
#include <linux/pci_ids.h>
#include "edac_core.h"
#include "edac_module.h"
#include "amd8131_edac.h"
#define AMD8131_EDAC_REVISION " Ver: 1.0.0"
#define AMD8131_EDAC_MOD_STR "amd8131_edac"
/* Wrapper functions for accessing PCI configuration space */
static void edac_pci_read_dword(struct pci_dev *dev, int reg, u32 *val32)
{
int ret;
ret = pci_read_config_dword(dev, reg, val32);
if (ret != 0)
printk(KERN_ERR AMD8131_EDAC_MOD_STR
" PCI Access Read Error at 0x%x\n", reg);
}
static void edac_pci_write_dword(struct pci_dev *dev, int reg, u32 val32)
{
int ret;
ret = pci_write_config_dword(dev, reg, val32);
if (ret != 0)
printk(KERN_ERR AMD8131_EDAC_MOD_STR
" PCI Access Write Error at 0x%x\n", reg);
}
static char * const bridge_str[] = {
[NORTH_A] = "NORTH A",
[NORTH_B] = "NORTH B",
[SOUTH_A] = "SOUTH A",
[SOUTH_B] = "SOUTH B",
[NO_BRIDGE] = "NO BRIDGE",
};
/* Support up to two AMD8131 chipsets on a platform */
static struct amd8131_dev_info amd8131_devices[] = {
{
.inst = NORTH_A,
.devfn = DEVFN_PCIX_BRIDGE_NORTH_A,
.ctl_name = "AMD8131_PCIX_NORTH_A",
},
{
.inst = NORTH_B,
.devfn = DEVFN_PCIX_BRIDGE_NORTH_B,
.ctl_name = "AMD8131_PCIX_NORTH_B",
},
{
.inst = SOUTH_A,
.devfn = DEVFN_PCIX_BRIDGE_SOUTH_A,
.ctl_name = "AMD8131_PCIX_SOUTH_A",
},
{
.inst = SOUTH_B,
.devfn = DEVFN_PCIX_BRIDGE_SOUTH_B,
.ctl_name = "AMD8131_PCIX_SOUTH_B",
},
{.inst = NO_BRIDGE,},
};
static void amd8131_pcix_init(struct amd8131_dev_info *dev_info)
{
u32 val32;
struct pci_dev *dev = dev_info->dev;
/* First clear error detection flags */
edac_pci_read_dword(dev, REG_MEM_LIM, &val32);
if (val32 & MEM_LIMIT_MASK)
edac_pci_write_dword(dev, REG_MEM_LIM, val32);
/* Clear Discard Timer Timedout flag */
edac_pci_read_dword(dev, REG_INT_CTLR, &val32);
if (val32 & INT_CTLR_DTS)
edac_pci_write_dword(dev, REG_INT_CTLR, val32);
/* Clear CRC Error flag on link side A */
edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32);
if (val32 & LNK_CTRL_CRCERR_A)
edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32);
/* Clear CRC Error flag on link side B */
edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32);
if (val32 & LNK_CTRL_CRCERR_B)
edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32);
/*
* Then enable all error detections.
*
* Setup Discard Timer Sync Flood Enable,
* System Error Enable and Parity Error Enable.
*/
edac_pci_read_dword(dev, REG_INT_CTLR, &val32);
val32 |= INT_CTLR_PERR | INT_CTLR_SERR | INT_CTLR_DTSE;
edac_pci_write_dword(dev, REG_INT_CTLR, val32);
/* Enable overall SERR Error detection */
edac_pci_read_dword(dev, REG_STS_CMD, &val32);
val32 |= STS_CMD_SERREN;
edac_pci_write_dword(dev, REG_STS_CMD, val32);
/* Setup CRC Flood Enable for link side A */
edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32);
val32 |= LNK_CTRL_CRCFEN;
edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32);
/* Setup CRC Flood Enable for link side B */
edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32);
val32 |= LNK_CTRL_CRCFEN;
edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32);
}
static void amd8131_pcix_exit(struct amd8131_dev_info *dev_info)
{
u32 val32;
struct pci_dev *dev = dev_info->dev;
/* Disable SERR, PERR and DTSE Error detection */
edac_pci_read_dword(dev, REG_INT_CTLR, &val32);
val32 &= ~(INT_CTLR_PERR | INT_CTLR_SERR | INT_CTLR_DTSE);
edac_pci_write_dword(dev, REG_INT_CTLR, val32);
/* Disable overall System Error detection */
edac_pci_read_dword(dev, REG_STS_CMD, &val32);
val32 &= ~STS_CMD_SERREN;
edac_pci_write_dword(dev, REG_STS_CMD, val32);
/* Disable CRC Sync Flood on link side A */
edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32);
val32 &= ~LNK_CTRL_CRCFEN;
edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32);
/* Disable CRC Sync Flood on link side B */
edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32);
val32 &= ~LNK_CTRL_CRCFEN;
edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32);
}
static void amd8131_pcix_check(struct edac_pci_ctl_info *edac_dev)
{
struct amd8131_dev_info *dev_info = edac_dev->pvt_info;
struct pci_dev *dev = dev_info->dev;
u32 val32;
/* Check PCI-X Bridge Memory Base-Limit Register for errors */
edac_pci_read_dword(dev, REG_MEM_LIM, &val32);
if (val32 & MEM_LIMIT_MASK) {
printk(KERN_INFO "Error(s) in mem limit register "
"on %s bridge\n", dev_info->ctl_name);
printk(KERN_INFO "DPE: %d, RSE: %d, RMA: %d\n"
"RTA: %d, STA: %d, MDPE: %d\n",
val32 & MEM_LIMIT_DPE,
val32 & MEM_LIMIT_RSE,
val32 & MEM_LIMIT_RMA,
val32 & MEM_LIMIT_RTA,
val32 & MEM_LIMIT_STA,
val32 & MEM_LIMIT_MDPE);
val32 |= MEM_LIMIT_MASK;
edac_pci_write_dword(dev, REG_MEM_LIM, val32);
edac_pci_handle_npe(edac_dev, edac_dev->ctl_name);
}
/* Check if Discard Timer timed out */
edac_pci_read_dword(dev, REG_INT_CTLR, &val32);
if (val32 & INT_CTLR_DTS) {
printk(KERN_INFO "Error(s) in interrupt and control register "
"on %s bridge\n", dev_info->ctl_name);
printk(KERN_INFO "DTS: %d\n", val32 & INT_CTLR_DTS);
val32 |= INT_CTLR_DTS;
edac_pci_write_dword(dev, REG_INT_CTLR, val32);
edac_pci_handle_npe(edac_dev, edac_dev->ctl_name);
}
/* Check if CRC error happens on link side A */
edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32);
if (val32 & LNK_CTRL_CRCERR_A) {
printk(KERN_INFO "Error(s) in link conf and control register "
"on %s bridge\n", dev_info->ctl_name);
printk(KERN_INFO "CRCERR: %d\n", val32 & LNK_CTRL_CRCERR_A);
val32 |= LNK_CTRL_CRCERR_A;
edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32);
edac_pci_handle_npe(edac_dev, edac_dev->ctl_name);
}
/* Check if CRC error happens on link side B */
edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32);
if (val32 & LNK_CTRL_CRCERR_B) {
printk(KERN_INFO "Error(s) in link conf and control register "
"on %s bridge\n", dev_info->ctl_name);
printk(KERN_INFO "CRCERR: %d\n", val32 & LNK_CTRL_CRCERR_B);
val32 |= LNK_CTRL_CRCERR_B;
edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32);
edac_pci_handle_npe(edac_dev, edac_dev->ctl_name);
}
}
static struct amd8131_info amd8131_chipset = {
.err_dev = PCI_DEVICE_ID_AMD_8131_APIC,
.devices = amd8131_devices,
.init = amd8131_pcix_init,
.exit = amd8131_pcix_exit,
.check = amd8131_pcix_check,
};
/*
* There are 4 PCIX Bridges on ATCA-6101 that share the same PCI Device ID,
* so amd8131_probe() would be called by kernel 4 times, with different
* address of pci_dev for each of them each time.
*/
static int amd8131_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
struct amd8131_dev_info *dev_info;
for (dev_info = amd8131_chipset.devices; dev_info->inst != NO_BRIDGE;
dev_info++)
if (dev_info->devfn == dev->devfn)
break;
if (dev_info->inst == NO_BRIDGE) /* should never happen */
return -ENODEV;
/*
* We can't call pci_get_device() as we are used to do because
* there are 4 of them but pci_dev_get() instead.
*/
dev_info->dev = pci_dev_get(dev);
if (pci_enable_device(dev_info->dev)) {
pci_dev_put(dev_info->dev);
printk(KERN_ERR "failed to enable:"
"vendor %x, device %x, devfn %x, name %s\n",
PCI_VENDOR_ID_AMD, amd8131_chipset.err_dev,
dev_info->devfn, dev_info->ctl_name);
return -ENODEV;
}
/*
* we do not allocate extra private structure for
* edac_pci_ctl_info, but make use of existing
* one instead.
*/
dev_info->edac_idx = edac_pci_alloc_index();
dev_info->edac_dev = edac_pci_alloc_ctl_info(0, dev_info->ctl_name);
if (!dev_info->edac_dev)
return -ENOMEM;
dev_info->edac_dev->pvt_info = dev_info;
dev_info->edac_dev->dev = &dev_info->dev->dev;
dev_info->edac_dev->mod_name = AMD8131_EDAC_MOD_STR;
dev_info->edac_dev->ctl_name = dev_info->ctl_name;
dev_info->edac_dev->dev_name = dev_name(&dev_info->dev->dev);
if (edac_op_state == EDAC_OPSTATE_POLL)
dev_info->edac_dev->edac_check = amd8131_chipset.check;
if (amd8131_chipset.init)
amd8131_chipset.init(dev_info);
if (edac_pci_add_device(dev_info->edac_dev, dev_info->edac_idx) > 0) {
printk(KERN_ERR "failed edac_pci_add_device() for %s\n",
dev_info->ctl_name);
edac_pci_free_ctl_info(dev_info->edac_dev);
return -ENODEV;
}
printk(KERN_INFO "added one device on AMD8131 "
"vendor %x, device %x, devfn %x, name %s\n",
PCI_VENDOR_ID_AMD, amd8131_chipset.err_dev,
dev_info->devfn, dev_info->ctl_name);
return 0;
}
static void amd8131_remove(struct pci_dev *dev)
{
struct amd8131_dev_info *dev_info;
for (dev_info = amd8131_chipset.devices; dev_info->inst != NO_BRIDGE;
dev_info++)
if (dev_info->devfn == dev->devfn)
break;
if (dev_info->inst == NO_BRIDGE) /* should never happen */
return;
if (dev_info->edac_dev) {
edac_pci_del_device(dev_info->edac_dev->dev);
edac_pci_free_ctl_info(dev_info->edac_dev);
}
if (amd8131_chipset.exit)
amd8131_chipset.exit(dev_info);
pci_dev_put(dev_info->dev);
}
static const struct pci_device_id amd8131_edac_pci_tbl[] = {
{
PCI_VEND_DEV(AMD, 8131_BRIDGE),
.subvendor = PCI_ANY_ID,
.subdevice = PCI_ANY_ID,
.class = 0,
.class_mask = 0,
.driver_data = 0,
},
{
0,
} /* table is NULL-terminated */
};
MODULE_DEVICE_TABLE(pci, amd8131_edac_pci_tbl);
static struct pci_driver amd8131_edac_driver = {
.name = AMD8131_EDAC_MOD_STR,
.probe = amd8131_probe,
.remove = amd8131_remove,
.id_table = amd8131_edac_pci_tbl,
};
static int __init amd8131_edac_init(void)
{
printk(KERN_INFO "AMD8131 EDAC driver " AMD8131_EDAC_REVISION "\n");
printk(KERN_INFO "\t(c) 2008 Wind River Systems, Inc.\n");
/* Only POLL mode supported so far */
edac_op_state = EDAC_OPSTATE_POLL;
return pci_register_driver(&amd8131_edac_driver);
}
static void __exit amd8131_edac_exit(void)
{
pci_unregister_driver(&amd8131_edac_driver);
}
module_init(amd8131_edac_init);
module_exit(amd8131_edac_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Cao Qingtao <qingtao.cao@windriver.com>\n");
MODULE_DESCRIPTION("AMD8131 HyperTransport PCI-X Tunnel EDAC kernel module");

119
drivers/edac/amd8131_edac.h Normal file
View file

@ -0,0 +1,119 @@
/*
* amd8131_edac.h, EDAC defs for AMD8131 hypertransport chip
*
* Copyright (c) 2008 Wind River Systems, Inc.
*
* Authors: Cao Qingtao <qingtao.cao@windriver.com>
* Benjamin Walsh <benjamin.walsh@windriver.com>
* Hu Yongqi <yongqi.hu@windriver.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _AMD8131_EDAC_H_
#define _AMD8131_EDAC_H_
#define DEVFN_PCIX_BRIDGE_NORTH_A 8
#define DEVFN_PCIX_BRIDGE_NORTH_B 16
#define DEVFN_PCIX_BRIDGE_SOUTH_A 24
#define DEVFN_PCIX_BRIDGE_SOUTH_B 32
/************************************************************
* PCI-X Bridge Status and Command Register, DevA:0x04
************************************************************/
#define REG_STS_CMD 0x04
enum sts_cmd_bits {
STS_CMD_SSE = BIT(30),
STS_CMD_SERREN = BIT(8)
};
/************************************************************
* PCI-X Bridge Interrupt and Bridge Control Register,
************************************************************/
#define REG_INT_CTLR 0x3c
enum int_ctlr_bits {
INT_CTLR_DTSE = BIT(27),
INT_CTLR_DTS = BIT(26),
INT_CTLR_SERR = BIT(17),
INT_CTLR_PERR = BIT(16)
};
/************************************************************
* PCI-X Bridge Memory Base-Limit Register, DevA:0x1C
************************************************************/
#define REG_MEM_LIM 0x1c
enum mem_limit_bits {
MEM_LIMIT_DPE = BIT(31),
MEM_LIMIT_RSE = BIT(30),
MEM_LIMIT_RMA = BIT(29),
MEM_LIMIT_RTA = BIT(28),
MEM_LIMIT_STA = BIT(27),
MEM_LIMIT_MDPE = BIT(24),
MEM_LIMIT_MASK = MEM_LIMIT_DPE|MEM_LIMIT_RSE|MEM_LIMIT_RMA|
MEM_LIMIT_RTA|MEM_LIMIT_STA|MEM_LIMIT_MDPE
};
/************************************************************
* Link Configuration And Control Register, side A
************************************************************/
#define REG_LNK_CTRL_A 0xc4
/************************************************************
* Link Configuration And Control Register, side B
************************************************************/
#define REG_LNK_CTRL_B 0xc8
enum lnk_ctrl_bits {
LNK_CTRL_CRCERR_A = BIT(9),
LNK_CTRL_CRCERR_B = BIT(8),
LNK_CTRL_CRCFEN = BIT(1)
};
enum pcix_bridge_inst {
NORTH_A = 0,
NORTH_B = 1,
SOUTH_A = 2,
SOUTH_B = 3,
NO_BRIDGE = 4
};
struct amd8131_dev_info {
int devfn;
enum pcix_bridge_inst inst;
struct pci_dev *dev;
int edac_idx; /* pci device index */
char *ctl_name;
struct edac_pci_ctl_info *edac_dev;
};
/*
* AMD8131 chipset has two pairs of PCIX Bridge and related IOAPIC
* Controller, and ATCA-6101 has two AMD8131 chipsets, so there are
* four PCIX Bridges on ATCA-6101 altogether.
*
* These PCIX Bridges share the same PCI Device ID and are all of
* Function Zero, they could be discrimated by their pci_dev->devfn.
* They share the same set of init/check/exit methods, and their
* private structures are collected in the devices[] array.
*/
struct amd8131_info {
u16 err_dev; /* PCI Device ID for AMD8131 APIC*/
struct amd8131_dev_info *devices;
void (*init)(struct amd8131_dev_info *dev_info);
void (*exit)(struct amd8131_dev_info *dev_info);
void (*check)(struct edac_pci_ctl_info *edac_dev);
};
#endif /* _AMD8131_EDAC_H_ */

283
drivers/edac/cell_edac.c Normal file
View file

@ -0,0 +1,283 @@
/*
* Cell MIC driver for ECC counting
*
* Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
* <benh@kernel.crashing.org>
*
* This file may be distributed under the terms of the
* GNU General Public License.
*/
#undef DEBUG
#include <linux/edac.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/stop_machine.h>
#include <linux/io.h>
#include <linux/of_address.h>
#include <asm/machdep.h>
#include <asm/cell-regs.h>
#include "edac_core.h"
struct cell_edac_priv
{
struct cbe_mic_tm_regs __iomem *regs;
int node;
int chanmask;
#ifdef DEBUG
u64 prev_fir;
#endif
};
static void cell_edac_count_ce(struct mem_ctl_info *mci, int chan, u64 ar)
{
struct cell_edac_priv *priv = mci->pvt_info;
struct csrow_info *csrow = mci->csrows[0];
unsigned long address, pfn, offset, syndrome;
dev_dbg(mci->pdev, "ECC CE err on node %d, channel %d, ar = 0x%016llx\n",
priv->node, chan, ar);
/* Address decoding is likely a bit bogus, to dbl check */
address = (ar & 0xffffffffe0000000ul) >> 29;
if (priv->chanmask == 0x3)
address = (address << 1) | chan;
pfn = address >> PAGE_SHIFT;
offset = address & ~PAGE_MASK;
syndrome = (ar & 0x000000001fe00000ul) >> 21;
/* TODO: Decoding of the error address */
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
csrow->first_page + pfn, offset, syndrome,
0, chan, -1, "", "");
}
static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar)
{
struct cell_edac_priv *priv = mci->pvt_info;
struct csrow_info *csrow = mci->csrows[0];
unsigned long address, pfn, offset;
dev_dbg(mci->pdev, "ECC UE err on node %d, channel %d, ar = 0x%016llx\n",
priv->node, chan, ar);
/* Address decoding is likely a bit bogus, to dbl check */
address = (ar & 0xffffffffe0000000ul) >> 29;
if (priv->chanmask == 0x3)
address = (address << 1) | chan;
pfn = address >> PAGE_SHIFT;
offset = address & ~PAGE_MASK;
/* TODO: Decoding of the error address */
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
csrow->first_page + pfn, offset, 0,
0, chan, -1, "", "");
}
static void cell_edac_check(struct mem_ctl_info *mci)
{
struct cell_edac_priv *priv = mci->pvt_info;
u64 fir, addreg, clear = 0;
fir = in_be64(&priv->regs->mic_fir);
#ifdef DEBUG
if (fir != priv->prev_fir) {
dev_dbg(mci->pdev, "fir change : 0x%016lx\n", fir);
priv->prev_fir = fir;
}
#endif
if ((priv->chanmask & 0x1) && (fir & CBE_MIC_FIR_ECC_SINGLE_0_ERR)) {
addreg = in_be64(&priv->regs->mic_df_ecc_address_0);
clear |= CBE_MIC_FIR_ECC_SINGLE_0_RESET;
cell_edac_count_ce(mci, 0, addreg);
}
if ((priv->chanmask & 0x2) && (fir & CBE_MIC_FIR_ECC_SINGLE_1_ERR)) {
addreg = in_be64(&priv->regs->mic_df_ecc_address_1);
clear |= CBE_MIC_FIR_ECC_SINGLE_1_RESET;
cell_edac_count_ce(mci, 1, addreg);
}
if ((priv->chanmask & 0x1) && (fir & CBE_MIC_FIR_ECC_MULTI_0_ERR)) {
addreg = in_be64(&priv->regs->mic_df_ecc_address_0);
clear |= CBE_MIC_FIR_ECC_MULTI_0_RESET;
cell_edac_count_ue(mci, 0, addreg);
}
if ((priv->chanmask & 0x2) && (fir & CBE_MIC_FIR_ECC_MULTI_1_ERR)) {
addreg = in_be64(&priv->regs->mic_df_ecc_address_1);
clear |= CBE_MIC_FIR_ECC_MULTI_1_RESET;
cell_edac_count_ue(mci, 1, addreg);
}
/* The procedure for clearing FIR bits is a bit ... weird */
if (clear) {
fir &= ~(CBE_MIC_FIR_ECC_ERR_MASK | CBE_MIC_FIR_ECC_SET_MASK);
fir |= CBE_MIC_FIR_ECC_RESET_MASK;
fir &= ~clear;
out_be64(&priv->regs->mic_fir, fir);
(void)in_be64(&priv->regs->mic_fir);
mb(); /* sync up */
#ifdef DEBUG
fir = in_be64(&priv->regs->mic_fir);
dev_dbg(mci->pdev, "fir clear : 0x%016lx\n", fir);
#endif
}
}
static void cell_edac_init_csrows(struct mem_ctl_info *mci)
{
struct csrow_info *csrow = mci->csrows[0];
struct dimm_info *dimm;
struct cell_edac_priv *priv = mci->pvt_info;
struct device_node *np;
int j;
u32 nr_pages;
for_each_node_by_name(np, "memory") {
struct resource r;
/* We "know" that the Cell firmware only creates one entry
* in the "memory" nodes. If that changes, this code will
* need to be adapted.
*/
if (of_address_to_resource(np, 0, &r))
continue;
if (of_node_to_nid(np) != priv->node)
continue;
csrow->first_page = r.start >> PAGE_SHIFT;
nr_pages = resource_size(&r) >> PAGE_SHIFT;
csrow->last_page = csrow->first_page + nr_pages - 1;
for (j = 0; j < csrow->nr_channels; j++) {
dimm = csrow->channels[j]->dimm;
dimm->mtype = MEM_XDR;
dimm->edac_mode = EDAC_SECDED;
dimm->nr_pages = nr_pages / csrow->nr_channels;
}
dev_dbg(mci->pdev,
"Initialized on node %d, chanmask=0x%x,"
" first_page=0x%lx, nr_pages=0x%x\n",
priv->node, priv->chanmask,
csrow->first_page, nr_pages);
break;
}
of_node_put(np);
}
static int cell_edac_probe(struct platform_device *pdev)
{
struct cbe_mic_tm_regs __iomem *regs;
struct mem_ctl_info *mci;
struct edac_mc_layer layers[2];
struct cell_edac_priv *priv;
u64 reg;
int rc, chanmask, num_chans;
regs = cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(pdev->id));
if (regs == NULL)
return -ENODEV;
edac_op_state = EDAC_OPSTATE_POLL;
/* Get channel population */
reg = in_be64(&regs->mic_mnt_cfg);
dev_dbg(&pdev->dev, "MIC_MNT_CFG = 0x%016llx\n", reg);
chanmask = 0;
if (reg & CBE_MIC_MNT_CFG_CHAN_0_POP)
chanmask |= 0x1;
if (reg & CBE_MIC_MNT_CFG_CHAN_1_POP)
chanmask |= 0x2;
if (chanmask == 0) {
dev_warn(&pdev->dev,
"Yuck ! No channel populated ? Aborting !\n");
return -ENODEV;
}
dev_dbg(&pdev->dev, "Initial FIR = 0x%016llx\n",
in_be64(&regs->mic_fir));
/* Allocate & init EDAC MC data structure */
num_chans = chanmask == 3 ? 2 : 1;
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = 1;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = num_chans;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(pdev->id, ARRAY_SIZE(layers), layers,
sizeof(struct cell_edac_priv));
if (mci == NULL)
return -ENOMEM;
priv = mci->pvt_info;
priv->regs = regs;
priv->node = pdev->id;
priv->chanmask = chanmask;
mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_XDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_EC | EDAC_FLAG_SECDED;
mci->mod_name = "cell_edac";
mci->ctl_name = "MIC";
mci->dev_name = dev_name(&pdev->dev);
mci->edac_check = cell_edac_check;
cell_edac_init_csrows(mci);
/* Register with EDAC core */
rc = edac_mc_add_mc(mci);
if (rc) {
dev_err(&pdev->dev, "failed to register with EDAC core\n");
edac_mc_free(mci);
return rc;
}
return 0;
}
static int cell_edac_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
if (mci)
edac_mc_free(mci);
return 0;
}
static struct platform_driver cell_edac_driver = {
.driver = {
.name = "cbe-mic",
.owner = THIS_MODULE,
},
.probe = cell_edac_probe,
.remove = cell_edac_remove,
};
static int __init cell_edac_init(void)
{
/* Sanity check registers data structure */
BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
mic_df_ecc_address_0) != 0xf8);
BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
mic_df_ecc_address_1) != 0x1b8);
BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
mic_df_config) != 0x218);
BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
mic_fir) != 0x230);
BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
mic_mnt_cfg) != 0x210);
BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
mic_exc) != 0x208);
return platform_driver_register(&cell_edac_driver);
}
static void __exit cell_edac_exit(void)
{
platform_driver_unregister(&cell_edac_driver);
}
module_init(cell_edac_init);
module_exit(cell_edac_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Benjamin Herrenschmidt <benh@kernel.crashing.org>");
MODULE_DESCRIPTION("ECC counting for Cell MIC");

1097
drivers/edac/cpc925_edac.c Normal file

File diff suppressed because it is too large Load diff

1484
drivers/edac/e752x_edac.c Normal file

File diff suppressed because it is too large Load diff

606
drivers/edac/e7xxx_edac.c Normal file
View file

@ -0,0 +1,606 @@
/*
* Intel e7xxx Memory Controller kernel module
* (C) 2003 Linux Networx (http://lnxi.com)
* This file may be distributed under the terms of the
* GNU General Public License.
*
* See "enum e7xxx_chips" below for supported chipsets
*
* Written by Thayne Harbaugh
* Based on work by Dan Hollis <goemon at anime dot net> and others.
* http://www.anime.net/~goemon/linux-ecc/
*
* Datasheet:
* http://www.intel.com/content/www/us/en/chipsets/e7501-chipset-memory-controller-hub-datasheet.html
*
* Contributors:
* Eric Biederman (Linux Networx)
* Tom Zimmerman (Linux Networx)
* Jim Garlick (Lawrence Livermore National Labs)
* Dave Peterson (Lawrence Livermore National Labs)
* That One Guy (Some other place)
* Wang Zhenyu (intel.com)
*
* $Id: edac_e7xxx.c,v 1.5.2.9 2005/10/05 00:43:44 dsp_llnl Exp $
*
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/edac.h>
#include "edac_core.h"
#define E7XXX_REVISION " Ver: 2.0.2"
#define EDAC_MOD_STR "e7xxx_edac"
#define e7xxx_printk(level, fmt, arg...) \
edac_printk(level, "e7xxx", fmt, ##arg)
#define e7xxx_mc_printk(mci, level, fmt, arg...) \
edac_mc_chipset_printk(mci, level, "e7xxx", fmt, ##arg)
#ifndef PCI_DEVICE_ID_INTEL_7205_0
#define PCI_DEVICE_ID_INTEL_7205_0 0x255d
#endif /* PCI_DEVICE_ID_INTEL_7205_0 */
#ifndef PCI_DEVICE_ID_INTEL_7205_1_ERR
#define PCI_DEVICE_ID_INTEL_7205_1_ERR 0x2551
#endif /* PCI_DEVICE_ID_INTEL_7205_1_ERR */
#ifndef PCI_DEVICE_ID_INTEL_7500_0
#define PCI_DEVICE_ID_INTEL_7500_0 0x2540
#endif /* PCI_DEVICE_ID_INTEL_7500_0 */
#ifndef PCI_DEVICE_ID_INTEL_7500_1_ERR
#define PCI_DEVICE_ID_INTEL_7500_1_ERR 0x2541
#endif /* PCI_DEVICE_ID_INTEL_7500_1_ERR */
#ifndef PCI_DEVICE_ID_INTEL_7501_0
#define PCI_DEVICE_ID_INTEL_7501_0 0x254c
#endif /* PCI_DEVICE_ID_INTEL_7501_0 */
#ifndef PCI_DEVICE_ID_INTEL_7501_1_ERR
#define PCI_DEVICE_ID_INTEL_7501_1_ERR 0x2541
#endif /* PCI_DEVICE_ID_INTEL_7501_1_ERR */
#ifndef PCI_DEVICE_ID_INTEL_7505_0
#define PCI_DEVICE_ID_INTEL_7505_0 0x2550
#endif /* PCI_DEVICE_ID_INTEL_7505_0 */
#ifndef PCI_DEVICE_ID_INTEL_7505_1_ERR
#define PCI_DEVICE_ID_INTEL_7505_1_ERR 0x2551
#endif /* PCI_DEVICE_ID_INTEL_7505_1_ERR */
#define E7XXX_NR_CSROWS 8 /* number of csrows */
#define E7XXX_NR_DIMMS 8 /* 2 channels, 4 dimms/channel */
/* E7XXX register addresses - device 0 function 0 */
#define E7XXX_DRB 0x60 /* DRAM row boundary register (8b) */
#define E7XXX_DRA 0x70 /* DRAM row attribute register (8b) */
/*
* 31 Device width row 7 0=x8 1=x4
* 27 Device width row 6
* 23 Device width row 5
* 19 Device width row 4
* 15 Device width row 3
* 11 Device width row 2
* 7 Device width row 1
* 3 Device width row 0
*/
#define E7XXX_DRC 0x7C /* DRAM controller mode reg (32b) */
/*
* 22 Number channels 0=1,1=2
* 19:18 DRB Granularity 32/64MB
*/
#define E7XXX_TOLM 0xC4 /* DRAM top of low memory reg (16b) */
#define E7XXX_REMAPBASE 0xC6 /* DRAM remap base address reg (16b) */
#define E7XXX_REMAPLIMIT 0xC8 /* DRAM remap limit address reg (16b) */
/* E7XXX register addresses - device 0 function 1 */
#define E7XXX_DRAM_FERR 0x80 /* DRAM first error register (8b) */
#define E7XXX_DRAM_NERR 0x82 /* DRAM next error register (8b) */
#define E7XXX_DRAM_CELOG_ADD 0xA0 /* DRAM first correctable memory */
/* error address register (32b) */
/*
* 31:28 Reserved
* 27:6 CE address (4k block 33:12)
* 5:0 Reserved
*/
#define E7XXX_DRAM_UELOG_ADD 0xB0 /* DRAM first uncorrectable memory */
/* error address register (32b) */
/*
* 31:28 Reserved
* 27:6 CE address (4k block 33:12)
* 5:0 Reserved
*/
#define E7XXX_DRAM_CELOG_SYNDROME 0xD0 /* DRAM first correctable memory */
/* error syndrome register (16b) */
enum e7xxx_chips {
E7500 = 0,
E7501,
E7505,
E7205,
};
struct e7xxx_pvt {
struct pci_dev *bridge_ck;
u32 tolm;
u32 remapbase;
u32 remaplimit;
const struct e7xxx_dev_info *dev_info;
};
struct e7xxx_dev_info {
u16 err_dev;
const char *ctl_name;
};
struct e7xxx_error_info {
u8 dram_ferr;
u8 dram_nerr;
u32 dram_celog_add;
u16 dram_celog_syndrome;
u32 dram_uelog_add;
};
static struct edac_pci_ctl_info *e7xxx_pci;
static const struct e7xxx_dev_info e7xxx_devs[] = {
[E7500] = {
.err_dev = PCI_DEVICE_ID_INTEL_7500_1_ERR,
.ctl_name = "E7500"},
[E7501] = {
.err_dev = PCI_DEVICE_ID_INTEL_7501_1_ERR,
.ctl_name = "E7501"},
[E7505] = {
.err_dev = PCI_DEVICE_ID_INTEL_7505_1_ERR,
.ctl_name = "E7505"},
[E7205] = {
.err_dev = PCI_DEVICE_ID_INTEL_7205_1_ERR,
.ctl_name = "E7205"},
};
/* FIXME - is this valid for both SECDED and S4ECD4ED? */
static inline int e7xxx_find_channel(u16 syndrome)
{
edac_dbg(3, "\n");
if ((syndrome & 0xff00) == 0)
return 0;
if ((syndrome & 0x00ff) == 0)
return 1;
if ((syndrome & 0xf000) == 0 || (syndrome & 0x0f00) == 0)
return 0;
return 1;
}
static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci,
unsigned long page)
{
u32 remap;
struct e7xxx_pvt *pvt = (struct e7xxx_pvt *)mci->pvt_info;
edac_dbg(3, "\n");
if ((page < pvt->tolm) ||
((page >= 0x100000) && (page < pvt->remapbase)))
return page;
remap = (page - pvt->tolm) + pvt->remapbase;
if (remap < pvt->remaplimit)
return remap;
e7xxx_printk(KERN_ERR, "Invalid page %lx - out of range\n", page);
return pvt->tolm - 1;
}
static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
{
u32 error_1b, page;
u16 syndrome;
int row;
int channel;
edac_dbg(3, "\n");
/* read the error address */
error_1b = info->dram_celog_add;
/* FIXME - should use PAGE_SHIFT */
page = error_1b >> 6; /* convert the address to 4k page */
/* read the syndrome */
syndrome = info->dram_celog_syndrome;
/* FIXME - check for -1 */
row = edac_mc_find_csrow_by_page(mci, page);
/* convert syndrome to channel */
channel = e7xxx_find_channel(syndrome);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, page, 0, syndrome,
row, channel, -1, "e7xxx CE", "");
}
static void process_ce_no_info(struct mem_ctl_info *mci)
{
edac_dbg(3, "\n");
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
"e7xxx CE log register overflow", "");
}
static void process_ue(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
{
u32 error_2b, block_page;
int row;
edac_dbg(3, "\n");
/* read the error address */
error_2b = info->dram_uelog_add;
/* FIXME - should use PAGE_SHIFT */
block_page = error_2b >> 6; /* convert to 4k address */
row = edac_mc_find_csrow_by_page(mci, block_page);
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, block_page, 0, 0,
row, -1, -1, "e7xxx UE", "");
}
static void process_ue_no_info(struct mem_ctl_info *mci)
{
edac_dbg(3, "\n");
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
"e7xxx UE log register overflow", "");
}
static void e7xxx_get_error_info(struct mem_ctl_info *mci,
struct e7xxx_error_info *info)
{
struct e7xxx_pvt *pvt;
pvt = (struct e7xxx_pvt *)mci->pvt_info;
pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_FERR, &info->dram_ferr);
pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_NERR, &info->dram_nerr);
if ((info->dram_ferr & 1) || (info->dram_nerr & 1)) {
pci_read_config_dword(pvt->bridge_ck, E7XXX_DRAM_CELOG_ADD,
&info->dram_celog_add);
pci_read_config_word(pvt->bridge_ck,
E7XXX_DRAM_CELOG_SYNDROME,
&info->dram_celog_syndrome);
}
if ((info->dram_ferr & 2) || (info->dram_nerr & 2))
pci_read_config_dword(pvt->bridge_ck, E7XXX_DRAM_UELOG_ADD,
&info->dram_uelog_add);
if (info->dram_ferr & 3)
pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_FERR, 0x03, 0x03);
if (info->dram_nerr & 3)
pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_NERR, 0x03, 0x03);
}
static int e7xxx_process_error_info(struct mem_ctl_info *mci,
struct e7xxx_error_info *info,
int handle_errors)
{
int error_found;
error_found = 0;
/* decode and report errors */
if (info->dram_ferr & 1) { /* check first error correctable */
error_found = 1;
if (handle_errors)
process_ce(mci, info);
}
if (info->dram_ferr & 2) { /* check first error uncorrectable */
error_found = 1;
if (handle_errors)
process_ue(mci, info);
}
if (info->dram_nerr & 1) { /* check next error correctable */
error_found = 1;
if (handle_errors) {
if (info->dram_ferr & 1)
process_ce_no_info(mci);
else
process_ce(mci, info);
}
}
if (info->dram_nerr & 2) { /* check next error uncorrectable */
error_found = 1;
if (handle_errors) {
if (info->dram_ferr & 2)
process_ue_no_info(mci);
else
process_ue(mci, info);
}
}
return error_found;
}
static void e7xxx_check(struct mem_ctl_info *mci)
{
struct e7xxx_error_info info;
edac_dbg(3, "\n");
e7xxx_get_error_info(mci, &info);
e7xxx_process_error_info(mci, &info, 1);
}
/* Return 1 if dual channel mode is active. Else return 0. */
static inline int dual_channel_active(u32 drc, int dev_idx)
{
return (dev_idx == E7501) ? ((drc >> 22) & 0x1) : 1;
}
/* Return DRB granularity (0=32mb, 1=64mb). */
static inline int drb_granularity(u32 drc, int dev_idx)
{
/* only e7501 can be single channel */
return (dev_idx == E7501) ? ((drc >> 18) & 0x3) : 1;
}
static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
int dev_idx, u32 drc)
{
unsigned long last_cumul_size;
int index, j;
u8 value;
u32 dra, cumul_size, nr_pages;
int drc_chan, drc_drbg, drc_ddim, mem_dev;
struct csrow_info *csrow;
struct dimm_info *dimm;
enum edac_type edac_mode;
pci_read_config_dword(pdev, E7XXX_DRA, &dra);
drc_chan = dual_channel_active(drc, dev_idx);
drc_drbg = drb_granularity(drc, dev_idx);
drc_ddim = (drc >> 20) & 0x3;
last_cumul_size = 0;
/* The dram row boundary (DRB) reg values are boundary address
* for each DRAM row with a granularity of 32 or 64MB (single/dual
* channel operation). DRB regs are cumulative; therefore DRB7 will
* contain the total memory contained in all eight rows.
*/
for (index = 0; index < mci->nr_csrows; index++) {
/* mem_dev 0=x8, 1=x4 */
mem_dev = (dra >> (index * 4 + 3)) & 0x1;
csrow = mci->csrows[index];
pci_read_config_byte(pdev, E7XXX_DRB + index, &value);
/* convert a 64 or 32 MiB DRB to a page size. */
cumul_size = value << (25 + drc_drbg - PAGE_SHIFT);
edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size);
if (cumul_size == last_cumul_size)
continue; /* not populated */
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
/*
* if single channel or x8 devices then SECDED
* if dual channel and x4 then S4ECD4ED
*/
if (drc_ddim) {
if (drc_chan && mem_dev) {
edac_mode = EDAC_S4ECD4ED;
mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
} else {
edac_mode = EDAC_SECDED;
mci->edac_cap |= EDAC_FLAG_SECDED;
}
} else
edac_mode = EDAC_NONE;
for (j = 0; j < drc_chan + 1; j++) {
dimm = csrow->channels[j]->dimm;
dimm->nr_pages = nr_pages / (drc_chan + 1);
dimm->grain = 1 << 12; /* 4KiB - resolution of CELOG */
dimm->mtype = MEM_RDDR; /* only one type supported */
dimm->dtype = mem_dev ? DEV_X4 : DEV_X8;
dimm->edac_mode = edac_mode;
}
}
}
static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx)
{
u16 pci_data;
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
struct e7xxx_pvt *pvt = NULL;
u32 drc;
int drc_chan;
struct e7xxx_error_info discard;
edac_dbg(0, "mci\n");
pci_read_config_dword(pdev, E7XXX_DRC, &drc);
drc_chan = dual_channel_active(drc, dev_idx);
/*
* According with the datasheet, this device has a maximum of
* 4 DIMMS per channel, either single-rank or dual-rank. So, the
* total amount of dimms is 8 (E7XXX_NR_DIMMS).
* That means that the DIMM is mapped as CSROWs, and the channel
* will map the rank. So, an error to either channel should be
* attributed to the same dimm.
*/
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = E7XXX_NR_CSROWS;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = drc_chan + 1;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
if (mci == NULL)
return -ENOMEM;
edac_dbg(3, "init mci\n");
mci->mtype_cap = MEM_FLAG_RDDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED |
EDAC_FLAG_S4ECD4ED;
/* FIXME - what if different memory types are in different csrows? */
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = E7XXX_REVISION;
mci->pdev = &pdev->dev;
edac_dbg(3, "init pvt\n");
pvt = (struct e7xxx_pvt *)mci->pvt_info;
pvt->dev_info = &e7xxx_devs[dev_idx];
pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL,
pvt->dev_info->err_dev, pvt->bridge_ck);
if (!pvt->bridge_ck) {
e7xxx_printk(KERN_ERR, "error reporting device not found:"
"vendor %x device 0x%x (broken BIOS?)\n",
PCI_VENDOR_ID_INTEL, e7xxx_devs[dev_idx].err_dev);
goto fail0;
}
edac_dbg(3, "more mci init\n");
mci->ctl_name = pvt->dev_info->ctl_name;
mci->dev_name = pci_name(pdev);
mci->edac_check = e7xxx_check;
mci->ctl_page_to_phys = ctl_page_to_phys;
e7xxx_init_csrows(mci, pdev, dev_idx, drc);
mci->edac_cap |= EDAC_FLAG_NONE;
edac_dbg(3, "tolm, remapbase, remaplimit\n");
/* load the top of low memory, remap base, and remap limit vars */
pci_read_config_word(pdev, E7XXX_TOLM, &pci_data);
pvt->tolm = ((u32) pci_data) << 4;
pci_read_config_word(pdev, E7XXX_REMAPBASE, &pci_data);
pvt->remapbase = ((u32) pci_data) << 14;
pci_read_config_word(pdev, E7XXX_REMAPLIMIT, &pci_data);
pvt->remaplimit = ((u32) pci_data) << 14;
e7xxx_printk(KERN_INFO,
"tolm = %x, remapbase = %x, remaplimit = %x\n", pvt->tolm,
pvt->remapbase, pvt->remaplimit);
/* clear any pending errors, or initial state bits */
e7xxx_get_error_info(mci, &discard);
/* Here we assume that we will never see multiple instances of this
* type of memory controller. The ID is therefore hardcoded to 0.
*/
if (edac_mc_add_mc(mci)) {
edac_dbg(3, "failed edac_mc_add_mc()\n");
goto fail1;
}
/* allocating generic PCI control info */
e7xxx_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
if (!e7xxx_pci) {
printk(KERN_WARNING
"%s(): Unable to create PCI control\n",
__func__);
printk(KERN_WARNING
"%s(): PCI error report via EDAC not setup\n",
__func__);
}
/* get this far and it's successful */
edac_dbg(3, "success\n");
return 0;
fail1:
pci_dev_put(pvt->bridge_ck);
fail0:
edac_mc_free(mci);
return -ENODEV;
}
/* returns count (>= 0), or negative on error */
static int e7xxx_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
edac_dbg(0, "\n");
/* wake up and enable device */
return pci_enable_device(pdev) ?
-EIO : e7xxx_probe1(pdev, ent->driver_data);
}
static void e7xxx_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
struct e7xxx_pvt *pvt;
edac_dbg(0, "\n");
if (e7xxx_pci)
edac_pci_release_generic_ctl(e7xxx_pci);
if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
return;
pvt = (struct e7xxx_pvt *)mci->pvt_info;
pci_dev_put(pvt->bridge_ck);
edac_mc_free(mci);
}
static const struct pci_device_id e7xxx_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, 7205_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
E7205},
{
PCI_VEND_DEV(INTEL, 7500_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
E7500},
{
PCI_VEND_DEV(INTEL, 7501_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
E7501},
{
PCI_VEND_DEV(INTEL, 7505_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
E7505},
{
0,
} /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, e7xxx_pci_tbl);
static struct pci_driver e7xxx_driver = {
.name = EDAC_MOD_STR,
.probe = e7xxx_init_one,
.remove = e7xxx_remove_one,
.id_table = e7xxx_pci_tbl,
};
static int __init e7xxx_init(void)
{
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
return pci_register_driver(&e7xxx_driver);
}
static void __exit e7xxx_exit(void)
{
pci_unregister_driver(&e7xxx_driver);
}
module_init(e7xxx_init);
module_exit(e7xxx_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n"
"Based on.work by Dan Hollis et al");
MODULE_DESCRIPTION("MC support for Intel e7xxx memory controllers");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");

513
drivers/edac/edac_core.h Normal file
View file

@ -0,0 +1,513 @@
/*
* Defines, structures, APIs for edac_core module
*
* (C) 2007 Linux Networx (http://lnxi.com)
* This file may be distributed under the terms of the
* GNU General Public License.
*
* Written by Thayne Harbaugh
* Based on work by Dan Hollis <goemon at anime dot net> and others.
* http://www.anime.net/~goemon/linux-ecc/
*
* NMI handling support added by
* Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>
*
* Refactored for multi-source files:
* Doug Thompson <norsk5@xmission.com>
*
*/
#ifndef _EDAC_CORE_H_
#define _EDAC_CORE_H_
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/pci.h>
#include <linux/time.h>
#include <linux/nmi.h>
#include <linux/rcupdate.h>
#include <linux/completion.h>
#include <linux/kobject.h>
#include <linux/platform_device.h>
#include <linux/workqueue.h>
#include <linux/edac.h>
#define EDAC_DEVICE_NAME_LEN 31
#define EDAC_ATTRIB_VALUE_LEN 15
#if PAGE_SHIFT < 20
#define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT))
#define MiB_TO_PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
#else /* PAGE_SHIFT > 20 */
#define PAGES_TO_MiB(pages) ((pages) << (PAGE_SHIFT - 20))
#define MiB_TO_PAGES(mb) ((mb) >> (PAGE_SHIFT - 20))
#endif
#define edac_printk(level, prefix, fmt, arg...) \
printk(level "EDAC " prefix ": " fmt, ##arg)
#define edac_mc_printk(mci, level, fmt, arg...) \
printk(level "EDAC MC%d: " fmt, mci->mc_idx, ##arg)
#define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \
printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg)
#define edac_device_printk(ctl, level, fmt, arg...) \
printk(level "EDAC DEVICE%d: " fmt, ctl->dev_idx, ##arg)
#define edac_pci_printk(ctl, level, fmt, arg...) \
printk(level "EDAC PCI%d: " fmt, ctl->pci_idx, ##arg)
/* prefixes for edac_printk() and edac_mc_printk() */
#define EDAC_MC "MC"
#define EDAC_PCI "PCI"
#define EDAC_DEBUG "DEBUG"
extern const char * const edac_mem_types[];
#ifdef CONFIG_EDAC_DEBUG
extern int edac_debug_level;
#define edac_dbg(level, fmt, ...) \
do { \
if (level <= edac_debug_level) \
edac_printk(KERN_DEBUG, EDAC_DEBUG, \
"%s: " fmt, __func__, ##__VA_ARGS__); \
} while (0)
#else /* !CONFIG_EDAC_DEBUG */
#define edac_dbg(level, fmt, ...) \
do { \
if (0) \
edac_printk(KERN_DEBUG, EDAC_DEBUG, \
"%s: " fmt, __func__, ##__VA_ARGS__); \
} while (0)
#endif /* !CONFIG_EDAC_DEBUG */
#define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \
PCI_DEVICE_ID_ ## vend ## _ ## dev
#define edac_dev_name(dev) (dev)->dev_name
/*
* The following are the structures to provide for a generic
* or abstract 'edac_device'. This set of structures and the
* code that implements the APIs for the same, provide for
* registering EDAC type devices which are NOT standard memory.
*
* CPU caches (L1 and L2)
* DMA engines
* Core CPU switches
* Fabric switch units
* PCIe interface controllers
* other EDAC/ECC type devices that can be monitored for
* errors, etc.
*
* It allows for a 2 level set of hierarchy. For example:
*
* cache could be composed of L1, L2 and L3 levels of cache.
* Each CPU core would have its own L1 cache, while sharing
* L2 and maybe L3 caches.
*
* View them arranged, via the sysfs presentation:
* /sys/devices/system/edac/..
*
* mc/ <existing memory device directory>
* cpu/cpu0/.. <L1 and L2 block directory>
* /L1-cache/ce_count
* /ue_count
* /L2-cache/ce_count
* /ue_count
* cpu/cpu1/.. <L1 and L2 block directory>
* /L1-cache/ce_count
* /ue_count
* /L2-cache/ce_count
* /ue_count
* ...
*
* the L1 and L2 directories would be "edac_device_block's"
*/
struct edac_device_counter {
u32 ue_count;
u32 ce_count;
};
/* forward reference */
struct edac_device_ctl_info;
struct edac_device_block;
/* edac_dev_sysfs_attribute structure
* used for driver sysfs attributes in mem_ctl_info
* for extra controls and attributes:
* like high level error Injection controls
*/
struct edac_dev_sysfs_attribute {
struct attribute attr;
ssize_t (*show)(struct edac_device_ctl_info *, char *);
ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t);
};
/* edac_dev_sysfs_block_attribute structure
*
* used in leaf 'block' nodes for adding controls/attributes
*
* each block in each instance of the containing control structure
* can have an array of the following. The show and store functions
* will be filled in with the show/store function in the
* low level driver.
*
* The 'value' field will be the actual value field used for
* counting
*/
struct edac_dev_sysfs_block_attribute {
struct attribute attr;
ssize_t (*show)(struct kobject *, struct attribute *, char *);
ssize_t (*store)(struct kobject *, struct attribute *,
const char *, size_t);
struct edac_device_block *block;
unsigned int value;
};
/* device block control structure */
struct edac_device_block {
struct edac_device_instance *instance; /* Up Pointer */
char name[EDAC_DEVICE_NAME_LEN + 1];
struct edac_device_counter counters; /* basic UE and CE counters */
int nr_attribs; /* how many attributes */
/* this block's attributes, could be NULL */
struct edac_dev_sysfs_block_attribute *block_attributes;
/* edac sysfs device control */
struct kobject kobj;
};
/* device instance control structure */
struct edac_device_instance {
struct edac_device_ctl_info *ctl; /* Up pointer */
char name[EDAC_DEVICE_NAME_LEN + 4];
struct edac_device_counter counters; /* instance counters */
u32 nr_blocks; /* how many blocks */
struct edac_device_block *blocks; /* block array */
/* edac sysfs device control */
struct kobject kobj;
};
/*
* Abstract edac_device control info structure
*
*/
struct edac_device_ctl_info {
/* for global list of edac_device_ctl_info structs */
struct list_head link;
struct module *owner; /* Module owner of this control struct */
int dev_idx;
/* Per instance controls for this edac_device */
int log_ue; /* boolean for logging UEs */
int log_ce; /* boolean for logging CEs */
int panic_on_ue; /* boolean for panic'ing on an UE */
unsigned poll_msec; /* number of milliseconds to poll interval */
unsigned long delay; /* number of jiffies for poll_msec */
/* Additional top controller level attributes, but specified
* by the low level driver.
*
* Set by the low level driver to provide attributes at the
* controller level, same level as 'ue_count' and 'ce_count' above.
* An array of structures, NULL terminated
*
* If attributes are desired, then set to array of attributes
* If no attributes are desired, leave NULL
*/
struct edac_dev_sysfs_attribute *sysfs_attributes;
/* pointer to main 'edac' subsys in sysfs */
struct bus_type *edac_subsys;
/* the internal state of this controller instance */
int op_state;
/* work struct for this instance */
struct delayed_work work;
/* pointer to edac polling checking routine:
* If NOT NULL: points to polling check routine
* If NULL: Then assumes INTERRUPT operation, where
* MC driver will receive events
*/
void (*edac_check) (struct edac_device_ctl_info * edac_dev);
struct device *dev; /* pointer to device structure */
const char *mod_name; /* module name */
const char *ctl_name; /* edac controller name */
const char *dev_name; /* pci/platform/etc... name */
void *pvt_info; /* pointer to 'private driver' info */
unsigned long start_time; /* edac_device load start time (jiffies) */
struct completion removal_complete;
/* sysfs top name under 'edac' directory
* and instance name:
* cpu/cpu0/...
* cpu/cpu1/...
* cpu/cpu2/...
* ...
*/
char name[EDAC_DEVICE_NAME_LEN + 1];
/* Number of instances supported on this control structure
* and the array of those instances
*/
u32 nr_instances;
struct edac_device_instance *instances;
/* Event counters for the this whole EDAC Device */
struct edac_device_counter counters;
/* edac sysfs device control for the 'name'
* device this structure controls
*/
struct kobject kobj;
};
/* To get from the instance's wq to the beginning of the ctl structure */
#define to_edac_mem_ctl_work(w) \
container_of(w, struct mem_ctl_info, work)
#define to_edac_device_ctl_work(w) \
container_of(w,struct edac_device_ctl_info,work)
/*
* The alloc() and free() functions for the 'edac_device' control info
* structure. A MC driver will allocate one of these for each edac_device
* it is going to control/register with the EDAC CORE.
*/
extern struct edac_device_ctl_info *edac_device_alloc_ctl_info(
unsigned sizeof_private,
char *edac_device_name, unsigned nr_instances,
char *edac_block_name, unsigned nr_blocks,
unsigned offset_value,
struct edac_dev_sysfs_block_attribute *block_attributes,
unsigned nr_attribs,
int device_index);
/* The offset value can be:
* -1 indicating no offset value
* 0 for zero-based block numbers
* 1 for 1-based block number
* other for other-based block number
*/
#define BLOCK_OFFSET_VALUE_OFF ((unsigned) -1)
extern void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info);
#ifdef CONFIG_PCI
struct edac_pci_counter {
atomic_t pe_count;
atomic_t npe_count;
};
/*
* Abstract edac_pci control info structure
*
*/
struct edac_pci_ctl_info {
/* for global list of edac_pci_ctl_info structs */
struct list_head link;
int pci_idx;
struct bus_type *edac_subsys; /* pointer to subsystem */
/* the internal state of this controller instance */
int op_state;
/* work struct for this instance */
struct delayed_work work;
/* pointer to edac polling checking routine:
* If NOT NULL: points to polling check routine
* If NULL: Then assumes INTERRUPT operation, where
* MC driver will receive events
*/
void (*edac_check) (struct edac_pci_ctl_info * edac_dev);
struct device *dev; /* pointer to device structure */
const char *mod_name; /* module name */
const char *ctl_name; /* edac controller name */
const char *dev_name; /* pci/platform/etc... name */
void *pvt_info; /* pointer to 'private driver' info */
unsigned long start_time; /* edac_pci load start time (jiffies) */
struct completion complete;
/* sysfs top name under 'edac' directory
* and instance name:
* cpu/cpu0/...
* cpu/cpu1/...
* cpu/cpu2/...
* ...
*/
char name[EDAC_DEVICE_NAME_LEN + 1];
/* Event counters for the this whole EDAC Device */
struct edac_pci_counter counters;
/* edac sysfs device control for the 'name'
* device this structure controls
*/
struct kobject kobj;
struct completion kobj_complete;
};
#define to_edac_pci_ctl_work(w) \
container_of(w, struct edac_pci_ctl_info,work)
/* write all or some bits in a byte-register*/
static inline void pci_write_bits8(struct pci_dev *pdev, int offset, u8 value,
u8 mask)
{
if (mask != 0xff) {
u8 buf;
pci_read_config_byte(pdev, offset, &buf);
value &= mask;
buf &= ~mask;
value |= buf;
}
pci_write_config_byte(pdev, offset, value);
}
/* write all or some bits in a word-register*/
static inline void pci_write_bits16(struct pci_dev *pdev, int offset,
u16 value, u16 mask)
{
if (mask != 0xffff) {
u16 buf;
pci_read_config_word(pdev, offset, &buf);
value &= mask;
buf &= ~mask;
value |= buf;
}
pci_write_config_word(pdev, offset, value);
}
/*
* pci_write_bits32
*
* edac local routine to do pci_write_config_dword, but adds
* a mask parameter. If mask is all ones, ignore the mask.
* Otherwise utilize the mask to isolate specified bits
*
* write all or some bits in a dword-register
*/
static inline void pci_write_bits32(struct pci_dev *pdev, int offset,
u32 value, u32 mask)
{
if (mask != 0xffffffff) {
u32 buf;
pci_read_config_dword(pdev, offset, &buf);
value &= mask;
buf &= ~mask;
value |= buf;
}
pci_write_config_dword(pdev, offset, value);
}
#endif /* CONFIG_PCI */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
unsigned n_layers,
struct edac_mc_layer *layers,
unsigned sz_pvt);
extern int edac_mc_add_mc(struct mem_ctl_info *mci);
extern void edac_mc_free(struct mem_ctl_info *mci);
extern struct mem_ctl_info *edac_mc_find(int idx);
extern struct mem_ctl_info *find_mci_by_dev(struct device *dev);
extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
unsigned long page);
void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
struct mem_ctl_info *mci,
struct edac_raw_error_desc *e);
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
struct mem_ctl_info *mci,
const u16 error_count,
const unsigned long page_frame_number,
const unsigned long offset_in_page,
const unsigned long syndrome,
const int top_layer,
const int mid_layer,
const int low_layer,
const char *msg,
const char *other_detail);
/*
* edac_device APIs
*/
extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev);
extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
int inst_nr, int block_nr, const char *msg);
extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
int inst_nr, int block_nr, const char *msg);
extern int edac_device_alloc_index(void);
extern const char *edac_layer_name[];
/*
* edac_pci APIs
*/
extern struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
const char *edac_pci_name);
extern void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci);
extern void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci,
unsigned long value);
extern int edac_pci_alloc_index(void);
extern int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx);
extern struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev);
extern struct edac_pci_ctl_info *edac_pci_create_generic_ctl(
struct device *dev,
const char *mod_name);
extern void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci);
extern int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci);
extern void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci);
/*
* edac misc APIs
*/
extern char *edac_op_state_to_string(int op_state);
#endif /* _EDAC_CORE_H_ */

715
drivers/edac/edac_device.c Normal file
View file

@ -0,0 +1,715 @@
/*
* edac_device.c
* (C) 2007 www.douglaskthompson.com
*
* This file may be distributed under the terms of the
* GNU General Public License.
*
* Written by Doug Thompson <norsk5@xmission.com>
*
* edac_device API implementation
* 19 Jan 2007
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/workqueue.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include "edac_core.h"
#include "edac_module.h"
/* lock for the list: 'edac_device_list', manipulation of this list
* is protected by the 'device_ctls_mutex' lock
*/
static DEFINE_MUTEX(device_ctls_mutex);
static LIST_HEAD(edac_device_list);
#ifdef CONFIG_EDAC_DEBUG
static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev)
{
edac_dbg(3, "\tedac_dev = %p dev_idx=%d\n",
edac_dev, edac_dev->dev_idx);
edac_dbg(4, "\tedac_dev->edac_check = %p\n", edac_dev->edac_check);
edac_dbg(3, "\tdev = %p\n", edac_dev->dev);
edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
edac_dev->mod_name, edac_dev->ctl_name);
edac_dbg(3, "\tpvt_info = %p\n\n", edac_dev->pvt_info);
}
#endif /* CONFIG_EDAC_DEBUG */
/*
* edac_device_alloc_ctl_info()
* Allocate a new edac device control info structure
*
* The control structure is allocated in complete chunk
* from the OS. It is in turn sub allocated to the
* various objects that compose the structure
*
* The structure has a 'nr_instance' array within itself.
* Each instance represents a major component
* Example: L1 cache and L2 cache are 2 instance components
*
* Within each instance is an array of 'nr_blocks' blockoffsets
*/
struct edac_device_ctl_info *edac_device_alloc_ctl_info(
unsigned sz_private,
char *edac_device_name, unsigned nr_instances,
char *edac_block_name, unsigned nr_blocks,
unsigned offset_value, /* zero, 1, or other based offset */
struct edac_dev_sysfs_block_attribute *attrib_spec, unsigned nr_attrib,
int device_index)
{
struct edac_device_ctl_info *dev_ctl;
struct edac_device_instance *dev_inst, *inst;
struct edac_device_block *dev_blk, *blk_p, *blk;
struct edac_dev_sysfs_block_attribute *dev_attrib, *attrib_p, *attrib;
unsigned total_size;
unsigned count;
unsigned instance, block, attr;
void *pvt, *p;
int err;
edac_dbg(4, "instances=%d blocks=%d\n", nr_instances, nr_blocks);
/* Calculate the size of memory we need to allocate AND
* determine the offsets of the various item arrays
* (instance,block,attrib) from the start of an allocated structure.
* We want the alignment of each item (instance,block,attrib)
* to be at least as stringent as what the compiler would
* provide if we could simply hardcode everything into a single struct.
*/
p = NULL;
dev_ctl = edac_align_ptr(&p, sizeof(*dev_ctl), 1);
/* Calc the 'end' offset past end of ONE ctl_info structure
* which will become the start of the 'instance' array
*/
dev_inst = edac_align_ptr(&p, sizeof(*dev_inst), nr_instances);
/* Calc the 'end' offset past the instance array within the ctl_info
* which will become the start of the block array
*/
count = nr_instances * nr_blocks;
dev_blk = edac_align_ptr(&p, sizeof(*dev_blk), count);
/* Calc the 'end' offset past the dev_blk array
* which will become the start of the attrib array, if any.
*/
/* calc how many nr_attrib we need */
if (nr_attrib > 0)
count *= nr_attrib;
dev_attrib = edac_align_ptr(&p, sizeof(*dev_attrib), count);
/* Calc the 'end' offset past the attributes array */
pvt = edac_align_ptr(&p, sz_private, 1);
/* 'pvt' now points to where the private data area is.
* At this point 'pvt' (like dev_inst,dev_blk and dev_attrib)
* is baselined at ZERO
*/
total_size = ((unsigned long)pvt) + sz_private;
/* Allocate the amount of memory for the set of control structures */
dev_ctl = kzalloc(total_size, GFP_KERNEL);
if (dev_ctl == NULL)
return NULL;
/* Adjust pointers so they point within the actual memory we
* just allocated rather than an imaginary chunk of memory
* located at address 0.
* 'dev_ctl' points to REAL memory, while the others are
* ZERO based and thus need to be adjusted to point within
* the allocated memory.
*/
dev_inst = (struct edac_device_instance *)
(((char *)dev_ctl) + ((unsigned long)dev_inst));
dev_blk = (struct edac_device_block *)
(((char *)dev_ctl) + ((unsigned long)dev_blk));
dev_attrib = (struct edac_dev_sysfs_block_attribute *)
(((char *)dev_ctl) + ((unsigned long)dev_attrib));
pvt = sz_private ? (((char *)dev_ctl) + ((unsigned long)pvt)) : NULL;
/* Begin storing the information into the control info structure */
dev_ctl->dev_idx = device_index;
dev_ctl->nr_instances = nr_instances;
dev_ctl->instances = dev_inst;
dev_ctl->pvt_info = pvt;
/* Default logging of CEs and UEs */
dev_ctl->log_ce = 1;
dev_ctl->log_ue = 1;
/* Name of this edac device */
snprintf(dev_ctl->name,sizeof(dev_ctl->name),"%s",edac_device_name);
edac_dbg(4, "edac_dev=%p next after end=%p\n",
dev_ctl, pvt + sz_private);
/* Initialize every Instance */
for (instance = 0; instance < nr_instances; instance++) {
inst = &dev_inst[instance];
inst->ctl = dev_ctl;
inst->nr_blocks = nr_blocks;
blk_p = &dev_blk[instance * nr_blocks];
inst->blocks = blk_p;
/* name of this instance */
snprintf(inst->name, sizeof(inst->name),
"%s%u", edac_device_name, instance);
/* Initialize every block in each instance */
for (block = 0; block < nr_blocks; block++) {
blk = &blk_p[block];
blk->instance = inst;
snprintf(blk->name, sizeof(blk->name),
"%s%d", edac_block_name, block+offset_value);
edac_dbg(4, "instance=%d inst_p=%p block=#%d block_p=%p name='%s'\n",
instance, inst, block, blk, blk->name);
/* if there are NO attributes OR no attribute pointer
* then continue on to next block iteration
*/
if ((nr_attrib == 0) || (attrib_spec == NULL))
continue;
/* setup the attribute array for this block */
blk->nr_attribs = nr_attrib;
attrib_p = &dev_attrib[block*nr_instances*nr_attrib];
blk->block_attributes = attrib_p;
edac_dbg(4, "THIS BLOCK_ATTRIB=%p\n",
blk->block_attributes);
/* Initialize every user specified attribute in this
* block with the data the caller passed in
* Each block gets its own copy of pointers,
* and its unique 'value'
*/
for (attr = 0; attr < nr_attrib; attr++) {
attrib = &attrib_p[attr];
/* populate the unique per attrib
* with the code pointers and info
*/
attrib->attr = attrib_spec[attr].attr;
attrib->show = attrib_spec[attr].show;
attrib->store = attrib_spec[attr].store;
attrib->block = blk; /* up link */
edac_dbg(4, "alloc-attrib=%p attrib_name='%s' attrib-spec=%p spec-name=%s\n",
attrib, attrib->attr.name,
&attrib_spec[attr],
attrib_spec[attr].attr.name
);
}
}
}
/* Mark this instance as merely ALLOCATED */
dev_ctl->op_state = OP_ALLOC;
/*
* Initialize the 'root' kobj for the edac_device controller
*/
err = edac_device_register_sysfs_main_kobj(dev_ctl);
if (err) {
kfree(dev_ctl);
return NULL;
}
/* at this point, the root kobj is valid, and in order to
* 'free' the object, then the function:
* edac_device_unregister_sysfs_main_kobj() must be called
* which will perform kobj unregistration and the actual free
* will occur during the kobject callback operation
*/
return dev_ctl;
}
EXPORT_SYMBOL_GPL(edac_device_alloc_ctl_info);
/*
* edac_device_free_ctl_info()
* frees the memory allocated by the edac_device_alloc_ctl_info()
* function
*/
void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info)
{
edac_device_unregister_sysfs_main_kobj(ctl_info);
}
EXPORT_SYMBOL_GPL(edac_device_free_ctl_info);
/*
* find_edac_device_by_dev
* scans the edac_device list for a specific 'struct device *'
*
* lock to be held prior to call: device_ctls_mutex
*
* Return:
* pointer to control structure managing 'dev'
* NULL if not found on list
*/
static struct edac_device_ctl_info *find_edac_device_by_dev(struct device *dev)
{
struct edac_device_ctl_info *edac_dev;
struct list_head *item;
edac_dbg(0, "\n");
list_for_each(item, &edac_device_list) {
edac_dev = list_entry(item, struct edac_device_ctl_info, link);
if (edac_dev->dev == dev)
return edac_dev;
}
return NULL;
}
/*
* add_edac_dev_to_global_list
* Before calling this function, caller must
* assign a unique value to edac_dev->dev_idx.
*
* lock to be held prior to call: device_ctls_mutex
*
* Return:
* 0 on success
* 1 on failure.
*/
static int add_edac_dev_to_global_list(struct edac_device_ctl_info *edac_dev)
{
struct list_head *item, *insert_before;
struct edac_device_ctl_info *rover;
insert_before = &edac_device_list;
/* Determine if already on the list */
rover = find_edac_device_by_dev(edac_dev->dev);
if (unlikely(rover != NULL))
goto fail0;
/* Insert in ascending order by 'dev_idx', so find position */
list_for_each(item, &edac_device_list) {
rover = list_entry(item, struct edac_device_ctl_info, link);
if (rover->dev_idx >= edac_dev->dev_idx) {
if (unlikely(rover->dev_idx == edac_dev->dev_idx))
goto fail1;
insert_before = item;
break;
}
}
list_add_tail_rcu(&edac_dev->link, insert_before);
return 0;
fail0:
edac_printk(KERN_WARNING, EDAC_MC,
"%s (%s) %s %s already assigned %d\n",
dev_name(rover->dev), edac_dev_name(rover),
rover->mod_name, rover->ctl_name, rover->dev_idx);
return 1;
fail1:
edac_printk(KERN_WARNING, EDAC_MC,
"bug in low-level driver: attempt to assign\n"
" duplicate dev_idx %d in %s()\n", rover->dev_idx,
__func__);
return 1;
}
/*
* del_edac_device_from_global_list
*/
static void del_edac_device_from_global_list(struct edac_device_ctl_info
*edac_device)
{
list_del_rcu(&edac_device->link);
/* these are for safe removal of devices from global list while
* NMI handlers may be traversing list
*/
synchronize_rcu();
INIT_LIST_HEAD(&edac_device->link);
}
/*
* edac_device_workq_function
* performs the operation scheduled by a workq request
*
* this workq is embedded within an edac_device_ctl_info
* structure, that needs to be polled for possible error events.
*
* This operation is to acquire the list mutex lock
* (thus preventing insertation or deletion)
* and then call the device's poll function IFF this device is
* running polled and there is a poll function defined.
*/
static void edac_device_workq_function(struct work_struct *work_req)
{
struct delayed_work *d_work = to_delayed_work(work_req);
struct edac_device_ctl_info *edac_dev = to_edac_device_ctl_work(d_work);
mutex_lock(&device_ctls_mutex);
/* If we are being removed, bail out immediately */
if (edac_dev->op_state == OP_OFFLINE) {
mutex_unlock(&device_ctls_mutex);
return;
}
/* Only poll controllers that are running polled and have a check */
if ((edac_dev->op_state == OP_RUNNING_POLL) &&
(edac_dev->edac_check != NULL)) {
edac_dev->edac_check(edac_dev);
}
mutex_unlock(&device_ctls_mutex);
/* Reschedule the workq for the next time period to start again
* if the number of msec is for 1 sec, then adjust to the next
* whole one second to save timers firing all over the period
* between integral seconds
*/
if (edac_dev->poll_msec == 1000)
queue_delayed_work(edac_workqueue, &edac_dev->work,
round_jiffies_relative(edac_dev->delay));
else
queue_delayed_work(edac_workqueue, &edac_dev->work,
edac_dev->delay);
}
/*
* edac_device_workq_setup
* initialize a workq item for this edac_device instance
* passing in the new delay period in msec
*/
void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev,
unsigned msec)
{
edac_dbg(0, "\n");
/* take the arg 'msec' and set it into the control structure
* to used in the time period calculation
* then calc the number of jiffies that represents
*/
edac_dev->poll_msec = msec;
edac_dev->delay = msecs_to_jiffies(msec);
INIT_DELAYED_WORK(&edac_dev->work, edac_device_workq_function);
/* optimize here for the 1 second case, which will be normal value, to
* fire ON the 1 second time event. This helps reduce all sorts of
* timers firing on sub-second basis, while they are happy
* to fire together on the 1 second exactly
*/
if (edac_dev->poll_msec == 1000)
queue_delayed_work(edac_workqueue, &edac_dev->work,
round_jiffies_relative(edac_dev->delay));
else
queue_delayed_work(edac_workqueue, &edac_dev->work,
edac_dev->delay);
}
/*
* edac_device_workq_teardown
* stop the workq processing on this edac_dev
*/
void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev)
{
int status;
if (!edac_dev->edac_check)
return;
status = cancel_delayed_work(&edac_dev->work);
if (status == 0) {
/* workq instance might be running, wait for it */
flush_workqueue(edac_workqueue);
}
}
/*
* edac_device_reset_delay_period
*
* need to stop any outstanding workq queued up at this time
* because we will be resetting the sleep time.
* Then restart the workq on the new delay
*/
void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev,
unsigned long value)
{
/* cancel the current workq request, without the mutex lock */
edac_device_workq_teardown(edac_dev);
/* acquire the mutex before doing the workq setup */
mutex_lock(&device_ctls_mutex);
/* restart the workq request, with new delay value */
edac_device_workq_setup(edac_dev, value);
mutex_unlock(&device_ctls_mutex);
}
/*
* edac_device_alloc_index: Allocate a unique device index number
*
* Return:
* allocated index number
*/
int edac_device_alloc_index(void)
{
static atomic_t device_indexes = ATOMIC_INIT(0);
return atomic_inc_return(&device_indexes) - 1;
}
EXPORT_SYMBOL_GPL(edac_device_alloc_index);
/**
* edac_device_add_device: Insert the 'edac_dev' structure into the
* edac_device global list and create sysfs entries associated with
* edac_device structure.
* @edac_device: pointer to the edac_device structure to be added to the list
* 'edac_device' structure.
*
* Return:
* 0 Success
* !0 Failure
*/
int edac_device_add_device(struct edac_device_ctl_info *edac_dev)
{
edac_dbg(0, "\n");
#ifdef CONFIG_EDAC_DEBUG
if (edac_debug_level >= 3)
edac_device_dump_device(edac_dev);
#endif
mutex_lock(&device_ctls_mutex);
if (add_edac_dev_to_global_list(edac_dev))
goto fail0;
/* set load time so that error rate can be tracked */
edac_dev->start_time = jiffies;
/* create this instance's sysfs entries */
if (edac_device_create_sysfs(edac_dev)) {
edac_device_printk(edac_dev, KERN_WARNING,
"failed to create sysfs device\n");
goto fail1;
}
/* If there IS a check routine, then we are running POLLED */
if (edac_dev->edac_check != NULL) {
/* This instance is NOW RUNNING */
edac_dev->op_state = OP_RUNNING_POLL;
/*
* enable workq processing on this instance,
* default = 1000 msec
*/
edac_device_workq_setup(edac_dev, 1000);
} else {
edac_dev->op_state = OP_RUNNING_INTERRUPT;
}
/* Report action taken */
edac_device_printk(edac_dev, KERN_INFO,
"Giving out device to module %s controller %s: DEV %s (%s)\n",
edac_dev->mod_name, edac_dev->ctl_name, edac_dev->dev_name,
edac_op_state_to_string(edac_dev->op_state));
mutex_unlock(&device_ctls_mutex);
return 0;
fail1:
/* Some error, so remove the entry from the lsit */
del_edac_device_from_global_list(edac_dev);
fail0:
mutex_unlock(&device_ctls_mutex);
return 1;
}
EXPORT_SYMBOL_GPL(edac_device_add_device);
/**
* edac_device_del_device:
* Remove sysfs entries for specified edac_device structure and
* then remove edac_device structure from global list
*
* @dev:
* Pointer to 'struct device' representing edac_device
* structure to remove.
*
* Return:
* Pointer to removed edac_device structure,
* OR NULL if device not found.
*/
struct edac_device_ctl_info *edac_device_del_device(struct device *dev)
{
struct edac_device_ctl_info *edac_dev;
edac_dbg(0, "\n");
mutex_lock(&device_ctls_mutex);
/* Find the structure on the list, if not there, then leave */
edac_dev = find_edac_device_by_dev(dev);
if (edac_dev == NULL) {
mutex_unlock(&device_ctls_mutex);
return NULL;
}
/* mark this instance as OFFLINE */
edac_dev->op_state = OP_OFFLINE;
/* deregister from global list */
del_edac_device_from_global_list(edac_dev);
mutex_unlock(&device_ctls_mutex);
/* clear workq processing on this instance */
edac_device_workq_teardown(edac_dev);
/* Tear down the sysfs entries for this instance */
edac_device_remove_sysfs(edac_dev);
edac_printk(KERN_INFO, EDAC_MC,
"Removed device %d for %s %s: DEV %s\n",
edac_dev->dev_idx,
edac_dev->mod_name, edac_dev->ctl_name, edac_dev_name(edac_dev));
return edac_dev;
}
EXPORT_SYMBOL_GPL(edac_device_del_device);
static inline int edac_device_get_log_ce(struct edac_device_ctl_info *edac_dev)
{
return edac_dev->log_ce;
}
static inline int edac_device_get_log_ue(struct edac_device_ctl_info *edac_dev)
{
return edac_dev->log_ue;
}
static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info
*edac_dev)
{
return edac_dev->panic_on_ue;
}
/*
* edac_device_handle_ce
* perform a common output and handling of an 'edac_dev' CE event
*/
void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
int inst_nr, int block_nr, const char *msg)
{
struct edac_device_instance *instance;
struct edac_device_block *block = NULL;
if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
edac_device_printk(edac_dev, KERN_ERR,
"INTERNAL ERROR: 'instance' out of range "
"(%d >= %d)\n", inst_nr,
edac_dev->nr_instances);
return;
}
instance = edac_dev->instances + inst_nr;
if ((block_nr >= instance->nr_blocks) || (block_nr < 0)) {
edac_device_printk(edac_dev, KERN_ERR,
"INTERNAL ERROR: instance %d 'block' "
"out of range (%d >= %d)\n",
inst_nr, block_nr,
instance->nr_blocks);
return;
}
if (instance->nr_blocks > 0) {
block = instance->blocks + block_nr;
block->counters.ce_count++;
}
/* Propagate the count up the 'totals' tree */
instance->counters.ce_count++;
edac_dev->counters.ce_count++;
if (edac_device_get_log_ce(edac_dev))
edac_device_printk(edac_dev, KERN_WARNING,
"CE: %s instance: %s block: %s '%s'\n",
edac_dev->ctl_name, instance->name,
block ? block->name : "N/A", msg);
}
EXPORT_SYMBOL_GPL(edac_device_handle_ce);
/*
* edac_device_handle_ue
* perform a common output and handling of an 'edac_dev' UE event
*/
void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
int inst_nr, int block_nr, const char *msg)
{
struct edac_device_instance *instance;
struct edac_device_block *block = NULL;
if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
edac_device_printk(edac_dev, KERN_ERR,
"INTERNAL ERROR: 'instance' out of range "
"(%d >= %d)\n", inst_nr,
edac_dev->nr_instances);
return;
}
instance = edac_dev->instances + inst_nr;
if ((block_nr >= instance->nr_blocks) || (block_nr < 0)) {
edac_device_printk(edac_dev, KERN_ERR,
"INTERNAL ERROR: instance %d 'block' "
"out of range (%d >= %d)\n",
inst_nr, block_nr,
instance->nr_blocks);
return;
}
if (instance->nr_blocks > 0) {
block = instance->blocks + block_nr;
block->counters.ue_count++;
}
/* Propagate the count up the 'totals' tree */
instance->counters.ue_count++;
edac_dev->counters.ue_count++;
if (edac_device_get_log_ue(edac_dev))
edac_device_printk(edac_dev, KERN_EMERG,
"UE: %s instance: %s block: %s '%s'\n",
edac_dev->ctl_name, instance->name,
block ? block->name : "N/A", msg);
if (edac_device_get_panic_on_ue(edac_dev))
panic("EDAC %s: UE instance: %s block %s '%s'\n",
edac_dev->ctl_name, instance->name,
block ? block->name : "N/A", msg);
}
EXPORT_SYMBOL_GPL(edac_device_handle_ue);

View file

@ -0,0 +1,879 @@
/*
* file for managing the edac_device subsystem of devices for EDAC
*
* (C) 2007 SoftwareBitMaker
*
* This file may be distributed under the terms of the
* GNU General Public License.
*
* Written Doug Thompson <norsk5@xmission.com>
*
*/
#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/edac.h>
#include "edac_core.h"
#include "edac_module.h"
#define EDAC_DEVICE_SYMLINK "device"
#define to_edacdev(k) container_of(k, struct edac_device_ctl_info, kobj)
#define to_edacdev_attr(a) container_of(a, struct edacdev_attribute, attr)
/*
* Set of edac_device_ctl_info attribute store/show functions
*/
/* 'log_ue' */
static ssize_t edac_device_ctl_log_ue_show(struct edac_device_ctl_info
*ctl_info, char *data)
{
return sprintf(data, "%u\n", ctl_info->log_ue);
}
static ssize_t edac_device_ctl_log_ue_store(struct edac_device_ctl_info
*ctl_info, const char *data,
size_t count)
{
/* if parameter is zero, turn off flag, if non-zero turn on flag */
ctl_info->log_ue = (simple_strtoul(data, NULL, 0) != 0);
return count;
}
/* 'log_ce' */
static ssize_t edac_device_ctl_log_ce_show(struct edac_device_ctl_info
*ctl_info, char *data)
{
return sprintf(data, "%u\n", ctl_info->log_ce);
}
static ssize_t edac_device_ctl_log_ce_store(struct edac_device_ctl_info
*ctl_info, const char *data,
size_t count)
{
/* if parameter is zero, turn off flag, if non-zero turn on flag */
ctl_info->log_ce = (simple_strtoul(data, NULL, 0) != 0);
return count;
}
/* 'panic_on_ue' */
static ssize_t edac_device_ctl_panic_on_ue_show(struct edac_device_ctl_info
*ctl_info, char *data)
{
return sprintf(data, "%u\n", ctl_info->panic_on_ue);
}
static ssize_t edac_device_ctl_panic_on_ue_store(struct edac_device_ctl_info
*ctl_info, const char *data,
size_t count)
{
/* if parameter is zero, turn off flag, if non-zero turn on flag */
ctl_info->panic_on_ue = (simple_strtoul(data, NULL, 0) != 0);
return count;
}
/* 'poll_msec' show and store functions*/
static ssize_t edac_device_ctl_poll_msec_show(struct edac_device_ctl_info
*ctl_info, char *data)
{
return sprintf(data, "%u\n", ctl_info->poll_msec);
}
static ssize_t edac_device_ctl_poll_msec_store(struct edac_device_ctl_info
*ctl_info, const char *data,
size_t count)
{
unsigned long value;
/* get the value and enforce that it is non-zero, must be at least
* one millisecond for the delay period, between scans
* Then cancel last outstanding delay for the work request
* and set a new one.
*/
value = simple_strtoul(data, NULL, 0);
edac_device_reset_delay_period(ctl_info, value);
return count;
}
/* edac_device_ctl_info specific attribute structure */
struct ctl_info_attribute {
struct attribute attr;
ssize_t(*show) (struct edac_device_ctl_info *, char *);
ssize_t(*store) (struct edac_device_ctl_info *, const char *, size_t);
};
#define to_ctl_info(k) container_of(k, struct edac_device_ctl_info, kobj)
#define to_ctl_info_attr(a) container_of(a,struct ctl_info_attribute,attr)
/* Function to 'show' fields from the edac_dev 'ctl_info' structure */
static ssize_t edac_dev_ctl_info_show(struct kobject *kobj,
struct attribute *attr, char *buffer)
{
struct edac_device_ctl_info *edac_dev = to_ctl_info(kobj);
struct ctl_info_attribute *ctl_info_attr = to_ctl_info_attr(attr);
if (ctl_info_attr->show)
return ctl_info_attr->show(edac_dev, buffer);
return -EIO;
}
/* Function to 'store' fields into the edac_dev 'ctl_info' structure */
static ssize_t edac_dev_ctl_info_store(struct kobject *kobj,
struct attribute *attr,
const char *buffer, size_t count)
{
struct edac_device_ctl_info *edac_dev = to_ctl_info(kobj);
struct ctl_info_attribute *ctl_info_attr = to_ctl_info_attr(attr);
if (ctl_info_attr->store)
return ctl_info_attr->store(edac_dev, buffer, count);
return -EIO;
}
/* edac_dev file operations for an 'ctl_info' */
static const struct sysfs_ops device_ctl_info_ops = {
.show = edac_dev_ctl_info_show,
.store = edac_dev_ctl_info_store
};
#define CTL_INFO_ATTR(_name,_mode,_show,_store) \
static struct ctl_info_attribute attr_ctl_info_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
.show = _show, \
.store = _store, \
};
/* Declare the various ctl_info attributes here and their respective ops */
CTL_INFO_ATTR(log_ue, S_IRUGO | S_IWUSR,
edac_device_ctl_log_ue_show, edac_device_ctl_log_ue_store);
CTL_INFO_ATTR(log_ce, S_IRUGO | S_IWUSR,
edac_device_ctl_log_ce_show, edac_device_ctl_log_ce_store);
CTL_INFO_ATTR(panic_on_ue, S_IRUGO | S_IWUSR,
edac_device_ctl_panic_on_ue_show,
edac_device_ctl_panic_on_ue_store);
CTL_INFO_ATTR(poll_msec, S_IRUGO | S_IWUSR,
edac_device_ctl_poll_msec_show, edac_device_ctl_poll_msec_store);
/* Base Attributes of the EDAC_DEVICE ECC object */
static struct ctl_info_attribute *device_ctrl_attr[] = {
&attr_ctl_info_panic_on_ue,
&attr_ctl_info_log_ue,
&attr_ctl_info_log_ce,
&attr_ctl_info_poll_msec,
NULL,
};
/*
* edac_device_ctrl_master_release
*
* called when the reference count for the 'main' kobj
* for a edac_device control struct reaches zero
*
* Reference count model:
* One 'main' kobject for each control structure allocated.
* That main kobj is initially set to one AND
* the reference count for the EDAC 'core' module is
* bumped by one, thus added 'keep in memory' dependency.
*
* Each new internal kobj (in instances and blocks) then
* bumps the 'main' kobject.
*
* When they are released their release functions decrement
* the 'main' kobj.
*
* When the main kobj reaches zero (0) then THIS function
* is called which then decrements the EDAC 'core' module.
* When the module reference count reaches zero then the
* module no longer has dependency on keeping the release
* function code in memory and module can be unloaded.
*
* This will support several control objects as well, each
* with its own 'main' kobj.
*/
static void edac_device_ctrl_master_release(struct kobject *kobj)
{
struct edac_device_ctl_info *edac_dev = to_edacdev(kobj);
edac_dbg(4, "control index=%d\n", edac_dev->dev_idx);
/* decrement the EDAC CORE module ref count */
module_put(edac_dev->owner);
/* free the control struct containing the 'main' kobj
* passed in to this routine
*/
kfree(edac_dev);
}
/* ktype for the main (master) kobject */
static struct kobj_type ktype_device_ctrl = {
.release = edac_device_ctrl_master_release,
.sysfs_ops = &device_ctl_info_ops,
.default_attrs = (struct attribute **)device_ctrl_attr,
};
/*
* edac_device_register_sysfs_main_kobj
*
* perform the high level setup for the new edac_device instance
*
* Return: 0 SUCCESS
* !0 FAILURE
*/
int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
{
struct bus_type *edac_subsys;
int err;
edac_dbg(1, "\n");
/* get the /sys/devices/system/edac reference */
edac_subsys = edac_get_sysfs_subsys();
if (edac_subsys == NULL) {
edac_dbg(1, "no edac_subsys error\n");
err = -ENODEV;
goto err_out;
}
/* Point to the 'edac_subsys' this instance 'reports' to */
edac_dev->edac_subsys = edac_subsys;
/* Init the devices's kobject */
memset(&edac_dev->kobj, 0, sizeof(struct kobject));
/* Record which module 'owns' this control structure
* and bump the ref count of the module
*/
edac_dev->owner = THIS_MODULE;
if (!try_module_get(edac_dev->owner)) {
err = -ENODEV;
goto err_mod_get;
}
/* register */
err = kobject_init_and_add(&edac_dev->kobj, &ktype_device_ctrl,
&edac_subsys->dev_root->kobj,
"%s", edac_dev->name);
if (err) {
edac_dbg(1, "Failed to register '.../edac/%s'\n",
edac_dev->name);
goto err_kobj_reg;
}
kobject_uevent(&edac_dev->kobj, KOBJ_ADD);
/* At this point, to 'free' the control struct,
* edac_device_unregister_sysfs_main_kobj() must be used
*/
edac_dbg(4, "Registered '.../edac/%s' kobject\n", edac_dev->name);
return 0;
/* Error exit stack */
err_kobj_reg:
module_put(edac_dev->owner);
err_mod_get:
edac_put_sysfs_subsys();
err_out:
return err;
}
/*
* edac_device_unregister_sysfs_main_kobj:
* the '..../edac/<name>' kobject
*/
void edac_device_unregister_sysfs_main_kobj(struct edac_device_ctl_info *dev)
{
edac_dbg(0, "\n");
edac_dbg(4, "name of kobject is: %s\n", kobject_name(&dev->kobj));
/*
* Unregister the edac device's kobject and
* allow for reference count to reach 0 at which point
* the callback will be called to:
* a) module_put() this module
* b) 'kfree' the memory
*/
kobject_put(&dev->kobj);
edac_put_sysfs_subsys();
}
/* edac_dev -> instance information */
/*
* Set of low-level instance attribute show functions
*/
static ssize_t instance_ue_count_show(struct edac_device_instance *instance,
char *data)
{
return sprintf(data, "%u\n", instance->counters.ue_count);
}
static ssize_t instance_ce_count_show(struct edac_device_instance *instance,
char *data)
{
return sprintf(data, "%u\n", instance->counters.ce_count);
}
#define to_instance(k) container_of(k, struct edac_device_instance, kobj)
#define to_instance_attr(a) container_of(a,struct instance_attribute,attr)
/* DEVICE instance kobject release() function */
static void edac_device_ctrl_instance_release(struct kobject *kobj)
{
struct edac_device_instance *instance;
edac_dbg(1, "\n");
/* map from this kobj to the main control struct
* and then dec the main kobj count
*/
instance = to_instance(kobj);
kobject_put(&instance->ctl->kobj);
}
/* instance specific attribute structure */
struct instance_attribute {
struct attribute attr;
ssize_t(*show) (struct edac_device_instance *, char *);
ssize_t(*store) (struct edac_device_instance *, const char *, size_t);
};
/* Function to 'show' fields from the edac_dev 'instance' structure */
static ssize_t edac_dev_instance_show(struct kobject *kobj,
struct attribute *attr, char *buffer)
{
struct edac_device_instance *instance = to_instance(kobj);
struct instance_attribute *instance_attr = to_instance_attr(attr);
if (instance_attr->show)
return instance_attr->show(instance, buffer);
return -EIO;
}
/* Function to 'store' fields into the edac_dev 'instance' structure */
static ssize_t edac_dev_instance_store(struct kobject *kobj,
struct attribute *attr,
const char *buffer, size_t count)
{
struct edac_device_instance *instance = to_instance(kobj);
struct instance_attribute *instance_attr = to_instance_attr(attr);
if (instance_attr->store)
return instance_attr->store(instance, buffer, count);
return -EIO;
}
/* edac_dev file operations for an 'instance' */
static const struct sysfs_ops device_instance_ops = {
.show = edac_dev_instance_show,
.store = edac_dev_instance_store
};
#define INSTANCE_ATTR(_name,_mode,_show,_store) \
static struct instance_attribute attr_instance_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
.show = _show, \
.store = _store, \
};
/*
* Define attributes visible for the edac_device instance object
* Each contains a pointer to a show and an optional set
* function pointer that does the low level output/input
*/
INSTANCE_ATTR(ce_count, S_IRUGO, instance_ce_count_show, NULL);
INSTANCE_ATTR(ue_count, S_IRUGO, instance_ue_count_show, NULL);
/* list of edac_dev 'instance' attributes */
static struct instance_attribute *device_instance_attr[] = {
&attr_instance_ce_count,
&attr_instance_ue_count,
NULL,
};
/* The 'ktype' for each edac_dev 'instance' */
static struct kobj_type ktype_instance_ctrl = {
.release = edac_device_ctrl_instance_release,
.sysfs_ops = &device_instance_ops,
.default_attrs = (struct attribute **)device_instance_attr,
};
/* edac_dev -> instance -> block information */
#define to_block(k) container_of(k, struct edac_device_block, kobj)
#define to_block_attr(a) \
container_of(a, struct edac_dev_sysfs_block_attribute, attr)
/*
* Set of low-level block attribute show functions
*/
static ssize_t block_ue_count_show(struct kobject *kobj,
struct attribute *attr, char *data)
{
struct edac_device_block *block = to_block(kobj);
return sprintf(data, "%u\n", block->counters.ue_count);
}
static ssize_t block_ce_count_show(struct kobject *kobj,
struct attribute *attr, char *data)
{
struct edac_device_block *block = to_block(kobj);
return sprintf(data, "%u\n", block->counters.ce_count);
}
/* DEVICE block kobject release() function */
static void edac_device_ctrl_block_release(struct kobject *kobj)
{
struct edac_device_block *block;
edac_dbg(1, "\n");
/* get the container of the kobj */
block = to_block(kobj);
/* map from 'block kobj' to 'block->instance->controller->main_kobj'
* now 'release' the block kobject
*/
kobject_put(&block->instance->ctl->kobj);
}
/* Function to 'show' fields from the edac_dev 'block' structure */
static ssize_t edac_dev_block_show(struct kobject *kobj,
struct attribute *attr, char *buffer)
{
struct edac_dev_sysfs_block_attribute *block_attr =
to_block_attr(attr);
if (block_attr->show)
return block_attr->show(kobj, attr, buffer);
return -EIO;
}
/* Function to 'store' fields into the edac_dev 'block' structure */
static ssize_t edac_dev_block_store(struct kobject *kobj,
struct attribute *attr,
const char *buffer, size_t count)
{
struct edac_dev_sysfs_block_attribute *block_attr;
block_attr = to_block_attr(attr);
if (block_attr->store)
return block_attr->store(kobj, attr, buffer, count);
return -EIO;
}
/* edac_dev file operations for a 'block' */
static const struct sysfs_ops device_block_ops = {
.show = edac_dev_block_show,
.store = edac_dev_block_store
};
#define BLOCK_ATTR(_name,_mode,_show,_store) \
static struct edac_dev_sysfs_block_attribute attr_block_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
.show = _show, \
.store = _store, \
};
BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show, NULL);
BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show, NULL);
/* list of edac_dev 'block' attributes */
static struct edac_dev_sysfs_block_attribute *device_block_attr[] = {
&attr_block_ce_count,
&attr_block_ue_count,
NULL,
};
/* The 'ktype' for each edac_dev 'block' */
static struct kobj_type ktype_block_ctrl = {
.release = edac_device_ctrl_block_release,
.sysfs_ops = &device_block_ops,
.default_attrs = (struct attribute **)device_block_attr,
};
/* block ctor/dtor code */
/*
* edac_device_create_block
*/
static int edac_device_create_block(struct edac_device_ctl_info *edac_dev,
struct edac_device_instance *instance,
struct edac_device_block *block)
{
int i;
int err;
struct edac_dev_sysfs_block_attribute *sysfs_attrib;
struct kobject *main_kobj;
edac_dbg(4, "Instance '%s' inst_p=%p block '%s' block_p=%p\n",
instance->name, instance, block->name, block);
edac_dbg(4, "block kobj=%p block kobj->parent=%p\n",
&block->kobj, &block->kobj.parent);
/* init this block's kobject */
memset(&block->kobj, 0, sizeof(struct kobject));
/* bump the main kobject's reference count for this controller
* and this instance is dependent on the main
*/
main_kobj = kobject_get(&edac_dev->kobj);
if (!main_kobj) {
err = -ENODEV;
goto err_out;
}
/* Add this block's kobject */
err = kobject_init_and_add(&block->kobj, &ktype_block_ctrl,
&instance->kobj,
"%s", block->name);
if (err) {
edac_dbg(1, "Failed to register instance '%s'\n", block->name);
kobject_put(main_kobj);
err = -ENODEV;
goto err_out;
}
/* If there are driver level block attributes, then added them
* to the block kobject
*/
sysfs_attrib = block->block_attributes;
if (sysfs_attrib && block->nr_attribs) {
for (i = 0; i < block->nr_attribs; i++, sysfs_attrib++) {
edac_dbg(4, "creating block attrib='%s' attrib->%p to kobj=%p\n",
sysfs_attrib->attr.name,
sysfs_attrib, &block->kobj);
/* Create each block_attribute file */
err = sysfs_create_file(&block->kobj,
&sysfs_attrib->attr);
if (err)
goto err_on_attrib;
}
}
kobject_uevent(&block->kobj, KOBJ_ADD);
return 0;
/* Error unwind stack */
err_on_attrib:
kobject_put(&block->kobj);
err_out:
return err;
}
/*
* edac_device_delete_block(edac_dev,block);
*/
static void edac_device_delete_block(struct edac_device_ctl_info *edac_dev,
struct edac_device_block *block)
{
struct edac_dev_sysfs_block_attribute *sysfs_attrib;
int i;
/* if this block has 'attributes' then we need to iterate over the list
* and 'remove' the attributes on this block
*/
sysfs_attrib = block->block_attributes;
if (sysfs_attrib && block->nr_attribs) {
for (i = 0; i < block->nr_attribs; i++, sysfs_attrib++) {
/* remove each block_attrib file */
sysfs_remove_file(&block->kobj,
(struct attribute *) sysfs_attrib);
}
}
/* unregister this block's kobject, SEE:
* edac_device_ctrl_block_release() callback operation
*/
kobject_put(&block->kobj);
}
/* instance ctor/dtor code */
/*
* edac_device_create_instance
* create just one instance of an edac_device 'instance'
*/
static int edac_device_create_instance(struct edac_device_ctl_info *edac_dev,
int idx)
{
int i, j;
int err;
struct edac_device_instance *instance;
struct kobject *main_kobj;
instance = &edac_dev->instances[idx];
/* Init the instance's kobject */
memset(&instance->kobj, 0, sizeof(struct kobject));
instance->ctl = edac_dev;
/* bump the main kobject's reference count for this controller
* and this instance is dependent on the main
*/
main_kobj = kobject_get(&edac_dev->kobj);
if (!main_kobj) {
err = -ENODEV;
goto err_out;
}
/* Formally register this instance's kobject under the edac_device */
err = kobject_init_and_add(&instance->kobj, &ktype_instance_ctrl,
&edac_dev->kobj, "%s", instance->name);
if (err != 0) {
edac_dbg(2, "Failed to register instance '%s'\n",
instance->name);
kobject_put(main_kobj);
goto err_out;
}
edac_dbg(4, "now register '%d' blocks for instance %d\n",
instance->nr_blocks, idx);
/* register all blocks of this instance */
for (i = 0; i < instance->nr_blocks; i++) {
err = edac_device_create_block(edac_dev, instance,
&instance->blocks[i]);
if (err) {
/* If any fail, remove all previous ones */
for (j = 0; j < i; j++)
edac_device_delete_block(edac_dev,
&instance->blocks[j]);
goto err_release_instance_kobj;
}
}
kobject_uevent(&instance->kobj, KOBJ_ADD);
edac_dbg(4, "Registered instance %d '%s' kobject\n",
idx, instance->name);
return 0;
/* error unwind stack */
err_release_instance_kobj:
kobject_put(&instance->kobj);
err_out:
return err;
}
/*
* edac_device_remove_instance
* remove an edac_device instance
*/
static void edac_device_delete_instance(struct edac_device_ctl_info *edac_dev,
int idx)
{
struct edac_device_instance *instance;
int i;
instance = &edac_dev->instances[idx];
/* unregister all blocks in this instance */
for (i = 0; i < instance->nr_blocks; i++)
edac_device_delete_block(edac_dev, &instance->blocks[i]);
/* unregister this instance's kobject, SEE:
* edac_device_ctrl_instance_release() for callback operation
*/
kobject_put(&instance->kobj);
}
/*
* edac_device_create_instances
* create the first level of 'instances' for this device
* (ie 'cache' might have 'cache0', 'cache1', 'cache2', etc
*/
static int edac_device_create_instances(struct edac_device_ctl_info *edac_dev)
{
int i, j;
int err;
edac_dbg(0, "\n");
/* iterate over creation of the instances */
for (i = 0; i < edac_dev->nr_instances; i++) {
err = edac_device_create_instance(edac_dev, i);
if (err) {
/* unwind previous instances on error */
for (j = 0; j < i; j++)
edac_device_delete_instance(edac_dev, j);
return err;
}
}
return 0;
}
/*
* edac_device_delete_instances(edac_dev);
* unregister all the kobjects of the instances
*/
static void edac_device_delete_instances(struct edac_device_ctl_info *edac_dev)
{
int i;
/* iterate over creation of the instances */
for (i = 0; i < edac_dev->nr_instances; i++)
edac_device_delete_instance(edac_dev, i);
}
/* edac_dev sysfs ctor/dtor code */
/*
* edac_device_add_main_sysfs_attributes
* add some attributes to this instance's main kobject
*/
static int edac_device_add_main_sysfs_attributes(
struct edac_device_ctl_info *edac_dev)
{
struct edac_dev_sysfs_attribute *sysfs_attrib;
int err = 0;
sysfs_attrib = edac_dev->sysfs_attributes;
if (sysfs_attrib) {
/* iterate over the array and create an attribute for each
* entry in the list
*/
while (sysfs_attrib->attr.name != NULL) {
err = sysfs_create_file(&edac_dev->kobj,
(struct attribute*) sysfs_attrib);
if (err)
goto err_out;
sysfs_attrib++;
}
}
err_out:
return err;
}
/*
* edac_device_remove_main_sysfs_attributes
* remove any attributes to this instance's main kobject
*/
static void edac_device_remove_main_sysfs_attributes(
struct edac_device_ctl_info *edac_dev)
{
struct edac_dev_sysfs_attribute *sysfs_attrib;
/* if there are main attributes, defined, remove them. First,
* point to the start of the array and iterate over it
* removing each attribute listed from this device's instance's kobject
*/
sysfs_attrib = edac_dev->sysfs_attributes;
if (sysfs_attrib) {
while (sysfs_attrib->attr.name != NULL) {
sysfs_remove_file(&edac_dev->kobj,
(struct attribute *) sysfs_attrib);
sysfs_attrib++;
}
}
}
/*
* edac_device_create_sysfs() Constructor
*
* accept a created edac_device control structure
* and 'export' it to sysfs. The 'main' kobj should already have been
* created. 'instance' and 'block' kobjects should be registered
* along with any 'block' attributes from the low driver. In addition,
* the main attributes (if any) are connected to the main kobject of
* the control structure.
*
* Return:
* 0 Success
* !0 Failure
*/
int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev)
{
int err;
struct kobject *edac_kobj = &edac_dev->kobj;
edac_dbg(0, "idx=%d\n", edac_dev->dev_idx);
/* go create any main attributes callers wants */
err = edac_device_add_main_sysfs_attributes(edac_dev);
if (err) {
edac_dbg(0, "failed to add sysfs attribs\n");
goto err_out;
}
/* create a symlink from the edac device
* to the platform 'device' being used for this
*/
err = sysfs_create_link(edac_kobj,
&edac_dev->dev->kobj, EDAC_DEVICE_SYMLINK);
if (err) {
edac_dbg(0, "sysfs_create_link() returned err= %d\n", err);
goto err_remove_main_attribs;
}
/* Create the first level instance directories
* In turn, the nested blocks beneath the instances will
* be registered as well
*/
err = edac_device_create_instances(edac_dev);
if (err) {
edac_dbg(0, "edac_device_create_instances() returned err= %d\n",
err);
goto err_remove_link;
}
edac_dbg(4, "create-instances done, idx=%d\n", edac_dev->dev_idx);
return 0;
/* Error unwind stack */
err_remove_link:
/* remove the sym link */
sysfs_remove_link(&edac_dev->kobj, EDAC_DEVICE_SYMLINK);
err_remove_main_attribs:
edac_device_remove_main_sysfs_attributes(edac_dev);
err_out:
return err;
}
/*
* edac_device_remove_sysfs() destructor
*
* given an edac_device struct, tear down the kobject resources
*/
void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev)
{
edac_dbg(0, "\n");
/* remove any main attributes for this device */
edac_device_remove_main_sysfs_attributes(edac_dev);
/* remove the device sym link */
sysfs_remove_link(&edac_dev->kobj, EDAC_DEVICE_SYMLINK);
/* walk the instance/block kobject tree, deconstructing it */
edac_device_delete_instances(edac_dev);
}

1303
drivers/edac/edac_mc.c Normal file

File diff suppressed because it is too large Load diff

1174
drivers/edac/edac_mc_sysfs.c Normal file

File diff suppressed because it is too large Load diff

154
drivers/edac/edac_module.c Normal file
View file

@ -0,0 +1,154 @@
/*
* edac_module.c
*
* (C) 2007 www.softwarebitmaker.com
*
* This file is licensed under the terms of the GNU General Public
* License version 2. This program is licensed "as is" without any
* warranty of any kind, whether express or implied.
*
* Author: Doug Thompson <dougthompson@xmission.com>
*
*/
#include <linux/edac.h>
#include "edac_core.h"
#include "edac_module.h"
#define EDAC_VERSION "Ver: 3.0.0"
#ifdef CONFIG_EDAC_DEBUG
static int edac_set_debug_level(const char *buf, struct kernel_param *kp)
{
unsigned long val;
int ret;
ret = kstrtoul(buf, 0, &val);
if (ret)
return ret;
if (val > 4)
return -EINVAL;
return param_set_int(buf, kp);
}
/* Values of 0 to 4 will generate output */
int edac_debug_level = 2;
EXPORT_SYMBOL_GPL(edac_debug_level);
module_param_call(edac_debug_level, edac_set_debug_level, param_get_int,
&edac_debug_level, 0644);
MODULE_PARM_DESC(edac_debug_level, "EDAC debug level: [0-4], default: 2");
#endif
/* scope is to module level only */
struct workqueue_struct *edac_workqueue;
/*
* edac_op_state_to_string()
*/
char *edac_op_state_to_string(int opstate)
{
if (opstate == OP_RUNNING_POLL)
return "POLLED";
else if (opstate == OP_RUNNING_INTERRUPT)
return "INTERRUPT";
else if (opstate == OP_RUNNING_POLL_INTR)
return "POLL-INTR";
else if (opstate == OP_ALLOC)
return "ALLOC";
else if (opstate == OP_OFFLINE)
return "OFFLINE";
return "UNKNOWN";
}
/*
* edac_workqueue_setup
* initialize the edac work queue for polling operations
*/
static int edac_workqueue_setup(void)
{
edac_workqueue = create_singlethread_workqueue("edac-poller");
if (edac_workqueue == NULL)
return -ENODEV;
else
return 0;
}
/*
* edac_workqueue_teardown
* teardown the edac workqueue
*/
static void edac_workqueue_teardown(void)
{
if (edac_workqueue) {
flush_workqueue(edac_workqueue);
destroy_workqueue(edac_workqueue);
edac_workqueue = NULL;
}
}
/*
* edac_init
* module initialization entry point
*/
static int __init edac_init(void)
{
int err = 0;
edac_printk(KERN_INFO, EDAC_MC, EDAC_VERSION "\n");
/*
* Harvest and clear any boot/initialization PCI parity errors
*
* FIXME: This only clears errors logged by devices present at time of
* module initialization. We should also do an initial clear
* of each newly hotplugged device.
*/
edac_pci_clear_parity_errors();
err = edac_mc_sysfs_init();
if (err)
goto error;
edac_debugfs_init();
/* Setup/Initialize the workq for this core */
err = edac_workqueue_setup();
if (err) {
edac_printk(KERN_ERR, EDAC_MC, "init WorkQueue failure\n");
goto error;
}
return 0;
error:
return err;
}
/*
* edac_exit()
* module exit/termination function
*/
static void __exit edac_exit(void)
{
edac_dbg(0, "\n");
/* tear down the various subsystems */
edac_workqueue_teardown();
edac_mc_sysfs_exit();
edac_debugfs_exit();
}
/*
* Inform the kernel of our entry and exit points
*/
subsys_initcall(edac_init);
module_exit(edac_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Doug Thompson www.softwarebitmaker.com, et al");
MODULE_DESCRIPTION("Core library routines for EDAC reporting");

View file

@ -0,0 +1,99 @@
/*
* edac_module.h
*
* For defining functions/data for within the EDAC_CORE module only
*
* written by doug thompson <norsk5@xmission.h>
*/
#ifndef __EDAC_MODULE_H__
#define __EDAC_MODULE_H__
#include "edac_core.h"
/*
* INTERNAL EDAC MODULE:
* EDAC memory controller sysfs create/remove functions
* and setup/teardown functions
*
* edac_mc objects
*/
/* on edac_mc_sysfs.c */
int edac_mc_sysfs_init(void);
void edac_mc_sysfs_exit(void);
extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci);
extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci);
void edac_unregister_sysfs(struct mem_ctl_info *mci);
extern int edac_get_log_ue(void);
extern int edac_get_log_ce(void);
extern int edac_get_panic_on_ue(void);
extern int edac_mc_get_log_ue(void);
extern int edac_mc_get_log_ce(void);
extern int edac_mc_get_panic_on_ue(void);
extern int edac_get_poll_msec(void);
extern int edac_mc_get_poll_msec(void);
unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
unsigned len);
/* on edac_device.c */
extern int edac_device_register_sysfs_main_kobj(
struct edac_device_ctl_info *edac_dev);
extern void edac_device_unregister_sysfs_main_kobj(
struct edac_device_ctl_info *edac_dev);
extern int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev);
extern void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev);
/* edac core workqueue: single CPU mode */
extern struct workqueue_struct *edac_workqueue;
extern void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev,
unsigned msec);
extern void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev);
extern void edac_device_reset_delay_period(struct edac_device_ctl_info
*edac_dev, unsigned long value);
extern void edac_mc_reset_delay_period(unsigned long value);
extern void *edac_align_ptr(void **p, unsigned size, int n_elems);
/*
* EDAC debugfs functions
*/
#ifdef CONFIG_EDAC_DEBUG
int edac_debugfs_init(void);
void edac_debugfs_exit(void);
#else
static inline int edac_debugfs_init(void)
{
return -ENODEV;
}
static inline void edac_debugfs_exit(void) {}
#endif
/*
* EDAC PCI functions
*/
#ifdef CONFIG_PCI
extern void edac_pci_do_parity_check(void);
extern void edac_pci_clear_parity_errors(void);
extern int edac_sysfs_pci_setup(void);
extern void edac_sysfs_pci_teardown(void);
extern int edac_pci_get_check_errors(void);
extern int edac_pci_get_poll_msec(void);
extern void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci);
extern void edac_pci_handle_pe(struct edac_pci_ctl_info *pci, const char *msg);
extern void edac_pci_handle_npe(struct edac_pci_ctl_info *pci,
const char *msg);
#else /* CONFIG_PCI */
/* pre-process these away */
#define edac_pci_do_parity_check()
#define edac_pci_clear_parity_errors()
#define edac_sysfs_pci_setup() (0)
#define edac_sysfs_pci_teardown()
#define edac_pci_get_check_errors()
#define edac_pci_get_poll_msec()
#define edac_pci_handle_pe()
#define edac_pci_handle_npe()
#endif /* CONFIG_PCI */
#endif /* __EDAC_MODULE_H__ */

498
drivers/edac/edac_pci.c Normal file
View file

@ -0,0 +1,498 @@
/*
* EDAC PCI component
*
* Author: Dave Jiang <djiang@mvista.com>
*
* 2007 (c) MontaVista Software, Inc. This file is licensed under
* the terms of the GNU General Public License version 2. This program
* is licensed "as is" without any warranty of any kind, whether express
* or implied.
*
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/workqueue.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include "edac_core.h"
#include "edac_module.h"
static DEFINE_MUTEX(edac_pci_ctls_mutex);
static LIST_HEAD(edac_pci_list);
static atomic_t pci_indexes = ATOMIC_INIT(0);
/*
* edac_pci_alloc_ctl_info
*
* The alloc() function for the 'edac_pci' control info
* structure. The chip driver will allocate one of these for each
* edac_pci it is going to control/register with the EDAC CORE.
*/
struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
const char *edac_pci_name)
{
struct edac_pci_ctl_info *pci;
void *p = NULL, *pvt;
unsigned int size;
edac_dbg(1, "\n");
pci = edac_align_ptr(&p, sizeof(*pci), 1);
pvt = edac_align_ptr(&p, 1, sz_pvt);
size = ((unsigned long)pvt) + sz_pvt;
/* Alloc the needed control struct memory */
pci = kzalloc(size, GFP_KERNEL);
if (pci == NULL)
return NULL;
/* Now much private space */
pvt = sz_pvt ? ((char *)pci) + ((unsigned long)pvt) : NULL;
pci->pvt_info = pvt;
pci->op_state = OP_ALLOC;
snprintf(pci->name, strlen(edac_pci_name) + 1, "%s", edac_pci_name);
return pci;
}
EXPORT_SYMBOL_GPL(edac_pci_alloc_ctl_info);
/*
* edac_pci_free_ctl_info()
*
* Last action on the pci control structure.
*
* call the remove sysfs information, which will unregister
* this control struct's kobj. When that kobj's ref count
* goes to zero, its release function will be call and then
* kfree() the memory.
*/
void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci)
{
edac_dbg(1, "\n");
edac_pci_remove_sysfs(pci);
}
EXPORT_SYMBOL_GPL(edac_pci_free_ctl_info);
/*
* find_edac_pci_by_dev()
* scans the edac_pci list for a specific 'struct device *'
*
* return NULL if not found, or return control struct pointer
*/
static struct edac_pci_ctl_info *find_edac_pci_by_dev(struct device *dev)
{
struct edac_pci_ctl_info *pci;
struct list_head *item;
edac_dbg(1, "\n");
list_for_each(item, &edac_pci_list) {
pci = list_entry(item, struct edac_pci_ctl_info, link);
if (pci->dev == dev)
return pci;
}
return NULL;
}
/*
* add_edac_pci_to_global_list
* Before calling this function, caller must assign a unique value to
* edac_dev->pci_idx.
* Return:
* 0 on success
* 1 on failure
*/
static int add_edac_pci_to_global_list(struct edac_pci_ctl_info *pci)
{
struct list_head *item, *insert_before;
struct edac_pci_ctl_info *rover;
edac_dbg(1, "\n");
insert_before = &edac_pci_list;
/* Determine if already on the list */
rover = find_edac_pci_by_dev(pci->dev);
if (unlikely(rover != NULL))
goto fail0;
/* Insert in ascending order by 'pci_idx', so find position */
list_for_each(item, &edac_pci_list) {
rover = list_entry(item, struct edac_pci_ctl_info, link);
if (rover->pci_idx >= pci->pci_idx) {
if (unlikely(rover->pci_idx == pci->pci_idx))
goto fail1;
insert_before = item;
break;
}
}
list_add_tail_rcu(&pci->link, insert_before);
return 0;
fail0:
edac_printk(KERN_WARNING, EDAC_PCI,
"%s (%s) %s %s already assigned %d\n",
dev_name(rover->dev), edac_dev_name(rover),
rover->mod_name, rover->ctl_name, rover->pci_idx);
return 1;
fail1:
edac_printk(KERN_WARNING, EDAC_PCI,
"but in low-level driver: attempt to assign\n"
"\tduplicate pci_idx %d in %s()\n", rover->pci_idx,
__func__);
return 1;
}
/*
* del_edac_pci_from_global_list
*
* remove the PCI control struct from the global list
*/
static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci)
{
list_del_rcu(&pci->link);
/* these are for safe removal of devices from global list while
* NMI handlers may be traversing list
*/
synchronize_rcu();
INIT_LIST_HEAD(&pci->link);
}
#if 0
/* Older code, but might use in the future */
/*
* edac_pci_find()
* Search for an edac_pci_ctl_info structure whose index is 'idx'
*
* If found, return a pointer to the structure
* Else return NULL.
*
* Caller must hold pci_ctls_mutex.
*/
struct edac_pci_ctl_info *edac_pci_find(int idx)
{
struct list_head *item;
struct edac_pci_ctl_info *pci;
/* Iterage over list, looking for exact match of ID */
list_for_each(item, &edac_pci_list) {
pci = list_entry(item, struct edac_pci_ctl_info, link);
if (pci->pci_idx >= idx) {
if (pci->pci_idx == idx)
return pci;
/* not on list, so terminate early */
break;
}
}
return NULL;
}
EXPORT_SYMBOL_GPL(edac_pci_find);
#endif
/*
* edac_pci_workq_function()
*
* periodic function that performs the operation
* scheduled by a workq request, for a given PCI control struct
*/
static void edac_pci_workq_function(struct work_struct *work_req)
{
struct delayed_work *d_work = to_delayed_work(work_req);
struct edac_pci_ctl_info *pci = to_edac_pci_ctl_work(d_work);
int msec;
unsigned long delay;
edac_dbg(3, "checking\n");
mutex_lock(&edac_pci_ctls_mutex);
if (pci->op_state == OP_RUNNING_POLL) {
/* we might be in POLL mode, but there may NOT be a poll func
*/
if ((pci->edac_check != NULL) && edac_pci_get_check_errors())
pci->edac_check(pci);
/* if we are on a one second period, then use round */
msec = edac_pci_get_poll_msec();
if (msec == 1000)
delay = round_jiffies_relative(msecs_to_jiffies(msec));
else
delay = msecs_to_jiffies(msec);
/* Reschedule only if we are in POLL mode */
queue_delayed_work(edac_workqueue, &pci->work, delay);
}
mutex_unlock(&edac_pci_ctls_mutex);
}
/*
* edac_pci_workq_setup()
* initialize a workq item for this edac_pci instance
* passing in the new delay period in msec
*
* locking model:
* called when 'edac_pci_ctls_mutex' is locked
*/
static void edac_pci_workq_setup(struct edac_pci_ctl_info *pci,
unsigned int msec)
{
edac_dbg(0, "\n");
INIT_DELAYED_WORK(&pci->work, edac_pci_workq_function);
queue_delayed_work(edac_workqueue, &pci->work,
msecs_to_jiffies(edac_pci_get_poll_msec()));
}
/*
* edac_pci_workq_teardown()
* stop the workq processing on this edac_pci instance
*/
static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci)
{
int status;
edac_dbg(0, "\n");
status = cancel_delayed_work(&pci->work);
if (status == 0)
flush_workqueue(edac_workqueue);
}
/*
* edac_pci_reset_delay_period
*
* called with a new period value for the workq period
* a) stop current workq timer
* b) restart workq timer with new value
*/
void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci,
unsigned long value)
{
edac_dbg(0, "\n");
edac_pci_workq_teardown(pci);
/* need to lock for the setup */
mutex_lock(&edac_pci_ctls_mutex);
edac_pci_workq_setup(pci, value);
mutex_unlock(&edac_pci_ctls_mutex);
}
EXPORT_SYMBOL_GPL(edac_pci_reset_delay_period);
/*
* edac_pci_alloc_index: Allocate a unique PCI index number
*
* Return:
* allocated index number
*
*/
int edac_pci_alloc_index(void)
{
return atomic_inc_return(&pci_indexes) - 1;
}
EXPORT_SYMBOL_GPL(edac_pci_alloc_index);
/*
* edac_pci_add_device: Insert the 'edac_dev' structure into the
* edac_pci global list and create sysfs entries associated with
* edac_pci structure.
* @pci: pointer to the edac_device structure to be added to the list
* @edac_idx: A unique numeric identifier to be assigned to the
* 'edac_pci' structure.
*
* Return:
* 0 Success
* !0 Failure
*/
int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx)
{
edac_dbg(0, "\n");
pci->pci_idx = edac_idx;
pci->start_time = jiffies;
mutex_lock(&edac_pci_ctls_mutex);
if (add_edac_pci_to_global_list(pci))
goto fail0;
if (edac_pci_create_sysfs(pci)) {
edac_pci_printk(pci, KERN_WARNING,
"failed to create sysfs pci\n");
goto fail1;
}
if (pci->edac_check != NULL) {
pci->op_state = OP_RUNNING_POLL;
edac_pci_workq_setup(pci, 1000);
} else {
pci->op_state = OP_RUNNING_INTERRUPT;
}
edac_pci_printk(pci, KERN_INFO,
"Giving out device to module %s controller %s: DEV %s (%s)\n",
pci->mod_name, pci->ctl_name, pci->dev_name,
edac_op_state_to_string(pci->op_state));
mutex_unlock(&edac_pci_ctls_mutex);
return 0;
/* error unwind stack */
fail1:
del_edac_pci_from_global_list(pci);
fail0:
mutex_unlock(&edac_pci_ctls_mutex);
return 1;
}
EXPORT_SYMBOL_GPL(edac_pci_add_device);
/*
* edac_pci_del_device()
* Remove sysfs entries for specified edac_pci structure and
* then remove edac_pci structure from global list
*
* @dev:
* Pointer to 'struct device' representing edac_pci structure
* to remove
*
* Return:
* Pointer to removed edac_pci structure,
* or NULL if device not found
*/
struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev)
{
struct edac_pci_ctl_info *pci;
edac_dbg(0, "\n");
mutex_lock(&edac_pci_ctls_mutex);
/* ensure the control struct is on the global list
* if not, then leave
*/
pci = find_edac_pci_by_dev(dev);
if (pci == NULL) {
mutex_unlock(&edac_pci_ctls_mutex);
return NULL;
}
pci->op_state = OP_OFFLINE;
del_edac_pci_from_global_list(pci);
mutex_unlock(&edac_pci_ctls_mutex);
/* stop the workq timer */
edac_pci_workq_teardown(pci);
edac_printk(KERN_INFO, EDAC_PCI,
"Removed device %d for %s %s: DEV %s\n",
pci->pci_idx, pci->mod_name, pci->ctl_name, edac_dev_name(pci));
return pci;
}
EXPORT_SYMBOL_GPL(edac_pci_del_device);
/*
* edac_pci_generic_check
*
* a Generic parity check API
*/
static void edac_pci_generic_check(struct edac_pci_ctl_info *pci)
{
edac_dbg(4, "\n");
edac_pci_do_parity_check();
}
/* free running instance index counter */
static int edac_pci_idx;
#define EDAC_PCI_GENCTL_NAME "EDAC PCI controller"
struct edac_pci_gen_data {
int edac_idx;
};
/*
* edac_pci_create_generic_ctl
*
* A generic constructor for a PCI parity polling device
* Some systems have more than one domain of PCI busses.
* For systems with one domain, then this API will
* provide for a generic poller.
*
* This routine calls the edac_pci_alloc_ctl_info() for
* the generic device, with default values
*/
struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev,
const char *mod_name)
{
struct edac_pci_ctl_info *pci;
struct edac_pci_gen_data *pdata;
pci = edac_pci_alloc_ctl_info(sizeof(*pdata), EDAC_PCI_GENCTL_NAME);
if (!pci)
return NULL;
pdata = pci->pvt_info;
pci->dev = dev;
dev_set_drvdata(pci->dev, pci);
pci->dev_name = pci_name(to_pci_dev(dev));
pci->mod_name = mod_name;
pci->ctl_name = EDAC_PCI_GENCTL_NAME;
if (edac_op_state == EDAC_OPSTATE_POLL)
pci->edac_check = edac_pci_generic_check;
pdata->edac_idx = edac_pci_idx++;
if (edac_pci_add_device(pci, pdata->edac_idx) > 0) {
edac_dbg(3, "failed edac_pci_add_device()\n");
edac_pci_free_ctl_info(pci);
return NULL;
}
return pci;
}
EXPORT_SYMBOL_GPL(edac_pci_create_generic_ctl);
/*
* edac_pci_release_generic_ctl
*
* The release function of a generic EDAC PCI polling device
*/
void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci)
{
edac_dbg(0, "pci mod=%s\n", pci->mod_name);
edac_pci_del_device(pci->dev);
edac_pci_free_ctl_info(pci);
}
EXPORT_SYMBOL_GPL(edac_pci_release_generic_ctl);

View file

@ -0,0 +1,765 @@
/*
* (C) 2005, 2006 Linux Networx (http://lnxi.com)
* This file may be distributed under the terms of the
* GNU General Public License.
*
* Written Doug Thompson <norsk5@xmission.com>
*
*/
#include <linux/module.h>
#include <linux/edac.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include "edac_core.h"
#include "edac_module.h"
/* Turn off this whole feature if PCI is not configured */
#ifdef CONFIG_PCI
#define EDAC_PCI_SYMLINK "device"
/* data variables exported via sysfs */
static int check_pci_errors; /* default NO check PCI parity */
static int edac_pci_panic_on_pe; /* default NO panic on PCI Parity */
static int edac_pci_log_pe = 1; /* log PCI parity errors */
static int edac_pci_log_npe = 1; /* log PCI non-parity error errors */
static int edac_pci_poll_msec = 1000; /* one second workq period */
static atomic_t pci_parity_count = ATOMIC_INIT(0);
static atomic_t pci_nonparity_count = ATOMIC_INIT(0);
static struct kobject *edac_pci_top_main_kobj;
static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0);
/* getter functions for the data variables */
int edac_pci_get_check_errors(void)
{
return check_pci_errors;
}
static int edac_pci_get_log_pe(void)
{
return edac_pci_log_pe;
}
static int edac_pci_get_log_npe(void)
{
return edac_pci_log_npe;
}
static int edac_pci_get_panic_on_pe(void)
{
return edac_pci_panic_on_pe;
}
int edac_pci_get_poll_msec(void)
{
return edac_pci_poll_msec;
}
/**************************** EDAC PCI sysfs instance *******************/
static ssize_t instance_pe_count_show(struct edac_pci_ctl_info *pci, char *data)
{
return sprintf(data, "%u\n", atomic_read(&pci->counters.pe_count));
}
static ssize_t instance_npe_count_show(struct edac_pci_ctl_info *pci,
char *data)
{
return sprintf(data, "%u\n", atomic_read(&pci->counters.npe_count));
}
#define to_instance(k) container_of(k, struct edac_pci_ctl_info, kobj)
#define to_instance_attr(a) container_of(a, struct instance_attribute, attr)
/* DEVICE instance kobject release() function */
static void edac_pci_instance_release(struct kobject *kobj)
{
struct edac_pci_ctl_info *pci;
edac_dbg(0, "\n");
/* Form pointer to containing struct, the pci control struct */
pci = to_instance(kobj);
/* decrement reference count on top main kobj */
kobject_put(edac_pci_top_main_kobj);
kfree(pci); /* Free the control struct */
}
/* instance specific attribute structure */
struct instance_attribute {
struct attribute attr;
ssize_t(*show) (struct edac_pci_ctl_info *, char *);
ssize_t(*store) (struct edac_pci_ctl_info *, const char *, size_t);
};
/* Function to 'show' fields from the edac_pci 'instance' structure */
static ssize_t edac_pci_instance_show(struct kobject *kobj,
struct attribute *attr, char *buffer)
{
struct edac_pci_ctl_info *pci = to_instance(kobj);
struct instance_attribute *instance_attr = to_instance_attr(attr);
if (instance_attr->show)
return instance_attr->show(pci, buffer);
return -EIO;
}
/* Function to 'store' fields into the edac_pci 'instance' structure */
static ssize_t edac_pci_instance_store(struct kobject *kobj,
struct attribute *attr,
const char *buffer, size_t count)
{
struct edac_pci_ctl_info *pci = to_instance(kobj);
struct instance_attribute *instance_attr = to_instance_attr(attr);
if (instance_attr->store)
return instance_attr->store(pci, buffer, count);
return -EIO;
}
/* fs_ops table */
static const struct sysfs_ops pci_instance_ops = {
.show = edac_pci_instance_show,
.store = edac_pci_instance_store
};
#define INSTANCE_ATTR(_name, _mode, _show, _store) \
static struct instance_attribute attr_instance_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
.show = _show, \
.store = _store, \
};
INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL);
INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL);
/* pci instance attributes */
static struct instance_attribute *pci_instance_attr[] = {
&attr_instance_pe_count,
&attr_instance_npe_count,
NULL
};
/* the ktype for a pci instance */
static struct kobj_type ktype_pci_instance = {
.release = edac_pci_instance_release,
.sysfs_ops = &pci_instance_ops,
.default_attrs = (struct attribute **)pci_instance_attr,
};
/*
* edac_pci_create_instance_kobj
*
* construct one EDAC PCI instance's kobject for use
*/
static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx)
{
struct kobject *main_kobj;
int err;
edac_dbg(0, "\n");
/* First bump the ref count on the top main kobj, which will
* track the number of PCI instances we have, and thus nest
* properly on keeping the module loaded
*/
main_kobj = kobject_get(edac_pci_top_main_kobj);
if (!main_kobj) {
err = -ENODEV;
goto error_out;
}
/* And now register this new kobject under the main kobj */
err = kobject_init_and_add(&pci->kobj, &ktype_pci_instance,
edac_pci_top_main_kobj, "pci%d", idx);
if (err != 0) {
edac_dbg(2, "failed to register instance pci%d\n", idx);
kobject_put(edac_pci_top_main_kobj);
goto error_out;
}
kobject_uevent(&pci->kobj, KOBJ_ADD);
edac_dbg(1, "Register instance 'pci%d' kobject\n", idx);
return 0;
/* Error unwind statck */
error_out:
return err;
}
/*
* edac_pci_unregister_sysfs_instance_kobj
*
* unregister the kobj for the EDAC PCI instance
*/
static void edac_pci_unregister_sysfs_instance_kobj(
struct edac_pci_ctl_info *pci)
{
edac_dbg(0, "\n");
/* Unregister the instance kobject and allow its release
* function release the main reference count and then
* kfree the memory
*/
kobject_put(&pci->kobj);
}
/***************************** EDAC PCI sysfs root **********************/
#define to_edacpci(k) container_of(k, struct edac_pci_ctl_info, kobj)
#define to_edacpci_attr(a) container_of(a, struct edac_pci_attr, attr)
/* simple show/store functions for attributes */
static ssize_t edac_pci_int_show(void *ptr, char *buffer)
{
int *value = ptr;
return sprintf(buffer, "%d\n", *value);
}
static ssize_t edac_pci_int_store(void *ptr, const char *buffer, size_t count)
{
int *value = ptr;
if (isdigit(*buffer))
*value = simple_strtoul(buffer, NULL, 0);
return count;
}
struct edac_pci_dev_attribute {
struct attribute attr;
void *value;
ssize_t(*show) (void *, char *);
ssize_t(*store) (void *, const char *, size_t);
};
/* Set of show/store abstract level functions for PCI Parity object */
static ssize_t edac_pci_dev_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
struct edac_pci_dev_attribute *edac_pci_dev;
edac_pci_dev = (struct edac_pci_dev_attribute *)attr;
if (edac_pci_dev->show)
return edac_pci_dev->show(edac_pci_dev->value, buffer);
return -EIO;
}
static ssize_t edac_pci_dev_store(struct kobject *kobj,
struct attribute *attr, const char *buffer,
size_t count)
{
struct edac_pci_dev_attribute *edac_pci_dev;
edac_pci_dev = (struct edac_pci_dev_attribute *)attr;
if (edac_pci_dev->store)
return edac_pci_dev->store(edac_pci_dev->value, buffer, count);
return -EIO;
}
static const struct sysfs_ops edac_pci_sysfs_ops = {
.show = edac_pci_dev_show,
.store = edac_pci_dev_store
};
#define EDAC_PCI_ATTR(_name,_mode,_show,_store) \
static struct edac_pci_dev_attribute edac_pci_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
.value = &_name, \
.show = _show, \
.store = _store, \
};
#define EDAC_PCI_STRING_ATTR(_name,_data,_mode,_show,_store) \
static struct edac_pci_dev_attribute edac_pci_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
.value = _data, \
.show = _show, \
.store = _store, \
};
/* PCI Parity control files */
EDAC_PCI_ATTR(check_pci_errors, S_IRUGO | S_IWUSR, edac_pci_int_show,
edac_pci_int_store);
EDAC_PCI_ATTR(edac_pci_log_pe, S_IRUGO | S_IWUSR, edac_pci_int_show,
edac_pci_int_store);
EDAC_PCI_ATTR(edac_pci_log_npe, S_IRUGO | S_IWUSR, edac_pci_int_show,
edac_pci_int_store);
EDAC_PCI_ATTR(edac_pci_panic_on_pe, S_IRUGO | S_IWUSR, edac_pci_int_show,
edac_pci_int_store);
EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL);
EDAC_PCI_ATTR(pci_nonparity_count, S_IRUGO, edac_pci_int_show, NULL);
/* Base Attributes of the memory ECC object */
static struct edac_pci_dev_attribute *edac_pci_attr[] = {
&edac_pci_attr_check_pci_errors,
&edac_pci_attr_edac_pci_log_pe,
&edac_pci_attr_edac_pci_log_npe,
&edac_pci_attr_edac_pci_panic_on_pe,
&edac_pci_attr_pci_parity_count,
&edac_pci_attr_pci_nonparity_count,
NULL,
};
/*
* edac_pci_release_main_kobj
*
* This release function is called when the reference count to the
* passed kobj goes to zero.
*
* This kobj is the 'main' kobject that EDAC PCI instances
* link to, and thus provide for proper nesting counts
*/
static void edac_pci_release_main_kobj(struct kobject *kobj)
{
edac_dbg(0, "here to module_put(THIS_MODULE)\n");
kfree(kobj);
/* last reference to top EDAC PCI kobject has been removed,
* NOW release our ref count on the core module
*/
module_put(THIS_MODULE);
}
/* ktype struct for the EDAC PCI main kobj */
static struct kobj_type ktype_edac_pci_main_kobj = {
.release = edac_pci_release_main_kobj,
.sysfs_ops = &edac_pci_sysfs_ops,
.default_attrs = (struct attribute **)edac_pci_attr,
};
/**
* edac_pci_main_kobj_setup()
*
* setup the sysfs for EDAC PCI attributes
* assumes edac_subsys has already been initialized
*/
static int edac_pci_main_kobj_setup(void)
{
int err;
struct bus_type *edac_subsys;
edac_dbg(0, "\n");
/* check and count if we have already created the main kobject */
if (atomic_inc_return(&edac_pci_sysfs_refcount) != 1)
return 0;
/* First time, so create the main kobject and its
* controls and attributes
*/
edac_subsys = edac_get_sysfs_subsys();
if (edac_subsys == NULL) {
edac_dbg(1, "no edac_subsys\n");
err = -ENODEV;
goto decrement_count_fail;
}
/* Bump the reference count on this module to ensure the
* modules isn't unloaded until we deconstruct the top
* level main kobj for EDAC PCI
*/
if (!try_module_get(THIS_MODULE)) {
edac_dbg(1, "try_module_get() failed\n");
err = -ENODEV;
goto mod_get_fail;
}
edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
if (!edac_pci_top_main_kobj) {
edac_dbg(1, "Failed to allocate\n");
err = -ENOMEM;
goto kzalloc_fail;
}
/* Instanstiate the pci object */
err = kobject_init_and_add(edac_pci_top_main_kobj,
&ktype_edac_pci_main_kobj,
&edac_subsys->dev_root->kobj, "pci");
if (err) {
edac_dbg(1, "Failed to register '.../edac/pci'\n");
goto kobject_init_and_add_fail;
}
/* At this point, to 'release' the top level kobject
* for EDAC PCI, then edac_pci_main_kobj_teardown()
* must be used, for resources to be cleaned up properly
*/
kobject_uevent(edac_pci_top_main_kobj, KOBJ_ADD);
edac_dbg(1, "Registered '.../edac/pci' kobject\n");
return 0;
/* Error unwind statck */
kobject_init_and_add_fail:
kfree(edac_pci_top_main_kobj);
kzalloc_fail:
module_put(THIS_MODULE);
mod_get_fail:
edac_put_sysfs_subsys();
decrement_count_fail:
/* if are on this error exit, nothing to tear down */
atomic_dec(&edac_pci_sysfs_refcount);
return err;
}
/*
* edac_pci_main_kobj_teardown()
*
* if no longer linked (needed) remove the top level EDAC PCI
* kobject with its controls and attributes
*/
static void edac_pci_main_kobj_teardown(void)
{
edac_dbg(0, "\n");
/* Decrement the count and only if no more controller instances
* are connected perform the unregisteration of the top level
* main kobj
*/
if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) {
edac_dbg(0, "called kobject_put on main kobj\n");
kobject_put(edac_pci_top_main_kobj);
edac_put_sysfs_subsys();
}
}
/*
*
* edac_pci_create_sysfs
*
* Create the controls/attributes for the specified EDAC PCI device
*/
int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci)
{
int err;
struct kobject *edac_kobj = &pci->kobj;
edac_dbg(0, "idx=%d\n", pci->pci_idx);
/* create the top main EDAC PCI kobject, IF needed */
err = edac_pci_main_kobj_setup();
if (err)
return err;
/* Create this instance's kobject under the MAIN kobject */
err = edac_pci_create_instance_kobj(pci, pci->pci_idx);
if (err)
goto unregister_cleanup;
err = sysfs_create_link(edac_kobj, &pci->dev->kobj, EDAC_PCI_SYMLINK);
if (err) {
edac_dbg(0, "sysfs_create_link() returned err= %d\n", err);
goto symlink_fail;
}
return 0;
/* Error unwind stack */
symlink_fail:
edac_pci_unregister_sysfs_instance_kobj(pci);
unregister_cleanup:
edac_pci_main_kobj_teardown();
return err;
}
/*
* edac_pci_remove_sysfs
*
* remove the controls and attributes for this EDAC PCI device
*/
void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci)
{
edac_dbg(0, "index=%d\n", pci->pci_idx);
/* Remove the symlink */
sysfs_remove_link(&pci->kobj, EDAC_PCI_SYMLINK);
/* remove this PCI instance's sysfs entries */
edac_pci_unregister_sysfs_instance_kobj(pci);
/* Call the main unregister function, which will determine
* if this 'pci' is the last instance.
* If it is, the main kobject will be unregistered as a result
*/
edac_dbg(0, "calling edac_pci_main_kobj_teardown()\n");
edac_pci_main_kobj_teardown();
}
/************************ PCI error handling *************************/
static u16 get_pci_parity_status(struct pci_dev *dev, int secondary)
{
int where;
u16 status;
where = secondary ? PCI_SEC_STATUS : PCI_STATUS;
pci_read_config_word(dev, where, &status);
/* If we get back 0xFFFF then we must suspect that the card has been
* pulled but the Linux PCI layer has not yet finished cleaning up.
* We don't want to report on such devices
*/
if (status == 0xFFFF) {
u32 sanity;
pci_read_config_dword(dev, 0, &sanity);
if (sanity == 0xFFFFFFFF)
return 0;
}
status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR |
PCI_STATUS_PARITY;
if (status)
/* reset only the bits we are interested in */
pci_write_config_word(dev, where, status);
return status;
}
/* Clear any PCI parity errors logged by this device. */
static void edac_pci_dev_parity_clear(struct pci_dev *dev)
{
u8 header_type;
get_pci_parity_status(dev, 0);
/* read the device TYPE, looking for bridges */
pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE)
get_pci_parity_status(dev, 1);
}
/*
* PCI Parity polling
*
* Function to retrieve the current parity status
* and decode it
*
*/
static void edac_pci_dev_parity_test(struct pci_dev *dev)
{
unsigned long flags;
u16 status;
u8 header_type;
/* stop any interrupts until we can acquire the status */
local_irq_save(flags);
/* read the STATUS register on this device */
status = get_pci_parity_status(dev, 0);
/* read the device TYPE, looking for bridges */
pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
local_irq_restore(flags);
edac_dbg(4, "PCI STATUS= 0x%04x %s\n", status, dev_name(&dev->dev));
/* check the status reg for errors on boards NOT marked as broken
* if broken, we cannot trust any of the status bits
*/
if (status && !dev->broken_parity_status) {
if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) {
edac_printk(KERN_CRIT, EDAC_PCI,
"Signaled System Error on %s\n",
pci_name(dev));
atomic_inc(&pci_nonparity_count);
}
if (status & (PCI_STATUS_PARITY)) {
edac_printk(KERN_CRIT, EDAC_PCI,
"Master Data Parity Error on %s\n",
pci_name(dev));
atomic_inc(&pci_parity_count);
}
if (status & (PCI_STATUS_DETECTED_PARITY)) {
edac_printk(KERN_CRIT, EDAC_PCI,
"Detected Parity Error on %s\n",
pci_name(dev));
atomic_inc(&pci_parity_count);
}
}
edac_dbg(4, "PCI HEADER TYPE= 0x%02x %s\n",
header_type, dev_name(&dev->dev));
if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
/* On bridges, need to examine secondary status register */
status = get_pci_parity_status(dev, 1);
edac_dbg(4, "PCI SEC_STATUS= 0x%04x %s\n",
status, dev_name(&dev->dev));
/* check the secondary status reg for errors,
* on NOT broken boards
*/
if (status && !dev->broken_parity_status) {
if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) {
edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
"Signaled System Error on %s\n",
pci_name(dev));
atomic_inc(&pci_nonparity_count);
}
if (status & (PCI_STATUS_PARITY)) {
edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
"Master Data Parity Error on "
"%s\n", pci_name(dev));
atomic_inc(&pci_parity_count);
}
if (status & (PCI_STATUS_DETECTED_PARITY)) {
edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
"Detected Parity Error on %s\n",
pci_name(dev));
atomic_inc(&pci_parity_count);
}
}
}
}
/* reduce some complexity in definition of the iterator */
typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
/*
* pci_dev parity list iterator
*
* Scan the PCI device list looking for SERRORs, Master Parity ERRORS or
* Parity ERRORs on primary or secondary devices.
*/
static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
{
struct pci_dev *dev = NULL;
for_each_pci_dev(dev)
fn(dev);
}
/*
* edac_pci_do_parity_check
*
* performs the actual PCI parity check operation
*/
void edac_pci_do_parity_check(void)
{
int before_count;
edac_dbg(3, "\n");
/* if policy has PCI check off, leave now */
if (!check_pci_errors)
return;
before_count = atomic_read(&pci_parity_count);
/* scan all PCI devices looking for a Parity Error on devices and
* bridges.
* The iterator calls pci_get_device() which might sleep, thus
* we cannot disable interrupts in this scan.
*/
edac_pci_dev_parity_iterator(edac_pci_dev_parity_test);
/* Only if operator has selected panic on PCI Error */
if (edac_pci_get_panic_on_pe()) {
/* If the count is different 'after' from 'before' */
if (before_count != atomic_read(&pci_parity_count))
panic("EDAC: PCI Parity Error");
}
}
/*
* edac_pci_clear_parity_errors
*
* function to perform an iteration over the PCI devices
* and clearn their current status
*/
void edac_pci_clear_parity_errors(void)
{
/* Clear any PCI bus parity errors that devices initially have logged
* in their registers.
*/
edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear);
}
/*
* edac_pci_handle_pe
*
* Called to handle a PARITY ERROR event
*/
void edac_pci_handle_pe(struct edac_pci_ctl_info *pci, const char *msg)
{
/* global PE counter incremented by edac_pci_do_parity_check() */
atomic_inc(&pci->counters.pe_count);
if (edac_pci_get_log_pe())
edac_pci_printk(pci, KERN_WARNING,
"Parity Error ctl: %s %d: %s\n",
pci->ctl_name, pci->pci_idx, msg);
/*
* poke all PCI devices and see which one is the troublemaker
* panic() is called if set
*/
edac_pci_do_parity_check();
}
EXPORT_SYMBOL_GPL(edac_pci_handle_pe);
/*
* edac_pci_handle_npe
*
* Called to handle a NON-PARITY ERROR event
*/
void edac_pci_handle_npe(struct edac_pci_ctl_info *pci, const char *msg)
{
/* global NPE counter incremented by edac_pci_do_parity_check() */
atomic_inc(&pci->counters.npe_count);
if (edac_pci_get_log_npe())
edac_pci_printk(pci, KERN_WARNING,
"Non-Parity Error ctl: %s %d: %s\n",
pci->ctl_name, pci->pci_idx, msg);
/*
* poke all PCI devices and see which one is the troublemaker
* panic() is called if set
*/
edac_pci_do_parity_check();
}
EXPORT_SYMBOL_GPL(edac_pci_handle_npe);
/*
* Define the PCI parameter to the module
*/
module_param(check_pci_errors, int, 0644);
MODULE_PARM_DESC(check_pci_errors,
"Check for PCI bus parity errors: 0=off 1=on");
module_param(edac_pci_panic_on_pe, int, 0644);
MODULE_PARM_DESC(edac_pci_panic_on_pe,
"Panic on PCI Bus Parity error: 0=off 1=on");
#endif /* CONFIG_PCI */

110
drivers/edac/edac_stub.c Normal file
View file

@ -0,0 +1,110 @@
/*
* common EDAC components that must be in kernel
*
* Author: Dave Jiang <djiang@mvista.com>
*
* 2007 (c) MontaVista Software, Inc.
* 2010 (c) Advanced Micro Devices Inc.
* Borislav Petkov <bp@alien8.de>
*
* This file is licensed under the terms of the GNU General Public
* License version 2. This program is licensed "as is" without any
* warranty of any kind, whether express or implied.
*
*/
#include <linux/module.h>
#include <linux/edac.h>
#include <linux/atomic.h>
#include <linux/device.h>
#include <asm/edac.h>
int edac_op_state = EDAC_OPSTATE_INVAL;
EXPORT_SYMBOL_GPL(edac_op_state);
atomic_t edac_handlers = ATOMIC_INIT(0);
EXPORT_SYMBOL_GPL(edac_handlers);
int edac_err_assert = 0;
EXPORT_SYMBOL_GPL(edac_err_assert);
static atomic_t edac_subsys_valid = ATOMIC_INIT(0);
int edac_report_status = EDAC_REPORTING_ENABLED;
EXPORT_SYMBOL_GPL(edac_report_status);
static int __init edac_report_setup(char *str)
{
if (!str)
return -EINVAL;
if (!strncmp(str, "on", 2))
set_edac_report_status(EDAC_REPORTING_ENABLED);
else if (!strncmp(str, "off", 3))
set_edac_report_status(EDAC_REPORTING_DISABLED);
else if (!strncmp(str, "force", 5))
set_edac_report_status(EDAC_REPORTING_FORCE);
return 0;
}
__setup("edac_report=", edac_report_setup);
/*
* called to determine if there is an EDAC driver interested in
* knowing an event (such as NMI) occurred
*/
int edac_handler_set(void)
{
if (edac_op_state == EDAC_OPSTATE_POLL)
return 0;
return atomic_read(&edac_handlers);
}
EXPORT_SYMBOL_GPL(edac_handler_set);
/*
* handler for NMI type of interrupts to assert error
*/
void edac_atomic_assert_error(void)
{
edac_err_assert++;
}
EXPORT_SYMBOL_GPL(edac_atomic_assert_error);
/*
* sysfs object: /sys/devices/system/edac
* need to export to other files
*/
struct bus_type edac_subsys = {
.name = "edac",
.dev_name = "edac",
};
EXPORT_SYMBOL_GPL(edac_subsys);
/* return pointer to the 'edac' node in sysfs */
struct bus_type *edac_get_sysfs_subsys(void)
{
int err = 0;
if (atomic_read(&edac_subsys_valid))
goto out;
/* create the /sys/devices/system/edac directory */
err = subsys_system_register(&edac_subsys, NULL);
if (err) {
printk(KERN_ERR "Error registering toplevel EDAC sysfs dir\n");
return NULL;
}
out:
atomic_inc(&edac_subsys_valid);
return &edac_subsys;
}
EXPORT_SYMBOL_GPL(edac_get_sysfs_subsys);
void edac_put_sysfs_subsys(void)
{
/* last user unregisters it */
if (atomic_dec_and_test(&edac_subsys_valid))
bus_unregister(&edac_subsys);
}
EXPORT_SYMBOL_GPL(edac_put_sysfs_subsys);

547
drivers/edac/ghes_edac.c Normal file
View file

@ -0,0 +1,547 @@
/*
* GHES/EDAC Linux driver
*
* This file may be distributed under the terms of the GNU General Public
* License version 2.
*
* Copyright (c) 2013 by Mauro Carvalho Chehab
*
* Red Hat Inc. http://www.redhat.com
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <acpi/ghes.h>
#include <linux/edac.h>
#include <linux/dmi.h>
#include "edac_core.h"
#include <ras/ras_event.h>
#define GHES_EDAC_REVISION " Ver: 1.0.0"
struct ghes_edac_pvt {
struct list_head list;
struct ghes *ghes;
struct mem_ctl_info *mci;
/* Buffers for the error handling routine */
char detail_location[240];
char other_detail[160];
char msg[80];
};
static LIST_HEAD(ghes_reglist);
static DEFINE_MUTEX(ghes_edac_lock);
static int ghes_edac_mc_num;
/* Memory Device - Type 17 of SMBIOS spec */
struct memdev_dmi_entry {
u8 type;
u8 length;
u16 handle;
u16 phys_mem_array_handle;
u16 mem_err_info_handle;
u16 total_width;
u16 data_width;
u16 size;
u8 form_factor;
u8 device_set;
u8 device_locator;
u8 bank_locator;
u8 memory_type;
u16 type_detail;
u16 speed;
u8 manufacturer;
u8 serial_number;
u8 asset_tag;
u8 part_number;
u8 attributes;
u32 extended_size;
u16 conf_mem_clk_speed;
} __attribute__((__packed__));
struct ghes_edac_dimm_fill {
struct mem_ctl_info *mci;
unsigned count;
};
char *memory_type[] = {
[MEM_EMPTY] = "EMPTY",
[MEM_RESERVED] = "RESERVED",
[MEM_UNKNOWN] = "UNKNOWN",
[MEM_FPM] = "FPM",
[MEM_EDO] = "EDO",
[MEM_BEDO] = "BEDO",
[MEM_SDR] = "SDR",
[MEM_RDR] = "RDR",
[MEM_DDR] = "DDR",
[MEM_RDDR] = "RDDR",
[MEM_RMBS] = "RMBS",
[MEM_DDR2] = "DDR2",
[MEM_FB_DDR2] = "FB_DDR2",
[MEM_RDDR2] = "RDDR2",
[MEM_XDR] = "XDR",
[MEM_DDR3] = "DDR3",
[MEM_RDDR3] = "RDDR3",
};
static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
{
int *num_dimm = arg;
if (dh->type == DMI_ENTRY_MEM_DEVICE)
(*num_dimm)++;
}
static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
{
struct ghes_edac_dimm_fill *dimm_fill = arg;
struct mem_ctl_info *mci = dimm_fill->mci;
if (dh->type == DMI_ENTRY_MEM_DEVICE) {
struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh;
struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
mci->n_layers,
dimm_fill->count, 0, 0);
if (entry->size == 0xffff) {
pr_info("Can't get DIMM%i size\n",
dimm_fill->count);
dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */
} else if (entry->size == 0x7fff) {
dimm->nr_pages = MiB_TO_PAGES(entry->extended_size);
} else {
if (entry->size & 1 << 15)
dimm->nr_pages = MiB_TO_PAGES((entry->size &
0x7fff) << 10);
else
dimm->nr_pages = MiB_TO_PAGES(entry->size);
}
switch (entry->memory_type) {
case 0x12:
if (entry->type_detail & 1 << 13)
dimm->mtype = MEM_RDDR;
else
dimm->mtype = MEM_DDR;
break;
case 0x13:
if (entry->type_detail & 1 << 13)
dimm->mtype = MEM_RDDR2;
else
dimm->mtype = MEM_DDR2;
break;
case 0x14:
dimm->mtype = MEM_FB_DDR2;
break;
case 0x18:
if (entry->type_detail & 1 << 13)
dimm->mtype = MEM_RDDR3;
else
dimm->mtype = MEM_DDR3;
break;
default:
if (entry->type_detail & 1 << 6)
dimm->mtype = MEM_RMBS;
else if ((entry->type_detail & ((1 << 7) | (1 << 13)))
== ((1 << 7) | (1 << 13)))
dimm->mtype = MEM_RDR;
else if (entry->type_detail & 1 << 7)
dimm->mtype = MEM_SDR;
else if (entry->type_detail & 1 << 9)
dimm->mtype = MEM_EDO;
else
dimm->mtype = MEM_UNKNOWN;
}
/*
* Actually, we can only detect if the memory has bits for
* checksum or not
*/
if (entry->total_width == entry->data_width)
dimm->edac_mode = EDAC_NONE;
else
dimm->edac_mode = EDAC_SECDED;
dimm->dtype = DEV_UNKNOWN;
dimm->grain = 128; /* Likely, worse case */
/*
* FIXME: It shouldn't be hard to also fill the DIMM labels
*/
if (dimm->nr_pages) {
edac_dbg(1, "DIMM%i: %s size = %d MB%s\n",
dimm_fill->count, memory_type[dimm->mtype],
PAGES_TO_MiB(dimm->nr_pages),
(dimm->edac_mode != EDAC_NONE) ? "(ECC)" : "");
edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n",
entry->memory_type, entry->type_detail,
entry->total_width, entry->data_width);
}
dimm_fill->count++;
}
}
void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
struct cper_sec_mem_err *mem_err)
{
enum hw_event_mc_err_type type;
struct edac_raw_error_desc *e;
struct mem_ctl_info *mci;
struct ghes_edac_pvt *pvt = NULL;
char *p;
u8 grain_bits;
list_for_each_entry(pvt, &ghes_reglist, list) {
if (ghes == pvt->ghes)
break;
}
if (!pvt) {
pr_err("Internal error: Can't find EDAC structure\n");
return;
}
mci = pvt->mci;
e = &mci->error_desc;
/* Cleans the error report buffer */
memset(e, 0, sizeof (*e));
e->error_count = 1;
strcpy(e->label, "unknown label");
e->msg = pvt->msg;
e->other_detail = pvt->other_detail;
e->top_layer = -1;
e->mid_layer = -1;
e->low_layer = -1;
*pvt->other_detail = '\0';
*pvt->msg = '\0';
switch (sev) {
case GHES_SEV_CORRECTED:
type = HW_EVENT_ERR_CORRECTED;
break;
case GHES_SEV_RECOVERABLE:
type = HW_EVENT_ERR_UNCORRECTED;
break;
case GHES_SEV_PANIC:
type = HW_EVENT_ERR_FATAL;
break;
default:
case GHES_SEV_NO:
type = HW_EVENT_ERR_INFO;
}
edac_dbg(1, "error validation_bits: 0x%08llx\n",
(long long)mem_err->validation_bits);
/* Error type, mapped on e->msg */
if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
p = pvt->msg;
switch (mem_err->error_type) {
case 0:
p += sprintf(p, "Unknown");
break;
case 1:
p += sprintf(p, "No error");
break;
case 2:
p += sprintf(p, "Single-bit ECC");
break;
case 3:
p += sprintf(p, "Multi-bit ECC");
break;
case 4:
p += sprintf(p, "Single-symbol ChipKill ECC");
break;
case 5:
p += sprintf(p, "Multi-symbol ChipKill ECC");
break;
case 6:
p += sprintf(p, "Master abort");
break;
case 7:
p += sprintf(p, "Target abort");
break;
case 8:
p += sprintf(p, "Parity Error");
break;
case 9:
p += sprintf(p, "Watchdog timeout");
break;
case 10:
p += sprintf(p, "Invalid address");
break;
case 11:
p += sprintf(p, "Mirror Broken");
break;
case 12:
p += sprintf(p, "Memory Sparing");
break;
case 13:
p += sprintf(p, "Scrub corrected error");
break;
case 14:
p += sprintf(p, "Scrub uncorrected error");
break;
case 15:
p += sprintf(p, "Physical Memory Map-out event");
break;
default:
p += sprintf(p, "reserved error (%d)",
mem_err->error_type);
}
} else {
strcpy(pvt->msg, "unknown error");
}
/* Error address */
if (mem_err->validation_bits & CPER_MEM_VALID_PA) {
e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT;
e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK;
}
/* Error grain */
if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK)
e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK);
/* Memory error location, mapped on e->location */
p = e->location;
if (mem_err->validation_bits & CPER_MEM_VALID_NODE)
p += sprintf(p, "node:%d ", mem_err->node);
if (mem_err->validation_bits & CPER_MEM_VALID_CARD)
p += sprintf(p, "card:%d ", mem_err->card);
if (mem_err->validation_bits & CPER_MEM_VALID_MODULE)
p += sprintf(p, "module:%d ", mem_err->module);
if (mem_err->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
p += sprintf(p, "rank:%d ", mem_err->rank);
if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
p += sprintf(p, "bank:%d ", mem_err->bank);
if (mem_err->validation_bits & CPER_MEM_VALID_ROW)
p += sprintf(p, "row:%d ", mem_err->row);
if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN)
p += sprintf(p, "col:%d ", mem_err->column);
if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
const char *bank = NULL, *device = NULL;
dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device);
if (bank != NULL && device != NULL)
p += sprintf(p, "DIMM location:%s %s ", bank, device);
else
p += sprintf(p, "DIMM DMI handle: 0x%.4x ",
mem_err->mem_dev_handle);
}
if (p > e->location)
*(p - 1) = '\0';
/* All other fields are mapped on e->other_detail */
p = pvt->other_detail;
if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) {
u64 status = mem_err->error_status;
p += sprintf(p, "status(0x%016llx): ", (long long)status);
switch ((status >> 8) & 0xff) {
case 1:
p += sprintf(p, "Error detected internal to the component ");
break;
case 16:
p += sprintf(p, "Error detected in the bus ");
break;
case 4:
p += sprintf(p, "Storage error in DRAM memory ");
break;
case 5:
p += sprintf(p, "Storage error in TLB ");
break;
case 6:
p += sprintf(p, "Storage error in cache ");
break;
case 7:
p += sprintf(p, "Error in one or more functional units ");
break;
case 8:
p += sprintf(p, "component failed self test ");
break;
case 9:
p += sprintf(p, "Overflow or undervalue of internal queue ");
break;
case 17:
p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR ");
break;
case 18:
p += sprintf(p, "Improper access error ");
break;
case 19:
p += sprintf(p, "Access to a memory address which is not mapped to any component ");
break;
case 20:
p += sprintf(p, "Loss of Lockstep ");
break;
case 21:
p += sprintf(p, "Response not associated with a request ");
break;
case 22:
p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits ");
break;
case 23:
p += sprintf(p, "Detection of a PATH_ERROR ");
break;
case 25:
p += sprintf(p, "Bus operation timeout ");
break;
case 26:
p += sprintf(p, "A read was issued to data that has been poisoned ");
break;
default:
p += sprintf(p, "reserved ");
break;
}
}
if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
p += sprintf(p, "requestorID: 0x%016llx ",
(long long)mem_err->requestor_id);
if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
p += sprintf(p, "responderID: 0x%016llx ",
(long long)mem_err->responder_id);
if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID)
p += sprintf(p, "targetID: 0x%016llx ",
(long long)mem_err->responder_id);
if (p > pvt->other_detail)
*(p - 1) = '\0';
/* Generate the trace event */
grain_bits = fls_long(e->grain);
sprintf(pvt->detail_location, "APEI location: %s %s",
e->location, e->other_detail);
trace_mc_event(type, e->msg, e->label, e->error_count,
mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
grain_bits, e->syndrome, pvt->detail_location);
/* Report the error via EDAC API */
edac_raw_mc_handle_error(type, mci, e);
}
EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error);
int ghes_edac_register(struct ghes *ghes, struct device *dev)
{
bool fake = false;
int rc, num_dimm = 0;
struct mem_ctl_info *mci;
struct edac_mc_layer layers[1];
struct ghes_edac_pvt *pvt;
struct ghes_edac_dimm_fill dimm_fill;
/* Get the number of DIMMs */
dmi_walk(ghes_edac_count_dimms, &num_dimm);
/* Check if we've got a bogus BIOS */
if (num_dimm == 0) {
fake = true;
num_dimm = 1;
}
layers[0].type = EDAC_MC_LAYER_ALL_MEM;
layers[0].size = num_dimm;
layers[0].is_virt_csrow = true;
/*
* We need to serialize edac_mc_alloc() and edac_mc_add_mc(),
* to avoid duplicated memory controller numbers
*/
mutex_lock(&ghes_edac_lock);
mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers,
sizeof(*pvt));
if (!mci) {
pr_info("Can't allocate memory for EDAC data\n");
mutex_unlock(&ghes_edac_lock);
return -ENOMEM;
}
pvt = mci->pvt_info;
memset(pvt, 0, sizeof(*pvt));
list_add_tail(&pvt->list, &ghes_reglist);
pvt->ghes = ghes;
pvt->mci = mci;
mci->pdev = dev;
mci->mtype_cap = MEM_FLAG_EMPTY;
mci->edac_ctl_cap = EDAC_FLAG_NONE;
mci->edac_cap = EDAC_FLAG_NONE;
mci->mod_name = "ghes_edac.c";
mci->mod_ver = GHES_EDAC_REVISION;
mci->ctl_name = "ghes_edac";
mci->dev_name = "ghes";
if (!ghes_edac_mc_num) {
if (!fake) {
pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n");
pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n");
pr_info("So, the end result of using this driver varies from vendor to vendor.\n");
pr_info("If you find incorrect reports, please contact your hardware vendor\n");
pr_info("to correct its BIOS.\n");
pr_info("This system has %d DIMM sockets.\n",
num_dimm);
} else {
pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n");
pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n");
pr_info("work on such system. Use this driver with caution\n");
}
}
if (!fake) {
/*
* Fill DIMM info from DMI for the memory controller #0
*
* Keep it in blank for the other memory controllers, as
* there's no reliable way to properly credit each DIMM to
* the memory controller, as different BIOSes fill the
* DMI bank location fields on different ways
*/
if (!ghes_edac_mc_num) {
dimm_fill.count = 0;
dimm_fill.mci = mci;
dmi_walk(ghes_edac_dmidecode, &dimm_fill);
}
} else {
struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
mci->n_layers, 0, 0, 0);
dimm->nr_pages = 1;
dimm->grain = 128;
dimm->mtype = MEM_UNKNOWN;
dimm->dtype = DEV_UNKNOWN;
dimm->edac_mode = EDAC_SECDED;
}
rc = edac_mc_add_mc(mci);
if (rc < 0) {
pr_info("Can't register at EDAC core\n");
edac_mc_free(mci);
mutex_unlock(&ghes_edac_lock);
return -ENODEV;
}
ghes_edac_mc_num++;
mutex_unlock(&ghes_edac_lock);
return 0;
}
EXPORT_SYMBOL_GPL(ghes_edac_register);
void ghes_edac_unregister(struct ghes *ghes)
{
struct mem_ctl_info *mci;
struct ghes_edac_pvt *pvt, *tmp;
list_for_each_entry_safe(pvt, tmp, &ghes_reglist, list) {
if (ghes == pvt->ghes) {
mci = pvt->mci;
edac_mc_del_mc(mci->pdev);
edac_mc_free(mci);
list_del(&pvt->list);
}
}
}
EXPORT_SYMBOL_GPL(ghes_edac_unregister);

View file

@ -0,0 +1,154 @@
/*
* Copyright 2011-2012 Calxeda, Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/of_platform.h>
#include "edac_core.h"
#include "edac_module.h"
#define SR_CLR_SB_ECC_INTR 0x0
#define SR_CLR_DB_ECC_INTR 0x4
struct hb_l2_drvdata {
void __iomem *base;
int sb_irq;
int db_irq;
};
static irqreturn_t highbank_l2_err_handler(int irq, void *dev_id)
{
struct edac_device_ctl_info *dci = dev_id;
struct hb_l2_drvdata *drvdata = dci->pvt_info;
if (irq == drvdata->sb_irq) {
writel(1, drvdata->base + SR_CLR_SB_ECC_INTR);
edac_device_handle_ce(dci, 0, 0, dci->ctl_name);
}
if (irq == drvdata->db_irq) {
writel(1, drvdata->base + SR_CLR_DB_ECC_INTR);
edac_device_handle_ue(dci, 0, 0, dci->ctl_name);
}
return IRQ_HANDLED;
}
static const struct of_device_id hb_l2_err_of_match[] = {
{ .compatible = "calxeda,hb-sregs-l2-ecc", },
{},
};
MODULE_DEVICE_TABLE(of, hb_l2_err_of_match);
static int highbank_l2_err_probe(struct platform_device *pdev)
{
const struct of_device_id *id;
struct edac_device_ctl_info *dci;
struct hb_l2_drvdata *drvdata;
struct resource *r;
int res = 0;
dci = edac_device_alloc_ctl_info(sizeof(*drvdata), "cpu",
1, "L", 1, 2, NULL, 0, 0);
if (!dci)
return -ENOMEM;
drvdata = dci->pvt_info;
dci->dev = &pdev->dev;
platform_set_drvdata(pdev, dci);
if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL))
return -ENOMEM;
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!r) {
dev_err(&pdev->dev, "Unable to get mem resource\n");
res = -ENODEV;
goto err;
}
if (!devm_request_mem_region(&pdev->dev, r->start,
resource_size(r), dev_name(&pdev->dev))) {
dev_err(&pdev->dev, "Error while requesting mem region\n");
res = -EBUSY;
goto err;
}
drvdata->base = devm_ioremap(&pdev->dev, r->start, resource_size(r));
if (!drvdata->base) {
dev_err(&pdev->dev, "Unable to map regs\n");
res = -ENOMEM;
goto err;
}
id = of_match_device(hb_l2_err_of_match, &pdev->dev);
dci->mod_name = pdev->dev.driver->name;
dci->ctl_name = id ? id->compatible : "unknown";
dci->dev_name = dev_name(&pdev->dev);
if (edac_device_add_device(dci))
goto err;
drvdata->db_irq = platform_get_irq(pdev, 0);
res = devm_request_irq(&pdev->dev, drvdata->db_irq,
highbank_l2_err_handler,
0, dev_name(&pdev->dev), dci);
if (res < 0)
goto err2;
drvdata->sb_irq = platform_get_irq(pdev, 1);
res = devm_request_irq(&pdev->dev, drvdata->sb_irq,
highbank_l2_err_handler,
0, dev_name(&pdev->dev), dci);
if (res < 0)
goto err2;
devres_close_group(&pdev->dev, NULL);
return 0;
err2:
edac_device_del_device(&pdev->dev);
err:
devres_release_group(&pdev->dev, NULL);
edac_device_free_ctl_info(dci);
return res;
}
static int highbank_l2_err_remove(struct platform_device *pdev)
{
struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
edac_device_del_device(&pdev->dev);
edac_device_free_ctl_info(dci);
return 0;
}
static struct platform_driver highbank_l2_edac_driver = {
.probe = highbank_l2_err_probe,
.remove = highbank_l2_err_remove,
.driver = {
.name = "hb_l2_edac",
.of_match_table = hb_l2_err_of_match,
},
};
module_platform_driver(highbank_l2_edac_driver);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Calxeda, Inc.");
MODULE_DESCRIPTION("EDAC Driver for Calxeda Highbank L2 Cache");

View file

@ -0,0 +1,281 @@
/*
* Copyright 2011-2012 Calxeda, Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/of_platform.h>
#include <linux/uaccess.h>
#include "edac_core.h"
#include "edac_module.h"
/* DDR Ctrlr Error Registers */
#define HB_DDR_ECC_ERR_BASE 0x128
#define MW_DDR_ECC_ERR_BASE 0x1b4
#define HB_DDR_ECC_OPT 0x00
#define HB_DDR_ECC_U_ERR_ADDR 0x08
#define HB_DDR_ECC_U_ERR_STAT 0x0c
#define HB_DDR_ECC_U_ERR_DATAL 0x10
#define HB_DDR_ECC_U_ERR_DATAH 0x14
#define HB_DDR_ECC_C_ERR_ADDR 0x18
#define HB_DDR_ECC_C_ERR_STAT 0x1c
#define HB_DDR_ECC_C_ERR_DATAL 0x20
#define HB_DDR_ECC_C_ERR_DATAH 0x24
#define HB_DDR_ECC_OPT_MODE_MASK 0x3
#define HB_DDR_ECC_OPT_FWC 0x100
#define HB_DDR_ECC_OPT_XOR_SHIFT 16
/* DDR Ctrlr Interrupt Registers */
#define HB_DDR_ECC_INT_BASE 0x180
#define MW_DDR_ECC_INT_BASE 0x218
#define HB_DDR_ECC_INT_STATUS 0x00
#define HB_DDR_ECC_INT_ACK 0x04
#define HB_DDR_ECC_INT_STAT_CE 0x8
#define HB_DDR_ECC_INT_STAT_DOUBLE_CE 0x10
#define HB_DDR_ECC_INT_STAT_UE 0x20
#define HB_DDR_ECC_INT_STAT_DOUBLE_UE 0x40
struct hb_mc_drvdata {
void __iomem *mc_err_base;
void __iomem *mc_int_base;
};
static irqreturn_t highbank_mc_err_handler(int irq, void *dev_id)
{
struct mem_ctl_info *mci = dev_id;
struct hb_mc_drvdata *drvdata = mci->pvt_info;
u32 status, err_addr;
/* Read the interrupt status register */
status = readl(drvdata->mc_int_base + HB_DDR_ECC_INT_STATUS);
if (status & HB_DDR_ECC_INT_STAT_UE) {
err_addr = readl(drvdata->mc_err_base + HB_DDR_ECC_U_ERR_ADDR);
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
err_addr >> PAGE_SHIFT,
err_addr & ~PAGE_MASK, 0,
0, 0, -1,
mci->ctl_name, "");
}
if (status & HB_DDR_ECC_INT_STAT_CE) {
u32 syndrome = readl(drvdata->mc_err_base + HB_DDR_ECC_C_ERR_STAT);
syndrome = (syndrome >> 8) & 0xff;
err_addr = readl(drvdata->mc_err_base + HB_DDR_ECC_C_ERR_ADDR);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
err_addr >> PAGE_SHIFT,
err_addr & ~PAGE_MASK, syndrome,
0, 0, -1,
mci->ctl_name, "");
}
/* clear the error, clears the interrupt */
writel(status, drvdata->mc_int_base + HB_DDR_ECC_INT_ACK);
return IRQ_HANDLED;
}
static void highbank_mc_err_inject(struct mem_ctl_info *mci, u8 synd)
{
struct hb_mc_drvdata *pdata = mci->pvt_info;
u32 reg;
reg = readl(pdata->mc_err_base + HB_DDR_ECC_OPT);
reg &= HB_DDR_ECC_OPT_MODE_MASK;
reg |= (synd << HB_DDR_ECC_OPT_XOR_SHIFT) | HB_DDR_ECC_OPT_FWC;
writel(reg, pdata->mc_err_base + HB_DDR_ECC_OPT);
}
#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
static ssize_t highbank_mc_inject_ctrl(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct mem_ctl_info *mci = to_mci(dev);
u8 synd;
if (kstrtou8(buf, 16, &synd))
return -EINVAL;
highbank_mc_err_inject(mci, synd);
return count;
}
static DEVICE_ATTR(inject_ctrl, S_IWUSR, NULL, highbank_mc_inject_ctrl);
struct hb_mc_settings {
int err_offset;
int int_offset;
};
static struct hb_mc_settings hb_settings = {
.err_offset = HB_DDR_ECC_ERR_BASE,
.int_offset = HB_DDR_ECC_INT_BASE,
};
static struct hb_mc_settings mw_settings = {
.err_offset = MW_DDR_ECC_ERR_BASE,
.int_offset = MW_DDR_ECC_INT_BASE,
};
static struct of_device_id hb_ddr_ctrl_of_match[] = {
{ .compatible = "calxeda,hb-ddr-ctrl", .data = &hb_settings },
{ .compatible = "calxeda,ecx-2000-ddr-ctrl", .data = &mw_settings },
{},
};
MODULE_DEVICE_TABLE(of, hb_ddr_ctrl_of_match);
static int highbank_mc_probe(struct platform_device *pdev)
{
const struct of_device_id *id;
const struct hb_mc_settings *settings;
struct edac_mc_layer layers[2];
struct mem_ctl_info *mci;
struct hb_mc_drvdata *drvdata;
struct dimm_info *dimm;
struct resource *r;
void __iomem *base;
u32 control;
int irq;
int res = 0;
id = of_match_device(hb_ddr_ctrl_of_match, &pdev->dev);
if (!id)
return -ENODEV;
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = 1;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = 1;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
sizeof(struct hb_mc_drvdata));
if (!mci)
return -ENOMEM;
mci->pdev = &pdev->dev;
drvdata = mci->pvt_info;
platform_set_drvdata(pdev, mci);
if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL))
return -ENOMEM;
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!r) {
dev_err(&pdev->dev, "Unable to get mem resource\n");
res = -ENODEV;
goto err;
}
if (!devm_request_mem_region(&pdev->dev, r->start,
resource_size(r), dev_name(&pdev->dev))) {
dev_err(&pdev->dev, "Error while requesting mem region\n");
res = -EBUSY;
goto err;
}
base = devm_ioremap(&pdev->dev, r->start, resource_size(r));
if (!base) {
dev_err(&pdev->dev, "Unable to map regs\n");
res = -ENOMEM;
goto err;
}
settings = id->data;
drvdata->mc_err_base = base + settings->err_offset;
drvdata->mc_int_base = base + settings->int_offset;
control = readl(drvdata->mc_err_base + HB_DDR_ECC_OPT) & 0x3;
if (!control || (control == 0x2)) {
dev_err(&pdev->dev, "No ECC present, or ECC disabled\n");
res = -ENODEV;
goto err;
}
mci->mtype_cap = MEM_FLAG_DDR3;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = pdev->dev.driver->name;
mci->mod_ver = "1";
mci->ctl_name = id->compatible;
mci->dev_name = dev_name(&pdev->dev);
mci->scrub_mode = SCRUB_SW_SRC;
/* Only a single 4GB DIMM is supported */
dimm = *mci->dimms;
dimm->nr_pages = (~0UL >> PAGE_SHIFT) + 1;
dimm->grain = 8;
dimm->dtype = DEV_X8;
dimm->mtype = MEM_DDR3;
dimm->edac_mode = EDAC_SECDED;
res = edac_mc_add_mc(mci);
if (res < 0)
goto err;
irq = platform_get_irq(pdev, 0);
res = devm_request_irq(&pdev->dev, irq, highbank_mc_err_handler,
0, dev_name(&pdev->dev), mci);
if (res < 0) {
dev_err(&pdev->dev, "Unable to request irq %d\n", irq);
goto err2;
}
device_create_file(&mci->dev, &dev_attr_inject_ctrl);
devres_close_group(&pdev->dev, NULL);
return 0;
err2:
edac_mc_del_mc(&pdev->dev);
err:
devres_release_group(&pdev->dev, NULL);
edac_mc_free(mci);
return res;
}
static int highbank_mc_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
device_remove_file(&mci->dev, &dev_attr_inject_ctrl);
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
return 0;
}
static struct platform_driver highbank_mc_edac_driver = {
.probe = highbank_mc_probe,
.remove = highbank_mc_remove,
.driver = {
.name = "hb_mc_edac",
.of_match_table = hb_ddr_ctrl_of_match,
},
};
module_platform_driver(highbank_mc_edac_driver);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Calxeda, Inc.");
MODULE_DESCRIPTION("EDAC Driver for Calxeda Highbank");

570
drivers/edac/i3000_edac.c Normal file
View file

@ -0,0 +1,570 @@
/*
* Intel 3000/3010 Memory Controller kernel module
* Copyright (C) 2007 Akamai Technologies, Inc.
* Shamelessly copied from:
* Intel D82875P Memory Controller kernel module
* (C) 2003 Linux Networx (http://lnxi.com)
*
* This file may be distributed under the terms of the
* GNU General Public License.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/edac.h>
#include "edac_core.h"
#define I3000_REVISION "1.1"
#define EDAC_MOD_STR "i3000_edac"
#define I3000_RANKS 8
#define I3000_RANKS_PER_CHANNEL 4
#define I3000_CHANNELS 2
/* Intel 3000 register addresses - device 0 function 0 - DRAM Controller */
#define I3000_MCHBAR 0x44 /* MCH Memory Mapped Register BAR */
#define I3000_MCHBAR_MASK 0xffffc000
#define I3000_MMR_WINDOW_SIZE 16384
#define I3000_EDEAP 0x70 /* Extended DRAM Error Address Pointer (8b)
*
* 7:1 reserved
* 0 bit 32 of address
*/
#define I3000_DEAP 0x58 /* DRAM Error Address Pointer (32b)
*
* 31:7 address
* 6:1 reserved
* 0 Error channel 0/1
*/
#define I3000_DEAP_GRAIN (1 << 7)
/*
* Helper functions to decode the DEAP/EDEAP hardware registers.
*
* The type promotion here is deliberate; we're deriving an
* unsigned long pfn and offset from hardware regs which are u8/u32.
*/
static inline unsigned long deap_pfn(u8 edeap, u32 deap)
{
deap >>= PAGE_SHIFT;
deap |= (edeap & 1) << (32 - PAGE_SHIFT);
return deap;
}
static inline unsigned long deap_offset(u32 deap)
{
return deap & ~(I3000_DEAP_GRAIN - 1) & ~PAGE_MASK;
}
static inline int deap_channel(u32 deap)
{
return deap & 1;
}
#define I3000_DERRSYN 0x5c /* DRAM Error Syndrome (8b)
*
* 7:0 DRAM ECC Syndrome
*/
#define I3000_ERRSTS 0xc8 /* Error Status Register (16b)
*
* 15:12 reserved
* 11 MCH Thermal Sensor Event
* for SMI/SCI/SERR
* 10 reserved
* 9 LOCK to non-DRAM Memory Flag (LCKF)
* 8 Received Refresh Timeout Flag (RRTOF)
* 7:2 reserved
* 1 Multi-bit DRAM ECC Error Flag (DMERR)
* 0 Single-bit DRAM ECC Error Flag (DSERR)
*/
#define I3000_ERRSTS_BITS 0x0b03 /* bits which indicate errors */
#define I3000_ERRSTS_UE 0x0002
#define I3000_ERRSTS_CE 0x0001
#define I3000_ERRCMD 0xca /* Error Command (16b)
*
* 15:12 reserved
* 11 SERR on MCH Thermal Sensor Event
* (TSESERR)
* 10 reserved
* 9 SERR on LOCK to non-DRAM Memory
* (LCKERR)
* 8 SERR on DRAM Refresh Timeout
* (DRTOERR)
* 7:2 reserved
* 1 SERR Multi-Bit DRAM ECC Error
* (DMERR)
* 0 SERR on Single-Bit ECC Error
* (DSERR)
*/
/* Intel MMIO register space - device 0 function 0 - MMR space */
#define I3000_DRB_SHIFT 25 /* 32MiB grain */
#define I3000_C0DRB 0x100 /* Channel 0 DRAM Rank Boundary (8b x 4)
*
* 7:0 Channel 0 DRAM Rank Boundary Address
*/
#define I3000_C1DRB 0x180 /* Channel 1 DRAM Rank Boundary (8b x 4)
*
* 7:0 Channel 1 DRAM Rank Boundary Address
*/
#define I3000_C0DRA 0x108 /* Channel 0 DRAM Rank Attribute (8b x 2)
*
* 7 reserved
* 6:4 DRAM odd Rank Attribute
* 3 reserved
* 2:0 DRAM even Rank Attribute
*
* Each attribute defines the page
* size of the corresponding rank:
* 000: unpopulated
* 001: reserved
* 010: 4 KB
* 011: 8 KB
* 100: 16 KB
* Others: reserved
*/
#define I3000_C1DRA 0x188 /* Channel 1 DRAM Rank Attribute (8b x 2) */
static inline unsigned char odd_rank_attrib(unsigned char dra)
{
return (dra & 0x70) >> 4;
}
static inline unsigned char even_rank_attrib(unsigned char dra)
{
return dra & 0x07;
}
#define I3000_C0DRC0 0x120 /* DRAM Controller Mode 0 (32b)
*
* 31:30 reserved
* 29 Initialization Complete (IC)
* 28:11 reserved
* 10:8 Refresh Mode Select (RMS)
* 7 reserved
* 6:4 Mode Select (SMS)
* 3:2 reserved
* 1:0 DRAM Type (DT)
*/
#define I3000_C0DRC1 0x124 /* DRAM Controller Mode 1 (32b)
*
* 31 Enhanced Addressing Enable (ENHADE)
* 30:0 reserved
*/
enum i3000p_chips {
I3000 = 0,
};
struct i3000_dev_info {
const char *ctl_name;
};
struct i3000_error_info {
u16 errsts;
u8 derrsyn;
u8 edeap;
u32 deap;
u16 errsts2;
};
static const struct i3000_dev_info i3000_devs[] = {
[I3000] = {
.ctl_name = "i3000"},
};
static struct pci_dev *mci_pdev;
static int i3000_registered = 1;
static struct edac_pci_ctl_info *i3000_pci;
static void i3000_get_error_info(struct mem_ctl_info *mci,
struct i3000_error_info *info)
{
struct pci_dev *pdev;
pdev = to_pci_dev(mci->pdev);
/*
* This is a mess because there is no atomic way to read all the
* registers at once and the registers can transition from CE being
* overwritten by UE.
*/
pci_read_config_word(pdev, I3000_ERRSTS, &info->errsts);
if (!(info->errsts & I3000_ERRSTS_BITS))
return;
pci_read_config_byte(pdev, I3000_EDEAP, &info->edeap);
pci_read_config_dword(pdev, I3000_DEAP, &info->deap);
pci_read_config_byte(pdev, I3000_DERRSYN, &info->derrsyn);
pci_read_config_word(pdev, I3000_ERRSTS, &info->errsts2);
/*
* If the error is the same for both reads then the first set
* of reads is valid. If there is a change then there is a CE
* with no info and the second set of reads is valid and
* should be UE info.
*/
if ((info->errsts ^ info->errsts2) & I3000_ERRSTS_BITS) {
pci_read_config_byte(pdev, I3000_EDEAP, &info->edeap);
pci_read_config_dword(pdev, I3000_DEAP, &info->deap);
pci_read_config_byte(pdev, I3000_DERRSYN, &info->derrsyn);
}
/*
* Clear any error bits.
* (Yes, we really clear bits by writing 1 to them.)
*/
pci_write_bits16(pdev, I3000_ERRSTS, I3000_ERRSTS_BITS,
I3000_ERRSTS_BITS);
}
static int i3000_process_error_info(struct mem_ctl_info *mci,
struct i3000_error_info *info,
int handle_errors)
{
int row, multi_chan, channel;
unsigned long pfn, offset;
multi_chan = mci->csrows[0]->nr_channels - 1;
if (!(info->errsts & I3000_ERRSTS_BITS))
return 0;
if (!handle_errors)
return 1;
if ((info->errsts ^ info->errsts2) & I3000_ERRSTS_BITS) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
-1, -1, -1,
"UE overwrote CE", "");
info->errsts = info->errsts2;
}
pfn = deap_pfn(info->edeap, info->deap);
offset = deap_offset(info->deap);
channel = deap_channel(info->deap);
row = edac_mc_find_csrow_by_page(mci, pfn);
if (info->errsts & I3000_ERRSTS_UE)
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
pfn, offset, 0,
row, -1, -1,
"i3000 UE", "");
else
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
pfn, offset, info->derrsyn,
row, multi_chan ? channel : 0, -1,
"i3000 CE", "");
return 1;
}
static void i3000_check(struct mem_ctl_info *mci)
{
struct i3000_error_info info;
edac_dbg(1, "MC%d\n", mci->mc_idx);
i3000_get_error_info(mci, &info);
i3000_process_error_info(mci, &info, 1);
}
static int i3000_is_interleaved(const unsigned char *c0dra,
const unsigned char *c1dra,
const unsigned char *c0drb,
const unsigned char *c1drb)
{
int i;
/*
* If the channels aren't populated identically then
* we're not interleaved.
*/
for (i = 0; i < I3000_RANKS_PER_CHANNEL / 2; i++)
if (odd_rank_attrib(c0dra[i]) != odd_rank_attrib(c1dra[i]) ||
even_rank_attrib(c0dra[i]) !=
even_rank_attrib(c1dra[i]))
return 0;
/*
* If the rank boundaries for the two channels are different
* then we're not interleaved.
*/
for (i = 0; i < I3000_RANKS_PER_CHANNEL; i++)
if (c0drb[i] != c1drb[i])
return 0;
return 1;
}
static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
{
int rc;
int i, j;
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
unsigned long last_cumul_size, nr_pages;
int interleaved, nr_channels;
unsigned char dra[I3000_RANKS / 2], drb[I3000_RANKS];
unsigned char *c0dra = dra, *c1dra = &dra[I3000_RANKS_PER_CHANNEL / 2];
unsigned char *c0drb = drb, *c1drb = &drb[I3000_RANKS_PER_CHANNEL];
unsigned long mchbar;
void __iomem *window;
edac_dbg(0, "MC:\n");
pci_read_config_dword(pdev, I3000_MCHBAR, (u32 *) & mchbar);
mchbar &= I3000_MCHBAR_MASK;
window = ioremap_nocache(mchbar, I3000_MMR_WINDOW_SIZE);
if (!window) {
printk(KERN_ERR "i3000: cannot map mmio space at 0x%lx\n",
mchbar);
return -ENODEV;
}
c0dra[0] = readb(window + I3000_C0DRA + 0); /* ranks 0,1 */
c0dra[1] = readb(window + I3000_C0DRA + 1); /* ranks 2,3 */
c1dra[0] = readb(window + I3000_C1DRA + 0); /* ranks 0,1 */
c1dra[1] = readb(window + I3000_C1DRA + 1); /* ranks 2,3 */
for (i = 0; i < I3000_RANKS_PER_CHANNEL; i++) {
c0drb[i] = readb(window + I3000_C0DRB + i);
c1drb[i] = readb(window + I3000_C1DRB + i);
}
iounmap(window);
/*
* Figure out how many channels we have.
*
* If we have what the datasheet calls "asymmetric channels"
* (essentially the same as what was called "virtual single
* channel mode" in the i82875) then it's a single channel as
* far as EDAC is concerned.
*/
interleaved = i3000_is_interleaved(c0dra, c1dra, c0drb, c1drb);
nr_channels = interleaved ? 2 : 1;
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = I3000_RANKS / nr_channels;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = nr_channels;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (!mci)
return -ENOMEM;
edac_dbg(3, "MC: init mci\n");
mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR2;
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = I3000_REVISION;
mci->ctl_name = i3000_devs[dev_idx].ctl_name;
mci->dev_name = pci_name(pdev);
mci->edac_check = i3000_check;
mci->ctl_page_to_phys = NULL;
/*
* The dram rank boundary (DRB) reg values are boundary addresses
* for each DRAM rank with a granularity of 32MB. DRB regs are
* cumulative; the last one will contain the total memory
* contained in all ranks.
*
* If we're in interleaved mode then we're only walking through
* the ranks of controller 0, so we double all the values we see.
*/
for (last_cumul_size = i = 0; i < mci->nr_csrows; i++) {
u8 value;
u32 cumul_size;
struct csrow_info *csrow = mci->csrows[i];
value = drb[i];
cumul_size = value << (I3000_DRB_SHIFT - PAGE_SHIFT);
if (interleaved)
cumul_size <<= 1;
edac_dbg(3, "MC: (%d) cumul_size 0x%x\n", i, cumul_size);
if (cumul_size == last_cumul_size)
continue;
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
for (j = 0; j < nr_channels; j++) {
struct dimm_info *dimm = csrow->channels[j]->dimm;
dimm->nr_pages = nr_pages / nr_channels;
dimm->grain = I3000_DEAP_GRAIN;
dimm->mtype = MEM_DDR2;
dimm->dtype = DEV_UNKNOWN;
dimm->edac_mode = EDAC_UNKNOWN;
}
}
/*
* Clear any error bits.
* (Yes, we really clear bits by writing 1 to them.)
*/
pci_write_bits16(pdev, I3000_ERRSTS, I3000_ERRSTS_BITS,
I3000_ERRSTS_BITS);
rc = -ENODEV;
if (edac_mc_add_mc(mci)) {
edac_dbg(3, "MC: failed edac_mc_add_mc()\n");
goto fail;
}
/* allocating generic PCI control info */
i3000_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
if (!i3000_pci) {
printk(KERN_WARNING
"%s(): Unable to create PCI control\n",
__func__);
printk(KERN_WARNING
"%s(): PCI error report via EDAC not setup\n",
__func__);
}
/* get this far and it's successful */
edac_dbg(3, "MC: success\n");
return 0;
fail:
if (mci)
edac_mc_free(mci);
return rc;
}
/* returns count (>= 0), or negative on error */
static int i3000_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int rc;
edac_dbg(0, "MC:\n");
if (pci_enable_device(pdev) < 0)
return -EIO;
rc = i3000_probe1(pdev, ent->driver_data);
if (!mci_pdev)
mci_pdev = pci_dev_get(pdev);
return rc;
}
static void i3000_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
edac_dbg(0, "\n");
if (i3000_pci)
edac_pci_release_generic_ctl(i3000_pci);
mci = edac_mc_del_mc(&pdev->dev);
if (!mci)
return;
edac_mc_free(mci);
}
static const struct pci_device_id i3000_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, 3000_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
I3000},
{
0,
} /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, i3000_pci_tbl);
static struct pci_driver i3000_driver = {
.name = EDAC_MOD_STR,
.probe = i3000_init_one,
.remove = i3000_remove_one,
.id_table = i3000_pci_tbl,
};
static int __init i3000_init(void)
{
int pci_rc;
edac_dbg(3, "MC:\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
pci_rc = pci_register_driver(&i3000_driver);
if (pci_rc < 0)
goto fail0;
if (!mci_pdev) {
i3000_registered = 0;
mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_3000_HB, NULL);
if (!mci_pdev) {
edac_dbg(0, "i3000 pci_get_device fail\n");
pci_rc = -ENODEV;
goto fail1;
}
pci_rc = i3000_init_one(mci_pdev, i3000_pci_tbl);
if (pci_rc < 0) {
edac_dbg(0, "i3000 init fail\n");
pci_rc = -ENODEV;
goto fail1;
}
}
return 0;
fail1:
pci_unregister_driver(&i3000_driver);
fail0:
if (mci_pdev)
pci_dev_put(mci_pdev);
return pci_rc;
}
static void __exit i3000_exit(void)
{
edac_dbg(3, "MC:\n");
pci_unregister_driver(&i3000_driver);
if (!i3000_registered) {
i3000_remove_one(mci_pdev);
pci_dev_put(mci_pdev);
}
}
module_init(i3000_init);
module_exit(i3000_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Akamai Technologies Arthur Ulfeldt/Jason Uhlenkott");
MODULE_DESCRIPTION("MC support for Intel 3000 memory hub controllers");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");

551
drivers/edac/i3200_edac.c Normal file
View file

@ -0,0 +1,551 @@
/*
* Intel 3200/3210 Memory Controller kernel module
* Copyright (C) 2008-2009 Akamai Technologies, Inc.
* Portions by Hitoshi Mitake <h.mitake@gmail.com>.
*
* This file may be distributed under the terms of the
* GNU General Public License.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/edac.h>
#include <linux/io.h>
#include "edac_core.h"
#include <asm-generic/io-64-nonatomic-lo-hi.h>
#define I3200_REVISION "1.1"
#define EDAC_MOD_STR "i3200_edac"
#define PCI_DEVICE_ID_INTEL_3200_HB 0x29f0
#define I3200_DIMMS 4
#define I3200_RANKS 8
#define I3200_RANKS_PER_CHANNEL 4
#define I3200_CHANNELS 2
/* Intel 3200 register addresses - device 0 function 0 - DRAM Controller */
#define I3200_MCHBAR_LOW 0x48 /* MCH Memory Mapped Register BAR */
#define I3200_MCHBAR_HIGH 0x4c
#define I3200_MCHBAR_MASK 0xfffffc000ULL /* bits 35:14 */
#define I3200_MMR_WINDOW_SIZE 16384
#define I3200_TOM 0xa0 /* Top of Memory (16b)
*
* 15:10 reserved
* 9:0 total populated physical memory
*/
#define I3200_TOM_MASK 0x3ff /* bits 9:0 */
#define I3200_TOM_SHIFT 26 /* 64MiB grain */
#define I3200_ERRSTS 0xc8 /* Error Status Register (16b)
*
* 15 reserved
* 14 Isochronous TBWRR Run Behind FIFO Full
* (ITCV)
* 13 Isochronous TBWRR Run Behind FIFO Put
* (ITSTV)
* 12 reserved
* 11 MCH Thermal Sensor Event
* for SMI/SCI/SERR (GTSE)
* 10 reserved
* 9 LOCK to non-DRAM Memory Flag (LCKF)
* 8 reserved
* 7 DRAM Throttle Flag (DTF)
* 6:2 reserved
* 1 Multi-bit DRAM ECC Error Flag (DMERR)
* 0 Single-bit DRAM ECC Error Flag (DSERR)
*/
#define I3200_ERRSTS_UE 0x0002
#define I3200_ERRSTS_CE 0x0001
#define I3200_ERRSTS_BITS (I3200_ERRSTS_UE | I3200_ERRSTS_CE)
/* Intel MMIO register space - device 0 function 0 - MMR space */
#define I3200_C0DRB 0x200 /* Channel 0 DRAM Rank Boundary (16b x 4)
*
* 15:10 reserved
* 9:0 Channel 0 DRAM Rank Boundary Address
*/
#define I3200_C1DRB 0x600 /* Channel 1 DRAM Rank Boundary (16b x 4) */
#define I3200_DRB_MASK 0x3ff /* bits 9:0 */
#define I3200_DRB_SHIFT 26 /* 64MiB grain */
#define I3200_C0ECCERRLOG 0x280 /* Channel 0 ECC Error Log (64b)
*
* 63:48 Error Column Address (ERRCOL)
* 47:32 Error Row Address (ERRROW)
* 31:29 Error Bank Address (ERRBANK)
* 28:27 Error Rank Address (ERRRANK)
* 26:24 reserved
* 23:16 Error Syndrome (ERRSYND)
* 15: 2 reserved
* 1 Multiple Bit Error Status (MERRSTS)
* 0 Correctable Error Status (CERRSTS)
*/
#define I3200_C1ECCERRLOG 0x680 /* Chan 1 ECC Error Log (64b) */
#define I3200_ECCERRLOG_CE 0x1
#define I3200_ECCERRLOG_UE 0x2
#define I3200_ECCERRLOG_RANK_BITS 0x18000000
#define I3200_ECCERRLOG_RANK_SHIFT 27
#define I3200_ECCERRLOG_SYNDROME_BITS 0xff0000
#define I3200_ECCERRLOG_SYNDROME_SHIFT 16
#define I3200_CAPID0 0xe0 /* P.95 of spec for details */
struct i3200_priv {
void __iomem *window;
};
static int nr_channels;
static int how_many_channels(struct pci_dev *pdev)
{
int n_channels;
unsigned char capid0_8b; /* 8th byte of CAPID0 */
pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b);
if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */
edac_dbg(0, "In single channel mode\n");
n_channels = 1;
} else {
edac_dbg(0, "In dual channel mode\n");
n_channels = 2;
}
if (capid0_8b & 0x10) /* check if both channels are filled */
edac_dbg(0, "2 DIMMS per channel disabled\n");
else
edac_dbg(0, "2 DIMMS per channel enabled\n");
return n_channels;
}
static unsigned long eccerrlog_syndrome(u64 log)
{
return (log & I3200_ECCERRLOG_SYNDROME_BITS) >>
I3200_ECCERRLOG_SYNDROME_SHIFT;
}
static int eccerrlog_row(int channel, u64 log)
{
u64 rank = ((log & I3200_ECCERRLOG_RANK_BITS) >>
I3200_ECCERRLOG_RANK_SHIFT);
return rank | (channel * I3200_RANKS_PER_CHANNEL);
}
enum i3200_chips {
I3200 = 0,
};
struct i3200_dev_info {
const char *ctl_name;
};
struct i3200_error_info {
u16 errsts;
u16 errsts2;
u64 eccerrlog[I3200_CHANNELS];
};
static const struct i3200_dev_info i3200_devs[] = {
[I3200] = {
.ctl_name = "i3200"
},
};
static struct pci_dev *mci_pdev;
static int i3200_registered = 1;
static void i3200_clear_error_info(struct mem_ctl_info *mci)
{
struct pci_dev *pdev;
pdev = to_pci_dev(mci->pdev);
/*
* Clear any error bits.
* (Yes, we really clear bits by writing 1 to them.)
*/
pci_write_bits16(pdev, I3200_ERRSTS, I3200_ERRSTS_BITS,
I3200_ERRSTS_BITS);
}
static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci,
struct i3200_error_info *info)
{
struct pci_dev *pdev;
struct i3200_priv *priv = mci->pvt_info;
void __iomem *window = priv->window;
pdev = to_pci_dev(mci->pdev);
/*
* This is a mess because there is no atomic way to read all the
* registers at once and the registers can transition from CE being
* overwritten by UE.
*/
pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts);
if (!(info->errsts & I3200_ERRSTS_BITS))
return;
info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
if (nr_channels == 2)
info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts2);
/*
* If the error is the same for both reads then the first set
* of reads is valid. If there is a change then there is a CE
* with no info and the second set of reads is valid and
* should be UE info.
*/
if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
if (nr_channels == 2)
info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
}
i3200_clear_error_info(mci);
}
static void i3200_process_error_info(struct mem_ctl_info *mci,
struct i3200_error_info *info)
{
int channel;
u64 log;
if (!(info->errsts & I3200_ERRSTS_BITS))
return;
if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
-1, -1, -1, "UE overwrote CE", "");
info->errsts = info->errsts2;
}
for (channel = 0; channel < nr_channels; channel++) {
log = info->eccerrlog[channel];
if (log & I3200_ECCERRLOG_UE) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
0, 0, 0,
eccerrlog_row(channel, log),
-1, -1,
"i3000 UE", "");
} else if (log & I3200_ECCERRLOG_CE) {
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
0, 0, eccerrlog_syndrome(log),
eccerrlog_row(channel, log),
-1, -1,
"i3000 CE", "");
}
}
}
static void i3200_check(struct mem_ctl_info *mci)
{
struct i3200_error_info info;
edac_dbg(1, "MC%d\n", mci->mc_idx);
i3200_get_and_clear_error_info(mci, &info);
i3200_process_error_info(mci, &info);
}
static void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
{
union {
u64 mchbar;
struct {
u32 mchbar_low;
u32 mchbar_high;
};
} u;
void __iomem *window;
pci_read_config_dword(pdev, I3200_MCHBAR_LOW, &u.mchbar_low);
pci_read_config_dword(pdev, I3200_MCHBAR_HIGH, &u.mchbar_high);
u.mchbar &= I3200_MCHBAR_MASK;
if (u.mchbar != (resource_size_t)u.mchbar) {
printk(KERN_ERR
"i3200: mmio space beyond accessible range (0x%llx)\n",
(unsigned long long)u.mchbar);
return NULL;
}
window = ioremap_nocache(u.mchbar, I3200_MMR_WINDOW_SIZE);
if (!window)
printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n",
(unsigned long long)u.mchbar);
return window;
}
static void i3200_get_drbs(void __iomem *window,
u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
{
int i;
for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) {
drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK;
drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK;
edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n", i, drbs[0][i], i, drbs[1][i]);
}
}
static bool i3200_is_stacked(struct pci_dev *pdev,
u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
{
u16 tom;
pci_read_config_word(pdev, I3200_TOM, &tom);
tom &= I3200_TOM_MASK;
return drbs[I3200_CHANNELS - 1][I3200_RANKS_PER_CHANNEL - 1] == tom;
}
static unsigned long drb_to_nr_pages(
u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL], bool stacked,
int channel, int rank)
{
int n;
n = drbs[channel][rank];
if (!n)
return 0;
if (rank > 0)
n -= drbs[channel][rank - 1];
if (stacked && (channel == 1) &&
drbs[channel][rank] == drbs[channel][I3200_RANKS_PER_CHANNEL - 1])
n -= drbs[0][I3200_RANKS_PER_CHANNEL - 1];
n <<= (I3200_DRB_SHIFT - PAGE_SHIFT);
return n;
}
static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
{
int rc;
int i, j;
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL];
bool stacked;
void __iomem *window;
struct i3200_priv *priv;
edac_dbg(0, "MC:\n");
window = i3200_map_mchbar(pdev);
if (!window)
return -ENODEV;
i3200_get_drbs(window, drbs);
nr_channels = how_many_channels(pdev);
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = I3200_DIMMS;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = nr_channels;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
sizeof(struct i3200_priv));
if (!mci)
return -ENOMEM;
edac_dbg(3, "MC: init mci\n");
mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR2;
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = I3200_REVISION;
mci->ctl_name = i3200_devs[dev_idx].ctl_name;
mci->dev_name = pci_name(pdev);
mci->edac_check = i3200_check;
mci->ctl_page_to_phys = NULL;
priv = mci->pvt_info;
priv->window = window;
stacked = i3200_is_stacked(pdev, drbs);
/*
* The dram rank boundary (DRB) reg values are boundary addresses
* for each DRAM rank with a granularity of 64MB. DRB regs are
* cumulative; the last one will contain the total memory
* contained in all ranks.
*/
for (i = 0; i < I3200_DIMMS; i++) {
unsigned long nr_pages;
for (j = 0; j < nr_channels; j++) {
struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
mci->n_layers, i, j, 0);
nr_pages = drb_to_nr_pages(drbs, stacked, j, i);
if (nr_pages == 0)
continue;
edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j,
stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages));
dimm->nr_pages = nr_pages;
dimm->grain = nr_pages << PAGE_SHIFT;
dimm->mtype = MEM_DDR2;
dimm->dtype = DEV_UNKNOWN;
dimm->edac_mode = EDAC_UNKNOWN;
}
}
i3200_clear_error_info(mci);
rc = -ENODEV;
if (edac_mc_add_mc(mci)) {
edac_dbg(3, "MC: failed edac_mc_add_mc()\n");
goto fail;
}
/* get this far and it's successful */
edac_dbg(3, "MC: success\n");
return 0;
fail:
iounmap(window);
if (mci)
edac_mc_free(mci);
return rc;
}
static int i3200_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int rc;
edac_dbg(0, "MC:\n");
if (pci_enable_device(pdev) < 0)
return -EIO;
rc = i3200_probe1(pdev, ent->driver_data);
if (!mci_pdev)
mci_pdev = pci_dev_get(pdev);
return rc;
}
static void i3200_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
struct i3200_priv *priv;
edac_dbg(0, "\n");
mci = edac_mc_del_mc(&pdev->dev);
if (!mci)
return;
priv = mci->pvt_info;
iounmap(priv->window);
edac_mc_free(mci);
pci_disable_device(pdev);
}
static const struct pci_device_id i3200_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
I3200},
{
0,
} /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, i3200_pci_tbl);
static struct pci_driver i3200_driver = {
.name = EDAC_MOD_STR,
.probe = i3200_init_one,
.remove = i3200_remove_one,
.id_table = i3200_pci_tbl,
};
static int __init i3200_init(void)
{
int pci_rc;
edac_dbg(3, "MC:\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
pci_rc = pci_register_driver(&i3200_driver);
if (pci_rc < 0)
goto fail0;
if (!mci_pdev) {
i3200_registered = 0;
mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_3200_HB, NULL);
if (!mci_pdev) {
edac_dbg(0, "i3200 pci_get_device fail\n");
pci_rc = -ENODEV;
goto fail1;
}
pci_rc = i3200_init_one(mci_pdev, i3200_pci_tbl);
if (pci_rc < 0) {
edac_dbg(0, "i3200 init fail\n");
pci_rc = -ENODEV;
goto fail1;
}
}
return 0;
fail1:
pci_unregister_driver(&i3200_driver);
fail0:
if (mci_pdev)
pci_dev_put(mci_pdev);
return pci_rc;
}
static void __exit i3200_exit(void)
{
edac_dbg(3, "MC:\n");
pci_unregister_driver(&i3200_driver);
if (!i3200_registered) {
i3200_remove_one(mci_pdev);
pci_dev_put(mci_pdev);
}
}
module_init(i3200_init);
module_exit(i3200_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Akamai Technologies, Inc.");
MODULE_DESCRIPTION("MC support for Intel 3200 memory hub controllers");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");

1594
drivers/edac/i5000_edac.c Normal file

File diff suppressed because it is too large Load diff

1250
drivers/edac/i5100_edac.c Normal file

File diff suppressed because it is too large Load diff

1478
drivers/edac/i5400_edac.c Normal file

File diff suppressed because it is too large Load diff

1218
drivers/edac/i7300_edac.c Normal file

File diff suppressed because it is too large Load diff

2466
drivers/edac/i7core_edac.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,473 @@
/*
* Intel 82443BX/GX (440BX/GX chipset) Memory Controller EDAC kernel
* module (C) 2006 Tim Small
*
* This file may be distributed under the terms of the GNU General
* Public License.
*
* Written by Tim Small <tim@buttersideup.com>, based on work by Linux
* Networx, Thayne Harbaugh, Dan Hollis <goemon at anime dot net> and
* others.
*
* 440GX fix by Jason Uhlenkott <juhlenko@akamai.com>.
*
* Written with reference to 82443BX Host Bridge Datasheet:
* http://download.intel.com/design/chipsets/datashts/29063301.pdf
* references to this document given in [].
*
* This module doesn't support the 440LX, but it may be possible to
* make it do so (the 440LX's register definitions are different, but
* not completely so - I haven't studied them in enough detail to know
* how easy this would be).
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/edac.h>
#include "edac_core.h"
#define I82443_REVISION "0.1"
#define EDAC_MOD_STR "i82443bxgx_edac"
/* The 82443BX supports SDRAM, or EDO (EDO for mobile only), "Memory
* Size: 8 MB to 512 MB (1GB with Registered DIMMs) with eight memory
* rows" "The 82443BX supports multiple-bit error detection and
* single-bit error correction when ECC mode is enabled and
* single/multi-bit error detection when correction is disabled.
* During writes to the DRAM, the 82443BX generates ECC for the data
* on a QWord basis. Partial QWord writes require a read-modify-write
* cycle when ECC is enabled."
*/
/* "Additionally, the 82443BX ensures that the data is corrected in
* main memory so that accumulation of errors is prevented. Another
* error within the same QWord would result in a double-bit error
* which is unrecoverable. This is known as hardware scrubbing since
* it requires no software intervention to correct the data in memory."
*/
/* [Also see page 100 (section 4.3), "DRAM Interface"]
* [Also see page 112 (section 4.6.1.4), ECC]
*/
#define I82443BXGX_NR_CSROWS 8
#define I82443BXGX_NR_CHANS 1
#define I82443BXGX_NR_DIMMS 4
/* 82443 PCI Device 0 */
#define I82443BXGX_NBXCFG 0x50 /* 32bit register starting at this PCI
* config space offset */
#define I82443BXGX_NBXCFG_OFFSET_NON_ECCROW 24 /* Array of bits, zero if
* row is non-ECC */
#define I82443BXGX_NBXCFG_OFFSET_DRAM_FREQ 12 /* 2 bits,00=100MHz,10=66 MHz */
#define I82443BXGX_NBXCFG_OFFSET_DRAM_INTEGRITY 7 /* 2 bits: */
#define I82443BXGX_NBXCFG_INTEGRITY_NONE 0x0 /* 00 = Non-ECC */
#define I82443BXGX_NBXCFG_INTEGRITY_EC 0x1 /* 01 = EC (only) */
#define I82443BXGX_NBXCFG_INTEGRITY_ECC 0x2 /* 10 = ECC */
#define I82443BXGX_NBXCFG_INTEGRITY_SCRUB 0x3 /* 11 = ECC + HW Scrub */
#define I82443BXGX_NBXCFG_OFFSET_ECC_DIAG_ENABLE 6
/* 82443 PCI Device 0 */
#define I82443BXGX_EAP 0x80 /* 32bit register starting at this PCI
* config space offset, Error Address
* Pointer Register */
#define I82443BXGX_EAP_OFFSET_EAP 12 /* High 20 bits of error address */
#define I82443BXGX_EAP_OFFSET_MBE BIT(1) /* Err at EAP was multi-bit (W1TC) */
#define I82443BXGX_EAP_OFFSET_SBE BIT(0) /* Err at EAP was single-bit (W1TC) */
#define I82443BXGX_ERRCMD 0x90 /* 8bit register starting at this PCI
* config space offset. */
#define I82443BXGX_ERRCMD_OFFSET_SERR_ON_MBE BIT(1) /* 1 = enable */
#define I82443BXGX_ERRCMD_OFFSET_SERR_ON_SBE BIT(0) /* 1 = enable */
#define I82443BXGX_ERRSTS 0x91 /* 16bit register starting at this PCI
* config space offset. */
#define I82443BXGX_ERRSTS_OFFSET_MBFRE 5 /* 3 bits - first err row multibit */
#define I82443BXGX_ERRSTS_OFFSET_MEF BIT(4) /* 1 = MBE occurred */
#define I82443BXGX_ERRSTS_OFFSET_SBFRE 1 /* 3 bits - first err row singlebit */
#define I82443BXGX_ERRSTS_OFFSET_SEF BIT(0) /* 1 = SBE occurred */
#define I82443BXGX_DRAMC 0x57 /* 8bit register starting at this PCI
* config space offset. */
#define I82443BXGX_DRAMC_OFFSET_DT 3 /* 2 bits, DRAM Type */
#define I82443BXGX_DRAMC_DRAM_IS_EDO 0 /* 00 = EDO */
#define I82443BXGX_DRAMC_DRAM_IS_SDRAM 1 /* 01 = SDRAM */
#define I82443BXGX_DRAMC_DRAM_IS_RSDRAM 2 /* 10 = Registered SDRAM */
#define I82443BXGX_DRB 0x60 /* 8x 8bit registers starting at this PCI
* config space offset. */
/* FIXME - don't poll when ECC disabled? */
struct i82443bxgx_edacmc_error_info {
u32 eap;
};
static struct edac_pci_ctl_info *i82443bxgx_pci;
static struct pci_dev *mci_pdev; /* init dev: in case that AGP code has
* already registered driver
*/
static int i82443bxgx_registered = 1;
static void i82443bxgx_edacmc_get_error_info(struct mem_ctl_info *mci,
struct i82443bxgx_edacmc_error_info
*info)
{
struct pci_dev *pdev;
pdev = to_pci_dev(mci->pdev);
pci_read_config_dword(pdev, I82443BXGX_EAP, &info->eap);
if (info->eap & I82443BXGX_EAP_OFFSET_SBE)
/* Clear error to allow next error to be reported [p.61] */
pci_write_bits32(pdev, I82443BXGX_EAP,
I82443BXGX_EAP_OFFSET_SBE,
I82443BXGX_EAP_OFFSET_SBE);
if (info->eap & I82443BXGX_EAP_OFFSET_MBE)
/* Clear error to allow next error to be reported [p.61] */
pci_write_bits32(pdev, I82443BXGX_EAP,
I82443BXGX_EAP_OFFSET_MBE,
I82443BXGX_EAP_OFFSET_MBE);
}
static int i82443bxgx_edacmc_process_error_info(struct mem_ctl_info *mci,
struct
i82443bxgx_edacmc_error_info
*info, int handle_errors)
{
int error_found = 0;
u32 eapaddr, page, pageoffset;
/* bits 30:12 hold the 4kb block in which the error occurred
* [p.61] */
eapaddr = (info->eap & 0xfffff000);
page = eapaddr >> PAGE_SHIFT;
pageoffset = eapaddr - (page << PAGE_SHIFT);
if (info->eap & I82443BXGX_EAP_OFFSET_SBE) {
error_found = 1;
if (handle_errors)
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
page, pageoffset, 0,
edac_mc_find_csrow_by_page(mci, page),
0, -1, mci->ctl_name, "");
}
if (info->eap & I82443BXGX_EAP_OFFSET_MBE) {
error_found = 1;
if (handle_errors)
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
page, pageoffset, 0,
edac_mc_find_csrow_by_page(mci, page),
0, -1, mci->ctl_name, "");
}
return error_found;
}
static void i82443bxgx_edacmc_check(struct mem_ctl_info *mci)
{
struct i82443bxgx_edacmc_error_info info;
edac_dbg(1, "MC%d\n", mci->mc_idx);
i82443bxgx_edacmc_get_error_info(mci, &info);
i82443bxgx_edacmc_process_error_info(mci, &info, 1);
}
static void i82443bxgx_init_csrows(struct mem_ctl_info *mci,
struct pci_dev *pdev,
enum edac_type edac_mode,
enum mem_type mtype)
{
struct csrow_info *csrow;
struct dimm_info *dimm;
int index;
u8 drbar, dramc;
u32 row_base, row_high_limit, row_high_limit_last;
pci_read_config_byte(pdev, I82443BXGX_DRAMC, &dramc);
row_high_limit_last = 0;
for (index = 0; index < mci->nr_csrows; index++) {
csrow = mci->csrows[index];
dimm = csrow->channels[0]->dimm;
pci_read_config_byte(pdev, I82443BXGX_DRB + index, &drbar);
edac_dbg(1, "MC%d: Row=%d DRB = %#0x\n",
mci->mc_idx, index, drbar);
row_high_limit = ((u32) drbar << 23);
/* find the DRAM Chip Select Base address and mask */
edac_dbg(1, "MC%d: Row=%d, Boundary Address=%#0x, Last = %#0x\n",
mci->mc_idx, index, row_high_limit,
row_high_limit_last);
/* 440GX goes to 2GB, represented with a DRB of 0. */
if (row_high_limit_last && !row_high_limit)
row_high_limit = 1UL << 31;
/* This row is empty [p.49] */
if (row_high_limit == row_high_limit_last)
continue;
row_base = row_high_limit_last;
csrow->first_page = row_base >> PAGE_SHIFT;
csrow->last_page = (row_high_limit >> PAGE_SHIFT) - 1;
dimm->nr_pages = csrow->last_page - csrow->first_page + 1;
/* EAP reports in 4kilobyte granularity [61] */
dimm->grain = 1 << 12;
dimm->mtype = mtype;
/* I don't think 440BX can tell you device type? FIXME? */
dimm->dtype = DEV_UNKNOWN;
/* Mode is global to all rows on 440BX */
dimm->edac_mode = edac_mode;
row_high_limit_last = row_high_limit;
}
}
static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx)
{
struct mem_ctl_info *mci;
struct edac_mc_layer layers[2];
u8 dramc;
u32 nbxcfg, ecc_mode;
enum mem_type mtype;
enum edac_type edac_mode;
edac_dbg(0, "MC:\n");
/* Something is really hosed if PCI config space reads from
* the MC aren't working.
*/
if (pci_read_config_dword(pdev, I82443BXGX_NBXCFG, &nbxcfg))
return -EIO;
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = I82443BXGX_NR_CSROWS;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = I82443BXGX_NR_CHANS;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (mci == NULL)
return -ENOMEM;
edac_dbg(0, "MC: mci = %p\n", mci);
mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_EDO | MEM_FLAG_SDR | MEM_FLAG_RDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED;
pci_read_config_byte(pdev, I82443BXGX_DRAMC, &dramc);
switch ((dramc >> I82443BXGX_DRAMC_OFFSET_DT) & (BIT(0) | BIT(1))) {
case I82443BXGX_DRAMC_DRAM_IS_EDO:
mtype = MEM_EDO;
break;
case I82443BXGX_DRAMC_DRAM_IS_SDRAM:
mtype = MEM_SDR;
break;
case I82443BXGX_DRAMC_DRAM_IS_RSDRAM:
mtype = MEM_RDR;
break;
default:
edac_dbg(0, "Unknown/reserved DRAM type value in DRAMC register!\n");
mtype = -MEM_UNKNOWN;
}
if ((mtype == MEM_SDR) || (mtype == MEM_RDR))
mci->edac_cap = mci->edac_ctl_cap;
else
mci->edac_cap = EDAC_FLAG_NONE;
mci->scrub_cap = SCRUB_FLAG_HW_SRC;
pci_read_config_dword(pdev, I82443BXGX_NBXCFG, &nbxcfg);
ecc_mode = ((nbxcfg >> I82443BXGX_NBXCFG_OFFSET_DRAM_INTEGRITY) &
(BIT(0) | BIT(1)));
mci->scrub_mode = (ecc_mode == I82443BXGX_NBXCFG_INTEGRITY_SCRUB)
? SCRUB_HW_SRC : SCRUB_NONE;
switch (ecc_mode) {
case I82443BXGX_NBXCFG_INTEGRITY_NONE:
edac_mode = EDAC_NONE;
break;
case I82443BXGX_NBXCFG_INTEGRITY_EC:
edac_mode = EDAC_EC;
break;
case I82443BXGX_NBXCFG_INTEGRITY_ECC:
case I82443BXGX_NBXCFG_INTEGRITY_SCRUB:
edac_mode = EDAC_SECDED;
break;
default:
edac_dbg(0, "Unknown/reserved ECC state in NBXCFG register!\n");
edac_mode = EDAC_UNKNOWN;
break;
}
i82443bxgx_init_csrows(mci, pdev, edac_mode, mtype);
/* Many BIOSes don't clear error flags on boot, so do this
* here, or we get "phantom" errors occurring at module-load
* time. */
pci_write_bits32(pdev, I82443BXGX_EAP,
(I82443BXGX_EAP_OFFSET_SBE |
I82443BXGX_EAP_OFFSET_MBE),
(I82443BXGX_EAP_OFFSET_SBE |
I82443BXGX_EAP_OFFSET_MBE));
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = I82443_REVISION;
mci->ctl_name = "I82443BXGX";
mci->dev_name = pci_name(pdev);
mci->edac_check = i82443bxgx_edacmc_check;
mci->ctl_page_to_phys = NULL;
if (edac_mc_add_mc(mci)) {
edac_dbg(3, "failed edac_mc_add_mc()\n");
goto fail;
}
/* allocating generic PCI control info */
i82443bxgx_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
if (!i82443bxgx_pci) {
printk(KERN_WARNING
"%s(): Unable to create PCI control\n",
__func__);
printk(KERN_WARNING
"%s(): PCI error report via EDAC not setup\n",
__func__);
}
edac_dbg(3, "MC: success\n");
return 0;
fail:
edac_mc_free(mci);
return -ENODEV;
}
EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_probe1);
/* returns count (>= 0), or negative on error */
static int i82443bxgx_edacmc_init_one(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
int rc;
edac_dbg(0, "MC:\n");
/* don't need to call pci_enable_device() */
rc = i82443bxgx_edacmc_probe1(pdev, ent->driver_data);
if (mci_pdev == NULL)
mci_pdev = pci_dev_get(pdev);
return rc;
}
static void i82443bxgx_edacmc_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
edac_dbg(0, "\n");
if (i82443bxgx_pci)
edac_pci_release_generic_ctl(i82443bxgx_pci);
if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
return;
edac_mc_free(mci);
}
EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_remove_one);
static const struct pci_device_id i82443bxgx_pci_tbl[] = {
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0)},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2)},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0)},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2)},
{0,} /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, i82443bxgx_pci_tbl);
static struct pci_driver i82443bxgx_edacmc_driver = {
.name = EDAC_MOD_STR,
.probe = i82443bxgx_edacmc_init_one,
.remove = i82443bxgx_edacmc_remove_one,
.id_table = i82443bxgx_pci_tbl,
};
static int __init i82443bxgx_edacmc_init(void)
{
int pci_rc;
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
pci_rc = pci_register_driver(&i82443bxgx_edacmc_driver);
if (pci_rc < 0)
goto fail0;
if (mci_pdev == NULL) {
const struct pci_device_id *id = &i82443bxgx_pci_tbl[0];
int i = 0;
i82443bxgx_registered = 0;
while (mci_pdev == NULL && id->vendor != 0) {
mci_pdev = pci_get_device(id->vendor,
id->device, NULL);
i++;
id = &i82443bxgx_pci_tbl[i];
}
if (!mci_pdev) {
edac_dbg(0, "i82443bxgx pci_get_device fail\n");
pci_rc = -ENODEV;
goto fail1;
}
pci_rc = i82443bxgx_edacmc_init_one(mci_pdev, i82443bxgx_pci_tbl);
if (pci_rc < 0) {
edac_dbg(0, "i82443bxgx init fail\n");
pci_rc = -ENODEV;
goto fail1;
}
}
return 0;
fail1:
pci_unregister_driver(&i82443bxgx_edacmc_driver);
fail0:
if (mci_pdev != NULL)
pci_dev_put(mci_pdev);
return pci_rc;
}
static void __exit i82443bxgx_edacmc_exit(void)
{
pci_unregister_driver(&i82443bxgx_edacmc_driver);
if (!i82443bxgx_registered)
i82443bxgx_edacmc_remove_one(mci_pdev);
if (mci_pdev)
pci_dev_put(mci_pdev);
}
module_init(i82443bxgx_edacmc_init);
module_exit(i82443bxgx_edacmc_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Tim Small <tim@buttersideup.com> - WPAD");
MODULE_DESCRIPTION("EDAC MC support for Intel 82443BX/GX memory controllers");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");

371
drivers/edac/i82860_edac.c Normal file
View file

@ -0,0 +1,371 @@
/*
* Intel 82860 Memory Controller kernel module
* (C) 2005 Red Hat (http://www.redhat.com)
* This file may be distributed under the terms of the
* GNU General Public License.
*
* Written by Ben Woodard <woodard@redhat.com>
* shamelessly copied from and based upon the edac_i82875 driver
* by Thayne Harbaugh of Linux Networx. (http://lnxi.com)
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/edac.h>
#include "edac_core.h"
#define I82860_REVISION " Ver: 2.0.2"
#define EDAC_MOD_STR "i82860_edac"
#define i82860_printk(level, fmt, arg...) \
edac_printk(level, "i82860", fmt, ##arg)
#define i82860_mc_printk(mci, level, fmt, arg...) \
edac_mc_chipset_printk(mci, level, "i82860", fmt, ##arg)
#ifndef PCI_DEVICE_ID_INTEL_82860_0
#define PCI_DEVICE_ID_INTEL_82860_0 0x2531
#endif /* PCI_DEVICE_ID_INTEL_82860_0 */
#define I82860_MCHCFG 0x50
#define I82860_GBA 0x60
#define I82860_GBA_MASK 0x7FF
#define I82860_GBA_SHIFT 24
#define I82860_ERRSTS 0xC8
#define I82860_EAP 0xE4
#define I82860_DERRCTL_STS 0xE2
enum i82860_chips {
I82860 = 0,
};
struct i82860_dev_info {
const char *ctl_name;
};
struct i82860_error_info {
u16 errsts;
u32 eap;
u16 derrsyn;
u16 errsts2;
};
static const struct i82860_dev_info i82860_devs[] = {
[I82860] = {
.ctl_name = "i82860"},
};
static struct pci_dev *mci_pdev; /* init dev: in case that AGP code
* has already registered driver
*/
static struct edac_pci_ctl_info *i82860_pci;
static void i82860_get_error_info(struct mem_ctl_info *mci,
struct i82860_error_info *info)
{
struct pci_dev *pdev;
pdev = to_pci_dev(mci->pdev);
/*
* This is a mess because there is no atomic way to read all the
* registers at once and the registers can transition from CE being
* overwritten by UE.
*/
pci_read_config_word(pdev, I82860_ERRSTS, &info->errsts);
pci_read_config_dword(pdev, I82860_EAP, &info->eap);
pci_read_config_word(pdev, I82860_DERRCTL_STS, &info->derrsyn);
pci_read_config_word(pdev, I82860_ERRSTS, &info->errsts2);
pci_write_bits16(pdev, I82860_ERRSTS, 0x0003, 0x0003);
/*
* If the error is the same for both reads then the first set of reads
* is valid. If there is a change then there is a CE no info and the
* second set of reads is valid and should be UE info.
*/
if (!(info->errsts2 & 0x0003))
return;
if ((info->errsts ^ info->errsts2) & 0x0003) {
pci_read_config_dword(pdev, I82860_EAP, &info->eap);
pci_read_config_word(pdev, I82860_DERRCTL_STS, &info->derrsyn);
}
}
static int i82860_process_error_info(struct mem_ctl_info *mci,
struct i82860_error_info *info,
int handle_errors)
{
struct dimm_info *dimm;
int row;
if (!(info->errsts2 & 0x0003))
return 0;
if (!handle_errors)
return 1;
if ((info->errsts ^ info->errsts2) & 0x0003) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
-1, -1, -1, "UE overwrote CE", "");
info->errsts = info->errsts2;
}
info->eap >>= PAGE_SHIFT;
row = edac_mc_find_csrow_by_page(mci, info->eap);
dimm = mci->csrows[row]->channels[0]->dimm;
if (info->errsts & 0x0002)
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
info->eap, 0, 0,
dimm->location[0], dimm->location[1], -1,
"i82860 UE", "");
else
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
info->eap, 0, info->derrsyn,
dimm->location[0], dimm->location[1], -1,
"i82860 CE", "");
return 1;
}
static void i82860_check(struct mem_ctl_info *mci)
{
struct i82860_error_info info;
edac_dbg(1, "MC%d\n", mci->mc_idx);
i82860_get_error_info(mci, &info);
i82860_process_error_info(mci, &info, 1);
}
static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev)
{
unsigned long last_cumul_size;
u16 mchcfg_ddim; /* DRAM Data Integrity Mode 0=none, 2=edac */
u16 value;
u32 cumul_size;
struct csrow_info *csrow;
struct dimm_info *dimm;
int index;
pci_read_config_word(pdev, I82860_MCHCFG, &mchcfg_ddim);
mchcfg_ddim = mchcfg_ddim & 0x180;
last_cumul_size = 0;
/* The group row boundary (GRA) reg values are boundary address
* for each DRAM row with a granularity of 16MB. GRA regs are
* cumulative; therefore GRA15 will contain the total memory contained
* in all eight rows.
*/
for (index = 0; index < mci->nr_csrows; index++) {
csrow = mci->csrows[index];
dimm = csrow->channels[0]->dimm;
pci_read_config_word(pdev, I82860_GBA + index * 2, &value);
cumul_size = (value & I82860_GBA_MASK) <<
(I82860_GBA_SHIFT - PAGE_SHIFT);
edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size);
if (cumul_size == last_cumul_size)
continue; /* not populated */
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
dimm->nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
dimm->grain = 1 << 12; /* I82860_EAP has 4KiB reolution */
dimm->mtype = MEM_RMBS;
dimm->dtype = DEV_UNKNOWN;
dimm->edac_mode = mchcfg_ddim ? EDAC_SECDED : EDAC_NONE;
}
}
static int i82860_probe1(struct pci_dev *pdev, int dev_idx)
{
struct mem_ctl_info *mci;
struct edac_mc_layer layers[2];
struct i82860_error_info discard;
/*
* RDRAM has channels but these don't map onto the csrow abstraction.
* According with the datasheet, there are 2 Rambus channels, supporting
* up to 16 direct RDRAM devices.
* The device groups from the GRA registers seem to map reasonably
* well onto the notion of a chip select row.
* There are 16 GRA registers and since the name is associated with
* the channel and the GRA registers map to physical devices so we are
* going to make 1 channel for group.
*/
layers[0].type = EDAC_MC_LAYER_CHANNEL;
layers[0].size = 2;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_SLOT;
layers[1].size = 8;
layers[1].is_virt_csrow = true;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (!mci)
return -ENOMEM;
edac_dbg(3, "init mci\n");
mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
/* I"m not sure about this but I think that all RDRAM is SECDED */
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = I82860_REVISION;
mci->ctl_name = i82860_devs[dev_idx].ctl_name;
mci->dev_name = pci_name(pdev);
mci->edac_check = i82860_check;
mci->ctl_page_to_phys = NULL;
i82860_init_csrows(mci, pdev);
i82860_get_error_info(mci, &discard); /* clear counters */
/* Here we assume that we will never see multiple instances of this
* type of memory controller. The ID is therefore hardcoded to 0.
*/
if (edac_mc_add_mc(mci)) {
edac_dbg(3, "failed edac_mc_add_mc()\n");
goto fail;
}
/* allocating generic PCI control info */
i82860_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
if (!i82860_pci) {
printk(KERN_WARNING
"%s(): Unable to create PCI control\n",
__func__);
printk(KERN_WARNING
"%s(): PCI error report via EDAC not setup\n",
__func__);
}
/* get this far and it's successful */
edac_dbg(3, "success\n");
return 0;
fail:
edac_mc_free(mci);
return -ENODEV;
}
/* returns count (>= 0), or negative on error */
static int i82860_init_one(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
int rc;
edac_dbg(0, "\n");
i82860_printk(KERN_INFO, "i82860 init one\n");
if (pci_enable_device(pdev) < 0)
return -EIO;
rc = i82860_probe1(pdev, ent->driver_data);
if (rc == 0)
mci_pdev = pci_dev_get(pdev);
return rc;
}
static void i82860_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
edac_dbg(0, "\n");
if (i82860_pci)
edac_pci_release_generic_ctl(i82860_pci);
if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
return;
edac_mc_free(mci);
}
static const struct pci_device_id i82860_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, 82860_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
I82860},
{
0,
} /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, i82860_pci_tbl);
static struct pci_driver i82860_driver = {
.name = EDAC_MOD_STR,
.probe = i82860_init_one,
.remove = i82860_remove_one,
.id_table = i82860_pci_tbl,
};
static int __init i82860_init(void)
{
int pci_rc;
edac_dbg(3, "\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
if ((pci_rc = pci_register_driver(&i82860_driver)) < 0)
goto fail0;
if (!mci_pdev) {
mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_82860_0, NULL);
if (mci_pdev == NULL) {
edac_dbg(0, "860 pci_get_device fail\n");
pci_rc = -ENODEV;
goto fail1;
}
pci_rc = i82860_init_one(mci_pdev, i82860_pci_tbl);
if (pci_rc < 0) {
edac_dbg(0, "860 init fail\n");
pci_rc = -ENODEV;
goto fail1;
}
}
return 0;
fail1:
pci_unregister_driver(&i82860_driver);
fail0:
if (mci_pdev != NULL)
pci_dev_put(mci_pdev);
return pci_rc;
}
static void __exit i82860_exit(void)
{
edac_dbg(3, "\n");
pci_unregister_driver(&i82860_driver);
if (mci_pdev != NULL)
pci_dev_put(mci_pdev);
}
module_init(i82860_init);
module_exit(i82860_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) "
"Ben Woodard <woodard@redhat.com>");
MODULE_DESCRIPTION("ECC support for Intel 82860 memory hub controllers");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");

604
drivers/edac/i82875p_edac.c Normal file
View file

@ -0,0 +1,604 @@
/*
* Intel D82875P Memory Controller kernel module
* (C) 2003 Linux Networx (http://lnxi.com)
* This file may be distributed under the terms of the
* GNU General Public License.
*
* Written by Thayne Harbaugh
* Contributors:
* Wang Zhenyu at intel.com
*
* $Id: edac_i82875p.c,v 1.5.2.11 2005/10/05 00:43:44 dsp_llnl Exp $
*
* Note: E7210 appears same as D82875P - zhenyu.z.wang at intel.com
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/edac.h>
#include "edac_core.h"
#define I82875P_REVISION " Ver: 2.0.2"
#define EDAC_MOD_STR "i82875p_edac"
#define i82875p_printk(level, fmt, arg...) \
edac_printk(level, "i82875p", fmt, ##arg)
#define i82875p_mc_printk(mci, level, fmt, arg...) \
edac_mc_chipset_printk(mci, level, "i82875p", fmt, ##arg)
#ifndef PCI_DEVICE_ID_INTEL_82875_0
#define PCI_DEVICE_ID_INTEL_82875_0 0x2578
#endif /* PCI_DEVICE_ID_INTEL_82875_0 */
#ifndef PCI_DEVICE_ID_INTEL_82875_6
#define PCI_DEVICE_ID_INTEL_82875_6 0x257e
#endif /* PCI_DEVICE_ID_INTEL_82875_6 */
/* four csrows in dual channel, eight in single channel */
#define I82875P_NR_DIMMS 8
#define I82875P_NR_CSROWS(nr_chans) (I82875P_NR_DIMMS / (nr_chans))
/* Intel 82875p register addresses - device 0 function 0 - DRAM Controller */
#define I82875P_EAP 0x58 /* Error Address Pointer (32b)
*
* 31:12 block address
* 11:0 reserved
*/
#define I82875P_DERRSYN 0x5c /* DRAM Error Syndrome (8b)
*
* 7:0 DRAM ECC Syndrome
*/
#define I82875P_DES 0x5d /* DRAM Error Status (8b)
*
* 7:1 reserved
* 0 Error channel 0/1
*/
#define I82875P_ERRSTS 0xc8 /* Error Status Register (16b)
*
* 15:10 reserved
* 9 non-DRAM lock error (ndlock)
* 8 Sftwr Generated SMI
* 7 ECC UE
* 6 reserved
* 5 MCH detects unimplemented cycle
* 4 AGP access outside GA
* 3 Invalid AGP access
* 2 Invalid GA translation table
* 1 Unsupported AGP command
* 0 ECC CE
*/
#define I82875P_ERRCMD 0xca /* Error Command (16b)
*
* 15:10 reserved
* 9 SERR on non-DRAM lock
* 8 SERR on ECC UE
* 7 SERR on ECC CE
* 6 target abort on high exception
* 5 detect unimplemented cyc
* 4 AGP access outside of GA
* 3 SERR on invalid AGP access
* 2 invalid translation table
* 1 SERR on unsupported AGP command
* 0 reserved
*/
/* Intel 82875p register addresses - device 6 function 0 - DRAM Controller */
#define I82875P_PCICMD6 0x04 /* PCI Command Register (16b)
*
* 15:10 reserved
* 9 fast back-to-back - ro 0
* 8 SERR enable - ro 0
* 7 addr/data stepping - ro 0
* 6 parity err enable - ro 0
* 5 VGA palette snoop - ro 0
* 4 mem wr & invalidate - ro 0
* 3 special cycle - ro 0
* 2 bus master - ro 0
* 1 mem access dev6 - 0(dis),1(en)
* 0 IO access dev3 - 0(dis),1(en)
*/
#define I82875P_BAR6 0x10 /* Mem Delays Base ADDR Reg (32b)
*
* 31:12 mem base addr [31:12]
* 11:4 address mask - ro 0
* 3 prefetchable - ro 0(non),1(pre)
* 2:1 mem type - ro 0
* 0 mem space - ro 0
*/
/* Intel 82875p MMIO register space - device 0 function 0 - MMR space */
#define I82875P_DRB_SHIFT 26 /* 64MiB grain */
#define I82875P_DRB 0x00 /* DRAM Row Boundary (8b x 8)
*
* 7 reserved
* 6:0 64MiB row boundary addr
*/
#define I82875P_DRA 0x10 /* DRAM Row Attribute (4b x 8)
*
* 7 reserved
* 6:4 row attr row 1
* 3 reserved
* 2:0 row attr row 0
*
* 000 = 4KiB
* 001 = 8KiB
* 010 = 16KiB
* 011 = 32KiB
*/
#define I82875P_DRC 0x68 /* DRAM Controller Mode (32b)
*
* 31:30 reserved
* 29 init complete
* 28:23 reserved
* 22:21 nr chan 00=1,01=2
* 20 reserved
* 19:18 Data Integ Mode 00=none,01=ecc
* 17:11 reserved
* 10:8 refresh mode
* 7 reserved
* 6:4 mode select
* 3:2 reserved
* 1:0 DRAM type 01=DDR
*/
enum i82875p_chips {
I82875P = 0,
};
struct i82875p_pvt {
struct pci_dev *ovrfl_pdev;
void __iomem *ovrfl_window;
};
struct i82875p_dev_info {
const char *ctl_name;
};
struct i82875p_error_info {
u16 errsts;
u32 eap;
u8 des;
u8 derrsyn;
u16 errsts2;
};
static const struct i82875p_dev_info i82875p_devs[] = {
[I82875P] = {
.ctl_name = "i82875p"},
};
static struct pci_dev *mci_pdev; /* init dev: in case that AGP code has
* already registered driver
*/
static struct edac_pci_ctl_info *i82875p_pci;
static void i82875p_get_error_info(struct mem_ctl_info *mci,
struct i82875p_error_info *info)
{
struct pci_dev *pdev;
pdev = to_pci_dev(mci->pdev);
/*
* This is a mess because there is no atomic way to read all the
* registers at once and the registers can transition from CE being
* overwritten by UE.
*/
pci_read_config_word(pdev, I82875P_ERRSTS, &info->errsts);
if (!(info->errsts & 0x0081))
return;
pci_read_config_dword(pdev, I82875P_EAP, &info->eap);
pci_read_config_byte(pdev, I82875P_DES, &info->des);
pci_read_config_byte(pdev, I82875P_DERRSYN, &info->derrsyn);
pci_read_config_word(pdev, I82875P_ERRSTS, &info->errsts2);
/*
* If the error is the same then we can for both reads then
* the first set of reads is valid. If there is a change then
* there is a CE no info and the second set of reads is valid
* and should be UE info.
*/
if ((info->errsts ^ info->errsts2) & 0x0081) {
pci_read_config_dword(pdev, I82875P_EAP, &info->eap);
pci_read_config_byte(pdev, I82875P_DES, &info->des);
pci_read_config_byte(pdev, I82875P_DERRSYN, &info->derrsyn);
}
pci_write_bits16(pdev, I82875P_ERRSTS, 0x0081, 0x0081);
}
static int i82875p_process_error_info(struct mem_ctl_info *mci,
struct i82875p_error_info *info,
int handle_errors)
{
int row, multi_chan;
multi_chan = mci->csrows[0]->nr_channels - 1;
if (!(info->errsts & 0x0081))
return 0;
if (!handle_errors)
return 1;
if ((info->errsts ^ info->errsts2) & 0x0081) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
-1, -1, -1,
"UE overwrote CE", "");
info->errsts = info->errsts2;
}
info->eap >>= PAGE_SHIFT;
row = edac_mc_find_csrow_by_page(mci, info->eap);
if (info->errsts & 0x0080)
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
info->eap, 0, 0,
row, -1, -1,
"i82875p UE", "");
else
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
info->eap, 0, info->derrsyn,
row, multi_chan ? (info->des & 0x1) : 0,
-1, "i82875p CE", "");
return 1;
}
static void i82875p_check(struct mem_ctl_info *mci)
{
struct i82875p_error_info info;
edac_dbg(1, "MC%d\n", mci->mc_idx);
i82875p_get_error_info(mci, &info);
i82875p_process_error_info(mci, &info, 1);
}
/* Return 0 on success or 1 on failure. */
static int i82875p_setup_overfl_dev(struct pci_dev *pdev,
struct pci_dev **ovrfl_pdev,
void __iomem **ovrfl_window)
{
struct pci_dev *dev;
void __iomem *window;
*ovrfl_pdev = NULL;
*ovrfl_window = NULL;
dev = pci_get_device(PCI_VEND_DEV(INTEL, 82875_6), NULL);
if (dev == NULL) {
/* Intel tells BIOS developers to hide device 6 which
* configures the overflow device access containing
* the DRBs - this is where we expose device 6.
* http://www.x86-secret.com/articles/tweak/pat/patsecrets-2.htm
*/
pci_write_bits8(pdev, 0xf4, 0x2, 0x2);
dev = pci_scan_single_device(pdev->bus, PCI_DEVFN(6, 0));
if (dev == NULL)
return 1;
pci_bus_assign_resources(dev->bus);
pci_bus_add_device(dev);
}
*ovrfl_pdev = dev;
if (pci_enable_device(dev)) {
i82875p_printk(KERN_ERR, "%s(): Failed to enable overflow "
"device\n", __func__);
return 1;
}
if (pci_request_regions(dev, pci_name(dev))) {
#ifdef CORRECT_BIOS
goto fail0;
#endif
}
/* cache is irrelevant for PCI bus reads/writes */
window = pci_ioremap_bar(dev, 0);
if (window == NULL) {
i82875p_printk(KERN_ERR, "%s(): Failed to ioremap bar6\n",
__func__);
goto fail1;
}
*ovrfl_window = window;
return 0;
fail1:
pci_release_regions(dev);
#ifdef CORRECT_BIOS
fail0:
pci_disable_device(dev);
#endif
/* NOTE: the ovrfl proc entry and pci_dev are intentionally left */
return 1;
}
/* Return 1 if dual channel mode is active. Else return 0. */
static inline int dual_channel_active(u32 drc)
{
return (drc >> 21) & 0x1;
}
static void i82875p_init_csrows(struct mem_ctl_info *mci,
struct pci_dev *pdev,
void __iomem * ovrfl_window, u32 drc)
{
struct csrow_info *csrow;
struct dimm_info *dimm;
unsigned nr_chans = dual_channel_active(drc) + 1;
unsigned long last_cumul_size;
u8 value;
u32 drc_ddim; /* DRAM Data Integrity Mode 0=none,2=edac */
u32 cumul_size, nr_pages;
int index, j;
drc_ddim = (drc >> 18) & 0x1;
last_cumul_size = 0;
/* The dram row boundary (DRB) reg values are boundary address
* for each DRAM row with a granularity of 32 or 64MB (single/dual
* channel operation). DRB regs are cumulative; therefore DRB7 will
* contain the total memory contained in all eight rows.
*/
for (index = 0; index < mci->nr_csrows; index++) {
csrow = mci->csrows[index];
value = readb(ovrfl_window + I82875P_DRB + index);
cumul_size = value << (I82875P_DRB_SHIFT - PAGE_SHIFT);
edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size);
if (cumul_size == last_cumul_size)
continue; /* not populated */
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
for (j = 0; j < nr_chans; j++) {
dimm = csrow->channels[j]->dimm;
dimm->nr_pages = nr_pages / nr_chans;
dimm->grain = 1 << 12; /* I82875P_EAP has 4KiB reolution */
dimm->mtype = MEM_DDR;
dimm->dtype = DEV_UNKNOWN;
dimm->edac_mode = drc_ddim ? EDAC_SECDED : EDAC_NONE;
}
}
}
static int i82875p_probe1(struct pci_dev *pdev, int dev_idx)
{
int rc = -ENODEV;
struct mem_ctl_info *mci;
struct edac_mc_layer layers[2];
struct i82875p_pvt *pvt;
struct pci_dev *ovrfl_pdev;
void __iomem *ovrfl_window;
u32 drc;
u32 nr_chans;
struct i82875p_error_info discard;
edac_dbg(0, "\n");
if (i82875p_setup_overfl_dev(pdev, &ovrfl_pdev, &ovrfl_window))
return -ENODEV;
drc = readl(ovrfl_window + I82875P_DRC);
nr_chans = dual_channel_active(drc) + 1;
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = I82875P_NR_CSROWS(nr_chans);
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = nr_chans;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
if (!mci) {
rc = -ENOMEM;
goto fail0;
}
edac_dbg(3, "init mci\n");
mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_UNKNOWN;
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = I82875P_REVISION;
mci->ctl_name = i82875p_devs[dev_idx].ctl_name;
mci->dev_name = pci_name(pdev);
mci->edac_check = i82875p_check;
mci->ctl_page_to_phys = NULL;
edac_dbg(3, "init pvt\n");
pvt = (struct i82875p_pvt *)mci->pvt_info;
pvt->ovrfl_pdev = ovrfl_pdev;
pvt->ovrfl_window = ovrfl_window;
i82875p_init_csrows(mci, pdev, ovrfl_window, drc);
i82875p_get_error_info(mci, &discard); /* clear counters */
/* Here we assume that we will never see multiple instances of this
* type of memory controller. The ID is therefore hardcoded to 0.
*/
if (edac_mc_add_mc(mci)) {
edac_dbg(3, "failed edac_mc_add_mc()\n");
goto fail1;
}
/* allocating generic PCI control info */
i82875p_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
if (!i82875p_pci) {
printk(KERN_WARNING
"%s(): Unable to create PCI control\n",
__func__);
printk(KERN_WARNING
"%s(): PCI error report via EDAC not setup\n",
__func__);
}
/* get this far and it's successful */
edac_dbg(3, "success\n");
return 0;
fail1:
edac_mc_free(mci);
fail0:
iounmap(ovrfl_window);
pci_release_regions(ovrfl_pdev);
pci_disable_device(ovrfl_pdev);
/* NOTE: the ovrfl proc entry and pci_dev are intentionally left */
return rc;
}
/* returns count (>= 0), or negative on error */
static int i82875p_init_one(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
int rc;
edac_dbg(0, "\n");
i82875p_printk(KERN_INFO, "i82875p init one\n");
if (pci_enable_device(pdev) < 0)
return -EIO;
rc = i82875p_probe1(pdev, ent->driver_data);
if (mci_pdev == NULL)
mci_pdev = pci_dev_get(pdev);
return rc;
}
static void i82875p_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
struct i82875p_pvt *pvt = NULL;
edac_dbg(0, "\n");
if (i82875p_pci)
edac_pci_release_generic_ctl(i82875p_pci);
if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
return;
pvt = (struct i82875p_pvt *)mci->pvt_info;
if (pvt->ovrfl_window)
iounmap(pvt->ovrfl_window);
if (pvt->ovrfl_pdev) {
#ifdef CORRECT_BIOS
pci_release_regions(pvt->ovrfl_pdev);
#endif /*CORRECT_BIOS */
pci_disable_device(pvt->ovrfl_pdev);
pci_dev_put(pvt->ovrfl_pdev);
}
edac_mc_free(mci);
}
static const struct pci_device_id i82875p_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, 82875_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
I82875P},
{
0,
} /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, i82875p_pci_tbl);
static struct pci_driver i82875p_driver = {
.name = EDAC_MOD_STR,
.probe = i82875p_init_one,
.remove = i82875p_remove_one,
.id_table = i82875p_pci_tbl,
};
static int __init i82875p_init(void)
{
int pci_rc;
edac_dbg(3, "\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
pci_rc = pci_register_driver(&i82875p_driver);
if (pci_rc < 0)
goto fail0;
if (mci_pdev == NULL) {
mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_82875_0, NULL);
if (!mci_pdev) {
edac_dbg(0, "875p pci_get_device fail\n");
pci_rc = -ENODEV;
goto fail1;
}
pci_rc = i82875p_init_one(mci_pdev, i82875p_pci_tbl);
if (pci_rc < 0) {
edac_dbg(0, "875p init fail\n");
pci_rc = -ENODEV;
goto fail1;
}
}
return 0;
fail1:
pci_unregister_driver(&i82875p_driver);
fail0:
if (mci_pdev != NULL)
pci_dev_put(mci_pdev);
return pci_rc;
}
static void __exit i82875p_exit(void)
{
edac_dbg(3, "\n");
i82875p_remove_one(mci_pdev);
pci_dev_put(mci_pdev);
pci_unregister_driver(&i82875p_driver);
}
module_init(i82875p_init);
module_exit(i82875p_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh");
MODULE_DESCRIPTION("MC support for Intel 82875 memory hub controllers");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");

714
drivers/edac/i82975x_edac.c Normal file
View file

@ -0,0 +1,714 @@
/*
* Intel 82975X Memory Controller kernel module
* (C) 2007 aCarLab (India) Pvt. Ltd. (http://acarlab.com)
* (C) 2007 jetzbroadband (http://jetzbroadband.com)
* This file may be distributed under the terms of the
* GNU General Public License.
*
* Written by Arvind R.
* Copied from i82875p_edac.c source:
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/edac.h>
#include "edac_core.h"
#define I82975X_REVISION " Ver: 1.0.0"
#define EDAC_MOD_STR "i82975x_edac"
#define i82975x_printk(level, fmt, arg...) \
edac_printk(level, "i82975x", fmt, ##arg)
#define i82975x_mc_printk(mci, level, fmt, arg...) \
edac_mc_chipset_printk(mci, level, "i82975x", fmt, ##arg)
#ifndef PCI_DEVICE_ID_INTEL_82975_0
#define PCI_DEVICE_ID_INTEL_82975_0 0x277c
#endif /* PCI_DEVICE_ID_INTEL_82975_0 */
#define I82975X_NR_DIMMS 8
#define I82975X_NR_CSROWS(nr_chans) (I82975X_NR_DIMMS / (nr_chans))
/* Intel 82975X register addresses - device 0 function 0 - DRAM Controller */
#define I82975X_EAP 0x58 /* Dram Error Address Pointer (32b)
*
* 31:7 128 byte cache-line address
* 6:1 reserved
* 0 0: CH0; 1: CH1
*/
#define I82975X_DERRSYN 0x5c /* Dram Error SYNdrome (8b)
*
* 7:0 DRAM ECC Syndrome
*/
#define I82975X_DES 0x5d /* Dram ERRor DeSTination (8b)
* 0h: Processor Memory Reads
* 1h:7h reserved
* More - See Page 65 of Intel DocSheet.
*/
#define I82975X_ERRSTS 0xc8 /* Error Status Register (16b)
*
* 15:12 reserved
* 11 Thermal Sensor Event
* 10 reserved
* 9 non-DRAM lock error (ndlock)
* 8 Refresh Timeout
* 7:2 reserved
* 1 ECC UE (multibit DRAM error)
* 0 ECC CE (singlebit DRAM error)
*/
/* Error Reporting is supported by 3 mechanisms:
1. DMI SERR generation ( ERRCMD )
2. SMI DMI generation ( SMICMD )
3. SCI DMI generation ( SCICMD )
NOTE: Only ONE of the three must be enabled
*/
#define I82975X_ERRCMD 0xca /* Error Command (16b)
*
* 15:12 reserved
* 11 Thermal Sensor Event
* 10 reserved
* 9 non-DRAM lock error (ndlock)
* 8 Refresh Timeout
* 7:2 reserved
* 1 ECC UE (multibit DRAM error)
* 0 ECC CE (singlebit DRAM error)
*/
#define I82975X_SMICMD 0xcc /* Error Command (16b)
*
* 15:2 reserved
* 1 ECC UE (multibit DRAM error)
* 0 ECC CE (singlebit DRAM error)
*/
#define I82975X_SCICMD 0xce /* Error Command (16b)
*
* 15:2 reserved
* 1 ECC UE (multibit DRAM error)
* 0 ECC CE (singlebit DRAM error)
*/
#define I82975X_XEAP 0xfc /* Extended Dram Error Address Pointer (8b)
*
* 7:1 reserved
* 0 Bit32 of the Dram Error Address
*/
#define I82975X_MCHBAR 0x44 /*
*
* 31:14 Base Addr of 16K memory-mapped
* configuration space
* 13:1 reserverd
* 0 mem-mapped config space enable
*/
/* NOTE: Following addresses have to indexed using MCHBAR offset (44h, 32b) */
/* Intel 82975x memory mapped register space */
#define I82975X_DRB_SHIFT 25 /* fixed 32MiB grain */
#define I82975X_DRB 0x100 /* DRAM Row Boundary (8b x 8)
*
* 7 set to 1 in highest DRB of
* channel if 4GB in ch.
* 6:2 upper boundary of rank in
* 32MB grains
* 1:0 set to 0
*/
#define I82975X_DRB_CH0R0 0x100
#define I82975X_DRB_CH0R1 0x101
#define I82975X_DRB_CH0R2 0x102
#define I82975X_DRB_CH0R3 0x103
#define I82975X_DRB_CH1R0 0x180
#define I82975X_DRB_CH1R1 0x181
#define I82975X_DRB_CH1R2 0x182
#define I82975X_DRB_CH1R3 0x183
#define I82975X_DRA 0x108 /* DRAM Row Attribute (4b x 8)
* defines the PAGE SIZE to be used
* for the rank
* 7 reserved
* 6:4 row attr of odd rank, i.e. 1
* 3 reserved
* 2:0 row attr of even rank, i.e. 0
*
* 000 = unpopulated
* 001 = reserved
* 010 = 4KiB
* 011 = 8KiB
* 100 = 16KiB
* others = reserved
*/
#define I82975X_DRA_CH0R01 0x108
#define I82975X_DRA_CH0R23 0x109
#define I82975X_DRA_CH1R01 0x188
#define I82975X_DRA_CH1R23 0x189
#define I82975X_BNKARC 0x10e /* Type of device in each rank - Bank Arch (16b)
*
* 15:8 reserved
* 7:6 Rank 3 architecture
* 5:4 Rank 2 architecture
* 3:2 Rank 1 architecture
* 1:0 Rank 0 architecture
*
* 00 => 4 banks
* 01 => 8 banks
*/
#define I82975X_C0BNKARC 0x10e
#define I82975X_C1BNKARC 0x18e
#define I82975X_DRC 0x120 /* DRAM Controller Mode0 (32b)
*
* 31:30 reserved
* 29 init complete
* 28:11 reserved, according to Intel
* 22:21 number of channels
* 00=1 01=2 in 82875
* seems to be ECC mode
* bits in 82975 in Asus
* P5W
* 19:18 Data Integ Mode
* 00=none 01=ECC in 82875
* 10:8 refresh mode
* 7 reserved
* 6:4 mode select
* 3:2 reserved
* 1:0 DRAM type 10=Second Revision
* DDR2 SDRAM
* 00, 01, 11 reserved
*/
#define I82975X_DRC_CH0M0 0x120
#define I82975X_DRC_CH1M0 0x1A0
#define I82975X_DRC_M1 0x124 /* DRAM Controller Mode1 (32b)
* 31 0=Standard Address Map
* 1=Enhanced Address Map
* 30:0 reserved
*/
#define I82975X_DRC_CH0M1 0x124
#define I82975X_DRC_CH1M1 0x1A4
enum i82975x_chips {
I82975X = 0,
};
struct i82975x_pvt {
void __iomem *mch_window;
};
struct i82975x_dev_info {
const char *ctl_name;
};
struct i82975x_error_info {
u16 errsts;
u32 eap;
u8 des;
u8 derrsyn;
u16 errsts2;
u8 chan; /* the channel is bit 0 of EAP */
u8 xeap; /* extended eap bit */
};
static const struct i82975x_dev_info i82975x_devs[] = {
[I82975X] = {
.ctl_name = "i82975x"
},
};
static struct pci_dev *mci_pdev; /* init dev: in case that AGP code has
* already registered driver
*/
static int i82975x_registered = 1;
static void i82975x_get_error_info(struct mem_ctl_info *mci,
struct i82975x_error_info *info)
{
struct pci_dev *pdev;
pdev = to_pci_dev(mci->pdev);
/*
* This is a mess because there is no atomic way to read all the
* registers at once and the registers can transition from CE being
* overwritten by UE.
*/
pci_read_config_word(pdev, I82975X_ERRSTS, &info->errsts);
pci_read_config_dword(pdev, I82975X_EAP, &info->eap);
pci_read_config_byte(pdev, I82975X_XEAP, &info->xeap);
pci_read_config_byte(pdev, I82975X_DES, &info->des);
pci_read_config_byte(pdev, I82975X_DERRSYN, &info->derrsyn);
pci_read_config_word(pdev, I82975X_ERRSTS, &info->errsts2);
pci_write_bits16(pdev, I82975X_ERRSTS, 0x0003, 0x0003);
/*
* If the error is the same then we can for both reads then
* the first set of reads is valid. If there is a change then
* there is a CE no info and the second set of reads is valid
* and should be UE info.
*/
if (!(info->errsts2 & 0x0003))
return;
if ((info->errsts ^ info->errsts2) & 0x0003) {
pci_read_config_dword(pdev, I82975X_EAP, &info->eap);
pci_read_config_byte(pdev, I82975X_XEAP, &info->xeap);
pci_read_config_byte(pdev, I82975X_DES, &info->des);
pci_read_config_byte(pdev, I82975X_DERRSYN,
&info->derrsyn);
}
}
static int i82975x_process_error_info(struct mem_ctl_info *mci,
struct i82975x_error_info *info, int handle_errors)
{
int row, chan;
unsigned long offst, page;
if (!(info->errsts2 & 0x0003))
return 0;
if (!handle_errors)
return 1;
if ((info->errsts ^ info->errsts2) & 0x0003) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
-1, -1, -1, "UE overwrote CE", "");
info->errsts = info->errsts2;
}
page = (unsigned long) info->eap;
page >>= 1;
if (info->xeap & 1)
page |= 0x80000000;
page >>= (PAGE_SHIFT - 1);
row = edac_mc_find_csrow_by_page(mci, page);
if (row == -1) {
i82975x_mc_printk(mci, KERN_ERR, "error processing EAP:\n"
"\tXEAP=%u\n"
"\t EAP=0x%08x\n"
"\tPAGE=0x%08x\n",
(info->xeap & 1) ? 1 : 0, info->eap, (unsigned int) page);
return 0;
}
chan = (mci->csrows[row]->nr_channels == 1) ? 0 : info->eap & 1;
offst = info->eap
& ((1 << PAGE_SHIFT) -
(1 << mci->csrows[row]->channels[chan]->dimm->grain));
if (info->errsts & 0x0002)
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
page, offst, 0,
row, -1, -1,
"i82975x UE", "");
else
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
page, offst, info->derrsyn,
row, chan ? chan : 0, -1,
"i82975x CE", "");
return 1;
}
static void i82975x_check(struct mem_ctl_info *mci)
{
struct i82975x_error_info info;
edac_dbg(1, "MC%d\n", mci->mc_idx);
i82975x_get_error_info(mci, &info);
i82975x_process_error_info(mci, &info, 1);
}
/* Return 1 if dual channel mode is active. Else return 0. */
static int dual_channel_active(void __iomem *mch_window)
{
/*
* We treat interleaved-symmetric configuration as dual-channel - EAP's
* bit-0 giving the channel of the error location.
*
* All other configurations are treated as single channel - the EAP's
* bit-0 will resolve ok in symmetric area of mixed
* (symmetric/asymmetric) configurations
*/
u8 drb[4][2];
int row;
int dualch;
for (dualch = 1, row = 0; dualch && (row < 4); row++) {
drb[row][0] = readb(mch_window + I82975X_DRB + row);
drb[row][1] = readb(mch_window + I82975X_DRB + row + 0x80);
dualch = dualch && (drb[row][0] == drb[row][1]);
}
return dualch;
}
static enum dev_type i82975x_dram_type(void __iomem *mch_window, int rank)
{
/*
* ECC is possible on i92975x ONLY with DEV_X8
*/
return DEV_X8;
}
static void i82975x_init_csrows(struct mem_ctl_info *mci,
struct pci_dev *pdev, void __iomem *mch_window)
{
struct csrow_info *csrow;
unsigned long last_cumul_size;
u8 value;
u32 cumul_size, nr_pages;
int index, chan;
struct dimm_info *dimm;
enum dev_type dtype;
last_cumul_size = 0;
/*
* 82875 comment:
* The dram row boundary (DRB) reg values are boundary address
* for each DRAM row with a granularity of 32 or 64MB (single/dual
* channel operation). DRB regs are cumulative; therefore DRB7 will
* contain the total memory contained in all rows.
*
*/
for (index = 0; index < mci->nr_csrows; index++) {
csrow = mci->csrows[index];
value = readb(mch_window + I82975X_DRB + index +
((index >= 4) ? 0x80 : 0));
cumul_size = value;
cumul_size <<= (I82975X_DRB_SHIFT - PAGE_SHIFT);
/*
* Adjust cumul_size w.r.t number of channels
*
*/
if (csrow->nr_channels > 1)
cumul_size <<= 1;
edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size);
nr_pages = cumul_size - last_cumul_size;
if (!nr_pages)
continue;
/*
* Initialise dram labels
* index values:
* [0-7] for single-channel; i.e. csrow->nr_channels = 1
* [0-3] for dual-channel; i.e. csrow->nr_channels = 2
*/
dtype = i82975x_dram_type(mch_window, index);
for (chan = 0; chan < csrow->nr_channels; chan++) {
dimm = mci->csrows[index]->channels[chan]->dimm;
dimm->nr_pages = nr_pages / csrow->nr_channels;
snprintf(csrow->channels[chan]->dimm->label, EDAC_MC_LABEL_LEN, "DIMM %c%d",
(chan == 0) ? 'A' : 'B',
index);
dimm->grain = 1 << 7; /* 128Byte cache-line resolution */
dimm->dtype = i82975x_dram_type(mch_window, index);
dimm->mtype = MEM_DDR2; /* I82975x supports only DDR2 */
dimm->edac_mode = EDAC_SECDED; /* only supported */
}
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
last_cumul_size = cumul_size;
}
}
/* #define i82975x_DEBUG_IOMEM */
#ifdef i82975x_DEBUG_IOMEM
static void i82975x_print_dram_timings(void __iomem *mch_window)
{
/*
* The register meanings are from Intel specs;
* (shows 13-5-5-5 for 800-DDR2)
* Asus P5W Bios reports 15-5-4-4
* What's your religion?
*/
static const int caslats[4] = { 5, 4, 3, 6 };
u32 dtreg[2];
dtreg[0] = readl(mch_window + 0x114);
dtreg[1] = readl(mch_window + 0x194);
i82975x_printk(KERN_INFO, "DRAM Timings : Ch0 Ch1\n"
" RAS Active Min = %d %d\n"
" CAS latency = %d %d\n"
" RAS to CAS = %d %d\n"
" RAS precharge = %d %d\n",
(dtreg[0] >> 19 ) & 0x0f,
(dtreg[1] >> 19) & 0x0f,
caslats[(dtreg[0] >> 8) & 0x03],
caslats[(dtreg[1] >> 8) & 0x03],
((dtreg[0] >> 4) & 0x07) + 2,
((dtreg[1] >> 4) & 0x07) + 2,
(dtreg[0] & 0x07) + 2,
(dtreg[1] & 0x07) + 2
);
}
#endif
static int i82975x_probe1(struct pci_dev *pdev, int dev_idx)
{
int rc = -ENODEV;
struct mem_ctl_info *mci;
struct edac_mc_layer layers[2];
struct i82975x_pvt *pvt;
void __iomem *mch_window;
u32 mchbar;
u32 drc[2];
struct i82975x_error_info discard;
int chans;
#ifdef i82975x_DEBUG_IOMEM
u8 c0drb[4];
u8 c1drb[4];
#endif
edac_dbg(0, "\n");
pci_read_config_dword(pdev, I82975X_MCHBAR, &mchbar);
if (!(mchbar & 1)) {
edac_dbg(3, "failed, MCHBAR disabled!\n");
goto fail0;
}
mchbar &= 0xffffc000; /* bits 31:14 used for 16K window */
mch_window = ioremap_nocache(mchbar, 0x1000);
#ifdef i82975x_DEBUG_IOMEM
i82975x_printk(KERN_INFO, "MCHBAR real = %0x, remapped = %p\n",
mchbar, mch_window);
c0drb[0] = readb(mch_window + I82975X_DRB_CH0R0);
c0drb[1] = readb(mch_window + I82975X_DRB_CH0R1);
c0drb[2] = readb(mch_window + I82975X_DRB_CH0R2);
c0drb[3] = readb(mch_window + I82975X_DRB_CH0R3);
c1drb[0] = readb(mch_window + I82975X_DRB_CH1R0);
c1drb[1] = readb(mch_window + I82975X_DRB_CH1R1);
c1drb[2] = readb(mch_window + I82975X_DRB_CH1R2);
c1drb[3] = readb(mch_window + I82975X_DRB_CH1R3);
i82975x_printk(KERN_INFO, "DRBCH0R0 = 0x%02x\n", c0drb[0]);
i82975x_printk(KERN_INFO, "DRBCH0R1 = 0x%02x\n", c0drb[1]);
i82975x_printk(KERN_INFO, "DRBCH0R2 = 0x%02x\n", c0drb[2]);
i82975x_printk(KERN_INFO, "DRBCH0R3 = 0x%02x\n", c0drb[3]);
i82975x_printk(KERN_INFO, "DRBCH1R0 = 0x%02x\n", c1drb[0]);
i82975x_printk(KERN_INFO, "DRBCH1R1 = 0x%02x\n", c1drb[1]);
i82975x_printk(KERN_INFO, "DRBCH1R2 = 0x%02x\n", c1drb[2]);
i82975x_printk(KERN_INFO, "DRBCH1R3 = 0x%02x\n", c1drb[3]);
#endif
drc[0] = readl(mch_window + I82975X_DRC_CH0M0);
drc[1] = readl(mch_window + I82975X_DRC_CH1M0);
#ifdef i82975x_DEBUG_IOMEM
i82975x_printk(KERN_INFO, "DRC_CH0 = %0x, %s\n", drc[0],
((drc[0] >> 21) & 3) == 1 ?
"ECC enabled" : "ECC disabled");
i82975x_printk(KERN_INFO, "DRC_CH1 = %0x, %s\n", drc[1],
((drc[1] >> 21) & 3) == 1 ?
"ECC enabled" : "ECC disabled");
i82975x_printk(KERN_INFO, "C0 BNKARC = %0x\n",
readw(mch_window + I82975X_C0BNKARC));
i82975x_printk(KERN_INFO, "C1 BNKARC = %0x\n",
readw(mch_window + I82975X_C1BNKARC));
i82975x_print_dram_timings(mch_window);
goto fail1;
#endif
if (!(((drc[0] >> 21) & 3) == 1 || ((drc[1] >> 21) & 3) == 1)) {
i82975x_printk(KERN_INFO, "ECC disabled on both channels.\n");
goto fail1;
}
chans = dual_channel_active(mch_window) + 1;
/* assuming only one controller, index thus is 0 */
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = I82975X_NR_DIMMS;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = I82975X_NR_CSROWS(chans);
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
if (!mci) {
rc = -ENOMEM;
goto fail1;
}
edac_dbg(3, "init mci\n");
mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR2;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = I82975X_REVISION;
mci->ctl_name = i82975x_devs[dev_idx].ctl_name;
mci->dev_name = pci_name(pdev);
mci->edac_check = i82975x_check;
mci->ctl_page_to_phys = NULL;
edac_dbg(3, "init pvt\n");
pvt = (struct i82975x_pvt *) mci->pvt_info;
pvt->mch_window = mch_window;
i82975x_init_csrows(mci, pdev, mch_window);
mci->scrub_mode = SCRUB_HW_SRC;
i82975x_get_error_info(mci, &discard); /* clear counters */
/* finalize this instance of memory controller with edac core */
if (edac_mc_add_mc(mci)) {
edac_dbg(3, "failed edac_mc_add_mc()\n");
goto fail2;
}
/* get this far and it's successful */
edac_dbg(3, "success\n");
return 0;
fail2:
edac_mc_free(mci);
fail1:
iounmap(mch_window);
fail0:
return rc;
}
/* returns count (>= 0), or negative on error */
static int i82975x_init_one(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
int rc;
edac_dbg(0, "\n");
if (pci_enable_device(pdev) < 0)
return -EIO;
rc = i82975x_probe1(pdev, ent->driver_data);
if (mci_pdev == NULL)
mci_pdev = pci_dev_get(pdev);
return rc;
}
static void i82975x_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
struct i82975x_pvt *pvt;
edac_dbg(0, "\n");
mci = edac_mc_del_mc(&pdev->dev);
if (mci == NULL)
return;
pvt = mci->pvt_info;
if (pvt->mch_window)
iounmap( pvt->mch_window );
edac_mc_free(mci);
}
static const struct pci_device_id i82975x_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, 82975_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
I82975X
},
{
0,
} /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, i82975x_pci_tbl);
static struct pci_driver i82975x_driver = {
.name = EDAC_MOD_STR,
.probe = i82975x_init_one,
.remove = i82975x_remove_one,
.id_table = i82975x_pci_tbl,
};
static int __init i82975x_init(void)
{
int pci_rc;
edac_dbg(3, "\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
pci_rc = pci_register_driver(&i82975x_driver);
if (pci_rc < 0)
goto fail0;
if (mci_pdev == NULL) {
mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_82975_0, NULL);
if (!mci_pdev) {
edac_dbg(0, "i82975x pci_get_device fail\n");
pci_rc = -ENODEV;
goto fail1;
}
pci_rc = i82975x_init_one(mci_pdev, i82975x_pci_tbl);
if (pci_rc < 0) {
edac_dbg(0, "i82975x init fail\n");
pci_rc = -ENODEV;
goto fail1;
}
}
return 0;
fail1:
pci_unregister_driver(&i82975x_driver);
fail0:
if (mci_pdev != NULL)
pci_dev_put(mci_pdev);
return pci_rc;
}
static void __exit i82975x_exit(void)
{
edac_dbg(3, "\n");
pci_unregister_driver(&i82975x_driver);
if (!i82975x_registered) {
i82975x_remove_one(mci_pdev);
pci_dev_put(mci_pdev);
}
}
module_init(i82975x_init);
module_exit(i82975x_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arvind R. <arvino55@gmail.com>");
MODULE_DESCRIPTION("MC support for Intel 82975 memory hub controllers");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");

536
drivers/edac/ie31200_edac.c Normal file
View file

@ -0,0 +1,536 @@
/*
* Intel E3-1200
* Copyright (C) 2014 Jason Baron <jbaron@akamai.com>
*
* Support for the E3-1200 processor family. Heavily based on previous
* Intel EDAC drivers.
*
* Since the DRAM controller is on the cpu chip, we can use its PCI device
* id to identify these processors.
*
* PCI DRAM controller device ids (Taken from The PCI ID Repository - http://pci-ids.ucw.cz/)
*
* 0108: Xeon E3-1200 Processor Family DRAM Controller
* 010c: Xeon E3-1200/2nd Generation Core Processor Family DRAM Controller
* 0150: Xeon E3-1200 v2/3rd Gen Core processor DRAM Controller
* 0158: Xeon E3-1200 v2/Ivy Bridge DRAM Controller
* 015c: Xeon E3-1200 v2/3rd Gen Core processor DRAM Controller
* 0c04: Xeon E3-1200 v3/4th Gen Core Processor DRAM Controller
* 0c08: Xeon E3-1200 v3 Processor DRAM Controller
*
* Based on Intel specification:
* http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf
* http://www.intel.com/content/www/us/en/processors/xeon/xeon-e3-1200-family-vol-2-datasheet.html
*
* According to the above datasheet (p.16):
* "
* 6. Software must not access B0/D0/F0 32-bit memory-mapped registers with
* requests that cross a DW boundary.
* "
*
* Thus, we make use of the explicit: lo_hi_readq(), which breaks the readq into
* 2 readl() calls. This restriction may be lifted in subsequent chip releases,
* but lo_hi_readq() ensures that we are safe across all e3-1200 processors.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/edac.h>
#include <asm-generic/io-64-nonatomic-lo-hi.h>
#include "edac_core.h"
#define IE31200_REVISION "1.0"
#define EDAC_MOD_STR "ie31200_edac"
#define ie31200_printk(level, fmt, arg...) \
edac_printk(level, "ie31200", fmt, ##arg)
#define PCI_DEVICE_ID_INTEL_IE31200_HB_1 0x0108
#define PCI_DEVICE_ID_INTEL_IE31200_HB_2 0x010c
#define PCI_DEVICE_ID_INTEL_IE31200_HB_3 0x0150
#define PCI_DEVICE_ID_INTEL_IE31200_HB_4 0x0158
#define PCI_DEVICE_ID_INTEL_IE31200_HB_5 0x015c
#define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04
#define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08
#define IE31200_DIMMS 4
#define IE31200_RANKS 8
#define IE31200_RANKS_PER_CHANNEL 4
#define IE31200_DIMMS_PER_CHANNEL 2
#define IE31200_CHANNELS 2
/* Intel IE31200 register addresses - device 0 function 0 - DRAM Controller */
#define IE31200_MCHBAR_LOW 0x48
#define IE31200_MCHBAR_HIGH 0x4c
#define IE31200_MCHBAR_MASK GENMASK_ULL(38, 15)
#define IE31200_MMR_WINDOW_SIZE BIT(15)
/*
* Error Status Register (16b)
*
* 15 reserved
* 14 Isochronous TBWRR Run Behind FIFO Full
* (ITCV)
* 13 Isochronous TBWRR Run Behind FIFO Put
* (ITSTV)
* 12 reserved
* 11 MCH Thermal Sensor Event
* for SMI/SCI/SERR (GTSE)
* 10 reserved
* 9 LOCK to non-DRAM Memory Flag (LCKF)
* 8 reserved
* 7 DRAM Throttle Flag (DTF)
* 6:2 reserved
* 1 Multi-bit DRAM ECC Error Flag (DMERR)
* 0 Single-bit DRAM ECC Error Flag (DSERR)
*/
#define IE31200_ERRSTS 0xc8
#define IE31200_ERRSTS_UE BIT(1)
#define IE31200_ERRSTS_CE BIT(0)
#define IE31200_ERRSTS_BITS (IE31200_ERRSTS_UE | IE31200_ERRSTS_CE)
/*
* Channel 0 ECC Error Log (64b)
*
* 63:48 Error Column Address (ERRCOL)
* 47:32 Error Row Address (ERRROW)
* 31:29 Error Bank Address (ERRBANK)
* 28:27 Error Rank Address (ERRRANK)
* 26:24 reserved
* 23:16 Error Syndrome (ERRSYND)
* 15: 2 reserved
* 1 Multiple Bit Error Status (MERRSTS)
* 0 Correctable Error Status (CERRSTS)
*/
#define IE31200_C0ECCERRLOG 0x40c8
#define IE31200_C1ECCERRLOG 0x44c8
#define IE31200_ECCERRLOG_CE BIT(0)
#define IE31200_ECCERRLOG_UE BIT(1)
#define IE31200_ECCERRLOG_RANK_BITS GENMASK_ULL(28, 27)
#define IE31200_ECCERRLOG_RANK_SHIFT 27
#define IE31200_ECCERRLOG_SYNDROME_BITS GENMASK_ULL(23, 16)
#define IE31200_ECCERRLOG_SYNDROME_SHIFT 16
#define IE31200_ECCERRLOG_SYNDROME(log) \
((log & IE31200_ECCERRLOG_SYNDROME_BITS) >> \
IE31200_ECCERRLOG_SYNDROME_SHIFT)
#define IE31200_CAPID0 0xe4
#define IE31200_CAPID0_PDCD BIT(4)
#define IE31200_CAPID0_DDPCD BIT(6)
#define IE31200_CAPID0_ECC BIT(1)
#define IE31200_MAD_DIMM_0_OFFSET 0x5004
#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0)
#define IE31200_MAD_DIMM_A_RANK BIT(17)
#define IE31200_MAD_DIMM_A_WIDTH BIT(19)
#define IE31200_PAGES(n) (n << (28 - PAGE_SHIFT))
static int nr_channels;
struct ie31200_priv {
void __iomem *window;
};
enum ie31200_chips {
IE31200 = 0,
};
struct ie31200_dev_info {
const char *ctl_name;
};
struct ie31200_error_info {
u16 errsts;
u16 errsts2;
u64 eccerrlog[IE31200_CHANNELS];
};
static const struct ie31200_dev_info ie31200_devs[] = {
[IE31200] = {
.ctl_name = "IE31200"
},
};
struct dimm_data {
u8 size; /* in 256MB multiples */
u8 dual_rank : 1,
x16_width : 1; /* 0 means x8 width */
};
static int how_many_channels(struct pci_dev *pdev)
{
int n_channels;
unsigned char capid0_2b; /* 2nd byte of CAPID0 */
pci_read_config_byte(pdev, IE31200_CAPID0 + 1, &capid0_2b);
/* check PDCD: Dual Channel Disable */
if (capid0_2b & IE31200_CAPID0_PDCD) {
edac_dbg(0, "In single channel mode\n");
n_channels = 1;
} else {
edac_dbg(0, "In dual channel mode\n");
n_channels = 2;
}
/* check DDPCD - check if both channels are filled */
if (capid0_2b & IE31200_CAPID0_DDPCD)
edac_dbg(0, "2 DIMMS per channel disabled\n");
else
edac_dbg(0, "2 DIMMS per channel enabled\n");
return n_channels;
}
static bool ecc_capable(struct pci_dev *pdev)
{
unsigned char capid0_4b; /* 4th byte of CAPID0 */
pci_read_config_byte(pdev, IE31200_CAPID0 + 3, &capid0_4b);
if (capid0_4b & IE31200_CAPID0_ECC)
return false;
return true;
}
static int eccerrlog_row(int channel, u64 log)
{
int rank = ((log & IE31200_ECCERRLOG_RANK_BITS) >>
IE31200_ECCERRLOG_RANK_SHIFT);
return rank | (channel * IE31200_RANKS_PER_CHANNEL);
}
static void ie31200_clear_error_info(struct mem_ctl_info *mci)
{
/*
* Clear any error bits.
* (Yes, we really clear bits by writing 1 to them.)
*/
pci_write_bits16(to_pci_dev(mci->pdev), IE31200_ERRSTS,
IE31200_ERRSTS_BITS, IE31200_ERRSTS_BITS);
}
static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci,
struct ie31200_error_info *info)
{
struct pci_dev *pdev;
struct ie31200_priv *priv = mci->pvt_info;
void __iomem *window = priv->window;
pdev = to_pci_dev(mci->pdev);
/*
* This is a mess because there is no atomic way to read all the
* registers at once and the registers can transition from CE being
* overwritten by UE.
*/
pci_read_config_word(pdev, IE31200_ERRSTS, &info->errsts);
if (!(info->errsts & IE31200_ERRSTS_BITS))
return;
info->eccerrlog[0] = lo_hi_readq(window + IE31200_C0ECCERRLOG);
if (nr_channels == 2)
info->eccerrlog[1] = lo_hi_readq(window + IE31200_C1ECCERRLOG);
pci_read_config_word(pdev, IE31200_ERRSTS, &info->errsts2);
/*
* If the error is the same for both reads then the first set
* of reads is valid. If there is a change then there is a CE
* with no info and the second set of reads is valid and
* should be UE info.
*/
if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) {
info->eccerrlog[0] = lo_hi_readq(window + IE31200_C0ECCERRLOG);
if (nr_channels == 2)
info->eccerrlog[1] =
lo_hi_readq(window + IE31200_C1ECCERRLOG);
}
ie31200_clear_error_info(mci);
}
static void ie31200_process_error_info(struct mem_ctl_info *mci,
struct ie31200_error_info *info)
{
int channel;
u64 log;
if (!(info->errsts & IE31200_ERRSTS_BITS))
return;
if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
-1, -1, -1, "UE overwrote CE", "");
info->errsts = info->errsts2;
}
for (channel = 0; channel < nr_channels; channel++) {
log = info->eccerrlog[channel];
if (log & IE31200_ECCERRLOG_UE) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
0, 0, 0,
eccerrlog_row(channel, log),
channel, -1,
"ie31200 UE", "");
} else if (log & IE31200_ECCERRLOG_CE) {
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
0, 0,
IE31200_ECCERRLOG_SYNDROME(log),
eccerrlog_row(channel, log),
channel, -1,
"ie31200 CE", "");
}
}
}
static void ie31200_check(struct mem_ctl_info *mci)
{
struct ie31200_error_info info;
edac_dbg(1, "MC%d\n", mci->mc_idx);
ie31200_get_and_clear_error_info(mci, &info);
ie31200_process_error_info(mci, &info);
}
static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
{
union {
u64 mchbar;
struct {
u32 mchbar_low;
u32 mchbar_high;
};
} u;
void __iomem *window;
pci_read_config_dword(pdev, IE31200_MCHBAR_LOW, &u.mchbar_low);
pci_read_config_dword(pdev, IE31200_MCHBAR_HIGH, &u.mchbar_high);
u.mchbar &= IE31200_MCHBAR_MASK;
if (u.mchbar != (resource_size_t)u.mchbar) {
ie31200_printk(KERN_ERR, "mmio space beyond accessible range (0x%llx)\n",
(unsigned long long)u.mchbar);
return NULL;
}
window = ioremap_nocache(u.mchbar, IE31200_MMR_WINDOW_SIZE);
if (!window)
ie31200_printk(KERN_ERR, "Cannot map mmio space at 0x%llx\n",
(unsigned long long)u.mchbar);
return window;
}
static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
{
int i, j, ret;
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL];
void __iomem *window;
struct ie31200_priv *priv;
u32 addr_decode;
edac_dbg(0, "MC:\n");
if (!ecc_capable(pdev)) {
ie31200_printk(KERN_INFO, "No ECC support\n");
return -ENODEV;
}
nr_channels = how_many_channels(pdev);
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = IE31200_DIMMS;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = nr_channels;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
sizeof(struct ie31200_priv));
if (!mci)
return -ENOMEM;
window = ie31200_map_mchbar(pdev);
if (!window) {
ret = -ENODEV;
goto fail_free;
}
edac_dbg(3, "MC: init mci\n");
mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR3;
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = IE31200_REVISION;
mci->ctl_name = ie31200_devs[dev_idx].ctl_name;
mci->dev_name = pci_name(pdev);
mci->edac_check = ie31200_check;
mci->ctl_page_to_phys = NULL;
priv = mci->pvt_info;
priv->window = window;
/* populate DIMM info */
for (i = 0; i < IE31200_CHANNELS; i++) {
addr_decode = readl(window + IE31200_MAD_DIMM_0_OFFSET +
(i * 4));
edac_dbg(0, "addr_decode: 0x%x\n", addr_decode);
for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) {
dimm_info[i][j].size = (addr_decode >> (j * 8)) &
IE31200_MAD_DIMM_SIZE;
dimm_info[i][j].dual_rank = (addr_decode &
(IE31200_MAD_DIMM_A_RANK << j)) ? 1 : 0;
dimm_info[i][j].x16_width = (addr_decode &
(IE31200_MAD_DIMM_A_WIDTH << j)) ? 1 : 0;
edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n",
dimm_info[i][j].size,
dimm_info[i][j].dual_rank,
dimm_info[i][j].x16_width);
}
}
/*
* The dram rank boundary (DRB) reg values are boundary addresses
* for each DRAM rank with a granularity of 64MB. DRB regs are
* cumulative; the last one will contain the total memory
* contained in all ranks.
*/
for (i = 0; i < IE31200_DIMMS_PER_CHANNEL; i++) {
for (j = 0; j < IE31200_CHANNELS; j++) {
struct dimm_info *dimm;
unsigned long nr_pages;
nr_pages = IE31200_PAGES(dimm_info[j][i].size);
if (nr_pages == 0)
continue;
if (dimm_info[j][i].dual_rank) {
nr_pages = nr_pages / 2;
dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
mci->n_layers, (i * 2) + 1,
j, 0);
dimm->nr_pages = nr_pages;
edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
dimm->grain = 8; /* just a guess */
dimm->mtype = MEM_DDR3;
dimm->dtype = DEV_UNKNOWN;
dimm->edac_mode = EDAC_UNKNOWN;
}
dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
mci->n_layers, i * 2, j, 0);
dimm->nr_pages = nr_pages;
edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
dimm->grain = 8; /* same guess */
dimm->mtype = MEM_DDR3;
dimm->dtype = DEV_UNKNOWN;
dimm->edac_mode = EDAC_UNKNOWN;
}
}
ie31200_clear_error_info(mci);
if (edac_mc_add_mc(mci)) {
edac_dbg(3, "MC: failed edac_mc_add_mc()\n");
ret = -ENODEV;
goto fail_unmap;
}
/* get this far and it's successful */
edac_dbg(3, "MC: success\n");
return 0;
fail_unmap:
iounmap(window);
fail_free:
edac_mc_free(mci);
return ret;
}
static int ie31200_init_one(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
edac_dbg(0, "MC:\n");
if (pci_enable_device(pdev) < 0)
return -EIO;
return ie31200_probe1(pdev, ent->driver_data);
}
static void ie31200_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
struct ie31200_priv *priv;
edac_dbg(0, "\n");
mci = edac_mc_del_mc(&pdev->dev);
if (!mci)
return;
priv = mci->pvt_info;
iounmap(priv->window);
edac_mc_free(mci);
}
static const struct pci_device_id ie31200_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, IE31200_HB_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
IE31200},
{
PCI_VEND_DEV(INTEL, IE31200_HB_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
IE31200},
{
PCI_VEND_DEV(INTEL, IE31200_HB_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
IE31200},
{
PCI_VEND_DEV(INTEL, IE31200_HB_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
IE31200},
{
PCI_VEND_DEV(INTEL, IE31200_HB_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
IE31200},
{
PCI_VEND_DEV(INTEL, IE31200_HB_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
IE31200},
{
PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
IE31200},
{
0,
} /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl);
static struct pci_driver ie31200_driver = {
.name = EDAC_MOD_STR,
.probe = ie31200_init_one,
.remove = ie31200_remove_one,
.id_table = ie31200_pci_tbl,
};
static int __init ie31200_init(void)
{
edac_dbg(3, "MC:\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
return pci_register_driver(&ie31200_driver);
}
static void __exit ie31200_exit(void)
{
edac_dbg(3, "MC:\n");
pci_unregister_driver(&ie31200_driver);
}
module_init(ie31200_init);
module_exit(ie31200_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jason Baron <jbaron@akamai.com>");
MODULE_DESCRIPTION("MC support for Intel Processor E31200 memory hub controllers");

934
drivers/edac/mce_amd.c Normal file
View file

@ -0,0 +1,934 @@
#include <linux/module.h>
#include <linux/slab.h>
#include "mce_amd.h"
static struct amd_decoder_ops *fam_ops;
static u8 xec_mask = 0xf;
static bool report_gart_errors;
static void (*nb_bus_decoder)(int node_id, struct mce *m);
void amd_report_gart_errors(bool v)
{
report_gart_errors = v;
}
EXPORT_SYMBOL_GPL(amd_report_gart_errors);
void amd_register_ecc_decoder(void (*f)(int, struct mce *))
{
nb_bus_decoder = f;
}
EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
{
if (nb_bus_decoder) {
WARN_ON(nb_bus_decoder != f);
nb_bus_decoder = NULL;
}
}
EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
/*
* string representation for the different MCA reported error types, see F3x48
* or MSR0000_0411.
*/
/* transaction type */
static const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };
/* cache level */
static const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };
/* memory transaction type */
static const char * const rrrr_msgs[] = {
"GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
};
/* participating processor */
const char * const pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
EXPORT_SYMBOL_GPL(pp_msgs);
/* request timeout */
static const char * const to_msgs[] = { "no timeout", "timed out" };
/* memory or i/o */
static const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
/* internal error type */
static const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };
static const char * const f15h_mc1_mce_desc[] = {
"UC during a demand linefill from L2",
"Parity error during data load from IC",
"Parity error for IC valid bit",
"Main tag parity error",
"Parity error in prediction queue",
"PFB data/address parity error",
"Parity error in the branch status reg",
"PFB promotion address error",
"Tag error during probe/victimization",
"Parity error for IC probe tag valid bit",
"PFB non-cacheable bit parity error",
"PFB valid bit parity error", /* xec = 0xd */
"Microcode Patch Buffer", /* xec = 010 */
"uop queue",
"insn buffer",
"predecode buffer",
"fetch address FIFO",
"dispatch uop queue"
};
static const char * const f15h_mc2_mce_desc[] = {
"Fill ECC error on data fills", /* xec = 0x4 */
"Fill parity error on insn fills",
"Prefetcher request FIFO parity error",
"PRQ address parity error",
"PRQ data parity error",
"WCC Tag ECC error",
"WCC Data ECC error",
"WCB Data parity error",
"VB Data ECC or parity error",
"L2 Tag ECC error", /* xec = 0x10 */
"Hard L2 Tag ECC error",
"Multiple hits on L2 tag",
"XAB parity error",
"PRB address parity error"
};
static const char * const mc4_mce_desc[] = {
"DRAM ECC error detected on the NB",
"CRC error detected on HT link",
"Link-defined sync error packets detected on HT link",
"HT Master abort",
"HT Target abort",
"Invalid GART PTE entry during GART table walk",
"Unsupported atomic RMW received from an IO link",
"Watchdog timeout due to lack of progress",
"DRAM ECC error detected on the NB",
"SVM DMA Exclusion Vector error",
"HT data error detected on link",
"Protocol error (link, L3, probe filter)",
"NB internal arrays parity error",
"DRAM addr/ctl signals parity error",
"IO link transmission error",
"L3 data cache ECC error", /* xec = 0x1c */
"L3 cache tag error",
"L3 LRU parity bits error",
"ECC Error in the Probe Filter directory"
};
static const char * const mc5_mce_desc[] = {
"CPU Watchdog timer expire",
"Wakeup array dest tag",
"AG payload array",
"EX payload array",
"IDRF array",
"Retire dispatch queue",
"Mapper checkpoint array",
"Physical register file EX0 port",
"Physical register file EX1 port",
"Physical register file AG0 port",
"Physical register file AG1 port",
"Flag register file",
"DE error occurred",
"Retire status queue"
};
static bool f12h_mc0_mce(u16 ec, u8 xec)
{
bool ret = false;
if (MEM_ERROR(ec)) {
u8 ll = LL(ec);
ret = true;
if (ll == LL_L2)
pr_cont("during L1 linefill from L2.\n");
else if (ll == LL_L1)
pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
else
ret = false;
}
return ret;
}
static bool f10h_mc0_mce(u16 ec, u8 xec)
{
if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
pr_cont("during data scrub.\n");
return true;
}
return f12h_mc0_mce(ec, xec);
}
static bool k8_mc0_mce(u16 ec, u8 xec)
{
if (BUS_ERROR(ec)) {
pr_cont("during system linefill.\n");
return true;
}
return f10h_mc0_mce(ec, xec);
}
static bool cat_mc0_mce(u16 ec, u8 xec)
{
u8 r4 = R4(ec);
bool ret = true;
if (MEM_ERROR(ec)) {
if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
return false;
switch (r4) {
case R4_DRD:
case R4_DWR:
pr_cont("Data/Tag parity error due to %s.\n",
(r4 == R4_DRD ? "load/hw prf" : "store"));
break;
case R4_EVICT:
pr_cont("Copyback parity error on a tag miss.\n");
break;
case R4_SNOOP:
pr_cont("Tag parity error during snoop.\n");
break;
default:
ret = false;
}
} else if (BUS_ERROR(ec)) {
if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
return false;
pr_cont("System read data error on a ");
switch (r4) {
case R4_RD:
pr_cont("TLB reload.\n");
break;
case R4_DWR:
pr_cont("store.\n");
break;
case R4_DRD:
pr_cont("load.\n");
break;
default:
ret = false;
}
} else {
ret = false;
}
return ret;
}
static bool f15h_mc0_mce(u16 ec, u8 xec)
{
bool ret = true;
if (MEM_ERROR(ec)) {
switch (xec) {
case 0x0:
pr_cont("Data Array access error.\n");
break;
case 0x1:
pr_cont("UC error during a linefill from L2/NB.\n");
break;
case 0x2:
case 0x11:
pr_cont("STQ access error.\n");
break;
case 0x3:
pr_cont("SCB access error.\n");
break;
case 0x10:
pr_cont("Tag error.\n");
break;
case 0x12:
pr_cont("LDQ access error.\n");
break;
default:
ret = false;
}
} else if (BUS_ERROR(ec)) {
if (!xec)
pr_cont("System Read Data Error.\n");
else
pr_cont(" Internal error condition type %d.\n", xec);
} else if (INT_ERROR(ec)) {
if (xec <= 0x1f)
pr_cont("Hardware Assert.\n");
else
ret = false;
} else
ret = false;
return ret;
}
static void decode_mc0_mce(struct mce *m)
{
u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask);
pr_emerg(HW_ERR "MC0 Error: ");
/* TLB error signatures are the same across families */
if (TLB_ERROR(ec)) {
if (TT(ec) == TT_DATA) {
pr_cont("%s TLB %s.\n", LL_MSG(ec),
((xec == 2) ? "locked miss"
: (xec ? "multimatch" : "parity")));
return;
}
} else if (fam_ops->mc0_mce(ec, xec))
;
else
pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
}
static bool k8_mc1_mce(u16 ec, u8 xec)
{
u8 ll = LL(ec);
bool ret = true;
if (!MEM_ERROR(ec))
return false;
if (ll == 0x2)
pr_cont("during a linefill from L2.\n");
else if (ll == 0x1) {
switch (R4(ec)) {
case R4_IRD:
pr_cont("Parity error during data load.\n");
break;
case R4_EVICT:
pr_cont("Copyback Parity/Victim error.\n");
break;
case R4_SNOOP:
pr_cont("Tag Snoop error.\n");
break;
default:
ret = false;
break;
}
} else
ret = false;
return ret;
}
static bool cat_mc1_mce(u16 ec, u8 xec)
{
u8 r4 = R4(ec);
bool ret = true;
if (!MEM_ERROR(ec))
return false;
if (TT(ec) != TT_INSTR)
return false;
if (r4 == R4_IRD)
pr_cont("Data/tag array parity error for a tag hit.\n");
else if (r4 == R4_SNOOP)
pr_cont("Tag error during snoop/victimization.\n");
else if (xec == 0x0)
pr_cont("Tag parity error from victim castout.\n");
else if (xec == 0x2)
pr_cont("Microcode patch RAM parity error.\n");
else
ret = false;
return ret;
}
static bool f15h_mc1_mce(u16 ec, u8 xec)
{
bool ret = true;
if (!MEM_ERROR(ec))
return false;
switch (xec) {
case 0x0 ... 0xa:
pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
break;
case 0xd:
pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
break;
case 0x10:
pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
break;
case 0x11 ... 0x15:
pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
break;
default:
ret = false;
}
return ret;
}
static void decode_mc1_mce(struct mce *m)
{
u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask);
pr_emerg(HW_ERR "MC1 Error: ");
if (TLB_ERROR(ec))
pr_cont("%s TLB %s.\n", LL_MSG(ec),
(xec ? "multimatch" : "parity error"));
else if (BUS_ERROR(ec)) {
bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
} else if (INT_ERROR(ec)) {
if (xec <= 0x3f)
pr_cont("Hardware Assert.\n");
else
goto wrong_mc1_mce;
} else if (fam_ops->mc1_mce(ec, xec))
;
else
goto wrong_mc1_mce;
return;
wrong_mc1_mce:
pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
}
static bool k8_mc2_mce(u16 ec, u8 xec)
{
bool ret = true;
if (xec == 0x1)
pr_cont(" in the write data buffers.\n");
else if (xec == 0x3)
pr_cont(" in the victim data buffers.\n");
else if (xec == 0x2 && MEM_ERROR(ec))
pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
else if (xec == 0x0) {
if (TLB_ERROR(ec))
pr_cont(": %s error in a Page Descriptor Cache or "
"Guest TLB.\n", TT_MSG(ec));
else if (BUS_ERROR(ec))
pr_cont(": %s/ECC error in data read from NB: %s.\n",
R4_MSG(ec), PP_MSG(ec));
else if (MEM_ERROR(ec)) {
u8 r4 = R4(ec);
if (r4 >= 0x7)
pr_cont(": %s error during data copyback.\n",
R4_MSG(ec));
else if (r4 <= 0x1)
pr_cont(": %s parity/ECC error during data "
"access from L2.\n", R4_MSG(ec));
else
ret = false;
} else
ret = false;
} else
ret = false;
return ret;
}
static bool f15h_mc2_mce(u16 ec, u8 xec)
{
bool ret = true;
if (TLB_ERROR(ec)) {
if (xec == 0x0)
pr_cont("Data parity TLB read error.\n");
else if (xec == 0x1)
pr_cont("Poison data provided for TLB fill.\n");
else
ret = false;
} else if (BUS_ERROR(ec)) {
if (xec > 2)
ret = false;
pr_cont("Error during attempted NB data read.\n");
} else if (MEM_ERROR(ec)) {
switch (xec) {
case 0x4 ... 0xc:
pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
break;
case 0x10 ... 0x14:
pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
break;
default:
ret = false;
}
} else if (INT_ERROR(ec)) {
if (xec <= 0x3f)
pr_cont("Hardware Assert.\n");
else
ret = false;
}
return ret;
}
static bool f16h_mc2_mce(u16 ec, u8 xec)
{
u8 r4 = R4(ec);
if (!MEM_ERROR(ec))
return false;
switch (xec) {
case 0x04 ... 0x05:
pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
break;
case 0x09 ... 0x0b:
case 0x0d ... 0x0f:
pr_cont("ECC error in L2 tag (%s).\n",
((r4 == R4_GEN) ? "BankReq" :
((r4 == R4_SNOOP) ? "Prb" : "Fill")));
break;
case 0x10 ... 0x19:
case 0x1b:
pr_cont("ECC error in L2 data array (%s).\n",
(((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" :
((r4 == R4_GEN) ? "Attr" :
((r4 == R4_EVICT) ? "Vict" : "Fill"))));
break;
case 0x1c ... 0x1d:
case 0x1f:
pr_cont("Parity error in L2 attribute bits (%s).\n",
((r4 == R4_RD) ? "Hit" :
((r4 == R4_GEN) ? "Attr" : "Fill")));
break;
default:
return false;
}
return true;
}
static void decode_mc2_mce(struct mce *m)
{
u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask);
pr_emerg(HW_ERR "MC2 Error: ");
if (!fam_ops->mc2_mce(ec, xec))
pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
}
static void decode_mc3_mce(struct mce *m)
{
u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask);
if (boot_cpu_data.x86 >= 0x14) {
pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
" please report on LKML.\n");
return;
}
pr_emerg(HW_ERR "MC3 Error");
if (xec == 0x0) {
u8 r4 = R4(ec);
if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
goto wrong_mc3_mce;
pr_cont(" during %s.\n", R4_MSG(ec));
} else
goto wrong_mc3_mce;
return;
wrong_mc3_mce:
pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
}
static void decode_mc4_mce(struct mce *m)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
int node_id = amd_get_nb_id(m->extcpu);
u16 ec = EC(m->status);
u8 xec = XEC(m->status, 0x1f);
u8 offset = 0;
pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
switch (xec) {
case 0x0 ... 0xe:
/* special handling for DRAM ECCs */
if (xec == 0x0 || xec == 0x8) {
/* no ECCs on F11h */
if (c->x86 == 0x11)
goto wrong_mc4_mce;
pr_cont("%s.\n", mc4_mce_desc[xec]);
if (nb_bus_decoder)
nb_bus_decoder(node_id, m);
return;
}
break;
case 0xf:
if (TLB_ERROR(ec))
pr_cont("GART Table Walk data error.\n");
else if (BUS_ERROR(ec))
pr_cont("DMA Exclusion Vector Table Walk error.\n");
else
goto wrong_mc4_mce;
return;
case 0x19:
if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
pr_cont("Compute Unit Data Error.\n");
else
goto wrong_mc4_mce;
return;
case 0x1c ... 0x1f:
offset = 13;
break;
default:
goto wrong_mc4_mce;
}
pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
return;
wrong_mc4_mce:
pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
}
static void decode_mc5_mce(struct mce *m)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask);
if (c->x86 == 0xf || c->x86 == 0x11)
goto wrong_mc5_mce;
pr_emerg(HW_ERR "MC5 Error: ");
if (INT_ERROR(ec)) {
if (xec <= 0x1f) {
pr_cont("Hardware Assert.\n");
return;
} else
goto wrong_mc5_mce;
}
if (xec == 0x0 || xec == 0xc)
pr_cont("%s.\n", mc5_mce_desc[xec]);
else if (xec <= 0xd)
pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
else
goto wrong_mc5_mce;
return;
wrong_mc5_mce:
pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
}
static void decode_mc6_mce(struct mce *m)
{
u8 xec = XEC(m->status, xec_mask);
pr_emerg(HW_ERR "MC6 Error: ");
switch (xec) {
case 0x0:
pr_cont("Hardware Assertion");
break;
case 0x1:
pr_cont("Free List");
break;
case 0x2:
pr_cont("Physical Register File");
break;
case 0x3:
pr_cont("Retire Queue");
break;
case 0x4:
pr_cont("Scheduler table");
break;
case 0x5:
pr_cont("Status Register File");
break;
default:
goto wrong_mc6_mce;
break;
}
pr_cont(" parity error.\n");
return;
wrong_mc6_mce:
pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
}
static inline void amd_decode_err_code(u16 ec)
{
if (INT_ERROR(ec)) {
pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
return;
}
pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
if (BUS_ERROR(ec))
pr_cont(", mem/io: %s", II_MSG(ec));
else
pr_cont(", tx: %s", TT_MSG(ec));
if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
pr_cont(", mem-tx: %s", R4_MSG(ec));
if (BUS_ERROR(ec))
pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
}
pr_cont("\n");
}
/*
* Filter out unwanted MCE signatures here.
*/
static bool amd_filter_mce(struct mce *m)
{
u8 xec = (m->status >> 16) & 0x1f;
/*
* NB GART TLB error reporting is disabled by default.
*/
if (m->bank == 4 && xec == 0x5 && !report_gart_errors)
return true;
return false;
}
static const char *decode_error_status(struct mce *m)
{
if (m->status & MCI_STATUS_UC) {
if (m->status & MCI_STATUS_PCC)
return "System Fatal error.";
if (m->mcgstatus & MCG_STATUS_RIPV)
return "Uncorrected, software restartable error.";
return "Uncorrected, software containable error.";
}
if (m->status & MCI_STATUS_DEFERRED)
return "Deferred error.";
return "Corrected error, no action required.";
}
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
{
struct mce *m = (struct mce *)data;
struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
int ecc;
if (amd_filter_mce(m))
return NOTIFY_STOP;
pr_emerg(HW_ERR "%s\n", decode_error_status(m));
pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
m->extcpu,
c->x86, c->x86_model, c->x86_mask,
m->bank,
((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
((m->status & MCI_STATUS_UC) ? "UE" : "CE"),
((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
if (c->x86 == 0x15 || c->x86 == 0x16)
pr_cont("|%s|%s",
((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));
/* do the two bits[14:13] together */
ecc = (m->status >> 45) & 0x3;
if (ecc)
pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
pr_cont("]: 0x%016llx\n", m->status);
if (m->status & MCI_STATUS_ADDRV)
pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr);
if (!fam_ops)
goto err_code;
switch (m->bank) {
case 0:
decode_mc0_mce(m);
break;
case 1:
decode_mc1_mce(m);
break;
case 2:
decode_mc2_mce(m);
break;
case 3:
decode_mc3_mce(m);
break;
case 4:
decode_mc4_mce(m);
break;
case 5:
decode_mc5_mce(m);
break;
case 6:
decode_mc6_mce(m);
break;
default:
break;
}
err_code:
amd_decode_err_code(m->status & 0xffff);
return NOTIFY_STOP;
}
EXPORT_SYMBOL_GPL(amd_decode_mce);
static struct notifier_block amd_mce_dec_nb = {
.notifier_call = amd_decode_mce,
};
static int __init mce_amd_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
if (c->x86_vendor != X86_VENDOR_AMD)
return -ENODEV;
fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
if (!fam_ops)
return -ENOMEM;
switch (c->x86) {
case 0xf:
fam_ops->mc0_mce = k8_mc0_mce;
fam_ops->mc1_mce = k8_mc1_mce;
fam_ops->mc2_mce = k8_mc2_mce;
break;
case 0x10:
fam_ops->mc0_mce = f10h_mc0_mce;
fam_ops->mc1_mce = k8_mc1_mce;
fam_ops->mc2_mce = k8_mc2_mce;
break;
case 0x11:
fam_ops->mc0_mce = k8_mc0_mce;
fam_ops->mc1_mce = k8_mc1_mce;
fam_ops->mc2_mce = k8_mc2_mce;
break;
case 0x12:
fam_ops->mc0_mce = f12h_mc0_mce;
fam_ops->mc1_mce = k8_mc1_mce;
fam_ops->mc2_mce = k8_mc2_mce;
break;
case 0x14:
fam_ops->mc0_mce = cat_mc0_mce;
fam_ops->mc1_mce = cat_mc1_mce;
fam_ops->mc2_mce = k8_mc2_mce;
break;
case 0x15:
xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
fam_ops->mc0_mce = f15h_mc0_mce;
fam_ops->mc1_mce = f15h_mc1_mce;
fam_ops->mc2_mce = f15h_mc2_mce;
break;
case 0x16:
xec_mask = 0x1f;
fam_ops->mc0_mce = cat_mc0_mce;
fam_ops->mc1_mce = cat_mc1_mce;
fam_ops->mc2_mce = f16h_mc2_mce;
break;
default:
printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
kfree(fam_ops);
fam_ops = NULL;
}
pr_info("MCE: In-kernel MCE decoding enabled.\n");
mce_register_decode_chain(&amd_mce_dec_nb);
return 0;
}
early_initcall(mce_amd_init);
#ifdef MODULE
static void __exit mce_amd_exit(void)
{
mce_unregister_decode_chain(&amd_mce_dec_nb);
kfree(fam_ops);
}
MODULE_DESCRIPTION("AMD MCE decoder");
MODULE_ALIAS("edac-mce-amd");
MODULE_LICENSE("GPL");
module_exit(mce_amd_exit);
#endif

87
drivers/edac/mce_amd.h Normal file
View file

@ -0,0 +1,87 @@
#ifndef _EDAC_MCE_AMD_H
#define _EDAC_MCE_AMD_H
#include <linux/notifier.h>
#include <asm/mce.h>
#define EC(x) ((x) & 0xffff)
#define XEC(x, mask) (((x) >> 16) & mask)
#define LOW_SYNDROME(x) (((x) >> 15) & 0xff)
#define HIGH_SYNDROME(x) (((x) >> 24) & 0xff)
#define TLB_ERROR(x) (((x) & 0xFFF0) == 0x0010)
#define MEM_ERROR(x) (((x) & 0xFF00) == 0x0100)
#define BUS_ERROR(x) (((x) & 0xF800) == 0x0800)
#define INT_ERROR(x) (((x) & 0xF4FF) == 0x0400)
#define TT(x) (((x) >> 2) & 0x3)
#define TT_MSG(x) tt_msgs[TT(x)]
#define II(x) (((x) >> 2) & 0x3)
#define II_MSG(x) ii_msgs[II(x)]
#define LL(x) ((x) & 0x3)
#define LL_MSG(x) ll_msgs[LL(x)]
#define TO(x) (((x) >> 8) & 0x1)
#define TO_MSG(x) to_msgs[TO(x)]
#define PP(x) (((x) >> 9) & 0x3)
#define PP_MSG(x) pp_msgs[PP(x)]
#define UU(x) (((x) >> 8) & 0x3)
#define UU_MSG(x) uu_msgs[UU(x)]
#define R4(x) (((x) >> 4) & 0xf)
#define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!")
#define MCI_STATUS_DEFERRED BIT_64(44)
#define MCI_STATUS_POISON BIT_64(43)
extern const char * const pp_msgs[];
enum tt_ids {
TT_INSTR = 0,
TT_DATA,
TT_GEN,
TT_RESV,
};
enum ll_ids {
LL_RESV = 0,
LL_L1,
LL_L2,
LL_LG,
};
enum ii_ids {
II_MEM = 0,
II_RESV,
II_IO,
II_GEN,
};
enum rrrr_ids {
R4_GEN = 0,
R4_RD,
R4_WR,
R4_DRD,
R4_DWR,
R4_IRD,
R4_PREF,
R4_EVICT,
R4_SNOOP,
};
/*
* per-family decoder ops
*/
struct amd_decoder_ops {
bool (*mc0_mce)(u16, u8);
bool (*mc1_mce)(u16, u8);
bool (*mc2_mce)(u16, u8);
};
void amd_report_gart_errors(bool);
void amd_register_ecc_decoder(void (*f)(int, struct mce *));
void amd_unregister_ecc_decoder(void (*f)(int, struct mce *));
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data);
#endif /* _EDAC_MCE_AMD_H */

173
drivers/edac/mce_amd_inj.c Normal file
View file

@ -0,0 +1,173 @@
/*
* A simple MCE injection facility for testing the MCE decoding code. This
* driver should be built as module so that it can be loaded on production
* kernels for testing purposes.
*
* This file may be distributed under the terms of the GNU General Public
* License version 2.
*
* Copyright (c) 2010: Borislav Petkov <bp@alien8.de>
* Advanced Micro Devices Inc.
*/
#include <linux/kobject.h>
#include <linux/device.h>
#include <linux/edac.h>
#include <linux/module.h>
#include <asm/mce.h>
#include "mce_amd.h"
struct edac_mce_attr {
struct attribute attr;
ssize_t (*show) (struct kobject *kobj, struct edac_mce_attr *attr, char *buf);
ssize_t (*store)(struct kobject *kobj, struct edac_mce_attr *attr,
const char *buf, size_t count);
};
#define EDAC_MCE_ATTR(_name, _mode, _show, _store) \
static struct edac_mce_attr mce_attr_##_name = __ATTR(_name, _mode, _show, _store)
static struct kobject *mce_kobj;
/*
* Collect all the MCi_XXX settings
*/
static struct mce i_mce;
#define MCE_INJECT_STORE(reg) \
static ssize_t edac_inject_##reg##_store(struct kobject *kobj, \
struct edac_mce_attr *attr, \
const char *data, size_t count)\
{ \
int ret = 0; \
unsigned long value; \
\
ret = kstrtoul(data, 16, &value); \
if (ret < 0) \
printk(KERN_ERR "Error writing MCE " #reg " field.\n"); \
\
i_mce.reg = value; \
\
return count; \
}
MCE_INJECT_STORE(status);
MCE_INJECT_STORE(misc);
MCE_INJECT_STORE(addr);
#define MCE_INJECT_SHOW(reg) \
static ssize_t edac_inject_##reg##_show(struct kobject *kobj, \
struct edac_mce_attr *attr, \
char *buf) \
{ \
return sprintf(buf, "0x%016llx\n", i_mce.reg); \
}
MCE_INJECT_SHOW(status);
MCE_INJECT_SHOW(misc);
MCE_INJECT_SHOW(addr);
EDAC_MCE_ATTR(status, 0644, edac_inject_status_show, edac_inject_status_store);
EDAC_MCE_ATTR(misc, 0644, edac_inject_misc_show, edac_inject_misc_store);
EDAC_MCE_ATTR(addr, 0644, edac_inject_addr_show, edac_inject_addr_store);
/*
* This denotes into which bank we're injecting and triggers
* the injection, at the same time.
*/
static ssize_t edac_inject_bank_store(struct kobject *kobj,
struct edac_mce_attr *attr,
const char *data, size_t count)
{
int ret = 0;
unsigned long value;
ret = kstrtoul(data, 10, &value);
if (ret < 0) {
printk(KERN_ERR "Invalid bank value!\n");
return -EINVAL;
}
if (value > 5)
if (boot_cpu_data.x86 != 0x15 || value > 6) {
printk(KERN_ERR "Non-existent MCE bank: %lu\n", value);
return -EINVAL;
}
i_mce.bank = value;
amd_decode_mce(NULL, 0, &i_mce);
return count;
}
static ssize_t edac_inject_bank_show(struct kobject *kobj,
struct edac_mce_attr *attr, char *buf)
{
return sprintf(buf, "%d\n", i_mce.bank);
}
EDAC_MCE_ATTR(bank, 0644, edac_inject_bank_show, edac_inject_bank_store);
static struct edac_mce_attr *sysfs_attrs[] = { &mce_attr_status, &mce_attr_misc,
&mce_attr_addr, &mce_attr_bank
};
static int __init edac_init_mce_inject(void)
{
struct bus_type *edac_subsys = NULL;
int i, err = 0;
edac_subsys = edac_get_sysfs_subsys();
if (!edac_subsys)
return -EINVAL;
mce_kobj = kobject_create_and_add("mce", &edac_subsys->dev_root->kobj);
if (!mce_kobj) {
printk(KERN_ERR "Error creating a mce kset.\n");
err = -ENOMEM;
goto err_mce_kobj;
}
for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) {
err = sysfs_create_file(mce_kobj, &sysfs_attrs[i]->attr);
if (err) {
printk(KERN_ERR "Error creating %s in sysfs.\n",
sysfs_attrs[i]->attr.name);
goto err_sysfs_create;
}
}
return 0;
err_sysfs_create:
while (--i >= 0)
sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
kobject_del(mce_kobj);
err_mce_kobj:
edac_put_sysfs_subsys();
return err;
}
static void __exit edac_exit_mce_inject(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++)
sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
kobject_del(mce_kobj);
edac_put_sysfs_subsys();
}
module_init(edac_init_mce_inject);
module_exit(edac_exit_mce_inject);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Borislav Petkov <bp@alien8.de>");
MODULE_AUTHOR("AMD Inc.");
MODULE_DESCRIPTION("MCE injection facility for testing MCE decoding");

1303
drivers/edac/mpc85xx_edac.c Normal file

File diff suppressed because it is too large Load diff

173
drivers/edac/mpc85xx_edac.h Normal file
View file

@ -0,0 +1,173 @@
/*
* Freescale MPC85xx Memory Controller kenel module
* Author: Dave Jiang <djiang@mvista.com>
*
* 2006-2007 (c) MontaVista Software, Inc. This file is licensed under
* the terms of the GNU General Public License version 2. This program
* is licensed "as is" without any warranty of any kind, whether express
* or implied.
*
*/
#ifndef _MPC85XX_EDAC_H_
#define _MPC85XX_EDAC_H_
#define MPC85XX_REVISION " Ver: 2.0.0"
#define EDAC_MOD_STR "MPC85xx_edac"
#define mpc85xx_printk(level, fmt, arg...) \
edac_printk(level, "MPC85xx", fmt, ##arg)
#define mpc85xx_mc_printk(mci, level, fmt, arg...) \
edac_mc_chipset_printk(mci, level, "MPC85xx", fmt, ##arg)
/*
* DRAM error defines
*/
/* DDR_SDRAM_CFG */
#define MPC85XX_MC_DDR_SDRAM_CFG 0x0110
#define MPC85XX_MC_CS_BNDS_0 0x0000
#define MPC85XX_MC_CS_BNDS_1 0x0008
#define MPC85XX_MC_CS_BNDS_2 0x0010
#define MPC85XX_MC_CS_BNDS_3 0x0018
#define MPC85XX_MC_CS_BNDS_OFS 0x0008
#define MPC85XX_MC_DATA_ERR_INJECT_HI 0x0e00
#define MPC85XX_MC_DATA_ERR_INJECT_LO 0x0e04
#define MPC85XX_MC_ECC_ERR_INJECT 0x0e08
#define MPC85XX_MC_CAPTURE_DATA_HI 0x0e20
#define MPC85XX_MC_CAPTURE_DATA_LO 0x0e24
#define MPC85XX_MC_CAPTURE_ECC 0x0e28
#define MPC85XX_MC_ERR_DETECT 0x0e40
#define MPC85XX_MC_ERR_DISABLE 0x0e44
#define MPC85XX_MC_ERR_INT_EN 0x0e48
#define MPC85XX_MC_CAPTURE_ATRIBUTES 0x0e4c
#define MPC85XX_MC_CAPTURE_ADDRESS 0x0e50
#define MPC85XX_MC_ERR_SBE 0x0e58
#define DSC_MEM_EN 0x80000000
#define DSC_ECC_EN 0x20000000
#define DSC_RD_EN 0x10000000
#define DSC_DBW_MASK 0x00180000
#define DSC_DBW_32 0x00080000
#define DSC_DBW_64 0x00000000
#define DSC_SDTYPE_MASK 0x07000000
#define DSC_SDTYPE_DDR 0x02000000
#define DSC_SDTYPE_DDR2 0x03000000
#define DSC_SDTYPE_DDR3 0x07000000
#define DSC_X32_EN 0x00000020
/* Err_Int_En */
#define DDR_EIE_MSEE 0x1 /* memory select */
#define DDR_EIE_SBEE 0x4 /* single-bit ECC error */
#define DDR_EIE_MBEE 0x8 /* multi-bit ECC error */
/* Err_Detect */
#define DDR_EDE_MSE 0x1 /* memory select */
#define DDR_EDE_SBE 0x4 /* single-bit ECC error */
#define DDR_EDE_MBE 0x8 /* multi-bit ECC error */
#define DDR_EDE_MME 0x80000000 /* multiple memory errors */
/* Err_Disable */
#define DDR_EDI_MSED 0x1 /* memory select disable */
#define DDR_EDI_SBED 0x4 /* single-bit ECC error disable */
#define DDR_EDI_MBED 0x8 /* multi-bit ECC error disable */
/*
* L2 Err defines
*/
#define MPC85XX_L2_ERRINJHI 0x0000
#define MPC85XX_L2_ERRINJLO 0x0004
#define MPC85XX_L2_ERRINJCTL 0x0008
#define MPC85XX_L2_CAPTDATAHI 0x0020
#define MPC85XX_L2_CAPTDATALO 0x0024
#define MPC85XX_L2_CAPTECC 0x0028
#define MPC85XX_L2_ERRDET 0x0040
#define MPC85XX_L2_ERRDIS 0x0044
#define MPC85XX_L2_ERRINTEN 0x0048
#define MPC85XX_L2_ERRATTR 0x004c
#define MPC85XX_L2_ERRADDR 0x0050
#define MPC85XX_L2_ERRCTL 0x0058
/* Error Interrupt Enable */
#define L2_EIE_L2CFGINTEN 0x1
#define L2_EIE_SBECCINTEN 0x4
#define L2_EIE_MBECCINTEN 0x8
#define L2_EIE_TPARINTEN 0x10
#define L2_EIE_MASK (L2_EIE_L2CFGINTEN | L2_EIE_SBECCINTEN | \
L2_EIE_MBECCINTEN | L2_EIE_TPARINTEN)
/* Error Detect */
#define L2_EDE_L2CFGERR 0x1
#define L2_EDE_SBECCERR 0x4
#define L2_EDE_MBECCERR 0x8
#define L2_EDE_TPARERR 0x10
#define L2_EDE_MULL2ERR 0x80000000
#define L2_EDE_CE_MASK L2_EDE_SBECCERR
#define L2_EDE_UE_MASK (L2_EDE_L2CFGERR | L2_EDE_MBECCERR | \
L2_EDE_TPARERR)
#define L2_EDE_MASK (L2_EDE_L2CFGERR | L2_EDE_SBECCERR | \
L2_EDE_MBECCERR | L2_EDE_TPARERR | L2_EDE_MULL2ERR)
/*
* PCI Err defines
*/
#define PCI_EDE_TOE 0x00000001
#define PCI_EDE_SCM 0x00000002
#define PCI_EDE_IRMSV 0x00000004
#define PCI_EDE_ORMSV 0x00000008
#define PCI_EDE_OWMSV 0x00000010
#define PCI_EDE_TGT_ABRT 0x00000020
#define PCI_EDE_MST_ABRT 0x00000040
#define PCI_EDE_TGT_PERR 0x00000080
#define PCI_EDE_MST_PERR 0x00000100
#define PCI_EDE_RCVD_SERR 0x00000200
#define PCI_EDE_ADDR_PERR 0x00000400
#define PCI_EDE_MULTI_ERR 0x80000000
#define PCI_EDE_PERR_MASK (PCI_EDE_TGT_PERR | PCI_EDE_MST_PERR | \
PCI_EDE_ADDR_PERR)
#define MPC85XX_PCI_ERR_DR 0x0000
#define MPC85XX_PCI_ERR_CAP_DR 0x0004
#define MPC85XX_PCI_ERR_EN 0x0008
#define PEX_ERR_ICCAIE_EN_BIT 0x00020000
#define MPC85XX_PCI_ERR_ATTRIB 0x000c
#define MPC85XX_PCI_ERR_ADDR 0x0010
#define PEX_ERR_ICCAD_DISR_BIT 0x00020000
#define MPC85XX_PCI_ERR_EXT_ADDR 0x0014
#define MPC85XX_PCI_ERR_DL 0x0018
#define MPC85XX_PCI_ERR_DH 0x001c
#define MPC85XX_PCI_GAS_TIMR 0x0020
#define MPC85XX_PCI_PCIX_TIMR 0x0024
#define MPC85XX_PCIE_ERR_CAP_R0 0x0028
#define MPC85XX_PCIE_ERR_CAP_R1 0x002c
#define MPC85XX_PCIE_ERR_CAP_R2 0x0030
#define MPC85XX_PCIE_ERR_CAP_R3 0x0034
struct mpc85xx_mc_pdata {
char *name;
int edac_idx;
void __iomem *mc_vbase;
int irq;
};
struct mpc85xx_l2_pdata {
char *name;
int edac_idx;
void __iomem *l2_vbase;
int irq;
};
struct mpc85xx_pci_pdata {
char *name;
bool is_pcie;
int edac_idx;
void __iomem *pci_vbase;
int irq;
};
#endif

905
drivers/edac/mv64x60_edac.c Normal file
View file

@ -0,0 +1,905 @@
/*
* Marvell MV64x60 Memory Controller kernel module for PPC platforms
*
* Author: Dave Jiang <djiang@mvista.com>
*
* 2006-2007 (c) MontaVista Software, Inc. This file is licensed under
* the terms of the GNU General Public License version 2. This program
* is licensed "as is" without any warranty of any kind, whether express
* or implied.
*
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/edac.h>
#include <linux/gfp.h>
#include "edac_core.h"
#include "edac_module.h"
#include "mv64x60_edac.h"
static const char *mv64x60_ctl_name = "MV64x60";
static int edac_dev_idx;
static int edac_pci_idx;
static int edac_mc_idx;
/*********************** PCI err device **********************************/
#ifdef CONFIG_PCI
static void mv64x60_pci_check(struct edac_pci_ctl_info *pci)
{
struct mv64x60_pci_pdata *pdata = pci->pvt_info;
u32 cause;
cause = in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
if (!cause)
return;
printk(KERN_ERR "Error in PCI %d Interface\n", pdata->pci_hose);
printk(KERN_ERR "Cause register: 0x%08x\n", cause);
printk(KERN_ERR "Address Low: 0x%08x\n",
in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_LO));
printk(KERN_ERR "Address High: 0x%08x\n",
in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_HI));
printk(KERN_ERR "Attribute: 0x%08x\n",
in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ATTR));
printk(KERN_ERR "Command: 0x%08x\n",
in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CMD));
out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, ~cause);
if (cause & MV64X60_PCI_PE_MASK)
edac_pci_handle_pe(pci, pci->ctl_name);
if (!(cause & MV64X60_PCI_PE_MASK))
edac_pci_handle_npe(pci, pci->ctl_name);
}
static irqreturn_t mv64x60_pci_isr(int irq, void *dev_id)
{
struct edac_pci_ctl_info *pci = dev_id;
struct mv64x60_pci_pdata *pdata = pci->pvt_info;
u32 val;
val = in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
if (!val)
return IRQ_NONE;
mv64x60_pci_check(pci);
return IRQ_HANDLED;
}
/*
* Bit 0 of MV64x60_PCIx_ERR_MASK does not exist on the 64360 and because of
* errata FEr-#11 and FEr-##16 for the 64460, it should be 0 on that chip as
* well. IOW, don't set bit 0.
*/
/* Erratum FEr PCI-#16: clear bit 0 of PCI SERRn Mask reg. */
static int __init mv64x60_pci_fixup(struct platform_device *pdev)
{
struct resource *r;
void __iomem *pci_serr;
r = platform_get_resource(pdev, IORESOURCE_MEM, 1);
if (!r) {
printk(KERN_ERR "%s: Unable to get resource for "
"PCI err regs\n", __func__);
return -ENOENT;
}
pci_serr = ioremap(r->start, resource_size(r));
if (!pci_serr)
return -ENOMEM;
out_le32(pci_serr, in_le32(pci_serr) & ~0x1);
iounmap(pci_serr);
return 0;
}
static int mv64x60_pci_err_probe(struct platform_device *pdev)
{
struct edac_pci_ctl_info *pci;
struct mv64x60_pci_pdata *pdata;
struct resource *r;
int res = 0;
if (!devres_open_group(&pdev->dev, mv64x60_pci_err_probe, GFP_KERNEL))
return -ENOMEM;
pci = edac_pci_alloc_ctl_info(sizeof(*pdata), "mv64x60_pci_err");
if (!pci)
return -ENOMEM;
pdata = pci->pvt_info;
pdata->pci_hose = pdev->id;
pdata->name = "mpc85xx_pci_err";
pdata->irq = NO_IRQ;
platform_set_drvdata(pdev, pci);
pci->dev = &pdev->dev;
pci->dev_name = dev_name(&pdev->dev);
pci->mod_name = EDAC_MOD_STR;
pci->ctl_name = pdata->name;
if (edac_op_state == EDAC_OPSTATE_POLL)
pci->edac_check = mv64x60_pci_check;
pdata->edac_idx = edac_pci_idx++;
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!r) {
printk(KERN_ERR "%s: Unable to get resource for "
"PCI err regs\n", __func__);
res = -ENOENT;
goto err;
}
if (!devm_request_mem_region(&pdev->dev,
r->start,
resource_size(r),
pdata->name)) {
printk(KERN_ERR "%s: Error while requesting mem region\n",
__func__);
res = -EBUSY;
goto err;
}
pdata->pci_vbase = devm_ioremap(&pdev->dev,
r->start,
resource_size(r));
if (!pdata->pci_vbase) {
printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__);
res = -ENOMEM;
goto err;
}
res = mv64x60_pci_fixup(pdev);
if (res < 0) {
printk(KERN_ERR "%s: PCI fixup failed\n", __func__);
goto err;
}
out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, 0);
out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK, 0);
out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK,
MV64X60_PCIx_ERR_MASK_VAL);
if (edac_pci_add_device(pci, pdata->edac_idx) > 0) {
edac_dbg(3, "failed edac_pci_add_device()\n");
goto err;
}
if (edac_op_state == EDAC_OPSTATE_INT) {
pdata->irq = platform_get_irq(pdev, 0);
res = devm_request_irq(&pdev->dev,
pdata->irq,
mv64x60_pci_isr,
IRQF_DISABLED,
"[EDAC] PCI err",
pci);
if (res < 0) {
printk(KERN_ERR "%s: Unable to request irq %d for "
"MV64x60 PCI ERR\n", __func__, pdata->irq);
res = -ENODEV;
goto err2;
}
printk(KERN_INFO EDAC_MOD_STR " acquired irq %d for PCI Err\n",
pdata->irq);
}
devres_remove_group(&pdev->dev, mv64x60_pci_err_probe);
/* get this far and it's successful */
edac_dbg(3, "success\n");
return 0;
err2:
edac_pci_del_device(&pdev->dev);
err:
edac_pci_free_ctl_info(pci);
devres_release_group(&pdev->dev, mv64x60_pci_err_probe);
return res;
}
static int mv64x60_pci_err_remove(struct platform_device *pdev)
{
struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev);
edac_dbg(0, "\n");
edac_pci_del_device(&pdev->dev);
edac_pci_free_ctl_info(pci);
return 0;
}
static struct platform_driver mv64x60_pci_err_driver = {
.probe = mv64x60_pci_err_probe,
.remove = mv64x60_pci_err_remove,
.driver = {
.name = "mv64x60_pci_err",
}
};
#endif /* CONFIG_PCI */
/*********************** SRAM err device **********************************/
static void mv64x60_sram_check(struct edac_device_ctl_info *edac_dev)
{
struct mv64x60_sram_pdata *pdata = edac_dev->pvt_info;
u32 cause;
cause = in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
if (!cause)
return;
printk(KERN_ERR "Error in internal SRAM\n");
printk(KERN_ERR "Cause register: 0x%08x\n", cause);
printk(KERN_ERR "Address Low: 0x%08x\n",
in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_LO));
printk(KERN_ERR "Address High: 0x%08x\n",
in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_HI));
printk(KERN_ERR "Data Low: 0x%08x\n",
in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_LO));
printk(KERN_ERR "Data High: 0x%08x\n",
in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_HI));
printk(KERN_ERR "Parity: 0x%08x\n",
in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_PARITY));
out_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE, 0);
edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
}
static irqreturn_t mv64x60_sram_isr(int irq, void *dev_id)
{
struct edac_device_ctl_info *edac_dev = dev_id;
struct mv64x60_sram_pdata *pdata = edac_dev->pvt_info;
u32 cause;
cause = in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
if (!cause)
return IRQ_NONE;
mv64x60_sram_check(edac_dev);
return IRQ_HANDLED;
}
static int mv64x60_sram_err_probe(struct platform_device *pdev)
{
struct edac_device_ctl_info *edac_dev;
struct mv64x60_sram_pdata *pdata;
struct resource *r;
int res = 0;
if (!devres_open_group(&pdev->dev, mv64x60_sram_err_probe, GFP_KERNEL))
return -ENOMEM;
edac_dev = edac_device_alloc_ctl_info(sizeof(*pdata),
"sram", 1, NULL, 0, 0, NULL, 0,
edac_dev_idx);
if (!edac_dev) {
devres_release_group(&pdev->dev, mv64x60_sram_err_probe);
return -ENOMEM;
}
pdata = edac_dev->pvt_info;
pdata->name = "mv64x60_sram_err";
pdata->irq = NO_IRQ;
edac_dev->dev = &pdev->dev;
platform_set_drvdata(pdev, edac_dev);
edac_dev->dev_name = dev_name(&pdev->dev);
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!r) {
printk(KERN_ERR "%s: Unable to get resource for "
"SRAM err regs\n", __func__);
res = -ENOENT;
goto err;
}
if (!devm_request_mem_region(&pdev->dev,
r->start,
resource_size(r),
pdata->name)) {
printk(KERN_ERR "%s: Error while request mem region\n",
__func__);
res = -EBUSY;
goto err;
}
pdata->sram_vbase = devm_ioremap(&pdev->dev,
r->start,
resource_size(r));
if (!pdata->sram_vbase) {
printk(KERN_ERR "%s: Unable to setup SRAM err regs\n",
__func__);
res = -ENOMEM;
goto err;
}
/* setup SRAM err registers */
out_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE, 0);
edac_dev->mod_name = EDAC_MOD_STR;
edac_dev->ctl_name = pdata->name;
if (edac_op_state == EDAC_OPSTATE_POLL)
edac_dev->edac_check = mv64x60_sram_check;
pdata->edac_idx = edac_dev_idx++;
if (edac_device_add_device(edac_dev) > 0) {
edac_dbg(3, "failed edac_device_add_device()\n");
goto err;
}
if (edac_op_state == EDAC_OPSTATE_INT) {
pdata->irq = platform_get_irq(pdev, 0);
res = devm_request_irq(&pdev->dev,
pdata->irq,
mv64x60_sram_isr,
IRQF_DISABLED,
"[EDAC] SRAM err",
edac_dev);
if (res < 0) {
printk(KERN_ERR
"%s: Unable to request irq %d for "
"MV64x60 SRAM ERR\n", __func__, pdata->irq);
res = -ENODEV;
goto err2;
}
printk(KERN_INFO EDAC_MOD_STR " acquired irq %d for SRAM Err\n",
pdata->irq);
}
devres_remove_group(&pdev->dev, mv64x60_sram_err_probe);
/* get this far and it's successful */
edac_dbg(3, "success\n");
return 0;
err2:
edac_device_del_device(&pdev->dev);
err:
devres_release_group(&pdev->dev, mv64x60_sram_err_probe);
edac_device_free_ctl_info(edac_dev);
return res;
}
static int mv64x60_sram_err_remove(struct platform_device *pdev)
{
struct edac_device_ctl_info *edac_dev = platform_get_drvdata(pdev);
edac_dbg(0, "\n");
edac_device_del_device(&pdev->dev);
edac_device_free_ctl_info(edac_dev);
return 0;
}
static struct platform_driver mv64x60_sram_err_driver = {
.probe = mv64x60_sram_err_probe,
.remove = mv64x60_sram_err_remove,
.driver = {
.name = "mv64x60_sram_err",
}
};
/*********************** CPU err device **********************************/
static void mv64x60_cpu_check(struct edac_device_ctl_info *edac_dev)
{
struct mv64x60_cpu_pdata *pdata = edac_dev->pvt_info;
u32 cause;
cause = in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
MV64x60_CPU_CAUSE_MASK;
if (!cause)
return;
printk(KERN_ERR "Error on CPU interface\n");
printk(KERN_ERR "Cause register: 0x%08x\n", cause);
printk(KERN_ERR "Address Low: 0x%08x\n",
in_le32(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_LO));
printk(KERN_ERR "Address High: 0x%08x\n",
in_le32(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_HI));
printk(KERN_ERR "Data Low: 0x%08x\n",
in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_LO));
printk(KERN_ERR "Data High: 0x%08x\n",
in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_HI));
printk(KERN_ERR "Parity: 0x%08x\n",
in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_PARITY));
out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE, 0);
edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
}
static irqreturn_t mv64x60_cpu_isr(int irq, void *dev_id)
{
struct edac_device_ctl_info *edac_dev = dev_id;
struct mv64x60_cpu_pdata *pdata = edac_dev->pvt_info;
u32 cause;
cause = in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
MV64x60_CPU_CAUSE_MASK;
if (!cause)
return IRQ_NONE;
mv64x60_cpu_check(edac_dev);
return IRQ_HANDLED;
}
static int mv64x60_cpu_err_probe(struct platform_device *pdev)
{
struct edac_device_ctl_info *edac_dev;
struct resource *r;
struct mv64x60_cpu_pdata *pdata;
int res = 0;
if (!devres_open_group(&pdev->dev, mv64x60_cpu_err_probe, GFP_KERNEL))
return -ENOMEM;
edac_dev = edac_device_alloc_ctl_info(sizeof(*pdata),
"cpu", 1, NULL, 0, 0, NULL, 0,
edac_dev_idx);
if (!edac_dev) {
devres_release_group(&pdev->dev, mv64x60_cpu_err_probe);
return -ENOMEM;
}
pdata = edac_dev->pvt_info;
pdata->name = "mv64x60_cpu_err";
pdata->irq = NO_IRQ;
edac_dev->dev = &pdev->dev;
platform_set_drvdata(pdev, edac_dev);
edac_dev->dev_name = dev_name(&pdev->dev);
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!r) {
printk(KERN_ERR "%s: Unable to get resource for "
"CPU err regs\n", __func__);
res = -ENOENT;
goto err;
}
if (!devm_request_mem_region(&pdev->dev,
r->start,
resource_size(r),
pdata->name)) {
printk(KERN_ERR "%s: Error while requesting mem region\n",
__func__);
res = -EBUSY;
goto err;
}
pdata->cpu_vbase[0] = devm_ioremap(&pdev->dev,
r->start,
resource_size(r));
if (!pdata->cpu_vbase[0]) {
printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__);
res = -ENOMEM;
goto err;
}
r = platform_get_resource(pdev, IORESOURCE_MEM, 1);
if (!r) {
printk(KERN_ERR "%s: Unable to get resource for "
"CPU err regs\n", __func__);
res = -ENOENT;
goto err;
}
if (!devm_request_mem_region(&pdev->dev,
r->start,
resource_size(r),
pdata->name)) {
printk(KERN_ERR "%s: Error while requesting mem region\n",
__func__);
res = -EBUSY;
goto err;
}
pdata->cpu_vbase[1] = devm_ioremap(&pdev->dev,
r->start,
resource_size(r));
if (!pdata->cpu_vbase[1]) {
printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__);
res = -ENOMEM;
goto err;
}
/* setup CPU err registers */
out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE, 0);
out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK, 0);
out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK, 0x000000ff);
edac_dev->mod_name = EDAC_MOD_STR;
edac_dev->ctl_name = pdata->name;
if (edac_op_state == EDAC_OPSTATE_POLL)
edac_dev->edac_check = mv64x60_cpu_check;
pdata->edac_idx = edac_dev_idx++;
if (edac_device_add_device(edac_dev) > 0) {
edac_dbg(3, "failed edac_device_add_device()\n");
goto err;
}
if (edac_op_state == EDAC_OPSTATE_INT) {
pdata->irq = platform_get_irq(pdev, 0);
res = devm_request_irq(&pdev->dev,
pdata->irq,
mv64x60_cpu_isr,
IRQF_DISABLED,
"[EDAC] CPU err",
edac_dev);
if (res < 0) {
printk(KERN_ERR
"%s: Unable to request irq %d for MV64x60 "
"CPU ERR\n", __func__, pdata->irq);
res = -ENODEV;
goto err2;
}
printk(KERN_INFO EDAC_MOD_STR
" acquired irq %d for CPU Err\n", pdata->irq);
}
devres_remove_group(&pdev->dev, mv64x60_cpu_err_probe);
/* get this far and it's successful */
edac_dbg(3, "success\n");
return 0;
err2:
edac_device_del_device(&pdev->dev);
err:
devres_release_group(&pdev->dev, mv64x60_cpu_err_probe);
edac_device_free_ctl_info(edac_dev);
return res;
}
static int mv64x60_cpu_err_remove(struct platform_device *pdev)
{
struct edac_device_ctl_info *edac_dev = platform_get_drvdata(pdev);
edac_dbg(0, "\n");
edac_device_del_device(&pdev->dev);
edac_device_free_ctl_info(edac_dev);
return 0;
}
static struct platform_driver mv64x60_cpu_err_driver = {
.probe = mv64x60_cpu_err_probe,
.remove = mv64x60_cpu_err_remove,
.driver = {
.name = "mv64x60_cpu_err",
}
};
/*********************** DRAM err device **********************************/
static void mv64x60_mc_check(struct mem_ctl_info *mci)
{
struct mv64x60_mc_pdata *pdata = mci->pvt_info;
u32 reg;
u32 err_addr;
u32 sdram_ecc;
u32 comp_ecc;
u32 syndrome;
reg = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
if (!reg)
return;
err_addr = reg & ~0x3;
sdram_ecc = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_RCVD);
comp_ecc = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CALC);
syndrome = sdram_ecc ^ comp_ecc;
/* first bit clear in ECC Err Reg, 1 bit error, correctable by HW */
if (!(reg & 0x1))
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
err_addr >> PAGE_SHIFT,
err_addr & PAGE_MASK, syndrome,
0, 0, -1,
mci->ctl_name, "");
else /* 2 bit error, UE */
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
err_addr >> PAGE_SHIFT,
err_addr & PAGE_MASK, 0,
0, 0, -1,
mci->ctl_name, "");
/* clear the error */
out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR, 0);
}
static irqreturn_t mv64x60_mc_isr(int irq, void *dev_id)
{
struct mem_ctl_info *mci = dev_id;
struct mv64x60_mc_pdata *pdata = mci->pvt_info;
u32 reg;
reg = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
if (!reg)
return IRQ_NONE;
/* writing 0's to the ECC err addr in check function clears irq */
mv64x60_mc_check(mci);
return IRQ_HANDLED;
}
static void get_total_mem(struct mv64x60_mc_pdata *pdata)
{
struct device_node *np = NULL;
const unsigned int *reg;
np = of_find_node_by_type(NULL, "memory");
if (!np)
return;
reg = of_get_property(np, "reg", NULL);
pdata->total_mem = reg[1];
}
static void mv64x60_init_csrows(struct mem_ctl_info *mci,
struct mv64x60_mc_pdata *pdata)
{
struct csrow_info *csrow;
struct dimm_info *dimm;
u32 devtype;
u32 ctl;
get_total_mem(pdata);
ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
csrow = mci->csrows[0];
dimm = csrow->channels[0]->dimm;
dimm->nr_pages = pdata->total_mem >> PAGE_SHIFT;
dimm->grain = 8;
dimm->mtype = (ctl & MV64X60_SDRAM_REGISTERED) ? MEM_RDDR : MEM_DDR;
devtype = (ctl >> 20) & 0x3;
switch (devtype) {
case 0x0:
dimm->dtype = DEV_X32;
break;
case 0x2: /* could be X8 too, but no way to tell */
dimm->dtype = DEV_X16;
break;
case 0x3:
dimm->dtype = DEV_X4;
break;
default:
dimm->dtype = DEV_UNKNOWN;
break;
}
dimm->edac_mode = EDAC_SECDED;
}
static int mv64x60_mc_err_probe(struct platform_device *pdev)
{
struct mem_ctl_info *mci;
struct edac_mc_layer layers[2];
struct mv64x60_mc_pdata *pdata;
struct resource *r;
u32 ctl;
int res = 0;
if (!devres_open_group(&pdev->dev, mv64x60_mc_err_probe, GFP_KERNEL))
return -ENOMEM;
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = 1;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = 1;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers,
sizeof(struct mv64x60_mc_pdata));
if (!mci) {
printk(KERN_ERR "%s: No memory for CPU err\n", __func__);
devres_release_group(&pdev->dev, mv64x60_mc_err_probe);
return -ENOMEM;
}
pdata = mci->pvt_info;
mci->pdev = &pdev->dev;
platform_set_drvdata(pdev, mci);
pdata->name = "mv64x60_mc_err";
pdata->irq = NO_IRQ;
mci->dev_name = dev_name(&pdev->dev);
pdata->edac_idx = edac_mc_idx++;
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!r) {
printk(KERN_ERR "%s: Unable to get resource for "
"MC err regs\n", __func__);
res = -ENOENT;
goto err;
}
if (!devm_request_mem_region(&pdev->dev,
r->start,
resource_size(r),
pdata->name)) {
printk(KERN_ERR "%s: Error while requesting mem region\n",
__func__);
res = -EBUSY;
goto err;
}
pdata->mc_vbase = devm_ioremap(&pdev->dev,
r->start,
resource_size(r));
if (!pdata->mc_vbase) {
printk(KERN_ERR "%s: Unable to setup MC err regs\n", __func__);
res = -ENOMEM;
goto err;
}
ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
if (!(ctl & MV64X60_SDRAM_ECC)) {
/* Non-ECC RAM? */
printk(KERN_WARNING "%s: No ECC DIMMs discovered\n", __func__);
res = -ENODEV;
goto err2;
}
edac_dbg(3, "init mci\n");
mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = MV64x60_REVISION;
mci->ctl_name = mv64x60_ctl_name;
if (edac_op_state == EDAC_OPSTATE_POLL)
mci->edac_check = mv64x60_mc_check;
mci->ctl_page_to_phys = NULL;
mci->scrub_mode = SCRUB_SW_SRC;
mv64x60_init_csrows(mci, pdata);
/* setup MC registers */
out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR, 0);
ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL);
ctl = (ctl & 0xff00ffff) | 0x10000;
out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL, ctl);
if (edac_mc_add_mc(mci)) {
edac_dbg(3, "failed edac_mc_add_mc()\n");
goto err;
}
if (edac_op_state == EDAC_OPSTATE_INT) {
/* acquire interrupt that reports errors */
pdata->irq = platform_get_irq(pdev, 0);
res = devm_request_irq(&pdev->dev,
pdata->irq,
mv64x60_mc_isr,
IRQF_DISABLED,
"[EDAC] MC err",
mci);
if (res < 0) {
printk(KERN_ERR "%s: Unable to request irq %d for "
"MV64x60 DRAM ERR\n", __func__, pdata->irq);
res = -ENODEV;
goto err2;
}
printk(KERN_INFO EDAC_MOD_STR " acquired irq %d for MC Err\n",
pdata->irq);
}
/* get this far and it's successful */
edac_dbg(3, "success\n");
return 0;
err2:
edac_mc_del_mc(&pdev->dev);
err:
devres_release_group(&pdev->dev, mv64x60_mc_err_probe);
edac_mc_free(mci);
return res;
}
static int mv64x60_mc_err_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
edac_dbg(0, "\n");
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
return 0;
}
static struct platform_driver mv64x60_mc_err_driver = {
.probe = mv64x60_mc_err_probe,
.remove = mv64x60_mc_err_remove,
.driver = {
.name = "mv64x60_mc_err",
}
};
static int __init mv64x60_edac_init(void)
{
int ret = 0;
printk(KERN_INFO "Marvell MV64x60 EDAC driver " MV64x60_REVISION "\n");
printk(KERN_INFO "\t(C) 2006-2007 MontaVista Software\n");
/* make sure error reporting method is sane */
switch (edac_op_state) {
case EDAC_OPSTATE_POLL:
case EDAC_OPSTATE_INT:
break;
default:
edac_op_state = EDAC_OPSTATE_INT;
break;
}
ret = platform_driver_register(&mv64x60_mc_err_driver);
if (ret)
printk(KERN_WARNING EDAC_MOD_STR "MC err failed to register\n");
ret = platform_driver_register(&mv64x60_cpu_err_driver);
if (ret)
printk(KERN_WARNING EDAC_MOD_STR
"CPU err failed to register\n");
ret = platform_driver_register(&mv64x60_sram_err_driver);
if (ret)
printk(KERN_WARNING EDAC_MOD_STR
"SRAM err failed to register\n");
#ifdef CONFIG_PCI
ret = platform_driver_register(&mv64x60_pci_err_driver);
if (ret)
printk(KERN_WARNING EDAC_MOD_STR
"PCI err failed to register\n");
#endif
return ret;
}
module_init(mv64x60_edac_init);
static void __exit mv64x60_edac_exit(void)
{
#ifdef CONFIG_PCI
platform_driver_unregister(&mv64x60_pci_err_driver);
#endif
platform_driver_unregister(&mv64x60_sram_err_driver);
platform_driver_unregister(&mv64x60_cpu_err_driver);
platform_driver_unregister(&mv64x60_mc_err_driver);
}
module_exit(mv64x60_edac_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Montavista Software, Inc.");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state,
"EDAC Error Reporting state: 0=Poll, 2=Interrupt");

114
drivers/edac/mv64x60_edac.h Normal file
View file

@ -0,0 +1,114 @@
/*
* EDAC defs for Marvell MV64x60 bridge chip
*
* Author: Dave Jiang <djiang@mvista.com>
*
* 2007 (c) MontaVista Software, Inc. This file is licensed under
* the terms of the GNU General Public License version 2. This program
* is licensed "as is" without any warranty of any kind, whether express
* or implied.
*
*/
#ifndef _MV64X60_EDAC_H_
#define _MV64X60_EDAC_H_
#define MV64x60_REVISION " Ver: 2.0.0"
#define EDAC_MOD_STR "MV64x60_edac"
#define mv64x60_printk(level, fmt, arg...) \
edac_printk(level, "MV64x60", fmt, ##arg)
#define mv64x60_mc_printk(mci, level, fmt, arg...) \
edac_mc_chipset_printk(mci, level, "MV64x60", fmt, ##arg)
/* CPU Error Report Registers */
#define MV64x60_CPU_ERR_ADDR_LO 0x00 /* 0x0070 */
#define MV64x60_CPU_ERR_ADDR_HI 0x08 /* 0x0078 */
#define MV64x60_CPU_ERR_DATA_LO 0x00 /* 0x0128 */
#define MV64x60_CPU_ERR_DATA_HI 0x08 /* 0x0130 */
#define MV64x60_CPU_ERR_PARITY 0x10 /* 0x0138 */
#define MV64x60_CPU_ERR_CAUSE 0x18 /* 0x0140 */
#define MV64x60_CPU_ERR_MASK 0x20 /* 0x0148 */
#define MV64x60_CPU_CAUSE_MASK 0x07ffffff
/* SRAM Error Report Registers */
#define MV64X60_SRAM_ERR_CAUSE 0x08 /* 0x0388 */
#define MV64X60_SRAM_ERR_ADDR_LO 0x10 /* 0x0390 */
#define MV64X60_SRAM_ERR_ADDR_HI 0x78 /* 0x03f8 */
#define MV64X60_SRAM_ERR_DATA_LO 0x18 /* 0x0398 */
#define MV64X60_SRAM_ERR_DATA_HI 0x20 /* 0x03a0 */
#define MV64X60_SRAM_ERR_PARITY 0x28 /* 0x03a8 */
/* SDRAM Controller Registers */
#define MV64X60_SDRAM_CONFIG 0x00 /* 0x1400 */
#define MV64X60_SDRAM_ERR_DATA_HI 0x40 /* 0x1440 */
#define MV64X60_SDRAM_ERR_DATA_LO 0x44 /* 0x1444 */
#define MV64X60_SDRAM_ERR_ECC_RCVD 0x48 /* 0x1448 */
#define MV64X60_SDRAM_ERR_ECC_CALC 0x4c /* 0x144c */
#define MV64X60_SDRAM_ERR_ADDR 0x50 /* 0x1450 */
#define MV64X60_SDRAM_ERR_ECC_CNTL 0x54 /* 0x1454 */
#define MV64X60_SDRAM_ERR_ECC_ERR_CNT 0x58 /* 0x1458 */
#define MV64X60_SDRAM_REGISTERED 0x20000
#define MV64X60_SDRAM_ECC 0x40000
#ifdef CONFIG_PCI
/*
* Bit 0 of MV64x60_PCIx_ERR_MASK does not exist on the 64360 and because of
* errata FEr-#11 and FEr-##16 for the 64460, it should be 0 on that chip as
* well. IOW, don't set bit 0.
*/
#define MV64X60_PCIx_ERR_MASK_VAL 0x00a50c24
/* Register offsets from PCIx error address low register */
#define MV64X60_PCI_ERROR_ADDR_LO 0x00
#define MV64X60_PCI_ERROR_ADDR_HI 0x04
#define MV64X60_PCI_ERROR_ATTR 0x08
#define MV64X60_PCI_ERROR_CMD 0x10
#define MV64X60_PCI_ERROR_CAUSE 0x18
#define MV64X60_PCI_ERROR_MASK 0x1c
#define MV64X60_PCI_ERR_SWrPerr 0x0002
#define MV64X60_PCI_ERR_SRdPerr 0x0004
#define MV64X60_PCI_ERR_MWrPerr 0x0020
#define MV64X60_PCI_ERR_MRdPerr 0x0040
#define MV64X60_PCI_PE_MASK (MV64X60_PCI_ERR_SWrPerr | \
MV64X60_PCI_ERR_SRdPerr | \
MV64X60_PCI_ERR_MWrPerr | \
MV64X60_PCI_ERR_MRdPerr)
struct mv64x60_pci_pdata {
int pci_hose;
void __iomem *pci_vbase;
char *name;
int irq;
int edac_idx;
};
#endif /* CONFIG_PCI */
struct mv64x60_mc_pdata {
void __iomem *mc_vbase;
int total_mem;
char *name;
int irq;
int edac_idx;
};
struct mv64x60_cpu_pdata {
void __iomem *cpu_vbase[2];
char *name;
int irq;
int edac_idx;
};
struct mv64x60_sram_pdata {
void __iomem *sram_vbase;
char *name;
int irq;
int edac_idx;
};
#endif

View file

@ -0,0 +1,208 @@
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 2012 Cavium, Inc.
*
* Copyright (C) 2009 Wind River Systems,
* written by Ralf Baechle <ralf@linux-mips.org>
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/io.h>
#include <linux/edac.h>
#include <asm/octeon/cvmx.h>
#include "edac_core.h"
#include "edac_module.h"
#define EDAC_MOD_STR "octeon-l2c"
static void octeon_l2c_poll_oct1(struct edac_device_ctl_info *l2c)
{
union cvmx_l2t_err l2t_err, l2t_err_reset;
union cvmx_l2d_err l2d_err, l2d_err_reset;
l2t_err_reset.u64 = 0;
l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR);
if (l2t_err.s.sec_err) {
edac_device_handle_ce(l2c, 0, 0,
"Tag Single bit error (corrected)");
l2t_err_reset.s.sec_err = 1;
}
if (l2t_err.s.ded_err) {
edac_device_handle_ue(l2c, 0, 0,
"Tag Double bit error (detected)");
l2t_err_reset.s.ded_err = 1;
}
if (l2t_err_reset.u64)
cvmx_write_csr(CVMX_L2T_ERR, l2t_err_reset.u64);
l2d_err_reset.u64 = 0;
l2d_err.u64 = cvmx_read_csr(CVMX_L2D_ERR);
if (l2d_err.s.sec_err) {
edac_device_handle_ce(l2c, 0, 1,
"Data Single bit error (corrected)");
l2d_err_reset.s.sec_err = 1;
}
if (l2d_err.s.ded_err) {
edac_device_handle_ue(l2c, 0, 1,
"Data Double bit error (detected)");
l2d_err_reset.s.ded_err = 1;
}
if (l2d_err_reset.u64)
cvmx_write_csr(CVMX_L2D_ERR, l2d_err_reset.u64);
}
static void _octeon_l2c_poll_oct2(struct edac_device_ctl_info *l2c, int tad)
{
union cvmx_l2c_err_tdtx err_tdtx, err_tdtx_reset;
union cvmx_l2c_err_ttgx err_ttgx, err_ttgx_reset;
char buf1[64];
char buf2[80];
err_tdtx_reset.u64 = 0;
err_tdtx.u64 = cvmx_read_csr(CVMX_L2C_ERR_TDTX(tad));
if (err_tdtx.s.dbe || err_tdtx.s.sbe ||
err_tdtx.s.vdbe || err_tdtx.s.vsbe)
snprintf(buf1, sizeof(buf1),
"type:%d, syn:0x%x, way:%d",
err_tdtx.s.type, err_tdtx.s.syn, err_tdtx.s.wayidx);
if (err_tdtx.s.dbe) {
snprintf(buf2, sizeof(buf2),
"L2D Double bit error (detected):%s", buf1);
err_tdtx_reset.s.dbe = 1;
edac_device_handle_ue(l2c, tad, 1, buf2);
}
if (err_tdtx.s.sbe) {
snprintf(buf2, sizeof(buf2),
"L2D Single bit error (corrected):%s", buf1);
err_tdtx_reset.s.sbe = 1;
edac_device_handle_ce(l2c, tad, 1, buf2);
}
if (err_tdtx.s.vdbe) {
snprintf(buf2, sizeof(buf2),
"VBF Double bit error (detected):%s", buf1);
err_tdtx_reset.s.vdbe = 1;
edac_device_handle_ue(l2c, tad, 1, buf2);
}
if (err_tdtx.s.vsbe) {
snprintf(buf2, sizeof(buf2),
"VBF Single bit error (corrected):%s", buf1);
err_tdtx_reset.s.vsbe = 1;
edac_device_handle_ce(l2c, tad, 1, buf2);
}
if (err_tdtx_reset.u64)
cvmx_write_csr(CVMX_L2C_ERR_TDTX(tad), err_tdtx_reset.u64);
err_ttgx_reset.u64 = 0;
err_ttgx.u64 = cvmx_read_csr(CVMX_L2C_ERR_TTGX(tad));
if (err_ttgx.s.dbe || err_ttgx.s.sbe)
snprintf(buf1, sizeof(buf1),
"type:%d, syn:0x%x, way:%d",
err_ttgx.s.type, err_ttgx.s.syn, err_ttgx.s.wayidx);
if (err_ttgx.s.dbe) {
snprintf(buf2, sizeof(buf2),
"Tag Double bit error (detected):%s", buf1);
err_ttgx_reset.s.dbe = 1;
edac_device_handle_ue(l2c, tad, 0, buf2);
}
if (err_ttgx.s.sbe) {
snprintf(buf2, sizeof(buf2),
"Tag Single bit error (corrected):%s", buf1);
err_ttgx_reset.s.sbe = 1;
edac_device_handle_ce(l2c, tad, 0, buf2);
}
if (err_ttgx_reset.u64)
cvmx_write_csr(CVMX_L2C_ERR_TTGX(tad), err_ttgx_reset.u64);
}
static void octeon_l2c_poll_oct2(struct edac_device_ctl_info *l2c)
{
int i;
for (i = 0; i < l2c->nr_instances; i++)
_octeon_l2c_poll_oct2(l2c, i);
}
static int octeon_l2c_probe(struct platform_device *pdev)
{
struct edac_device_ctl_info *l2c;
int num_tads = OCTEON_IS_MODEL(OCTEON_CN68XX) ? 4 : 1;
/* 'Tags' are block 0, 'Data' is block 1*/
l2c = edac_device_alloc_ctl_info(0, "l2c", num_tads, "l2c", 2, 0,
NULL, 0, edac_device_alloc_index());
if (!l2c)
return -ENOMEM;
l2c->dev = &pdev->dev;
platform_set_drvdata(pdev, l2c);
l2c->dev_name = dev_name(&pdev->dev);
l2c->mod_name = "octeon-l2c";
l2c->ctl_name = "octeon_l2c_err";
if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) {
union cvmx_l2t_err l2t_err;
union cvmx_l2d_err l2d_err;
l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR);
l2t_err.s.sec_intena = 0; /* We poll */
l2t_err.s.ded_intena = 0;
cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64);
l2d_err.u64 = cvmx_read_csr(CVMX_L2D_ERR);
l2d_err.s.sec_intena = 0; /* We poll */
l2d_err.s.ded_intena = 0;
cvmx_write_csr(CVMX_L2T_ERR, l2d_err.u64);
l2c->edac_check = octeon_l2c_poll_oct1;
} else {
/* OCTEON II */
l2c->edac_check = octeon_l2c_poll_oct2;
}
if (edac_device_add_device(l2c) > 0) {
pr_err("%s: edac_device_add_device() failed\n", __func__);
goto err;
}
return 0;
err:
edac_device_free_ctl_info(l2c);
return -ENXIO;
}
static int octeon_l2c_remove(struct platform_device *pdev)
{
struct edac_device_ctl_info *l2c = platform_get_drvdata(pdev);
edac_device_del_device(&pdev->dev);
edac_device_free_ctl_info(l2c);
return 0;
}
static struct platform_driver octeon_l2c_driver = {
.probe = octeon_l2c_probe,
.remove = octeon_l2c_remove,
.driver = {
.name = "octeon_l2c_edac",
}
};
module_platform_driver(octeon_l2c_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");

View file

@ -0,0 +1,353 @@
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 2009 Wind River Systems,
* written by Ralf Baechle <ralf@linux-mips.org>
*
* Copyright (c) 2013 by Cisco Systems, Inc.
* All rights reserved.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/io.h>
#include <linux/edac.h>
#include <linux/ctype.h>
#include <asm/octeon/octeon.h>
#include <asm/octeon/cvmx-lmcx-defs.h>
#include "edac_core.h"
#include "edac_module.h"
#define OCTEON_MAX_MC 4
#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
struct octeon_lmc_pvt {
unsigned long inject;
unsigned long error_type;
unsigned long dimm;
unsigned long rank;
unsigned long bank;
unsigned long row;
unsigned long col;
};
static void octeon_lmc_edac_poll(struct mem_ctl_info *mci)
{
union cvmx_lmcx_mem_cfg0 cfg0;
bool do_clear = false;
char msg[64];
cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mci->mc_idx));
if (cfg0.s.sec_err || cfg0.s.ded_err) {
union cvmx_lmcx_fadr fadr;
fadr.u64 = cvmx_read_csr(CVMX_LMCX_FADR(mci->mc_idx));
snprintf(msg, sizeof(msg),
"DIMM %d rank %d bank %d row %d col %d",
fadr.cn30xx.fdimm, fadr.cn30xx.fbunk,
fadr.cn30xx.fbank, fadr.cn30xx.frow, fadr.cn30xx.fcol);
}
if (cfg0.s.sec_err) {
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0,
-1, -1, -1, msg, "");
cfg0.s.sec_err = -1; /* Done, re-arm */
do_clear = true;
}
if (cfg0.s.ded_err) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
-1, -1, -1, msg, "");
cfg0.s.ded_err = -1; /* Done, re-arm */
do_clear = true;
}
if (do_clear)
cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mci->mc_idx), cfg0.u64);
}
static void octeon_lmc_edac_poll_o2(struct mem_ctl_info *mci)
{
struct octeon_lmc_pvt *pvt = mci->pvt_info;
union cvmx_lmcx_int int_reg;
bool do_clear = false;
char msg[64];
if (!pvt->inject)
int_reg.u64 = cvmx_read_csr(CVMX_LMCX_INT(mci->mc_idx));
else {
if (pvt->error_type == 1)
int_reg.s.sec_err = 1;
if (pvt->error_type == 2)
int_reg.s.ded_err = 1;
}
if (int_reg.s.sec_err || int_reg.s.ded_err) {
union cvmx_lmcx_fadr fadr;
if (likely(!pvt->inject))
fadr.u64 = cvmx_read_csr(CVMX_LMCX_FADR(mci->mc_idx));
else {
fadr.cn61xx.fdimm = pvt->dimm;
fadr.cn61xx.fbunk = pvt->rank;
fadr.cn61xx.fbank = pvt->bank;
fadr.cn61xx.frow = pvt->row;
fadr.cn61xx.fcol = pvt->col;
}
snprintf(msg, sizeof(msg),
"DIMM %d rank %d bank %d row %d col %d",
fadr.cn61xx.fdimm, fadr.cn61xx.fbunk,
fadr.cn61xx.fbank, fadr.cn61xx.frow, fadr.cn61xx.fcol);
}
if (int_reg.s.sec_err) {
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0,
-1, -1, -1, msg, "");
int_reg.s.sec_err = -1; /* Done, re-arm */
do_clear = true;
}
if (int_reg.s.ded_err) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
-1, -1, -1, msg, "");
int_reg.s.ded_err = -1; /* Done, re-arm */
do_clear = true;
}
if (do_clear) {
if (likely(!pvt->inject))
cvmx_write_csr(CVMX_LMCX_INT(mci->mc_idx), int_reg.u64);
else
pvt->inject = 0;
}
}
/************************ MC SYSFS parts ***********************************/
/* Only a couple naming differences per template, so very similar */
#define TEMPLATE_SHOW(reg) \
static ssize_t octeon_mc_inject_##reg##_show(struct device *dev, \
struct device_attribute *attr, \
char *data) \
{ \
struct mem_ctl_info *mci = to_mci(dev); \
struct octeon_lmc_pvt *pvt = mci->pvt_info; \
return sprintf(data, "%016llu\n", (u64)pvt->reg); \
}
#define TEMPLATE_STORE(reg) \
static ssize_t octeon_mc_inject_##reg##_store(struct device *dev, \
struct device_attribute *attr, \
const char *data, size_t count) \
{ \
struct mem_ctl_info *mci = to_mci(dev); \
struct octeon_lmc_pvt *pvt = mci->pvt_info; \
if (isdigit(*data)) { \
if (!kstrtoul(data, 0, &pvt->reg)) \
return count; \
} \
return 0; \
}
TEMPLATE_SHOW(inject);
TEMPLATE_STORE(inject);
TEMPLATE_SHOW(dimm);
TEMPLATE_STORE(dimm);
TEMPLATE_SHOW(bank);
TEMPLATE_STORE(bank);
TEMPLATE_SHOW(rank);
TEMPLATE_STORE(rank);
TEMPLATE_SHOW(row);
TEMPLATE_STORE(row);
TEMPLATE_SHOW(col);
TEMPLATE_STORE(col);
static ssize_t octeon_mc_inject_error_type_store(struct device *dev,
struct device_attribute *attr,
const char *data,
size_t count)
{
struct mem_ctl_info *mci = to_mci(dev);
struct octeon_lmc_pvt *pvt = mci->pvt_info;
if (!strncmp(data, "single", 6))
pvt->error_type = 1;
else if (!strncmp(data, "double", 6))
pvt->error_type = 2;
return count;
}
static ssize_t octeon_mc_inject_error_type_show(struct device *dev,
struct device_attribute *attr,
char *data)
{
struct mem_ctl_info *mci = to_mci(dev);
struct octeon_lmc_pvt *pvt = mci->pvt_info;
if (pvt->error_type == 1)
return sprintf(data, "single");
else if (pvt->error_type == 2)
return sprintf(data, "double");
return 0;
}
static DEVICE_ATTR(inject, S_IRUGO | S_IWUSR,
octeon_mc_inject_inject_show, octeon_mc_inject_inject_store);
static DEVICE_ATTR(error_type, S_IRUGO | S_IWUSR,
octeon_mc_inject_error_type_show, octeon_mc_inject_error_type_store);
static DEVICE_ATTR(dimm, S_IRUGO | S_IWUSR,
octeon_mc_inject_dimm_show, octeon_mc_inject_dimm_store);
static DEVICE_ATTR(rank, S_IRUGO | S_IWUSR,
octeon_mc_inject_rank_show, octeon_mc_inject_rank_store);
static DEVICE_ATTR(bank, S_IRUGO | S_IWUSR,
octeon_mc_inject_bank_show, octeon_mc_inject_bank_store);
static DEVICE_ATTR(row, S_IRUGO | S_IWUSR,
octeon_mc_inject_row_show, octeon_mc_inject_row_store);
static DEVICE_ATTR(col, S_IRUGO | S_IWUSR,
octeon_mc_inject_col_show, octeon_mc_inject_col_store);
static int octeon_set_mc_sysfs_attributes(struct mem_ctl_info *mci)
{
int rc;
rc = device_create_file(&mci->dev, &dev_attr_inject);
if (rc < 0)
return rc;
rc = device_create_file(&mci->dev, &dev_attr_error_type);
if (rc < 0)
return rc;
rc = device_create_file(&mci->dev, &dev_attr_dimm);
if (rc < 0)
return rc;
rc = device_create_file(&mci->dev, &dev_attr_rank);
if (rc < 0)
return rc;
rc = device_create_file(&mci->dev, &dev_attr_bank);
if (rc < 0)
return rc;
rc = device_create_file(&mci->dev, &dev_attr_row);
if (rc < 0)
return rc;
rc = device_create_file(&mci->dev, &dev_attr_col);
if (rc < 0)
return rc;
return 0;
}
static int octeon_lmc_edac_probe(struct platform_device *pdev)
{
struct mem_ctl_info *mci;
struct edac_mc_layer layers[1];
int mc = pdev->id;
opstate_init();
layers[0].type = EDAC_MC_LAYER_CHANNEL;
layers[0].size = 1;
layers[0].is_virt_csrow = false;
if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) {
union cvmx_lmcx_mem_cfg0 cfg0;
cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(0));
if (!cfg0.s.ecc_ena) {
dev_info(&pdev->dev, "Disabled (ECC not enabled)\n");
return 0;
}
mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, sizeof(struct octeon_lmc_pvt));
if (!mci)
return -ENXIO;
mci->pdev = &pdev->dev;
mci->dev_name = dev_name(&pdev->dev);
mci->mod_name = "octeon-lmc";
mci->ctl_name = "octeon-lmc-err";
mci->edac_check = octeon_lmc_edac_poll;
if (edac_mc_add_mc(mci)) {
dev_err(&pdev->dev, "edac_mc_add_mc() failed\n");
edac_mc_free(mci);
return -ENXIO;
}
if (octeon_set_mc_sysfs_attributes(mci)) {
dev_err(&pdev->dev, "octeon_set_mc_sysfs_attributes() failed\n");
return -ENXIO;
}
cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc));
cfg0.s.intr_ded_ena = 0; /* We poll */
cfg0.s.intr_sec_ena = 0;
cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mc), cfg0.u64);
} else {
/* OCTEON II */
union cvmx_lmcx_int_en en;
union cvmx_lmcx_config config;
config.u64 = cvmx_read_csr(CVMX_LMCX_CONFIG(0));
if (!config.s.ecc_ena) {
dev_info(&pdev->dev, "Disabled (ECC not enabled)\n");
return 0;
}
mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, sizeof(struct octeon_lmc_pvt));
if (!mci)
return -ENXIO;
mci->pdev = &pdev->dev;
mci->dev_name = dev_name(&pdev->dev);
mci->mod_name = "octeon-lmc";
mci->ctl_name = "co_lmc_err";
mci->edac_check = octeon_lmc_edac_poll_o2;
if (edac_mc_add_mc(mci)) {
dev_err(&pdev->dev, "edac_mc_add_mc() failed\n");
edac_mc_free(mci);
return -ENXIO;
}
if (octeon_set_mc_sysfs_attributes(mci)) {
dev_err(&pdev->dev, "octeon_set_mc_sysfs_attributes() failed\n");
return -ENXIO;
}
en.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc));
en.s.intr_ded_ena = 0; /* We poll */
en.s.intr_sec_ena = 0;
cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mc), en.u64);
}
platform_set_drvdata(pdev, mci);
return 0;
}
static int octeon_lmc_edac_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
return 0;
}
static struct platform_driver octeon_lmc_edac_driver = {
.probe = octeon_lmc_edac_probe,
.remove = octeon_lmc_edac_remove,
.driver = {
.name = "octeon_lmc_edac",
}
};
module_platform_driver(octeon_lmc_edac_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");

View file

@ -0,0 +1,143 @@
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 2012 Cavium, Inc.
*
* Copyright (C) 2009 Wind River Systems,
* written by Ralf Baechle <ralf@linux-mips.org>
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/edac.h>
#include "edac_core.h"
#include "edac_module.h"
#include <asm/octeon/cvmx.h>
#include <asm/mipsregs.h>
extern int register_co_cache_error_notifier(struct notifier_block *nb);
extern int unregister_co_cache_error_notifier(struct notifier_block *nb);
extern unsigned long long cache_err_dcache[NR_CPUS];
struct co_cache_error {
struct notifier_block notifier;
struct edac_device_ctl_info *ed;
};
/**
* EDAC CPU cache error callback
*
* @event: non-zero if unrecoverable.
*/
static int co_cache_error_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct co_cache_error *p = container_of(this, struct co_cache_error,
notifier);
unsigned int core = cvmx_get_core_num();
unsigned int cpu = smp_processor_id();
u64 icache_err = read_octeon_c0_icacheerr();
u64 dcache_err;
if (event) {
dcache_err = cache_err_dcache[core];
cache_err_dcache[core] = 0;
} else {
dcache_err = read_octeon_c0_dcacheerr();
}
if (icache_err & 1) {
edac_device_printk(p->ed, KERN_ERR,
"CacheErr (Icache):%llx, core %d/cpu %d, cp0_errorepc == %lx\n",
(unsigned long long)icache_err, core, cpu,
read_c0_errorepc());
write_octeon_c0_icacheerr(0);
edac_device_handle_ce(p->ed, cpu, 1, "icache");
}
if (dcache_err & 1) {
edac_device_printk(p->ed, KERN_ERR,
"CacheErr (Dcache):%llx, core %d/cpu %d, cp0_errorepc == %lx\n",
(unsigned long long)dcache_err, core, cpu,
read_c0_errorepc());
if (event)
edac_device_handle_ue(p->ed, cpu, 0, "dcache");
else
edac_device_handle_ce(p->ed, cpu, 0, "dcache");
/* Clear the error indication */
if (OCTEON_IS_MODEL(OCTEON_FAM_2))
write_octeon_c0_dcacheerr(1);
else
write_octeon_c0_dcacheerr(0);
}
return NOTIFY_STOP;
}
static int co_cache_error_probe(struct platform_device *pdev)
{
struct co_cache_error *p = devm_kzalloc(&pdev->dev, sizeof(*p),
GFP_KERNEL);
if (!p)
return -ENOMEM;
p->notifier.notifier_call = co_cache_error_event;
platform_set_drvdata(pdev, p);
p->ed = edac_device_alloc_ctl_info(0, "cpu", num_possible_cpus(),
"cache", 2, 0, NULL, 0,
edac_device_alloc_index());
if (!p->ed)
goto err;
p->ed->dev = &pdev->dev;
p->ed->dev_name = dev_name(&pdev->dev);
p->ed->mod_name = "octeon-cpu";
p->ed->ctl_name = "cache";
if (edac_device_add_device(p->ed)) {
pr_err("%s: edac_device_add_device() failed\n", __func__);
goto err1;
}
register_co_cache_error_notifier(&p->notifier);
return 0;
err1:
edac_device_free_ctl_info(p->ed);
err:
return -ENXIO;
}
static int co_cache_error_remove(struct platform_device *pdev)
{
struct co_cache_error *p = platform_get_drvdata(pdev);
unregister_co_cache_error_notifier(&p->notifier);
edac_device_del_device(&pdev->dev);
edac_device_free_ctl_info(p->ed);
return 0;
}
static struct platform_driver co_cache_error_driver = {
.probe = co_cache_error_probe,
.remove = co_cache_error_remove,
.driver = {
.name = "octeon_pc_edac",
}
};
module_platform_driver(co_cache_error_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");

View file

@ -0,0 +1,111 @@
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 2012 Cavium, Inc.
* Copyright (C) 2009 Wind River Systems,
* written by Ralf Baechle <ralf@linux-mips.org>
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/io.h>
#include <linux/edac.h>
#include <asm/octeon/cvmx.h>
#include <asm/octeon/cvmx-npi-defs.h>
#include <asm/octeon/cvmx-pci-defs.h>
#include <asm/octeon/octeon.h>
#include "edac_core.h"
#include "edac_module.h"
static void octeon_pci_poll(struct edac_pci_ctl_info *pci)
{
union cvmx_pci_cfg01 cfg01;
cfg01.u32 = octeon_npi_read32(CVMX_NPI_PCI_CFG01);
if (cfg01.s.dpe) { /* Detected parity error */
edac_pci_handle_pe(pci, pci->ctl_name);
cfg01.s.dpe = 1; /* Reset */
octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
}
if (cfg01.s.sse) {
edac_pci_handle_npe(pci, "Signaled System Error");
cfg01.s.sse = 1; /* Reset */
octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
}
if (cfg01.s.rma) {
edac_pci_handle_npe(pci, "Received Master Abort");
cfg01.s.rma = 1; /* Reset */
octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
}
if (cfg01.s.rta) {
edac_pci_handle_npe(pci, "Received Target Abort");
cfg01.s.rta = 1; /* Reset */
octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
}
if (cfg01.s.sta) {
edac_pci_handle_npe(pci, "Signaled Target Abort");
cfg01.s.sta = 1; /* Reset */
octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
}
if (cfg01.s.mdpe) {
edac_pci_handle_npe(pci, "Master Data Parity Error");
cfg01.s.mdpe = 1; /* Reset */
octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
}
}
static int octeon_pci_probe(struct platform_device *pdev)
{
struct edac_pci_ctl_info *pci;
int res = 0;
pci = edac_pci_alloc_ctl_info(0, "octeon_pci_err");
if (!pci)
return -ENOMEM;
pci->dev = &pdev->dev;
platform_set_drvdata(pdev, pci);
pci->dev_name = dev_name(&pdev->dev);
pci->mod_name = "octeon-pci";
pci->ctl_name = "octeon_pci_err";
pci->edac_check = octeon_pci_poll;
if (edac_pci_add_device(pci, 0) > 0) {
pr_err("%s: edac_pci_add_device() failed\n", __func__);
goto err;
}
return 0;
err:
edac_pci_free_ctl_info(pci);
return res;
}
static int octeon_pci_remove(struct platform_device *pdev)
{
struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev);
edac_pci_del_device(&pdev->dev);
edac_pci_free_ctl_info(pci);
return 0;
}
static struct platform_driver octeon_pci_driver = {
.probe = octeon_pci_probe,
.remove = octeon_pci_remove,
.driver = {
.name = "octeon_pci_edac",
}
};
module_platform_driver(octeon_pci_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");

315
drivers/edac/pasemi_edac.c Normal file
View file

@ -0,0 +1,315 @@
/*
* Copyright (C) 2006-2007 PA Semi, Inc
*
* Author: Egor Martovetsky <egor@pasemi.com>
* Maintained by: Olof Johansson <olof@lixom.net>
*
* Driver for the PWRficient onchip memory controllers
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/edac.h>
#include "edac_core.h"
#define MODULE_NAME "pasemi_edac"
#define MCCFG_MCEN 0x300
#define MCCFG_MCEN_MMC_EN 0x00000001
#define MCCFG_ERRCOR 0x388
#define MCCFG_ERRCOR_RNK_FAIL_DET_EN 0x00000100
#define MCCFG_ERRCOR_ECC_GEN_EN 0x00000010
#define MCCFG_ERRCOR_ECC_CRR_EN 0x00000001
#define MCCFG_SCRUB 0x384
#define MCCFG_SCRUB_RGLR_SCRB_EN 0x00000001
#define MCDEBUG_ERRCTL1 0x728
#define MCDEBUG_ERRCTL1_RFL_LOG_EN 0x00080000
#define MCDEBUG_ERRCTL1_MBE_LOG_EN 0x00040000
#define MCDEBUG_ERRCTL1_SBE_LOG_EN 0x00020000
#define MCDEBUG_ERRSTA 0x730
#define MCDEBUG_ERRSTA_RFL_STATUS 0x00000004
#define MCDEBUG_ERRSTA_MBE_STATUS 0x00000002
#define MCDEBUG_ERRSTA_SBE_STATUS 0x00000001
#define MCDEBUG_ERRCNT1 0x734
#define MCDEBUG_ERRCNT1_SBE_CNT_OVRFLO 0x00000080
#define MCDEBUG_ERRLOG1A 0x738
#define MCDEBUG_ERRLOG1A_MERR_TYPE_M 0x30000000
#define MCDEBUG_ERRLOG1A_MERR_TYPE_NONE 0x00000000
#define MCDEBUG_ERRLOG1A_MERR_TYPE_SBE 0x10000000
#define MCDEBUG_ERRLOG1A_MERR_TYPE_MBE 0x20000000
#define MCDEBUG_ERRLOG1A_MERR_TYPE_RFL 0x30000000
#define MCDEBUG_ERRLOG1A_MERR_BA_M 0x00700000
#define MCDEBUG_ERRLOG1A_MERR_BA_S 20
#define MCDEBUG_ERRLOG1A_MERR_CS_M 0x00070000
#define MCDEBUG_ERRLOG1A_MERR_CS_S 16
#define MCDEBUG_ERRLOG1A_SYNDROME_M 0x0000ffff
#define MCDRAM_RANKCFG 0x114
#define MCDRAM_RANKCFG_EN 0x00000001
#define MCDRAM_RANKCFG_TYPE_SIZE_M 0x000001c0
#define MCDRAM_RANKCFG_TYPE_SIZE_S 6
#define PASEMI_EDAC_NR_CSROWS 8
#define PASEMI_EDAC_NR_CHANS 1
#define PASEMI_EDAC_ERROR_GRAIN 64
static int last_page_in_mmc;
static int system_mmc_id;
static u32 pasemi_edac_get_error_info(struct mem_ctl_info *mci)
{
struct pci_dev *pdev = to_pci_dev(mci->pdev);
u32 tmp;
pci_read_config_dword(pdev, MCDEBUG_ERRSTA,
&tmp);
tmp &= (MCDEBUG_ERRSTA_RFL_STATUS | MCDEBUG_ERRSTA_MBE_STATUS
| MCDEBUG_ERRSTA_SBE_STATUS);
if (tmp) {
if (tmp & MCDEBUG_ERRSTA_SBE_STATUS)
pci_write_config_dword(pdev, MCDEBUG_ERRCNT1,
MCDEBUG_ERRCNT1_SBE_CNT_OVRFLO);
pci_write_config_dword(pdev, MCDEBUG_ERRSTA, tmp);
}
return tmp;
}
static void pasemi_edac_process_error_info(struct mem_ctl_info *mci, u32 errsta)
{
struct pci_dev *pdev = to_pci_dev(mci->pdev);
u32 errlog1a;
u32 cs;
if (!errsta)
return;
pci_read_config_dword(pdev, MCDEBUG_ERRLOG1A, &errlog1a);
cs = (errlog1a & MCDEBUG_ERRLOG1A_MERR_CS_M) >>
MCDEBUG_ERRLOG1A_MERR_CS_S;
/* uncorrectable/multi-bit errors */
if (errsta & (MCDEBUG_ERRSTA_MBE_STATUS |
MCDEBUG_ERRSTA_RFL_STATUS)) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
mci->csrows[cs]->first_page, 0, 0,
cs, 0, -1, mci->ctl_name, "");
}
/* correctable/single-bit errors */
if (errsta & MCDEBUG_ERRSTA_SBE_STATUS)
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
mci->csrows[cs]->first_page, 0, 0,
cs, 0, -1, mci->ctl_name, "");
}
static void pasemi_edac_check(struct mem_ctl_info *mci)
{
u32 errsta;
errsta = pasemi_edac_get_error_info(mci);
if (errsta)
pasemi_edac_process_error_info(mci, errsta);
}
static int pasemi_edac_init_csrows(struct mem_ctl_info *mci,
struct pci_dev *pdev,
enum edac_type edac_mode)
{
struct csrow_info *csrow;
struct dimm_info *dimm;
u32 rankcfg;
int index;
for (index = 0; index < mci->nr_csrows; index++) {
csrow = mci->csrows[index];
dimm = csrow->channels[0]->dimm;
pci_read_config_dword(pdev,
MCDRAM_RANKCFG + (index * 12),
&rankcfg);
if (!(rankcfg & MCDRAM_RANKCFG_EN))
continue;
switch ((rankcfg & MCDRAM_RANKCFG_TYPE_SIZE_M) >>
MCDRAM_RANKCFG_TYPE_SIZE_S) {
case 0:
dimm->nr_pages = 128 << (20 - PAGE_SHIFT);
break;
case 1:
dimm->nr_pages = 256 << (20 - PAGE_SHIFT);
break;
case 2:
case 3:
dimm->nr_pages = 512 << (20 - PAGE_SHIFT);
break;
case 4:
dimm->nr_pages = 1024 << (20 - PAGE_SHIFT);
break;
case 5:
dimm->nr_pages = 2048 << (20 - PAGE_SHIFT);
break;
default:
edac_mc_printk(mci, KERN_ERR,
"Unrecognized Rank Config. rankcfg=%u\n",
rankcfg);
return -EINVAL;
}
csrow->first_page = last_page_in_mmc;
csrow->last_page = csrow->first_page + dimm->nr_pages - 1;
last_page_in_mmc += dimm->nr_pages;
csrow->page_mask = 0;
dimm->grain = PASEMI_EDAC_ERROR_GRAIN;
dimm->mtype = MEM_DDR;
dimm->dtype = DEV_UNKNOWN;
dimm->edac_mode = edac_mode;
}
return 0;
}
static int pasemi_edac_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
u32 errctl1, errcor, scrub, mcen;
pci_read_config_dword(pdev, MCCFG_MCEN, &mcen);
if (!(mcen & MCCFG_MCEN_MMC_EN))
return -ENODEV;
/*
* We should think about enabling other error detection later on
*/
pci_read_config_dword(pdev, MCDEBUG_ERRCTL1, &errctl1);
errctl1 |= MCDEBUG_ERRCTL1_SBE_LOG_EN |
MCDEBUG_ERRCTL1_MBE_LOG_EN |
MCDEBUG_ERRCTL1_RFL_LOG_EN;
pci_write_config_dword(pdev, MCDEBUG_ERRCTL1, errctl1);
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = PASEMI_EDAC_NR_CSROWS;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = PASEMI_EDAC_NR_CHANS;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(system_mmc_id++, ARRAY_SIZE(layers), layers,
0);
if (mci == NULL)
return -ENOMEM;
pci_read_config_dword(pdev, MCCFG_ERRCOR, &errcor);
errcor |= MCCFG_ERRCOR_RNK_FAIL_DET_EN |
MCCFG_ERRCOR_ECC_GEN_EN |
MCCFG_ERRCOR_ECC_CRR_EN;
mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR | MEM_FLAG_RDDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED;
mci->edac_cap = (errcor & MCCFG_ERRCOR_ECC_GEN_EN) ?
((errcor & MCCFG_ERRCOR_ECC_CRR_EN) ?
(EDAC_FLAG_EC | EDAC_FLAG_SECDED) : EDAC_FLAG_EC) :
EDAC_FLAG_NONE;
mci->mod_name = MODULE_NAME;
mci->dev_name = pci_name(pdev);
mci->ctl_name = "pasemi,pwrficient-mc";
mci->edac_check = pasemi_edac_check;
mci->ctl_page_to_phys = NULL;
pci_read_config_dword(pdev, MCCFG_SCRUB, &scrub);
mci->scrub_cap = SCRUB_FLAG_HW_PROG | SCRUB_FLAG_HW_SRC;
mci->scrub_mode =
((errcor & MCCFG_ERRCOR_ECC_CRR_EN) ? SCRUB_FLAG_HW_SRC : 0) |
((scrub & MCCFG_SCRUB_RGLR_SCRB_EN) ? SCRUB_FLAG_HW_PROG : 0);
if (pasemi_edac_init_csrows(mci, pdev,
(mci->edac_cap & EDAC_FLAG_SECDED) ?
EDAC_SECDED :
((mci->edac_cap & EDAC_FLAG_EC) ?
EDAC_EC : EDAC_NONE)))
goto fail;
/*
* Clear status
*/
pasemi_edac_get_error_info(mci);
if (edac_mc_add_mc(mci))
goto fail;
/* get this far and it's successful */
return 0;
fail:
edac_mc_free(mci);
return -ENODEV;
}
static void pasemi_edac_remove(struct pci_dev *pdev)
{
struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
if (!mci)
return;
edac_mc_free(mci);
}
static const struct pci_device_id pasemi_edac_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_PASEMI, 0xa00a) },
{ }
};
MODULE_DEVICE_TABLE(pci, pasemi_edac_pci_tbl);
static struct pci_driver pasemi_edac_driver = {
.name = MODULE_NAME,
.probe = pasemi_edac_probe,
.remove = pasemi_edac_remove,
.id_table = pasemi_edac_pci_tbl,
};
static int __init pasemi_edac_init(void)
{
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
return pci_register_driver(&pasemi_edac_driver);
}
static void __exit pasemi_edac_exit(void)
{
pci_unregister_driver(&pasemi_edac_driver);
}
module_init(pasemi_edac_init);
module_exit(pasemi_edac_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Egor Martovetsky <egor@pasemi.com>");
MODULE_DESCRIPTION("MC support for PA Semi PWRficient memory controller");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");

1440
drivers/edac/ppc4xx_edac.c Normal file

File diff suppressed because it is too large Load diff

172
drivers/edac/ppc4xx_edac.h Normal file
View file

@ -0,0 +1,172 @@
/*
* Copyright (c) 2008 Nuovation System Designs, LLC
* Grant Erickson <gerickson@nuovations.com>
*
* This file defines processor mnemonics for accessing and managing
* the IBM DDR1/DDR2 ECC controller found in the 405EX[r], 440SP,
* 440SPe, 460EX, 460GT and 460SX.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; version 2 of the
* License.
*
*/
#ifndef __PPC4XX_EDAC_H
#define __PPC4XX_EDAC_H
#include <linux/types.h>
/*
* Macro for generating register field mnemonics
*/
#define PPC_REG_BITS 32
#define PPC_REG_VAL(bit, val) ((val) << ((PPC_REG_BITS - 1) - (bit)))
#define PPC_REG_DECODE(bit, val) ((val) >> ((PPC_REG_BITS - 1) - (bit)))
/*
* IBM 4xx DDR1/DDR2 SDRAM memory controller registers (at least those
* relevant to ECC)
*/
#define SDRAM_BESR 0x00 /* Error status (read/clear) */
#define SDRAM_BESRT 0x01 /* Error statuss (test/set) */
#define SDRAM_BEARL 0x02 /* Error address low */
#define SDRAM_BEARH 0x03 /* Error address high */
#define SDRAM_WMIRQ 0x06 /* Write master (read/clear) */
#define SDRAM_WMIRQT 0x07 /* Write master (test/set) */
#define SDRAM_MCOPT1 0x20 /* Controller options 1 */
#define SDRAM_MBXCF_BASE 0x40 /* Bank n configuration base */
#define SDRAM_MBXCF(n) (SDRAM_MBXCF_BASE + (4 * (n)))
#define SDRAM_MB0CF SDRAM_MBXCF(0)
#define SDRAM_MB1CF SDRAM_MBXCF(1)
#define SDRAM_MB2CF SDRAM_MBXCF(2)
#define SDRAM_MB3CF SDRAM_MBXCF(3)
#define SDRAM_ECCCR 0x98 /* ECC error status */
#define SDRAM_ECCES SDRAM_ECCCR
/*
* PLB Master IDs
*/
#define SDRAM_PLB_M0ID_FIRST 0
#define SDRAM_PLB_M0ID_ICU SDRAM_PLB_M0ID_FIRST
#define SDRAM_PLB_M0ID_PCIE0 1
#define SDRAM_PLB_M0ID_PCIE1 2
#define SDRAM_PLB_M0ID_DMA 3
#define SDRAM_PLB_M0ID_DCU 4
#define SDRAM_PLB_M0ID_OPB 5
#define SDRAM_PLB_M0ID_MAL 6
#define SDRAM_PLB_M0ID_SEC 7
#define SDRAM_PLB_M0ID_AHB 8
#define SDRAM_PLB_M0ID_LAST SDRAM_PLB_M0ID_AHB
#define SDRAM_PLB_M0ID_COUNT (SDRAM_PLB_M0ID_LAST - \
SDRAM_PLB_M0ID_FIRST + 1)
/*
* Memory Controller Bus Error Status Register
*/
#define SDRAM_BESR_MASK PPC_REG_VAL(7, 0xFF)
#define SDRAM_BESR_M0ID_MASK PPC_REG_VAL(3, 0xF)
#define SDRAM_BESR_M0ID_DECODE(n) PPC_REG_DECODE(3, n)
#define SDRAM_BESR_M0ID_ICU PPC_REG_VAL(3, SDRAM_PLB_M0ID_ICU)
#define SDRAM_BESR_M0ID_PCIE0 PPC_REG_VAL(3, SDRAM_PLB_M0ID_PCIE0)
#define SDRAM_BESR_M0ID_PCIE1 PPC_REG_VAL(3, SDRAM_PLB_M0ID_PCIE1)
#define SDRAM_BESR_M0ID_DMA PPC_REG_VAL(3, SDRAM_PLB_M0ID_DMA)
#define SDRAM_BESR_M0ID_DCU PPC_REG_VAL(3, SDRAM_PLB_M0ID_DCU)
#define SDRAM_BESR_M0ID_OPB PPC_REG_VAL(3, SDRAM_PLB_M0ID_OPB)
#define SDRAM_BESR_M0ID_MAL PPC_REG_VAL(3, SDRAM_PLB_M0ID_MAL)
#define SDRAM_BESR_M0ID_SEC PPC_REG_VAL(3, SDRAM_PLB_M0ID_SEC)
#define SDRAM_BESR_M0ID_AHB PPC_REG_VAL(3, SDRAM_PLB_M0ID_AHB)
#define SDRAM_BESR_M0ET_MASK PPC_REG_VAL(6, 0x7)
#define SDRAM_BESR_M0ET_NONE PPC_REG_VAL(6, 0)
#define SDRAM_BESR_M0ET_ECC PPC_REG_VAL(6, 1)
#define SDRAM_BESR_M0RW_MASK PPC_REG_VAL(7, 1)
#define SDRAM_BESR_M0RW_WRITE PPC_REG_VAL(7, 0)
#define SDRAM_BESR_M0RW_READ PPC_REG_VAL(7, 1)
/*
* Memory Controller PLB Write Master Interrupt Register
*/
#define SDRAM_WMIRQ_MASK PPC_REG_VAL(8, 0x1FF)
#define SDRAM_WMIRQ_ENCODE(id) PPC_REG_VAL((id % \
SDRAM_PLB_M0ID_COUNT), 1)
#define SDRAM_WMIRQ_ICU PPC_REG_VAL(SDRAM_PLB_M0ID_ICU, 1)
#define SDRAM_WMIRQ_PCIE0 PPC_REG_VAL(SDRAM_PLB_M0ID_PCIE0, 1)
#define SDRAM_WMIRQ_PCIE1 PPC_REG_VAL(SDRAM_PLB_M0ID_PCIE1, 1)
#define SDRAM_WMIRQ_DMA PPC_REG_VAL(SDRAM_PLB_M0ID_DMA, 1)
#define SDRAM_WMIRQ_DCU PPC_REG_VAL(SDRAM_PLB_M0ID_DCU, 1)
#define SDRAM_WMIRQ_OPB PPC_REG_VAL(SDRAM_PLB_M0ID_OPB, 1)
#define SDRAM_WMIRQ_MAL PPC_REG_VAL(SDRAM_PLB_M0ID_MAL, 1)
#define SDRAM_WMIRQ_SEC PPC_REG_VAL(SDRAM_PLB_M0ID_SEC, 1)
#define SDRAM_WMIRQ_AHB PPC_REG_VAL(SDRAM_PLB_M0ID_AHB, 1)
/*
* Memory Controller Options 1 Register
*/
#define SDRAM_MCOPT1_MCHK_MASK PPC_REG_VAL(3, 0x3) /* ECC mask */
#define SDRAM_MCOPT1_MCHK_NON PPC_REG_VAL(3, 0x0) /* No ECC gen */
#define SDRAM_MCOPT1_MCHK_GEN PPC_REG_VAL(3, 0x2) /* ECC gen */
#define SDRAM_MCOPT1_MCHK_CHK PPC_REG_VAL(3, 0x1) /* ECC gen and chk */
#define SDRAM_MCOPT1_MCHK_CHK_REP PPC_REG_VAL(3, 0x3) /* ECC gen/chk/rpt */
#define SDRAM_MCOPT1_MCHK_DECODE(n) ((((u32)(n)) >> 28) & 0x3)
#define SDRAM_MCOPT1_RDEN_MASK PPC_REG_VAL(4, 0x1) /* Rgstrd DIMM mask */
#define SDRAM_MCOPT1_RDEN PPC_REG_VAL(4, 0x1) /* Rgstrd DIMM enbl */
#define SDRAM_MCOPT1_WDTH_MASK PPC_REG_VAL(7, 0x1) /* Width mask */
#define SDRAM_MCOPT1_WDTH_32 PPC_REG_VAL(7, 0x0) /* 32 bits */
#define SDRAM_MCOPT1_WDTH_16 PPC_REG_VAL(7, 0x1) /* 16 bits */
#define SDRAM_MCOPT1_DDR_TYPE_MASK PPC_REG_VAL(11, 0x1) /* DDR type mask */
#define SDRAM_MCOPT1_DDR1_TYPE PPC_REG_VAL(11, 0x0) /* DDR1 type */
#define SDRAM_MCOPT1_DDR2_TYPE PPC_REG_VAL(11, 0x1) /* DDR2 type */
/*
* Memory Bank 0 - n Configuration Register
*/
#define SDRAM_MBCF_BA_MASK PPC_REG_VAL(12, 0x1FFF)
#define SDRAM_MBCF_SZ_MASK PPC_REG_VAL(19, 0xF)
#define SDRAM_MBCF_SZ_DECODE(mbxcf) PPC_REG_DECODE(19, mbxcf)
#define SDRAM_MBCF_SZ_4MB PPC_REG_VAL(19, 0x0)
#define SDRAM_MBCF_SZ_8MB PPC_REG_VAL(19, 0x1)
#define SDRAM_MBCF_SZ_16MB PPC_REG_VAL(19, 0x2)
#define SDRAM_MBCF_SZ_32MB PPC_REG_VAL(19, 0x3)
#define SDRAM_MBCF_SZ_64MB PPC_REG_VAL(19, 0x4)
#define SDRAM_MBCF_SZ_128MB PPC_REG_VAL(19, 0x5)
#define SDRAM_MBCF_SZ_256MB PPC_REG_VAL(19, 0x6)
#define SDRAM_MBCF_SZ_512MB PPC_REG_VAL(19, 0x7)
#define SDRAM_MBCF_SZ_1GB PPC_REG_VAL(19, 0x8)
#define SDRAM_MBCF_SZ_2GB PPC_REG_VAL(19, 0x9)
#define SDRAM_MBCF_SZ_4GB PPC_REG_VAL(19, 0xA)
#define SDRAM_MBCF_SZ_8GB PPC_REG_VAL(19, 0xB)
#define SDRAM_MBCF_AM_MASK PPC_REG_VAL(23, 0xF)
#define SDRAM_MBCF_AM_MODE0 PPC_REG_VAL(23, 0x0)
#define SDRAM_MBCF_AM_MODE1 PPC_REG_VAL(23, 0x1)
#define SDRAM_MBCF_AM_MODE2 PPC_REG_VAL(23, 0x2)
#define SDRAM_MBCF_AM_MODE3 PPC_REG_VAL(23, 0x3)
#define SDRAM_MBCF_AM_MODE4 PPC_REG_VAL(23, 0x4)
#define SDRAM_MBCF_AM_MODE5 PPC_REG_VAL(23, 0x5)
#define SDRAM_MBCF_AM_MODE6 PPC_REG_VAL(23, 0x6)
#define SDRAM_MBCF_AM_MODE7 PPC_REG_VAL(23, 0x7)
#define SDRAM_MBCF_AM_MODE8 PPC_REG_VAL(23, 0x8)
#define SDRAM_MBCF_AM_MODE9 PPC_REG_VAL(23, 0x9)
#define SDRAM_MBCF_BE_MASK PPC_REG_VAL(31, 0x1)
#define SDRAM_MBCF_BE_DISABLE PPC_REG_VAL(31, 0x0)
#define SDRAM_MBCF_BE_ENABLE PPC_REG_VAL(31, 0x1)
/*
* ECC Error Status
*/
#define SDRAM_ECCES_MASK PPC_REG_VAL(21, 0x3FFFFF)
#define SDRAM_ECCES_BNCE_MASK PPC_REG_VAL(15, 0xFFFF)
#define SDRAM_ECCES_BNCE_ENCODE(lane) PPC_REG_VAL(((lane) & 0xF), 1)
#define SDRAM_ECCES_CKBER_MASK PPC_REG_VAL(17, 0x3)
#define SDRAM_ECCES_CKBER_NONE PPC_REG_VAL(17, 0)
#define SDRAM_ECCES_CKBER_16_ECC_0_3 PPC_REG_VAL(17, 2)
#define SDRAM_ECCES_CKBER_32_ECC_0_3 PPC_REG_VAL(17, 1)
#define SDRAM_ECCES_CKBER_32_ECC_4_8 PPC_REG_VAL(17, 2)
#define SDRAM_ECCES_CKBER_32_ECC_0_8 PPC_REG_VAL(17, 3)
#define SDRAM_ECCES_CE PPC_REG_VAL(18, 1)
#define SDRAM_ECCES_UE PPC_REG_VAL(19, 1)
#define SDRAM_ECCES_BKNER_MASK PPC_REG_VAL(21, 0x3)
#define SDRAM_ECCES_BK0ER PPC_REG_VAL(20, 1)
#define SDRAM_ECCES_BK1ER PPC_REG_VAL(21, 1)
#endif /* __PPC4XX_EDAC_H */

430
drivers/edac/r82600_edac.c Normal file
View file

@ -0,0 +1,430 @@
/*
* Radisys 82600 Embedded chipset Memory Controller kernel module
* (C) 2005 EADS Astrium
* This file may be distributed under the terms of the
* GNU General Public License.
*
* Written by Tim Small <tim@buttersideup.com>, based on work by Thayne
* Harbaugh, Dan Hollis <goemon at anime dot net> and others.
*
* $Id: edac_r82600.c,v 1.1.2.6 2005/10/05 00:43:44 dsp_llnl Exp $
*
* Written with reference to 82600 High Integration Dual PCI System
* Controller Data Book:
* www.radisys.com/files/support_downloads/007-01277-0002.82600DataBook.pdf
* references to this document given in []
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/edac.h>
#include "edac_core.h"
#define R82600_REVISION " Ver: 2.0.2"
#define EDAC_MOD_STR "r82600_edac"
#define r82600_printk(level, fmt, arg...) \
edac_printk(level, "r82600", fmt, ##arg)
#define r82600_mc_printk(mci, level, fmt, arg...) \
edac_mc_chipset_printk(mci, level, "r82600", fmt, ##arg)
/* Radisys say "The 82600 integrates a main memory SDRAM controller that
* supports up to four banks of memory. The four banks can support a mix of
* sizes of 64 bit wide (72 bits with ECC) Synchronous DRAM (SDRAM) DIMMs,
* each of which can be any size from 16MB to 512MB. Both registered (control
* signals buffered) and unbuffered DIMM types are supported. Mixing of
* registered and unbuffered DIMMs as well as mixing of ECC and non-ECC DIMMs
* is not allowed. The 82600 SDRAM interface operates at the same frequency as
* the CPU bus, 66MHz, 100MHz or 133MHz."
*/
#define R82600_NR_CSROWS 4
#define R82600_NR_CHANS 1
#define R82600_NR_DIMMS 4
#define R82600_BRIDGE_ID 0x8200
/* Radisys 82600 register addresses - device 0 function 0 - PCI bridge */
#define R82600_DRAMC 0x57 /* Various SDRAM related control bits
* all bits are R/W
*
* 7 SDRAM ISA Hole Enable
* 6 Flash Page Mode Enable
* 5 ECC Enable: 1=ECC 0=noECC
* 4 DRAM DIMM Type: 1=
* 3 BIOS Alias Disable
* 2 SDRAM BIOS Flash Write Enable
* 1:0 SDRAM Refresh Rate: 00=Disabled
* 01=7.8usec (256Mbit SDRAMs)
* 10=15.6us 11=125usec
*/
#define R82600_SDRAMC 0x76 /* "SDRAM Control Register"
* More SDRAM related control bits
* all bits are R/W
*
* 15:8 Reserved.
*
* 7:5 Special SDRAM Mode Select
*
* 4 Force ECC
*
* 1=Drive ECC bits to 0 during
* write cycles (i.e. ECC test mode)
*
* 0=Normal ECC functioning
*
* 3 Enhanced Paging Enable
*
* 2 CAS# Latency 0=3clks 1=2clks
*
* 1 RAS# to CAS# Delay 0=3 1=2
*
* 0 RAS# Precharge 0=3 1=2
*/
#define R82600_EAP 0x80 /* ECC Error Address Pointer Register
*
* 31 Disable Hardware Scrubbing (RW)
* 0=Scrub on corrected read
* 1=Don't scrub on corrected read
*
* 30:12 Error Address Pointer (RO)
* Upper 19 bits of error address
*
* 11:4 Syndrome Bits (RO)
*
* 3 BSERR# on multibit error (RW)
* 1=enable 0=disable
*
* 2 NMI on Single Bit Eror (RW)
* 1=NMI triggered by SBE n.b. other
* prerequeists
* 0=NMI not triggered
*
* 1 MBE (R/WC)
* read 1=MBE at EAP (see above)
* read 0=no MBE, or SBE occurred first
* write 1=Clear MBE status (must also
* clear SBE)
* write 0=NOP
*
* 1 SBE (R/WC)
* read 1=SBE at EAP (see above)
* read 0=no SBE, or MBE occurred first
* write 1=Clear SBE status (must also
* clear MBE)
* write 0=NOP
*/
#define R82600_DRBA 0x60 /* + 0x60..0x63 SDRAM Row Boundary Address
* Registers
*
* 7:0 Address lines 30:24 - upper limit of
* each row [p57]
*/
struct r82600_error_info {
u32 eapr;
};
static bool disable_hardware_scrub;
static struct edac_pci_ctl_info *r82600_pci;
static void r82600_get_error_info(struct mem_ctl_info *mci,
struct r82600_error_info *info)
{
struct pci_dev *pdev;
pdev = to_pci_dev(mci->pdev);
pci_read_config_dword(pdev, R82600_EAP, &info->eapr);
if (info->eapr & BIT(0))
/* Clear error to allow next error to be reported [p.62] */
pci_write_bits32(pdev, R82600_EAP,
((u32) BIT(0) & (u32) BIT(1)),
((u32) BIT(0) & (u32) BIT(1)));
if (info->eapr & BIT(1))
/* Clear error to allow next error to be reported [p.62] */
pci_write_bits32(pdev, R82600_EAP,
((u32) BIT(0) & (u32) BIT(1)),
((u32) BIT(0) & (u32) BIT(1)));
}
static int r82600_process_error_info(struct mem_ctl_info *mci,
struct r82600_error_info *info,
int handle_errors)
{
int error_found;
u32 eapaddr, page;
u32 syndrome;
error_found = 0;
/* bits 30:12 store the upper 19 bits of the 32 bit error address */
eapaddr = ((info->eapr >> 12) & 0x7FFF) << 13;
/* Syndrome in bits 11:4 [p.62] */
syndrome = (info->eapr >> 4) & 0xFF;
/* the R82600 reports at less than page *
* granularity (upper 19 bits only) */
page = eapaddr >> PAGE_SHIFT;
if (info->eapr & BIT(0)) { /* CE? */
error_found = 1;
if (handle_errors)
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
page, 0, syndrome,
edac_mc_find_csrow_by_page(mci, page),
0, -1,
mci->ctl_name, "");
}
if (info->eapr & BIT(1)) { /* UE? */
error_found = 1;
if (handle_errors)
/* 82600 doesn't give enough info */
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
page, 0, 0,
edac_mc_find_csrow_by_page(mci, page),
0, -1,
mci->ctl_name, "");
}
return error_found;
}
static void r82600_check(struct mem_ctl_info *mci)
{
struct r82600_error_info info;
edac_dbg(1, "MC%d\n", mci->mc_idx);
r82600_get_error_info(mci, &info);
r82600_process_error_info(mci, &info, 1);
}
static inline int ecc_enabled(u8 dramcr)
{
return dramcr & BIT(5);
}
static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
u8 dramcr)
{
struct csrow_info *csrow;
struct dimm_info *dimm;
int index;
u8 drbar; /* SDRAM Row Boundary Address Register */
u32 row_high_limit, row_high_limit_last;
u32 reg_sdram, ecc_on, row_base;
ecc_on = ecc_enabled(dramcr);
reg_sdram = dramcr & BIT(4);
row_high_limit_last = 0;
for (index = 0; index < mci->nr_csrows; index++) {
csrow = mci->csrows[index];
dimm = csrow->channels[0]->dimm;
/* find the DRAM Chip Select Base address and mask */
pci_read_config_byte(pdev, R82600_DRBA + index, &drbar);
edac_dbg(1, "Row=%d DRBA = %#0x\n", index, drbar);
row_high_limit = ((u32) drbar << 24);
/* row_high_limit = ((u32)drbar << 24) | 0xffffffUL; */
edac_dbg(1, "Row=%d, Boundary Address=%#0x, Last = %#0x\n",
index, row_high_limit, row_high_limit_last);
/* Empty row [p.57] */
if (row_high_limit == row_high_limit_last)
continue;
row_base = row_high_limit_last;
csrow->first_page = row_base >> PAGE_SHIFT;
csrow->last_page = (row_high_limit >> PAGE_SHIFT) - 1;
dimm->nr_pages = csrow->last_page - csrow->first_page + 1;
/* Error address is top 19 bits - so granularity is *
* 14 bits */
dimm->grain = 1 << 14;
dimm->mtype = reg_sdram ? MEM_RDDR : MEM_DDR;
/* FIXME - check that this is unknowable with this chipset */
dimm->dtype = DEV_UNKNOWN;
/* Mode is global on 82600 */
dimm->edac_mode = ecc_on ? EDAC_SECDED : EDAC_NONE;
row_high_limit_last = row_high_limit;
}
}
static int r82600_probe1(struct pci_dev *pdev, int dev_idx)
{
struct mem_ctl_info *mci;
struct edac_mc_layer layers[2];
u8 dramcr;
u32 eapr;
u32 scrub_disabled;
u32 sdram_refresh_rate;
struct r82600_error_info discard;
edac_dbg(0, "\n");
pci_read_config_byte(pdev, R82600_DRAMC, &dramcr);
pci_read_config_dword(pdev, R82600_EAP, &eapr);
scrub_disabled = eapr & BIT(31);
sdram_refresh_rate = dramcr & (BIT(0) | BIT(1));
edac_dbg(2, "sdram refresh rate = %#0x\n", sdram_refresh_rate);
edac_dbg(2, "DRAMC register = %#0x\n", dramcr);
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = R82600_NR_CSROWS;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = R82600_NR_CHANS;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (mci == NULL)
return -ENOMEM;
edac_dbg(0, "mci = %p\n", mci);
mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED;
/* FIXME try to work out if the chip leads have been used for COM2
* instead on this board? [MA6?] MAYBE:
*/
/* On the R82600, the pins for memory bits 72:65 - i.e. the *
* EC bits are shared with the pins for COM2 (!), so if COM2 *
* is enabled, we assume COM2 is wired up, and thus no EDAC *
* is possible. */
mci->edac_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED;
if (ecc_enabled(dramcr)) {
if (scrub_disabled)
edac_dbg(3, "mci = %p - Scrubbing disabled! EAP: %#0x\n",
mci, eapr);
} else
mci->edac_cap = EDAC_FLAG_NONE;
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = R82600_REVISION;
mci->ctl_name = "R82600";
mci->dev_name = pci_name(pdev);
mci->edac_check = r82600_check;
mci->ctl_page_to_phys = NULL;
r82600_init_csrows(mci, pdev, dramcr);
r82600_get_error_info(mci, &discard); /* clear counters */
/* Here we assume that we will never see multiple instances of this
* type of memory controller. The ID is therefore hardcoded to 0.
*/
if (edac_mc_add_mc(mci)) {
edac_dbg(3, "failed edac_mc_add_mc()\n");
goto fail;
}
/* get this far and it's successful */
if (disable_hardware_scrub) {
edac_dbg(3, "Disabling Hardware Scrub (scrub on error)\n");
pci_write_bits32(pdev, R82600_EAP, BIT(31), BIT(31));
}
/* allocating generic PCI control info */
r82600_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
if (!r82600_pci) {
printk(KERN_WARNING
"%s(): Unable to create PCI control\n",
__func__);
printk(KERN_WARNING
"%s(): PCI error report via EDAC not setup\n",
__func__);
}
edac_dbg(3, "success\n");
return 0;
fail:
edac_mc_free(mci);
return -ENODEV;
}
/* returns count (>= 0), or negative on error */
static int r82600_init_one(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
edac_dbg(0, "\n");
/* don't need to call pci_enable_device() */
return r82600_probe1(pdev, ent->driver_data);
}
static void r82600_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
edac_dbg(0, "\n");
if (r82600_pci)
edac_pci_release_generic_ctl(r82600_pci);
if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
return;
edac_mc_free(mci);
}
static const struct pci_device_id r82600_pci_tbl[] = {
{
PCI_DEVICE(PCI_VENDOR_ID_RADISYS, R82600_BRIDGE_ID)
},
{
0,
} /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, r82600_pci_tbl);
static struct pci_driver r82600_driver = {
.name = EDAC_MOD_STR,
.probe = r82600_init_one,
.remove = r82600_remove_one,
.id_table = r82600_pci_tbl,
};
static int __init r82600_init(void)
{
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
return pci_register_driver(&r82600_driver);
}
static void __exit r82600_exit(void)
{
pci_unregister_driver(&r82600_driver);
}
module_init(r82600_init);
module_exit(r82600_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Tim Small <tim@buttersideup.com> - WPAD Ltd. "
"on behalf of EADS Astrium");
MODULE_DESCRIPTION("MC support for Radisys 82600 memory controllers");
module_param(disable_hardware_scrub, bool, 0644);
MODULE_PARM_DESC(disable_hardware_scrub,
"If set, disable the chipset's automatic scrub for CEs");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");

2439
drivers/edac/sb_edac.c Normal file

File diff suppressed because it is too large Load diff

266
drivers/edac/tile_edac.c Normal file
View file

@ -0,0 +1,266 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
* Tilera-specific EDAC driver.
*
* This source code is derived from the following driver:
*
* Cell MIC driver for ECC counting
*
* Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
* <benh@kernel.crashing.org>
*
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/uaccess.h>
#include <linux/edac.h>
#include <hv/hypervisor.h>
#include <hv/drv_mshim_intf.h>
#include "edac_core.h"
#define DRV_NAME "tile-edac"
/* Number of cs_rows needed per memory controller on TILEPro. */
#define TILE_EDAC_NR_CSROWS 1
/* Number of channels per memory controller on TILEPro. */
#define TILE_EDAC_NR_CHANS 1
/* Granularity of reported error in bytes on TILEPro. */
#define TILE_EDAC_ERROR_GRAIN 8
/* TILE processor has multiple independent memory controllers. */
struct platform_device *mshim_pdev[TILE_MAX_MSHIMS];
struct tile_edac_priv {
int hv_devhdl; /* Hypervisor device handle. */
int node; /* Memory controller instance #. */
unsigned int ce_count; /*
* Correctable-error counter
* kept by the driver.
*/
};
static void tile_edac_check(struct mem_ctl_info *mci)
{
struct tile_edac_priv *priv = mci->pvt_info;
struct mshim_mem_error mem_error;
if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_error,
sizeof(struct mshim_mem_error), MSHIM_MEM_ERROR_OFF) !=
sizeof(struct mshim_mem_error)) {
pr_err(DRV_NAME ": MSHIM_MEM_ERROR_OFF pread failure.\n");
return;
}
/* Check if the current error count is different from the saved one. */
if (mem_error.sbe_count != priv->ce_count) {
dev_dbg(mci->pdev, "ECC CE err on node %d\n", priv->node);
priv->ce_count = mem_error.sbe_count;
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
0, 0, 0,
0, 0, -1,
mci->ctl_name, "");
}
}
/*
* Initialize the 'csrows' table within the mci control structure with the
* addressing of memory.
*/
static int tile_edac_init_csrows(struct mem_ctl_info *mci)
{
struct csrow_info *csrow = mci->csrows[0];
struct tile_edac_priv *priv = mci->pvt_info;
struct mshim_mem_info mem_info;
struct dimm_info *dimm = csrow->channels[0]->dimm;
if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_info,
sizeof(struct mshim_mem_info), MSHIM_MEM_INFO_OFF) !=
sizeof(struct mshim_mem_info)) {
pr_err(DRV_NAME ": MSHIM_MEM_INFO_OFF pread failure.\n");
return -1;
}
if (mem_info.mem_ecc)
dimm->edac_mode = EDAC_SECDED;
else
dimm->edac_mode = EDAC_NONE;
switch (mem_info.mem_type) {
case DDR2:
dimm->mtype = MEM_DDR2;
break;
case DDR3:
dimm->mtype = MEM_DDR3;
break;
default:
return -1;
}
dimm->nr_pages = mem_info.mem_size >> PAGE_SHIFT;
dimm->grain = TILE_EDAC_ERROR_GRAIN;
dimm->dtype = DEV_UNKNOWN;
return 0;
}
static int tile_edac_mc_probe(struct platform_device *pdev)
{
char hv_file[32];
int hv_devhdl;
struct mem_ctl_info *mci;
struct edac_mc_layer layers[2];
struct tile_edac_priv *priv;
int rc;
sprintf(hv_file, "mshim/%d", pdev->id);
hv_devhdl = hv_dev_open((HV_VirtAddr)hv_file, 0);
if (hv_devhdl < 0)
return -EINVAL;
/* A TILE MC has a single channel and one chip-select row. */
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = TILE_EDAC_NR_CSROWS;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = TILE_EDAC_NR_CHANS;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(pdev->id, ARRAY_SIZE(layers), layers,
sizeof(struct tile_edac_priv));
if (mci == NULL)
return -ENOMEM;
priv = mci->pvt_info;
priv->node = pdev->id;
priv->hv_devhdl = hv_devhdl;
mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR2;
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
mci->mod_name = DRV_NAME;
#ifdef __tilegx__
mci->ctl_name = "TILEGx_Memory_Controller";
#else
mci->ctl_name = "TILEPro_Memory_Controller";
#endif
mci->dev_name = dev_name(&pdev->dev);
mci->edac_check = tile_edac_check;
/*
* Initialize the MC control structure 'csrows' table
* with the mapping and control information.
*/
if (tile_edac_init_csrows(mci)) {
/* No csrows found. */
mci->edac_cap = EDAC_FLAG_NONE;
} else {
mci->edac_cap = EDAC_FLAG_SECDED;
}
platform_set_drvdata(pdev, mci);
/* Register with EDAC core */
rc = edac_mc_add_mc(mci);
if (rc) {
dev_err(&pdev->dev, "failed to register with EDAC core\n");
edac_mc_free(mci);
return rc;
}
return 0;
}
static int tile_edac_mc_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
edac_mc_del_mc(&pdev->dev);
if (mci)
edac_mc_free(mci);
return 0;
}
static struct platform_driver tile_edac_mc_driver = {
.driver = {
.name = DRV_NAME,
.owner = THIS_MODULE,
},
.probe = tile_edac_mc_probe,
.remove = tile_edac_mc_remove,
};
/*
* Driver init routine.
*/
static int __init tile_edac_init(void)
{
char hv_file[32];
struct platform_device *pdev;
int i, err, num = 0;
/* Only support POLL mode. */
edac_op_state = EDAC_OPSTATE_POLL;
err = platform_driver_register(&tile_edac_mc_driver);
if (err)
return err;
for (i = 0; i < TILE_MAX_MSHIMS; i++) {
/*
* Not all memory controllers are configured such as in the
* case of a simulator. So we register only those mshims
* that are configured by the hypervisor.
*/
sprintf(hv_file, "mshim/%d", i);
if (hv_dev_open((HV_VirtAddr)hv_file, 0) < 0)
continue;
pdev = platform_device_register_simple(DRV_NAME, i, NULL, 0);
if (IS_ERR(pdev))
continue;
mshim_pdev[i] = pdev;
num++;
}
if (num == 0) {
platform_driver_unregister(&tile_edac_mc_driver);
return -ENODEV;
}
return 0;
}
/*
* Driver cleanup routine.
*/
static void __exit tile_edac_exit(void)
{
int i;
for (i = 0; i < TILE_MAX_MSHIMS; i++) {
struct platform_device *pdev = mshim_pdev[i];
if (!pdev)
continue;
platform_device_unregister(pdev);
}
platform_driver_unregister(&tile_edac_mc_driver);
}
module_init(tile_edac_init);
module_exit(tile_edac_exit);

528
drivers/edac/x38_edac.c Normal file
View file

@ -0,0 +1,528 @@
/*
* Intel X38 Memory Controller kernel module
* Copyright (C) 2008 Cluster Computing, Inc.
*
* This file may be distributed under the terms of the
* GNU General Public License.
*
* This file is based on i3200_edac.c
*
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/edac.h>
#include <asm-generic/io-64-nonatomic-lo-hi.h>
#include "edac_core.h"
#define X38_REVISION "1.1"
#define EDAC_MOD_STR "x38_edac"
#define PCI_DEVICE_ID_INTEL_X38_HB 0x29e0
#define X38_RANKS 8
#define X38_RANKS_PER_CHANNEL 4
#define X38_CHANNELS 2
/* Intel X38 register addresses - device 0 function 0 - DRAM Controller */
#define X38_MCHBAR_LOW 0x48 /* MCH Memory Mapped Register BAR */
#define X38_MCHBAR_HIGH 0x4c
#define X38_MCHBAR_MASK 0xfffffc000ULL /* bits 35:14 */
#define X38_MMR_WINDOW_SIZE 16384
#define X38_TOM 0xa0 /* Top of Memory (16b)
*
* 15:10 reserved
* 9:0 total populated physical memory
*/
#define X38_TOM_MASK 0x3ff /* bits 9:0 */
#define X38_TOM_SHIFT 26 /* 64MiB grain */
#define X38_ERRSTS 0xc8 /* Error Status Register (16b)
*
* 15 reserved
* 14 Isochronous TBWRR Run Behind FIFO Full
* (ITCV)
* 13 Isochronous TBWRR Run Behind FIFO Put
* (ITSTV)
* 12 reserved
* 11 MCH Thermal Sensor Event
* for SMI/SCI/SERR (GTSE)
* 10 reserved
* 9 LOCK to non-DRAM Memory Flag (LCKF)
* 8 reserved
* 7 DRAM Throttle Flag (DTF)
* 6:2 reserved
* 1 Multi-bit DRAM ECC Error Flag (DMERR)
* 0 Single-bit DRAM ECC Error Flag (DSERR)
*/
#define X38_ERRSTS_UE 0x0002
#define X38_ERRSTS_CE 0x0001
#define X38_ERRSTS_BITS (X38_ERRSTS_UE | X38_ERRSTS_CE)
/* Intel MMIO register space - device 0 function 0 - MMR space */
#define X38_C0DRB 0x200 /* Channel 0 DRAM Rank Boundary (16b x 4)
*
* 15:10 reserved
* 9:0 Channel 0 DRAM Rank Boundary Address
*/
#define X38_C1DRB 0x600 /* Channel 1 DRAM Rank Boundary (16b x 4) */
#define X38_DRB_MASK 0x3ff /* bits 9:0 */
#define X38_DRB_SHIFT 26 /* 64MiB grain */
#define X38_C0ECCERRLOG 0x280 /* Channel 0 ECC Error Log (64b)
*
* 63:48 Error Column Address (ERRCOL)
* 47:32 Error Row Address (ERRROW)
* 31:29 Error Bank Address (ERRBANK)
* 28:27 Error Rank Address (ERRRANK)
* 26:24 reserved
* 23:16 Error Syndrome (ERRSYND)
* 15: 2 reserved
* 1 Multiple Bit Error Status (MERRSTS)
* 0 Correctable Error Status (CERRSTS)
*/
#define X38_C1ECCERRLOG 0x680 /* Channel 1 ECC Error Log (64b) */
#define X38_ECCERRLOG_CE 0x1
#define X38_ECCERRLOG_UE 0x2
#define X38_ECCERRLOG_RANK_BITS 0x18000000
#define X38_ECCERRLOG_SYNDROME_BITS 0xff0000
#define X38_CAPID0 0xe0 /* see P.94 of spec for details */
static int x38_channel_num;
static int how_many_channel(struct pci_dev *pdev)
{
unsigned char capid0_8b; /* 8th byte of CAPID0 */
pci_read_config_byte(pdev, X38_CAPID0 + 8, &capid0_8b);
if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */
edac_dbg(0, "In single channel mode\n");
x38_channel_num = 1;
} else {
edac_dbg(0, "In dual channel mode\n");
x38_channel_num = 2;
}
return x38_channel_num;
}
static unsigned long eccerrlog_syndrome(u64 log)
{
return (log & X38_ECCERRLOG_SYNDROME_BITS) >> 16;
}
static int eccerrlog_row(int channel, u64 log)
{
return ((log & X38_ECCERRLOG_RANK_BITS) >> 27) |
(channel * X38_RANKS_PER_CHANNEL);
}
enum x38_chips {
X38 = 0,
};
struct x38_dev_info {
const char *ctl_name;
};
struct x38_error_info {
u16 errsts;
u16 errsts2;
u64 eccerrlog[X38_CHANNELS];
};
static const struct x38_dev_info x38_devs[] = {
[X38] = {
.ctl_name = "x38"},
};
static struct pci_dev *mci_pdev;
static int x38_registered = 1;
static void x38_clear_error_info(struct mem_ctl_info *mci)
{
struct pci_dev *pdev;
pdev = to_pci_dev(mci->pdev);
/*
* Clear any error bits.
* (Yes, we really clear bits by writing 1 to them.)
*/
pci_write_bits16(pdev, X38_ERRSTS, X38_ERRSTS_BITS,
X38_ERRSTS_BITS);
}
static void x38_get_and_clear_error_info(struct mem_ctl_info *mci,
struct x38_error_info *info)
{
struct pci_dev *pdev;
void __iomem *window = mci->pvt_info;
pdev = to_pci_dev(mci->pdev);
/*
* This is a mess because there is no atomic way to read all the
* registers at once and the registers can transition from CE being
* overwritten by UE.
*/
pci_read_config_word(pdev, X38_ERRSTS, &info->errsts);
if (!(info->errsts & X38_ERRSTS_BITS))
return;
info->eccerrlog[0] = lo_hi_readq(window + X38_C0ECCERRLOG);
if (x38_channel_num == 2)
info->eccerrlog[1] = lo_hi_readq(window + X38_C1ECCERRLOG);
pci_read_config_word(pdev, X38_ERRSTS, &info->errsts2);
/*
* If the error is the same for both reads then the first set
* of reads is valid. If there is a change then there is a CE
* with no info and the second set of reads is valid and
* should be UE info.
*/
if ((info->errsts ^ info->errsts2) & X38_ERRSTS_BITS) {
info->eccerrlog[0] = lo_hi_readq(window + X38_C0ECCERRLOG);
if (x38_channel_num == 2)
info->eccerrlog[1] =
lo_hi_readq(window + X38_C1ECCERRLOG);
}
x38_clear_error_info(mci);
}
static void x38_process_error_info(struct mem_ctl_info *mci,
struct x38_error_info *info)
{
int channel;
u64 log;
if (!(info->errsts & X38_ERRSTS_BITS))
return;
if ((info->errsts ^ info->errsts2) & X38_ERRSTS_BITS) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
-1, -1, -1,
"UE overwrote CE", "");
info->errsts = info->errsts2;
}
for (channel = 0; channel < x38_channel_num; channel++) {
log = info->eccerrlog[channel];
if (log & X38_ECCERRLOG_UE) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
0, 0, 0,
eccerrlog_row(channel, log),
-1, -1,
"x38 UE", "");
} else if (log & X38_ECCERRLOG_CE) {
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
0, 0, eccerrlog_syndrome(log),
eccerrlog_row(channel, log),
-1, -1,
"x38 CE", "");
}
}
}
static void x38_check(struct mem_ctl_info *mci)
{
struct x38_error_info info;
edac_dbg(1, "MC%d\n", mci->mc_idx);
x38_get_and_clear_error_info(mci, &info);
x38_process_error_info(mci, &info);
}
static void __iomem *x38_map_mchbar(struct pci_dev *pdev)
{
union {
u64 mchbar;
struct {
u32 mchbar_low;
u32 mchbar_high;
};
} u;
void __iomem *window;
pci_read_config_dword(pdev, X38_MCHBAR_LOW, &u.mchbar_low);
pci_write_config_dword(pdev, X38_MCHBAR_LOW, u.mchbar_low | 0x1);
pci_read_config_dword(pdev, X38_MCHBAR_HIGH, &u.mchbar_high);
u.mchbar &= X38_MCHBAR_MASK;
if (u.mchbar != (resource_size_t)u.mchbar) {
printk(KERN_ERR
"x38: mmio space beyond accessible range (0x%llx)\n",
(unsigned long long)u.mchbar);
return NULL;
}
window = ioremap_nocache(u.mchbar, X38_MMR_WINDOW_SIZE);
if (!window)
printk(KERN_ERR "x38: cannot map mmio space at 0x%llx\n",
(unsigned long long)u.mchbar);
return window;
}
static void x38_get_drbs(void __iomem *window,
u16 drbs[X38_CHANNELS][X38_RANKS_PER_CHANNEL])
{
int i;
for (i = 0; i < X38_RANKS_PER_CHANNEL; i++) {
drbs[0][i] = readw(window + X38_C0DRB + 2*i) & X38_DRB_MASK;
drbs[1][i] = readw(window + X38_C1DRB + 2*i) & X38_DRB_MASK;
}
}
static bool x38_is_stacked(struct pci_dev *pdev,
u16 drbs[X38_CHANNELS][X38_RANKS_PER_CHANNEL])
{
u16 tom;
pci_read_config_word(pdev, X38_TOM, &tom);
tom &= X38_TOM_MASK;
return drbs[X38_CHANNELS - 1][X38_RANKS_PER_CHANNEL - 1] == tom;
}
static unsigned long drb_to_nr_pages(
u16 drbs[X38_CHANNELS][X38_RANKS_PER_CHANNEL],
bool stacked, int channel, int rank)
{
int n;
n = drbs[channel][rank];
if (rank > 0)
n -= drbs[channel][rank - 1];
if (stacked && (channel == 1) && drbs[channel][rank] ==
drbs[channel][X38_RANKS_PER_CHANNEL - 1]) {
n -= drbs[0][X38_RANKS_PER_CHANNEL - 1];
}
n <<= (X38_DRB_SHIFT - PAGE_SHIFT);
return n;
}
static int x38_probe1(struct pci_dev *pdev, int dev_idx)
{
int rc;
int i, j;
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
u16 drbs[X38_CHANNELS][X38_RANKS_PER_CHANNEL];
bool stacked;
void __iomem *window;
edac_dbg(0, "MC:\n");
window = x38_map_mchbar(pdev);
if (!window)
return -ENODEV;
x38_get_drbs(window, drbs);
how_many_channel(pdev);
/* FIXME: unconventional pvt_info usage */
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = X38_RANKS;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = x38_channel_num;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (!mci)
return -ENOMEM;
edac_dbg(3, "MC: init mci\n");
mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR2;
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = X38_REVISION;
mci->ctl_name = x38_devs[dev_idx].ctl_name;
mci->dev_name = pci_name(pdev);
mci->edac_check = x38_check;
mci->ctl_page_to_phys = NULL;
mci->pvt_info = window;
stacked = x38_is_stacked(pdev, drbs);
/*
* The dram rank boundary (DRB) reg values are boundary addresses
* for each DRAM rank with a granularity of 64MB. DRB regs are
* cumulative; the last one will contain the total memory
* contained in all ranks.
*/
for (i = 0; i < mci->nr_csrows; i++) {
unsigned long nr_pages;
struct csrow_info *csrow = mci->csrows[i];
nr_pages = drb_to_nr_pages(drbs, stacked,
i / X38_RANKS_PER_CHANNEL,
i % X38_RANKS_PER_CHANNEL);
if (nr_pages == 0)
continue;
for (j = 0; j < x38_channel_num; j++) {
struct dimm_info *dimm = csrow->channels[j]->dimm;
dimm->nr_pages = nr_pages / x38_channel_num;
dimm->grain = nr_pages << PAGE_SHIFT;
dimm->mtype = MEM_DDR2;
dimm->dtype = DEV_UNKNOWN;
dimm->edac_mode = EDAC_UNKNOWN;
}
}
x38_clear_error_info(mci);
rc = -ENODEV;
if (edac_mc_add_mc(mci)) {
edac_dbg(3, "MC: failed edac_mc_add_mc()\n");
goto fail;
}
/* get this far and it's successful */
edac_dbg(3, "MC: success\n");
return 0;
fail:
iounmap(window);
if (mci)
edac_mc_free(mci);
return rc;
}
static int x38_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int rc;
edac_dbg(0, "MC:\n");
if (pci_enable_device(pdev) < 0)
return -EIO;
rc = x38_probe1(pdev, ent->driver_data);
if (!mci_pdev)
mci_pdev = pci_dev_get(pdev);
return rc;
}
static void x38_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
edac_dbg(0, "\n");
mci = edac_mc_del_mc(&pdev->dev);
if (!mci)
return;
iounmap(mci->pvt_info);
edac_mc_free(mci);
}
static const struct pci_device_id x38_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, X38_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
X38},
{
0,
} /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, x38_pci_tbl);
static struct pci_driver x38_driver = {
.name = EDAC_MOD_STR,
.probe = x38_init_one,
.remove = x38_remove_one,
.id_table = x38_pci_tbl,
};
static int __init x38_init(void)
{
int pci_rc;
edac_dbg(3, "MC:\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
pci_rc = pci_register_driver(&x38_driver);
if (pci_rc < 0)
goto fail0;
if (!mci_pdev) {
x38_registered = 0;
mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_X38_HB, NULL);
if (!mci_pdev) {
edac_dbg(0, "x38 pci_get_device fail\n");
pci_rc = -ENODEV;
goto fail1;
}
pci_rc = x38_init_one(mci_pdev, x38_pci_tbl);
if (pci_rc < 0) {
edac_dbg(0, "x38 init fail\n");
pci_rc = -ENODEV;
goto fail1;
}
}
return 0;
fail1:
pci_unregister_driver(&x38_driver);
fail0:
if (mci_pdev)
pci_dev_put(mci_pdev);
return pci_rc;
}
static void __exit x38_exit(void)
{
edac_dbg(3, "MC:\n");
pci_unregister_driver(&x38_driver);
if (!x38_registered) {
x38_remove_one(mci_pdev);
pci_dev_put(mci_pdev);
}
}
module_init(x38_init);
module_exit(x38_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Cluster Computing, Inc. Hitoshi Mitake");
MODULE_DESCRIPTION("MC support for Intel X38 memory hub controllers");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");