Fixed MTP to work with TWRP

awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions


@@ -0,0 +1,28 @@
config PPC_POWERNV
depends on PPC64 && PPC_BOOK3S
bool "IBM PowerNV (Non-Virtualized) platform support"
select PPC_NATIVE
select PPC_XICS
select PPC_ICP_NATIVE
select PPC_P7_NAP
select PPC_PCI_CHOICE if EMBEDDED
select EPAPR_BOOT
select PPC_INDIRECT_PIO
select PPC_UDBG_16550
select PPC_SCOM
select ARCH_RANDOM
select CPU_FREQ
select CPU_FREQ_GOV_PERFORMANCE
select CPU_FREQ_GOV_POWERSAVE
select CPU_FREQ_GOV_USERSPACE
select CPU_FREQ_GOV_ONDEMAND
select CPU_FREQ_GOV_CONSERVATIVE
select PPC_DOORBELL
default y
config PPC_POWERNV_RTAS
depends on PPC_POWERNV
bool "Support for RTAS based PowerNV platforms such as BML"
default y
select PPC_ICS_RTAS
select PPC_RTAS


@@ -0,0 +1,11 @@
obj-y += setup.o opal-wrappers.o opal.o opal-async.o
obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
obj-y += opal-msglog.o opal-hmi.o
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o
obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o
obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o

File diff suppressed because it is too large.


@@ -0,0 +1,521 @@
/*
* This file implements the platform-dependent EEH operations for the
* powernv (Non-Virtualized) platform, where the kernel has to handle
* EEH natively instead of relying on a hypervisor.
*
* Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/atomic.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/firmware.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/opal.h>
#include <asm/ppc-pci.h>
#include "powernv.h"
#include "pci.h"
/**
* powernv_eeh_init - EEH platform dependent initialization
*
* EEH platform dependent initialization on powernv
*/
static int powernv_eeh_init(void)
{
struct pci_controller *hose;
struct pnv_phb *phb;
/* We require OPALv3 */
if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
pr_warn("%s: OPALv3 is required !\n",
__func__);
return -EINVAL;
}
/* Set probe mode */
eeh_add_flag(EEH_PROBE_MODE_DEV);
/*
* P7IOC blocks PCI config access to frozen PE, but PHB3
* doesn't do that. So we have to selectively enable I/O
* prior to collecting error log.
*/
list_for_each_entry(hose, &hose_list, list_node) {
phb = hose->private_data;
if (phb->model == PNV_PHB_MODEL_P7IOC)
eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
break;
}
return 0;
}
/**
* powernv_eeh_post_init - EEH platform dependent post initialization
*
* EEH platform dependent post initialization on powernv. When
* the function is called, the EEH PEs and devices should have
* been built. Once the I/O cache has been set up, EEH is
* ready to provide service.
*/
static int powernv_eeh_post_init(void)
{
struct pci_controller *hose;
struct pnv_phb *phb;
int ret = 0;
list_for_each_entry(hose, &hose_list, list_node) {
phb = hose->private_data;
if (phb->eeh_ops && phb->eeh_ops->post_init) {
ret = phb->eeh_ops->post_init(hose);
if (ret)
break;
}
}
return ret;
}
/**
* powernv_eeh_dev_probe - Do probe on PCI device
* @dev: PCI device
* @flag: unused
*
* When the EEH module is installed during system boot, all PCI
* devices are checked one by one to see if they support EEH; this
* function exists for that purpose. By default, EEH is enabled on
* all PCI devices, so we only need to do the necessary initialization
* on the corresponding EEH device and create the PE accordingly.
*
* Note that it is unsafe to retrieve the EEH device through the
* corresponding PCI device: during a PCI device hotplug, which may
* have been triggered by the EEH core, the binding between the EEH
* device and the PCI device has not been established yet.
*/
static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
{
struct pci_controller *hose = pci_bus_to_host(dev->bus);
struct pnv_phb *phb = hose->private_data;
struct device_node *dn = pci_device_to_OF_node(dev);
struct eeh_dev *edev = of_node_to_eeh_dev(dn);
int ret;
/*
* When probing the root bridge, which doesn't have any
* subordinate PCI devices, we don't have an OF node for
* it, so it's not reasonable to continue the probe.
*/
if (!dn || !edev || edev->pe)
return 0;
/* Skip for PCI-ISA bridge */
if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
return 0;
/* Initialize eeh device */
edev->class_code = dev->class;
edev->mode &= 0xFFFFFF00;
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
edev->mode |= EEH_DEV_BRIDGE;
edev->pcix_cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
if (pci_is_pcie(dev)) {
edev->pcie_cap = pci_pcie_cap(dev);
if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
edev->mode |= EEH_DEV_ROOT_PORT;
else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)
edev->mode |= EEH_DEV_DS_PORT;
edev->aer_cap = pci_find_ext_capability(dev,
PCI_EXT_CAP_ID_ERR);
}
edev->config_addr = ((dev->bus->number << 8) | dev->devfn);
edev->pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff);
/* Create PE */
ret = eeh_add_to_parent_pe(edev);
if (ret) {
pr_warn("%s: Can't add PCI dev %s to parent PE (%d)\n",
__func__, pci_name(dev), ret);
return ret;
}
/*
* If the PE contains any one of following adapters, the
* PCI config space can't be accessed when dumping EEH log.
* Otherwise, we will run into fenced PHB caused by shortage
* of outbound credits in the adapter. The PCI config access
* should be blocked until PE reset. MMIO access is certainly
* dropped by hardware. In order to drop PCI config requests, one
* more flag (EEH_PE_CFG_RESTRICTED) is introduced, which will be
* checked in the backend during PE state retrieval. If
* the PE becomes frozen for the first time and the flag has
* been set for the PE, we will set EEH_PE_CFG_BLOCKED for
* that PE to block its config space.
*
* Broadcom Austin 4-port NICs (14e4:1657)
* Broadcom Shiner 2-port 10G NICs (14e4:168e)
*/
if ((dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x1657) ||
(dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x168e))
edev->pe->state |= EEH_PE_CFG_RESTRICTED;
/*
* Cache the PE primary bus, which can't be fetched when
* full hotplug is in progress. In that case, all child
* PCI devices of the PE are expected to be removed prior
* to PE reset.
*/
if (!edev->pe->bus)
edev->pe->bus = dev->bus;
/*
* Enable EEH explicitly so that we will do EEH check
* while accessing I/O stuff
*/
eeh_add_flag(EEH_ENABLED);
/* Save memory bars */
eeh_save_bars(edev);
return 0;
}
/**
* powernv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
* @pe: EEH PE
* @option: operation to be issued
*
* The function is used to control the EEH functionality globally.
* Currently, the following options are supported according to PAPR:
* Enable EEH, Disable EEH, Enable MMIO and Enable DMA
*/
static int powernv_eeh_set_option(struct eeh_pe *pe, int option)
{
struct pci_controller *hose = pe->phb;
struct pnv_phb *phb = hose->private_data;
int ret = -EEXIST;
/*
* All we need to do is pass it down to the hardware
* implementation to handle.
*/
if (phb->eeh_ops && phb->eeh_ops->set_option)
ret = phb->eeh_ops->set_option(pe, option);
return ret;
}
/**
* powernv_eeh_get_pe_addr - Retrieve PE address
* @pe: EEH PE
*
* Retrieve the PE address according to the given traditional
* PCI BDF (Bus/Device/Function) address.
*/
static int powernv_eeh_get_pe_addr(struct eeh_pe *pe)
{
return pe->addr;
}
/**
* powernv_eeh_get_state - Retrieve PE state
* @pe: EEH PE
* @delay: delay while PE state is temporarily unavailable
*
* Retrieve the state of the specified PE. For IODA-compatible
* platform, it should be retrieved from IODA table. Therefore,
* we prefer passing down to hardware implementation to handle
* it.
*/
static int powernv_eeh_get_state(struct eeh_pe *pe, int *delay)
{
struct pci_controller *hose = pe->phb;
struct pnv_phb *phb = hose->private_data;
int ret = EEH_STATE_NOT_SUPPORT;
if (phb->eeh_ops && phb->eeh_ops->get_state) {
ret = phb->eeh_ops->get_state(pe);
/*
* If the PE state is temporarily unavailable,
* tell the EEH core to delay for the default
* period (1 second)
*/
if (delay) {
*delay = 0;
if (ret & EEH_STATE_UNAVAILABLE)
*delay = 1000;
}
}
return ret;
}
/**
* powernv_eeh_reset - Reset the specified PE
* @pe: EEH PE
* @option: reset option
*
* Reset the specified PE
*/
static int powernv_eeh_reset(struct eeh_pe *pe, int option)
{
struct pci_controller *hose = pe->phb;
struct pnv_phb *phb = hose->private_data;
int ret = -EEXIST;
if (phb->eeh_ops && phb->eeh_ops->reset)
ret = phb->eeh_ops->reset(pe, option);
return ret;
}
/**
* powernv_eeh_wait_state - Wait for PE state
* @pe: EEH PE
* @max_wait: maximal period in milliseconds
*
* Wait for the state of associated PE. It might take some time
* to retrieve the PE's state.
*/
static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
{
int ret;
int mwait;
while (1) {
ret = powernv_eeh_get_state(pe, &mwait);
/*
* If the PE's state is temporarily unavailable,
* we have to wait for the specified time. Otherwise,
* the PE's state will be returned immediately.
*/
if (ret != EEH_STATE_UNAVAILABLE)
return ret;
max_wait -= mwait;
if (max_wait <= 0) {
pr_warn("%s: Timeout getting PE#%x's state (%d)\n",
__func__, pe->addr, max_wait);
return EEH_STATE_NOT_SUPPORT;
}
msleep(mwait);
}
return EEH_STATE_NOT_SUPPORT;
}
/**
* powernv_eeh_get_log - Retrieve error log
* @pe: EEH PE
* @severity: temporary or permanent error log
* @drv_log: driver log to be combined with retrieved error log
* @len: length of driver log
*
* Retrieve the temporary or permanent error from the PE.
*/
static int powernv_eeh_get_log(struct eeh_pe *pe, int severity,
char *drv_log, unsigned long len)
{
struct pci_controller *hose = pe->phb;
struct pnv_phb *phb = hose->private_data;
int ret = -EEXIST;
if (phb->eeh_ops && phb->eeh_ops->get_log)
ret = phb->eeh_ops->get_log(pe, severity, drv_log, len);
return ret;
}
/**
* powernv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
* @pe: EEH PE
*
* The function will be called to reconfigure the bridges included
* in the specified PE so that the malfunctioning PE can be
* recovered.
*/
static int powernv_eeh_configure_bridge(struct eeh_pe *pe)
{
struct pci_controller *hose = pe->phb;
struct pnv_phb *phb = hose->private_data;
int ret = 0;
if (phb->eeh_ops && phb->eeh_ops->configure_bridge)
ret = phb->eeh_ops->configure_bridge(pe);
return ret;
}
/**
* powernv_eeh_err_inject - Inject specified error to the indicated PE
* @pe: the indicated PE
* @type: error type
* @func: specific error type
* @addr: address
* @mask: address mask
*
* The routine is called to inject the specified error, which is
* determined by @type and @func, into the indicated PE for
* testing purposes.
*/
static int powernv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
unsigned long addr, unsigned long mask)
{
struct pci_controller *hose = pe->phb;
struct pnv_phb *phb = hose->private_data;
int ret = -EEXIST;
if (phb->eeh_ops && phb->eeh_ops->err_inject)
ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask);
return ret;
}
static inline bool powernv_eeh_cfg_blocked(struct device_node *dn)
{
struct eeh_dev *edev = of_node_to_eeh_dev(dn);
if (!edev || !edev->pe)
return false;
if (edev->pe->state & EEH_PE_CFG_BLOCKED)
return true;
return false;
}
static int powernv_eeh_read_config(struct device_node *dn,
int where, int size, u32 *val)
{
if (powernv_eeh_cfg_blocked(dn)) {
*val = 0xFFFFFFFF;
return PCIBIOS_SET_FAILED;
}
return pnv_pci_cfg_read(dn, where, size, val);
}
static int powernv_eeh_write_config(struct device_node *dn,
int where, int size, u32 val)
{
if (powernv_eeh_cfg_blocked(dn))
return PCIBIOS_SET_FAILED;
return pnv_pci_cfg_write(dn, where, size, val);
}
/**
* powernv_eeh_next_error - Retrieve next EEH error to handle
* @pe: Affected PE
*
* Use the OPAL API to retrieve the next EEH error for the EEH core to handle
*/
static int powernv_eeh_next_error(struct eeh_pe **pe)
{
struct pci_controller *hose;
struct pnv_phb *phb = NULL;
list_for_each_entry(hose, &hose_list, list_node) {
phb = hose->private_data;
break;
}
if (phb && phb->eeh_ops->next_error)
return phb->eeh_ops->next_error(pe);
return -EEXIST;
}
static int powernv_eeh_restore_config(struct device_node *dn)
{
struct eeh_dev *edev = of_node_to_eeh_dev(dn);
struct pnv_phb *phb;
s64 ret;
if (!edev)
return -EEXIST;
phb = edev->phb->private_data;
ret = opal_pci_reinit(phb->opal_id,
OPAL_REINIT_PCI_DEV, edev->config_addr);
if (ret) {
pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
__func__, edev->config_addr, ret);
return -EIO;
}
return 0;
}
static struct eeh_ops powernv_eeh_ops = {
.name = "powernv",
.init = powernv_eeh_init,
.post_init = powernv_eeh_post_init,
.of_probe = NULL,
.dev_probe = powernv_eeh_dev_probe,
.set_option = powernv_eeh_set_option,
.get_pe_addr = powernv_eeh_get_pe_addr,
.get_state = powernv_eeh_get_state,
.reset = powernv_eeh_reset,
.wait_state = powernv_eeh_wait_state,
.get_log = powernv_eeh_get_log,
.configure_bridge = powernv_eeh_configure_bridge,
.err_inject = powernv_eeh_err_inject,
.read_config = powernv_eeh_read_config,
.write_config = powernv_eeh_write_config,
.next_error = powernv_eeh_next_error,
.restore_config = powernv_eeh_restore_config
};
/**
* eeh_powernv_init - Register platform dependent EEH operations
*
* EEH initialization on powernv platform. This function should be
* called before any EEH related functions.
*/
static int __init eeh_powernv_init(void)
{
int ret = -EINVAL;
eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE);
ret = eeh_ops_register(&powernv_eeh_ops);
if (!ret)
pr_info("EEH: PowerNV platform initialized\n");
else
pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret);
return ret;
}
machine_early_initcall(powernv, eeh_powernv_init);


@@ -0,0 +1,205 @@
/*
* PowerNV OPAL asynchronous completion interfaces
*
* Copyright 2013 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#undef DEBUG
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/semaphore.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
#include <linux/gfp.h>
#include <linux/of.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#define N_ASYNC_COMPLETIONS 64
static DECLARE_BITMAP(opal_async_complete_map, N_ASYNC_COMPLETIONS) = {~0UL};
static DECLARE_BITMAP(opal_async_token_map, N_ASYNC_COMPLETIONS);
static DECLARE_WAIT_QUEUE_HEAD(opal_async_wait);
static DEFINE_SPINLOCK(opal_async_comp_lock);
static struct semaphore opal_async_sem;
static struct opal_msg *opal_async_responses;
static unsigned int opal_max_async_tokens;
int __opal_async_get_token(void)
{
unsigned long flags;
int token;
spin_lock_irqsave(&opal_async_comp_lock, flags);
token = find_first_bit(opal_async_complete_map, opal_max_async_tokens);
if (token >= opal_max_async_tokens) {
token = -EBUSY;
goto out;
}
if (__test_and_set_bit(token, opal_async_token_map)) {
token = -EBUSY;
goto out;
}
__clear_bit(token, opal_async_complete_map);
out:
spin_unlock_irqrestore(&opal_async_comp_lock, flags);
return token;
}
int opal_async_get_token_interruptible(void)
{
int token;
/* Wait until a token is available */
if (down_interruptible(&opal_async_sem))
return -ERESTARTSYS;
token = __opal_async_get_token();
if (token < 0)
up(&opal_async_sem);
return token;
}
int __opal_async_release_token(int token)
{
unsigned long flags;
if (token < 0 || token >= opal_max_async_tokens) {
pr_err("%s: Passed token is out of range, token %d\n",
__func__, token);
return -EINVAL;
}
spin_lock_irqsave(&opal_async_comp_lock, flags);
__set_bit(token, opal_async_complete_map);
__clear_bit(token, opal_async_token_map);
spin_unlock_irqrestore(&opal_async_comp_lock, flags);
return 0;
}
int opal_async_release_token(int token)
{
int ret;
ret = __opal_async_release_token(token);
if (ret)
return ret;
up(&opal_async_sem);
return 0;
}
int opal_async_wait_response(uint64_t token, struct opal_msg *msg)
{
if (token >= opal_max_async_tokens) {
pr_err("%s: Invalid token passed\n", __func__);
return -EINVAL;
}
if (!msg) {
pr_err("%s: Invalid message pointer passed\n", __func__);
return -EINVAL;
}
wait_event(opal_async_wait, test_bit(token, opal_async_complete_map));
memcpy(msg, &opal_async_responses[token], sizeof(*msg));
return 0;
}
static int opal_async_comp_event(struct notifier_block *nb,
unsigned long msg_type, void *msg)
{
struct opal_msg *comp_msg = msg;
unsigned long flags;
uint64_t token;
if (msg_type != OPAL_MSG_ASYNC_COMP)
return 0;
token = be64_to_cpu(comp_msg->params[0]);
memcpy(&opal_async_responses[token], comp_msg, sizeof(*comp_msg));
spin_lock_irqsave(&opal_async_comp_lock, flags);
__set_bit(token, opal_async_complete_map);
spin_unlock_irqrestore(&opal_async_comp_lock, flags);
wake_up(&opal_async_wait);
return 0;
}
static struct notifier_block opal_async_comp_nb = {
.notifier_call = opal_async_comp_event,
.next = NULL,
.priority = 0,
};
static int __init opal_async_comp_init(void)
{
struct device_node *opal_node;
const __be32 *async;
int err;
opal_node = of_find_node_by_path("/ibm,opal");
if (!opal_node) {
pr_err("%s: Opal node not found\n", __func__);
err = -ENOENT;
goto out;
}
async = of_get_property(opal_node, "opal-msg-async-num", NULL);
if (!async) {
pr_err("%s: %s has no opal-msg-async-num\n",
__func__, opal_node->full_name);
err = -ENOENT;
goto out_opal_node;
}
opal_max_async_tokens = be32_to_cpup(async);
if (opal_max_async_tokens > N_ASYNC_COMPLETIONS)
opal_max_async_tokens = N_ASYNC_COMPLETIONS;
err = opal_message_notifier_register(OPAL_MSG_ASYNC_COMP,
&opal_async_comp_nb);
if (err) {
pr_err("%s: Can't register OPAL event notifier (%d)\n",
__func__, err);
goto out_opal_node;
}
opal_async_responses = kzalloc(
sizeof(*opal_async_responses) * opal_max_async_tokens,
GFP_KERNEL);
if (!opal_async_responses) {
pr_err("%s: Out of memory, failed to do asynchronous "
"completion init\n", __func__);
err = -ENOMEM;
goto out_opal_node;
}
/* Initialize to 1 less than the maximum tokens available, as we may
* need to grab one in an emergency through a synchronous call to
* __opal_async_get_token()
*/
sema_init(&opal_async_sem, opal_max_async_tokens - 1);
out_opal_node:
of_node_put(opal_node);
out:
return err;
}
machine_subsys_initcall(powernv, opal_async_comp_init);
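
For reference, the typical consumer pattern for the token API above (used by the OPAL sensor, RTC and flash drivers elsewhere in this commit) looks roughly like the sketch below. opal_do_something() is a placeholder, not a real OPAL wrapper; any call that returns OPAL_ASYNC_COMPLETION and later posts an OPAL_MSG_ASYNC_COMP message carrying the same token fits this shape.

/* Illustrative sketch only: opal_do_something() is hypothetical. */
static int example_async_call(void)
{
	struct opal_msg msg;
	int token, rc;

	token = opal_async_get_token_interruptible();
	if (token < 0)
		return token;

	rc = opal_do_something(token);	/* hypothetical async OPAL call */
	if (rc != OPAL_ASYNC_COMPLETION)
		goto out_release;

	/* Blocks until opal_async_comp_event() marks the token complete */
	rc = opal_async_wait_response(token, &msg);
	if (rc == 0)
		rc = be64_to_cpu(msg.params[1]);	/* OPAL return code */

out_release:
	opal_async_release_token(token);
	return rc;
}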


@@ -0,0 +1,456 @@
/*
* PowerNV OPAL Dump Interface
*
* Copyright 2013,2014 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kobject.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/delay.h>
#include <asm/opal.h>
#define DUMP_TYPE_FSP 0x01
struct dump_obj {
struct kobject kobj;
struct bin_attribute dump_attr;
uint32_t id; /* becomes object name */
uint32_t type;
uint32_t size;
char *buffer;
};
#define to_dump_obj(x) container_of(x, struct dump_obj, kobj)
struct dump_attribute {
struct attribute attr;
ssize_t (*show)(struct dump_obj *dump, struct dump_attribute *attr,
char *buf);
ssize_t (*store)(struct dump_obj *dump, struct dump_attribute *attr,
const char *buf, size_t count);
};
#define to_dump_attr(x) container_of(x, struct dump_attribute, attr)
static ssize_t dump_id_show(struct dump_obj *dump_obj,
struct dump_attribute *attr,
char *buf)
{
return sprintf(buf, "0x%x\n", dump_obj->id);
}
static const char* dump_type_to_string(uint32_t type)
{
switch (type) {
case 0x01: return "SP Dump";
case 0x02: return "System/Platform Dump";
case 0x03: return "SMA Dump";
default: return "unknown";
}
}
static ssize_t dump_type_show(struct dump_obj *dump_obj,
struct dump_attribute *attr,
char *buf)
{
return sprintf(buf, "0x%x %s\n", dump_obj->type,
dump_type_to_string(dump_obj->type));
}
static ssize_t dump_ack_show(struct dump_obj *dump_obj,
struct dump_attribute *attr,
char *buf)
{
return sprintf(buf, "ack - acknowledge dump\n");
}
/*
* Send acknowledgement to OPAL
*/
static int64_t dump_send_ack(uint32_t dump_id)
{
int rc;
rc = opal_dump_ack(dump_id);
if (rc)
pr_warn("%s: Failed to send ack to Dump ID 0x%x (%d)\n",
__func__, dump_id, rc);
return rc;
}
static ssize_t dump_ack_store(struct dump_obj *dump_obj,
struct dump_attribute *attr,
const char *buf,
size_t count)
{
dump_send_ack(dump_obj->id);
sysfs_remove_file_self(&dump_obj->kobj, &attr->attr);
kobject_put(&dump_obj->kobj);
return count;
}
/* Attributes of a dump
* The binary attribute of the dump itself is dynamic
* due to the dynamic size of the dump
*/
static struct dump_attribute id_attribute =
__ATTR(id, S_IRUGO, dump_id_show, NULL);
static struct dump_attribute type_attribute =
__ATTR(type, S_IRUGO, dump_type_show, NULL);
static struct dump_attribute ack_attribute =
__ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store);
static ssize_t init_dump_show(struct dump_obj *dump_obj,
struct dump_attribute *attr,
char *buf)
{
return sprintf(buf, "1 - initiate Service Processor(FSP) dump\n");
}
static int64_t dump_fips_init(uint8_t type)
{
int rc;
rc = opal_dump_init(type);
if (rc)
pr_warn("%s: Failed to initiate FSP dump (%d)\n",
__func__, rc);
return rc;
}
static ssize_t init_dump_store(struct dump_obj *dump_obj,
struct dump_attribute *attr,
const char *buf,
size_t count)
{
int rc;
rc = dump_fips_init(DUMP_TYPE_FSP);
if (rc == OPAL_SUCCESS)
pr_info("%s: Initiated FSP dump\n", __func__);
return count;
}
static struct dump_attribute initiate_attribute =
__ATTR(initiate_dump, 0600, init_dump_show, init_dump_store);
static struct attribute *initiate_attrs[] = {
&initiate_attribute.attr,
NULL,
};
static struct attribute_group initiate_attr_group = {
.attrs = initiate_attrs,
};
static struct kset *dump_kset;
static ssize_t dump_attr_show(struct kobject *kobj,
struct attribute *attr,
char *buf)
{
struct dump_attribute *attribute;
struct dump_obj *dump;
attribute = to_dump_attr(attr);
dump = to_dump_obj(kobj);
if (!attribute->show)
return -EIO;
return attribute->show(dump, attribute, buf);
}
static ssize_t dump_attr_store(struct kobject *kobj,
struct attribute *attr,
const char *buf, size_t len)
{
struct dump_attribute *attribute;
struct dump_obj *dump;
attribute = to_dump_attr(attr);
dump = to_dump_obj(kobj);
if (!attribute->store)
return -EIO;
return attribute->store(dump, attribute, buf, len);
}
static const struct sysfs_ops dump_sysfs_ops = {
.show = dump_attr_show,
.store = dump_attr_store,
};
static void dump_release(struct kobject *kobj)
{
struct dump_obj *dump;
dump = to_dump_obj(kobj);
vfree(dump->buffer);
kfree(dump);
}
static struct attribute *dump_default_attrs[] = {
&id_attribute.attr,
&type_attribute.attr,
&ack_attribute.attr,
NULL,
};
static struct kobj_type dump_ktype = {
.sysfs_ops = &dump_sysfs_ops,
.release = &dump_release,
.default_attrs = dump_default_attrs,
};
static int64_t dump_read_info(uint32_t *dump_id, uint32_t *dump_size, uint32_t *dump_type)
{
__be32 id, size, type;
int rc;
type = cpu_to_be32(0xffffffff);
rc = opal_dump_info2(&id, &size, &type);
if (rc == OPAL_PARAMETER)
rc = opal_dump_info(&id, &size);
*dump_id = be32_to_cpu(id);
*dump_size = be32_to_cpu(size);
*dump_type = be32_to_cpu(type);
if (rc)
pr_warn("%s: Failed to get dump info (%d)\n",
__func__, rc);
return rc;
}
static int64_t dump_read_data(struct dump_obj *dump)
{
struct opal_sg_list *list;
uint64_t addr;
int64_t rc;
/* Allocate memory */
dump->buffer = vzalloc(PAGE_ALIGN(dump->size));
if (!dump->buffer) {
pr_err("%s : Failed to allocate memory\n", __func__);
rc = -ENOMEM;
goto out;
}
/* Generate SG list */
list = opal_vmalloc_to_sg_list(dump->buffer, dump->size);
if (!list) {
rc = -ENOMEM;
goto out;
}
/* First entry address */
addr = __pa(list);
/* Fetch data */
rc = OPAL_BUSY_EVENT;
while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
rc = opal_dump_read(dump->id, addr);
if (rc == OPAL_BUSY_EVENT) {
opal_poll_events(NULL);
msleep(20);
}
}
if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL)
pr_warn("%s: Extract dump failed for ID 0x%x\n",
__func__, dump->id);
/* Free SG list */
opal_free_sg_list(list);
out:
return rc;
}
static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buffer, loff_t pos, size_t count)
{
ssize_t rc;
struct dump_obj *dump = to_dump_obj(kobj);
if (!dump->buffer) {
rc = dump_read_data(dump);
if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL) {
vfree(dump->buffer);
dump->buffer = NULL;
return -EIO;
}
if (rc == OPAL_PARTIAL) {
/* On a partial read, we just return EIO
* and rely on userspace to ask us to try
* again.
*/
pr_info("%s: Platform dump partially read. ID = 0x%x\n",
__func__, dump->id);
return -EIO;
}
}
memcpy(buffer, dump->buffer + pos, count);
/* You may think we could free the dump buffer now and retrieve
* it again later if needed, but due to current firmware limitation,
* that's not the case. So, once read into userspace once,
* we keep the dump around until it's acknowledged by userspace.
*/
return count;
}
static struct dump_obj *create_dump_obj(uint32_t id, size_t size,
uint32_t type)
{
struct dump_obj *dump;
int rc;
dump = kzalloc(sizeof(*dump), GFP_KERNEL);
if (!dump)
return NULL;
dump->kobj.kset = dump_kset;
kobject_init(&dump->kobj, &dump_ktype);
sysfs_bin_attr_init(&dump->dump_attr);
dump->dump_attr.attr.name = "dump";
dump->dump_attr.attr.mode = 0400;
dump->dump_attr.size = size;
dump->dump_attr.read = dump_attr_read;
dump->id = id;
dump->size = size;
dump->type = type;
rc = kobject_add(&dump->kobj, NULL, "0x%x-0x%x", type, id);
if (rc) {
kobject_put(&dump->kobj);
return NULL;
}
rc = sysfs_create_bin_file(&dump->kobj, &dump->dump_attr);
if (rc) {
kobject_put(&dump->kobj);
return NULL;
}
pr_info("%s: New platform dump. ID = 0x%x Size %u\n",
__func__, dump->id, dump->size);
kobject_uevent(&dump->kobj, KOBJ_ADD);
return dump;
}
static int process_dump(void)
{
int rc;
uint32_t dump_id, dump_size, dump_type;
struct dump_obj *dump;
char name[22];
rc = dump_read_info(&dump_id, &dump_size, &dump_type);
if (rc != OPAL_SUCCESS)
return rc;
sprintf(name, "0x%x-0x%x", dump_type, dump_id);
/* we may get notified twice, let's handle
* that gracefully and not create two conflicting
* entries.
*/
if (kset_find_obj(dump_kset, name))
return 0;
dump = create_dump_obj(dump_id, dump_size, dump_type);
if (!dump)
return -1;
return 0;
}
static void dump_work_fn(struct work_struct *work)
{
process_dump();
}
static DECLARE_WORK(dump_work, dump_work_fn);
static void schedule_process_dump(void)
{
schedule_work(&dump_work);
}
/*
* New dump available notification
*
* Once we get notification, we add sysfs entries for it.
* We only fetch the dump on demand, and create sysfs asynchronously.
*/
static int dump_event(struct notifier_block *nb,
unsigned long events, void *change)
{
if (events & OPAL_EVENT_DUMP_AVAIL)
schedule_process_dump();
return 0;
}
static struct notifier_block dump_nb = {
.notifier_call = dump_event,
.next = NULL,
.priority = 0
};
void __init opal_platform_dump_init(void)
{
int rc;
/* Platform dump not supported by firmware */
if (!opal_check_token(OPAL_DUMP_READ))
return;
dump_kset = kset_create_and_add("dump", NULL, opal_kobj);
if (!dump_kset) {
pr_warn("%s: Failed to create dump kset\n", __func__);
return;
}
rc = sysfs_create_group(&dump_kset->kobj, &initiate_attr_group);
if (rc) {
pr_warn("%s: Failed to create initiate dump attr group\n",
__func__);
kobject_put(&dump_kset->kobj);
return;
}
rc = opal_notifier_register(&dump_nb);
if (rc) {
pr_warn("%s: Can't register OPAL event notifier (%d)\n",
__func__, rc);
return;
}
opal_dump_resend_notification();
}


@@ -0,0 +1,319 @@
/*
* Error log support on PowerNV.
*
* Copyright 2013,2014 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/fcntl.h>
#include <linux/kobject.h>
#include <asm/uaccess.h>
#include <asm/opal.h>
struct elog_obj {
struct kobject kobj;
struct bin_attribute raw_attr;
uint64_t id;
uint64_t type;
size_t size;
char *buffer;
};
#define to_elog_obj(x) container_of(x, struct elog_obj, kobj)
struct elog_attribute {
struct attribute attr;
ssize_t (*show)(struct elog_obj *elog, struct elog_attribute *attr,
char *buf);
ssize_t (*store)(struct elog_obj *elog, struct elog_attribute *attr,
const char *buf, size_t count);
};
#define to_elog_attr(x) container_of(x, struct elog_attribute, attr)
static ssize_t elog_id_show(struct elog_obj *elog_obj,
struct elog_attribute *attr,
char *buf)
{
return sprintf(buf, "0x%llx\n", elog_obj->id);
}
static const char *elog_type_to_string(uint64_t type)
{
switch (type) {
case 0: return "PEL";
default: return "unknown";
}
}
static ssize_t elog_type_show(struct elog_obj *elog_obj,
struct elog_attribute *attr,
char *buf)
{
return sprintf(buf, "0x%llx %s\n",
elog_obj->type,
elog_type_to_string(elog_obj->type));
}
static ssize_t elog_ack_show(struct elog_obj *elog_obj,
struct elog_attribute *attr,
char *buf)
{
return sprintf(buf, "ack - acknowledge log message\n");
}
static ssize_t elog_ack_store(struct elog_obj *elog_obj,
struct elog_attribute *attr,
const char *buf,
size_t count)
{
opal_send_ack_elog(elog_obj->id);
sysfs_remove_file_self(&elog_obj->kobj, &attr->attr);
kobject_put(&elog_obj->kobj);
return count;
}
static struct elog_attribute id_attribute =
__ATTR(id, S_IRUGO, elog_id_show, NULL);
static struct elog_attribute type_attribute =
__ATTR(type, S_IRUGO, elog_type_show, NULL);
static struct elog_attribute ack_attribute =
__ATTR(acknowledge, 0660, elog_ack_show, elog_ack_store);
static struct kset *elog_kset;
static ssize_t elog_attr_show(struct kobject *kobj,
struct attribute *attr,
char *buf)
{
struct elog_attribute *attribute;
struct elog_obj *elog;
attribute = to_elog_attr(attr);
elog = to_elog_obj(kobj);
if (!attribute->show)
return -EIO;
return attribute->show(elog, attribute, buf);
}
static ssize_t elog_attr_store(struct kobject *kobj,
struct attribute *attr,
const char *buf, size_t len)
{
struct elog_attribute *attribute;
struct elog_obj *elog;
attribute = to_elog_attr(attr);
elog = to_elog_obj(kobj);
if (!attribute->store)
return -EIO;
return attribute->store(elog, attribute, buf, len);
}
static const struct sysfs_ops elog_sysfs_ops = {
.show = elog_attr_show,
.store = elog_attr_store,
};
static void elog_release(struct kobject *kobj)
{
struct elog_obj *elog;
elog = to_elog_obj(kobj);
kfree(elog->buffer);
kfree(elog);
}
static struct attribute *elog_default_attrs[] = {
&id_attribute.attr,
&type_attribute.attr,
&ack_attribute.attr,
NULL,
};
static struct kobj_type elog_ktype = {
.sysfs_ops = &elog_sysfs_ops,
.release = &elog_release,
.default_attrs = elog_default_attrs,
};
/* Maximum size of a single log on FSP is 16KB */
#define OPAL_MAX_ERRLOG_SIZE 16384
static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buffer, loff_t pos, size_t count)
{
int opal_rc;
struct elog_obj *elog = to_elog_obj(kobj);
/* We may have had an error reading before, so let's retry */
if (!elog->buffer) {
elog->buffer = kzalloc(elog->size, GFP_KERNEL);
if (!elog->buffer)
return -EIO;
opal_rc = opal_read_elog(__pa(elog->buffer),
elog->size, elog->id);
if (opal_rc != OPAL_SUCCESS) {
pr_err("ELOG: log read failed for log-id=%llx\n",
elog->id);
kfree(elog->buffer);
elog->buffer = NULL;
return -EIO;
}
}
memcpy(buffer, elog->buffer + pos, count);
return count;
}
static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type)
{
struct elog_obj *elog;
int rc;
elog = kzalloc(sizeof(*elog), GFP_KERNEL);
if (!elog)
return NULL;
elog->kobj.kset = elog_kset;
kobject_init(&elog->kobj, &elog_ktype);
sysfs_bin_attr_init(&elog->raw_attr);
elog->raw_attr.attr.name = "raw";
elog->raw_attr.attr.mode = 0400;
elog->raw_attr.size = size;
elog->raw_attr.read = raw_attr_read;
elog->id = id;
elog->size = size;
elog->type = type;
elog->buffer = kzalloc(elog->size, GFP_KERNEL);
if (elog->buffer) {
rc = opal_read_elog(__pa(elog->buffer),
elog->size, elog->id);
if (rc != OPAL_SUCCESS) {
pr_err("ELOG: log read failed for log-id=%llx\n",
elog->id);
kfree(elog->buffer);
elog->buffer = NULL;
}
}
rc = kobject_add(&elog->kobj, NULL, "0x%llx", id);
if (rc) {
kobject_put(&elog->kobj);
return NULL;
}
rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr);
if (rc) {
kobject_put(&elog->kobj);
return NULL;
}
kobject_uevent(&elog->kobj, KOBJ_ADD);
return elog;
}
static void elog_work_fn(struct work_struct *work)
{
__be64 size;
__be64 id;
__be64 type;
uint64_t elog_size;
uint64_t log_id;
uint64_t elog_type;
int rc;
char name[2+16+1];
rc = opal_get_elog_size(&id, &size, &type);
if (rc != OPAL_SUCCESS) {
pr_err("ELOG: OPAL log info read failed\n");
return;
}
elog_size = be64_to_cpu(size);
log_id = be64_to_cpu(id);
elog_type = be64_to_cpu(type);
WARN_ON(elog_size > OPAL_MAX_ERRLOG_SIZE);
if (elog_size >= OPAL_MAX_ERRLOG_SIZE)
elog_size = OPAL_MAX_ERRLOG_SIZE;
sprintf(name, "0x%llx", log_id);
/* we may get notified twice, let's handle
* that gracefully and not create two conflicting
* entries.
*/
if (kset_find_obj(elog_kset, name))
return;
create_elog_obj(log_id, elog_size, elog_type);
}
static DECLARE_WORK(elog_work, elog_work_fn);
static int elog_event(struct notifier_block *nb,
unsigned long events, void *change)
{
/* check for error log event */
if (events & OPAL_EVENT_ERROR_LOG_AVAIL)
schedule_work(&elog_work);
return 0;
}
static struct notifier_block elog_nb = {
.notifier_call = elog_event,
.next = NULL,
.priority = 0
};
int __init opal_elog_init(void)
{
int rc = 0;
/* ELOG not supported by firmware */
if (!opal_check_token(OPAL_ELOG_READ))
return -1;
elog_kset = kset_create_and_add("elog", NULL, opal_kobj);
if (!elog_kset) {
pr_warn("%s: failed to create elog kset\n", __func__);
return -1;
}
rc = opal_notifier_register(&elog_nb);
if (rc) {
pr_err("%s: Can't register OPAL event notifier (%d)\n",
__func__, rc);
return rc;
}
/* We are now ready to pull error logs from opal. */
opal_resend_pending_logs();
return 0;
}
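
Each error log surfaces as a directory under /sys/firmware/opal/elog/, named after the log id by kobject_add() above, with id, type, acknowledge and raw attributes. A minimal userspace sketch of the read-then-acknowledge flow follows; the id 0x1234 is a made-up example, not a value from this commit:

/* Illustrative userspace sketch: "0x1234" is a hypothetical log id. */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	static char buf[16384];	/* matches OPAL_MAX_ERRLOG_SIZE above */
	ssize_t n;
	int fd;

	fd = open("/sys/firmware/opal/elog/0x1234/raw", O_RDONLY);
	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf));
	close(fd);
	if (n <= 0)
		return 1;

	/* ... save or parse the PEL data in buf ... */

	/* Any write to "acknowledge" tells OPAL the log was consumed */
	fd = open("/sys/firmware/opal/elog/0x1234/acknowledge", O_WRONLY);
	if (fd >= 0) {
		write(fd, "ack", 3);
		close(fd);
	}
	return 0;
}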


@@ -0,0 +1,588 @@
/*
* PowerNV OPAL Firmware Update Interface
*
* Copyright 2013 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#define DEBUG
#include <linux/kernel.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/delay.h>
#include <asm/opal.h>
/* FLASH status codes */
#define FLASH_NO_OP -1099 /* No operation initiated by user */
#define FLASH_NO_AUTH -9002 /* Not a service authority partition */
/* Validate image status values */
#define VALIDATE_IMG_READY -1001 /* Image ready for validation */
#define VALIDATE_IMG_INCOMPLETE -1002 /* User copied < VALIDATE_BUF_SIZE */
/* Manage image status values */
#define MANAGE_ACTIVE_ERR -9001 /* Cannot overwrite active img */
/* Flash image status values */
#define FLASH_IMG_READY 0 /* Img ready for flash on reboot */
#define FLASH_INVALID_IMG -1003 /* Flash image shorter than expected */
#define FLASH_IMG_NULL_DATA -1004 /* Bad data in sg list entry */
#define FLASH_IMG_BAD_LEN -1005 /* Bad length in sg list entry */
/* Manage operation tokens */
#define FLASH_REJECT_TMP_SIDE 0 /* Reject temporary fw image */
#define FLASH_COMMIT_TMP_SIDE 1 /* Commit temporary fw image */
/* Update tokens */
#define FLASH_UPDATE_CANCEL 0 /* Cancel update request */
#define FLASH_UPDATE_INIT 1 /* Initiate update */
/* Validate image update result tokens */
#define VALIDATE_TMP_UPDATE 0 /* T side will be updated */
#define VALIDATE_FLASH_AUTH 1 /* Partition does not have authority */
#define VALIDATE_INVALID_IMG 2 /* Candidate image is not valid */
#define VALIDATE_CUR_UNKNOWN 3 /* Current fixpack level is unknown */
/*
* Current T side will be committed to P side before being replaced with the
* new image, and the new image is downlevel from the current image
*/
#define VALIDATE_TMP_COMMIT_DL 4
/*
* Current T side will be committed to P side before being replaced with new
* image
*/
#define VALIDATE_TMP_COMMIT 5
/*
* T side will be updated with a downlevel image
*/
#define VALIDATE_TMP_UPDATE_DL 6
/*
* The candidate image's release date is later than the system's firmware
* service entitlement date - service warranty period has expired
*/
#define VALIDATE_OUT_OF_WRNTY 7
/* Validate buffer size */
#define VALIDATE_BUF_SIZE 4096
/* XXX: Assume candidate image size is <= 1GB */
#define MAX_IMAGE_SIZE 0x40000000
/* Image status */
enum {
IMAGE_INVALID,
IMAGE_LOADING,
IMAGE_READY,
};
/* Candidate image data */
struct image_data_t {
int status;
void *data;
uint32_t size;
};
/* Candidate image header */
struct image_header_t {
uint16_t magic;
uint16_t version;
uint32_t size;
};
struct validate_flash_t {
int status; /* Return status */
void *buf; /* Candidate image buffer */
uint32_t buf_size; /* Image size */
uint32_t result; /* Update results token */
};
struct manage_flash_t {
int status; /* Return status */
};
struct update_flash_t {
int status; /* Return status */
};
static struct image_header_t image_header;
static struct image_data_t image_data;
static struct validate_flash_t validate_flash_data;
static struct manage_flash_t manage_flash_data;
static struct update_flash_t update_flash_data;
static DEFINE_MUTEX(image_data_mutex);
/*
* Validate candidate image
*/
static inline void opal_flash_validate(void)
{
long ret;
void *buf = validate_flash_data.buf;
__be32 size = cpu_to_be32(validate_flash_data.buf_size);
__be32 result;
ret = opal_validate_flash(__pa(buf), &size, &result);
validate_flash_data.status = ret;
validate_flash_data.buf_size = be32_to_cpu(size);
validate_flash_data.result = be32_to_cpu(result);
}
/*
* Validate output format:
* validate result token
* current image version details
* new image version details
*/
static ssize_t validate_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct validate_flash_t *args_buf = &validate_flash_data;
int len;
/* Candidate image is not validated */
if (args_buf->status < VALIDATE_TMP_UPDATE) {
len = sprintf(buf, "%d\n", args_buf->status);
goto out;
}
/* Result token */
len = sprintf(buf, "%d\n", args_buf->result);
/* Current and candidate image version details */
if ((args_buf->result != VALIDATE_TMP_UPDATE) &&
(args_buf->result < VALIDATE_CUR_UNKNOWN))
goto out;
if (args_buf->buf_size > (VALIDATE_BUF_SIZE - len)) {
memcpy(buf + len, args_buf->buf, VALIDATE_BUF_SIZE - len);
len = VALIDATE_BUF_SIZE;
} else {
memcpy(buf + len, args_buf->buf, args_buf->buf_size);
len += args_buf->buf_size;
}
out:
/* Set status to default */
args_buf->status = FLASH_NO_OP;
return len;
}
/*
* Validate candidate firmware image
*
* Note:
* We are only interested in the first 4K bytes of the
* candidate image.
*/
static ssize_t validate_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct validate_flash_t *args_buf = &validate_flash_data;
if (buf[0] != '1')
return -EINVAL;
mutex_lock(&image_data_mutex);
if (image_data.status != IMAGE_READY ||
image_data.size < VALIDATE_BUF_SIZE) {
args_buf->result = VALIDATE_INVALID_IMG;
args_buf->status = VALIDATE_IMG_INCOMPLETE;
goto out;
}
/* Copy first 4k bytes of candidate image */
memcpy(args_buf->buf, image_data.data, VALIDATE_BUF_SIZE);
args_buf->status = VALIDATE_IMG_READY;
args_buf->buf_size = VALIDATE_BUF_SIZE;
/* Validate candidate image */
opal_flash_validate();
out:
mutex_unlock(&image_data_mutex);
return count;
}
/*
* Manage flash routine
*/
static inline void opal_flash_manage(uint8_t op)
{
struct manage_flash_t *const args_buf = &manage_flash_data;
args_buf->status = opal_manage_flash(op);
}
/*
* Show manage flash status
*/
static ssize_t manage_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct manage_flash_t *const args_buf = &manage_flash_data;
int rc;
rc = sprintf(buf, "%d\n", args_buf->status);
/* Set status to default */
args_buf->status = FLASH_NO_OP;
return rc;
}
/*
* Manage operations:
* 0 - Reject
* 1 - Commit
*/
static ssize_t manage_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
uint8_t op;
switch (buf[0]) {
case '0':
op = FLASH_REJECT_TMP_SIDE;
break;
case '1':
op = FLASH_COMMIT_TMP_SIDE;
break;
default:
return -EINVAL;
}
/* commit/reject temporary image */
opal_flash_manage(op);
return count;
}
/*
* OPAL update flash
*/
static int opal_flash_update(int op)
{
struct opal_sg_list *list;
unsigned long addr;
int64_t rc = OPAL_PARAMETER;
if (op == FLASH_UPDATE_CANCEL) {
pr_alert("FLASH: Image update cancelled\n");
addr = 0;
goto flash;
}
list = opal_vmalloc_to_sg_list(image_data.data, image_data.size);
if (!list)
goto invalid_img;
/* First entry address */
addr = __pa(list);
flash:
rc = opal_update_flash(addr);
invalid_img:
return rc;
}
/* Return CPUs to OPAL before starting FW update */
static void flash_return_cpu(void *info)
{
int cpu = smp_processor_id();
if (!cpu_online(cpu))
return;
/* Disable IRQ */
hard_irq_disable();
/* Return the CPU to OPAL */
opal_return_cpu();
}
/* This gets called just before system reboots */
void opal_flash_term_callback(void)
{
struct cpumask mask;
if (update_flash_data.status != FLASH_IMG_READY)
return;
pr_alert("FLASH: Flashing new firmware\n");
pr_alert("FLASH: Image is %u bytes\n", image_data.size);
pr_alert("FLASH: Performing flash and reboot/shutdown\n");
pr_alert("FLASH: This will take several minutes. Do not power off!\n");
/* Small delay to help getting the above message out */
msleep(500);
/* Return secondary CPUs to firmware */
cpumask_copy(&mask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &mask);
if (!cpumask_empty(&mask))
smp_call_function_many(&mask,
flash_return_cpu, NULL, false);
/* Hard disable interrupts */
hard_irq_disable();
}
/*
* Show candidate image status
*/
static ssize_t update_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct update_flash_t *const args_buf = &update_flash_data;
return sprintf(buf, "%d\n", args_buf->status);
}
/*
* Set update image flag
* 1 - Flash new image
* 0 - Cancel flash request
*/
static ssize_t update_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct update_flash_t *const args_buf = &update_flash_data;
int rc = count;
mutex_lock(&image_data_mutex);
switch (buf[0]) {
case '0':
if (args_buf->status == FLASH_IMG_READY)
opal_flash_update(FLASH_UPDATE_CANCEL);
args_buf->status = FLASH_NO_OP;
break;
case '1':
/* Image is loaded? */
if (image_data.status == IMAGE_READY)
args_buf->status =
opal_flash_update(FLASH_UPDATE_INIT);
else
args_buf->status = FLASH_INVALID_IMG;
break;
default:
rc = -EINVAL;
}
mutex_unlock(&image_data_mutex);
return rc;
}
/*
* Free image buffer
*/
static void free_image_buf(void)
{
void *addr;
int size;
addr = image_data.data;
size = PAGE_ALIGN(image_data.size);
while (size > 0) {
ClearPageReserved(vmalloc_to_page(addr));
addr += PAGE_SIZE;
size -= PAGE_SIZE;
}
vfree(image_data.data);
image_data.data = NULL;
image_data.status = IMAGE_INVALID;
}
/*
* Allocate image buffer.
*/
static int alloc_image_buf(char *buffer, size_t count)
{
void *addr;
int size;
if (count < sizeof(struct image_header_t)) {
pr_warn("FLASH: Invalid candidate image\n");
return -EINVAL;
}
memcpy(&image_header, (void *)buffer, sizeof(struct image_header_t));
image_data.size = be32_to_cpu(image_header.size);
pr_debug("FLASH: Candidate image size = %u\n", image_data.size);
if (image_data.size > MAX_IMAGE_SIZE) {
pr_warn("FLASH: Too large image\n");
return -EINVAL;
}
if (image_data.size < VALIDATE_BUF_SIZE) {
pr_warn("FLASH: Image is shorter than expected\n");
return -EINVAL;
}
image_data.data = vzalloc(PAGE_ALIGN(image_data.size));
if (!image_data.data) {
pr_err("%s : Failed to allocate memory\n", __func__);
return -ENOMEM;
}
/* Pin memory */
addr = image_data.data;
size = PAGE_ALIGN(image_data.size);
while (size > 0) {
SetPageReserved(vmalloc_to_page(addr));
addr += PAGE_SIZE;
size -= PAGE_SIZE;
}
image_data.status = IMAGE_LOADING;
return 0;
}
/*
* Copy candidate image
*
* Parse candidate image header to get total image size
* and pre-allocate required memory.
*/
static ssize_t image_data_write(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buffer, loff_t pos, size_t count)
{
int rc;
mutex_lock(&image_data_mutex);
/* New image ? */
if (pos == 0) {
/* Free memory, if already allocated */
if (image_data.data)
free_image_buf();
/* Cancel outstanding image update request */
if (update_flash_data.status == FLASH_IMG_READY)
opal_flash_update(FLASH_UPDATE_CANCEL);
/* Allocate memory */
rc = alloc_image_buf(buffer, count);
if (rc)
goto out;
}
if (image_data.status != IMAGE_LOADING) {
rc = -ENOMEM;
goto out;
}
if ((pos + count) > image_data.size) {
rc = -EINVAL;
goto out;
}
memcpy(image_data.data + pos, (void *)buffer, count);
rc = count;
/* Set image status */
if ((pos + count) == image_data.size) {
pr_debug("FLASH: Candidate image loaded....\n");
image_data.status = IMAGE_READY;
}
out:
mutex_unlock(&image_data_mutex);
return rc;
}
/*
* sysfs interface:
* OPAL code update uses the sysfs files below.
* We create these files under /sys/firmware/opal.
*
* image : Interface to load candidate firmware image
* validate_flash : Validate firmware image
* manage_flash : Commit/Reject firmware image
* update_flash : Flash new firmware image
*
*/
static struct bin_attribute image_data_attr = {
.attr = {.name = "image", .mode = 0200},
.size = MAX_IMAGE_SIZE, /* Limit image size */
.write = image_data_write,
};
static struct kobj_attribute validate_attribute =
__ATTR(validate_flash, 0600, validate_show, validate_store);
static struct kobj_attribute manage_attribute =
__ATTR(manage_flash, 0600, manage_show, manage_store);
static struct kobj_attribute update_attribute =
__ATTR(update_flash, 0600, update_show, update_store);
static struct attribute *image_op_attrs[] = {
&validate_attribute.attr,
&manage_attribute.attr,
&update_attribute.attr,
NULL /* need to NULL terminate the list of attributes */
};
static struct attribute_group image_op_attr_group = {
.attrs = image_op_attrs,
};
void __init opal_flash_init(void)
{
int ret;
/* Allocate validate image buffer */
validate_flash_data.buf = kzalloc(VALIDATE_BUF_SIZE, GFP_KERNEL);
if (!validate_flash_data.buf) {
pr_err("%s : Failed to allocate memory\n", __func__);
return;
}
/* Make sure /sys/firmware/opal directory is created */
if (!opal_kobj) {
pr_warn("FLASH: opal kobject is not available\n");
goto nokobj;
}
/* Create the sysfs files */
ret = sysfs_create_group(opal_kobj, &image_op_attr_group);
if (ret) {
pr_warn("FLASH: Failed to create sysfs files\n");
goto nokobj;
}
ret = sysfs_create_bin_file(opal_kobj, &image_data_attr);
if (ret) {
pr_warn("FLASH: Failed to create sysfs files\n");
goto nosysfs_file;
}
/* Set default status */
validate_flash_data.status = FLASH_NO_OP;
manage_flash_data.status = FLASH_NO_OP;
update_flash_data.status = FLASH_NO_OP;
image_data.status = IMAGE_INVALID;
return;
nosysfs_file:
sysfs_remove_group(opal_kobj, &image_op_attr_group);
nokobj:
kfree(validate_flash_data.buf);
return;
}
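
Putting the pieces above together, the update flow driven from userspace is: stream the candidate image into /sys/firmware/opal/image, write '1' to validate_flash (and read it back for the result token), write '1' to update_flash, then reboot so opal_flash_term_callback() hands the CPUs back to OPAL for the actual flash. A rough sketch, assuming img/img_size already hold a complete candidate image; error handling is trimmed:

/* Illustrative userspace sketch only; not part of this commit. */
#include <fcntl.h>
#include <unistd.h>

static int opal_flash_load(const void *img, size_t img_size)
{
	size_t off = 0;
	int fd;

	fd = open("/sys/firmware/opal/image", O_WRONLY);
	if (fd < 0)
		return -1;
	while (off < img_size) {	/* sysfs may accept short writes */
		ssize_t n = write(fd, (const char *)img + off, img_size - off);
		if (n <= 0) {
			close(fd);
			return -1;
		}
		off += n;
	}
	close(fd);

	/* Ask firmware to validate the first 4K of the candidate image */
	fd = open("/sys/firmware/opal/validate_flash", O_WRONLY);
	if (fd >= 0) {
		write(fd, "1", 1);
		close(fd);
	}

	/* Queue the update; the actual flash happens at reboot */
	fd = open("/sys/firmware/opal/update_flash", O_WRONLY);
	if (fd >= 0) {
		write(fd, "1", 1);
		close(fd);
	}
	return 0;
}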


@@ -0,0 +1,189 @@
/*
* OPAL hypervisor Maintenance interrupt handling support in PowerNV.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; If not, see <http://www.gnu.org/licenses/>.
*
* Copyright 2014 IBM Corporation
* Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
*/
#undef DEBUG
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/of.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <asm/opal.h>
#include <asm/cputable.h>
#include <asm/machdep.h>
static int opal_hmi_handler_nb_init;
struct OpalHmiEvtNode {
struct list_head list;
struct OpalHMIEvent hmi_evt;
};
static LIST_HEAD(opal_hmi_evt_list);
static DEFINE_SPINLOCK(opal_hmi_evt_lock);
static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
{
const char *level, *sevstr, *error_info;
static const char *hmi_error_types[] = {
"Malfunction Alert",
"Processor Recovery done",
"Processor recovery occurred again",
"Processor recovery occurred for masked error",
"Timer facility experienced an error",
"TFMR SPR is corrupted",
"UPS (Uninterrupted Power System) Overflow indication",
"An XSCOM operation failure",
"An XSCOM operation completed",
"SCOM has set a reserved FIR bit to cause recovery",
"Debug trigger has set a reserved FIR bit to cause recovery",
"A hypervisor resource error occurred"
};
/* Print things out */
if (hmi_evt->version < OpalHMIEvt_V1) {
pr_err("HMI Interrupt, Unknown event version %d !\n",
hmi_evt->version);
return;
}
switch (hmi_evt->severity) {
case OpalHMI_SEV_NO_ERROR:
level = KERN_INFO;
sevstr = "Harmless";
break;
case OpalHMI_SEV_WARNING:
level = KERN_WARNING;
sevstr = "";
break;
case OpalHMI_SEV_ERROR_SYNC:
level = KERN_ERR;
sevstr = "Severe";
break;
case OpalHMI_SEV_FATAL:
default:
level = KERN_ERR;
sevstr = "Fatal";
break;
}
printk("%s%s Hypervisor Maintenance interrupt [%s]\n",
level, sevstr,
hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ?
"Recovered" : "Not recovered");
error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ?
hmi_error_types[hmi_evt->type]
: "Unknown";
printk("%s Error detail: %s\n", level, error_info);
printk("%s HMER: %016llx\n", level, be64_to_cpu(hmi_evt->hmer));
if ((hmi_evt->type == OpalHMI_ERROR_TFAC) ||
(hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY))
printk("%s TFMR: %016llx\n", level,
be64_to_cpu(hmi_evt->tfmr));
}
static void hmi_event_handler(struct work_struct *work)
{
unsigned long flags;
struct OpalHMIEvent *hmi_evt;
struct OpalHmiEvtNode *msg_node;
uint8_t disposition;
spin_lock_irqsave(&opal_hmi_evt_lock, flags);
while (!list_empty(&opal_hmi_evt_list)) {
msg_node = list_entry(opal_hmi_evt_list.next,
struct OpalHmiEvtNode, list);
list_del(&msg_node->list);
spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
hmi_evt = (struct OpalHMIEvent *) &msg_node->hmi_evt;
print_hmi_event_info(hmi_evt);
disposition = hmi_evt->disposition;
kfree(msg_node);
/*
* Check if HMI event has been recovered or not. If not
* then we can't continue, invoke panic.
*/
if (disposition != OpalHMI_DISPOSITION_RECOVERED)
panic("Unrecoverable HMI exception");
spin_lock_irqsave(&opal_hmi_evt_lock, flags);
}
spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
}
static DECLARE_WORK(hmi_event_work, hmi_event_handler);
/*
* opal_handle_hmi_event - notifier handler that queues up HMI events
* to be processed later.
*/
static int opal_handle_hmi_event(struct notifier_block *nb,
unsigned long msg_type, void *msg)
{
unsigned long flags;
struct OpalHMIEvent *hmi_evt;
struct opal_msg *hmi_msg = msg;
struct OpalHmiEvtNode *msg_node;
/* Sanity Checks */
if (msg_type != OPAL_MSG_HMI_EVT)
return 0;
/* HMI event info starts from param[0] */
hmi_evt = (struct OpalHMIEvent *)&hmi_msg->params[0];
/* Delay the logging of HMI events to workqueue. */
msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
if (!msg_node) {
pr_err("HMI: out of memory, Opal message event not handled\n");
return -ENOMEM;
}
memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(struct OpalHMIEvent));
spin_lock_irqsave(&opal_hmi_evt_lock, flags);
list_add(&msg_node->list, &opal_hmi_evt_list);
spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
schedule_work(&hmi_event_work);
return 0;
}
static struct notifier_block opal_hmi_handler_nb = {
.notifier_call = opal_handle_hmi_event,
.next = NULL,
.priority = 0,
};
static int __init opal_hmi_handler_init(void)
{
int ret;
if (!opal_hmi_handler_nb_init) {
ret = opal_message_notifier_register(
OPAL_MSG_HMI_EVT, &opal_hmi_handler_nb);
if (ret) {
pr_err("%s: Can't register OPAL event notifier (%d)\n",
__func__, ret);
return ret;
}
opal_hmi_handler_nb_init = 1;
}
return 0;
}
machine_subsys_initcall(powernv, opal_hmi_handler_init);


@@ -0,0 +1,414 @@
/*
* PowerNV LPC bus handling.
*
* Copyright 2013 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/of.h>
#include <linux/bug.h>
#include <linux/debugfs.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/xics.h>
#include <asm/opal.h>
#include <asm/prom.h>
#include <asm/uaccess.h>
#include <asm/debug.h>
static int opal_lpc_chip_id = -1;
static u8 opal_lpc_inb(unsigned long port)
{
int64_t rc;
__be32 data;
if (opal_lpc_chip_id < 0 || port > 0xffff)
return 0xff;
rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 1);
return rc ? 0xff : be32_to_cpu(data);
}
static __le16 __opal_lpc_inw(unsigned long port)
{
int64_t rc;
__be32 data;
if (opal_lpc_chip_id < 0 || port > 0xfffe)
return 0xffff;
if (port & 1)
return (__le16)opal_lpc_inb(port) << 8 | opal_lpc_inb(port + 1);
rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 2);
return rc ? 0xffff : be32_to_cpu(data);
}
static u16 opal_lpc_inw(unsigned long port)
{
return le16_to_cpu(__opal_lpc_inw(port));
}
static __le32 __opal_lpc_inl(unsigned long port)
{
int64_t rc;
__be32 data;
if (opal_lpc_chip_id < 0 || port > 0xfffc)
return 0xffffffff;
if (port & 3)
return (__le32)opal_lpc_inb(port ) << 24 |
(__le32)opal_lpc_inb(port + 1) << 16 |
(__le32)opal_lpc_inb(port + 2) << 8 |
opal_lpc_inb(port + 3);
rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 4);
return rc ? 0xffffffff : be32_to_cpu(data);
}
static u32 opal_lpc_inl(unsigned long port)
{
return le32_to_cpu(__opal_lpc_inl(port));
}
static void opal_lpc_outb(u8 val, unsigned long port)
{
if (opal_lpc_chip_id < 0 || port > 0xffff)
return;
opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 1);
}
static void __opal_lpc_outw(__le16 val, unsigned long port)
{
if (opal_lpc_chip_id < 0 || port > 0xfffe)
return;
if (port & 1) {
opal_lpc_outb(val >> 8, port);
opal_lpc_outb(val , port + 1);
return;
}
opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 2);
}
static void opal_lpc_outw(u16 val, unsigned long port)
{
__opal_lpc_outw(cpu_to_le16(val), port);
}
static void __opal_lpc_outl(__le32 val, unsigned long port)
{
if (opal_lpc_chip_id < 0 || port > 0xfffc)
return;
if (port & 3) {
opal_lpc_outb(val >> 24, port);
opal_lpc_outb(val >> 16, port + 1);
opal_lpc_outb(val >> 8, port + 2);
opal_lpc_outb(val , port + 3);
return;
}
opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 4);
}
static void opal_lpc_outl(u32 val, unsigned long port)
{
__opal_lpc_outl(cpu_to_le32(val), port);
}
static void opal_lpc_insb(unsigned long p, void *b, unsigned long c)
{
u8 *ptr = b;
while(c--)
*(ptr++) = opal_lpc_inb(p);
}
static void opal_lpc_insw(unsigned long p, void *b, unsigned long c)
{
__le16 *ptr = b;
while(c--)
*(ptr++) = __opal_lpc_inw(p);
}
static void opal_lpc_insl(unsigned long p, void *b, unsigned long c)
{
__le32 *ptr = b;
while(c--)
*(ptr++) = __opal_lpc_inl(p);
}
static void opal_lpc_outsb(unsigned long p, const void *b, unsigned long c)
{
const u8 *ptr = b;
while(c--)
opal_lpc_outb(*(ptr++), p);
}
static void opal_lpc_outsw(unsigned long p, const void *b, unsigned long c)
{
const __le16 *ptr = b;
while(c--)
__opal_lpc_outw(*(ptr++), p);
}
static void opal_lpc_outsl(unsigned long p, const void *b, unsigned long c)
{
const __le32 *ptr = b;
while(c--)
__opal_lpc_outl(*(ptr++), p);
}
static const struct ppc_pci_io opal_lpc_io = {
.inb = opal_lpc_inb,
.inw = opal_lpc_inw,
.inl = opal_lpc_inl,
.outb = opal_lpc_outb,
.outw = opal_lpc_outw,
.outl = opal_lpc_outl,
.insb = opal_lpc_insb,
.insw = opal_lpc_insw,
.insl = opal_lpc_insl,
.outsb = opal_lpc_outsb,
.outsw = opal_lpc_outsw,
.outsl = opal_lpc_outsl,
};
#ifdef CONFIG_DEBUG_FS
struct lpc_debugfs_entry {
enum OpalLPCAddressType lpc_type;
};
static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf,
size_t count, loff_t *ppos)
{
struct lpc_debugfs_entry *lpc = filp->private_data;
u32 data, pos, len, todo;
int rc;
if (!access_ok(VERIFY_WRITE, ubuf, count))
return -EFAULT;
todo = count;
while (todo) {
pos = *ppos;
/*
* Select access size based on count and alignment and
		 * access type. IO and MEM only support byte accesses,
* FW supports all 3.
*/
len = 1;
if (lpc->lpc_type == OPAL_LPC_FW) {
if (todo > 3 && (pos & 3) == 0)
len = 4;
else if (todo > 1 && (pos & 1) == 0)
len = 2;
}
rc = opal_lpc_read(opal_lpc_chip_id, lpc->lpc_type, pos,
&data, len);
if (rc)
return -ENXIO;
/*
* Now there is some trickery with the data returned by OPAL
* as it's the desired data right justified in a 32-bit BE
* word.
*
* This is a very bad interface and I'm to blame for it :-(
*
* So we can't just apply a 32-bit swap to what comes from OPAL,
* because user space expects the *bytes* to be in their proper
* respective positions (ie, LPC position).
*
* So what we really want to do here is to shift data right
* appropriately on a LE kernel.
*
* IE. If the LPC transaction has bytes B0, B1, B2 and B3 in that
* order, we have in memory written to by OPAL at the "data"
* pointer:
*
* Bytes: OPAL "data" LE "data"
* 32-bit: B0 B1 B2 B3 B0B1B2B3 B3B2B1B0
* 16-bit: B0 B1 0000B0B1 B1B00000
* 8-bit: B0 000000B0 B0000000
*
* So a BE kernel will have the leftmost of the above in the MSB
* and rightmost in the LSB and can just then "cast" the u32 "data"
* down to the appropriate quantity and write it.
*
* However, an LE kernel can't. It doesn't need to swap because a
* load from data followed by a store to user are going to preserve
* the byte ordering which is the wire byte order which is what the
* user wants, but in order to "crop" to the right size, we need to
* shift right first.
*/
switch(len) {
case 4:
rc = __put_user((u32)data, (u32 __user *)ubuf);
break;
case 2:
#ifdef __LITTLE_ENDIAN__
data >>= 16;
#endif
rc = __put_user((u16)data, (u16 __user *)ubuf);
break;
default:
#ifdef __LITTLE_ENDIAN__
data >>= 24;
#endif
rc = __put_user((u8)data, (u8 __user *)ubuf);
break;
}
if (rc)
return -EFAULT;
*ppos += len;
ubuf += len;
todo -= len;
}
return count;
}
static ssize_t lpc_debug_write(struct file *filp, const char __user *ubuf,
size_t count, loff_t *ppos)
{
struct lpc_debugfs_entry *lpc = filp->private_data;
u32 data, pos, len, todo;
int rc;
if (!access_ok(VERIFY_READ, ubuf, count))
return -EFAULT;
todo = count;
while (todo) {
pos = *ppos;
/*
* Select access size based on count and alignment and
		 * access type. IO and MEM only support byte accesses,
* FW supports all 3.
*/
len = 1;
if (lpc->lpc_type == OPAL_LPC_FW) {
if (todo > 3 && (pos & 3) == 0)
len = 4;
else if (todo > 1 && (pos & 1) == 0)
len = 2;
}
/*
		 * Similarly to the read case, we have some trickery here, but
		 * it is handled differently. We need to pass the value to OPAL in
* a register whose layout depends on the access size. We want
* to reproduce the memory layout of the user, however we aren't
* doing a load from user and a store to another memory location
* which would achieve that. Here we pass the value to OPAL via
* a register which is expected to contain the "BE" interpretation
* of the byte sequence. IE: for a 32-bit access, byte 0 should be
* in the MSB. So here we *do* need to byteswap on LE.
*
* User bytes: LE "data" OPAL "data"
* 32-bit: B0 B1 B2 B3 B3B2B1B0 B0B1B2B3
* 16-bit: B0 B1 0000B1B0 0000B0B1
* 8-bit: B0 000000B0 000000B0
*/
switch(len) {
case 4:
rc = __get_user(data, (u32 __user *)ubuf);
data = cpu_to_be32(data);
break;
case 2:
rc = __get_user(data, (u16 __user *)ubuf);
data = cpu_to_be16(data);
break;
default:
rc = __get_user(data, (u8 __user *)ubuf);
break;
}
if (rc)
return -EFAULT;
rc = opal_lpc_write(opal_lpc_chip_id, lpc->lpc_type, pos,
data, len);
if (rc)
return -ENXIO;
*ppos += len;
ubuf += len;
todo -= len;
}
return count;
}
static const struct file_operations lpc_fops = {
.read = lpc_debug_read,
.write = lpc_debug_write,
.open = simple_open,
.llseek = default_llseek,
};
static int opal_lpc_debugfs_create_type(struct dentry *folder,
const char *fname,
enum OpalLPCAddressType type)
{
struct lpc_debugfs_entry *entry;
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return -ENOMEM;
entry->lpc_type = type;
debugfs_create_file(fname, 0600, folder, entry, &lpc_fops);
return 0;
}
static int opal_lpc_init_debugfs(void)
{
struct dentry *root;
int rc = 0;
if (opal_lpc_chip_id < 0)
return -ENODEV;
root = debugfs_create_dir("lpc", powerpc_debugfs_root);
rc |= opal_lpc_debugfs_create_type(root, "io", OPAL_LPC_IO);
rc |= opal_lpc_debugfs_create_type(root, "mem", OPAL_LPC_MEM);
rc |= opal_lpc_debugfs_create_type(root, "fw", OPAL_LPC_FW);
return rc;
}
machine_device_initcall(powernv, opal_lpc_init_debugfs);
#endif /* CONFIG_DEBUG_FS */
void opal_lpc_init(void)
{
struct device_node *np;
/*
	 * Look for a Power8 LPC bus tagged as "primary"; we
	 * currently support only one, though the OPAL APIs
	 * support any number.
*/
for_each_compatible_node(np, NULL, "ibm,power8-lpc") {
if (!of_device_is_available(np))
continue;
if (!of_get_property(np, "primary", NULL))
continue;
opal_lpc_chip_id = of_get_ibm_chip_id(np);
break;
}
if (opal_lpc_chip_id < 0)
return;
/* Setup special IO ops */
ppc_pci_io = opal_lpc_io;
isa_io_special = true;
pr_info("OPAL: Power8 LPC bus found, chip ID %d\n", opal_lpc_chip_id);
}

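The accessors above only take effect once opal_lpc_init() has installed opal_lpc_io as ppc_pci_io and set isa_io_special. A minimal sketch of how a caller then reaches them, assuming CONFIG_PPC_INDIRECT_PIO and a hypothetical SuperIO-style port pair (illustrative only, not part of this change):

#include <linux/io.h>

/* Hypothetical caller: with isa_io_special set, a plain inb()/outb() on a
 * port number is routed through ppc_pci_io, i.e. into opal_lpc_inb() and
 * opal_lpc_outb() above. Port 0x2e/0x2f is just an example index/data pair.
 */
static u8 lpc_demo_read_superio_reg(u8 index)
{
	outb(index, 0x2e);	/* ends up in opal_lpc_outb() */
	return inb(0x2f);	/* ends up in opal_lpc_inb() */
}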
View file

@ -0,0 +1,147 @@
/*
 * OPAL asynchronous memory error handling support in PowerNV.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright 2013 IBM Corporation
* Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
*/
#undef DEBUG
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/of.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/cputable.h>
static int opal_mem_err_nb_init;
static LIST_HEAD(opal_memory_err_list);
static DEFINE_SPINLOCK(opal_mem_err_lock);
struct OpalMsgNode {
struct list_head list;
struct opal_msg msg;
};
static void handle_memory_error_event(struct OpalMemoryErrorData *merr_evt)
{
uint64_t paddr_start, paddr_end;
	pr_debug("%s: Retrieved memory error event, type: 0x%x\n",
__func__, merr_evt->type);
switch (merr_evt->type) {
case OPAL_MEM_ERR_TYPE_RESILIENCE:
paddr_start = be64_to_cpu(merr_evt->u.resilience.physical_address_start);
paddr_end = be64_to_cpu(merr_evt->u.resilience.physical_address_end);
break;
case OPAL_MEM_ERR_TYPE_DYN_DALLOC:
paddr_start = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start);
paddr_end = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end);
break;
default:
return;
}
for (; paddr_start < paddr_end; paddr_start += PAGE_SIZE) {
memory_failure(paddr_start >> PAGE_SHIFT, 0, 0);
}
}
static void handle_memory_error(void)
{
unsigned long flags;
struct OpalMemoryErrorData *merr_evt;
struct OpalMsgNode *msg_node;
spin_lock_irqsave(&opal_mem_err_lock, flags);
while (!list_empty(&opal_memory_err_list)) {
msg_node = list_entry(opal_memory_err_list.next,
struct OpalMsgNode, list);
list_del(&msg_node->list);
spin_unlock_irqrestore(&opal_mem_err_lock, flags);
merr_evt = (struct OpalMemoryErrorData *)
&msg_node->msg.params[0];
handle_memory_error_event(merr_evt);
kfree(msg_node);
spin_lock_irqsave(&opal_mem_err_lock, flags);
}
spin_unlock_irqrestore(&opal_mem_err_lock, flags);
}
static void mem_error_handler(struct work_struct *work)
{
handle_memory_error();
}
static DECLARE_WORK(mem_error_work, mem_error_handler);
/*
* opal_memory_err_event - notifier handler that queues up the opal message
 * to be processed later.
*/
static int opal_memory_err_event(struct notifier_block *nb,
unsigned long msg_type, void *msg)
{
unsigned long flags;
struct OpalMsgNode *msg_node;
if (msg_type != OPAL_MSG_MEM_ERR)
return 0;
msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
if (!msg_node) {
		pr_err("MEMORY_ERROR: out of memory, Opal message event not "
		       "handled\n");
return -ENOMEM;
}
memcpy(&msg_node->msg, msg, sizeof(struct opal_msg));
spin_lock_irqsave(&opal_mem_err_lock, flags);
list_add(&msg_node->list, &opal_memory_err_list);
spin_unlock_irqrestore(&opal_mem_err_lock, flags);
schedule_work(&mem_error_work);
return 0;
}
static struct notifier_block opal_mem_err_nb = {
.notifier_call = opal_memory_err_event,
.next = NULL,
.priority = 0,
};
static int __init opal_mem_err_init(void)
{
int ret;
if (!opal_mem_err_nb_init) {
ret = opal_message_notifier_register(
OPAL_MSG_MEM_ERR, &opal_mem_err_nb);
if (ret) {
pr_err("%s: Can't register OPAL event notifier (%d)\n",
__func__, ret);
return ret;
}
opal_mem_err_nb_init = 1;
}
return 0;
}
machine_subsys_initcall(powernv, opal_mem_err_init);

View file

@ -0,0 +1,124 @@
/*
* PowerNV OPAL in-memory console interface
*
* Copyright 2014 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <asm/io.h>
#include <asm/opal.h>
#include <linux/debugfs.h>
#include <linux/of.h>
#include <linux/types.h>
#include <asm/barrier.h>
/* OPAL in-memory console. Defined in OPAL source at core/console.c */
struct memcons {
__be64 magic;
#define MEMCONS_MAGIC 0x6630696567726173L
__be64 obuf_phys;
__be64 ibuf_phys;
__be32 obuf_size;
__be32 ibuf_size;
__be32 out_pos;
#define MEMCONS_OUT_POS_WRAP 0x80000000u
#define MEMCONS_OUT_POS_MASK 0x00ffffffu
__be32 in_prod;
__be32 in_cons;
};
static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr, char *to,
loff_t pos, size_t count)
{
struct memcons *mc = bin_attr->private;
const char *conbuf;
ssize_t ret;
size_t first_read = 0;
uint32_t out_pos, avail;
if (!mc)
return -ENODEV;
out_pos = be32_to_cpu(ACCESS_ONCE(mc->out_pos));
/* Now we've read out_pos, put a barrier in before reading the new
* data it points to in conbuf. */
smp_rmb();
conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys));
/* When the buffer has wrapped, read from the out_pos marker to the end
* of the buffer, and then read the remaining data as in the un-wrapped
* case. */
if (out_pos & MEMCONS_OUT_POS_WRAP) {
out_pos &= MEMCONS_OUT_POS_MASK;
avail = be32_to_cpu(mc->obuf_size) - out_pos;
ret = memory_read_from_buffer(to, count, &pos,
conbuf + out_pos, avail);
if (ret < 0)
goto out;
first_read = ret;
to += first_read;
count -= first_read;
pos -= avail;
if (count <= 0)
goto out;
}
/* Sanity check. The firmware should not do this to us. */
if (out_pos > be32_to_cpu(mc->obuf_size)) {
pr_err("OPAL: memory console corruption. Aborting read.\n");
return -EINVAL;
}
ret = memory_read_from_buffer(to, count, &pos, conbuf, out_pos);
if (ret < 0)
goto out;
ret += first_read;
out:
return ret;
}
static struct bin_attribute opal_msglog_attr = {
.attr = {.name = "msglog", .mode = 0444},
.read = opal_msglog_read
};
void __init opal_msglog_init(void)
{
u64 mcaddr;
struct memcons *mc;
if (of_property_read_u64(opal_node, "ibm,opal-memcons", &mcaddr)) {
pr_warn("OPAL: Property ibm,opal-memcons not found, no message log\n");
return;
}
mc = phys_to_virt(mcaddr);
if (!mc) {
pr_warn("OPAL: memory console address is invalid\n");
return;
}
if (be64_to_cpu(mc->magic) != MEMCONS_MAGIC) {
pr_warn("OPAL: memory console version is invalid\n");
return;
}
opal_msglog_attr.private = mc;
if (sysfs_create_bin_file(opal_kobj, &opal_msglog_attr) != 0)
pr_warn("OPAL: sysfs file creation failed\n");
}

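Because opal_msglog_init() registers the memcons buffer as a read-only binary sysfs attribute on the opal kobject, the firmware log appears to userspace as /sys/firmware/opal/msglog. A small userspace sketch that dumps it (illustrative only, not part of this change):

#include <stdio.h>

int main(void)
{
	char buf[4096];
	size_t n;
	FILE *f = fopen("/sys/firmware/opal/msglog", "r");

	if (!f)
		return 1;
	while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
		fwrite(buf, 1, n, stdout);	/* copy the in-memory console to stdout */
	fclose(f);
	return 0;
}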
View file

@ -0,0 +1,88 @@
/*
* PowerNV nvram code.
*
* Copyright 2011 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#define DEBUG
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/of.h>
#include <asm/opal.h>
#include <asm/machdep.h>
static unsigned int nvram_size;
static ssize_t opal_nvram_size(void)
{
return nvram_size;
}
static ssize_t opal_nvram_read(char *buf, size_t count, loff_t *index)
{
s64 rc;
int off;
if (*index >= nvram_size)
return 0;
off = *index;
if ((off + count) > nvram_size)
count = nvram_size - off;
rc = opal_read_nvram(__pa(buf), count, off);
if (rc != OPAL_SUCCESS)
return -EIO;
*index += count;
return count;
}
static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
{
s64 rc = OPAL_BUSY;
int off;
if (*index >= nvram_size)
return 0;
off = *index;
if ((off + count) > nvram_size)
count = nvram_size - off;
while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
rc = opal_write_nvram(__pa(buf), count, off);
if (rc == OPAL_BUSY_EVENT)
opal_poll_events(NULL);
}
*index += count;
return count;
}
void __init opal_nvram_init(void)
{
struct device_node *np;
const __be32 *nbytes_p;
np = of_find_compatible_node(NULL, NULL, "ibm,opal-nvram");
if (np == NULL)
return;
nbytes_p = of_get_property(np, "#bytes", NULL);
if (!nbytes_p) {
of_node_put(np);
return;
}
nvram_size = be32_to_cpup(nbytes_p);
pr_info("OPAL nvram setup, %u bytes\n", nvram_size);
of_node_put(np);
ppc_md.nvram_read = opal_nvram_read;
ppc_md.nvram_write = opal_nvram_write;
ppc_md.nvram_size = opal_nvram_size;
}

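The three ppc_md hooks installed by opal_nvram_init() are what the generic powerpc nvram layer calls into; a hedged sketch of a consumer going through those hooks (the helper itself is hypothetical, illustrative only):

#include <linux/errno.h>
#include <linux/kernel.h>
#include <asm/machdep.h>

/* Hypothetical helper: peek at the start of nvram through the ppc_md hooks. */
static ssize_t demo_nvram_peek(char *buf, size_t len)
{
	loff_t pos = 0;
	ssize_t total;

	if (!ppc_md.nvram_read || !ppc_md.nvram_size)
		return -ENODEV;
	total = ppc_md.nvram_size();
	if (total < 0)
		return total;
	if (len > (size_t)total)
		len = total;
	return ppc_md.nvram_read(buf, len, &pos);
}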
View file

@ -0,0 +1,114 @@
/*
* PowerNV Real Time Clock.
*
* Copyright 2011 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/time.h>
#include <linux/bcd.h>
#include <linux/rtc.h>
#include <linux/delay.h>
#include <asm/opal.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm)
{
tm->tm_year = ((bcd2bin(y_m_d >> 24) * 100) +
bcd2bin((y_m_d >> 16) & 0xff)) - 1900;
tm->tm_mon = bcd2bin((y_m_d >> 8) & 0xff) - 1;
tm->tm_mday = bcd2bin(y_m_d & 0xff);
tm->tm_hour = bcd2bin((h_m_s_ms >> 56) & 0xff);
tm->tm_min = bcd2bin((h_m_s_ms >> 48) & 0xff);
tm->tm_sec = bcd2bin((h_m_s_ms >> 40) & 0xff);
GregorianDay(tm);
}
unsigned long __init opal_get_boot_time(void)
{
struct rtc_time tm;
u32 y_m_d;
u64 h_m_s_ms;
__be32 __y_m_d;
__be64 __h_m_s_ms;
long rc = OPAL_BUSY;
if (!opal_check_token(OPAL_RTC_READ))
goto out;
while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
if (rc == OPAL_BUSY_EVENT)
opal_poll_events(NULL);
else
mdelay(10);
}
if (rc != OPAL_SUCCESS)
goto out;
y_m_d = be32_to_cpu(__y_m_d);
h_m_s_ms = be64_to_cpu(__h_m_s_ms);
opal_to_tm(y_m_d, h_m_s_ms, &tm);
return mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
tm.tm_hour, tm.tm_min, tm.tm_sec);
out:
ppc_md.get_rtc_time = NULL;
ppc_md.set_rtc_time = NULL;
return 0;
}
void opal_get_rtc_time(struct rtc_time *tm)
{
long rc = OPAL_BUSY;
u32 y_m_d;
u64 h_m_s_ms;
__be32 __y_m_d;
__be64 __h_m_s_ms;
while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
if (rc == OPAL_BUSY_EVENT)
opal_poll_events(NULL);
else
mdelay(10);
}
if (rc != OPAL_SUCCESS)
return;
y_m_d = be32_to_cpu(__y_m_d);
h_m_s_ms = be64_to_cpu(__h_m_s_ms);
opal_to_tm(y_m_d, h_m_s_ms, tm);
}
int opal_set_rtc_time(struct rtc_time *tm)
{
long rc = OPAL_BUSY;
u32 y_m_d = 0;
u64 h_m_s_ms = 0;
y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) / 100)) << 24;
y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) % 100)) << 16;
y_m_d |= ((u32)bin2bcd((tm->tm_mon + 1))) << 8;
y_m_d |= ((u32)bin2bcd(tm->tm_mday));
h_m_s_ms |= ((u64)bin2bcd(tm->tm_hour)) << 56;
h_m_s_ms |= ((u64)bin2bcd(tm->tm_min)) << 48;
h_m_s_ms |= ((u64)bin2bcd(tm->tm_sec)) << 40;
while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
rc = opal_rtc_write(y_m_d, h_m_s_ms);
if (rc == OPAL_BUSY_EVENT)
opal_poll_events(NULL);
else
mdelay(10);
}
return rc == OPAL_SUCCESS ? 0 : -EIO;
}

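The OPAL RTC interface packs BCD digits into two registers: y_m_d holds YYYYMMDD and the top bytes of h_m_s_ms hold HHMMSS, with the low bytes reserved for milliseconds. A quick worked check of the packing used by opal_set_rtc_time() above (illustrative only, not part of this change):

#include <linux/bcd.h>
#include <linux/printk.h>

/* Worked example: 2014-06-19 12:34:56 should pack as
 * y_m_d = 0x20140619 and h_m_s_ms = 0x1234560000000000.
 */
static void demo_pack_rtc(void)
{
	u32 y_m_d = (bin2bcd(20) << 24) | (bin2bcd(14) << 16) |
		    (bin2bcd(6) << 8) | bin2bcd(19);
	u64 h_m_s_ms = ((u64)bin2bcd(12) << 56) | ((u64)bin2bcd(34) << 48) |
		       ((u64)bin2bcd(56) << 40);

	pr_info("y_m_d=0x%08x h_m_s_ms=0x%016llx\n", y_m_d, h_m_s_ms);
}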
View file

@ -0,0 +1,66 @@
/*
* PowerNV sensor code
*
* Copyright (C) 2013 IBM
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/delay.h>
#include <linux/mutex.h>
#include <asm/opal.h>
static DEFINE_MUTEX(opal_sensor_mutex);
/*
 * This returns sensor information to the driver based on the requested sensor
 * handle. A handle is an opaque identifier for the powernv platform, read by
 * the driver from the device tree.
*/
int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
{
int ret, token;
struct opal_msg msg;
__be32 data;
token = opal_async_get_token_interruptible();
if (token < 0) {
pr_err("%s: Couldn't get the token, returning\n", __func__);
ret = token;
goto out;
}
mutex_lock(&opal_sensor_mutex);
ret = opal_sensor_read(sensor_hndl, token, &data);
if (ret != OPAL_ASYNC_COMPLETION)
goto out_token;
ret = opal_async_wait_response(token, &msg);
if (ret) {
pr_err("%s: Failed to wait for the async response, %d\n",
__func__, ret);
goto out_token;
}
*sensor_data = be32_to_cpu(data);
ret = be64_to_cpu(msg.params[1]);
out_token:
mutex_unlock(&opal_sensor_mutex);
opal_async_release_token(token);
out:
return ret;
}
EXPORT_SYMBOL_GPL(opal_get_sensor_data);

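opal_get_sensor_data() is exported for platform consumers such as a hwmon driver. A sketch of how such a driver might use it, where the "sensor-data" device-tree property name and the helper itself are assumptions (illustrative only, not part of this change):

#include <linux/errno.h>
#include <linux/of.h>
#include <linux/printk.h>
#include <asm/opal.h>

static int demo_read_one_sensor(struct device_node *np)
{
	u32 handle, value;
	int rc;

	/* Assumed DT layout: the opaque sensor handle lives in "sensor-data". */
	if (of_property_read_u32(np, "sensor-data", &handle))
		return -ENODEV;
	rc = opal_get_sensor_data(handle, &value);
	if (rc)
		return rc;
	pr_info("sensor %s: %u\n", np->name, value);
	return 0;
}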
View file

@ -0,0 +1,304 @@
/*
* PowerNV system parameter code
*
* Copyright (C) 2013 IBM
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kobject.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/of.h>
#include <linux/gfp.h>
#include <linux/stat.h>
#include <asm/opal.h>
#define MAX_PARAM_DATA_LEN 64
static DEFINE_MUTEX(opal_sysparam_mutex);
static struct kobject *sysparam_kobj;
static void *param_data_buf;
struct param_attr {
struct list_head list;
u32 param_id;
u32 param_size;
struct kobj_attribute kobj_attr;
};
static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer)
{
struct opal_msg msg;
ssize_t ret;
int token;
token = opal_async_get_token_interruptible();
if (token < 0) {
if (token != -ERESTARTSYS)
pr_err("%s: Couldn't get the token, returning\n",
__func__);
ret = token;
goto out;
}
ret = opal_get_param(token, param_id, (u64)buffer, length);
if (ret != OPAL_ASYNC_COMPLETION)
goto out_token;
ret = opal_async_wait_response(token, &msg);
if (ret) {
pr_err("%s: Failed to wait for the async response, %zd\n",
__func__, ret);
goto out_token;
}
ret = be64_to_cpu(msg.params[1]);
out_token:
opal_async_release_token(token);
out:
return ret;
}
static int opal_set_sys_param(u32 param_id, u32 length, void *buffer)
{
struct opal_msg msg;
int ret, token;
token = opal_async_get_token_interruptible();
if (token < 0) {
if (token != -ERESTARTSYS)
pr_err("%s: Couldn't get the token, returning\n",
__func__);
ret = token;
goto out;
}
ret = opal_set_param(token, param_id, (u64)buffer, length);
if (ret != OPAL_ASYNC_COMPLETION)
goto out_token;
ret = opal_async_wait_response(token, &msg);
if (ret) {
pr_err("%s: Failed to wait for the async response, %d\n",
__func__, ret);
goto out_token;
}
ret = be64_to_cpu(msg.params[1]);
out_token:
opal_async_release_token(token);
out:
return ret;
}
static ssize_t sys_param_show(struct kobject *kobj,
struct kobj_attribute *kobj_attr, char *buf)
{
struct param_attr *attr = container_of(kobj_attr, struct param_attr,
kobj_attr);
ssize_t ret;
mutex_lock(&opal_sysparam_mutex);
ret = opal_get_sys_param(attr->param_id, attr->param_size,
param_data_buf);
if (ret)
goto out;
memcpy(buf, param_data_buf, attr->param_size);
ret = attr->param_size;
out:
mutex_unlock(&opal_sysparam_mutex);
return ret;
}
static ssize_t sys_param_store(struct kobject *kobj,
struct kobj_attribute *kobj_attr, const char *buf, size_t count)
{
struct param_attr *attr = container_of(kobj_attr, struct param_attr,
kobj_attr);
ssize_t ret;
/* MAX_PARAM_DATA_LEN is sizeof(param_data_buf) */
if (count > MAX_PARAM_DATA_LEN)
count = MAX_PARAM_DATA_LEN;
mutex_lock(&opal_sysparam_mutex);
memcpy(param_data_buf, buf, count);
ret = opal_set_sys_param(attr->param_id, attr->param_size,
param_data_buf);
mutex_unlock(&opal_sysparam_mutex);
if (!ret)
ret = count;
return ret;
}
void __init opal_sys_param_init(void)
{
struct device_node *sysparam;
struct param_attr *attr;
u32 *id, *size;
int count, i;
u8 *perm;
if (!opal_kobj) {
pr_warn("SYSPARAM: opal kobject is not available\n");
goto out;
}
sysparam_kobj = kobject_create_and_add("sysparams", opal_kobj);
if (!sysparam_kobj) {
pr_err("SYSPARAM: Failed to create sysparam kobject\n");
goto out;
}
/* Allocate big enough buffer for any get/set transactions */
param_data_buf = kzalloc(MAX_PARAM_DATA_LEN, GFP_KERNEL);
if (!param_data_buf) {
pr_err("SYSPARAM: Failed to allocate memory for param data "
"buf\n");
goto out_kobj_put;
}
sysparam = of_find_node_by_path("/ibm,opal/sysparams");
if (!sysparam) {
pr_err("SYSPARAM: Opal sysparam node not found\n");
goto out_param_buf;
}
if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) {
pr_err("SYSPARAM: Opal sysparam node not compatible\n");
goto out_node_put;
}
/* Number of parameters exposed through DT */
count = of_property_count_strings(sysparam, "param-name");
if (count < 0) {
pr_err("SYSPARAM: No string found of property param-name in "
"the node %s\n", sysparam->name);
goto out_node_put;
}
id = kzalloc(sizeof(*id) * count, GFP_KERNEL);
if (!id) {
pr_err("SYSPARAM: Failed to allocate memory to read parameter "
"id\n");
goto out_node_put;
}
size = kzalloc(sizeof(*size) * count, GFP_KERNEL);
if (!size) {
pr_err("SYSPARAM: Failed to allocate memory to read parameter "
"size\n");
goto out_free_id;
}
perm = kzalloc(sizeof(*perm) * count, GFP_KERNEL);
if (!perm) {
pr_err("SYSPARAM: Failed to allocate memory to read supported "
"action on the parameter");
goto out_free_size;
}
if (of_property_read_u32_array(sysparam, "param-id", id, count)) {
pr_err("SYSPARAM: Missing property param-id in the DT\n");
goto out_free_perm;
}
if (of_property_read_u32_array(sysparam, "param-len", size, count)) {
pr_err("SYSPARAM: Missing property param-len in the DT\n");
goto out_free_perm;
}
if (of_property_read_u8_array(sysparam, "param-perm", perm, count)) {
pr_err("SYSPARAM: Missing property param-perm in the DT\n");
goto out_free_perm;
}
attr = kzalloc(sizeof(*attr) * count, GFP_KERNEL);
if (!attr) {
pr_err("SYSPARAM: Failed to allocate memory for parameter "
"attributes\n");
goto out_free_perm;
}
/* For each of the parameters, populate the parameter attributes */
for (i = 0; i < count; i++) {
if (size[i] > MAX_PARAM_DATA_LEN) {
pr_warn("SYSPARAM: Not creating parameter %d as size "
"exceeds buffer length\n", i);
continue;
}
sysfs_attr_init(&attr[i].kobj_attr.attr);
attr[i].param_id = id[i];
attr[i].param_size = size[i];
if (of_property_read_string_index(sysparam, "param-name", i,
&attr[i].kobj_attr.attr.name))
continue;
/* If the parameter is read-only or read-write */
switch (perm[i] & 3) {
case OPAL_SYSPARAM_READ:
attr[i].kobj_attr.attr.mode = S_IRUGO;
break;
case OPAL_SYSPARAM_WRITE:
attr[i].kobj_attr.attr.mode = S_IWUSR;
break;
case OPAL_SYSPARAM_RW:
attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUSR;
break;
default:
break;
}
attr[i].kobj_attr.show = sys_param_show;
attr[i].kobj_attr.store = sys_param_store;
if (sysfs_create_file(sysparam_kobj, &attr[i].kobj_attr.attr)) {
pr_err("SYSPARAM: Failed to create sysfs file %s\n",
attr[i].kobj_attr.attr.name);
goto out_free_attr;
}
}
kfree(perm);
kfree(size);
kfree(id);
of_node_put(sysparam);
return;
out_free_attr:
kfree(attr);
out_free_perm:
kfree(perm);
out_free_size:
kfree(size);
out_free_id:
kfree(id);
out_node_put:
of_node_put(sysparam);
out_param_buf:
kfree(param_data_buf);
out_kobj_put:
kobject_put(sysparam_kobj);
out:
return;
}

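Each parameter accepted by opal_sys_param_init() becomes a sysfs file under /sys/firmware/opal/sysparams/, named after its param-name string and readable or writable according to param-perm. A userspace sketch, where the parameter name used is a hypothetical example (illustrative only):

#include <stdio.h>

int main(void)
{
	/* Hypothetical parameter name; the real names come from the device tree. */
	const char *path = "/sys/firmware/opal/sysparams/surveillance";
	unsigned char buf[64];
	size_t n;
	FILE *f = fopen(path, "rb");

	if (!f)
		return 1;
	n = fread(buf, 1, sizeof(buf), f);
	printf("read %zu raw bytes from %s\n", n, path);
	fclose(f);
	return 0;
}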
View file

@ -0,0 +1,84 @@
#include <linux/percpu.h>
#include <linux/jump_label.h>
#include <asm/trace.h>
#ifdef HAVE_JUMP_LABEL
struct static_key opal_tracepoint_key = STATIC_KEY_INIT;
void opal_tracepoint_regfunc(void)
{
static_key_slow_inc(&opal_tracepoint_key);
}
void opal_tracepoint_unregfunc(void)
{
static_key_slow_dec(&opal_tracepoint_key);
}
#else
/*
* We optimise OPAL calls by placing opal_tracepoint_refcount
* directly in the TOC so we can check if the opal tracepoints are
* enabled via a single load.
*/
/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
extern long opal_tracepoint_refcount;
void opal_tracepoint_regfunc(void)
{
opal_tracepoint_refcount++;
}
void opal_tracepoint_unregfunc(void)
{
opal_tracepoint_refcount--;
}
#endif
/*
* Since the tracing code might execute OPAL calls we need to guard against
* recursion.
*/
static DEFINE_PER_CPU(unsigned int, opal_trace_depth);
void __trace_opal_entry(unsigned long opcode, unsigned long *args)
{
unsigned long flags;
unsigned int *depth;
local_irq_save(flags);
depth = &__get_cpu_var(opal_trace_depth);
if (*depth)
goto out;
(*depth)++;
preempt_disable();
trace_opal_entry(opcode, args);
(*depth)--;
out:
local_irq_restore(flags);
}
void __trace_opal_exit(long opcode, unsigned long retval)
{
unsigned long flags;
unsigned int *depth;
local_irq_save(flags);
depth = &__get_cpu_var(opal_trace_depth);
if (*depth)
goto out;
(*depth)++;
trace_opal_exit(opcode, retval);
preempt_enable();
(*depth)--;
out:
local_irq_restore(flags);
}

View file

@ -0,0 +1,251 @@
/*
* PowerNV OPAL API wrappers
*
* Copyright 2011 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <asm/ppc_asm.h>
#include <asm/hvcall.h>
#include <asm/asm-offsets.h>
#include <asm/opal.h>
#include <asm/jump_label.h>
.section ".text"
#ifdef CONFIG_TRACEPOINTS
#ifdef CONFIG_JUMP_LABEL
#define OPAL_BRANCH(LABEL) \
ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key)
#else
.section ".toc","aw"
.globl opal_tracepoint_refcount
opal_tracepoint_refcount:
.llong 0
.section ".text"
/*
* We branch around this in early init by using an unconditional cpu
* feature.
*/
#define OPAL_BRANCH(LABEL) \
BEGIN_FTR_SECTION; \
b 1f; \
END_FTR_SECTION(0, 1); \
ld r12,opal_tracepoint_refcount@toc(r2); \
cmpdi r12,0; \
bne- LABEL; \
1:
#endif
#else
#define OPAL_BRANCH(LABEL)
#endif
/* TODO:
*
* - Trace irqs in/off (needs saving/restoring all args, argh...)
* - Get r11 feed up by Dave so I can have better register usage
*/
#define OPAL_CALL(name, token) \
_GLOBAL_TOC(name); \
mflr r0; \
std r0,16(r1); \
li r0,token; \
OPAL_BRANCH(opal_tracepoint_entry) \
mfcr r12; \
stw r12,8(r1); \
std r1,PACAR1(r13); \
li r11,0; \
mfmsr r12; \
ori r11,r11,MSR_EE; \
std r12,PACASAVEDMSR(r13); \
andc r12,r12,r11; \
mtmsrd r12,1; \
LOAD_REG_ADDR(r11,opal_return); \
mtlr r11; \
li r11,MSR_DR|MSR_IR|MSR_LE;\
andc r12,r12,r11; \
mtspr SPRN_HSRR1,r12; \
LOAD_REG_ADDR(r11,opal); \
ld r12,8(r11); \
ld r2,0(r11); \
mtspr SPRN_HSRR0,r12; \
hrfid
opal_return:
/*
* Fixup endian on OPAL return... we should be able to simplify
* this by instead converting the below trampoline to a set of
* bytes (always BE) since MSR:LE will end up fixed up as a side
* effect of the rfid.
*/
FIXUP_ENDIAN
ld r2,PACATOC(r13);
lwz r4,8(r1);
ld r5,16(r1);
ld r6,PACASAVEDMSR(r13);
mtspr SPRN_SRR0,r5;
mtspr SPRN_SRR1,r6;
mtcr r4;
rfid
#ifdef CONFIG_TRACEPOINTS
opal_tracepoint_entry:
stdu r1,-STACKFRAMESIZE(r1)
std r0,STK_REG(R23)(r1)
std r3,STK_REG(R24)(r1)
std r4,STK_REG(R25)(r1)
std r5,STK_REG(R26)(r1)
std r6,STK_REG(R27)(r1)
std r7,STK_REG(R28)(r1)
std r8,STK_REG(R29)(r1)
std r9,STK_REG(R30)(r1)
std r10,STK_REG(R31)(r1)
mr r3,r0
addi r4,r1,STK_REG(R24)
bl __trace_opal_entry
ld r0,STK_REG(R23)(r1)
ld r3,STK_REG(R24)(r1)
ld r4,STK_REG(R25)(r1)
ld r5,STK_REG(R26)(r1)
ld r6,STK_REG(R27)(r1)
ld r7,STK_REG(R28)(r1)
ld r8,STK_REG(R29)(r1)
ld r9,STK_REG(R30)(r1)
ld r10,STK_REG(R31)(r1)
LOAD_REG_ADDR(r11,opal_tracepoint_return)
mfcr r12
std r11,16(r1)
stw r12,8(r1)
std r1,PACAR1(r13)
li r11,0
mfmsr r12
ori r11,r11,MSR_EE
std r12,PACASAVEDMSR(r13)
andc r12,r12,r11
mtmsrd r12,1
LOAD_REG_ADDR(r11,opal_return)
mtlr r11
li r11,MSR_DR|MSR_IR|MSR_LE
andc r12,r12,r11
mtspr SPRN_HSRR1,r12
LOAD_REG_ADDR(r11,opal)
ld r12,8(r11)
ld r2,0(r11)
mtspr SPRN_HSRR0,r12
hrfid
opal_tracepoint_return:
std r3,STK_REG(R31)(r1)
mr r4,r3
ld r0,STK_REG(R23)(r1)
bl __trace_opal_exit
ld r3,STK_REG(R31)(r1)
addi r1,r1,STACKFRAMESIZE
ld r0,16(r1)
mtlr r0
blr
#endif
OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL);
OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE);
OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ);
OPAL_CALL(opal_console_write_buffer_space, OPAL_CONSOLE_WRITE_BUFFER_SPACE);
OPAL_CALL(opal_rtc_read, OPAL_RTC_READ);
OPAL_CALL(opal_rtc_write, OPAL_RTC_WRITE);
OPAL_CALL(opal_cec_power_down, OPAL_CEC_POWER_DOWN);
OPAL_CALL(opal_cec_reboot, OPAL_CEC_REBOOT);
OPAL_CALL(opal_read_nvram, OPAL_READ_NVRAM);
OPAL_CALL(opal_write_nvram, OPAL_WRITE_NVRAM);
OPAL_CALL(opal_handle_interrupt, OPAL_HANDLE_INTERRUPT);
OPAL_CALL(opal_poll_events, OPAL_POLL_EVENTS);
OPAL_CALL(opal_pci_set_hub_tce_memory, OPAL_PCI_SET_HUB_TCE_MEMORY);
OPAL_CALL(opal_pci_set_phb_tce_memory, OPAL_PCI_SET_PHB_TCE_MEMORY);
OPAL_CALL(opal_pci_config_read_byte, OPAL_PCI_CONFIG_READ_BYTE);
OPAL_CALL(opal_pci_config_read_half_word, OPAL_PCI_CONFIG_READ_HALF_WORD);
OPAL_CALL(opal_pci_config_read_word, OPAL_PCI_CONFIG_READ_WORD);
OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE);
OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD);
OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD);
OPAL_CALL(opal_set_xive, OPAL_SET_XIVE);
OPAL_CALL(opal_get_xive, OPAL_GET_XIVE);
OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER);
OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS);
OPAL_CALL(opal_pci_eeh_freeze_clear, OPAL_PCI_EEH_FREEZE_CLEAR);
OPAL_CALL(opal_pci_eeh_freeze_set, OPAL_PCI_EEH_FREEZE_SET);
OPAL_CALL(opal_pci_err_inject, OPAL_PCI_ERR_INJECT);
OPAL_CALL(opal_pci_shpc, OPAL_PCI_SHPC);
OPAL_CALL(opal_pci_phb_mmio_enable, OPAL_PCI_PHB_MMIO_ENABLE);
OPAL_CALL(opal_pci_set_phb_mem_window, OPAL_PCI_SET_PHB_MEM_WINDOW);
OPAL_CALL(opal_pci_map_pe_mmio_window, OPAL_PCI_MAP_PE_MMIO_WINDOW);
OPAL_CALL(opal_pci_set_phb_table_memory, OPAL_PCI_SET_PHB_TABLE_MEMORY);
OPAL_CALL(opal_pci_set_pe, OPAL_PCI_SET_PE);
OPAL_CALL(opal_pci_set_peltv, OPAL_PCI_SET_PELTV);
OPAL_CALL(opal_pci_set_mve, OPAL_PCI_SET_MVE);
OPAL_CALL(opal_pci_set_mve_enable, OPAL_PCI_SET_MVE_ENABLE);
OPAL_CALL(opal_pci_get_xive_reissue, OPAL_PCI_GET_XIVE_REISSUE);
OPAL_CALL(opal_pci_set_xive_reissue, OPAL_PCI_SET_XIVE_REISSUE);
OPAL_CALL(opal_pci_set_xive_pe, OPAL_PCI_SET_XIVE_PE);
OPAL_CALL(opal_get_xive_source, OPAL_GET_XIVE_SOURCE);
OPAL_CALL(opal_get_msi_32, OPAL_GET_MSI_32);
OPAL_CALL(opal_get_msi_64, OPAL_GET_MSI_64);
OPAL_CALL(opal_start_cpu, OPAL_START_CPU);
OPAL_CALL(opal_query_cpu_status, OPAL_QUERY_CPU_STATUS);
OPAL_CALL(opal_write_oppanel, OPAL_WRITE_OPPANEL);
OPAL_CALL(opal_pci_map_pe_dma_window, OPAL_PCI_MAP_PE_DMA_WINDOW);
OPAL_CALL(opal_pci_map_pe_dma_window_real, OPAL_PCI_MAP_PE_DMA_WINDOW_REAL);
OPAL_CALL(opal_pci_reset, OPAL_PCI_RESET);
OPAL_CALL(opal_pci_get_hub_diag_data, OPAL_PCI_GET_HUB_DIAG_DATA);
OPAL_CALL(opal_pci_get_phb_diag_data, OPAL_PCI_GET_PHB_DIAG_DATA);
OPAL_CALL(opal_pci_fence_phb, OPAL_PCI_FENCE_PHB);
OPAL_CALL(opal_pci_reinit, OPAL_PCI_REINIT);
OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR);
OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS);
OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS);
OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED);
OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR);
OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL);
OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI);
OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2);
OPAL_CALL(opal_xscom_read, OPAL_XSCOM_READ);
OPAL_CALL(opal_xscom_write, OPAL_XSCOM_WRITE);
OPAL_CALL(opal_lpc_read, OPAL_LPC_READ);
OPAL_CALL(opal_lpc_write, OPAL_LPC_WRITE);
OPAL_CALL(opal_return_cpu, OPAL_RETURN_CPU);
OPAL_CALL(opal_reinit_cpus, OPAL_REINIT_CPUS);
OPAL_CALL(opal_read_elog, OPAL_ELOG_READ);
OPAL_CALL(opal_send_ack_elog, OPAL_ELOG_ACK);
OPAL_CALL(opal_get_elog_size, OPAL_ELOG_SIZE);
OPAL_CALL(opal_resend_pending_logs, OPAL_ELOG_RESEND);
OPAL_CALL(opal_write_elog, OPAL_ELOG_WRITE);
OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE);
OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE);
OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE);
OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE);
OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN);
OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT);
OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO);
OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2);
OPAL_CALL(opal_dump_read, OPAL_DUMP_READ);
OPAL_CALL(opal_dump_ack, OPAL_DUMP_ACK);
OPAL_CALL(opal_get_msg, OPAL_GET_MSG);
OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION);
OPAL_CALL(opal_dump_resend_notification, OPAL_DUMP_RESEND);
OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT);
OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE);

View file

@ -0,0 +1,133 @@
/*
 * PowerNV SCOM (XSCOM) access via OPAL.
*
* Copyright 2013 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/of.h>
#include <linux/bug.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/opal.h>
#include <asm/scom.h>
/*
* We could probably fit that inside the scom_map_t
* which is a void* after all but it's really too ugly
* so let's kmalloc it for now
*/
struct opal_scom_map {
uint32_t chip;
uint64_t addr;
};
static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count)
{
struct opal_scom_map *m;
const __be32 *gcid;
if (!of_get_property(dev, "scom-controller", NULL)) {
pr_err("%s: device %s is not a SCOM controller\n",
__func__, dev->full_name);
return SCOM_MAP_INVALID;
}
gcid = of_get_property(dev, "ibm,chip-id", NULL);
if (!gcid) {
pr_err("%s: device %s has no ibm,chip-id\n",
__func__, dev->full_name);
return SCOM_MAP_INVALID;
}
m = kmalloc(sizeof(struct opal_scom_map), GFP_KERNEL);
if (!m)
return NULL;
m->chip = be32_to_cpup(gcid);
m->addr = reg;
return (scom_map_t)m;
}
static void opal_scom_unmap(scom_map_t map)
{
kfree(map);
}
static int opal_xscom_err_xlate(int64_t rc)
{
switch(rc) {
case 0:
return 0;
/* Add more translations if necessary */
default:
return -EIO;
}
}
static u64 opal_scom_unmangle(u64 addr)
{
/*
* XSCOM indirect addresses have the top bit set. Additionally
* the rest of the top 3 nibbles is always 0.
*
* Because the debugfs interface uses signed offsets and shifts
* the address left by 3, we basically cannot use the top 4 bits
* of the 64-bit address, and thus cannot use the indirect bit.
*
* To deal with that, we support the indirect bit being in bit
* 4 (IBM notation) instead of bit 0 in this API, we do the
* conversion here. To leave room for further xscom address
* expansion, we only clear out the top byte
*
* For in-kernel use, we also support the real indirect bit, so
* we test for any of the top 5 bits
*
*/
if (addr & (0x1full << 59))
addr = (addr & ~(0xffull << 56)) | (1ull << 63);
return addr;
}
static int opal_scom_read(scom_map_t map, u64 reg, u64 *value)
{
struct opal_scom_map *m = map;
int64_t rc;
__be64 v;
reg = opal_scom_unmangle(m->addr + reg);
rc = opal_xscom_read(m->chip, reg, (__be64 *)__pa(&v));
*value = be64_to_cpu(v);
return opal_xscom_err_xlate(rc);
}
static int opal_scom_write(scom_map_t map, u64 reg, u64 value)
{
struct opal_scom_map *m = map;
int64_t rc;
reg = opal_scom_unmangle(m->addr + reg);
rc = opal_xscom_write(m->chip, reg, value);
return opal_xscom_err_xlate(rc);
}
static const struct scom_controller opal_scom_controller = {
.map = opal_scom_map,
.unmap = opal_scom_unmap,
.read = opal_scom_read,
.write = opal_scom_write
};
static int opal_xscom_init(void)
{
if (firmware_has_feature(FW_FEATURE_OPALv3))
scom_init(&opal_scom_controller);
return 0;
}
machine_arch_initcall(powernv, opal_xscom_init);

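With opal_scom_controller registered, in-kernel users are expected to go through the generic scom API rather than calling the OPAL wrappers directly. A hedged sketch, assuming the caller already holds a "scom-controller" device node (illustrative only, not part of this change):

#include <linux/errno.h>
#include <linux/of.h>
#include <asm/scom.h>

static int demo_xscom_read(struct device_node *scom_node, u64 reg, u64 *val)
{
	scom_map_t map;
	int rc;

	map = scom_map(scom_node, reg, 1);	/* ends up in opal_scom_map() */
	if (!scom_map_ok(map))
		return -ENODEV;
	rc = scom_read(map, 0, val);		/* ends up in opal_scom_read() */
	scom_unmap(map);
	return rc;
}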
View file

@ -0,0 +1,807 @@
/*
* PowerNV OPAL high level interfaces
*
* Copyright 2011 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#undef DEBUG
#include <linux/types.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/kobject.h>
#include <linux/delay.h>
#include <linux/memblock.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/firmware.h>
#include <asm/mce.h>
#include "powernv.h"
/* /sys/firmware/opal */
struct kobject *opal_kobj;
struct opal {
u64 base;
u64 entry;
u64 size;
} opal;
struct mcheck_recoverable_range {
u64 start_addr;
u64 end_addr;
u64 recover_addr;
};
static struct mcheck_recoverable_range *mc_recoverable_range;
static int mc_recoverable_range_len;
struct device_node *opal_node;
static DEFINE_SPINLOCK(opal_write_lock);
extern u64 opal_mc_secondary_handler[];
static unsigned int *opal_irqs;
static unsigned int opal_irq_count;
static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
static DEFINE_SPINLOCK(opal_notifier_lock);
static uint64_t last_notified_mask = 0x0ul;
static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
static void opal_reinit_cores(void)
{
	/* Do the actual re-init. This will clobber all FPRs, VRs, etc...
	 *
	 * It will preserve non-volatile GPRs and HSPRG0/1. It will
	 * also restore HIDs and other SPRs to their original values,
	 * but it might clobber a bunch.
*/
#ifdef __BIG_ENDIAN__
opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
#else
opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE);
#endif
}
int __init early_init_dt_scan_opal(unsigned long node,
const char *uname, int depth, void *data)
{
const void *basep, *entryp, *sizep;
int basesz, entrysz, runtimesz;
if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
return 0;
basep = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);
if (!basep || !entryp || !sizep)
return 1;
opal.base = of_read_number(basep, basesz/4);
opal.entry = of_read_number(entryp, entrysz/4);
opal.size = of_read_number(sizep, runtimesz/4);
pr_debug("OPAL Base = 0x%llx (basep=%p basesz=%d)\n",
opal.base, basep, basesz);
	pr_debug("OPAL Entry = 0x%llx (entryp=%p entrysz=%d)\n",
		 opal.entry, entryp, entrysz);
	pr_debug("OPAL Size  = 0x%llx (sizep=%p runtimesz=%d)\n",
opal.size, sizep, runtimesz);
powerpc_firmware_features |= FW_FEATURE_OPAL;
if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
powerpc_firmware_features |= FW_FEATURE_OPALv2;
powerpc_firmware_features |= FW_FEATURE_OPALv3;
pr_info("OPAL V3 detected !\n");
} else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
powerpc_firmware_features |= FW_FEATURE_OPALv2;
pr_info("OPAL V2 detected !\n");
} else {
pr_info("OPAL V1 detected !\n");
}
/* Reinit all cores with the right endian */
opal_reinit_cores();
/* Restore some bits */
if (cur_cpu_spec->cpu_restore)
cur_cpu_spec->cpu_restore();
return 1;
}
int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
const char *uname, int depth, void *data)
{
int i, psize, size;
const __be32 *prop;
if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
return 0;
prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);
if (!prop)
return 1;
pr_debug("Found machine check recoverable ranges.\n");
/*
* Calculate number of available entries.
*
* Each recoverable address range entry is (start address, len,
* recovery address), 2 cells each for start and recovery address,
* 1 cell for len, totalling 5 cells per entry.
*/
mc_recoverable_range_len = psize / (sizeof(*prop) * 5);
/* Sanity check */
if (!mc_recoverable_range_len)
return 1;
/* Size required to hold all the entries. */
size = mc_recoverable_range_len *
sizeof(struct mcheck_recoverable_range);
/*
* Allocate a buffer to hold the MC recoverable ranges. We would be
* accessing them in real mode, hence it needs to be within
* RMO region.
*/
mc_recoverable_range =__va(memblock_alloc_base(size, __alignof__(u64),
ppc64_rma_size));
memset(mc_recoverable_range, 0, size);
for (i = 0; i < mc_recoverable_range_len; i++) {
mc_recoverable_range[i].start_addr =
of_read_number(prop + (i * 5) + 0, 2);
mc_recoverable_range[i].end_addr =
mc_recoverable_range[i].start_addr +
of_read_number(prop + (i * 5) + 2, 1);
mc_recoverable_range[i].recover_addr =
of_read_number(prop + (i * 5) + 3, 2);
pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
mc_recoverable_range[i].start_addr,
mc_recoverable_range[i].end_addr,
mc_recoverable_range[i].recover_addr);
}
return 1;
}
static int __init opal_register_exception_handlers(void)
{
#ifdef __BIG_ENDIAN__
u64 glue;
if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
return -ENODEV;
/* Hookup some exception handlers except machine check. We use the
* fwnmi area at 0x7000 to provide the glue space to OPAL
*/
glue = 0x7000;
/*
* Check if we are running on newer firmware that exports
* OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch
* the HMI interrupt and we catch it directly in Linux.
*
	 * For older firmware (i.e. currently released POWER8 System Firmware
	 * as of today, <= SV810_087), we fall back to the old behavior and let OPAL
* patch the HMI vector and handle it inside OPAL firmware.
*
* For newer firmware (in development/yet to be released) we will
* start catching/handling HMI directly in Linux.
*/
if (!opal_check_token(OPAL_HANDLE_HMI)) {
pr_info("opal: Old firmware detected, OPAL handles HMIs.\n");
opal_register_exception_handler(
OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
0, glue);
glue += 128;
}
opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
#endif
return 0;
}
machine_early_initcall(powernv, opal_register_exception_handlers);
int opal_notifier_register(struct notifier_block *nb)
{
if (!nb) {
pr_warning("%s: Invalid argument (%p)\n",
__func__, nb);
return -EINVAL;
}
atomic_notifier_chain_register(&opal_notifier_head, nb);
return 0;
}
EXPORT_SYMBOL_GPL(opal_notifier_register);
int opal_notifier_unregister(struct notifier_block *nb)
{
if (!nb) {
pr_warning("%s: Invalid argument (%p)\n",
__func__, nb);
return -EINVAL;
}
atomic_notifier_chain_unregister(&opal_notifier_head, nb);
return 0;
}
EXPORT_SYMBOL_GPL(opal_notifier_unregister);
static void opal_do_notifier(uint64_t events)
{
unsigned long flags;
uint64_t changed_mask;
if (atomic_read(&opal_notifier_hold))
return;
spin_lock_irqsave(&opal_notifier_lock, flags);
changed_mask = last_notified_mask ^ events;
last_notified_mask = events;
spin_unlock_irqrestore(&opal_notifier_lock, flags);
/*
* We feed with the event bits and changed bits for
* enough information to the callback.
*/
atomic_notifier_call_chain(&opal_notifier_head,
events, (void *)changed_mask);
}
void opal_notifier_update_evt(uint64_t evt_mask,
uint64_t evt_val)
{
unsigned long flags;
spin_lock_irqsave(&opal_notifier_lock, flags);
last_notified_mask &= ~evt_mask;
last_notified_mask |= evt_val;
spin_unlock_irqrestore(&opal_notifier_lock, flags);
}
void opal_notifier_enable(void)
{
int64_t rc;
__be64 evt = 0;
atomic_set(&opal_notifier_hold, 0);
/* Process pending events */
rc = opal_poll_events(&evt);
if (rc == OPAL_SUCCESS && evt)
opal_do_notifier(be64_to_cpu(evt));
}
void opal_notifier_disable(void)
{
atomic_set(&opal_notifier_hold, 1);
}
/*
 * OPAL message notifier based on message type. Allows subscribers to get
 * notified for a specific message type.
*/
int opal_message_notifier_register(enum OpalMessageType msg_type,
struct notifier_block *nb)
{
if (!nb) {
pr_warning("%s: Invalid argument (%p)\n",
__func__, nb);
return -EINVAL;
}
if (msg_type > OPAL_MSG_TYPE_MAX) {
pr_warning("%s: Invalid message type argument (%d)\n",
__func__, msg_type);
return -EINVAL;
}
return atomic_notifier_chain_register(
&opal_msg_notifier_head[msg_type], nb);
}
static void opal_message_do_notify(uint32_t msg_type, void *msg)
{
/* notify subscribers */
atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
msg_type, msg);
}
static void opal_handle_message(void)
{
s64 ret;
/*
* TODO: pre-allocate a message buffer depending on opal-msg-size
* value in /proc/device-tree.
*/
static struct opal_msg msg;
u32 type;
ret = opal_get_msg(__pa(&msg), sizeof(msg));
/* No opal message pending. */
if (ret == OPAL_RESOURCE)
return;
/* check for errors. */
if (ret) {
pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
__func__, ret);
return;
}
type = be32_to_cpu(msg.msg_type);
/* Sanity check */
if (type > OPAL_MSG_TYPE_MAX) {
pr_warning("%s: Unknown message type: %u\n", __func__, type);
return;
}
opal_message_do_notify(type, (void *)&msg);
}
static int opal_message_notify(struct notifier_block *nb,
unsigned long events, void *change)
{
if (events & OPAL_EVENT_MSG_PENDING)
opal_handle_message();
return 0;
}
static struct notifier_block opal_message_nb = {
.notifier_call = opal_message_notify,
.next = NULL,
.priority = 0,
};
static int __init opal_message_init(void)
{
int ret, i;
for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
ret = opal_notifier_register(&opal_message_nb);
if (ret) {
pr_err("%s: Can't register OPAL event notifier (%d)\n",
__func__, ret);
return ret;
}
return 0;
}
machine_early_initcall(powernv, opal_message_init);
int opal_get_chars(uint32_t vtermno, char *buf, int count)
{
s64 rc;
__be64 evt, len;
if (!opal.entry)
return -ENODEV;
opal_poll_events(&evt);
if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
return 0;
len = cpu_to_be64(count);
rc = opal_console_read(vtermno, &len, buf);
if (rc == OPAL_SUCCESS)
return be64_to_cpu(len);
return 0;
}
int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
{
int written = 0;
__be64 olen;
s64 len, rc;
unsigned long flags;
__be64 evt;
if (!opal.entry)
return -ENODEV;
/* We want put_chars to be atomic to avoid mangling of hvsi
* packets. To do that, we first test for room and return
* -EAGAIN if there isn't enough.
*
* Unfortunately, opal_console_write_buffer_space() doesn't
* appear to work on opal v1, so we just assume there is
* enough room and be done with it
*/
spin_lock_irqsave(&opal_write_lock, flags);
if (firmware_has_feature(FW_FEATURE_OPALv2)) {
rc = opal_console_write_buffer_space(vtermno, &olen);
len = be64_to_cpu(olen);
if (rc || len < total_len) {
spin_unlock_irqrestore(&opal_write_lock, flags);
/* Closed -> drop characters */
if (rc)
return total_len;
opal_poll_events(NULL);
return -EAGAIN;
}
}
/* We still try to handle partial completions, though they
* should no longer happen.
*/
rc = OPAL_BUSY;
while(total_len > 0 && (rc == OPAL_BUSY ||
rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
olen = cpu_to_be64(total_len);
rc = opal_console_write(vtermno, &olen, data);
len = be64_to_cpu(olen);
/* Closed or other error drop */
if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
rc != OPAL_BUSY_EVENT) {
written = total_len;
break;
}
if (rc == OPAL_SUCCESS) {
total_len -= len;
data += len;
written += len;
}
/* This is a bit nasty but we need that for the console to
* flush when there aren't any interrupts. We will clean
* things a bit later to limit that to synchronous path
* such as the kernel console and xmon/udbg
*/
do
opal_poll_events(&evt);
while(rc == OPAL_SUCCESS &&
(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
}
spin_unlock_irqrestore(&opal_write_lock, flags);
return written;
}
static int opal_recover_mce(struct pt_regs *regs,
struct machine_check_event *evt)
{
int recovered = 0;
uint64_t ea = get_mce_fault_addr(evt);
if (!(regs->msr & MSR_RI)) {
/* If MSR_RI isn't set, we cannot recover */
recovered = 0;
} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
/* Platform corrected itself */
recovered = 1;
} else if (ea && !is_kernel_addr(ea)) {
/*
* Faulting address is not in kernel text. We should be fine.
* We need to find which process uses this address.
* For now, kill the task if we have received exception when
* in userspace.
*
		 * TODO: Queue up this address for hwpoisoning later.
*/
if (user_mode(regs) && !is_global_init(current)) {
_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
recovered = 1;
} else
recovered = 0;
} else if (user_mode(regs) && !is_global_init(current) &&
evt->severity == MCE_SEV_ERROR_SYNC) {
/*
* If we have received a synchronous error when in userspace
* kill the task.
*/
_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
recovered = 1;
}
return recovered;
}
int opal_machine_check(struct pt_regs *regs)
{
struct machine_check_event evt;
if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
return 0;
/* Print things out */
if (evt.version != MCE_V1) {
pr_err("Machine Check Exception, Unknown event version %d !\n",
evt.version);
return 0;
}
machine_check_print_event_info(&evt);
if (opal_recover_mce(regs, &evt))
return 1;
return 0;
}
/* Early hmi handler called in real mode. */
int opal_hmi_exception_early(struct pt_regs *regs)
{
s64 rc;
/*
* call opal hmi handler. Pass paca address as token.
* The return value OPAL_SUCCESS is an indication that there is
	 * an HMI event generated waiting to be pulled by Linux.
*/
rc = opal_handle_hmi();
if (rc == OPAL_SUCCESS) {
local_paca->hmi_event_available = 1;
return 1;
}
return 0;
}
/* HMI exception handler called in virtual mode during check_irq_replay. */
int opal_handle_hmi_exception(struct pt_regs *regs)
{
s64 rc;
__be64 evt = 0;
/*
* Check if HMI event is available.
	 * If yes, then call opal_poll_events() to pull OPAL messages and
* process them.
*/
if (!local_paca->hmi_event_available)
return 0;
local_paca->hmi_event_available = 0;
rc = opal_poll_events(&evt);
if (rc == OPAL_SUCCESS && evt)
opal_do_notifier(be64_to_cpu(evt));
return 1;
}
static uint64_t find_recovery_address(uint64_t nip)
{
int i;
for (i = 0; i < mc_recoverable_range_len; i++)
if ((nip >= mc_recoverable_range[i].start_addr) &&
(nip < mc_recoverable_range[i].end_addr))
return mc_recoverable_range[i].recover_addr;
return 0;
}
bool opal_mce_check_early_recovery(struct pt_regs *regs)
{
uint64_t recover_addr = 0;
if (!opal.base || !opal.size)
goto out;
if ((regs->nip >= opal.base) &&
(regs->nip <= (opal.base + opal.size)))
recover_addr = find_recovery_address(regs->nip);
/*
* Setup regs->nip to rfi into fixup address.
*/
if (recover_addr)
regs->nip = recover_addr;
out:
return !!recover_addr;
}
static irqreturn_t opal_interrupt(int irq, void *data)
{
__be64 events;
opal_handle_interrupt(virq_to_hw(irq), &events);
opal_do_notifier(be64_to_cpu(events));
return IRQ_HANDLED;
}
static int opal_sysfs_init(void)
{
opal_kobj = kobject_create_and_add("opal", firmware_kobj);
if (!opal_kobj) {
pr_warn("kobject_create_and_add opal failed\n");
return -ENOMEM;
}
return 0;
}
static void __init opal_dump_region_init(void)
{
void *addr;
uint64_t size;
int rc;
/* Register kernel log buffer */
addr = log_buf_addr_get();
size = log_buf_len_get();
rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
__pa(addr), size);
/* Don't warn if this is just an older OPAL that doesn't
* know about that call
*/
if (rc && rc != OPAL_UNSUPPORTED)
pr_warn("DUMP: Failed to register kernel log buffer. "
"rc = %d\n", rc);
}
static int __init opal_init(void)
{
struct device_node *np, *consoles;
const __be32 *irqs;
int rc, i, irqlen;
opal_node = of_find_node_by_path("/ibm,opal");
if (!opal_node) {
pr_warn("opal: Node not found\n");
return -ENODEV;
}
/* Register OPAL consoles if any ports */
if (firmware_has_feature(FW_FEATURE_OPALv2))
consoles = of_find_node_by_path("/ibm,opal/consoles");
else
consoles = of_node_get(opal_node);
if (consoles) {
for_each_child_of_node(consoles, np) {
if (strcmp(np->name, "serial"))
continue;
of_platform_device_create(np, NULL, NULL);
}
of_node_put(consoles);
}
/* Find all OPAL interrupts and request them */
irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
pr_debug("opal: Found %d interrupts reserved for OPAL\n",
irqs ? (irqlen / 4) : 0);
opal_irq_count = irqlen / 4;
opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL);
for (i = 0; irqs && i < (irqlen / 4); i++, irqs++) {
unsigned int hwirq = be32_to_cpup(irqs);
unsigned int irq = irq_create_mapping(NULL, hwirq);
if (irq == NO_IRQ) {
pr_warning("opal: Failed to map irq 0x%x\n", hwirq);
continue;
}
rc = request_irq(irq, opal_interrupt, 0, "opal", NULL);
if (rc)
pr_warning("opal: Error %d requesting irq %d"
" (0x%x)\n", rc, irq, hwirq);
opal_irqs[i] = irq;
}
/* Create "opal" kobject under /sys/firmware */
rc = opal_sysfs_init();
if (rc == 0) {
/* Setup dump region interface */
opal_dump_region_init();
/* Setup error log interface */
rc = opal_elog_init();
/* Setup code update interface */
opal_flash_init();
/* Setup platform dump extract interface */
opal_platform_dump_init();
/* Setup system parameters interface */
opal_sys_param_init();
/* Setup message log interface. */
opal_msglog_init();
}
return 0;
}
machine_subsys_initcall(powernv, opal_init);
void opal_shutdown(void)
{
unsigned int i;
long rc = OPAL_BUSY;
/* First free interrupts, which will also mask them */
for (i = 0; i < opal_irq_count; i++) {
if (opal_irqs[i])
free_irq(opal_irqs[i], NULL);
opal_irqs[i] = 0;
}
/*
	 * Then sync with OPAL, which ensures anything that can
	 * potentially write to our memory has completed, such
	 * as an ongoing dump retrieval.
*/
while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
rc = opal_sync_host_reboot();
if (rc == OPAL_BUSY)
opal_poll_events(NULL);
else
mdelay(10);
}
/* Unregister memory dump region */
opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
}
/* Export this so that test modules can use it */
EXPORT_SYMBOL_GPL(opal_invalid_call);
/* Convert a region of vmalloc memory to an opal sg list */
struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
unsigned long vmalloc_size)
{
struct opal_sg_list *sg, *first = NULL;
unsigned long i = 0;
sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!sg)
goto nomem;
first = sg;
while (vmalloc_size > 0) {
uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
uint64_t length = min(vmalloc_size, PAGE_SIZE);
sg->entry[i].data = cpu_to_be64(data);
sg->entry[i].length = cpu_to_be64(length);
i++;
if (i >= SG_ENTRIES_PER_NODE) {
struct opal_sg_list *next;
next = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!next)
goto nomem;
sg->length = cpu_to_be64(
i * sizeof(struct opal_sg_entry) + 16);
i = 0;
sg->next = cpu_to_be64(__pa(next));
sg = next;
}
vmalloc_addr += length;
vmalloc_size -= length;
}
sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);
return first;
nomem:
pr_err("%s : Failed to allocate memory\n", __func__);
opal_free_sg_list(first);
return NULL;
}
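/* Walk an opal sg list node by node and free each chunk. The ->next
 * pointers are physical addresses, so convert them back with __va()
 * before following them.
 */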
void opal_free_sg_list(struct opal_sg_list *sg)
{
while (sg) {
uint64_t next = be64_to_cpu(sg->next);
kfree(sg);
if (next)
sg = __va(next);
else
sg = NULL;
}
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,239 @@
/*
* Support PCI/PCIe on PowerNV platforms
*
* Currently supports only P5IOC2
*
* Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>
#include <asm/sections.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include "powernv.h"
#include "pci.h"
/* For now, use a fixed amount of TCE memory for each p5ioc2
* hub, 16M will do
*/
#define P5IOC2_TCE_MEMORY 0x01000000
#ifdef CONFIG_PCI_MSI
static int pnv_pci_p5ioc2_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
unsigned int hwirq, unsigned int virq,
unsigned int is_64, struct msi_msg *msg)
{
if (WARN_ON(!is_64))
return -ENXIO;
msg->data = hwirq - phb->msi_base;
msg->address_hi = 0x10000000;
msg->address_lo = 0;
return 0;
}
static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb)
{
unsigned int count;
const __be32 *prop = of_get_property(phb->hose->dn,
"ibm,opal-msi-ranges", NULL);
if (!prop)
return;
/* Don't do MSIs on p5ioc2 PCI-X as they are not properly
* verified in HW
*/
if (of_device_is_compatible(phb->hose->dn, "ibm,p5ioc2-pcix"))
return;
phb->msi_base = be32_to_cpup(prop);
count = be32_to_cpup(prop + 1);
if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
phb->hose->global_number);
return;
}
phb->msi_setup = pnv_pci_p5ioc2_msi_setup;
phb->msi32_support = 0;
pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
count, phb->msi_base);
}
#else
static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { }
#endif /* CONFIG_PCI_MSI */
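/* Point the device at the single shared p5ioc2 IOMMU table, creating
 * the table (and its IOMMU group) on first use.
 */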
static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
struct pci_dev *pdev)
{
if (phb->p5ioc2.iommu_table.it_map == NULL) {
iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node);
iommu_register_group(&phb->p5ioc2.iommu_table,
pci_domain_nr(phb->hose->bus), phb->opal_id);
}
set_iommu_table_base_and_group(&pdev->dev, &phb->p5ioc2.iommu_table);
}
static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
void *tce_mem, u64 tce_size)
{
struct pnv_phb *phb;
const __be64 *prop64;
u64 phb_id;
int64_t rc;
static int primary = 1;
pr_info(" Initializing p5ioc2 PHB %s\n", np->full_name);
prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
if (!prop64) {
pr_err(" Missing \"ibm,opal-phbid\" property !\n");
return;
}
phb_id = be64_to_cpup(prop64);
pr_devel(" PHB-ID : 0x%016llx\n", phb_id);
pr_devel(" TCE AT : 0x%016lx\n", __pa(tce_mem));
pr_devel(" TCE SZ : 0x%016llx\n", tce_size);
rc = opal_pci_set_phb_tce_memory(phb_id, __pa(tce_mem), tce_size);
if (rc != OPAL_SUCCESS) {
pr_err(" Failed to set TCE memory, OPAL error %lld\n", rc);
return;
}
phb = alloc_bootmem(sizeof(struct pnv_phb));
if (phb) {
memset(phb, 0, sizeof(struct pnv_phb));
phb->hose = pcibios_alloc_controller(np);
}
if (!phb || !phb->hose) {
pr_err(" Failed to allocate PCI controller\n");
return;
}
spin_lock_init(&phb->lock);
phb->hose->first_busno = 0;
phb->hose->last_busno = 0xff;
phb->hose->private_data = phb;
phb->hub_id = hub_id;
phb->opal_id = phb_id;
phb->type = PNV_PHB_P5IOC2;
phb->model = PNV_PHB_MODEL_P5IOC2;
phb->regs = of_iomap(np, 0);
if (phb->regs == NULL)
pr_err(" Failed to map registers !\n");
else {
pr_devel(" P_BUID = 0x%08x\n", in_be32(phb->regs + 0x100));
pr_devel(" P_IOSZ = 0x%08x\n", in_be32(phb->regs + 0x1b0));
pr_devel(" P_IO_ST = 0x%08x\n", in_be32(phb->regs + 0x1e0));
pr_devel(" P_MEM1_H = 0x%08x\n", in_be32(phb->regs + 0x1a0));
pr_devel(" P_MEM1_L = 0x%08x\n", in_be32(phb->regs + 0x190));
pr_devel(" P_MSZ1_L = 0x%08x\n", in_be32(phb->regs + 0x1c0));
pr_devel(" P_MEM_ST = 0x%08x\n", in_be32(phb->regs + 0x1d0));
pr_devel(" P_MEM2_H = 0x%08x\n", in_be32(phb->regs + 0x2c0));
pr_devel(" P_MEM2_L = 0x%08x\n", in_be32(phb->regs + 0x2b0));
pr_devel(" P_MSZ2_H = 0x%08x\n", in_be32(phb->regs + 0x2d0));
pr_devel(" P_MSZ2_L = 0x%08x\n", in_be32(phb->regs + 0x2e0));
}
/* Interpret the "ranges" property */
/* This also maps the I/O region and sets isa_io/mem_base */
pci_process_bridge_OF_ranges(phb->hose, np, primary);
primary = 0;
phb->hose->ops = &pnv_pci_ops;
/* Setup MSI support */
pnv_pci_init_p5ioc2_msis(phb);
/* Setup TCEs */
phb->dma_dev_setup = pnv_pci_p5ioc2_dma_dev_setup;
pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table,
tce_mem, tce_size, 0,
IOMMU_PAGE_SHIFT_4K);
}
void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
{
struct device_node *phbn;
const __be64 *prop64;
u64 hub_id;
void *tce_mem;
uint64_t tce_per_phb;
int64_t rc;
int phb_count = 0;
pr_info("Probing p5ioc2 IO-Hub %s\n", np->full_name);
prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
if (!prop64) {
pr_err(" Missing \"ibm,opal-hubid\" property !\n");
return;
}
hub_id = be64_to_cpup(prop64);
pr_info(" HUB-ID : 0x%016llx\n", hub_id);
/* Currently allocate 16M of TCE memory for every Hub
*
* XXX TODO: Make it chip local if possible
*/
tce_mem = __alloc_bootmem(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY,
__pa(MAX_DMA_ADDRESS));
if (!tce_mem) {
pr_err(" Failed to allocate TCE Memory !\n");
return;
}
pr_debug(" TCE : 0x%016lx..0x%016lx\n",
__pa(tce_mem), __pa(tce_mem) + P5IOC2_TCE_MEMORY - 1);
rc = opal_pci_set_hub_tce_memory(hub_id, __pa(tce_mem),
P5IOC2_TCE_MEMORY);
if (rc != OPAL_SUCCESS) {
pr_err(" Failed to allocate TCE memory, OPAL error %lld\n", rc);
return;
}
/* Count child PHBs */
for_each_child_of_node(np, phbn) {
if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
of_device_is_compatible(phbn, "ibm,p5ioc2-pciex"))
phb_count++;
}
/* Calculate how much TCE space we can give per PHB */
tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count);
pr_info(" Allocating %lld MB of TCE memory per PHB\n",
tce_per_phb >> 20);
/* Initialize PHBs */
for_each_child_of_node(np, phbn) {
if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) {
pnv_pci_init_p5ioc2_phb(phbn, hub_id,
tce_mem, tce_per_phb);
tce_mem += tce_per_phb;
}
}
}

View file

@@ -0,0 +1,896 @@
/*
* Support PCI/PCIe on PowerNV platforms
*
* Currently supports only P5IOC2
*
* Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>
#include <linux/iommu.h>
#include <asm/sections.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/firmware.h>
#include <asm/eeh_event.h>
#include <asm/eeh.h>
#include "powernv.h"
#include "pci.h"
/* Delay in usec */
#define PCI_RESET_DELAY_US 3000000
#define cfg_dbg(fmt...) do { } while(0)
//#define cfg_dbg(fmt...) printk(fmt)
#ifdef CONFIG_PCI_MSI
static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
struct pnv_phb *phb = hose->private_data;
struct msi_desc *entry;
struct msi_msg msg;
int hwirq;
unsigned int virq;
int rc;
if (WARN_ON(!phb) || !phb->msi_bmp.bitmap)
return -ENODEV;
if (pdev->no_64bit_msi && !phb->msi32_support)
return -ENODEV;
list_for_each_entry(entry, &pdev->msi_list, list) {
if (!entry->msi_attrib.is_64 && !phb->msi32_support) {
pr_warn("%s: Supports only 64-bit MSIs\n",
pci_name(pdev));
return -ENXIO;
}
hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, 1);
if (hwirq < 0) {
pr_warn("%s: Failed to find a free MSI\n",
pci_name(pdev));
return -ENOSPC;
}
virq = irq_create_mapping(NULL, phb->msi_base + hwirq);
if (virq == NO_IRQ) {
pr_warn("%s: Failed to map MSI to linux irq\n",
pci_name(pdev));
msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1);
return -ENOMEM;
}
rc = phb->msi_setup(phb, pdev, phb->msi_base + hwirq,
virq, entry->msi_attrib.is_64, &msg);
if (rc) {
pr_warn("%s: Failed to setup MSI\n", pci_name(pdev));
irq_dispose_mapping(virq);
msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1);
return rc;
}
irq_set_msi_desc(virq, entry);
write_msi_msg(virq, &msg);
}
return 0;
}
static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
struct pnv_phb *phb = hose->private_data;
struct msi_desc *entry;
if (WARN_ON(!phb))
return;
list_for_each_entry(entry, &pdev->msi_list, list) {
if (entry->irq == NO_IRQ)
continue;
irq_set_msi_desc(entry->irq, NULL);
msi_bitmap_free_hwirqs(&phb->msi_bmp,
virq_to_hw(entry->irq) - phb->msi_base, 1);
irq_dispose_mapping(entry->irq);
}
}
#endif /* CONFIG_PCI_MSI */
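/* Pretty-print the P7IOC PHB diag-data blob, skipping registers that
 * read back as zero.
 */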
static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
struct OpalIoPhbErrorCommon *common)
{
struct OpalIoP7IOCPhbErrorData *data;
int i;
data = (struct OpalIoP7IOCPhbErrorData *)common;
pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n",
hose->global_number, be32_to_cpu(common->version));
if (data->brdgCtl)
pr_info("brdgCtl: %08x\n",
be32_to_cpu(data->brdgCtl));
if (data->portStatusReg || data->rootCmplxStatus ||
data->busAgentStatus)
pr_info("UtlSts: %08x %08x %08x\n",
be32_to_cpu(data->portStatusReg),
be32_to_cpu(data->rootCmplxStatus),
be32_to_cpu(data->busAgentStatus));
if (data->deviceStatus || data->slotStatus ||
data->linkStatus || data->devCmdStatus ||
data->devSecStatus)
pr_info("RootSts: %08x %08x %08x %08x %08x\n",
be32_to_cpu(data->deviceStatus),
be32_to_cpu(data->slotStatus),
be32_to_cpu(data->linkStatus),
be32_to_cpu(data->devCmdStatus),
be32_to_cpu(data->devSecStatus));
if (data->rootErrorStatus || data->uncorrErrorStatus ||
data->corrErrorStatus)
pr_info("RootErrSts: %08x %08x %08x\n",
be32_to_cpu(data->rootErrorStatus),
be32_to_cpu(data->uncorrErrorStatus),
be32_to_cpu(data->corrErrorStatus));
if (data->tlpHdr1 || data->tlpHdr2 ||
data->tlpHdr3 || data->tlpHdr4)
pr_info("RootErrLog: %08x %08x %08x %08x\n",
be32_to_cpu(data->tlpHdr1),
be32_to_cpu(data->tlpHdr2),
be32_to_cpu(data->tlpHdr3),
be32_to_cpu(data->tlpHdr4));
if (data->sourceId || data->errorClass ||
data->correlator)
pr_info("RootErrLog1: %08x %016llx %016llx\n",
be32_to_cpu(data->sourceId),
be64_to_cpu(data->errorClass),
be64_to_cpu(data->correlator));
if (data->p7iocPlssr || data->p7iocCsr)
pr_info("PhbSts: %016llx %016llx\n",
be64_to_cpu(data->p7iocPlssr),
be64_to_cpu(data->p7iocCsr));
if (data->lemFir)
pr_info("Lem: %016llx %016llx %016llx\n",
be64_to_cpu(data->lemFir),
be64_to_cpu(data->lemErrorMask),
be64_to_cpu(data->lemWOF));
if (data->phbErrorStatus)
pr_info("PhbErr: %016llx %016llx %016llx %016llx\n",
be64_to_cpu(data->phbErrorStatus),
be64_to_cpu(data->phbFirstErrorStatus),
be64_to_cpu(data->phbErrorLog0),
be64_to_cpu(data->phbErrorLog1));
if (data->mmioErrorStatus)
pr_info("OutErr: %016llx %016llx %016llx %016llx\n",
be64_to_cpu(data->mmioErrorStatus),
be64_to_cpu(data->mmioFirstErrorStatus),
be64_to_cpu(data->mmioErrorLog0),
be64_to_cpu(data->mmioErrorLog1));
if (data->dma0ErrorStatus)
pr_info("InAErr: %016llx %016llx %016llx %016llx\n",
be64_to_cpu(data->dma0ErrorStatus),
be64_to_cpu(data->dma0FirstErrorStatus),
be64_to_cpu(data->dma0ErrorLog0),
be64_to_cpu(data->dma0ErrorLog1));
if (data->dma1ErrorStatus)
pr_info("InBErr: %016llx %016llx %016llx %016llx\n",
be64_to_cpu(data->dma1ErrorStatus),
be64_to_cpu(data->dma1FirstErrorStatus),
be64_to_cpu(data->dma1ErrorLog0),
be64_to_cpu(data->dma1ErrorLog1));
for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 &&
(be64_to_cpu(data->pestB[i]) >> 63) == 0)
continue;
pr_info("PE[%3d] A/B: %016llx %016llx\n",
i, be64_to_cpu(data->pestA[i]),
be64_to_cpu(data->pestB[i]));
}
}
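/* As above, but for the PHB3 diag-data layout. */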
static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
struct OpalIoPhbErrorCommon *common)
{
struct OpalIoPhb3ErrorData *data;
int i;
data = (struct OpalIoPhb3ErrorData*)common;
pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n",
hose->global_number, be32_to_cpu(common->version));
if (data->brdgCtl)
pr_info("brdgCtl: %08x\n",
be32_to_cpu(data->brdgCtl));
if (data->portStatusReg || data->rootCmplxStatus ||
data->busAgentStatus)
pr_info("UtlSts: %08x %08x %08x\n",
be32_to_cpu(data->portStatusReg),
be32_to_cpu(data->rootCmplxStatus),
be32_to_cpu(data->busAgentStatus));
if (data->deviceStatus || data->slotStatus ||
data->linkStatus || data->devCmdStatus ||
data->devSecStatus)
pr_info("RootSts: %08x %08x %08x %08x %08x\n",
be32_to_cpu(data->deviceStatus),
be32_to_cpu(data->slotStatus),
be32_to_cpu(data->linkStatus),
be32_to_cpu(data->devCmdStatus),
be32_to_cpu(data->devSecStatus));
if (data->rootErrorStatus || data->uncorrErrorStatus ||
data->corrErrorStatus)
pr_info("RootErrSts: %08x %08x %08x\n",
be32_to_cpu(data->rootErrorStatus),
be32_to_cpu(data->uncorrErrorStatus),
be32_to_cpu(data->corrErrorStatus));
if (data->tlpHdr1 || data->tlpHdr2 ||
data->tlpHdr3 || data->tlpHdr4)
pr_info("RootErrLog: %08x %08x %08x %08x\n",
be32_to_cpu(data->tlpHdr1),
be32_to_cpu(data->tlpHdr2),
be32_to_cpu(data->tlpHdr3),
be32_to_cpu(data->tlpHdr4));
if (data->sourceId || data->errorClass ||
data->correlator)
pr_info("RootErrLog1: %08x %016llx %016llx\n",
be32_to_cpu(data->sourceId),
be64_to_cpu(data->errorClass),
be64_to_cpu(data->correlator));
if (data->nFir)
pr_info("nFir: %016llx %016llx %016llx\n",
be64_to_cpu(data->nFir),
be64_to_cpu(data->nFirMask),
be64_to_cpu(data->nFirWOF));
if (data->phbPlssr || data->phbCsr)
pr_info("PhbSts: %016llx %016llx\n",
be64_to_cpu(data->phbPlssr),
be64_to_cpu(data->phbCsr));
if (data->lemFir)
pr_info("Lem: %016llx %016llx %016llx\n",
be64_to_cpu(data->lemFir),
be64_to_cpu(data->lemErrorMask),
be64_to_cpu(data->lemWOF));
if (data->phbErrorStatus)
pr_info("PhbErr: %016llx %016llx %016llx %016llx\n",
be64_to_cpu(data->phbErrorStatus),
be64_to_cpu(data->phbFirstErrorStatus),
be64_to_cpu(data->phbErrorLog0),
be64_to_cpu(data->phbErrorLog1));
if (data->mmioErrorStatus)
pr_info("OutErr: %016llx %016llx %016llx %016llx\n",
be64_to_cpu(data->mmioErrorStatus),
be64_to_cpu(data->mmioFirstErrorStatus),
be64_to_cpu(data->mmioErrorLog0),
be64_to_cpu(data->mmioErrorLog1));
if (data->dma0ErrorStatus)
pr_info("InAErr: %016llx %016llx %016llx %016llx\n",
be64_to_cpu(data->dma0ErrorStatus),
be64_to_cpu(data->dma0FirstErrorStatus),
be64_to_cpu(data->dma0ErrorLog0),
be64_to_cpu(data->dma0ErrorLog1));
if (data->dma1ErrorStatus)
pr_info("InBErr: %016llx %016llx %016llx %016llx\n",
be64_to_cpu(data->dma1ErrorStatus),
be64_to_cpu(data->dma1FirstErrorStatus),
be64_to_cpu(data->dma1ErrorLog0),
be64_to_cpu(data->dma1ErrorLog1));
for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 &&
(be64_to_cpu(data->pestB[i]) >> 63) == 0)
continue;
pr_info("PE[%3d] A/B: %016llx %016llx\n",
i, be64_to_cpu(data->pestA[i]),
be64_to_cpu(data->pestB[i]));
}
}
void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
unsigned char *log_buff)
{
struct OpalIoPhbErrorCommon *common;
if (!hose || !log_buff)
return;
common = (struct OpalIoPhbErrorCommon *)log_buff;
switch (be32_to_cpu(common->ioType)) {
case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
pnv_pci_dump_p7ioc_diag_data(hose, common);
break;
case OPAL_PHB_ERROR_DATA_TYPE_PHB3:
pnv_pci_dump_phb3_diag_data(hose, common);
break;
default:
pr_warn("%s: Unrecognized ioType %d\n",
__func__, be32_to_cpu(common->ioType));
}
}
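/* A config access found the PE frozen: fetch the PHB diag-data, clear
 * the freeze, and only dump the diag-data if the clear failed.
 */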
static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
{
unsigned long flags, rc;
int has_diag, ret = 0;
spin_lock_irqsave(&phb->lock, flags);
/* Fetch PHB diag-data */
rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
PNV_PCI_DIAG_BUF_SIZE);
has_diag = (rc == OPAL_SUCCESS);
/* If the PHB supports compound PEs, use its unfreeze hook */
if (phb->unfreeze_pe) {
ret = phb->unfreeze_pe(phb,
pe_no,
OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
} else {
rc = opal_pci_eeh_freeze_clear(phb->opal_id,
pe_no,
OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
if (rc) {
pr_warn("%s: Failure %ld clearing frozen "
"PHB#%x-PE#%x\n",
__func__, rc, phb->hose->global_number,
pe_no);
ret = -EIO;
}
}
/*
* For now, let's only display the diag buffer when we fail to clear
* the EEH status. We'll do more sensible things later when we have
* proper EEH support. We need to make sure we don't pollute ourselves
* with the normal errors generated when probing empty slots
*/
if (has_diag && ret)
pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob);
spin_unlock_irqrestore(&phb->lock, flags);
}
static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
struct device_node *dn)
{
u8 fstate;
__be16 pcierr;
int pe_no;
s64 rc;
/*
* Get the PE#. During the PCI probe stage, it might not have
* been set up yet, so map all ER errors to the reserved PE.
*/
pe_no = PCI_DN(dn)->pe_number;
if (pe_no == IODA_INVALID_PE) {
if (phb->type == PNV_PHB_P5IOC2)
pe_no = 0;
else
pe_no = phb->ioda.reserved_pe;
}
/*
* Fetch the frozen state. If the PHB supports compound PEs,
* we need to handle that case.
*/
if (phb->get_pe_state) {
fstate = phb->get_pe_state(phb, pe_no);
} else {
rc = opal_pci_eeh_freeze_status(phb->opal_id,
pe_no,
&fstate,
&pcierr,
NULL);
if (rc) {
pr_warn("%s: Failure %lld getting PHB#%x-PE#%x state\n",
__func__, rc, phb->hose->global_number, pe_no);
return;
}
}
cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
(PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn),
pe_no, fstate);
/* Clear the frozen state if applicable */
if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE ||
fstate == OPAL_EEH_STOPPED_DMA_FREEZE ||
fstate == OPAL_EEH_STOPPED_MMIO_DMA_FREEZE) {
/*
* If PHB supports compound PE, freeze it for
* consistency.
*/
if (phb->freeze_pe)
phb->freeze_pe(phb, pe_no);
pnv_pci_handle_eeh_config(phb, pe_no);
}
}
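/* Low-level config space accessors, routed through OPAL. Reads that
 * fail return all-ones, matching what a master abort would produce.
 */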
int pnv_pci_cfg_read(struct device_node *dn,
int where, int size, u32 *val)
{
struct pci_dn *pdn = PCI_DN(dn);
struct pnv_phb *phb = pdn->phb->private_data;
u32 bdfn = (pdn->busno << 8) | pdn->devfn;
s64 rc;
switch (size) {
case 1: {
u8 v8;
rc = opal_pci_config_read_byte(phb->opal_id, bdfn, where, &v8);
*val = (rc == OPAL_SUCCESS) ? v8 : 0xff;
break;
}
case 2: {
__be16 v16;
rc = opal_pci_config_read_half_word(phb->opal_id, bdfn, where,
&v16);
*val = (rc == OPAL_SUCCESS) ? be16_to_cpu(v16) : 0xffff;
break;
}
case 4: {
__be32 v32;
rc = opal_pci_config_read_word(phb->opal_id, bdfn, where, &v32);
*val = (rc == OPAL_SUCCESS) ? be32_to_cpu(v32) : 0xffffffff;
break;
}
default:
return PCIBIOS_FUNC_NOT_SUPPORTED;
}
cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
__func__, pdn->busno, pdn->devfn, where, size, *val);
return PCIBIOS_SUCCESSFUL;
}
int pnv_pci_cfg_write(struct device_node *dn,
int where, int size, u32 val)
{
struct pci_dn *pdn = PCI_DN(dn);
struct pnv_phb *phb = pdn->phb->private_data;
u32 bdfn = (pdn->busno << 8) | pdn->devfn;
cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
pdn->busno, pdn->devfn, where, size, val);
switch (size) {
case 1:
opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
break;
case 2:
opal_pci_config_write_half_word(phb->opal_id, bdfn, where, val);
break;
case 4:
opal_pci_config_write_word(phb->opal_id, bdfn, where, val);
break;
default:
return PCIBIOS_FUNC_NOT_SUPPORTED;
}
return PCIBIOS_SUCCESSFUL;
}
#ifdef CONFIG_EEH
static bool pnv_pci_cfg_check(struct pci_controller *hose,
struct device_node *dn)
{
struct eeh_dev *edev = NULL;
struct pnv_phb *phb = hose->private_data;
/* EEH not enabled ? */
if (!(phb->flags & PNV_PHB_FLAG_EEH))
return true;
/* PE reset or device removed ? */
edev = of_node_to_eeh_dev(dn);
if (edev) {
if (edev->pe &&
(edev->pe->state & EEH_PE_CFG_BLOCKED))
return false;
if (edev->mode & EEH_DEV_REMOVED)
return false;
}
return true;
}
#else
static inline bool pnv_pci_cfg_check(struct pci_controller *hose,
struct device_node *dn)
{
return true;
}
#endif /* CONFIG_EEH */
static int pnv_pci_read_config(struct pci_bus *bus,
unsigned int devfn,
int where, int size, u32 *val)
{
struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
struct pci_dn *pdn;
struct pnv_phb *phb;
bool found = false;
int ret;
*val = 0xFFFFFFFF;
for (dn = busdn->child; dn; dn = dn->sibling) {
pdn = PCI_DN(dn);
if (pdn && pdn->devfn == devfn) {
phb = pdn->phb->private_data;
found = true;
break;
}
}
if (!found || !pnv_pci_cfg_check(pdn->phb, dn))
return PCIBIOS_DEVICE_NOT_FOUND;
ret = pnv_pci_cfg_read(dn, where, size, val);
if (phb->flags & PNV_PHB_FLAG_EEH) {
if (*val == EEH_IO_ERROR_VALUE(size) &&
eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
return PCIBIOS_DEVICE_NOT_FOUND;
} else {
pnv_pci_config_check_eeh(phb, dn);
}
return ret;
}
static int pnv_pci_write_config(struct pci_bus *bus,
unsigned int devfn,
int where, int size, u32 val)
{
struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
struct pci_dn *pdn;
struct pnv_phb *phb;
bool found = false;
int ret;
for (dn = busdn->child; dn; dn = dn->sibling) {
pdn = PCI_DN(dn);
if (pdn && pdn->devfn == devfn) {
phb = pdn->phb->private_data;
found = true;
break;
}
}
if (!found || !pnv_pci_cfg_check(pdn->phb, dn))
return PCIBIOS_DEVICE_NOT_FOUND;
ret = pnv_pci_cfg_write(dn, where, size, val);
if (!(phb->flags & PNV_PHB_FLAG_EEH))
pnv_pci_config_check_eeh(phb, dn);
return ret;
}
struct pci_ops pnv_pci_ops = {
.read = pnv_pci_read_config,
.write = pnv_pci_write_config,
};
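/* Populate 'npages' TCEs starting at 'index' so they translate to the
 * physical pages backing 'uaddr', with read (and, unless the direction
 * is DMA_TO_DEVICE, write) permission, then kick the SW invalidation
 * hook if the table needs it.
 */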
static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
unsigned long uaddr, enum dma_data_direction direction,
struct dma_attrs *attrs, bool rm)
{
u64 proto_tce;
__be64 *tcep, *tces;
u64 rpn;
proto_tce = TCE_PCI_READ; // Read allowed
if (direction != DMA_TO_DEVICE)
proto_tce |= TCE_PCI_WRITE;
tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
rpn = __pa(uaddr) >> tbl->it_page_shift;
while (npages--)
*(tcep++) = cpu_to_be64(proto_tce |
(rpn++ << tbl->it_page_shift));
/* Some implementations won't cache invalid TCEs and thus may not
* need that flush. We'll probably turn it_type into a bit mask
* of flags if that becomes the case
*/
if (tbl->it_type & TCE_PCI_SWINV_CREATE)
pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
return 0;
}
static int pnv_tce_build_vm(struct iommu_table *tbl, long index, long npages,
unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs,
false);
}
static void pnv_tce_free(struct iommu_table *tbl, long index, long npages,
bool rm)
{
__be64 *tcep, *tces;
tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
while (npages--)
*(tcep++) = cpu_to_be64(0);
if (tbl->it_type & TCE_PCI_SWINV_FREE)
pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
}
static void pnv_tce_free_vm(struct iommu_table *tbl, long index, long npages)
{
pnv_tce_free(tbl, index, npages, false);
}
static unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
{
return be64_to_cpu(((__be64 *)tbl->it_base)[index - tbl->it_offset]);
}
static int pnv_tce_build_rm(struct iommu_table *tbl, long index, long npages,
unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, true);
}
static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages)
{
pnv_tce_free(tbl, index, npages, true);
}
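/* Fill in an iommu_table describing a linear TCE table at tce_mem,
 * covering tce_size bytes of 8-byte TCEs and starting at DMA address
 * dma_offset.
 */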
void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
void *tce_mem, u64 tce_size,
u64 dma_offset, unsigned page_shift)
{
tbl->it_blocksize = 16;
tbl->it_base = (unsigned long)tce_mem;
tbl->it_page_shift = page_shift;
tbl->it_offset = dma_offset >> tbl->it_page_shift;
tbl->it_index = 0;
tbl->it_size = tce_size >> 3;
tbl->it_busno = 0;
tbl->it_type = TCE_PCI;
}
static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose)
{
struct iommu_table *tbl;
const __be64 *basep, *swinvp;
const __be32 *sizep;
basep = of_get_property(hose->dn, "linux,tce-base", NULL);
sizep = of_get_property(hose->dn, "linux,tce-size", NULL);
if (basep == NULL || sizep == NULL) {
pr_err("PCI: %s has missing tce entries !\n",
hose->dn->full_name);
return NULL;
}
tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, hose->node);
if (WARN_ON(!tbl))
return NULL;
pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
be32_to_cpup(sizep), 0, IOMMU_PAGE_SHIFT_4K);
iommu_init_table(tbl, hose->node);
iommu_register_group(tbl, pci_domain_nr(hose->bus), 0);
/* Deal with SW invalidated TCEs when needed (BML way) */
swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info",
NULL);
if (swinvp) {
tbl->it_busno = be64_to_cpu(swinvp[1]);
tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
}
return tbl;
}
static void pnv_pci_dma_fallback_setup(struct pci_controller *hose,
struct pci_dev *pdev)
{
struct device_node *np = pci_bus_to_OF_node(hose->bus);
struct pci_dn *pdn;
if (np == NULL)
return;
pdn = PCI_DN(np);
if (!pdn->iommu_table)
pdn->iommu_table = pnv_pci_setup_bml_iommu(hose);
if (!pdn->iommu_table)
return;
set_iommu_table_base_and_group(&pdev->dev, pdn->iommu_table);
}
static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
struct pnv_phb *phb = hose->private_data;
/* If we have no phb structure, try to setup a fallback based on
* the device-tree (RTAS PCI for example)
*/
if (phb && phb->dma_dev_setup)
phb->dma_dev_setup(phb, pdev);
else
pnv_pci_dma_fallback_setup(hose, pdev);
}
int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
struct pnv_phb *phb = hose->private_data;
if (phb && phb->dma_set_mask)
return phb->dma_set_mask(phb, pdev, dma_mask);
return __dma_set_mask(&pdev->dev, dma_mask);
}
u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
struct pnv_phb *phb = hose->private_data;
if (phb && phb->dma_get_required_mask)
return phb->dma_get_required_mask(phb, pdev);
return __dma_get_required_mask(&pdev->dev);
}
void pnv_pci_shutdown(void)
{
struct pci_controller *hose;
list_for_each_entry(hose, &hose_list, list_node) {
struct pnv_phb *phb = hose->private_data;
if (phb && phb->shutdown)
phb->shutdown(phb);
}
}
/* Fixup wrong class code in p7ioc and p8 root complex */
static void pnv_p7ioc_rc_quirk(struct pci_dev *dev)
{
dev->class = PCI_CLASS_BRIDGE_PCI << 8;
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);
static int pnv_pci_probe_mode(struct pci_bus *bus)
{
struct pci_controller *hose = pci_bus_to_host(bus);
const __be64 *tstamp;
u64 now, target;
/* We hijack this as a way to ensure we have waited long
* enough since the reset was lifted on the PCI bus
*/
if (bus != hose->bus)
return PCI_PROBE_NORMAL;
tstamp = of_get_property(hose->dn, "reset-clear-timestamp", NULL);
if (!tstamp || !*tstamp)
return PCI_PROBE_NORMAL;
now = mftb() / tb_ticks_per_usec;
target = (be64_to_cpup(tstamp) / tb_ticks_per_usec)
+ PCI_RESET_DELAY_US;
pr_devel("pci %04d: Reset target: 0x%llx now: 0x%llx\n",
hose->global_number, target, now);
if (now < target)
msleep((target - now + 999) / 1000);
return PCI_PROBE_NORMAL;
}
void __init pnv_pci_init(void)
{
struct device_node *np;
pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN);
/* OPAL absent, try POPAL first then RTAS detection of PHBs */
if (!firmware_has_feature(FW_FEATURE_OPAL)) {
#ifdef CONFIG_PPC_POWERNV_RTAS
init_pci_config_tokens();
find_and_init_phbs();
#endif /* CONFIG_PPC_POWERNV_RTAS */
}
/* OPAL is here, do our normal stuff */
else {
int found_ioda = 0;
/* Look for IODA IO-Hubs. We don't support mixing IODA
* and p5ioc2 due to the need to change some global
* probing flags
*/
for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
pnv_pci_init_ioda_hub(np);
found_ioda = 1;
}
/* Look for p5ioc2 IO-Hubs */
if (!found_ioda)
for_each_compatible_node(np, NULL, "ibm,p5ioc2")
pnv_pci_init_p5ioc2_hub(np);
/* Look for ioda2 built-in PHB3's */
for_each_compatible_node(np, NULL, "ibm,ioda2-phb")
pnv_pci_init_ioda2_phb(np);
}
/* Setup the linkage between OF nodes and PHBs */
pci_devs_phb_init();
/* Configure IOMMU DMA hooks */
ppc_md.pci_dma_dev_setup = pnv_pci_dma_dev_setup;
ppc_md.tce_build = pnv_tce_build_vm;
ppc_md.tce_free = pnv_tce_free_vm;
ppc_md.tce_build_rm = pnv_tce_build_rm;
ppc_md.tce_free_rm = pnv_tce_free_rm;
ppc_md.tce_get = pnv_tce_get;
ppc_md.pci_probe_mode = pnv_pci_probe_mode;
set_pci_dma_ops(&dma_iommu_ops);
/* Configure MSIs */
#ifdef CONFIG_PCI_MSI
ppc_md.setup_msi_irqs = pnv_setup_msi_irqs;
ppc_md.teardown_msi_irqs = pnv_teardown_msi_irqs;
#endif
}
static int tce_iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct device *dev = data;
switch (action) {
case BUS_NOTIFY_ADD_DEVICE:
return iommu_add_device(dev);
case BUS_NOTIFY_DEL_DEVICE:
if (dev->iommu_group)
iommu_del_device(dev);
return 0;
default:
return 0;
}
}
static struct notifier_block tce_iommu_bus_nb = {
.notifier_call = tce_iommu_bus_notifier,
};
static int __init tce_iommu_bus_notifier_init(void)
{
bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
return 0;
}
machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init);

View file

@@ -0,0 +1,237 @@
#ifndef __POWERNV_PCI_H
#define __POWERNV_PCI_H
struct pci_dn;
enum pnv_phb_type {
PNV_PHB_P5IOC2 = 0,
PNV_PHB_IODA1 = 1,
PNV_PHB_IODA2 = 2,
};
/* Precise PHB model for error management */
enum pnv_phb_model {
PNV_PHB_MODEL_UNKNOWN,
PNV_PHB_MODEL_P5IOC2,
PNV_PHB_MODEL_P7IOC,
PNV_PHB_MODEL_PHB3,
};
#define PNV_PCI_DIAG_BUF_SIZE 8192
#define PNV_IODA_PE_DEV (1 << 0) /* PE has single PCI device */
#define PNV_IODA_PE_BUS (1 << 1) /* PE has primary PCI bus */
#define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */
#define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */
#define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */
/* Data associated with a PE, including IOMMU tracking etc.. */
struct pnv_phb;
struct pnv_ioda_pe {
unsigned long flags;
struct pnv_phb *phb;
/* A PE can be associated with a single device or an
* entire bus (& children). In the former case, pdev
* is populated, in the latter case, pbus is.
*/
struct pci_dev *pdev;
struct pci_bus *pbus;
/* Effective RID (device RID for a device PE and base bus
* RID with devfn 0 for a bus PE)
*/
unsigned int rid;
/* PE number */
unsigned int pe_number;
/* "Weight" assigned to the PE for the sake of DMA resource
* allocations
*/
unsigned int dma_weight;
/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
int tce32_seg;
int tce32_segcount;
struct iommu_table tce32_table;
phys_addr_t tce_inval_reg_phys;
/* 64-bit TCE bypass region */
bool tce_bypass_enabled;
uint64_t tce_bypass_base;
/* MSIs. MVE index is identical for 32 and 64 bit MSI
* and -1 if not supported. (It's actually identical to the
* PE number)
*/
int mve_number;
/* PEs in compound case */
struct pnv_ioda_pe *master;
struct list_head slaves;
/* Link in list of PE#s */
struct list_head dma_link;
struct list_head list;
};
/* IOC dependent EEH operations */
#ifdef CONFIG_EEH
struct pnv_eeh_ops {
int (*post_init)(struct pci_controller *hose);
int (*set_option)(struct eeh_pe *pe, int option);
int (*get_state)(struct eeh_pe *pe);
int (*reset)(struct eeh_pe *pe, int option);
int (*get_log)(struct eeh_pe *pe, int severity,
char *drv_log, unsigned long len);
int (*configure_bridge)(struct eeh_pe *pe);
int (*err_inject)(struct eeh_pe *pe, int type, int func,
unsigned long addr, unsigned long mask);
int (*next_error)(struct eeh_pe **pe);
};
#endif /* CONFIG_EEH */
#define PNV_PHB_FLAG_EEH (1 << 0)
struct pnv_phb {
struct pci_controller *hose;
enum pnv_phb_type type;
enum pnv_phb_model model;
u64 hub_id;
u64 opal_id;
int flags;
void __iomem *regs;
int initialized;
spinlock_t lock;
#ifdef CONFIG_EEH
struct pnv_eeh_ops *eeh_ops;
#endif
#ifdef CONFIG_DEBUG_FS
int has_dbgfs;
struct dentry *dbgfs;
#endif
#ifdef CONFIG_PCI_MSI
unsigned int msi_base;
unsigned int msi32_support;
struct msi_bitmap msi_bmp;
#endif
int (*msi_setup)(struct pnv_phb *phb, struct pci_dev *dev,
unsigned int hwirq, unsigned int virq,
unsigned int is_64, struct msi_msg *msg);
void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev,
u64 dma_mask);
u64 (*dma_get_required_mask)(struct pnv_phb *phb,
struct pci_dev *pdev);
void (*fixup_phb)(struct pci_controller *hose);
u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
void (*shutdown)(struct pnv_phb *phb);
int (*init_m64)(struct pnv_phb *phb);
void (*alloc_m64_pe)(struct pnv_phb *phb);
int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all);
int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);
union {
struct {
struct iommu_table iommu_table;
} p5ioc2;
struct {
/* Global bridge info */
unsigned int total_pe;
unsigned int reserved_pe;
/* 32-bit MMIO window */
unsigned int m32_size;
unsigned int m32_segsize;
unsigned int m32_pci_base;
/* 64-bit MMIO window */
unsigned int m64_bar_idx;
unsigned long m64_size;
unsigned long m64_segsize;
unsigned long m64_base;
unsigned long m64_bar_alloc;
/* IO ports */
unsigned int io_size;
unsigned int io_segsize;
unsigned int io_pci_base;
/* PE allocation bitmap */
unsigned long *pe_alloc;
/* M32 & IO segment maps */
unsigned int *m32_segmap;
unsigned int *io_segmap;
struct pnv_ioda_pe *pe_array;
/* IRQ chip */
int irq_chip_init;
struct irq_chip irq_chip;
/* Sorted list of used PE's based
* on the sequence of creation
*/
struct list_head pe_list;
/* Reverse map of PEs, will have to extend if
* we are to support more than 256 PEs, indexed
* by { bus, devfn }
*/
unsigned char pe_rmap[0x10000];
/* 32-bit TCE tables allocation */
unsigned long tce32_count;
/* Total "weight" for the sake of DMA resources
* allocation
*/
unsigned int dma_weight;
unsigned int dma_pe_count;
/* Sorted list of used PE's, sorted at
* boot for resource allocation purposes
*/
struct list_head pe_dma_list;
} ioda;
};
/* PHB and hub status structure */
union {
unsigned char blob[PNV_PCI_DIAG_BUF_SIZE];
struct OpalIoP7IOCPhbErrorData p7ioc;
struct OpalIoPhb3ErrorData phb3;
struct OpalIoP7IOCErrorData hub_diag;
} diag;
};
extern struct pci_ops pnv_pci_ops;
#ifdef CONFIG_EEH
extern struct pnv_eeh_ops ioda_eeh_ops;
#endif
void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
unsigned char *log_buff);
int pnv_pci_cfg_read(struct device_node *dn,
int where, int size, u32 *val);
int pnv_pci_cfg_write(struct device_node *dn,
int where, int size, u32 val);
extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
void *tce_mem, u64 tce_size,
u64 dma_offset, unsigned page_shift);
extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
__be64 *startp, __be64 *endp, bool rm);
extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
extern int ioda_eeh_phb_reset(struct pci_controller *hose, int option);
#endif /* __POWERNV_PCI_H */

View file

@@ -0,0 +1,36 @@
#ifndef _POWERNV_H
#define _POWERNV_H
#ifdef CONFIG_SMP
extern void pnv_smp_init(void);
#else
static inline void pnv_smp_init(void) { }
#endif
struct pci_dev;
#ifdef CONFIG_PCI
extern void pnv_pci_init(void);
extern void pnv_pci_shutdown(void);
extern int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask);
extern u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev);
#else
static inline void pnv_pci_init(void) { }
static inline void pnv_pci_shutdown(void) { }
static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
{
return -ENODEV;
}
static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev)
{
return 0;
}
#endif
extern void pnv_lpc_init(void);
bool cpu_core_split_required(void);
#endif /* _POWERNV_H */

View file

@@ -0,0 +1,126 @@
/*
* Copyright 2013, Michael Ellerman, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) "powernv-rng: " fmt
#include <linux/kernel.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <asm/archrandom.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/smp.h>
struct powernv_rng {
void __iomem *regs;
unsigned long mask;
};
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
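/* Cheap whitening: XOR each sample with a running mask built from the
 * parity of the previous samples.
 */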
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
/* Calculate the parity of the value */
asm ("popcntd %0,%1" : "=r" (parity) : "r" (val));
/* xor our value with the previous mask */
val ^= rng->mask;
/* update the mask based on the parity of this value */
rng->mask = (rng->mask << 1) | (parity & 1);
return val;
}
int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
rng = get_cpu_var(powernv_rng);
*v = rng_whiten(rng, in_be64(rng->regs));
put_cpu_var(rng);
return 1;
}
EXPORT_SYMBOL_GPL(powernv_get_random_long);
static __init void rng_init_per_cpu(struct powernv_rng *rng,
struct device_node *dn)
{
int chip_id, cpu;
chip_id = of_get_ibm_chip_id(dn);
if (chip_id == -1)
pr_warn("No ibm,chip-id found for %s.\n", dn->full_name);
for_each_possible_cpu(cpu) {
if (per_cpu(powernv_rng, cpu) == NULL ||
cpu_to_chip_id(cpu) == chip_id) {
per_cpu(powernv_rng, cpu) = rng;
}
}
}
static __init int rng_create(struct device_node *dn)
{
struct powernv_rng *rng;
unsigned long val;
rng = kzalloc(sizeof(*rng), GFP_KERNEL);
if (!rng)
return -ENOMEM;
rng->regs = of_iomap(dn, 0);
if (!rng->regs) {
kfree(rng);
return -ENXIO;
}
val = in_be64(rng->regs);
rng->mask = val;
rng_init_per_cpu(rng, dn);
pr_info_once("Registering arch random hook.\n");
ppc_md.get_random_long = powernv_get_random_long;
return 0;
}
static __init int rng_init(void)
{
struct device_node *dn;
int rc;
for_each_compatible_node(dn, NULL, "ibm,power-rng") {
rc = rng_create(dn);
if (rc) {
pr_err("Failed creating rng for %s (%d).\n",
dn->full_name, rc);
continue;
}
/* Create devices for hwrng driver */
of_platform_device_create(dn, NULL, NULL);
}
return 0;
}
machine_subsys_initcall(powernv, rng_init);

View file

@@ -0,0 +1,353 @@
/*
* PowerNV setup code.
*
* Copyright 2011 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#undef DEBUG
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/tty.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/console.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/seq_file.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/interrupt.h>
#include <linux/bug.h>
#include <linux/pci.h>
#include <linux/cpufreq.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/xics.h>
#include <asm/rtas.h>
#include <asm/opal.h>
#include <asm/kexec.h>
#include <asm/smp.h>
#include "powernv.h"
static void __init pnv_setup_arch(void)
{
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
/* Initialize SMP */
pnv_smp_init();
/* Setup PCI */
pnv_pci_init();
/* Setup RTC and NVRAM callbacks */
if (firmware_has_feature(FW_FEATURE_OPAL))
opal_nvram_init();
/* Enable NAP mode */
powersave_nap = 1;
/* XXX PMCS */
}
static void __init pnv_init_early(void)
{
/*
* Initialize the LPC bus now so that legacy serial
* ports can be found on it
*/
opal_lpc_init();
#ifdef CONFIG_HVC_OPAL
if (firmware_has_feature(FW_FEATURE_OPAL))
hvc_opal_init_early();
else
#endif
add_preferred_console("hvc", 0, NULL);
}
static void __init pnv_init_IRQ(void)
{
xics_init();
WARN_ON(!ppc_md.get_irq);
}
static void pnv_show_cpuinfo(struct seq_file *m)
{
struct device_node *root;
const char *model = "";
root = of_find_node_by_path("/");
if (root)
model = of_get_property(root, "model", NULL);
seq_printf(m, "machine\t\t: PowerNV %s\n", model);
if (firmware_has_feature(FW_FEATURE_OPALv3))
seq_printf(m, "firmware\t: OPAL v3\n");
else if (firmware_has_feature(FW_FEATURE_OPALv2))
seq_printf(m, "firmware\t: OPAL v2\n");
else if (firmware_has_feature(FW_FEATURE_OPAL))
seq_printf(m, "firmware\t: OPAL v1\n");
else
seq_printf(m, "firmware\t: BML\n");
of_node_put(root);
}
static void pnv_prepare_going_down(void)
{
/*
* Disable all notifiers from OPAL, we can't
* service interrupts anymore anyway
*/
opal_notifier_disable();
/* Soft disable interrupts */
local_irq_disable();
/*
* Return secondary CPUs to firmware if a flash update
* is pending, otherwise we will get all sorts of error
* messages about CPUs being stuck etc. This will also
* have the side effect of hard disabling interrupts so
* past this point, the kernel is effectively dead.
*/
opal_flash_term_callback();
}
static void __noreturn pnv_restart(char *cmd)
{
long rc = OPAL_BUSY;
pnv_prepare_going_down();
while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
rc = opal_cec_reboot();
if (rc == OPAL_BUSY_EVENT)
opal_poll_events(NULL);
else
mdelay(10);
}
for (;;)
opal_poll_events(NULL);
}
static void __noreturn pnv_power_off(void)
{
long rc = OPAL_BUSY;
pnv_prepare_going_down();
while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
rc = opal_cec_power_down(0);
if (rc == OPAL_BUSY_EVENT)
opal_poll_events(NULL);
else
mdelay(10);
}
for (;;)
opal_poll_events(NULL);
}
static void __noreturn pnv_halt(void)
{
pnv_power_off();
}
static void pnv_progress(char *s, unsigned short hex)
{
}
static int pnv_dma_set_mask(struct device *dev, u64 dma_mask)
{
if (dev_is_pci(dev))
return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask);
return __dma_set_mask(dev, dma_mask);
}
static u64 pnv_dma_get_required_mask(struct device *dev)
{
if (dev_is_pci(dev))
return pnv_pci_dma_get_required_mask(to_pci_dev(dev));
return __dma_get_required_mask(dev);
}
static void pnv_shutdown(void)
{
/* Let the PCI code clear up IODA tables */
pnv_pci_shutdown();
/*
* Stop OPAL activity: Unregister all OPAL interrupts so they
* don't fire up while we kexec and make sure all potentially
* DMA'ing ops are complete (such as dump retrieval).
*/
opal_shutdown();
}
#ifdef CONFIG_KEXEC
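/* Spin until every other online CPU has returned to OPAL, printing a
 * note the first time we find ourselves waiting on a given CPU.
 */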
static void pnv_kexec_wait_secondaries_down(void)
{
int my_cpu, i, notified = -1;
my_cpu = get_cpu();
for_each_online_cpu(i) {
uint8_t status;
int64_t rc;
if (i == my_cpu)
continue;
for (;;) {
rc = opal_query_cpu_status(get_hard_smp_processor_id(i),
&status);
if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED)
break;
barrier();
if (i != notified) {
printk(KERN_INFO "kexec: waiting for cpu %d "
"(physical %d) to enter OPAL\n",
i, paca[i].hw_cpu_id);
notified = i;
}
}
}
}
static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
{
xics_kexec_teardown_cpu(secondary);
/* On OPAL v3, we return all CPUs to firmware */
if (!firmware_has_feature(FW_FEATURE_OPALv3))
return;
if (secondary) {
/* Return secondary CPUs to firmware on OPAL v3 */
mb();
get_paca()->kexec_state = KEXEC_STATE_REAL_MODE;
mb();
/* Return the CPU to OPAL */
opal_return_cpu();
} else if (crash_shutdown) {
/*
* On crash, we don't wait for secondaries to go
* down as they might be unreachable or hung, so
* instead we just wait a bit and move on.
*/
mdelay(1);
} else {
/* Primary waits for the secondaries to have reached OPAL */
pnv_kexec_wait_secondaries_down();
}
}
#endif /* CONFIG_KEXEC */
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
static unsigned long pnv_memory_block_size(void)
{
return 256UL * 1024 * 1024;
}
#endif
static void __init pnv_setup_machdep_opal(void)
{
ppc_md.get_boot_time = opal_get_boot_time;
ppc_md.get_rtc_time = opal_get_rtc_time;
ppc_md.set_rtc_time = opal_set_rtc_time;
ppc_md.restart = pnv_restart;
ppc_md.power_off = pnv_power_off;
ppc_md.halt = pnv_halt;
ppc_md.machine_check_exception = opal_machine_check;
ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
ppc_md.hmi_exception_early = opal_hmi_exception_early;
ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
}
#ifdef CONFIG_PPC_POWERNV_RTAS
static void __init pnv_setup_machdep_rtas(void)
{
if (rtas_token("get-time-of-day") != RTAS_UNKNOWN_SERVICE) {
ppc_md.get_boot_time = rtas_get_boot_time;
ppc_md.get_rtc_time = rtas_get_rtc_time;
ppc_md.set_rtc_time = rtas_set_rtc_time;
}
ppc_md.restart = rtas_restart;
ppc_md.power_off = rtas_power_off;
ppc_md.halt = rtas_halt;
}
#endif /* CONFIG_PPC_POWERNV_RTAS */
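/* Platform probe: match the flattened device-tree root against
 * "ibm,powernv", select the native hash MMU, and wire up the OPAL or
 * RTAS machdep callbacks.
 */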
static int __init pnv_probe(void)
{
unsigned long root = of_get_flat_dt_root();
if (!of_flat_dt_is_compatible(root, "ibm,powernv"))
return 0;
hpte_init_native();
if (firmware_has_feature(FW_FEATURE_OPAL))
pnv_setup_machdep_opal();
#ifdef CONFIG_PPC_POWERNV_RTAS
else if (rtas.base)
pnv_setup_machdep_rtas();
#endif /* CONFIG_PPC_POWERNV_RTAS */
pr_debug("PowerNV detected !\n");
return 1;
}
/*
* Returns the cpu frequency for 'cpu' in Hz. This is used by
* /proc/cpuinfo
*/
static unsigned long pnv_get_proc_freq(unsigned int cpu)
{
unsigned long ret_freq;
ret_freq = cpufreq_quick_get(cpu) * 1000ul;
/*
* If the backend cpufreq driver does not exist,
* then fall back to the old way of reporting the clock rate.
*/
if (!ret_freq)
ret_freq = ppc_proc_freq;
return ret_freq;
}
define_machine(powernv) {
.name = "PowerNV",
.probe = pnv_probe,
.init_early = pnv_init_early,
.setup_arch = pnv_setup_arch,
.init_IRQ = pnv_init_IRQ,
.show_cpuinfo = pnv_show_cpuinfo,
.get_proc_freq = pnv_get_proc_freq,
.progress = pnv_progress,
.machine_shutdown = pnv_shutdown,
.power_save = power7_idle,
.calibrate_decr = generic_calibrate_decr,
.dma_set_mask = pnv_dma_set_mask,
.dma_get_required_mask = pnv_dma_get_required_mask,
#ifdef CONFIG_KEXEC
.kexec_cpu_down = pnv_kexec_cpu_down,
#endif
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
.memory_block_size = pnv_memory_block_size,
#endif
};

View file

@@ -0,0 +1,221 @@
/*
* SMP support for PowerNV machines.
*
* Copyright 2011 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/cpu.h>
#include <asm/irq.h>
#include <asm/smp.h>
#include <asm/paca.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/firmware.h>
#include <asm/rtas.h>
#include <asm/vdso_datapage.h>
#include <asm/cputhreads.h>
#include <asm/xics.h>
#include <asm/opal.h>
#include <asm/runlatch.h>
#include <asm/code-patching.h>
#include <asm/dbell.h>
#include "powernv.h"
#ifdef DEBUG
#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif
static void pnv_smp_setup_cpu(int cpu)
{
if (cpu != boot_cpuid)
xics_setup_cpu();
#ifdef CONFIG_PPC_DOORBELL
if (cpu_has_feature(CPU_FTR_DBELL))
doorbell_setup_this_cpu();
#endif
}
static int pnv_smp_kick_cpu(int nr)
{
unsigned int pcpu = get_hard_smp_processor_id(nr);
unsigned long start_here =
__pa(ppc_function_entry(generic_secondary_smp_init));
long rc;
BUG_ON(nr < 0 || nr >= NR_CPUS);
/*
* If the CPU has already started or OPALv2 is not supported, we
* just kick it via the PACA
*/
if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPALv2))
goto kick;
/*
* At this point, the CPU can either be spinning on the way in
* from kexec or be inside OPAL waiting to be started for the
* first time. OPAL v3 allows us to query OPAL to know if it
* has the CPUs, so we do that
*/
if (firmware_has_feature(FW_FEATURE_OPALv3)) {
uint8_t status;
rc = opal_query_cpu_status(pcpu, &status);
if (rc != OPAL_SUCCESS) {
pr_warn("OPAL Error %ld querying CPU %d state\n",
rc, nr);
return -ENODEV;
}
/*
* Already started, just kick it, probably coming from
* kexec and spinning
*/
if (status == OPAL_THREAD_STARTED)
goto kick;
/*
* Available/inactive, let's kick it
*/
if (status == OPAL_THREAD_INACTIVE) {
pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n",
nr, pcpu);
rc = opal_start_cpu(pcpu, start_here);
if (rc != OPAL_SUCCESS) {
pr_warn("OPAL Error %ld starting CPU %d\n",
rc, nr);
return -ENODEV;
}
} else {
/*
* An unavailable CPU (or any other unknown status)
* shouldn't be started. It should also
* not be in the possible map but currently it can
* happen
*/
pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable"
" (status %d)...\n", nr, pcpu, status);
return -ENODEV;
}
} else {
/*
* On OPAL v2, we just kick it and hope for the best,
* we must not test the error from opal_start_cpu() or
* we would fail to get CPUs from kexec.
*/
opal_start_cpu(pcpu, start_here);
}
kick:
return smp_generic_kick_cpu(nr);
}
#ifdef CONFIG_HOTPLUG_CPU
static int pnv_smp_cpu_disable(void)
{
int cpu = smp_processor_id();
/* This is identical to pSeries... might consolidate by
* moving migrate_irqs_away to a ppc_md with default to
* the generic fixup_irqs. --BenH.
*/
set_cpu_online(cpu, false);
vdso_data->processorCount--;
if (cpu == boot_cpuid)
boot_cpuid = cpumask_any(cpu_online_mask);
xics_migrate_irqs_away();
return 0;
}
static void pnv_smp_cpu_kill_self(void)
{
unsigned int cpu;
/* Standard hot unplug procedure */
local_irq_disable();
idle_task_exit();
current->active_mm = NULL; /* for sanity */
cpu = smp_processor_id();
DBG("CPU%d offline\n", cpu);
generic_set_cpu_dead(cpu);
smp_wmb();
/* We don't want to take decrementer interrupts while we are offline,
* so clear LPCR:PECE1. We keep PECE2 enabled.
*/
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
while (!generic_check_cpu_restart(cpu)) {
ppc64_runlatch_off();
power7_nap(1);
ppc64_runlatch_on();
/* Clear the IPI that woke us up */
icp_native_flush_interrupt();
local_paca->irq_happened &= PACA_IRQ_HARD_DIS;
mb();
if (cpu_core_split_required())
continue;
if (!generic_check_cpu_restart(cpu))
DBG("CPU%d Unexpected exit while offline !\n", cpu);
}
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1);
DBG("CPU%d coming online...\n", cpu);
}
#endif /* CONFIG_HOTPLUG_CPU */
static struct smp_ops_t pnv_smp_ops = {
.message_pass = smp_muxed_ipi_message_pass,
.cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */
.probe = xics_smp_probe,
.kick_cpu = pnv_smp_kick_cpu,
.setup_cpu = pnv_smp_setup_cpu,
.cpu_bootable = smp_generic_cpu_bootable,
#ifdef CONFIG_HOTPLUG_CPU
.cpu_disable = pnv_smp_cpu_disable,
.cpu_die = generic_cpu_die,
#endif /* CONFIG_HOTPLUG_CPU */
};
/* This is called very early during platform setup_arch */
void __init pnv_smp_init(void)
{
smp_ops = &pnv_smp_ops;
/* XXX We don't yet have a proper entry point from HAL, for
* now we rely on kexec-style entry from BML
*/
#ifdef CONFIG_PPC_RTAS
/* Non-lpar has additional take/give timebase */
if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
smp_ops->give_timebase = rtas_give_timebase;
smp_ops->take_timebase = rtas_take_timebase;
}
#endif /* CONFIG_PPC_RTAS */
#ifdef CONFIG_HOTPLUG_CPU
ppc_md.cpu_die = pnv_smp_cpu_kill_self;
#endif
}

View file

@@ -0,0 +1,95 @@
/*
* Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <asm/asm-offsets.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include "subcore.h"
_GLOBAL(split_core_secondary_loop)
/*
* r3 = u8 *state, used throughout the routine
* r4 = temp
* r5 = temp
* ..
* r12 = MSR
*/
mfmsr r12
/* Disable interrupts so SRR0/1 don't get trashed */
li r4,0
ori r4,r4,MSR_EE|MSR_SE|MSR_BE|MSR_RI
andc r4,r12,r4
sync
mtmsrd r4
/* Switch to real mode and leave interrupts off */
li r5, MSR_IR|MSR_DR
andc r5, r4, r5
LOAD_REG_ADDR(r4, real_mode)
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r5
rfid
b . /* prevent speculative execution */
real_mode:
/* Grab values from unsplit SPRs */
mfspr r6, SPRN_LDBAR
mfspr r7, SPRN_PMMAR
mfspr r8, SPRN_PMCR
mfspr r9, SPRN_RPR
mfspr r10, SPRN_SDR1
/* Order reading the SPRs vs telling the primary we are ready to split */
sync
/* Tell thread 0 we are in real mode */
li r4, SYNC_STEP_REAL_MODE
stb r4, 0(r3)
li r5, (HID0_POWER8_4LPARMODE | HID0_POWER8_2LPARMODE)@highest
sldi r5, r5, 48
/* Loop until we see the split happen in HID0 */
1: mfspr r4, SPRN_HID0
and. r4, r4, r5
beq 1b
/*
* We only need to initialise the below regs once for each subcore,
* but it's simpler and harmless to do it on each thread.
*/
/* Make sure various SPRS have sane values */
li r4, 0
mtspr SPRN_LPID, r4
mtspr SPRN_PCR, r4
mtspr SPRN_HDEC, r4
/* Restore SPR values now we are split */
mtspr SPRN_LDBAR, r6
mtspr SPRN_PMMAR, r7
mtspr SPRN_PMCR, r8
mtspr SPRN_RPR, r9
mtspr SPRN_SDR1, r10
LOAD_REG_ADDR(r5, virtual_mode)
/* Get out of real mode */
mtspr SPRN_SRR0,r5
mtspr SPRN_SRR1,r12
rfid
b . /* prevent speculative execution */
virtual_mode:
blr

View file

@@ -0,0 +1,393 @@
/*
* Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) "powernv: " fmt
#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/stop_machine.h>
#include <asm/cputhreads.h>
#include <asm/kvm_ppc.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/smp.h>
#include "subcore.h"
#include "powernv.h"
/*
* Split/unsplit procedure:
*
* A core can be in one of three states, unsplit, 2-way split, and 4-way split.
*
* The mapping to subcores_per_core is simple:
*
* State | subcores_per_core
* ------------|------------------
* Unsplit | 1
* 2-way split | 2
* 4-way split | 4
*
* The core is split along thread boundaries, the mapping between subcores and
* threads is as follows:
*
* Unsplit:
* ----------------------------
* Subcore | 0 |
* ----------------------------
* Thread | 0 1 2 3 4 5 6 7 |
* ----------------------------
*
* 2-way split:
* -------------------------------------
* Subcore | 0 | 1 |
* -------------------------------------
* Thread | 0 1 2 3 | 4 5 6 7 |
* -------------------------------------
*
* 4-way split:
* -----------------------------------------
* Subcore | 0 | 1 | 2 | 3 |
* -----------------------------------------
* Thread | 0 1 | 2 3 | 4 5 | 6 7 |
* -----------------------------------------
*
*
* Transitions
* -----------
*
* It is not possible to transition between either of the split states, the
* core must first be unsplit. The legal transitions are:
*
* ----------- ---------------
* | | <----> | 2-way split |
* | | ---------------
* | Unsplit |
* | | ---------------
* | | <----> | 4-way split |
* ----------- ---------------
*
* Unsplitting
* -----------
*
* Unsplitting is the simpler procedure. It requires thread 0 to request the
* unsplit while all other threads NAP.
*
* Thread 0 clears HID0_POWER8_DYNLPARDIS (Dynamic LPAR Disable). This tells
* the hardware that if all threads except 0 are napping, it should
* unsplit the core.
*
* Non-zero threads are sent to a NAP loop; they don't exit the loop until they
* see the core unsplit.
*
* Thread 0 spins waiting for the hardware to see all the other threads napping
* and perform the unsplit.
*
* Once thread 0 sees the unsplit, it IPIs the secondary threads to wake them
* out of NAP. They will then see the core unsplit and exit the NAP loop.
*
* Splitting
* ---------
*
* The basic splitting procedure is fairly straightforward. However, it is
* complicated by the fact that after the split occurs, the newly created
* subcores are not in a fully initialised state.
*
* Most notably the subcores do not have the correct value for SDR1, which
* means they must not be running in virtual mode when the split occurs. The
* subcores have separate timebase SPRs, but these are pre-synchronised by
* OPAL.
*
* To begin with, secondary threads are sent to an assembly routine. There they
* switch to real mode, so they are immune to the uninitialised SDR1 value.
* Once in real mode they indicate that they are in real mode, and spin waiting
* to see the core split.
*
* Thread 0 waits to see that all secondaries are in real mode, and then begins
* the splitting procedure. It first sets HID0_POWER8_DYNLPARDIS, which
* prevents the hardware from unsplitting. Then it sets the appropriate HID bit
* to request the split, and spins waiting to see that the split has happened.
*
* Concurrently, the secondaries will notice the split. When they do, they set up
* their SPRs, notably SDR1, and then they can return to virtual mode and exit
* the procedure.
*/
/* Initialised at boot by subcore_init() */
static int subcores_per_core;
/*
* Used to communicate to offline cpus that we want them to pop out of the
* offline loop and do a split or unsplit.
*
* 0 - no split happening
* 1 - unsplit in progress
* 2 - split to 2 in progress
* 4 - split to 4 in progress
*/
static int new_split_mode;
static cpumask_var_t cpu_offline_mask;
struct split_state {
u8 step;
u8 master;
};
static DEFINE_PER_CPU(struct split_state, split_state);
static void wait_for_sync_step(int step)
{
int i, cpu = smp_processor_id();
for (i = cpu + 1; i < cpu + threads_per_core; i++)
while (per_cpu(split_state, i).step < step)
barrier();
/* Order the wait loop vs any subsequent loads/stores. */
mb();
}
static void unsplit_core(void)
{
u64 hid0, mask;
int i, cpu;
mask = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
cpu = smp_processor_id();
if (cpu_thread_in_core(cpu) != 0) {
while (mfspr(SPRN_HID0) & mask)
power7_nap(0);
per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
return;
}
hid0 = mfspr(SPRN_HID0);
hid0 &= ~HID0_POWER8_DYNLPARDIS;
mtspr(SPRN_HID0, hid0);
while (mfspr(SPRN_HID0) & mask)
cpu_relax();
/* Wake secondaries out of NAP */
for (i = cpu + 1; i < cpu + threads_per_core; i++)
smp_send_reschedule(i);
wait_for_sync_step(SYNC_STEP_UNSPLIT);
}
static void split_core(int new_mode)
{
struct { u64 value; u64 mask; } split_parms[2] = {
{ HID0_POWER8_1TO2LPAR, HID0_POWER8_2LPARMODE },
{ HID0_POWER8_1TO4LPAR, HID0_POWER8_4LPARMODE }
};
int i, cpu;
u64 hid0;
/* Convert new_mode (2 or 4) into an index into our parms array */
i = (new_mode >> 1) - 1;
BUG_ON(i < 0 || i > 1);
cpu = smp_processor_id();
if (cpu_thread_in_core(cpu) != 0) {
split_core_secondary_loop(&per_cpu(split_state, cpu).step);
return;
}
wait_for_sync_step(SYNC_STEP_REAL_MODE);
/* Write new mode */
hid0 = mfspr(SPRN_HID0);
hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
mtspr(SPRN_HID0, hid0);
/* Wait for it to happen */
while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
cpu_relax();
}
static void cpu_do_split(int new_mode)
{
/*
* At boot subcores_per_core will be 0, so we will always unsplit at
* boot. In the usual case where the core is already unsplit it's a
* nop, and this just ensures the kernel's notion of the mode is
* consistent with the hardware.
*/
if (subcores_per_core != 1)
unsplit_core();
if (new_mode != 1)
split_core(new_mode);
mb();
per_cpu(split_state, smp_processor_id()).step = SYNC_STEP_FINISHED;
}
bool cpu_core_split_required(void)
{
smp_rmb();
if (!new_split_mode)
return false;
cpu_do_split(new_split_mode);
return true;
}
static int cpu_update_split_mode(void *data)
{
int cpu, new_mode = *(int *)data;
if (this_cpu_ptr(&split_state)->master) {
new_split_mode = new_mode;
smp_wmb();
cpumask_andnot(cpu_offline_mask, cpu_present_mask,
cpu_online_mask);
/* This should work even though the cpu is offline */
for_each_cpu(cpu, cpu_offline_mask)
smp_send_reschedule(cpu);
}
cpu_do_split(new_mode);
if (this_cpu_ptr(&split_state)->master) {
/* Wait for all cpus to finish before we touch subcores_per_core */
for_each_present_cpu(cpu) {
if (cpu >= setup_max_cpus)
break;
while (per_cpu(split_state, cpu).step < SYNC_STEP_FINISHED)
barrier();
}
new_split_mode = 0;
/* Make the new mode public */
subcores_per_core = new_mode;
threads_per_subcore = threads_per_core / subcores_per_core;
/* Make sure the new mode is written before we exit */
mb();
}
return 0;
}
static int set_subcores_per_core(int new_mode)
{
struct split_state *state;
int cpu;
if (kvm_hv_mode_active()) {
pr_err("Unable to change split core mode while KVM active.\n");
return -EBUSY;
}
/*
* We are only called at boot, or from the sysfs write. If that ever
* changes we'll need a lock here.
*/
BUG_ON(new_mode < 1 || new_mode > 4 || new_mode == 3);
for_each_present_cpu(cpu) {
state = &per_cpu(split_state, cpu);
state->step = SYNC_STEP_INITIAL;
state->master = 0;
}
get_online_cpus();
/* This cpu will update the globals before exiting stop machine */
this_cpu_ptr(&split_state)->master = 1;
/* Ensure state is consistent before we call the other cpus */
mb();
stop_machine(cpu_update_split_mode, &new_mode, cpu_online_mask);
put_online_cpus();
return 0;
}
static ssize_t __used store_subcores_per_core(struct device *dev,
struct device_attribute *attr, const char *buf,
size_t count)
{
unsigned long val;
int rc;
/* We are serialised by the attribute lock */
rc = sscanf(buf, "%lx", &val);
if (rc != 1)
return -EINVAL;
switch (val) {
case 1:
case 2:
case 4:
if (subcores_per_core == val)
/* Nothing to do */
goto out;
break;
default:
return -EINVAL;
}
rc = set_subcores_per_core(val);
if (rc)
return rc;
out:
return count;
}
static ssize_t show_subcores_per_core(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%x\n", subcores_per_core);
}
static DEVICE_ATTR(subcores_per_core, 0644,
show_subcores_per_core, store_subcores_per_core);
static int subcore_init(void)
{
if (!cpu_has_feature(CPU_FTR_ARCH_207S))
return 0;
/*
* We need all threads in a core to be present to split/unsplit, so
* continue only if max_cpus is aligned to threads_per_core.
*/
if (setup_max_cpus % threads_per_core)
return 0;
BUG_ON(!alloc_cpumask_var(&cpu_offline_mask, GFP_KERNEL));
set_subcores_per_core(1);
return device_create_file(cpu_subsys.dev_root,
&dev_attr_subcores_per_core);
}
machine_device_initcall(powernv, subcore_init);
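
cpu_core_split_required() above is the hook for the powernv CPU-offline path, which lives outside this file. The sketch below shows how that side of the handshake is expected to look; offline_wake_requested() is a hypothetical stand-in for the real wake-up test, and the actual offline loop may differ.

/* Illustrative offline loop: nap until woken, servicing any split/unsplit
 * request that arrives while this thread is offline. */
static void offline_nap_loop_sketch(void)
{
	for (;;) {
		/* A reschedule IPI from cpu_update_split_mode() lands here. */
		if (cpu_core_split_required())
			continue;	/* we just took part in a split/unsplit */
		if (offline_wake_requested())	/* hypothetical condition */
			break;
		power7_nap(0);
	}
}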

View file

@ -0,0 +1,18 @@
/*
* Copyright 2013, Michael Ellerman, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
/* These are ordered and tested with <= */
#define SYNC_STEP_INITIAL 0
#define SYNC_STEP_UNSPLIT 1 /* Set by secondary when it sees unsplit */
#define SYNC_STEP_REAL_MODE 2 /* Set by secondary when in real mode */
#define SYNC_STEP_FINISHED 3 /* Set by secondary when split/unsplit is done */
#ifndef __ASSEMBLY__
void split_core_secondary_loop(u8 *state);
#endif
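
The whole mechanism is driven through the subcores_per_core sysfs attribute created by subcore_init(). A minimal userspace sketch, assuming the attribute is exposed at the usual cpu subsystem path /sys/devices/system/cpu/subcores_per_core:

/* Userspace sketch: report the current mode, then request a 4-way split. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/devices/system/cpu/subcores_per_core";
	unsigned int mode;
	FILE *f;

	f = fopen(path, "r");
	if (!f || fscanf(f, "%x", &mode) != 1)	/* show handler prints "%x\n" */
		return 1;
	fclose(f);
	printf("subcores_per_core = %u\n", mode);

	f = fopen(path, "w");
	if (!f)
		return 1;
	fprintf(f, "4\n");	/* valid values are 1, 2 and 4 */
	return fclose(f) ? 1 : 0;	/* the write is flushed (and may fail) here */
}

A write is rejected with EINVAL for any value other than 1, 2 or 4, and with EBUSY while KVM HV mode is active, as per set_subcores_per_core() above.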