Fixed MTP to work with TWRP

awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

View file: drivers/xen/xen-pciback/Makefile

@@ -0,0 +1,7 @@
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o
xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o
xen-pciback-y += conf_space.o conf_space_header.o \
conf_space_capability.o \
conf_space_quirks.o vpci.o \
passthrough.o
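
# With CONFIG_XEN_PCIDEV_BACKEND=m the objects above are linked into a
# single xen-pciback.ko module; with =y they are built into the kernel image.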

View file: drivers/xen/xen-pciback/conf_space.c

@@ -0,0 +1,438 @@
/*
* PCI Backend - Functions for creating a virtual configuration space for
* exported PCI Devices.
* It's dangerous to allow PCI Driver Domains to change their
* device's resources (memory, i/o ports, interrupts). We need to
* restrict changes to certain PCI Configuration registers:
* BARs, INTERRUPT_PIN, most registers in the header...
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "pciback.h"
#include "conf_space.h"
#include "conf_space_quirks.h"
bool permissive;
module_param(permissive, bool, 0644);
/* This is where the xen_pcibk_read_config_{byte,word,dword} and
* xen_pcibk_write_config_{byte,word,dword} accessors are created. */
#define DEFINE_PCI_CONFIG(op, size, type) \
int xen_pcibk_##op##_config_##size \
(struct pci_dev *dev, int offset, type value, void *data) \
{ \
return pci_##op##_config_##size(dev, offset, value); \
}
DEFINE_PCI_CONFIG(read, byte, u8 *)
DEFINE_PCI_CONFIG(read, word, u16 *)
DEFINE_PCI_CONFIG(read, dword, u32 *)
DEFINE_PCI_CONFIG(write, byte, u8)
DEFINE_PCI_CONFIG(write, word, u16)
DEFINE_PCI_CONFIG(write, dword, u32)
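
/* For illustration, the first invocation above expands to:
*
*	int xen_pcibk_read_config_byte
*		(struct pci_dev *dev, int offset, u8 *value, void *data)
*	{
*		return pci_read_config_byte(dev, offset, value);
*	}
*
* i.e. each generated accessor forwards to the matching pci_*() helper
* and ignores the per-field data pointer.
*/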
static int conf_space_read(struct pci_dev *dev,
const struct config_field_entry *entry,
int offset, u32 *value)
{
int ret = 0;
const struct config_field *field = entry->field;
*value = 0;
switch (field->size) {
case 1:
if (field->u.b.read)
ret = field->u.b.read(dev, offset, (u8 *) value,
entry->data);
break;
case 2:
if (field->u.w.read)
ret = field->u.w.read(dev, offset, (u16 *) value,
entry->data);
break;
case 4:
if (field->u.dw.read)
ret = field->u.dw.read(dev, offset, value, entry->data);
break;
}
return ret;
}
static int conf_space_write(struct pci_dev *dev,
const struct config_field_entry *entry,
int offset, u32 value)
{
int ret = 0;
const struct config_field *field = entry->field;
switch (field->size) {
case 1:
if (field->u.b.write)
ret = field->u.b.write(dev, offset, (u8) value,
entry->data);
break;
case 2:
if (field->u.w.write)
ret = field->u.w.write(dev, offset, (u16) value,
entry->data);
break;
case 4:
if (field->u.dw.write)
ret = field->u.dw.write(dev, offset, value,
entry->data);
break;
}
return ret;
}
static inline u32 get_mask(int size)
{
if (size == 1)
return 0xff;
else if (size == 2)
return 0xffff;
else
return 0xffffffff;
}
static inline int valid_request(int offset, int size)
{
/* Validate request (no un-aligned requests) */
if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0)
return 1;
return 0;
}
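
/* Example: offset 0x04 with size 2 is accepted (naturally aligned);
* offset 0x01 with size 2, or any size other than 1/2/4, is rejected,
* and the callers below return XEN_PCI_ERR_invalid_offset.
*/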
static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
int offset)
{
if (offset >= 0) {
new_val_mask <<= (offset * 8);
new_val <<= (offset * 8);
} else {
new_val_mask >>= (offset * -8);
new_val >>= (offset * -8);
}
val = (val & ~new_val_mask) | (new_val & new_val_mask);
return val;
}
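
/* Worked example (hypothetical values): merging a 2-byte field value
* 0xABCD into a 4-byte read of 0x11223344 at byte offset 1:
*
*	new_val_mask = 0xFFFF << 8 = 0x00FFFF00
*	new_val      = 0xABCD << 8 = 0x00ABCD00
*	result       = (0x11223344 & ~0x00FFFF00) | 0x00ABCD00 = 0x11ABCD44
*
* A negative offset shifts right instead, for requests that begin
* part-way into a field.
*/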
static int xen_pcibios_err_to_errno(int err)
{
switch (err) {
case PCIBIOS_SUCCESSFUL:
return XEN_PCI_ERR_success;
case PCIBIOS_DEVICE_NOT_FOUND:
return XEN_PCI_ERR_dev_not_found;
case PCIBIOS_BAD_REGISTER_NUMBER:
return XEN_PCI_ERR_invalid_offset;
case PCIBIOS_FUNC_NOT_SUPPORTED:
return XEN_PCI_ERR_not_implemented;
case PCIBIOS_SET_FAILED:
return XEN_PCI_ERR_access_denied;
}
return err;
}
int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
u32 *ret_val)
{
int err = 0;
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
const struct config_field_entry *cfg_entry;
const struct config_field *field;
int req_start, req_end, field_start, field_end;
/* if read fails for any reason, return 0
* (as if device didn't respond) */
u32 value = 0, tmp_val;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x\n",
pci_name(dev), size, offset);
if (!valid_request(offset, size)) {
err = XEN_PCI_ERR_invalid_offset;
goto out;
}
/* Get the real value first, then modify as appropriate */
switch (size) {
case 1:
err = pci_read_config_byte(dev, offset, (u8 *) &value);
break;
case 2:
err = pci_read_config_word(dev, offset, (u16 *) &value);
break;
case 4:
err = pci_read_config_dword(dev, offset, &value);
break;
}
list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
field = cfg_entry->field;
req_start = offset;
req_end = offset + size;
field_start = OFFSET(cfg_entry);
field_end = OFFSET(cfg_entry) + field->size;
if ((req_start >= field_start && req_start < field_end)
|| (req_end > field_start && req_end <= field_end)) {
err = conf_space_read(dev, cfg_entry, field_start,
&tmp_val);
if (err)
goto out;
value = merge_value(value, tmp_val,
get_mask(field->size),
field_start - req_start);
}
}
out:
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x = %x\n",
pci_name(dev), size, offset, value);
*ret_val = value;
return xen_pcibios_err_to_errno(err);
}
int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
{
int err = 0, handled = 0;
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
const struct config_field_entry *cfg_entry;
const struct config_field *field;
u32 tmp_val;
int req_start, req_end, field_start, field_end;
if (unlikely(verbose_request))
printk(KERN_DEBUG
DRV_NAME ": %s: write request %d bytes at 0x%x = %x\n",
pci_name(dev), size, offset, value);
if (!valid_request(offset, size))
return XEN_PCI_ERR_invalid_offset;
list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
field = cfg_entry->field;
req_start = offset;
req_end = offset + size;
field_start = OFFSET(cfg_entry);
field_end = OFFSET(cfg_entry) + field->size;
if ((req_start >= field_start && req_start < field_end)
|| (req_end > field_start && req_end <= field_end)) {
tmp_val = 0;
err = xen_pcibk_config_read(dev, field_start,
field->size, &tmp_val);
if (err)
break;
tmp_val = merge_value(tmp_val, value, get_mask(size),
req_start - field_start);
err = conf_space_write(dev, cfg_entry, field_start,
tmp_val);
/* handled is set true here, but not every byte
* may have been written! Properly detecting if
* every byte is handled is unnecessary as the
* flag is used to detect devices that need
* special helpers to work correctly.
*/
handled = 1;
}
}
if (!handled && !err) {
/* By default, anything not specifically handled above is
* read-only. The permissive flag changes this behavior so
* that anything not specifically handled above is writable.
* This means that some fields may still be read-only because
* they have entries in the config_field list that intercept
* the write and do nothing. */
if (dev_data->permissive || permissive) {
switch (size) {
case 1:
err = pci_write_config_byte(dev, offset,
(u8) value);
break;
case 2:
err = pci_write_config_word(dev, offset,
(u16) value);
break;
case 4:
err = pci_write_config_dword(dev, offset,
(u32) value);
break;
}
} else if (!dev_data->warned_on_write) {
dev_data->warned_on_write = 1;
dev_warn(&dev->dev, "Driver tried to write to a "
"read-only configuration space field at offset"
" 0x%x, size %d. This may be harmless, but if "
"you have problems with your device:\n"
"1) see permissive attribute in sysfs\n"
"2) report problems to the xen-devel "
"mailing list along with details of your "
"device obtained from lspci.\n", offset, size);
}
}
return xen_pcibios_err_to_errno(err);
}
void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev)
{
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
struct config_field_entry *cfg_entry, *t;
const struct config_field *field;
dev_dbg(&dev->dev, "free-ing dynamically allocated virtual "
"configuration space fields\n");
if (!dev_data)
return;
list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
field = cfg_entry->field;
if (field->clean) {
field->clean((struct config_field *)field);
kfree(cfg_entry->data);
list_del(&cfg_entry->list);
kfree(cfg_entry);
}
}
}
void xen_pcibk_config_reset_dev(struct pci_dev *dev)
{
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
const struct config_field_entry *cfg_entry;
const struct config_field *field;
dev_dbg(&dev->dev, "resetting virtual configuration space\n");
if (!dev_data)
return;
list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
field = cfg_entry->field;
if (field->reset)
field->reset(dev, OFFSET(cfg_entry), cfg_entry->data);
}
}
void xen_pcibk_config_free_dev(struct pci_dev *dev)
{
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
struct config_field_entry *cfg_entry, *t;
const struct config_field *field;
dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
if (!dev_data)
return;
list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
list_del(&cfg_entry->list);
field = cfg_entry->field;
if (field->release)
field->release(dev, OFFSET(cfg_entry), cfg_entry->data);
kfree(cfg_entry);
}
}
int xen_pcibk_config_add_field_offset(struct pci_dev *dev,
const struct config_field *field,
unsigned int base_offset)
{
int err = 0;
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
struct config_field_entry *cfg_entry;
void *tmp;
cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL);
if (!cfg_entry) {
err = -ENOMEM;
goto out;
}
cfg_entry->data = NULL;
cfg_entry->field = field;
cfg_entry->base_offset = base_offset;
/* silently ignore duplicate fields */
err = xen_pcibk_field_is_dup(dev, OFFSET(cfg_entry));
if (err)
goto out;
if (field->init) {
tmp = field->init(dev, OFFSET(cfg_entry));
if (IS_ERR(tmp)) {
err = PTR_ERR(tmp);
goto out;
}
cfg_entry->data = tmp;
}
dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
OFFSET(cfg_entry));
list_add_tail(&cfg_entry->list, &dev_data->config_fields);
out:
if (err)
kfree(cfg_entry);
return err;
}
/* This sets up the device's virtual configuration space to keep track of
* certain registers (like the base address registers (BARs)) so that we
* can keep the client from manipulating them directly.
*/
int xen_pcibk_config_init_dev(struct pci_dev *dev)
{
int err = 0;
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
dev_dbg(&dev->dev, "initializing virtual configuration space\n");
INIT_LIST_HEAD(&dev_data->config_fields);
err = xen_pcibk_config_header_add_fields(dev);
if (err)
goto out;
err = xen_pcibk_config_capability_add_fields(dev);
if (err)
goto out;
err = xen_pcibk_config_quirks_init(dev);
out:
return err;
}
int xen_pcibk_config_init(void)
{
return xen_pcibk_config_capability_init();
}

View file: drivers/xen/xen-pciback/conf_space.h

@@ -0,0 +1,128 @@
/*
* PCI Backend - Common data structures for overriding the configuration space
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#ifndef __XEN_PCIBACK_CONF_SPACE_H__
#define __XEN_PCIBACK_CONF_SPACE_H__
#include <linux/list.h>
#include <linux/err.h>
/* conf_field_init can return an errno in a ptr with ERR_PTR() */
typedef void *(*conf_field_init) (struct pci_dev *dev, int offset);
typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data);
typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data);
typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value,
void *data);
typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value,
void *data);
typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value,
void *data);
typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value,
void *data);
typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value,
void *data);
typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value,
void *data);
/* These are the fields within the configuration space which we
* are interested in intercepting reads/writes to and changing their
* values.
*/
struct config_field {
unsigned int offset;
unsigned int size;
unsigned int mask;
conf_field_init init;
conf_field_reset reset;
conf_field_free release;
void (*clean) (struct config_field *field);
union {
struct {
conf_dword_write write;
conf_dword_read read;
} dw;
struct {
conf_word_write write;
conf_word_read read;
} w;
struct {
conf_byte_write write;
conf_byte_read read;
} b;
} u;
struct list_head list;
};
struct config_field_entry {
struct list_head list;
const struct config_field *field;
unsigned int base_offset;
void *data;
};
extern bool permissive;
#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
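
/* Example: a capability field declared with .offset = PCI_VPD_ADDR (2)
* and registered at a (hypothetical) capability position of 0x48 gives
* OFFSET(cfg_entry) == 0x4a, the field's absolute config-space offset.
*/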
/* Add fields to a device - the add_fields macro expects to get a pointer to
* the first entry in an array (of which the ending is marked by size==0)
*/
int xen_pcibk_config_add_field_offset(struct pci_dev *dev,
const struct config_field *field,
unsigned int offset);
static inline int xen_pcibk_config_add_field(struct pci_dev *dev,
const struct config_field *field)
{
return xen_pcibk_config_add_field_offset(dev, field, 0);
}
static inline int xen_pcibk_config_add_fields(struct pci_dev *dev,
const struct config_field *field)
{
int i, err = 0;
for (i = 0; field[i].size != 0; i++) {
err = xen_pcibk_config_add_field(dev, &field[i]);
if (err)
break;
}
return err;
}
static inline int xen_pcibk_config_add_fields_offset(struct pci_dev *dev,
const struct config_field *field,
unsigned int offset)
{
int i, err = 0;
for (i = 0; field[i].size != 0; i++) {
err = xen_pcibk_config_add_field_offset(dev, &field[i], offset);
if (err)
break;
}
return err;
}
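
/* For illustration (example_fields is hypothetical, not part of this
* header): the helpers above expect a sentinel-terminated array:
*
*	static const struct config_field example_fields[] = {
*		{
*			.offset = 0x40,
*			.size = 2,
*			.u.w.read = xen_pcibk_read_config_word,
*		},
*		{},	(size == 0 marks the end)
*	};
*	err = xen_pcibk_config_add_fields(dev, example_fields);
*/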
/* Read/Write the real configuration space */
int xen_pcibk_read_config_byte(struct pci_dev *dev, int offset, u8 *value,
void *data);
int xen_pcibk_read_config_word(struct pci_dev *dev, int offset, u16 *value,
void *data);
int xen_pcibk_read_config_dword(struct pci_dev *dev, int offset, u32 *value,
void *data);
int xen_pcibk_write_config_byte(struct pci_dev *dev, int offset, u8 value,
void *data);
int xen_pcibk_write_config_word(struct pci_dev *dev, int offset, u16 value,
void *data);
int xen_pcibk_write_config_dword(struct pci_dev *dev, int offset, u32 value,
void *data);
int xen_pcibk_config_capability_init(void);
int xen_pcibk_config_header_add_fields(struct pci_dev *dev);
int xen_pcibk_config_capability_add_fields(struct pci_dev *dev);
#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */

View file: drivers/xen/xen-pciback/conf_space_capability.c

@@ -0,0 +1,207 @@
/*
* PCI Backend - Handles the virtual fields found on the capability lists
* in the configuration space.
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#include <linux/kernel.h>
#include <linux/pci.h>
#include "pciback.h"
#include "conf_space.h"
static LIST_HEAD(capabilities);
struct xen_pcibk_config_capability {
struct list_head cap_list;
int capability;
/* If the device has the capability found above, add these fields */
const struct config_field *fields;
};
static const struct config_field caplist_header[] = {
{
.offset = PCI_CAP_LIST_ID,
.size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
.u.w.read = xen_pcibk_read_config_word,
.u.w.write = NULL,
},
{}
};
static inline void register_capability(struct xen_pcibk_config_capability *cap)
{
list_add_tail(&cap->cap_list, &capabilities);
}
int xen_pcibk_config_capability_add_fields(struct pci_dev *dev)
{
int err = 0;
struct xen_pcibk_config_capability *cap;
int cap_offset;
list_for_each_entry(cap, &capabilities, cap_list) {
cap_offset = pci_find_capability(dev, cap->capability);
if (cap_offset) {
dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
cap->capability, cap_offset);
err = xen_pcibk_config_add_fields_offset(dev,
caplist_header,
cap_offset);
if (err)
goto out;
err = xen_pcibk_config_add_fields_offset(dev,
cap->fields,
cap_offset);
if (err)
goto out;
}
}
out:
return err;
}
static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
void *data)
{
/* Disallow writes to the vital product data */
if (value & PCI_VPD_ADDR_F)
return PCIBIOS_SET_FAILED;
else
return pci_write_config_word(dev, offset, value);
}
static const struct config_field caplist_vpd[] = {
{
.offset = PCI_VPD_ADDR,
.size = 2,
.u.w.read = xen_pcibk_read_config_word,
.u.w.write = vpd_address_write,
},
{
.offset = PCI_VPD_DATA,
.size = 4,
.u.dw.read = xen_pcibk_read_config_dword,
.u.dw.write = NULL,
},
{}
};
static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
void *data)
{
int err;
u16 real_value;
err = pci_read_config_word(dev, offset, &real_value);
if (err)
goto out;
*value = real_value & ~PCI_PM_CAP_PME_MASK;
out:
return err;
}
/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
* Can't allow driver domain to enable PMEs - they're shared */
#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
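
/* Example: if the device currently has PCI_PM_CTRL_PME_ENABLE set and
* the guest writes only PCI_PM_CTRL_PME_STATUS, pm_ctrl_write() below
* computes (old_value & ~PM_OK_BITS) | PCI_PM_CTRL_PME_STATUS, so the
* host-controlled PME enable bit survives the guest's write.
*/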
static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
void *data)
{
int err;
u16 old_value;
pci_power_t new_state, old_state;
err = pci_read_config_word(dev, offset, &old_value);
if (err)
goto out;
old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK);
new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
new_value &= PM_OK_BITS;
if ((old_value & PM_OK_BITS) != new_value) {
new_value = (old_value & ~PM_OK_BITS) | new_value;
err = pci_write_config_word(dev, offset, new_value);
if (err)
goto out;
}
/* Let pci core handle the power management change */
dev_dbg(&dev->dev, "set power state to %x\n", new_state);
err = pci_set_power_state(dev, new_state);
if (err) {
err = PCIBIOS_SET_FAILED;
goto out;
}
out:
return err;
}
/* Ensure PMEs are disabled */
static void *pm_ctrl_init(struct pci_dev *dev, int offset)
{
int err;
u16 value;
err = pci_read_config_word(dev, offset, &value);
if (err)
goto out;
if (value & PCI_PM_CTRL_PME_ENABLE) {
value &= ~PCI_PM_CTRL_PME_ENABLE;
err = pci_write_config_word(dev, offset, value);
}
out:
return ERR_PTR(err);
}
static const struct config_field caplist_pm[] = {
{
.offset = PCI_PM_PMC,
.size = 2,
.u.w.read = pm_caps_read,
},
{
.offset = PCI_PM_CTRL,
.size = 2,
.init = pm_ctrl_init,
.u.w.read = xen_pcibk_read_config_word,
.u.w.write = pm_ctrl_write,
},
{
.offset = PCI_PM_PPB_EXTENSIONS,
.size = 1,
.u.b.read = xen_pcibk_read_config_byte,
},
{
.offset = PCI_PM_DATA_REGISTER,
.size = 1,
.u.b.read = xen_pcibk_read_config_byte,
},
{}
};
static struct xen_pcibk_config_capability xen_pcibk_config_capability_pm = {
.capability = PCI_CAP_ID_PM,
.fields = caplist_pm,
};
static struct xen_pcibk_config_capability xen_pcibk_config_capability_vpd = {
.capability = PCI_CAP_ID_VPD,
.fields = caplist_vpd,
};
int xen_pcibk_config_capability_init(void)
{
register_capability(&xen_pcibk_config_capability_vpd);
register_capability(&xen_pcibk_config_capability_pm);
return 0;
}

View file: drivers/xen/xen-pciback/conf_space_header.c

@@ -0,0 +1,420 @@
/*
* PCI Backend - Handles the virtual fields in the configuration space headers.
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/pci.h>
#include "pciback.h"
#include "conf_space.h"
struct pci_cmd_info {
u16 val;
};
struct pci_bar_info {
u32 val;
u32 len_val;
int which;
};
#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
/* Bits guests are allowed to control in permissive mode. */
#define PCI_COMMAND_GUEST (PCI_COMMAND_MASTER|PCI_COMMAND_SPECIAL| \
PCI_COMMAND_INVALIDATE|PCI_COMMAND_VGA_PALETTE| \
PCI_COMMAND_WAIT|PCI_COMMAND_FAST_BACK)
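
/* Example: in permissive mode, a guest write of
* PCI_COMMAND_MASTER | PCI_COMMAND_SERR keeps MASTER (it is in
* PCI_COMMAND_GUEST) but takes SERR from the device's current value,
* since command_write() below merges all non-guest bits from the hardware.
*/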
static void *command_init(struct pci_dev *dev, int offset)
{
struct pci_cmd_info *cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
int err;
if (!cmd)
return ERR_PTR(-ENOMEM);
err = pci_read_config_word(dev, PCI_COMMAND, &cmd->val);
if (err) {
kfree(cmd);
return ERR_PTR(err);
}
return cmd;
}
static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
{
int ret = pci_read_config_word(dev, offset, value);
const struct pci_cmd_info *cmd = data;
*value &= PCI_COMMAND_GUEST;
*value |= cmd->val & ~PCI_COMMAND_GUEST;
return ret;
}
static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
{
struct xen_pcibk_dev_data *dev_data;
int err;
u16 val;
struct pci_cmd_info *cmd = data;
dev_data = pci_get_drvdata(dev);
if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: enable\n",
pci_name(dev));
err = pci_enable_device(dev);
if (err)
return err;
if (dev_data)
dev_data->enable_intx = 1;
} else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: disable\n",
pci_name(dev));
pci_disable_device(dev);
if (dev_data)
dev_data->enable_intx = 0;
}
if (!dev->is_busmaster && is_master_cmd(value)) {
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: set bus master\n",
pci_name(dev));
pci_set_master(dev);
}
if (value & PCI_COMMAND_INVALIDATE) {
if (unlikely(verbose_request))
printk(KERN_DEBUG
DRV_NAME ": %s: enable memory-write-invalidate\n",
pci_name(dev));
err = pci_set_mwi(dev);
if (err) {
pr_warn("%s: cannot enable memory-write-invalidate (%d)\n",
pci_name(dev), err);
value &= ~PCI_COMMAND_INVALIDATE;
}
}
cmd->val = value;
if (!permissive && (!dev_data || !dev_data->permissive))
return 0;
/* Only allow the guest to control certain bits. */
err = pci_read_config_word(dev, offset, &val);
if (err || val == value)
return err;
value &= PCI_COMMAND_GUEST;
value |= val & ~PCI_COMMAND_GUEST;
return pci_write_config_word(dev, offset, value);
}
static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
{
struct pci_bar_info *bar = data;
if (unlikely(!bar)) {
pr_warn(DRV_NAME ": driver data not found for %s\n",
pci_name(dev));
return XEN_PCI_ERR_op_failed;
}
/* A write to obtain the length must happen as a 32-bit write.
* This does not (yet) support writing individual bytes
*/
if (value == ~PCI_ROM_ADDRESS_ENABLE)
bar->which = 1;
else {
u32 tmpval;
pci_read_config_dword(dev, offset, &tmpval);
if (tmpval != bar->val && value == bar->val) {
/* Allow restoration of bar value. */
pci_write_config_dword(dev, offset, bar->val);
}
bar->which = 0;
}
/* Do we need to support enabling/disabling the rom address here? */
return 0;
}
/* For the BARs, only allow writes which write ~0 or
* the correct resource information
* (Needed for when the driver probes the resource usage)
*/
static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
{
struct pci_bar_info *bar = data;
if (unlikely(!bar)) {
pr_warn(DRV_NAME ": driver data not found for %s\n",
pci_name(dev));
return XEN_PCI_ERR_op_failed;
}
/* A write to obtain the length must happen as a 32-bit write.
* This does not (yet) support writing individual bytes
*/
if (value == ~0)
bar->which = 1;
else {
u32 tmpval;
pci_read_config_dword(dev, offset, &tmpval);
if (tmpval != bar->val && value == bar->val) {
/* Allow restoration of bar value. */
pci_write_config_dword(dev, offset, bar->val);
}
bar->which = 0;
}
return 0;
}
static int bar_read(struct pci_dev *dev, int offset, u32 *value, void *data)
{
struct pci_bar_info *bar = data;
if (unlikely(!bar)) {
pr_warn(DRV_NAME ": driver data not found for %s\n",
pci_name(dev));
return XEN_PCI_ERR_op_failed;
}
*value = bar->which ? bar->len_val : bar->val;
return 0;
}
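
/* Together, bar_write() and bar_read() emulate the BAR sizing handshake:
* the frontend writes ~0, the next read returns len_val instead of the
* address, and a later write of the original bar->val restores the real
* BAR. No other write from bar_write() ever reaches the device.
*/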
static inline void read_dev_bar(struct pci_dev *dev,
struct pci_bar_info *bar_info, int offset,
u32 len_mask)
{
int pos;
struct resource *res = dev->resource;
if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1)
pos = PCI_ROM_RESOURCE;
else {
pos = (offset - PCI_BASE_ADDRESS_0) / 4;
if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE |
PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
(PCI_BASE_ADDRESS_SPACE_MEMORY |
PCI_BASE_ADDRESS_MEM_TYPE_64))) {
bar_info->val = res[pos - 1].start >> 32;
bar_info->len_val = res[pos - 1].end >> 32;
return;
}
}
bar_info->val = res[pos].start |
(res[pos].flags & PCI_REGION_FLAG_MASK);
bar_info->len_val = resource_size(&res[pos]);
}
static void *bar_init(struct pci_dev *dev, int offset)
{
struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
if (!bar)
return ERR_PTR(-ENOMEM);
read_dev_bar(dev, bar, offset, ~0);
bar->which = 0;
return bar;
}
static void *rom_init(struct pci_dev *dev, int offset)
{
struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
if (!bar)
return ERR_PTR(-ENOMEM);
read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
bar->which = 0;
return bar;
}
static void bar_reset(struct pci_dev *dev, int offset, void *data)
{
struct pci_bar_info *bar = data;
bar->which = 0;
}
static void bar_release(struct pci_dev *dev, int offset, void *data)
{
kfree(data);
}
static int xen_pcibk_read_vendor(struct pci_dev *dev, int offset,
u16 *value, void *data)
{
*value = dev->vendor;
return 0;
}
static int xen_pcibk_read_device(struct pci_dev *dev, int offset,
u16 *value, void *data)
{
*value = dev->device;
return 0;
}
static int interrupt_read(struct pci_dev *dev, int offset, u8 *value,
void *data)
{
*value = (u8) dev->irq;
return 0;
}
static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
{
u8 cur_value;
int err;
err = pci_read_config_byte(dev, offset, &cur_value);
if (err)
goto out;
if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START)
|| value == PCI_BIST_START)
err = pci_write_config_byte(dev, offset, value);
out:
return err;
}
static const struct config_field header_common[] = {
{
.offset = PCI_VENDOR_ID,
.size = 2,
.u.w.read = xen_pcibk_read_vendor,
},
{
.offset = PCI_DEVICE_ID,
.size = 2,
.u.w.read = xen_pcibk_read_device,
},
{
.offset = PCI_COMMAND,
.size = 2,
.init = command_init,
.release = bar_release,
.u.w.read = command_read,
.u.w.write = command_write,
},
{
.offset = PCI_INTERRUPT_LINE,
.size = 1,
.u.b.read = interrupt_read,
},
{
.offset = PCI_INTERRUPT_PIN,
.size = 1,
.u.b.read = xen_pcibk_read_config_byte,
},
{
/* Any side effects of letting driver domain control cache line? */
.offset = PCI_CACHE_LINE_SIZE,
.size = 1,
.u.b.read = xen_pcibk_read_config_byte,
.u.b.write = xen_pcibk_write_config_byte,
},
{
.offset = PCI_LATENCY_TIMER,
.size = 1,
.u.b.read = xen_pcibk_read_config_byte,
},
{
.offset = PCI_BIST,
.size = 1,
.u.b.read = xen_pcibk_read_config_byte,
.u.b.write = bist_write,
},
{}
};
#define CFG_FIELD_BAR(reg_offset) \
{ \
.offset = reg_offset, \
.size = 4, \
.init = bar_init, \
.reset = bar_reset, \
.release = bar_release, \
.u.dw.read = bar_read, \
.u.dw.write = bar_write, \
}
#define CFG_FIELD_ROM(reg_offset) \
{ \
.offset = reg_offset, \
.size = 4, \
.init = rom_init, \
.reset = bar_reset, \
.release = bar_release, \
.u.dw.read = bar_read, \
.u.dw.write = rom_write, \
}
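
/* For illustration, CFG_FIELD_BAR(PCI_BASE_ADDRESS_0) expands to the
* initializer:
*
*	{
*		.offset = PCI_BASE_ADDRESS_0,
*		.size = 4,
*		.init = bar_init,
*		.reset = bar_reset,
*		.release = bar_release,
*		.u.dw.read = bar_read,
*		.u.dw.write = bar_write,
*	}
*/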
static const struct config_field header_0[] = {
CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
CFG_FIELD_ROM(PCI_ROM_ADDRESS),
{}
};
static const struct config_field header_1[] = {
CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
{}
};
int xen_pcibk_config_header_add_fields(struct pci_dev *dev)
{
int err;
err = xen_pcibk_config_add_fields(dev, header_common);
if (err)
goto out;
switch (dev->hdr_type) {
case PCI_HEADER_TYPE_NORMAL:
err = xen_pcibk_config_add_fields(dev, header_0);
break;
case PCI_HEADER_TYPE_BRIDGE:
err = xen_pcibk_config_add_fields(dev, header_1);
break;
default:
err = -EINVAL;
pr_err("%s: Unsupported header type %d!\n",
pci_name(dev), dev->hdr_type);
break;
}
out:
return err;
}

View file: drivers/xen/xen-pciback/conf_space_quirks.c

@@ -0,0 +1,139 @@
/*
* PCI Backend - Handle special overlays for broken devices.
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
* Author: Chris Bookholt <hap10@epoch.ncsc.mil>
*/
#include <linux/kernel.h>
#include <linux/pci.h>
#include "pciback.h"
#include "conf_space.h"
#include "conf_space_quirks.h"
LIST_HEAD(xen_pcibk_quirks);
static inline const struct pci_device_id *
match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
{
if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
(id->device == PCI_ANY_ID || id->device == dev->device) &&
(id->subvendor == PCI_ANY_ID ||
id->subvendor == dev->subsystem_vendor) &&
(id->subdevice == PCI_ANY_ID ||
id->subdevice == dev->subsystem_device) &&
!((id->class ^ dev->class) & id->class_mask))
return id;
return NULL;
}
static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev)
{
struct xen_pcibk_config_quirk *tmp_quirk;
list_for_each_entry(tmp_quirk, &xen_pcibk_quirks, quirks_list)
if (match_one_device(&tmp_quirk->devid, dev) != NULL)
goto out;
tmp_quirk = NULL;
printk(KERN_DEBUG DRV_NAME
": quirk didn't match any known device\n");
out:
return tmp_quirk;
}
static inline void register_quirk(struct xen_pcibk_config_quirk *quirk)
{
list_add_tail(&quirk->quirks_list, &xen_pcibk_quirks);
}
int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg)
{
int ret = 0;
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
struct config_field_entry *cfg_entry;
list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
if (OFFSET(cfg_entry) == reg) {
ret = 1;
break;
}
}
return ret;
}
int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field
*field)
{
int err = 0;
switch (field->size) {
case 1:
field->u.b.read = xen_pcibk_read_config_byte;
field->u.b.write = xen_pcibk_write_config_byte;
break;
case 2:
field->u.w.read = xen_pcibk_read_config_word;
field->u.w.write = xen_pcibk_write_config_word;
break;
case 4:
field->u.dw.read = xen_pcibk_read_config_dword;
field->u.dw.write = xen_pcibk_write_config_dword;
break;
default:
err = -EINVAL;
goto out;
}
xen_pcibk_config_add_field(dev, field);
out:
return err;
}
int xen_pcibk_config_quirks_init(struct pci_dev *dev)
{
struct xen_pcibk_config_quirk *quirk;
int ret = 0;
quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC);
if (!quirk) {
ret = -ENOMEM;
goto out;
}
quirk->devid.vendor = dev->vendor;
quirk->devid.device = dev->device;
quirk->devid.subvendor = dev->subsystem_vendor;
quirk->devid.subdevice = dev->subsystem_device;
quirk->devid.class = 0;
quirk->devid.class_mask = 0;
quirk->devid.driver_data = 0UL;
quirk->pdev = dev;
register_quirk(quirk);
out:
return ret;
}
void xen_pcibk_config_field_free(struct config_field *field)
{
kfree(field);
}
int xen_pcibk_config_quirk_release(struct pci_dev *dev)
{
struct xen_pcibk_config_quirk *quirk;
int ret = 0;
quirk = xen_pcibk_find_quirk(dev);
if (!quirk) {
ret = -ENXIO;
goto out;
}
list_del(&quirk->quirks_list);
kfree(quirk);
out:
return ret;
}

View file: drivers/xen/xen-pciback/conf_space_quirks.h

@@ -0,0 +1,33 @@
/*
* PCI Backend - Data structures for special overlays for broken devices.
*
* Ryan Wilson <hap9@epoch.ncsc.mil>
* Chris Bookholt <hap10@epoch.ncsc.mil>
*/
#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
#include <linux/pci.h>
#include <linux/list.h>
struct xen_pcibk_config_quirk {
struct list_head quirks_list;
struct pci_device_id devid;
struct pci_dev *pdev;
};
int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field
*field);
int xen_pcibk_config_quirks_remove_field(struct pci_dev *dev, int reg);
int xen_pcibk_config_quirks_init(struct pci_dev *dev);
void xen_pcibk_config_field_free(struct config_field *field);
int xen_pcibk_config_quirk_release(struct pci_dev *dev);
int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg);
#endif

View file: drivers/xen/xen-pciback/passthrough.c

@@ -0,0 +1,188 @@
/*
* PCI Backend - Provides restricted access to the real PCI bus topology
* to the frontend
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/mutex.h>
#include "pciback.h"
struct passthrough_dev_data {
/* Access to dev_list must be protected by lock */
struct list_head dev_list;
struct mutex lock;
};
static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
unsigned int domain,
unsigned int bus,
unsigned int devfn)
{
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry;
struct pci_dev *dev = NULL;
mutex_lock(&dev_data->lock);
list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
&& bus == (unsigned int)dev_entry->dev->bus->number
&& devfn == dev_entry->dev->devfn) {
dev = dev_entry->dev;
break;
}
}
mutex_unlock(&dev_data->lock);
return dev;
}
static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev,
int devid, publish_pci_dev_cb publish_cb)
{
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry;
unsigned int domain, bus, devfn;
int err;
dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
if (!dev_entry)
return -ENOMEM;
dev_entry->dev = dev;
mutex_lock(&dev_data->lock);
list_add_tail(&dev_entry->list, &dev_data->dev_list);
mutex_unlock(&dev_data->lock);
/* Publish this device. */
domain = (unsigned int)pci_domain_nr(dev->bus);
bus = (unsigned int)dev->bus->number;
devfn = dev->devfn;
err = publish_cb(pdev, domain, bus, devfn, devid);
return err;
}
static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev)
{
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry, *t;
struct pci_dev *found_dev = NULL;
mutex_lock(&dev_data->lock);
list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
if (dev_entry->dev == dev) {
list_del(&dev_entry->list);
found_dev = dev_entry->dev;
kfree(dev_entry);
}
}
mutex_unlock(&dev_data->lock);
if (found_dev)
pcistub_put_pci_dev(found_dev);
}
static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
{
struct passthrough_dev_data *dev_data;
dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
if (!dev_data)
return -ENOMEM;
mutex_init(&dev_data->lock);
INIT_LIST_HEAD(&dev_data->dev_list);
pdev->pci_dev_data = dev_data;
return 0;
}
static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
publish_pci_root_cb publish_root_cb)
{
int err = 0;
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry, *e;
struct pci_dev *dev;
int found;
unsigned int domain, bus;
mutex_lock(&dev_data->lock);
list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
/* Only publish this device as a root if none of its
* parent bridges are exported
*/
found = 0;
dev = dev_entry->dev->bus->self;
for (; !found && dev != NULL; dev = dev->bus->self) {
list_for_each_entry(e, &dev_data->dev_list, list) {
if (dev == e->dev) {
found = 1;
break;
}
}
}
domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
bus = (unsigned int)dev_entry->dev->bus->number;
if (!found) {
err = publish_root_cb(pdev, domain, bus);
if (err)
break;
}
}
mutex_unlock(&dev_data->lock);
return err;
}
static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
{
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry, *t;
list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
list_del(&dev_entry->list);
pcistub_put_pci_dev(dev_entry->dev);
kfree(dev_entry);
}
kfree(dev_data);
pdev->pci_dev_data = NULL;
}
static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
struct xen_pcibk_device *pdev,
unsigned int *domain, unsigned int *bus,
unsigned int *devfn)
{
*domain = pci_domain_nr(pcidev->bus);
*bus = pcidev->bus->number;
*devfn = pcidev->devfn;
return 1;
}
const struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
.name = "passthrough",
.init = __xen_pcibk_init_devices,
.free = __xen_pcibk_release_devices,
.find = __xen_pcibk_get_pcifront_dev,
.publish = __xen_pcibk_publish_pci_roots,
.release = __xen_pcibk_release_pci_dev,
.add = __xen_pcibk_add_pci_dev,
.get = __xen_pcibk_get_pci_dev,
};

File diff suppressed because it is too large: drivers/xen/xen-pciback/pci_stub.c

View file: drivers/xen/xen-pciback/pciback.h

@@ -0,0 +1,192 @@
/*
* PCI Backend Common Data Structures & Function Declarations
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#ifndef __XEN_PCIBACK_H__
#define __XEN_PCIBACK_H__
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <xen/xenbus.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/atomic.h>
#include <xen/interface/io/pciif.h>
#define DRV_NAME "xen-pciback"
struct pci_dev_entry {
struct list_head list;
struct pci_dev *dev;
};
#define _PDEVF_op_active (0)
#define PDEVF_op_active (1<<(_PDEVF_op_active))
#define _PCIB_op_pending (1)
#define PCIB_op_pending (1<<(_PCIB_op_pending))
struct xen_pcibk_device {
void *pci_dev_data;
struct mutex dev_lock;
struct xenbus_device *xdev;
struct xenbus_watch be_watch;
u8 be_watching;
int evtchn_irq;
struct xen_pci_sharedinfo *sh_info;
unsigned long flags;
struct work_struct op_work;
};
struct xen_pcibk_dev_data {
struct list_head config_fields;
struct pci_saved_state *pci_saved_state;
unsigned int permissive:1;
unsigned int warned_on_write:1;
unsigned int enable_intx:1;
unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
unsigned int ack_intr:1; /* .. and ACK-ing */
unsigned long handled;
unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
char irq_name[0]; /* xen-pcibk[000:04:00.0] */
};
/* Used by XenBus and xen_pcibk_ops.c */
extern wait_queue_head_t xen_pcibk_aer_wait_queue;
extern struct workqueue_struct *xen_pcibk_wq;
/* Used by pcistub.c and conf_space_quirks.c */
extern struct list_head xen_pcibk_quirks;
/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
int domain, int bus,
int slot, int func);
struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev);
void pcistub_put_pci_dev(struct pci_dev *dev);
/* Ensure a device is turned off or reset */
void xen_pcibk_reset_device(struct pci_dev *pdev);
/* Access a virtual configuration space for a PCI device */
int xen_pcibk_config_init(void);
int xen_pcibk_config_init_dev(struct pci_dev *dev);
void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev);
void xen_pcibk_config_reset_dev(struct pci_dev *dev);
void xen_pcibk_config_free_dev(struct pci_dev *dev);
int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
u32 *ret_val);
int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size,
u32 value);
/* Handle requests for specific devices from the frontend */
typedef int (*publish_pci_dev_cb) (struct xen_pcibk_device *pdev,
unsigned int domain, unsigned int bus,
unsigned int devfn, unsigned int devid);
typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev,
unsigned int domain, unsigned int bus);
/* Backend registration for the two types of BDF representation:
* vpci - BDFs start at 00
* passthrough - BDFs are exactly like in the host.
*/
struct xen_pcibk_backend {
const char *name;
int (*init)(struct xen_pcibk_device *pdev);
void (*free)(struct xen_pcibk_device *pdev);
int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev,
unsigned int *domain, unsigned int *bus,
unsigned int *devfn);
int (*publish)(struct xen_pcibk_device *pdev, publish_pci_root_cb cb);
void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev);
int (*add)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
int devid, publish_pci_dev_cb publish_cb);
struct pci_dev *(*get)(struct xen_pcibk_device *pdev,
unsigned int domain, unsigned int bus,
unsigned int devfn);
};
extern const struct xen_pcibk_backend xen_pcibk_vpci_backend;
extern const struct xen_pcibk_backend xen_pcibk_passthrough_backend;
extern const struct xen_pcibk_backend *xen_pcibk_backend;
static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev,
int devid,
publish_pci_dev_cb publish_cb)
{
if (xen_pcibk_backend && xen_pcibk_backend->add)
return xen_pcibk_backend->add(pdev, dev, devid, publish_cb);
return -1;
}
static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev)
{
if (xen_pcibk_backend && xen_pcibk_backend->release)
return xen_pcibk_backend->release(pdev, dev);
}
static inline struct pci_dev *
xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
unsigned int bus, unsigned int devfn)
{
if (xen_pcibk_backend && xen_pcibk_backend->get)
return xen_pcibk_backend->get(pdev, domain, bus, devfn);
return NULL;
}
/**
* Added for dom0 PCIe AER handling. Get the guest domain/bus/devfn in
* xen_pcibk before sending an AER request to pcifront, so that the guest
* can identify the device and cooperate with xen_pcibk to finish the AER
* recovery job, provided the device driver has the capability.
*/
static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
struct xen_pcibk_device *pdev,
unsigned int *domain,
unsigned int *bus,
unsigned int *devfn)
{
if (xen_pcibk_backend && xen_pcibk_backend->find)
return xen_pcibk_backend->find(pcidev, pdev, domain, bus,
devfn);
return -1;
}
static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
{
if (xen_pcibk_backend && xen_pcibk_backend->init)
return xen_pcibk_backend->init(pdev);
return -1;
}
static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
publish_pci_root_cb cb)
{
if (xen_pcibk_backend && xen_pcibk_backend->publish)
return xen_pcibk_backend->publish(pdev, cb);
return -1;
}
static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
{
if (xen_pcibk_backend && xen_pcibk_backend->free)
return xen_pcibk_backend->free(pdev);
}
/* Handles events from front-end */
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
void xen_pcibk_do_op(struct work_struct *data);
int xen_pcibk_xenbus_register(void);
void xen_pcibk_xenbus_unregister(void);
extern int verbose_request;
void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev);
#endif
/* Handles shared IRQs that can go to both the device domain and the
* control domain. */
void xen_pcibk_irq_handler(struct pci_dev *dev, int reset);

View file: drivers/xen/xen-pciback/pciback_ops.c

@@ -0,0 +1,387 @@
/*
* PCI Backend Operations - respond to PCI requests from Frontend
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/wait.h>
#include <linux/bitops.h>
#include <xen/events.h>
#include <linux/sched.h>
#include "pciback.h"
int verbose_request;
module_param(verbose_request, int, 0644);
static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id);
/* Ensure a device has the fake IRQ handler "turned on/off" and is
* ready to be exported. This MUST be run after xen_pcibk_reset_device,
* which does the actual PCI device enable/disable.
*/
static void xen_pcibk_control_isr(struct pci_dev *dev, int reset)
{
struct xen_pcibk_dev_data *dev_data;
int rc;
int enable = 0;
dev_data = pci_get_drvdata(dev);
if (!dev_data)
return;
/* We don't deal with bridges */
if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
return;
if (reset) {
dev_data->enable_intx = 0;
dev_data->ack_intr = 0;
}
enable = dev_data->enable_intx;
/* Asked to disable, but ISR isn't running */
if (!enable && !dev_data->isr_on)
return;
/* Squirrel away the IRQs in the dev_data. We need this
* b/c when device transitions to MSI, the dev->irq is
* overwritten with the MSI vector.
*/
if (enable)
dev_data->irq = dev->irq;
/*
* SR-IOV devices all use MSI-X and have no legacy interrupts, so
* inhibit creating a fake IRQ handler for them.
*/
if (dev_data->irq == 0)
goto out;
dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
dev_data->irq_name,
dev_data->irq,
pci_is_enabled(dev) ? "on" : "off",
dev->msi_enabled ? "MSI" : "",
dev->msix_enabled ? "MSI/X" : "",
dev_data->isr_on ? "enable" : "disable",
enable ? "enable" : "disable");
if (enable) {
rc = request_irq(dev_data->irq,
xen_pcibk_guest_interrupt, IRQF_SHARED,
dev_data->irq_name, dev);
if (rc) {
dev_err(&dev->dev, "%s: failed to install fake IRQ " \
"handler for IRQ %d! (rc:%d)\n",
dev_data->irq_name, dev_data->irq, rc);
goto out;
}
} else {
free_irq(dev_data->irq, dev);
dev_data->irq = 0;
}
dev_data->isr_on = enable;
dev_data->ack_intr = enable;
out:
dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
dev_data->irq_name,
dev_data->irq,
pci_is_enabled(dev) ? "on" : "off",
dev->msi_enabled ? "MSI" : "",
dev->msix_enabled ? "MSI/X" : "",
enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
(dev_data->isr_on ? "failed to disable" : "disabled"));
}
/* Ensure a device is "turned off" and ready to be exported.
* (Also see xen_pcibk_config_reset_dev to ensure virtual configuration
* space is ready to be re-exported)
*/
void xen_pcibk_reset_device(struct pci_dev *dev)
{
u16 cmd;
xen_pcibk_control_isr(dev, 1 /* reset device */);
/* Disable devices (but not bridges) */
if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
#ifdef CONFIG_PCI_MSI
/* The guest could have been abruptly killed without
* disabling MSI/MSI-X interrupts.*/
if (dev->msix_enabled)
pci_disable_msix(dev);
if (dev->msi_enabled)
pci_disable_msi(dev);
#endif
if (pci_is_enabled(dev))
pci_disable_device(dev);
pci_write_config_word(dev, PCI_COMMAND, 0);
dev->is_busmaster = 0;
} else {
pci_read_config_word(dev, PCI_COMMAND, &cmd);
if (cmd & (PCI_COMMAND_INVALIDATE)) {
cmd &= ~(PCI_COMMAND_INVALIDATE);
pci_write_config_word(dev, PCI_COMMAND, cmd);
dev->is_busmaster = 0;
}
}
}
#ifdef CONFIG_PCI_MSI
static
int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
struct xen_pcibk_dev_data *dev_data;
int status;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev));
status = pci_enable_msi(dev);
if (status) {
pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n",
pci_name(dev), pdev->xdev->otherend_id,
status);
op->value = 0;
return XEN_PCI_ERR_op_failed;
}
/* The value the guest needs is actually the IDT vector, not
* the local domain's IRQ number. */
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
op->value);
dev_data = pci_get_drvdata(dev);
if (dev_data)
dev_data->ack_intr = 0;
return 0;
}
static
int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
struct xen_pcibk_dev_data *dev_data;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n",
pci_name(dev));
pci_disable_msi(dev);
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
op->value);
dev_data = pci_get_drvdata(dev);
if (dev_data)
dev_data->ack_intr = 1;
return 0;
}
static
int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
struct xen_pcibk_dev_data *dev_data;
int i, result;
struct msix_entry *entries;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n",
pci_name(dev));
if (op->value > SH_INFO_MAX_VEC)
return -EINVAL;
entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
if (entries == NULL)
return -ENOMEM;
for (i = 0; i < op->value; i++) {
entries[i].entry = op->msix_entries[i].entry;
entries[i].vector = op->msix_entries[i].vector;
}
result = pci_enable_msix_exact(dev, entries, op->value);
if (result == 0) {
for (i = 0; i < op->value; i++) {
op->msix_entries[i].entry = entries[i].entry;
if (entries[i].vector) {
op->msix_entries[i].vector =
xen_pirq_from_irq(entries[i].vector);
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: " \
"MSI-X[%d]: %d\n",
pci_name(dev), i,
op->msix_entries[i].vector);
}
}
} else
pr_warn_ratelimited("%s: error enabling MSI-X for guest %u: err %d!\n",
pci_name(dev), pdev->xdev->otherend_id,
result);
kfree(entries);
op->value = result;
dev_data = pci_get_drvdata(dev);
if (dev_data)
dev_data->ack_intr = 0;
return result > 0 ? 0 : result;
}
static
int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
struct xen_pcibk_dev_data *dev_data;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n",
pci_name(dev));
pci_disable_msix(dev);
/*
* SR-IOV devices (which don't have any legacy IRQ) have
* an undefined IRQ value of zero.
*/
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev),
op->value);
dev_data = pci_get_drvdata(dev);
if (dev_data)
dev_data->ack_intr = 1;
return 0;
}
#endif
/*
* Now the same evtchn is used for both pcifront conf_read_write requests
* and PCIe AER frontend acks. We use a dedicated workqueue to schedule
* the xen_pcibk conf_read_write service, to avoid conflicts with the
* aer_core do_recovery job, which also uses the system default workqueue.
*/
void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
{
/* Check that frontend is requesting an operation and that we are not
* already processing a request */
if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
&& !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
queue_work(xen_pcibk_wq, &pdev->op_work);
}
/* _XEN_PCIB_active should have been cleared by pcifront. Also make
* sure xen_pcibk is waiting for the ack by checking _PCIB_op_pending. */
if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
&& test_bit(_PCIB_op_pending, &pdev->flags)) {
wake_up(&xen_pcibk_aer_wait_queue);
}
}
/* Performing the configuration space reads/writes must not be done in atomic
* context because some of the pci_* functions can sleep (mostly due to ACPI
* use of semaphores). This function is intended to be called from a work
* queue in process context taking a struct xen_pcibk_device as a parameter */
void xen_pcibk_do_op(struct work_struct *data)
{
struct xen_pcibk_device *pdev =
container_of(data, struct xen_pcibk_device, op_work);
struct pci_dev *dev;
struct xen_pcibk_dev_data *dev_data = NULL;
struct xen_pci_op *op = &pdev->sh_info->op;
int test_intx = 0;
dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
if (dev == NULL)
op->err = XEN_PCI_ERR_dev_not_found;
else {
dev_data = pci_get_drvdata(dev);
if (dev_data)
test_intx = dev_data->enable_intx;
switch (op->cmd) {
case XEN_PCI_OP_conf_read:
op->err = xen_pcibk_config_read(dev,
op->offset, op->size, &op->value);
break;
case XEN_PCI_OP_conf_write:
op->err = xen_pcibk_config_write(dev,
op->offset, op->size, op->value);
break;
#ifdef CONFIG_PCI_MSI
case XEN_PCI_OP_enable_msi:
op->err = xen_pcibk_enable_msi(pdev, dev, op);
break;
case XEN_PCI_OP_disable_msi:
op->err = xen_pcibk_disable_msi(pdev, dev, op);
break;
case XEN_PCI_OP_enable_msix:
op->err = xen_pcibk_enable_msix(pdev, dev, op);
break;
case XEN_PCI_OP_disable_msix:
op->err = xen_pcibk_disable_msix(pdev, dev, op);
break;
#endif
default:
op->err = XEN_PCI_ERR_not_implemented;
break;
}
}
if (!op->err && dev && dev_data) {
/* Transition detected */
if ((dev_data->enable_intx != test_intx))
xen_pcibk_control_isr(dev, 0 /* no reset */);
}
/* Tell the driver domain that we're done. */
wmb();
clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
notify_remote_via_irq(pdev->evtchn_irq);
/* Mark that we're done. */
smp_mb__before_atomic(); /* /after/ clearing PCIF_active */
clear_bit(_PDEVF_op_active, &pdev->flags);
smp_mb__after_atomic(); /* /before/ final check for work */
/* Check to see if the driver domain tried to start another request in
* between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
*/
xen_pcibk_test_and_schedule_op(pdev);
}
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id)
{
struct xen_pcibk_device *pdev = dev_id;
xen_pcibk_test_and_schedule_op(pdev);
return IRQ_HANDLED;
}
static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id)
{
struct pci_dev *dev = (struct pci_dev *)dev_id;
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
if (dev_data->isr_on && dev_data->ack_intr) {
dev_data->handled++;
if ((dev_data->handled % 1000) == 0) {
if (xen_test_irq_shared(irq)) {
pr_info("%s IRQ line is not shared "
"with other domains. Turning ISR off\n",
dev_data->irq_name);
dev_data->ack_intr = 0;
}
}
return IRQ_HANDLED;
}
return IRQ_NONE;
}

View file: drivers/xen/xen-pciback/vpci.c

@@ -0,0 +1,262 @@
/*
* PCI Backend - Provides a Virtual PCI bus (with real devices)
* to the frontend
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/mutex.h>
#include "pciback.h"
#define PCI_SLOT_MAX 32
struct vpci_dev_data {
/* Access to dev_list must be protected by lock */
struct list_head dev_list[PCI_SLOT_MAX];
struct mutex lock;
};
static inline struct list_head *list_first(struct list_head *head)
{
return head->next;
}
static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
unsigned int domain,
unsigned int bus,
unsigned int devfn)
{
struct pci_dev_entry *entry;
struct pci_dev *dev = NULL;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
if (domain != 0 || bus != 0)
return NULL;
if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
mutex_lock(&vpci_dev->lock);
list_for_each_entry(entry,
&vpci_dev->dev_list[PCI_SLOT(devfn)],
list) {
if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) {
dev = entry->dev;
break;
}
}
mutex_unlock(&vpci_dev->lock);
}
return dev;
}
static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
{
if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus)
&& l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn))
return 1;
return 0;
}
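
/* Example: physical functions 0000:03:05.0 and 0000:03:05.1 satisfy
* match_slot(), so __xen_pcibk_add_pci_dev() below puts them in the same
* virtual slot, where they keep their function numbers and appear to the
* guest as, e.g., 00:02.0 and 00:02.1 (the slot number is hypothetical).
*/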
static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev, int devid,
publish_pci_dev_cb publish_cb)
{
int err = 0, slot, func = -1;
struct pci_dev_entry *t, *dev_entry;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
err = -EFAULT;
xenbus_dev_fatal(pdev->xdev, err,
"Can't export bridges on the virtual PCI bus");
goto out;
}
dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
if (!dev_entry) {
err = -ENOMEM;
xenbus_dev_fatal(pdev->xdev, err,
"Error adding entry to virtual PCI bus");
goto out;
}
dev_entry->dev = dev;
mutex_lock(&vpci_dev->lock);
/*
* Keep multi-function devices together on the virtual PCI bus, except
* virtual functions.
*/
if (!dev->is_virtfn) {
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
if (list_empty(&vpci_dev->dev_list[slot]))
continue;
t = list_entry(list_first(&vpci_dev->dev_list[slot]),
struct pci_dev_entry, list);
if (match_slot(dev, t->dev)) {
pr_info("vpci: %s: assign to virtual slot %d func %d\n",
pci_name(dev), slot,
PCI_FUNC(dev->devfn));
list_add_tail(&dev_entry->list,
&vpci_dev->dev_list[slot]);
func = PCI_FUNC(dev->devfn);
goto unlock;
}
}
}
/* Assign to a new slot on the virtual PCI bus */
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
if (list_empty(&vpci_dev->dev_list[slot])) {
pr_info("vpci: %s: assign to virtual slot %d\n",
pci_name(dev), slot);
list_add_tail(&dev_entry->list,
&vpci_dev->dev_list[slot]);
func = dev->is_virtfn ? 0 : PCI_FUNC(dev->devfn);
goto unlock;
}
}
err = -ENOMEM;
xenbus_dev_fatal(pdev->xdev, err,
"No more space on root virtual PCI bus");
unlock:
mutex_unlock(&vpci_dev->lock);
/* Publish this device. */
if (!err)
err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
else
kfree(dev_entry);
out:
return err;
}
static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev)
{
int slot;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
struct pci_dev *found_dev = NULL;
mutex_lock(&vpci_dev->lock);
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
struct pci_dev_entry *e;
list_for_each_entry(e, &vpci_dev->dev_list[slot], list) {
if (e->dev == dev) {
list_del(&e->list);
found_dev = e->dev;
kfree(e);
goto out;
}
}
}
out:
mutex_unlock(&vpci_dev->lock);
if (found_dev)
pcistub_put_pci_dev(found_dev);
}
static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
{
int slot;
struct vpci_dev_data *vpci_dev;
vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL);
if (!vpci_dev)
return -ENOMEM;
mutex_init(&vpci_dev->lock);
for (slot = 0; slot < PCI_SLOT_MAX; slot++)
INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
pdev->pci_dev_data = vpci_dev;
return 0;
}
static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
publish_pci_root_cb publish_cb)
{
/* The Virtual PCI bus has only one root */
return publish_cb(pdev, 0, 0);
}
static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
{
int slot;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
struct pci_dev_entry *e, *tmp;
list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
list) {
list_del(&e->list);
pcistub_put_pci_dev(e->dev);
kfree(e);
}
}
kfree(vpci_dev);
pdev->pci_dev_data = NULL;
}
static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
struct xen_pcibk_device *pdev,
unsigned int *domain, unsigned int *bus,
unsigned int *devfn)
{
struct pci_dev_entry *entry;
struct pci_dev *dev = NULL;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
int found = 0, slot;
mutex_lock(&vpci_dev->lock);
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
list_for_each_entry(entry,
&vpci_dev->dev_list[slot],
list) {
dev = entry->dev;
if (dev && dev->bus->number == pcidev->bus->number
&& pci_domain_nr(dev->bus) ==
pci_domain_nr(pcidev->bus)
&& dev->devfn == pcidev->devfn) {
found = 1;
*domain = 0;
*bus = 0;
*devfn = PCI_DEVFN(slot,
PCI_FUNC(pcidev->devfn));
}
}
}
mutex_unlock(&vpci_dev->lock);
return found;
}
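/*
 * The vpci backend flattens every exported device onto a single virtual
 * bus (domain 0, bus 0), handing out virtual slots in the order devices
 * are added.
 */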
const struct xen_pcibk_backend xen_pcibk_vpci_backend = {
.name = "vpci",
.init = __xen_pcibk_init_devices,
.free = __xen_pcibk_release_devices,
.find = __xen_pcibk_get_pcifront_dev,
.publish = __xen_pcibk_publish_pci_roots,
.release = __xen_pcibk_release_pci_dev,
.add = __xen_pcibk_add_pci_dev,
.get = __xen_pcibk_get_pci_dev,
};

View file

@ -0,0 +1,750 @@
/*
* PCI Backend Xenbus Setup - handles setup with frontend and xend
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <xen/xenbus.h>
#include <xen/events.h>
#include <asm/xen/pci.h>
#include "pciback.h"
#define INVALID_EVTCHN_IRQ (-1)
struct workqueue_struct *xen_pcibk_wq;
static bool __read_mostly passthrough;
module_param(passthrough, bool, S_IRUGO);
MODULE_PARM_DESC(passthrough,
"Option to specify how to export PCI topology to guest:\n"\
" 0 - (default) Hide the true PCI topology and makes the frontend\n"\
" there is a single PCI bus with only the exported devices on it.\n"\
" For example, a device at 03:05.0 will be re-assigned to 00:00.0\n"\
" while second device at 02:1a.1 will be re-assigned to 00:01.1.\n"\
" 1 - Passthrough provides a real view of the PCI topology to the\n"\
" frontend (for example, a device at 06:01.b will still appear at\n"\
" 06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\
" exposed PCI devices to its driver domains. This may be required\n"\
" for drivers which depend on finding their hardward in certain\n"\
" bus/slot locations.");
static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
{
struct xen_pcibk_device *pdev;
pdev = kzalloc(sizeof(struct xen_pcibk_device), GFP_KERNEL);
if (pdev == NULL)
goto out;
dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
pdev->xdev = xdev;
dev_set_drvdata(&xdev->dev, pdev);
mutex_init(&pdev->dev_lock);
pdev->sh_info = NULL;
pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
pdev->be_watching = 0;
INIT_WORK(&pdev->op_work, xen_pcibk_do_op);
if (xen_pcibk_init_devices(pdev)) {
kfree(pdev);
pdev = NULL;
}
out:
return pdev;
}
static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
{
mutex_lock(&pdev->dev_lock);
/* Ensure the guest can't trigger our handler before removing devices */
if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
unbind_from_irqhandler(pdev->evtchn_irq, pdev);
pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
}
/* If the driver domain started an op, make sure we complete it
* before releasing the shared memory */
/* Note: the workqueue does not use spinlocks at all. */
flush_workqueue(xen_pcibk_wq);
if (pdev->sh_info != NULL) {
xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
pdev->sh_info = NULL;
}
mutex_unlock(&pdev->dev_lock);
}
static void free_pdev(struct xen_pcibk_device *pdev)
{
if (pdev->be_watching) {
unregister_xenbus_watch(&pdev->be_watch);
pdev->be_watching = 0;
}
xen_pcibk_disconnect(pdev);
/* N.B. This calls pcistub_put_pci_dev which does the FLR on all
* of the PCIe devices. */
xen_pcibk_release_devices(pdev);
dev_set_drvdata(&pdev->xdev->dev, NULL);
pdev->xdev = NULL;
kfree(pdev);
}
static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
int remote_evtchn)
{
int err = 0;
void *vaddr;
dev_dbg(&pdev->xdev->dev,
"Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
gnt_ref, remote_evtchn);
err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
if (err < 0) {
xenbus_dev_fatal(pdev->xdev, err,
"Error mapping other domain page in ours.");
goto out;
}
pdev->sh_info = vaddr;
err = bind_interdomain_evtchn_to_irqhandler(
pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
0, DRV_NAME, pdev);
if (err < 0) {
xenbus_dev_fatal(pdev->xdev, err,
"Error binding event channel to IRQ");
goto out;
}
pdev->evtchn_irq = err;
err = 0;
dev_dbg(&pdev->xdev->dev, "Attached!\n");
out:
return err;
}
static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
{
int err = 0;
int gnt_ref, remote_evtchn;
char *magic = NULL;
mutex_lock(&pdev->dev_lock);
/* Make sure we only do this setup once */
if (xenbus_read_driver_state(pdev->xdev->nodename) !=
XenbusStateInitialised)
goto out;
/* Wait for frontend to state that it has published the configuration */
if (xenbus_read_driver_state(pdev->xdev->otherend) !=
XenbusStateInitialised)
goto out;
dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
"pci-op-ref", "%u", &gnt_ref,
"event-channel", "%u", &remote_evtchn,
"magic", NULL, &magic, NULL);
if (err) {
/* If configuration didn't get read correctly, wait longer */
xenbus_dev_fatal(pdev->xdev, err,
"Error reading configuration from frontend");
goto out;
}
if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
xenbus_dev_fatal(pdev->xdev, -EFAULT,
"version mismatch (%s/%s) with pcifront - "
"halting " DRV_NAME,
magic, XEN_PCI_MAGIC);
err = -EFAULT;
goto out;
}
err = xen_pcibk_do_attach(pdev, gnt_ref, remote_evtchn);
if (err)
goto out;
dev_dbg(&pdev->xdev->dev, "Connecting...\n");
err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
if (err)
xenbus_dev_fatal(pdev->xdev, err,
"Error switching to connected state!");
dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
out:
mutex_unlock(&pdev->dev_lock);
kfree(magic);
return err;
}
static int xen_pcibk_publish_pci_dev(struct xen_pcibk_device *pdev,
unsigned int domain, unsigned int bus,
unsigned int devfn, unsigned int devid)
{
int err;
int len;
char str[64];
len = snprintf(str, sizeof(str), "vdev-%d", devid);
if (unlikely(len >= (sizeof(str) - 1))) {
err = -ENOMEM;
goto out;
}
/* Note: The PV protocol uses %02x, don't change it */
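/* For example (illustrative path, assuming frontend domain 1):
 *   backend/pci/1/0/vdev-0 = "0000:00:00.00"
 */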
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
"%04x:%02x:%02x.%02x", domain, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
out:
return err;
}
static int xen_pcibk_export_device(struct xen_pcibk_device *pdev,
int domain, int bus, int slot, int func,
int devid)
{
struct pci_dev *dev;
int err = 0;
dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
domain, bus, slot, func);
dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
if (!dev) {
err = -EINVAL;
xenbus_dev_fatal(pdev->xdev, err,
"Couldn't locate PCI device "
"(%04x:%02x:%02x.%d)! "
"perhaps already in-use?",
domain, bus, slot, func);
goto out;
}
err = xen_pcibk_add_pci_dev(pdev, dev, devid,
xen_pcibk_publish_pci_dev);
if (err)
goto out;
dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
if (xen_register_device_domain_owner(dev,
pdev->xdev->otherend_id) != 0) {
dev_err(&dev->dev, "Stealing ownership from dom%d.\n",
xen_find_device_domain_owner(dev));
xen_unregister_device_domain_owner(dev);
xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
}
/* TODO: It'd be nice to export a bridge and have all of its children
* get exported with it. This may be best done in xend (which will
* have to calculate resource usage anyway) but we probably want to
* put something in here to ensure that if a bridge gets given to a
* driver domain, that all devices under that bridge are not given
* to other driver domains (as whoever controls the bridge can disable
* it and stop the other devices from working).
*/
out:
return err;
}
static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,
int domain, int bus, int slot, int func)
{
int err = 0;
struct pci_dev *dev;
dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
domain, bus, slot, func);
dev = xen_pcibk_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
if (!dev) {
err = -EINVAL;
dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
"(%04x:%02x:%02x.%d)! not owned by this domain\n",
domain, bus, slot, func);
goto out;
}
dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
xen_unregister_device_domain_owner(dev);
/* N.B. This ends up calling pcistub_put_pci_dev which ends up
* doing the FLR. */
xen_pcibk_release_pci_dev(pdev, dev);
out:
return err;
}
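/*
 * Record each exported root bus in xenstore so pcifront can enumerate it,
 * e.g. (illustrative, assuming frontend domain 1):
 *   backend/pci/1/0/root_num = "1"
 *   backend/pci/1/0/root-0 = "0000:00"
 */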
static int xen_pcibk_publish_pci_root(struct xen_pcibk_device *pdev,
unsigned int domain, unsigned int bus)
{
unsigned int d, b;
int i, root_num, len, err;
char str[64];
dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
"root_num", "%d", &root_num);
if (err == 0 || err == -ENOENT)
root_num = 0;
else if (err < 0)
goto out;
/* Verify that we haven't already published this pci root */
for (i = 0; i < root_num; i++) {
len = snprintf(str, sizeof(str), "root-%d", i);
if (unlikely(len >= (sizeof(str) - 1))) {
err = -ENOMEM;
goto out;
}
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
str, "%x:%x", &d, &b);
if (err < 0)
goto out;
if (err != 2) {
err = -EINVAL;
goto out;
}
if (d == domain && b == bus) {
err = 0;
goto out;
}
}
len = snprintf(str, sizeof(str), "root-%d", root_num);
if (unlikely(len >= (sizeof(str) - 1))) {
err = -ENOMEM;
goto out;
}
dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
root_num, domain, bus);
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
"%04x:%02x", domain, bus);
if (err)
goto out;
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
"root_num", "%d", (root_num + 1));
out:
return err;
}
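/*
 * Walk the num_devs entries the toolstack wrote under our nodename: a
 * substate of Initialising means export the device named in dev-%d, a
 * substate of Closing means detach the device named in vdev-%d; on a
 * successful export the entry's state-%d is advanced to Initialised.
 */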
static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
{
int err = 0;
int num_devs;
int domain, bus, slot, func;
int substate;
int i, len;
char state_str[64];
char dev_str[64];
dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
mutex_lock(&pdev->dev_lock);
/* Make sure we only reconfigure once */
if (xenbus_read_driver_state(pdev->xdev->nodename) !=
XenbusStateReconfiguring)
goto out;
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
&num_devs);
if (err != 1) {
if (err >= 0)
err = -EINVAL;
xenbus_dev_fatal(pdev->xdev, err,
"Error reading number of devices");
goto out;
}
for (i = 0; i < num_devs; i++) {
len = snprintf(state_str, sizeof(state_str), "state-%d", i);
if (unlikely(len >= (sizeof(state_str) - 1))) {
err = -ENOMEM;
xenbus_dev_fatal(pdev->xdev, err,
"String overflow while reading "
"configuration");
goto out;
}
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
"%d", &substate);
if (err != 1)
substate = XenbusStateUnknown;
switch (substate) {
case XenbusStateInitialising:
dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
if (unlikely(len >= (sizeof(dev_str) - 1))) {
err = -ENOMEM;
xenbus_dev_fatal(pdev->xdev, err,
"String overflow while "
"reading configuration");
goto out;
}
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
dev_str, "%x:%x:%x.%x",
&domain, &bus, &slot, &func);
if (err < 0) {
xenbus_dev_fatal(pdev->xdev, err,
"Error reading device "
"configuration");
goto out;
}
if (err != 4) {
err = -EINVAL;
xenbus_dev_fatal(pdev->xdev, err,
"Error parsing pci device "
"configuration");
goto out;
}
err = xen_pcibk_export_device(pdev, domain, bus, slot,
func, i);
if (err)
goto out;
/* Publish pci roots. */
err = xen_pcibk_publish_pci_roots(pdev,
xen_pcibk_publish_pci_root);
if (err) {
xenbus_dev_fatal(pdev->xdev, err,
"Error while publish PCI root"
"buses for frontend");
goto out;
}
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
state_str, "%d",
XenbusStateInitialised);
if (err) {
xenbus_dev_fatal(pdev->xdev, err,
"Error switching substate of "
"dev-%d\n", i);
goto out;
}
break;
case XenbusStateClosing:
dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
if (unlikely(len >= (sizeof(dev_str) - 1))) {
err = -ENOMEM;
xenbus_dev_fatal(pdev->xdev, err,
"String overflow while "
"reading configuration");
goto out;
}
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
dev_str, "%x:%x:%x.%x",
&domain, &bus, &slot, &func);
if (err < 0) {
xenbus_dev_fatal(pdev->xdev, err,
"Error reading device "
"configuration");
goto out;
}
if (err != 4) {
err = -EINVAL;
xenbus_dev_fatal(pdev->xdev, err,
"Error parsing pci device "
"configuration");
goto out;
}
err = xen_pcibk_remove_device(pdev, domain, bus, slot,
func);
if (err)
goto out;
/* TODO: If at some point we implement support for pci
* root hot-remove on pcifront side, we'll need to
* remove unnecessary xenstore nodes of pci roots here.
*/
break;
default:
break;
}
}
err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
if (err) {
xenbus_dev_fatal(pdev->xdev, err,
"Error switching to reconfigured state!");
goto out;
}
out:
mutex_unlock(&pdev->dev_lock);
return 0;
}
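/*
 * xenbus handshake as implemented below: the frontend reaching
 * Initialised triggers attach (map the ring, bind the event channel,
 * switch to Connected); Reconfiguring triggers a device re-scan;
 * Closing and Closed tear the connection down.
 */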
static void xen_pcibk_frontend_changed(struct xenbus_device *xdev,
enum xenbus_state fe_state)
{
struct xen_pcibk_device *pdev = dev_get_drvdata(&xdev->dev);
dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
switch (fe_state) {
case XenbusStateInitialised:
xen_pcibk_attach(pdev);
break;
case XenbusStateReconfiguring:
xen_pcibk_reconfigure(pdev);
break;
case XenbusStateConnected:
/* pcifront switched its state from Reconfiguring to Connected,
 * so mirror it and switch to Connected too.
 */
xenbus_switch_state(xdev, XenbusStateConnected);
break;
case XenbusStateClosing:
xen_pcibk_disconnect(pdev);
xenbus_switch_state(xdev, XenbusStateClosing);
break;
case XenbusStateClosed:
xen_pcibk_disconnect(pdev);
xenbus_switch_state(xdev, XenbusStateClosed);
if (xenbus_dev_is_online(xdev))
break;
/* fall through if not online */
case XenbusStateUnknown:
dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
device_unregister(&xdev->dev);
break;
default:
break;
}
}
static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
{
/* Get configuration from xend (if available now) */
int domain, bus, slot, func;
int err = 0;
int i, num_devs;
char dev_str[64];
char state_str[64];
mutex_lock(&pdev->dev_lock);
/* It's possible we could get the call to setup twice, so make sure
* we're not already connected.
*/
if (xenbus_read_driver_state(pdev->xdev->nodename) !=
XenbusStateInitWait)
goto out;
dev_dbg(&pdev->xdev->dev, "getting be setup\n");
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
&num_devs);
if (err != 1) {
if (err >= 0)
err = -EINVAL;
xenbus_dev_fatal(pdev->xdev, err,
"Error reading number of devices");
goto out;
}
for (i = 0; i < num_devs; i++) {
int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
if (unlikely(l >= (sizeof(dev_str) - 1))) {
err = -ENOMEM;
xenbus_dev_fatal(pdev->xdev, err,
"String overflow while reading "
"configuration");
goto out;
}
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
"%x:%x:%x.%x", &domain, &bus, &slot, &func);
if (err < 0) {
xenbus_dev_fatal(pdev->xdev, err,
"Error reading device configuration");
goto out;
}
if (err != 4) {
err = -EINVAL;
xenbus_dev_fatal(pdev->xdev, err,
"Error parsing pci device "
"configuration");
goto out;
}
err = xen_pcibk_export_device(pdev, domain, bus, slot, func, i);
if (err)
goto out;
/* Switch substate of this device. */
l = snprintf(state_str, sizeof(state_str), "state-%d", i);
if (unlikely(l >= (sizeof(state_str) - 1))) {
err = -ENOMEM;
xenbus_dev_fatal(pdev->xdev, err,
"String overflow while reading "
"configuration");
goto out;
}
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
"%d", XenbusStateInitialised);
if (err) {
xenbus_dev_fatal(pdev->xdev, err, "Error switching "
"substate of dev-%d\n", i);
goto out;
}
}
err = xen_pcibk_publish_pci_roots(pdev, xen_pcibk_publish_pci_root);
if (err) {
xenbus_dev_fatal(pdev->xdev, err,
"Error while publish PCI root buses "
"for frontend");
goto out;
}
err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
if (err)
xenbus_dev_fatal(pdev->xdev, err,
"Error switching to initialised state!");
out:
mutex_unlock(&pdev->dev_lock);
if (!err)
/* see if pcifront is already configured (if not, we'll wait) */
xen_pcibk_attach(pdev);
return err;
}
static void xen_pcibk_be_watch(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
struct xen_pcibk_device *pdev =
container_of(watch, struct xen_pcibk_device, be_watch);
switch (xenbus_read_driver_state(pdev->xdev->nodename)) {
case XenbusStateInitWait:
xen_pcibk_setup_backend(pdev);
break;
default:
break;
}
}
static int xen_pcibk_xenbus_probe(struct xenbus_device *dev,
const struct xenbus_device_id *id)
{
int err = 0;
struct xen_pcibk_device *pdev = alloc_pdev(dev);
if (pdev == NULL) {
err = -ENOMEM;
xenbus_dev_fatal(dev, err,
"Error allocating xen_pcibk_device struct");
goto out;
}
/* wait for xend to configure us */
err = xenbus_switch_state(dev, XenbusStateInitWait);
if (err)
goto out;
/* watch the backend node for backend configuration information */
err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
xen_pcibk_be_watch);
if (err)
goto out;
pdev->be_watching = 1;
/* We need to force a call to our callback here in case
* xend already configured us!
*/
xen_pcibk_be_watch(&pdev->be_watch, NULL, 0);
out:
return err;
}
static int xen_pcibk_xenbus_remove(struct xenbus_device *dev)
{
struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev);
if (pdev != NULL)
free_pdev(pdev);
return 0;
}
static const struct xenbus_device_id xen_pcibk_ids[] = {
{"pci"},
{""},
};
static struct xenbus_driver xen_pcibk_driver = {
.name = DRV_NAME,
.ids = xen_pcibk_ids,
.probe = xen_pcibk_xenbus_probe,
.remove = xen_pcibk_xenbus_remove,
.otherend_changed = xen_pcibk_frontend_changed,
};
const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend;
int __init xen_pcibk_xenbus_register(void)
{
xen_pcibk_wq = create_workqueue("xen_pciback_workqueue");
if (!xen_pcibk_wq) {
pr_err("%s: create xen_pciback_workqueue failed\n", __func__);
return -EFAULT;
}
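/* Topology policy is fixed at load time: the flattened vpci view by
 * default, the real-topology passthrough view when passthrough=1. */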
xen_pcibk_backend = &xen_pcibk_vpci_backend;
if (passthrough)
xen_pcibk_backend = &xen_pcibk_passthrough_backend;
pr_info("backend is %s\n", xen_pcibk_backend->name);
return xenbus_register_backend(&xen_pcibk_driver);
}
void __exit xen_pcibk_xenbus_unregister(void)
{
destroy_workqueue(xen_pcibk_wq);
xenbus_unregister_driver(&xen_pcibk_driver);
}