Mirror of https://github.com/AetherDroid/android_kernel_samsung_on5xelte.git
Synced 2025-09-07 08:48:05 -04:00

Commit f6dfaef42e: Fixed MTP to work with TWRP
50820 changed files with 20846062 additions and 0 deletions
virt/kvm/Kconfig (new file, 39 lines)
@@ -0,0 +1,39 @@
# KVM common configuration items and defaults

config HAVE_KVM
	bool

config HAVE_KVM_IRQCHIP
	bool

config HAVE_KVM_IRQFD
	bool

config HAVE_KVM_IRQ_ROUTING
	bool

config HAVE_KVM_EVENTFD
	bool
	select EVENTFD

config KVM_APIC_ARCHITECTURE
	bool

config KVM_MMIO
	bool

config KVM_ASYNC_PF
	bool

# Toggle to switch between direct notification and batch job
config KVM_ASYNC_PF_SYNC
	bool

config HAVE_KVM_MSI
	bool

config HAVE_KVM_CPU_RELAX_INTERCEPT
	bool

config KVM_VFIO
	bool
virt/kvm/arm/arch_timer.c (new file, 332 lines)
@@ -0,0 +1,332 @@
/*
 * Copyright (C) 2012 ARM Ltd.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/cpu.h>
#include <linux/of_irq.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>

#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>

#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>

static struct timecounter *timecounter;
static struct workqueue_struct *wqueue;
static unsigned int host_vtimer_irq;

static cycle_t kvm_phys_timer_read(void)
{
	return timecounter->cc->read(timecounter->cc);
}

static bool timer_is_armed(struct arch_timer_cpu *timer)
{
	return timer->armed;
}

/* timer_arm: as in "arm the timer", not as in ARM the company */
static void timer_arm(struct arch_timer_cpu *timer, u64 ns)
{
	timer->armed = true;
	hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns),
		      HRTIMER_MODE_ABS);
}

static void timer_disarm(struct arch_timer_cpu *timer)
{
	if (timer_is_armed(timer)) {
		hrtimer_cancel(&timer->timer);
		cancel_work_sync(&timer->expired);
		timer->armed = false;
	}
}

static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
{
	int ret;
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

	timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
	ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
				  timer->irq->irq,
				  timer->irq->level);
	WARN_ON(ret);
}

static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;

	/*
	 * We disable the timer in the world switch and let it be
	 * handled by kvm_timer_sync_hwstate(). Getting a timer
	 * interrupt at this point is a sure sign of some major
	 * breakage.
	 */
	pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
	return IRQ_HANDLED;
}

static void kvm_timer_inject_irq_work(struct work_struct *work)
{
	struct kvm_vcpu *vcpu;

	vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
	vcpu->arch.timer_cpu.armed = false;
	kvm_timer_inject_irq(vcpu);
}

static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
{
	struct arch_timer_cpu *timer;
	timer = container_of(hrt, struct arch_timer_cpu, timer);
	queue_work(wqueue, &timer->expired);
	return HRTIMER_NORESTART;
}

/**
 * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
 * @vcpu: The vcpu pointer
 *
 * Disarm any pending soft timers, since the world-switch code will write the
 * virtual timer state back to the physical CPU.
 */
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

	/*
	 * We're about to run this vcpu again, so there is no need to
	 * keep the background timer running, as we're about to
	 * populate the CPU timer again.
	 */
	timer_disarm(timer);
}

/**
 * kvm_timer_sync_hwstate - sync timer state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the virtual timer was armed and either schedule a corresponding
 * soft timer or inject directly if already expired.
 */
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
	cycle_t cval, now;
	u64 ns;

	if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
	    !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
		return;

	cval = timer->cntv_cval;
	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;

	BUG_ON(timer_is_armed(timer));

	if (cval <= now) {
		/*
		 * Timer has already expired while we were not
		 * looking. Inject the interrupt and carry on.
		 */
		kvm_timer_inject_irq(vcpu);
		return;
	}

	ns = cyclecounter_cyc2ns(timecounter->cc, cval - now);
	timer_arm(timer, ns);
}

void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
			  const struct kvm_irq_level *irq)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

	/*
	 * The vcpu timer irq number cannot be determined in
	 * kvm_timer_vcpu_init() because it is called much before
	 * kvm_vcpu_set_target(). To handle this, we determine
	 * vcpu timer irq number when the vcpu is reset.
	 */
	timer->irq = irq;
}

void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

	INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
	hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	timer->timer.function = kvm_timer_expire;
}

static void kvm_timer_init_interrupt(void *info)
{
	enable_percpu_irq(host_vtimer_irq, 0);
}

int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		timer->cntv_ctl = value;
		break;
	case KVM_REG_ARM_TIMER_CNT:
		vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value;
		break;
	case KVM_REG_ARM_TIMER_CVAL:
		timer->cntv_cval = value;
		break;
	default:
		return -1;
	}
	return 0;
}

u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		return timer->cntv_ctl;
	case KVM_REG_ARM_TIMER_CNT:
		return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
	case KVM_REG_ARM_TIMER_CVAL:
		return timer->cntv_cval;
	}
	return (u64)-1;
}

static int kvm_timer_cpu_notify(struct notifier_block *self,
				unsigned long action, void *cpu)
{
	switch (action) {
	case CPU_STARTING:
	case CPU_STARTING_FROZEN:
		kvm_timer_init_interrupt(NULL);
		break;
	case CPU_DYING:
	case CPU_DYING_FROZEN:
		disable_percpu_irq(host_vtimer_irq);
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block kvm_timer_cpu_nb = {
	.notifier_call = kvm_timer_cpu_notify,
};

static const struct of_device_id arch_timer_of_match[] = {
	{ .compatible = "arm,armv7-timer", },
	{ .compatible = "arm,armv8-timer", },
	{},
};

int kvm_timer_hyp_init(void)
{
	struct device_node *np;
	unsigned int ppi;
	int err;

	timecounter = arch_timer_get_timecounter();
	if (!timecounter)
		return -ENODEV;

	np = of_find_matching_node(NULL, arch_timer_of_match);
	if (!np) {
		kvm_err("kvm_arch_timer: can't find DT node\n");
		return -ENODEV;
	}

	ppi = irq_of_parse_and_map(np, 2);
	if (!ppi) {
		kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
		err = -EINVAL;
		goto out;
	}

	err = request_percpu_irq(ppi, kvm_arch_timer_handler,
				 "kvm guest timer", kvm_get_running_vcpus());
	if (err) {
		kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
			ppi, err);
		goto out;
	}

	host_vtimer_irq = ppi;

	err = __register_cpu_notifier(&kvm_timer_cpu_nb);
	if (err) {
		kvm_err("Cannot register timer CPU notifier\n");
		goto out_free;
	}

	wqueue = create_singlethread_workqueue("kvm_arch_timer");
	if (!wqueue) {
		err = -ENOMEM;
		goto out_free;
	}

	kvm_info("%s IRQ%d\n", np->name, ppi);
	on_each_cpu(kvm_timer_init_interrupt, NULL, 1);

	goto out;
out_free:
	free_percpu_irq(ppi, kvm_get_running_vcpus());
out:
	of_node_put(np);
	return err;
}

void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

	timer_disarm(timer);
}

void kvm_timer_enable(struct kvm *kvm)
{
	if (kvm->arch.timer.enabled)
		return;

	/*
	 * There is a potential race here between VCPUs starting for the first
	 * time, which may be enabling the timer multiple times.  That doesn't
	 * hurt though, because we're just setting a variable to the same
	 * variable that it already was.  The important thing is that all
	 * VCPUs have the enabled variable set, before entering the guest, if
	 * the arch timers are enabled.
	 */
	if (timecounter && wqueue)
		kvm->arch.timer.enabled = 1;
}

void kvm_timer_init(struct kvm *kvm)
{
	kvm->arch.timer.cntvoff = kvm_phys_timer_read();
}
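
Note (editor's aside, not part of the commit): the nanosecond deadline passed to timer_arm() above comes from cyclecounter_cyc2ns(), which scales counter ticks by a precomputed mult/shift pair. A minimal stand-alone sketch of that arithmetic, assuming a hypothetical 24 MHz counter (the mult/shift values are illustrative, not the kernel's):

#include <stdint.h>
#include <stdio.h>

/* Same scheme as the kernel's cyclecounter_cyc2ns(): ns = (cycles * mult) >> shift. */
static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (cycles * mult) >> shift;
}

int main(void)
{
	/* Hypothetical 24 MHz counter: one tick is 1e9 / 24e6 ~= 41.667 ns,
	 * so pick mult/shift such that mult / 2^shift ~= 41.667. */
	uint32_t mult = 10667, shift = 8;
	uint64_t cval = 24001000, now = 24000000;	/* guest deadline vs. current count */

	/* 1000 ticks -> ~41667 ns, the value a timer_arm() call would receive. */
	printf("arm soft timer for %llu ns\n",
	       (unsigned long long)cyc2ns(cval - now, mult, shift));
	return 0;
}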
virt/kvm/arm/vgic-v2.c (new file, 255 lines)
@@ -0,0 +1,255 @@
/*
 * Copyright (C) 2012,2013 ARM Limited, All Rights Reserved.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>

#include <linux/irqchip/arm-gic.h>

#include <asm/kvm_emulate.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>

static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
{
	struct vgic_lr lr_desc;
	u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr];

	lr_desc.irq = val & GICH_LR_VIRTUALID;
	if (lr_desc.irq <= 15)
		lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
	else
		lr_desc.source = 0;
	lr_desc.state = 0;

	if (val & GICH_LR_PENDING_BIT)
		lr_desc.state |= LR_STATE_PENDING;
	if (val & GICH_LR_ACTIVE_BIT)
		lr_desc.state |= LR_STATE_ACTIVE;
	if (val & GICH_LR_EOI)
		lr_desc.state |= LR_EOI_INT;

	return lr_desc;
}

static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
			   struct vgic_lr lr_desc)
{
	u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;

	if (lr_desc.state & LR_STATE_PENDING)
		lr_val |= GICH_LR_PENDING_BIT;
	if (lr_desc.state & LR_STATE_ACTIVE)
		lr_val |= GICH_LR_ACTIVE_BIT;
	if (lr_desc.state & LR_EOI_INT)
		lr_val |= GICH_LR_EOI;

	vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
}

static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
				  struct vgic_lr lr_desc)
{
	if (!(lr_desc.state & LR_STATE_MASK))
		vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
	else
		vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr);
}

static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
{
	return vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
}

static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
{
	return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
}

static void vgic_v2_clear_eisr(struct kvm_vcpu *vcpu)
{
	vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr = 0;
}

static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
{
	u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
	u32 ret = 0;

	if (misr & GICH_MISR_EOI)
		ret |= INT_STATUS_EOI;
	if (misr & GICH_MISR_U)
		ret |= INT_STATUS_UNDERFLOW;

	return ret;
}

static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu)
{
	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE;
}

static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu)
{
	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
}

static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
	u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;

	vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT;
	vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT;
	vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT;
	vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT;
}

static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
	u32 vmcr;

	vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
	vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK;
	vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK;
	vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;

	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
}

static void vgic_v2_enable(struct kvm_vcpu *vcpu)
{
	/*
	 * By forcing VMCR to zero, the GIC will restore the binary
	 * points to their reset values. Anything else resets to zero
	 * anyway.
	 */
	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;

	/* Get the show on the road... */
	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
}

static const struct vgic_ops vgic_v2_ops = {
	.get_lr			= vgic_v2_get_lr,
	.set_lr			= vgic_v2_set_lr,
	.sync_lr_elrsr		= vgic_v2_sync_lr_elrsr,
	.get_elrsr		= vgic_v2_get_elrsr,
	.get_eisr		= vgic_v2_get_eisr,
	.clear_eisr		= vgic_v2_clear_eisr,
	.get_interrupt_status	= vgic_v2_get_interrupt_status,
	.enable_underflow	= vgic_v2_enable_underflow,
	.disable_underflow	= vgic_v2_disable_underflow,
	.get_vmcr		= vgic_v2_get_vmcr,
	.set_vmcr		= vgic_v2_set_vmcr,
	.enable			= vgic_v2_enable,
};

static struct vgic_params vgic_v2_params;

/**
 * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
 * @node:	pointer to the DT node
 * @ops:	address of a pointer to the GICv2 operations
 * @params:	address of a pointer to HW-specific parameters
 *
 * Returns 0 if a GICv2 has been found, with the low level operations
 * in *ops and the HW parameters in *params. Returns an error code
 * otherwise.
 */
int vgic_v2_probe(struct device_node *vgic_node,
		  const struct vgic_ops **ops,
		  const struct vgic_params **params)
{
	int ret;
	struct resource vctrl_res;
	struct resource vcpu_res;
	struct vgic_params *vgic = &vgic_v2_params;

	vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
	if (!vgic->maint_irq) {
		kvm_err("error getting vgic maintenance irq from DT\n");
		ret = -ENXIO;
		goto out;
	}

	ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
	if (ret) {
		kvm_err("Cannot obtain GICH resource\n");
		goto out;
	}

	vgic->vctrl_base = of_iomap(vgic_node, 2);
	if (!vgic->vctrl_base) {
		kvm_err("Cannot ioremap GICH\n");
		ret = -ENOMEM;
		goto out;
	}

	vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR);
	vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;

	ret = create_hyp_io_mappings(vgic->vctrl_base,
				     vgic->vctrl_base + resource_size(&vctrl_res),
				     vctrl_res.start);
	if (ret) {
		kvm_err("Cannot map VCTRL into hyp\n");
		goto out_unmap;
	}

	if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
		kvm_err("Cannot obtain GICV resource\n");
		ret = -ENXIO;
		goto out_unmap;
	}

	if (!PAGE_ALIGNED(vcpu_res.start)) {
		kvm_err("GICV physical address 0x%llx not page aligned\n",
			(unsigned long long)vcpu_res.start);
		ret = -ENXIO;
		goto out_unmap;
	}

	if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
		kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
			(unsigned long long)resource_size(&vcpu_res),
			PAGE_SIZE);
		ret = -ENXIO;
		goto out_unmap;
	}

	vgic->vcpu_base = vcpu_res.start;

	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
		 vctrl_res.start, vgic->maint_irq);

	vgic->type = VGIC_V2;
	*ops = &vgic_v2_ops;
	*params = vgic;
	goto out;

out_unmap:
	iounmap(vgic->vctrl_base);
out:
	of_node_put(vgic_node);
	return ret;
}
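
Note (editor's aside, not part of the commit): vgic_v2_get_lr()/vgic_v2_set_lr() above are pure bit packing on the GICv2 list-register layout. Below is a stand-alone round trip of the same encoding; the mask values are copied from the kernel's <linux/irqchip/arm-gic.h> (bit positions per the GICv2 architecture spec):

#include <stdint.h>
#include <stdio.h>

/* Field layout of a GICv2 list register (GICH_LRn). */
#define GICH_LR_VIRTUALID		(0x3ffU << 0)
#define GICH_LR_PHYSID_CPUID_SHIFT	10
#define GICH_LR_EOI			(1U << 19)
#define GICH_LR_PENDING_BIT		(1U << 28)
#define GICH_LR_ACTIVE_BIT		(1U << 29)

int main(void)
{
	/* Pack: SGI 7 from source CPU 2, pending (mirrors vgic_v2_set_lr()). */
	uint32_t lr = (2U << GICH_LR_PHYSID_CPUID_SHIFT) | 7U | GICH_LR_PENDING_BIT;

	/* Unpack, as vgic_v2_get_lr() does. */
	printf("irq=%u source=%u pending=%d active=%d\n",
	       lr & GICH_LR_VIRTUALID,
	       (lr >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7,
	       !!(lr & GICH_LR_PENDING_BIT),
	       !!(lr & GICH_LR_ACTIVE_BIT));
	return 0;
}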
virt/kvm/arm/vgic-v3.c (new file, 255 lines)
@@ -0,0 +1,255 @@
/*
 * Copyright (C) 2013 ARM Limited, All Rights Reserved.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>

#include <linux/irqchip/arm-gic-v3.h>

#include <asm/kvm_emulate.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>

/* These are for GICv2 emulation only */
#define GICH_LR_VIRTUALID		(0x3ffUL << 0)
#define GICH_LR_PHYSID_CPUID_SHIFT	(10)
#define GICH_LR_PHYSID_CPUID		(7UL << GICH_LR_PHYSID_CPUID_SHIFT)

/*
 * LRs are stored in reverse order in memory. make sure we index them
 * correctly.
 */
#define LR_INDEX(lr)			(VGIC_V3_MAX_LRS - 1 - lr)

static u32 ich_vtr_el2;

static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
{
	struct vgic_lr lr_desc;
	u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)];

	lr_desc.irq = val & GICH_LR_VIRTUALID;
	if (lr_desc.irq <= 15)
		lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
	else
		lr_desc.source = 0;
	lr_desc.state = 0;

	if (val & ICH_LR_PENDING_BIT)
		lr_desc.state |= LR_STATE_PENDING;
	if (val & ICH_LR_ACTIVE_BIT)
		lr_desc.state |= LR_STATE_ACTIVE;
	if (val & ICH_LR_EOI)
		lr_desc.state |= LR_EOI_INT;

	return lr_desc;
}

static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
			   struct vgic_lr lr_desc)
{
	u64 lr_val = (((u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) |
		      lr_desc.irq);

	if (lr_desc.state & LR_STATE_PENDING)
		lr_val |= ICH_LR_PENDING_BIT;
	if (lr_desc.state & LR_STATE_ACTIVE)
		lr_val |= ICH_LR_ACTIVE_BIT;
	if (lr_desc.state & LR_EOI_INT)
		lr_val |= ICH_LR_EOI;

	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
}

static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
				  struct vgic_lr lr_desc)
{
	if (!(lr_desc.state & LR_STATE_MASK))
		vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
	else
		vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr);
}

static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu)
{
	return vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr;
}

static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu)
{
	return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr;
}

static void vgic_v3_clear_eisr(struct kvm_vcpu *vcpu)
{
	vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr = 0;
}

static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu)
{
	u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr;
	u32 ret = 0;

	if (misr & ICH_MISR_EOI)
		ret |= INT_STATUS_EOI;
	if (misr & ICH_MISR_U)
		ret |= INT_STATUS_UNDERFLOW;

	return ret;
}

static void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
	u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr;

	vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT;
	vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
	vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
	vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
}

static void vgic_v3_enable_underflow(struct kvm_vcpu *vcpu)
{
	vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr |= ICH_HCR_UIE;
}

static void vgic_v3_disable_underflow(struct kvm_vcpu *vcpu)
{
	vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr &= ~ICH_HCR_UIE;
}

static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
	u32 vmcr;

	vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK;
	vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
	vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
	vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;

	vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr;
}

static void vgic_v3_enable(struct kvm_vcpu *vcpu)
{
	/*
	 * By forcing VMCR to zero, the GIC will restore the binary
	 * points to their reset values. Anything else resets to zero
	 * anyway.
	 */
	vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = 0;

	/* Get the show on the road... */
	vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = ICH_HCR_EN;
}

static const struct vgic_ops vgic_v3_ops = {
	.get_lr			= vgic_v3_get_lr,
	.set_lr			= vgic_v3_set_lr,
	.sync_lr_elrsr		= vgic_v3_sync_lr_elrsr,
	.get_elrsr		= vgic_v3_get_elrsr,
	.get_eisr		= vgic_v3_get_eisr,
	.clear_eisr		= vgic_v3_clear_eisr,
	.get_interrupt_status	= vgic_v3_get_interrupt_status,
	.enable_underflow	= vgic_v3_enable_underflow,
	.disable_underflow	= vgic_v3_disable_underflow,
	.get_vmcr		= vgic_v3_get_vmcr,
	.set_vmcr		= vgic_v3_set_vmcr,
	.enable			= vgic_v3_enable,
};

static struct vgic_params vgic_v3_params;

/**
 * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
 * @node:	pointer to the DT node
 * @ops:	address of a pointer to the GICv3 operations
 * @params:	address of a pointer to HW-specific parameters
 *
 * Returns 0 if a GICv3 has been found, with the low level operations
 * in *ops and the HW parameters in *params. Returns an error code
 * otherwise.
 */
int vgic_v3_probe(struct device_node *vgic_node,
		  const struct vgic_ops **ops,
		  const struct vgic_params **params)
{
	int ret = 0;
	u32 gicv_idx;
	struct resource vcpu_res;
	struct vgic_params *vgic = &vgic_v3_params;

	vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
	if (!vgic->maint_irq) {
		kvm_err("error getting vgic maintenance irq from DT\n");
		ret = -ENXIO;
		goto out;
	}

	ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);

	/*
	 * The ListRegs field is 5 bits, but there is an architectural
	 * maximum of 16 list registers. Just ignore bit 4...
	 */
	vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;

	if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
		gicv_idx = 1;

	gicv_idx += 3; /* Also skip GICD, GICC, GICH */
	if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
		kvm_err("Cannot obtain GICV region\n");
		ret = -ENXIO;
		goto out;
	}

	if (!PAGE_ALIGNED(vcpu_res.start)) {
		kvm_err("GICV physical address 0x%llx not page aligned\n",
			(unsigned long long)vcpu_res.start);
		ret = -ENXIO;
		goto out;
	}

	if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
		kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
			(unsigned long long)resource_size(&vcpu_res),
			PAGE_SIZE);
		ret = -ENXIO;
		goto out;
	}

	vgic->vcpu_base = vcpu_res.start;
	vgic->vctrl_base = NULL;
	vgic->type = VGIC_V3;

	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
		 vcpu_res.start, vgic->maint_irq);

	*ops = &vgic_v3_ops;
	*params = vgic;

out:
	of_node_put(vgic_node);
	return ret;
}
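
Note (editor's aside, not part of the commit): the LR_INDEX() macro above reverses the indexing because the world-switch code saves ICH_LR<n> into the vgic_lr[] array in descending register order. A two-line sanity check of the mapping, assuming VGIC_V3_MAX_LRS is 16 as in this kernel's kvm/arm_vgic.h:

#include <stdio.h>

#define VGIC_V3_MAX_LRS	16
#define LR_INDEX(lr)	(VGIC_V3_MAX_LRS - 1 - (lr))

int main(void)
{
	/* LR0 lives in the last array slot, LR15 in the first. */
	printf("LR0 -> vgic_lr[%d], LR15 -> vgic_lr[%d]\n", LR_INDEX(0), LR_INDEX(15));
	return 0;
}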
virt/kvm/arm/vgic.c (new file, 2500 lines)
File diff suppressed because it is too large.
virt/kvm/assigned-dev.c (new file, 1026 lines)
File diff suppressed because it is too large.
virt/kvm/async_pf.c (new file, 225 lines)
@@ -0,0 +1,225 @@
/*
 * kvm asynchronous fault support
 *
 * Copyright 2010 Red Hat, Inc.
 *
 * Author:
 *	Gleb Natapov <gleb@redhat.com>
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mmu_context.h>

#include "async_pf.h"
#include <trace/events/kvm.h>

static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
					       struct kvm_async_pf *work)
{
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
	kvm_arch_async_page_present(vcpu, work);
#endif
}
static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
						struct kvm_async_pf *work)
{
#ifndef CONFIG_KVM_ASYNC_PF_SYNC
	kvm_arch_async_page_present(vcpu, work);
#endif
}

static struct kmem_cache *async_pf_cache;

int kvm_async_pf_init(void)
{
	async_pf_cache = KMEM_CACHE(kvm_async_pf, 0);

	if (!async_pf_cache)
		return -ENOMEM;

	return 0;
}

void kvm_async_pf_deinit(void)
{
	if (async_pf_cache)
		kmem_cache_destroy(async_pf_cache);
	async_pf_cache = NULL;
}

void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
{
	INIT_LIST_HEAD(&vcpu->async_pf.done);
	INIT_LIST_HEAD(&vcpu->async_pf.queue);
	spin_lock_init(&vcpu->async_pf.lock);
}

static void async_pf_execute(struct work_struct *work)
{
	struct kvm_async_pf *apf =
		container_of(work, struct kvm_async_pf, work);
	struct mm_struct *mm = apf->mm;
	struct kvm_vcpu *vcpu = apf->vcpu;
	unsigned long addr = apf->addr;
	gva_t gva = apf->gva;

	might_sleep();

	kvm_get_user_page_io(NULL, mm, addr, 1, NULL);
	kvm_async_page_present_sync(vcpu, apf);

	spin_lock(&vcpu->async_pf.lock);
	list_add_tail(&apf->link, &vcpu->async_pf.done);
	spin_unlock(&vcpu->async_pf.lock);

	/*
	 * apf may be freed by kvm_check_async_pf_completion() after
	 * this point
	 */

	trace_kvm_async_pf_completed(addr, gva);

	if (waitqueue_active(&vcpu->wq))
		wake_up_interruptible(&vcpu->wq);

	mmput(mm);
	kvm_put_kvm(vcpu->kvm);
}

void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
{
	/* cancel outstanding work queue item */
	while (!list_empty(&vcpu->async_pf.queue)) {
		struct kvm_async_pf *work =
			list_entry(vcpu->async_pf.queue.next,
				   typeof(*work), queue);
		list_del(&work->queue);

#ifdef CONFIG_KVM_ASYNC_PF_SYNC
		flush_work(&work->work);
#else
		if (cancel_work_sync(&work->work)) {
			mmput(work->mm);
			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
			kmem_cache_free(async_pf_cache, work);
		}
#endif
	}

	spin_lock(&vcpu->async_pf.lock);
	while (!list_empty(&vcpu->async_pf.done)) {
		struct kvm_async_pf *work =
			list_entry(vcpu->async_pf.done.next,
				   typeof(*work), link);
		list_del(&work->link);
		kmem_cache_free(async_pf_cache, work);
	}
	spin_unlock(&vcpu->async_pf.lock);

	vcpu->async_pf.queued = 0;
}

void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
{
	struct kvm_async_pf *work;

	while (!list_empty_careful(&vcpu->async_pf.done) &&
	       kvm_arch_can_inject_async_page_present(vcpu)) {
		spin_lock(&vcpu->async_pf.lock);
		work = list_first_entry(&vcpu->async_pf.done, typeof(*work),
					link);
		list_del(&work->link);
		spin_unlock(&vcpu->async_pf.lock);

		kvm_arch_async_page_ready(vcpu, work);
		kvm_async_page_present_async(vcpu, work);

		list_del(&work->queue);
		vcpu->async_pf.queued--;
		kmem_cache_free(async_pf_cache, work);
	}
}

int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
		       struct kvm_arch_async_pf *arch)
{
	struct kvm_async_pf *work;

	if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
		return 0;

	/* setup delayed work */

	/*
	 * do alloc nowait since if we are going to sleep anyway we
	 * may as well sleep faulting in page
	 */
	work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT);
	if (!work)
		return 0;

	work->wakeup_all = false;
	work->vcpu = vcpu;
	work->gva = gva;
	work->addr = hva;
	work->arch = *arch;
	work->mm = current->mm;
	atomic_inc(&work->mm->mm_users);
	kvm_get_kvm(work->vcpu->kvm);

	/* this can't really happen otherwise gfn_to_pfn_async
	   would succeed */
	if (unlikely(kvm_is_error_hva(work->addr)))
		goto retry_sync;

	INIT_WORK(&work->work, async_pf_execute);
	if (!schedule_work(&work->work))
		goto retry_sync;

	list_add_tail(&work->queue, &vcpu->async_pf.queue);
	vcpu->async_pf.queued++;
	kvm_arch_async_page_not_present(vcpu, work);
	return 1;
retry_sync:
	kvm_put_kvm(work->vcpu->kvm);
	mmput(work->mm);
	kmem_cache_free(async_pf_cache, work);
	return 0;
}

int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
{
	struct kvm_async_pf *work;

	if (!list_empty_careful(&vcpu->async_pf.done))
		return 0;

	work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC);
	if (!work)
		return -ENOMEM;

	work->wakeup_all = true;
	INIT_LIST_HEAD(&work->queue); /* for list_del to work */

	spin_lock(&vcpu->async_pf.lock);
	list_add_tail(&work->link, &vcpu->async_pf.done);
	spin_unlock(&vcpu->async_pf.lock);

	vcpu->async_pf.queued++;
	return 0;
}
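
Note (editor's aside, not part of the commit): the file above is a classic completion-queue handoff: a worker finishes a slow job (the page fault), appends it to a per-vcpu "done" list under a spinlock, and the vcpu loop drains that list later. A stand-alone POSIX-threads sketch of the same shape, with hypothetical names (an analogy, not kernel code):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct work {
	int id;
	struct work *next;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct work *done;			/* the "done" list */

static void *worker(void *arg)			/* plays async_pf_execute() */
{
	struct work *w = arg;

	usleep(1000);				/* the slow fault-in */
	pthread_mutex_lock(&lock);
	w->next = done;
	done = w;
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;
	struct work *w = calloc(1, sizeof(*w));

	w->id = 42;
	pthread_create(&t, NULL, worker, w);
	pthread_join(t, NULL);

	pthread_mutex_lock(&lock);		/* plays kvm_check_async_pf_completion() */
	while (done) {
		struct work *it = done;

		done = it->next;
		printf("work %d ready\n", it->id);
		free(it);
	}
	pthread_mutex_unlock(&lock);
	return 0;
}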
virt/kvm/async_pf.h (new file, 36 lines)
@@ -0,0 +1,36 @@
/*
 * kvm asynchronous fault support
 *
 * Copyright 2010 Red Hat, Inc.
 *
 * Author:
 *	Gleb Natapov <gleb@redhat.com>
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#ifndef __KVM_ASYNC_PF_H__
#define __KVM_ASYNC_PF_H__

#ifdef CONFIG_KVM_ASYNC_PF
int kvm_async_pf_init(void);
void kvm_async_pf_deinit(void);
void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu);
#else
#define kvm_async_pf_init() (0)
#define kvm_async_pf_deinit() do {} while (0)
#define kvm_async_pf_vcpu_init(C) do {} while (0)
#endif

#endif
virt/kvm/coalesced_mmio.c (new file, 182 lines)
@@ -0,0 +1,182 @@
/*
 * KVM coalesced MMIO
 *
 * Copyright (c) 2008 Bull S.A.S.
 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
 *
 * Author: Laurent Vivier <Laurent.Vivier@bull.net>
 *
 */

#include "iodev.h"

#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/kvm.h>

#include "coalesced_mmio.h"

static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev)
{
	return container_of(dev, struct kvm_coalesced_mmio_dev, dev);
}

static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
				   gpa_t addr, int len)
{
	/* is it in a batchable area ?
	 * (addr,len) is fully included in
	 * (zone->addr, zone->size)
	 */
	if (len < 0)
		return 0;
	if (addr + len < addr)
		return 0;
	if (addr < dev->zone.addr)
		return 0;
	if (addr + len > dev->zone.addr + dev->zone.size)
		return 0;
	return 1;
}

static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
{
	struct kvm_coalesced_mmio_ring *ring;
	unsigned avail;

	/* Are we able to batch it ? */

	/* last is the first free entry
	 * check if we don't meet the first used entry
	 * there is always one unused entry in the buffer
	 */
	ring = dev->kvm->coalesced_mmio_ring;
	avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX;
	if (avail == 0) {
		/* full */
		return 0;
	}

	return 1;
}

static int coalesced_mmio_write(struct kvm_io_device *this,
				gpa_t addr, int len, const void *val)
{
	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;

	if (!coalesced_mmio_in_range(dev, addr, len))
		return -EOPNOTSUPP;

	spin_lock(&dev->kvm->ring_lock);

	if (!coalesced_mmio_has_room(dev)) {
		spin_unlock(&dev->kvm->ring_lock);
		return -EOPNOTSUPP;
	}

	/* copy data in first free entry of the ring */

	ring->coalesced_mmio[ring->last].phys_addr = addr;
	ring->coalesced_mmio[ring->last].len = len;
	memcpy(ring->coalesced_mmio[ring->last].data, val, len);
	smp_wmb();
	ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
	spin_unlock(&dev->kvm->ring_lock);
	return 0;
}

static void coalesced_mmio_destructor(struct kvm_io_device *this)
{
	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);

	list_del(&dev->list);

	kfree(dev);
}

static const struct kvm_io_device_ops coalesced_mmio_ops = {
	.write      = coalesced_mmio_write,
	.destructor = coalesced_mmio_destructor,
};

int kvm_coalesced_mmio_init(struct kvm *kvm)
{
	struct page *page;
	int ret;

	ret = -ENOMEM;
	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		goto out_err;

	ret = 0;
	kvm->coalesced_mmio_ring = page_address(page);

	/*
	 * We're using this spinlock to sync access to the coalesced ring.
	 * The list doesn't need its own lock since device registration and
	 * unregistration should only happen when kvm->slots_lock is held.
	 */
	spin_lock_init(&kvm->ring_lock);
	INIT_LIST_HEAD(&kvm->coalesced_zones);

out_err:
	return ret;
}

void kvm_coalesced_mmio_free(struct kvm *kvm)
{
	if (kvm->coalesced_mmio_ring)
		free_page((unsigned long)kvm->coalesced_mmio_ring);
}

int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
					 struct kvm_coalesced_mmio_zone *zone)
{
	int ret;
	struct kvm_coalesced_mmio_dev *dev;

	dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
	if (!dev)
		return -ENOMEM;

	kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
	dev->kvm = kvm;
	dev->zone = *zone;

	mutex_lock(&kvm->slots_lock);
	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr,
				      zone->size, &dev->dev);
	if (ret < 0)
		goto out_free_dev;
	list_add_tail(&dev->list, &kvm->coalesced_zones);
	mutex_unlock(&kvm->slots_lock);

	return 0;

out_free_dev:
	mutex_unlock(&kvm->slots_lock);
	kfree(dev);

	return ret;
}

int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
					   struct kvm_coalesced_mmio_zone *zone)
{
	struct kvm_coalesced_mmio_dev *dev, *tmp;

	mutex_lock(&kvm->slots_lock);

	list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list)
		if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) {
			kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev);
			kvm_iodevice_destructor(&dev->dev);
		}

	mutex_unlock(&kvm->slots_lock);

	return 0;
}
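
Note (editor's aside, not part of the commit): coalesced_mmio_has_room() relies on the usual one-slot-empty ring discipline, so first == last means "empty" rather than "full". The sketch below states that occupancy rule directly; it adds MAX before subtracting to keep the arithmetic obviously in range, whereas the in-kernel expression leans on u32 wraparound. KVM_COALESCED_MMIO_MAX here is a stand-in for the real header constant:

#include <stdio.h>

#define KVM_COALESCED_MMIO_MAX	170	/* illustrative; the kernel derives it from PAGE_SIZE */

/* Free entries left in the ring, always reserving one empty slot. */
static unsigned int ring_avail(unsigned int first, unsigned int last)
{
	return (first + KVM_COALESCED_MMIO_MAX - last - 1) % KVM_COALESCED_MMIO_MAX;
}

int main(void)
{
	printf("empty ring: %u free\n", ring_avail(0, 0));	/* 169 */
	printf("full ring:  %u free\n", ring_avail(5, 4));	/* 0, writer must bail out */
	return 0;
}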
virt/kvm/coalesced_mmio.h (new file, 38 lines)
@@ -0,0 +1,38 @@
#ifndef __KVM_COALESCED_MMIO_H__
#define __KVM_COALESCED_MMIO_H__

/*
 * KVM coalesced MMIO
 *
 * Copyright (c) 2008 Bull S.A.S.
 *
 * Author: Laurent Vivier <Laurent.Vivier@bull.net>
 *
 */

#ifdef CONFIG_KVM_MMIO

#include <linux/list.h>

struct kvm_coalesced_mmio_dev {
	struct list_head list;
	struct kvm_io_device dev;
	struct kvm *kvm;
	struct kvm_coalesced_mmio_zone zone;
};

int kvm_coalesced_mmio_init(struct kvm *kvm);
void kvm_coalesced_mmio_free(struct kvm *kvm);
int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
					 struct kvm_coalesced_mmio_zone *zone);
int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
					   struct kvm_coalesced_mmio_zone *zone);

#else

static inline int kvm_coalesced_mmio_init(struct kvm *kvm) { return 0; }
static inline void kvm_coalesced_mmio_free(struct kvm *kvm) { }

#endif

#endif
932
virt/kvm/eventfd.c
Normal file
932
virt/kvm/eventfd.c
Normal file
|
@ -0,0 +1,932 @@
|
|||
/*
|
||||
* kvm eventfd support - use eventfd objects to signal various KVM events
|
||||
*
|
||||
* Copyright 2009 Novell. All Rights Reserved.
|
||||
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
|
||||
*
|
||||
* Author:
|
||||
* Gregory Haskins <ghaskins@novell.com>
|
||||
*
|
||||
* This file is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License
|
||||
* as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/poll.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/eventfd.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/srcu.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/seqlock.h>
|
||||
#include <trace/events/kvm.h>
|
||||
|
||||
#ifdef __KVM_HAVE_IOAPIC
|
||||
#include "ioapic.h"
|
||||
#endif
|
||||
#include "iodev.h"
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_IRQFD
|
||||
/*
|
||||
* --------------------------------------------------------------------
|
||||
* irqfd: Allows an fd to be used to inject an interrupt to the guest
|
||||
*
|
||||
* Credit goes to Avi Kivity for the original idea.
|
||||
* --------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*
|
||||
* Resampling irqfds are a special variety of irqfds used to emulate
|
||||
* level triggered interrupts. The interrupt is asserted on eventfd
|
||||
* trigger. On acknowledgement through the irq ack notifier, the
|
||||
* interrupt is de-asserted and userspace is notified through the
|
||||
* resamplefd. All resamplers on the same gsi are de-asserted
|
||||
* together, so we don't need to track the state of each individual
|
||||
* user. We can also therefore share the same irq source ID.
|
||||
*/
|
||||
struct _irqfd_resampler {
|
||||
struct kvm *kvm;
|
||||
/*
|
||||
* List of resampling struct _irqfd objects sharing this gsi.
|
||||
* RCU list modified under kvm->irqfds.resampler_lock
|
||||
*/
|
||||
struct list_head list;
|
||||
struct kvm_irq_ack_notifier notifier;
|
||||
/*
|
||||
* Entry in list of kvm->irqfd.resampler_list. Use for sharing
|
||||
* resamplers among irqfds on the same gsi.
|
||||
* Accessed and modified under kvm->irqfds.resampler_lock
|
||||
*/
|
||||
struct list_head link;
|
||||
};
|
||||
|
||||
struct _irqfd {
|
||||
/* Used for MSI fast-path */
|
||||
struct kvm *kvm;
|
||||
wait_queue_t wait;
|
||||
/* Update side is protected by irqfds.lock */
|
||||
struct kvm_kernel_irq_routing_entry irq_entry;
|
||||
seqcount_t irq_entry_sc;
|
||||
/* Used for level IRQ fast-path */
|
||||
int gsi;
|
||||
struct work_struct inject;
|
||||
/* The resampler used by this irqfd (resampler-only) */
|
||||
struct _irqfd_resampler *resampler;
|
||||
/* Eventfd notified on resample (resampler-only) */
|
||||
struct eventfd_ctx *resamplefd;
|
||||
/* Entry in list of irqfds for a resampler (resampler-only) */
|
||||
struct list_head resampler_link;
|
||||
/* Used for setup/shutdown */
|
||||
struct eventfd_ctx *eventfd;
|
||||
struct list_head list;
|
||||
poll_table pt;
|
||||
struct work_struct shutdown;
|
||||
};
|
||||
|
||||
static struct workqueue_struct *irqfd_cleanup_wq;
|
||||
|
||||
static void
|
||||
irqfd_inject(struct work_struct *work)
|
||||
{
|
||||
struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
|
||||
struct kvm *kvm = irqfd->kvm;
|
||||
|
||||
if (!irqfd->resampler) {
|
||||
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
|
||||
false);
|
||||
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
|
||||
false);
|
||||
} else
|
||||
kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
|
||||
irqfd->gsi, 1, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* Since resampler irqfds share an IRQ source ID, we de-assert once
|
||||
* then notify all of the resampler irqfds using this GSI. We can't
|
||||
* do multiple de-asserts or we risk racing with incoming re-asserts.
|
||||
*/
|
||||
static void
|
||||
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
|
||||
{
|
||||
struct _irqfd_resampler *resampler;
|
||||
struct kvm *kvm;
|
||||
struct _irqfd *irqfd;
|
||||
int idx;
|
||||
|
||||
resampler = container_of(kian, struct _irqfd_resampler, notifier);
|
||||
kvm = resampler->kvm;
|
||||
|
||||
kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
|
||||
resampler->notifier.gsi, 0, false);
|
||||
|
||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||
|
||||
list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
|
||||
eventfd_signal(irqfd->resamplefd, 1);
|
||||
|
||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||
}
|
||||
|
||||
static void
|
||||
irqfd_resampler_shutdown(struct _irqfd *irqfd)
|
||||
{
|
||||
struct _irqfd_resampler *resampler = irqfd->resampler;
|
||||
struct kvm *kvm = resampler->kvm;
|
||||
|
||||
mutex_lock(&kvm->irqfds.resampler_lock);
|
||||
|
||||
list_del_rcu(&irqfd->resampler_link);
|
||||
synchronize_srcu(&kvm->irq_srcu);
|
||||
|
||||
if (list_empty(&resampler->list)) {
|
||||
list_del(&resampler->link);
|
||||
kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
|
||||
kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
|
||||
resampler->notifier.gsi, 0, false);
|
||||
kfree(resampler);
|
||||
}
|
||||
|
||||
mutex_unlock(&kvm->irqfds.resampler_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Race-free decouple logic (ordering is critical)
|
||||
*/
|
||||
static void
|
||||
irqfd_shutdown(struct work_struct *work)
|
||||
{
|
||||
struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
|
||||
u64 cnt;
|
||||
|
||||
/*
|
||||
* Synchronize with the wait-queue and unhook ourselves to prevent
|
||||
* further events.
|
||||
*/
|
||||
eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);
|
||||
|
||||
/*
|
||||
* We know no new events will be scheduled at this point, so block
|
||||
* until all previously outstanding events have completed
|
||||
*/
|
||||
flush_work(&irqfd->inject);
|
||||
|
||||
if (irqfd->resampler) {
|
||||
irqfd_resampler_shutdown(irqfd);
|
||||
eventfd_ctx_put(irqfd->resamplefd);
|
||||
}
|
||||
|
||||
/*
|
||||
* It is now safe to release the object's resources
|
||||
*/
|
||||
eventfd_ctx_put(irqfd->eventfd);
|
||||
kfree(irqfd);
|
||||
}
|
||||
|
||||
|
||||
/* assumes kvm->irqfds.lock is held */
|
||||
static bool
|
||||
irqfd_is_active(struct _irqfd *irqfd)
|
||||
{
|
||||
return list_empty(&irqfd->list) ? false : true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark the irqfd as inactive and schedule it for removal
|
||||
*
|
||||
* assumes kvm->irqfds.lock is held
|
||||
*/
|
||||
static void
|
||||
irqfd_deactivate(struct _irqfd *irqfd)
|
||||
{
|
||||
BUG_ON(!irqfd_is_active(irqfd));
|
||||
|
||||
list_del_init(&irqfd->list);
|
||||
|
||||
queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called with wqh->lock held and interrupts disabled
|
||||
*/
|
||||
static int
|
||||
irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
|
||||
{
|
||||
struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
|
||||
unsigned long flags = (unsigned long)key;
|
||||
struct kvm_kernel_irq_routing_entry irq;
|
||||
struct kvm *kvm = irqfd->kvm;
|
||||
unsigned seq;
|
||||
int idx;
|
||||
|
||||
if (flags & POLLIN) {
|
||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||
do {
|
||||
seq = read_seqcount_begin(&irqfd->irq_entry_sc);
|
||||
irq = irqfd->irq_entry;
|
||||
} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
|
||||
/* An event has been signaled, inject an interrupt */
|
||||
if (irq.type == KVM_IRQ_ROUTING_MSI)
|
||||
kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
|
||||
false);
|
||||
else
|
||||
schedule_work(&irqfd->inject);
|
||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||
}
|
||||
|
||||
if (flags & POLLHUP) {
|
||||
/* The eventfd is closing, detach from KVM */
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&kvm->irqfds.lock, flags);
|
||||
|
||||
/*
|
||||
* We must check if someone deactivated the irqfd before
|
||||
* we could acquire the irqfds.lock since the item is
|
||||
* deactivated from the KVM side before it is unhooked from
|
||||
* the wait-queue. If it is already deactivated, we can
|
||||
* simply return knowing the other side will cleanup for us.
|
||||
* We cannot race against the irqfd going away since the
|
||||
* other side is required to acquire wqh->lock, which we hold
|
||||
*/
|
||||
if (irqfd_is_active(irqfd))
|
||||
irqfd_deactivate(irqfd);
|
||||
|
||||
spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
|
||||
poll_table *pt)
|
||||
{
|
||||
struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
|
||||
add_wait_queue(wqh, &irqfd->wait);
|
||||
}
|
||||
|
||||
/* Must be called under irqfds.lock */
|
||||
static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
|
||||
{
|
||||
struct kvm_kernel_irq_routing_entry *e;
|
||||
struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
|
||||
int i, n_entries;
|
||||
|
||||
n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
|
||||
|
||||
write_seqcount_begin(&irqfd->irq_entry_sc);
|
||||
|
||||
irqfd->irq_entry.type = 0;
|
||||
|
||||
e = entries;
|
||||
for (i = 0; i < n_entries; ++i, ++e) {
|
||||
/* Only fast-path MSI. */
|
||||
if (e->type == KVM_IRQ_ROUTING_MSI)
|
||||
irqfd->irq_entry = *e;
|
||||
}
|
||||
|
||||
write_seqcount_end(&irqfd->irq_entry_sc);
|
||||
}
|
||||
|
||||
static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct _irqfd *irqfd, *tmp;
	struct fd f;
	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
	int ret;
	unsigned int events;
	int idx;

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
	if (!irqfd)
		return -ENOMEM;

	irqfd->kvm = kvm;
	irqfd->gsi = args->gsi;
	INIT_LIST_HEAD(&irqfd->list);
	INIT_WORK(&irqfd->inject, irqfd_inject);
	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
	seqcount_init(&irqfd->irq_entry_sc);

	f = fdget(args->fd);
	if (!f.file) {
		ret = -EBADF;
		goto out;
	}

	eventfd = eventfd_ctx_fileget(f.file);
	if (IS_ERR(eventfd)) {
		ret = PTR_ERR(eventfd);
		goto fail;
	}

	irqfd->eventfd = eventfd;

	if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
		struct _irqfd_resampler *resampler;

		resamplefd = eventfd_ctx_fdget(args->resamplefd);
		if (IS_ERR(resamplefd)) {
			ret = PTR_ERR(resamplefd);
			goto fail;
		}

		irqfd->resamplefd = resamplefd;
		INIT_LIST_HEAD(&irqfd->resampler_link);

		mutex_lock(&kvm->irqfds.resampler_lock);

		list_for_each_entry(resampler,
				    &kvm->irqfds.resampler_list, link) {
			if (resampler->notifier.gsi == irqfd->gsi) {
				irqfd->resampler = resampler;
				break;
			}
		}

		if (!irqfd->resampler) {
			resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
			if (!resampler) {
				ret = -ENOMEM;
				mutex_unlock(&kvm->irqfds.resampler_lock);
				goto fail;
			}

			resampler->kvm = kvm;
			INIT_LIST_HEAD(&resampler->list);
			resampler->notifier.gsi = irqfd->gsi;
			resampler->notifier.irq_acked = irqfd_resampler_ack;
			INIT_LIST_HEAD(&resampler->link);

			list_add(&resampler->link, &kvm->irqfds.resampler_list);
			kvm_register_irq_ack_notifier(kvm,
						      &resampler->notifier);
			irqfd->resampler = resampler;
		}

		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
		synchronize_srcu(&kvm->irq_srcu);

		mutex_unlock(&kvm->irqfds.resampler_lock);
	}

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd.
	 */
	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

	spin_lock_irq(&kvm->irqfds.lock);

	ret = 0;
	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd != tmp->eventfd)
			continue;
		/* This fd is used for another irq already. */
		ret = -EBUSY;
		spin_unlock_irq(&kvm->irqfds.lock);
		goto fail;
	}

	idx = srcu_read_lock(&kvm->irq_srcu);
	irqfd_update(kvm, irqfd);
	srcu_read_unlock(&kvm->irq_srcu, idx);

	list_add_tail(&irqfd->list, &kvm->irqfds.items);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered, and trigger it as if we didn't miss it.
	 */
	events = f.file->f_op->poll(f.file, &irqfd->pt);

	if (events & POLLIN)
		schedule_work(&irqfd->inject);

	/*
	 * Do not drop the file until the irqfd is fully initialized,
	 * otherwise we might race against the POLLHUP.
	 */
	fdput(f);

	return 0;

fail:
	if (irqfd->resampler)
		irqfd_resampler_shutdown(irqfd);

	if (resamplefd && !IS_ERR(resamplefd))
		eventfd_ctx_put(resamplefd);

	if (eventfd && !IS_ERR(eventfd))
		eventfd_ctx_put(eventfd);

	fdput(f);

out:
	kfree(irqfd);
	return ret;
}

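/*
 * Return true if some in-kernel user (such as an irqfd resampler) has
 * registered an ack notifier for the GSI routed to this irqchip pin.
 */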
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	struct kvm_irq_ack_notifier *kian;
	int gsi, idx;

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
					 link)
			if (kian->gsi == gsi) {
				srcu_read_unlock(&kvm->irq_srcu, idx);
				return true;
			}

	srcu_read_unlock(&kvm->irq_srcu, idx);

	return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);

void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	struct kvm_irq_ack_notifier *kian;
	int gsi, idx;

	trace_kvm_ack_irq(irqchip, pin);

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
					 link)
			if (kian->gsi == gsi)
				kian->irq_acked(kian);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}

void kvm_register_irq_ack_notifier(struct kvm *kvm,
				   struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
	mutex_unlock(&kvm->irq_lock);
#ifdef __KVM_HAVE_IOAPIC
	kvm_vcpu_request_scan_ioapic(kvm);
#endif
}

void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
				     struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_del_init_rcu(&kian->link);
	mutex_unlock(&kvm->irq_lock);
	synchronize_srcu(&kvm->irq_srcu);
#ifdef __KVM_HAVE_IOAPIC
	kvm_vcpu_request_scan_ioapic(kvm);
#endif
}
#endif

void
kvm_eventfd_init(struct kvm *kvm)
{
#ifdef CONFIG_HAVE_KVM_IRQFD
	spin_lock_init(&kvm->irqfds.lock);
	INIT_LIST_HEAD(&kvm->irqfds.items);
	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
	mutex_init(&kvm->irqfds.resampler_lock);
#endif
	INIT_LIST_HEAD(&kvm->ioeventfds);
}

#ifdef CONFIG_HAVE_KVM_IRQFD
/*
 * Shut down any irqfds that match fd+gsi.
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct _irqfd *irqfd, *tmp;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
			/*
			 * This clearing of irq_entry.type is needed for when
			 * another thread calls kvm_irq_routing_update before
			 * we flush the workqueue below (we synchronize with
			 * kvm_irq_routing_update using irqfds.lock).
			 */
			write_seqcount_begin(&irqfd->irq_entry_sc);
			irqfd->irq_entry.type = 0;
			write_seqcount_end(&irqfd->irq_entry_sc);
			irqfd_deactivate(irqfd);
		}
	}

	spin_unlock_irq(&kvm->irqfds.lock);
	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}

int
kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
	if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
		return -EINVAL;

	if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
		return kvm_irqfd_deassign(kvm, args);

	return kvm_irqfd_assign(kvm, args);
}

/*
 * This function is called as the kvm VM fd is being released.  Shut down
 * all irqfds that still remain open.
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
	struct _irqfd *irqfd, *tmp;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
		irqfd_deactivate(irqfd);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a kvm* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);
}

/*
 * Take note of a change in irq routing.
 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
 */
void kvm_irq_routing_update(struct kvm *kvm)
{
	struct _irqfd *irqfd;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
		irqfd_update(kvm, irqfd);

	spin_unlock_irq(&kvm->irqfds.lock);
}

/*
 * Create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances.  We need our own isolated
 * single-thread queue to prevent deadlock against flushing the normal
 * work-queue.
 */
int kvm_irqfd_init(void)
{
	irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
	if (!irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

void kvm_irqfd_exit(void)
{
	destroy_workqueue(irqfd_cleanup_wq);
}
#endif

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * Userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */

struct _ioeventfd {
	struct list_head     list;
	u64                  addr;
	int                  length;
	struct eventfd_ctx  *eventfd;
	u64                  datamatch;
	struct kvm_io_device dev;
	u8                   bus_idx;
	bool                 wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
	return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
	eventfd_ctx_put(p->eventfd);
	list_del(&p->list);
	kfree(p);
}

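/*
 * Decide whether a guest write at (addr, len, val) should fire this
 * ioeventfd: the address must match exactly, and unless the ioeventfd
 * was registered with length 0 (address-only) or as a wildcard, the
 * written value must equal the registered datamatch.
 */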
static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
	u64 _val;

	if (addr != p->addr)
		/* address must be precise for a hit */
		return false;

	if (!p->length)
		/* length = 0 means only look at the address, so always a hit */
		return true;

	if (len != p->length)
		/* address-range must be precise for a hit */
		return false;

	if (p->wildcard)
		/* all else equal, wildcard is always a hit */
		return true;

	/* otherwise, we have to actually compare the data */

	BUG_ON(!IS_ALIGNED((unsigned long)val, len));

	switch (len) {
	case 1:
		_val = *(u8 *)val;
		break;
	case 2:
		_val = *(u16 *)val;
		break;
	case 4:
		_val = *(u32 *)val;
		break;
	case 8:
		_val = *(u64 *)val;
		break;
	default:
		return false;
	}

	return _val == p->datamatch;
}

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
		const void *val)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	if (!ioeventfd_in_range(p, addr, len, val))
		return -EOPNOTSUPP;

	eventfd_signal(p->eventfd, 1);
	return 0;
}

/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking; just nuke anything we have as quickly as
 * possible.
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
	.write      = ioeventfd_write,
	.destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
	struct _ioeventfd *_p;

	list_for_each_entry(_p, &kvm->ioeventfds, list)
		if (_p->bus_idx == p->bus_idx &&
		    _p->addr == p->addr &&
		    (!_p->length || !p->length ||
		     (_p->length == p->length &&
		      (_p->wildcard || p->wildcard ||
		       _p->datamatch == p->datamatch))))
			return true;

	return false;
}

static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
{
	if (flags & KVM_IOEVENTFD_FLAG_PIO)
		return KVM_PIO_BUS;
	if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
		return KVM_VIRTIO_CCW_NOTIFY_BUS;
	return KVM_MMIO_BUS;
}

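/*
 * Register an ioeventfd: validate the arguments, take a reference on
 * the eventfd, and attach a kvm_io_device to the chosen bus so that a
 * matching guest write signals the eventfd without a trip back to
 * userspace.
 */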
static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus bus_idx;
	struct _ioeventfd *p;
	struct eventfd_ctx *eventfd;
	int ret;

	bus_idx = ioeventfd_bus_from_flags(args->flags);
	/* must be natural-word sized, or 0 to ignore length */
	switch (args->len) {
	case 0:
	case 1:
	case 2:
	case 4:
	case 8:
		break;
	default:
		return -EINVAL;
	}

	/* check for range overflow */
	if (args->addr + args->len < args->addr)
		return -EINVAL;

	/* check for extra flags that we don't understand */
	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
		return -EINVAL;

	/* ioeventfd with no length can't be combined with DATAMATCH */
	if (!args->len &&
	    args->flags & (KVM_IOEVENTFD_FLAG_PIO |
			   KVM_IOEVENTFD_FLAG_DATAMATCH))
		return -EINVAL;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p) {
		ret = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&p->list);
	p->addr    = args->addr;
	p->bus_idx = bus_idx;
	p->length  = args->len;
	p->eventfd = eventfd;

	/* The datamatch feature is optional, otherwise this is a wildcard */
	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
		p->datamatch = args->datamatch;
	else
		p->wildcard = true;

	mutex_lock(&kvm->slots_lock);

	/* Verify that there isn't a match already */
	if (ioeventfd_check_collision(kvm, p)) {
		ret = -EEXIST;
		goto unlock_fail;
	}

	kvm_iodevice_init(&p->dev, &ioeventfd_ops);

	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
				      &p->dev);
	if (ret < 0)
		goto unlock_fail;

	/*
	 * When length is ignored, MMIO is also put on a separate bus, for
	 * faster lookups.
	 */
	if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) {
		ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS,
					      p->addr, 0, &p->dev);
		if (ret < 0)
			goto register_fail;
	}

	kvm->buses[bus_idx]->ioeventfd_count++;
	list_add_tail(&p->list, &kvm->ioeventfds);

	mutex_unlock(&kvm->slots_lock);

	return 0;

register_fail:
	kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
unlock_fail:
	mutex_unlock(&kvm->slots_lock);

fail:
	kfree(p);
	eventfd_ctx_put(eventfd);

	return ret;
}

static int
kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus bus_idx;
	struct _ioeventfd *p, *tmp;
	struct eventfd_ctx *eventfd;
	int ret = -ENOENT;

	bus_idx = ioeventfd_bus_from_flags(args->flags);
	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	mutex_lock(&kvm->slots_lock);

	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

		if (p->bus_idx != bus_idx ||
		    p->eventfd != eventfd  ||
		    p->addr != args->addr  ||
		    p->length != args->len ||
		    p->wildcard != wildcard)
			continue;

		if (!p->wildcard && p->datamatch != args->datamatch)
			continue;

		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
		if (!p->length) {
			kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS,
						  &p->dev);
		}
		kvm->buses[bus_idx]->ioeventfd_count--;
		ioeventfd_release(p);
		ret = 0;
		break;
	}

	mutex_unlock(&kvm->slots_lock);

	eventfd_ctx_put(eventfd);

	return ret;
}

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
		return kvm_deassign_ioeventfd(kvm, args);

	return kvm_assign_ioeventfd(kvm, args);
}

687
virt/kvm/ioapic.c
Normal file

@@ -0,0 +1,687 @@
/*
 *  Copyright (C) 2001  MandrakeSoft S.A.
 *  Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 *    MandrakeSoft S.A.
 *    43, rue d'Aboukir
 *    75002 Paris - France
 *    http://www.linux-mandrake.com/
 *    http://www.mandrakesoft.com/
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 *  Yunhong Jiang <yunhong.jiang@intel.com>
 *  Yaozu (Eddie) Dong <eddie.dong@intel.com>
 *  Based on Xen 3.1 code.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/current.h>
#include <trace/events/kvm.h>

#include "ioapic.h"
#include "lapic.h"
#include "irq.h"

#if 0
#define ioapic_debug(fmt, arg...) printk(KERN_WARNING fmt, ##arg)
#else
#define ioapic_debug(fmt, arg...)
#endif
static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
		bool line_status);

static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
					  unsigned long addr,
					  unsigned long length)
{
	unsigned long result = 0;

	switch (ioapic->ioregsel) {
	case IOAPIC_REG_VERSION:
		result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
			  | (IOAPIC_VERSION_ID & 0xff));
		break;

	case IOAPIC_REG_APIC_ID:
	case IOAPIC_REG_ARB_ID:
		result = ((ioapic->id & 0xf) << 24);
		break;

	default:
	{
		u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
		u64 redir_content;

		if (redir_index < IOAPIC_NUM_PINS)
			redir_content =
				ioapic->redirtbl[redir_index].bits;
		else
			redir_content = ~0ULL;

		result = (ioapic->ioregsel & 0x1) ?
		    (redir_content >> 32) & 0xffffffff :
		    redir_content & 0xffffffff;
		break;
	}
	}

	return result;
}

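/*
 * RTC EOI tracking: the RTC interrupt (GSI 8 on x86) is edge-triggered,
 * but coalesced RTC interrupts cause time drift in Windows guests.  The
 * IOAPIC therefore counts the vcpus that still owe an EOI for the last
 * RTC injection and reports a new one as coalesced until the count
 * drops back to zero.
 */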
static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
{
	ioapic->rtc_status.pending_eoi = 0;
	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
}

static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);

static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic)
{
	if (WARN_ON(ioapic->rtc_status.pending_eoi < 0))
		kvm_rtc_eoi_tracking_restore_all(ioapic);
}

static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
{
	bool new_val, old_val;
	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
	union kvm_ioapic_redirect_entry *e;

	e = &ioapic->redirtbl[RTC_GSI];
	if (!kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id,
				 e->fields.dest_mode))
		return;

	new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
	old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);

	if (new_val == old_val)
		return;

	if (new_val) {
		__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
		ioapic->rtc_status.pending_eoi++;
	} else {
		__clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
		ioapic->rtc_status.pending_eoi--;
		rtc_status_pending_eoi_check_valid(ioapic);
	}
}

void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
{
	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;

	spin_lock(&ioapic->lock);
	__rtc_irq_eoi_tracking_restore_one(vcpu);
	spin_unlock(&ioapic->lock);
}

static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (RTC_GSI >= IOAPIC_NUM_PINS)
		return;

	rtc_irq_eoi_tracking_reset(ioapic);
	kvm_for_each_vcpu(i, vcpu, ioapic->kvm)
		__rtc_irq_eoi_tracking_restore_one(vcpu);
}

static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
{
	if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) {
		--ioapic->rtc_status.pending_eoi;
		rtc_status_pending_eoi_check_valid(ioapic);
	}
}

static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
{
	if (ioapic->rtc_status.pending_eoi > 0)
		return true; /* coalesced */

	return false;
}

static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
		int irq_level, bool line_status)
{
	union kvm_ioapic_redirect_entry entry;
	u32 mask = 1 << irq;
	u32 old_irr;
	int edge, ret;

	entry = ioapic->redirtbl[irq];
	edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);

	if (!irq_level) {
		ioapic->irr &= ~mask;
		ret = 1;
		goto out;
	}

	/*
	 * Return 0 for coalesced interrupts; for edge-triggered interrupts,
	 * this only happens if a previous edge has not been delivered due
	 * to masking.  For level interrupts, the remote_irr field tells
	 * us if the interrupt is waiting for an EOI.
	 *
	 * RTC is special: it is edge-triggered, but userspace likes to know
	 * if it has been already ack-ed via EOI because coalesced RTC
	 * interrupts lead to time drift in Windows guests.  So we track
	 * EOI manually for the RTC interrupt.
	 */
	if (irq == RTC_GSI && line_status &&
		rtc_irq_check_coalesced(ioapic)) {
		ret = 0;
		goto out;
	}

	old_irr = ioapic->irr;
	ioapic->irr |= mask;
	if ((edge && old_irr == ioapic->irr) ||
	    (!edge && entry.fields.remote_irr)) {
		ret = 0;
		goto out;
	}

	ret = ioapic_service(ioapic, irq, line_status);

out:
	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
	return ret;
}

static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
{
	u32 idx;

	rtc_irq_eoi_tracking_reset(ioapic);
	for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS)
		ioapic_set_irq(ioapic, idx, 1, true);

	kvm_rtc_eoi_tracking_restore_all(ioapic);
}

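/*
 * Recompute the set of vectors this IOAPIC can deliver.  Readers pair
 * the smp_rmb() in kvm_ioapic_handles_vector() with the smp_wmb() here.
 */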
static void update_handled_vectors(struct kvm_ioapic *ioapic)
{
	DECLARE_BITMAP(handled_vectors, 256);
	int i;

	memset(handled_vectors, 0, sizeof(handled_vectors));
	for (i = 0; i < IOAPIC_NUM_PINS; ++i)
		__set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors);
	memcpy(ioapic->handled_vectors, handled_vectors,
	       sizeof(handled_vectors));
	smp_wmb();
}

void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
			u32 *tmr)
{
	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
	union kvm_ioapic_redirect_entry *e;
	int index;

	spin_lock(&ioapic->lock);
	for (index = 0; index < IOAPIC_NUM_PINS; index++) {
		e = &ioapic->redirtbl[index];
		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
		    kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) ||
		    index == RTC_GSI) {
			if (kvm_apic_match_dest(vcpu, NULL, 0,
				e->fields.dest_id, e->fields.dest_mode)) {
				__set_bit(e->fields.vector,
					(unsigned long *)eoi_exit_bitmap);
				if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG)
					__set_bit(e->fields.vector,
						(unsigned long *)tmr);
			}
		}
	}
	spin_unlock(&ioapic->lock);
}

#ifdef CONFIG_X86
void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
{
	struct kvm_ioapic *ioapic = kvm->arch.vioapic;

	if (!ioapic)
		return;
	kvm_make_scan_ioapic_request(kvm);
}
#else
void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
{
	return;
}
#endif

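/*
 * Indirect register write via IOREGSEL/IOWIN: redirection table entries
 * are 64 bits wide and written 32 bits at a time; a write to the low
 * dword also clears remote_irr.
 */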
static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
{
	unsigned index;
	bool mask_before, mask_after;
	union kvm_ioapic_redirect_entry *e;

	switch (ioapic->ioregsel) {
	case IOAPIC_REG_VERSION:
		/* Writes are ignored. */
		break;

	case IOAPIC_REG_APIC_ID:
		ioapic->id = (val >> 24) & 0xf;
		break;

	case IOAPIC_REG_ARB_ID:
		break;

	default:
		index = (ioapic->ioregsel - 0x10) >> 1;

		ioapic_debug("change redir index %x val %x\n", index, val);
		if (index >= IOAPIC_NUM_PINS)
			return;
		e = &ioapic->redirtbl[index];
		mask_before = e->fields.mask;
		if (ioapic->ioregsel & 1) {
			e->bits &= 0xffffffff;
			e->bits |= (u64) val << 32;
		} else {
			e->bits &= ~0xffffffffULL;
			e->bits |= (u32) val;
			e->fields.remote_irr = 0;
		}
		update_handled_vectors(ioapic);
		mask_after = e->fields.mask;
		if (mask_before != mask_after)
			kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC,
						index, mask_after);
		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
		    && ioapic->irr & (1 << index))
			ioapic_service(ioapic, index, false);
		kvm_vcpu_request_scan_ioapic(ioapic->kvm);
		break;
	}
}

static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
{
	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
	struct kvm_lapic_irq irqe;
	int ret;

	if (entry->fields.mask)
		return -1;

	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
		     "vector=%x trig_mode=%x\n",
		     entry->fields.dest_id, entry->fields.dest_mode,
		     entry->fields.delivery_mode, entry->fields.vector,
		     entry->fields.trig_mode);

	irqe.dest_id = entry->fields.dest_id;
	irqe.vector = entry->fields.vector;
	irqe.dest_mode = entry->fields.dest_mode;
	irqe.trig_mode = entry->fields.trig_mode;
	irqe.delivery_mode = entry->fields.delivery_mode << 8;
	irqe.level = 1;
	irqe.shorthand = 0;

	if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
		ioapic->irr &= ~(1 << irq);

	if (irq == RTC_GSI && line_status) {
		/*
		 * pending_eoi cannot ever become negative (see
		 * rtc_status_pending_eoi_check_valid) and the caller
		 * ensures that it is only called if it is >= zero, namely
		 * if rtc_irq_check_coalesced returns false.
		 */
		BUG_ON(ioapic->rtc_status.pending_eoi != 0);
		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
				ioapic->rtc_status.dest_map);
		ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret);
	} else
		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);

	if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
		entry->fields.remote_irr = 1;

	return ret;
}

int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
		       int level, bool line_status)
{
	int ret, irq_level;

	BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);

	spin_lock(&ioapic->lock);
	irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
					 irq_source_id, level);
	ret = ioapic_set_irq(ioapic, irq, irq_level, line_status);

	spin_unlock(&ioapic->lock);

	return ret;
}

void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id)
{
	int i;

	spin_lock(&ioapic->lock);
	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
		__clear_bit(irq_source_id, &ioapic->irq_states[i]);
	spin_unlock(&ioapic->lock);
}

static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
{
	int i;
	struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic,
						 eoi_inject.work);
	spin_lock(&ioapic->lock);
	for (i = 0; i < IOAPIC_NUM_PINS; i++) {
		union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];

		if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG)
			continue;

		if (ioapic->irr & (1 << i) && !ent->fields.remote_irr)
			ioapic_service(ioapic, i, false);
	}
	spin_unlock(&ioapic->lock);
}

#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000

static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
			struct kvm_ioapic *ioapic, int vector, int trigger_mode)
{
	int i;

	for (i = 0; i < IOAPIC_NUM_PINS; i++) {
		union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];

		if (ent->fields.vector != vector)
			continue;

		if (i == RTC_GSI)
			rtc_irq_eoi(ioapic, vcpu);
		/*
		 * We are dropping the lock while calling ack notifiers
		 * because ack notifier callbacks for assigned devices call
		 * into the IOAPIC recursively.  Since remote_irr is cleared
		 * only after the call to the notifiers, if the same vector
		 * is delivered while the lock is dropped it will be put
		 * into irr and delivered after the ack notifier returns.
		 */
		spin_unlock(&ioapic->lock);
		kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
		spin_lock(&ioapic->lock);

		if (trigger_mode != IOAPIC_LEVEL_TRIG)
			continue;

		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
		ent->fields.remote_irr = 0;
		if (!ent->fields.mask && (ioapic->irr & (1 << i))) {
			++ioapic->irq_eoi[i];
			if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) {
				/*
				 * Real hardware does not deliver the interrupt
				 * immediately during eoi broadcast, and this
				 * lets a buggy guest make slow progress
				 * even if it does not correctly handle a
				 * level-triggered interrupt.  Emulate this
				 * behavior if we detect an interrupt storm.
				 */
				schedule_delayed_work(&ioapic->eoi_inject, HZ / 100);
				ioapic->irq_eoi[i] = 0;
				trace_kvm_ioapic_delayed_eoi_inj(ent->bits);
			} else {
				ioapic_service(ioapic, i, false);
			}
		} else {
			ioapic->irq_eoi[i] = 0;
		}
	}
}

bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
{
	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
	smp_rmb();
	return test_bit(vector, ioapic->handled_vectors);
}

void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
{
	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;

	spin_lock(&ioapic->lock);
	__kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode);
	spin_unlock(&ioapic->lock);
}

static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
{
	return container_of(dev, struct kvm_ioapic, dev);
}

static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
{
	return ((addr >= ioapic->base_address &&
		 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
}

static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
			    void *val)
{
	struct kvm_ioapic *ioapic = to_ioapic(this);
	u32 result;
	if (!ioapic_in_range(ioapic, addr))
		return -EOPNOTSUPP;

	ioapic_debug("addr %lx\n", (unsigned long)addr);
	ASSERT(!(addr & 0xf));	/* check alignment */

	addr &= 0xff;
	spin_lock(&ioapic->lock);
	switch (addr) {
	case IOAPIC_REG_SELECT:
		result = ioapic->ioregsel;
		break;

	case IOAPIC_REG_WINDOW:
		result = ioapic_read_indirect(ioapic, addr, len);
		break;

	default:
		result = 0;
		break;
	}
	spin_unlock(&ioapic->lock);

	switch (len) {
	case 8:
		*(u64 *) val = result;
		break;
	case 1:
	case 2:
	case 4:
		memcpy(val, (char *)&result, len);
		break;
	default:
		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
	}
	return 0;
}

static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
			     const void *val)
{
	struct kvm_ioapic *ioapic = to_ioapic(this);
	u32 data;
	if (!ioapic_in_range(ioapic, addr))
		return -EOPNOTSUPP;

	ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
		     (void *)addr, len, val);
	ASSERT(!(addr & 0xf));	/* check alignment */

	switch (len) {
	case 8:
	case 4:
		data = *(u32 *) val;
		break;
	case 2:
		data = *(u16 *) val;
		break;
	case 1:
		data = *(u8 *) val;
		break;
	default:
		printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
		return 0;
	}

	addr &= 0xff;
	spin_lock(&ioapic->lock);
	switch (addr) {
	case IOAPIC_REG_SELECT:
		ioapic->ioregsel = data & 0xFF; /* 8-bit register */
		break;

	case IOAPIC_REG_WINDOW:
		ioapic_write_indirect(ioapic, data);
		break;
#ifdef CONFIG_IA64
	case IOAPIC_REG_EOI:
		__kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG);
		break;
#endif

	default:
		break;
	}
	spin_unlock(&ioapic->lock);
	return 0;
}

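/* Return the IOAPIC to power-on state: all pins masked, registers reset. */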
static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
{
	int i;

	cancel_delayed_work_sync(&ioapic->eoi_inject);
	for (i = 0; i < IOAPIC_NUM_PINS; i++)
		ioapic->redirtbl[i].fields.mask = 1;
	ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
	ioapic->ioregsel = 0;
	ioapic->irr = 0;
	ioapic->id = 0;
	memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
	rtc_irq_eoi_tracking_reset(ioapic);
	update_handled_vectors(ioapic);
}

static const struct kvm_io_device_ops ioapic_mmio_ops = {
	.read     = ioapic_mmio_read,
	.write    = ioapic_mmio_write,
};

int kvm_ioapic_init(struct kvm *kvm)
{
	struct kvm_ioapic *ioapic;
	int ret;

	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
	if (!ioapic)
		return -ENOMEM;
	spin_lock_init(&ioapic->lock);
	INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work);
	kvm->arch.vioapic = ioapic;
	kvm_ioapic_reset(ioapic);
	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
	ioapic->kvm = kvm;
	mutex_lock(&kvm->slots_lock);
	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address,
				      IOAPIC_MEM_LENGTH, &ioapic->dev);
	mutex_unlock(&kvm->slots_lock);
	if (ret < 0) {
		kvm->arch.vioapic = NULL;
		kfree(ioapic);
	}

	return ret;
}

void kvm_ioapic_destroy(struct kvm *kvm)
{
	struct kvm_ioapic *ioapic = kvm->arch.vioapic;

	if (ioapic) {
		/*
		 * Cancel the pending EOI-inject work only after the NULL
		 * check: the IOAPIC may never have been created.
		 */
		cancel_delayed_work_sync(&ioapic->eoi_inject);
		kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
		kvm->arch.vioapic = NULL;
		kfree(ioapic);
	}
}

int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
	if (!ioapic)
		return -EINVAL;

	spin_lock(&ioapic->lock);
	memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
	spin_unlock(&ioapic->lock);
	return 0;
}

int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
	if (!ioapic)
		return -EINVAL;

	spin_lock(&ioapic->lock);
	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
	ioapic->irr = 0;
	update_handled_vectors(ioapic);
	kvm_vcpu_request_scan_ioapic(kvm);
	kvm_ioapic_inject_all(ioapic, state->irr);
	spin_unlock(&ioapic->lock);
	return 0;
}

104
virt/kvm/ioapic.h
Normal file

@@ -0,0 +1,104 @@
#ifndef __KVM_IO_APIC_H
#define __KVM_IO_APIC_H

#include <linux/kvm_host.h>

#include "iodev.h"

struct kvm;
struct kvm_vcpu;

#define IOAPIC_NUM_PINS   KVM_IOAPIC_NUM_PINS
#define IOAPIC_VERSION_ID 0x11	/* IOAPIC version */
#define IOAPIC_EDGE_TRIG  0
#define IOAPIC_LEVEL_TRIG 1

#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000
#define IOAPIC_MEM_LENGTH           0x100

/* Direct registers. */
#define IOAPIC_REG_SELECT 0x00
#define IOAPIC_REG_WINDOW 0x10
#define IOAPIC_REG_EOI    0x40	/* IA64 IOSAPIC only */

/* Indirect registers. */
#define IOAPIC_REG_APIC_ID 0x00	/* x86 IOAPIC only */
#define IOAPIC_REG_VERSION 0x01
#define IOAPIC_REG_ARB_ID  0x02	/* x86 IOAPIC only */

/* ioapic delivery mode */
#define IOAPIC_FIXED           0x0
#define IOAPIC_LOWEST_PRIORITY 0x1
#define IOAPIC_PMI             0x2
#define IOAPIC_NMI             0x4
#define IOAPIC_INIT            0x5
#define IOAPIC_EXTINT          0x7

#ifdef CONFIG_X86
#define RTC_GSI 8
#else
#define RTC_GSI -1U
#endif

struct rtc_status {
	int pending_eoi;
	DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
};

struct kvm_ioapic {
	u64 base_address;
	u32 ioregsel;
	u32 id;
	u32 irr;
	u32 pad;
	union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
	unsigned long irq_states[IOAPIC_NUM_PINS];
	struct kvm_io_device dev;
	struct kvm *kvm;
	void (*ack_notifier)(void *opaque, int irq);
	spinlock_t lock;
	DECLARE_BITMAP(handled_vectors, 256);
	struct rtc_status rtc_status;
	struct delayed_work eoi_inject;
	u32 irq_eoi[IOAPIC_NUM_PINS];
};

#ifdef DEBUG
#define ASSERT(x)							\
do {									\
	if (!(x)) {							\
		printk(KERN_EMERG "assertion failed %s: %d: %s\n",	\
		       __FILE__, __LINE__, #x);				\
		BUG();							\
	}								\
} while (0)
#else
#define ASSERT(x) do { } while (0)
#endif

static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
{
	return kvm->arch.vioapic;
}

void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
		int short_hand, int dest, int dest_mode);
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
			int trigger_mode);
bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
int kvm_ioapic_init(struct kvm *kvm);
void kvm_ioapic_destroy(struct kvm *kvm);
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
		       int level, bool line_status);
void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
		struct kvm_lapic_irq *irq, unsigned long *dest_map);
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
			u32 *tmr);

#endif

70
virt/kvm/iodev.h
Normal file

@@ -0,0 +1,70 @@
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

#ifndef __KVM_IODEV_H__
#define __KVM_IODEV_H__

#include <linux/kvm_types.h>
#include <asm/errno.h>

struct kvm_io_device;

/**
 * kvm_io_device_ops are called under kvm slots_lock.
 * read and write handlers return 0 if the transaction has been handled,
 * or non-zero to have it passed to the next device.
 **/
struct kvm_io_device_ops {
	int (*read)(struct kvm_io_device *this,
		    gpa_t addr,
		    int len,
		    void *val);
	int (*write)(struct kvm_io_device *this,
		     gpa_t addr,
		     int len,
		     const void *val);
	void (*destructor)(struct kvm_io_device *this);
};

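/*
 * A device that only handles writes (like an ioeventfd) can leave
 * .read NULL: kvm_iodevice_read() below then returns -EOPNOTSUPP, so
 * the access falls through to the next device on the bus.
 */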
struct kvm_io_device {
	const struct kvm_io_device_ops *ops;
};

static inline void kvm_iodevice_init(struct kvm_io_device *dev,
				     const struct kvm_io_device_ops *ops)
{
	dev->ops = ops;
}

static inline int kvm_iodevice_read(struct kvm_io_device *dev,
				    gpa_t addr, int l, void *v)
{
	return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP;
}

static inline int kvm_iodevice_write(struct kvm_io_device *dev,
				     gpa_t addr, int l, const void *v)
{
	return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP;
}

static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
{
	if (dev->ops->destructor)
		dev->ops->destructor(dev);
}

#endif /* __KVM_IODEV_H__ */

358
virt/kvm/iommu.c
Normal file

@@ -0,0 +1,358 @@
/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Copyright IBM Corporation, 2008
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Author: Allen M. Kay <allen.m.kay@intel.com>
 * Author: Weidong Han <weidong.han@intel.com>
 * Author: Ben-Ami Yassour <benami@il.ibm.com>
 */

#include <linux/list.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/stat.h>
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>

static bool allow_unsafe_assigned_interrupts;
module_param_named(allow_unsafe_assigned_interrupts,
		   allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(allow_unsafe_assigned_interrupts,
 "Enable device assignment on platforms without interrupt remapping support.");

static int kvm_iommu_unmap_memslots(struct kvm *kvm);
static void kvm_iommu_put_pages(struct kvm *kvm,
				gfn_t base_gfn, unsigned long npages);

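/*
 * Pin npages of guest memory starting at gfn by taking a reference on
 * each page, and return the pfn of the first page (or an error pfn).
 * The references are dropped later by kvm_unpin_pages().
 */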
static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
			   unsigned long npages)
{
	gfn_t end_gfn;
	pfn_t pfn;

	pfn     = gfn_to_pfn_memslot(slot, gfn);
	end_gfn = gfn + npages;
	gfn    += 1;

	if (is_error_noslot_pfn(pfn))
		return pfn;

	while (gfn < end_gfn)
		gfn_to_pfn_memslot(slot, gfn++);

	return pfn;
}

static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
{
	unsigned long i;

	for (i = 0; i < npages; ++i)
		kvm_release_pfn_clean(pfn + i);
}

int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
{
	gfn_t gfn, end_gfn;
	pfn_t pfn;
	int r = 0;
	struct iommu_domain *domain = kvm->arch.iommu_domain;
	int flags;

	/* check if iommu exists and in use */
	if (!domain)
		return 0;

	gfn     = slot->base_gfn;
	end_gfn = gfn + slot->npages;

	flags = IOMMU_READ;
	if (!(slot->flags & KVM_MEM_READONLY))
		flags |= IOMMU_WRITE;
	if (!kvm->arch.iommu_noncoherent)
		flags |= IOMMU_CACHE;

	while (gfn < end_gfn) {
		unsigned long page_size;

		/* Check if already mapped */
		if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) {
			gfn += 1;
			continue;
		}

		/* Get the page size we could use to map */
		page_size = kvm_host_page_size(kvm, gfn);

		/* Make sure the page_size does not exceed the memslot */
		while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
			page_size >>= 1;

		/* Make sure gfn is aligned to the page size we want to map */
		while ((gfn << PAGE_SHIFT) & (page_size - 1))
			page_size >>= 1;

		/* Make sure hva is aligned to the page size we want to map */
		while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1))
			page_size >>= 1;

		/*
		 * Pin all pages we are about to map in memory. This is
		 * important because we unmap and unpin in 4kb steps later.
		 */
		pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
		if (is_error_noslot_pfn(pfn)) {
			gfn += 1;
			continue;
		}

		/* Map into IO address space */
		r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
			      page_size, flags);
		if (r) {
			printk(KERN_ERR "kvm_iommu_map_address:"
			       "iommu failed to map pfn=%llx\n", pfn);
			kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
			goto unmap_pages;
		}

		gfn += page_size >> PAGE_SHIFT;
	}

	return 0;

unmap_pages:
	kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn);
	return r;
}

|
||||
{
|
||||
int idx, r = 0;
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
|
||||
if (kvm->arch.iommu_noncoherent)
|
||||
kvm_arch_register_noncoherent_dma(kvm);
|
||||
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
slots = kvm_memslots(kvm);
|
||||
|
||||
kvm_for_each_memslot(memslot, slots) {
|
||||
r = kvm_iommu_map_pages(kvm, memslot);
|
||||
if (r)
|
||||
break;
|
||||
}
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_assign_device(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *assigned_dev)
|
||||
{
|
||||
struct pci_dev *pdev = NULL;
|
||||
struct iommu_domain *domain = kvm->arch.iommu_domain;
|
||||
int r;
|
||||
bool noncoherent;
|
||||
|
||||
/* check if iommu exists and in use */
|
||||
if (!domain)
|
||||
return 0;
|
||||
|
||||
pdev = assigned_dev->dev;
|
||||
if (pdev == NULL)
|
||||
return -ENODEV;
|
||||
|
||||
r = iommu_attach_device(domain, &pdev->dev);
|
||||
if (r) {
|
||||
dev_err(&pdev->dev, "kvm assign device failed ret %d", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY);
|
||||
|
||||
/* Check if need to update IOMMU page table for guest memory */
|
||||
if (noncoherent != kvm->arch.iommu_noncoherent) {
|
||||
kvm_iommu_unmap_memslots(kvm);
|
||||
kvm->arch.iommu_noncoherent = noncoherent;
|
||||
r = kvm_iommu_map_memslots(kvm);
|
||||
if (r)
|
||||
goto out_unmap;
|
||||
}
|
||||
|
||||
pci_set_dev_assigned(pdev);
|
||||
|
||||
dev_info(&pdev->dev, "kvm assign device\n");
|
||||
|
||||
return 0;
|
||||
out_unmap:
|
||||
kvm_iommu_unmap_memslots(kvm);
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_deassign_device(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *assigned_dev)
|
||||
{
|
||||
struct iommu_domain *domain = kvm->arch.iommu_domain;
|
||||
struct pci_dev *pdev = NULL;
|
||||
|
||||
/* check if iommu exists and in use */
|
||||
if (!domain)
|
||||
return 0;
|
||||
|
||||
pdev = assigned_dev->dev;
|
||||
if (pdev == NULL)
|
||||
return -ENODEV;
|
||||
|
||||
iommu_detach_device(domain, &pdev->dev);
|
||||
|
||||
pci_clear_dev_assigned(pdev);
|
||||
|
||||
dev_info(&pdev->dev, "kvm deassign device\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_iommu_map_guest(struct kvm *kvm)
|
||||
{
|
||||
int r;
|
||||
|
||||
if (!iommu_present(&pci_bus_type)) {
|
||||
printk(KERN_ERR "%s: iommu not found\n", __func__);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type);
|
||||
if (!kvm->arch.iommu_domain) {
|
||||
r = -ENOMEM;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (!allow_unsafe_assigned_interrupts &&
|
||||
!iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) {
|
||||
printk(KERN_WARNING "%s: No interrupt remapping support,"
|
||||
" disallowing device assignment."
|
||||
" Re-enble with \"allow_unsafe_assigned_interrupts=1\""
|
||||
" module option.\n", __func__);
|
||||
iommu_domain_free(kvm->arch.iommu_domain);
|
||||
kvm->arch.iommu_domain = NULL;
|
||||
r = -EPERM;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
r = kvm_iommu_map_memslots(kvm);
|
||||
if (r)
|
||||
kvm_iommu_unmap_memslots(kvm);
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
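/*
 * Walk [base_gfn, base_gfn + npages) and tear down the IOMMU mappings,
 * unpinning every page that was pinned by kvm_iommu_map_pages().
 */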
static void kvm_iommu_put_pages(struct kvm *kvm,
				gfn_t base_gfn, unsigned long npages)
{
	struct iommu_domain *domain;
	gfn_t end_gfn, gfn;
	pfn_t pfn;
	u64 phys;

	domain  = kvm->arch.iommu_domain;
	end_gfn = base_gfn + npages;
	gfn     = base_gfn;

	/* check if iommu exists and in use */
	if (!domain)
		return;

	while (gfn < end_gfn) {
		unsigned long unmap_pages;
		size_t size;

		/* Get physical address */
		phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));

		if (!phys) {
			gfn++;
			continue;
		}

		pfn = phys >> PAGE_SHIFT;

		/* Unmap address from IO address space */
		size        = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
		unmap_pages = 1ULL << get_order(size);

		/* Unpin all pages we just unmapped to not leak any memory */
		kvm_unpin_pages(kvm, pfn, unmap_pages);

		gfn += unmap_pages;
	}
}

void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
{
	kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages);
}

static int kvm_iommu_unmap_memslots(struct kvm *kvm)
{
	int idx;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;

	idx = srcu_read_lock(&kvm->srcu);
	slots = kvm_memslots(kvm);

	kvm_for_each_memslot(memslot, slots)
		kvm_iommu_unmap_pages(kvm, memslot);

	srcu_read_unlock(&kvm->srcu, idx);

	if (kvm->arch.iommu_noncoherent)
		kvm_arch_unregister_noncoherent_dma(kvm);

	return 0;
}

int kvm_iommu_unmap_guest(struct kvm *kvm)
{
	struct iommu_domain *domain = kvm->arch.iommu_domain;

	/* check if iommu exists and in use */
	if (!domain)
		return 0;

	mutex_lock(&kvm->slots_lock);
	kvm_iommu_unmap_memslots(kvm);
	kvm->arch.iommu_domain = NULL;
	kvm->arch.iommu_noncoherent = false;
	mutex_unlock(&kvm->slots_lock);

	iommu_domain_free(domain);
	return 0;
}

369
virt/kvm/irq_comm.c
Normal file

@@ -0,0 +1,369 @@
/*
 * irq_comm.c: Common API for in-kernel interrupt controllers
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 * Authors:
 *	Yaozu (Eddie) Dong <Eddie.dong@intel.com>
 *
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 */

#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <trace/events/kvm.h>

#include <asm/msidef.h>
#ifdef CONFIG_IA64
#include <asm/iosapic.h>
#endif

#include "irq.h"

#include "ioapic.h"

static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
			   struct kvm *kvm, int irq_source_id, int level,
			   bool line_status)
{
#ifdef CONFIG_X86
	struct kvm_pic *pic = pic_irqchip(kvm);
	return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
#else
	return -1;
#endif
}

static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
			      struct kvm *kvm, int irq_source_id, int level,
			      bool line_status)
{
	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
	return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level,
				line_status);
}

static inline bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
{
#ifdef CONFIG_IA64
	return irq->delivery_mode ==
		(IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
#else
	return irq->delivery_mode == APIC_DM_LOWEST;
#endif
}

int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
		struct kvm_lapic_irq *irq, unsigned long *dest_map)
{
	int i, r = -1;
	struct kvm_vcpu *vcpu, *lowest = NULL;

	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
			kvm_is_dm_lowest_prio(irq)) {
		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
		irq->delivery_mode = APIC_DM_FIXED;
	}

	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
		return r;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_apic_present(vcpu))
			continue;

		if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
					irq->dest_id, irq->dest_mode))
			continue;

		if (!kvm_is_dm_lowest_prio(irq)) {
			if (r < 0)
				r = 0;
			r += kvm_apic_set_irq(vcpu, irq, dest_map);
		} else if (kvm_lapic_enabled(vcpu)) {
			if (!lowest)
				lowest = vcpu;
			else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
				lowest = vcpu;
		}
	}

	if (lowest)
		r = kvm_apic_set_irq(lowest, irq, dest_map);

	return r;
}

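/* Decode an MSI routing entry's address/data pair into a kvm_lapic_irq. */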
static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
				   struct kvm_lapic_irq *irq)
{
	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);

	irq->dest_id = (e->msi.address_lo &
			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
	irq->vector = (e->msi.data &
		       MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
	irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
	irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
	irq->delivery_mode = e->msi.data & 0x700;
	irq->level = 1;
	irq->shorthand = 0;
	/* TODO Deal with RH bit of MSI message address */
}

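A worked example may help here: the mask/shift constants above implement the standard x86 MSI message layout (destination ID in address bits 19:12, destination mode in address bit 2, vector in data bits 7:0, delivery mode in data bits 10:8, trigger mode in data bit 15). The concrete message below is made up for illustration; this is a standalone userspace decode, not kernel code:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t address_lo = 0xfee01004;	/* assumed MSI address */
	uint32_t data       = 0x0000c041;	/* assumed MSI data */

	uint32_t dest_id  = (address_lo >> 12) & 0xff;	/* dest ID: 0x01 */
	int dest_mode     = (address_lo >> 2) & 1;	/* 1 = logical */
	uint32_t vector   = data & 0xff;		/* vector: 0x41 */
	uint32_t delivery = (data >> 8) & 0x7;		/* 0 = fixed */
	int trig_mode     = (data >> 15) & 1;		/* 1 = level */

	printf("dest %#x mode %d vec %#x dlv %u trig %d\n",
	       dest_id, dest_mode, vector, delivery, trig_mode);
	return 0;
}

(The kernel code stores dest_mode and trig_mode as the raw masked bit rather than shifting it down to 0/1; the decoded values are the same.)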
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
		struct kvm *kvm, int irq_source_id, int level, bool line_status)
{
	struct kvm_lapic_irq irq;

	if (!level)
		return -1;

	kvm_set_msi_irq(e, &irq);

	return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
}

static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
				struct kvm *kvm)
{
	struct kvm_lapic_irq irq;
	int r;

	kvm_set_msi_irq(e, &irq);

	if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
		return r;
	else
		return -EWOULDBLOCK;
}

/*
 * Deliver an IRQ in an atomic context if we can, or return a failure so the
 * caller can retry in a process context.
 * Return value:
 *  -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
 *  Other values - No need to retry.
 */
int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
{
	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
	struct kvm_kernel_irq_routing_entry *e;
	int ret = -EINVAL;
	int idx;

	trace_kvm_set_irq(irq, level, irq_source_id);

	/*
	 * Injection into either PIC or IOAPIC might need to scan all CPUs,
	 * which would need to be retried from thread context; when the same
	 * GSI is connected to both PIC and IOAPIC, we'd have to report a
	 * partial failure here.
	 * Since there's no easy way to do this, we only support injecting MSI,
	 * which is limited to a 1:1 GSI mapping.
	 */
	idx = srcu_read_lock(&kvm->irq_srcu);
	if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
		e = &entries[0];
		if (likely(e->type == KVM_IRQ_ROUTING_MSI))
			ret = kvm_set_msi_inatomic(e, kvm);
		else
			ret = -EWOULDBLOCK;
	}
	srcu_read_unlock(&kvm->irq_srcu, idx);
	return ret;
}

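This is the fast path used from contexts that cannot sleep (irqfd-style injection); on -EWOULDBLOCK the caller falls back to process context. A hedged caller sketch of that split, with the function and work item names invented for illustration:

/*
 * Hedged caller sketch (assumed names): try the atomic fast path first;
 * if only a sleeping path can deliver, punt to a workqueue whose worker
 * calls kvm_set_irq() in process context.
 */
static void example_inject(struct kvm *kvm, int sid, u32 gsi,
			   struct work_struct *slow_work)
{
	if (kvm_set_irq_inatomic(kvm, sid, gsi, 1) == -EWOULDBLOCK)
		schedule_work(slow_work);
}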
int kvm_request_irq_source_id(struct kvm *kvm)
{
	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
	int irq_source_id;

	mutex_lock(&kvm->irq_lock);
	irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG);

	if (irq_source_id >= BITS_PER_LONG) {
		printk(KERN_WARNING "kvm: exhausted allocatable IRQ sources!\n");
		irq_source_id = -EFAULT;
		goto unlock;
	}

	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
#ifdef CONFIG_X86
	ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
#endif
	set_bit(irq_source_id, bitmap);
unlock:
	mutex_unlock(&kvm->irq_lock);

	return irq_source_id;
}

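Each source ID is a slot in a single-word bitmap, so there are at most BITS_PER_LONG of them and every request must be paired with a free. A hedged usage sketch (the wrapper function and GSI value are illustrative, not from this file):

/*
 * Hedged usage sketch: allocate a source ID, pulse a GSI with it, and
 * hand the ID back so the bitmap slot is not leaked.
 */
static int example_pulse_gsi(struct kvm *kvm, u32 gsi)
{
	int sid = kvm_request_irq_source_id(kvm);

	if (sid < 0)
		return sid;			/* -EFAULT: all IDs in use */

	kvm_set_irq(kvm, sid, gsi, 1, false);	/* assert */
	kvm_set_irq(kvm, sid, gsi, 0, false);	/* deassert */
	kvm_free_irq_source_id(kvm, sid);
	return 0;
}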
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
{
	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
#ifdef CONFIG_X86
	ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
#endif

	mutex_lock(&kvm->irq_lock);
	if (irq_source_id < 0 ||
	    irq_source_id >= BITS_PER_LONG) {
		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
		goto unlock;
	}
	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
	if (!irqchip_in_kernel(kvm))
		goto unlock;

	kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
#ifdef CONFIG_X86
	kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id);
#endif
unlock:
	mutex_unlock(&kvm->irq_lock);
}

void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
				    struct kvm_irq_mask_notifier *kimn)
{
	mutex_lock(&kvm->irq_lock);
	kimn->irq = irq;
	hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
	mutex_unlock(&kvm->irq_lock);
}

void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
				      struct kvm_irq_mask_notifier *kimn)
{
	mutex_lock(&kvm->irq_lock);
	hlist_del_rcu(&kimn->link);
	mutex_unlock(&kvm->irq_lock);
	synchronize_srcu(&kvm->irq_srcu);
}

void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
			     bool mask)
{
	struct kvm_irq_mask_notifier *kimn;
	int idx, gsi;

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
			if (kimn->irq == gsi)
				kimn->func(kimn, mask);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}

int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
			  const struct kvm_irq_routing_entry *ue)
{
	int r = -EINVAL;
	int delta;
	unsigned max_pin;

	switch (ue->type) {
	case KVM_IRQ_ROUTING_IRQCHIP:
		delta = 0;
		switch (ue->u.irqchip.irqchip) {
		case KVM_IRQCHIP_PIC_MASTER:
			e->set = kvm_set_pic_irq;
			max_pin = PIC_NUM_PINS;
			break;
		case KVM_IRQCHIP_PIC_SLAVE:
			e->set = kvm_set_pic_irq;
			max_pin = PIC_NUM_PINS;
			delta = 8;
			break;
		case KVM_IRQCHIP_IOAPIC:
			max_pin = KVM_IOAPIC_NUM_PINS;
			e->set = kvm_set_ioapic_irq;
			break;
		default:
			goto out;
		}
		e->irqchip.irqchip = ue->u.irqchip.irqchip;
		e->irqchip.pin = ue->u.irqchip.pin + delta;
		if (e->irqchip.pin >= max_pin)
			goto out;
		break;
	case KVM_IRQ_ROUTING_MSI:
		e->set = kvm_set_msi;
		e->msi.address_lo = ue->u.msi.address_lo;
		e->msi.address_hi = ue->u.msi.address_hi;
		e->msi.data = ue->u.msi.data;
		break;
	default:
		goto out;
	}

	r = 0;
out:
	return r;
}

#define IOAPIC_ROUTING_ENTRY(irq) \
	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
	  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)

#ifdef CONFIG_X86
#  define PIC_ROUTING_ENTRY(irq) \
	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
	  .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } }
#  define ROUTING_ENTRY2(irq) \
	IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
#else
#  define ROUTING_ENTRY2(irq) \
	IOAPIC_ROUTING_ENTRY(irq)
#endif

static const struct kvm_irq_routing_entry default_routing[] = {
	ROUTING_ENTRY2(0), ROUTING_ENTRY2(1),
	ROUTING_ENTRY2(2), ROUTING_ENTRY2(3),
	ROUTING_ENTRY2(4), ROUTING_ENTRY2(5),
	ROUTING_ENTRY2(6), ROUTING_ENTRY2(7),
	ROUTING_ENTRY2(8), ROUTING_ENTRY2(9),
	ROUTING_ENTRY2(10), ROUTING_ENTRY2(11),
	ROUTING_ENTRY2(12), ROUTING_ENTRY2(13),
	ROUTING_ENTRY2(14), ROUTING_ENTRY2(15),
	ROUTING_ENTRY1(16), ROUTING_ENTRY1(17),
	ROUTING_ENTRY1(18), ROUTING_ENTRY1(19),
	ROUTING_ENTRY1(20), ROUTING_ENTRY1(21),
	ROUTING_ENTRY1(22), ROUTING_ENTRY1(23),
#ifdef CONFIG_IA64
	ROUTING_ENTRY1(24), ROUTING_ENTRY1(25),
	ROUTING_ENTRY1(26), ROUTING_ENTRY1(27),
	ROUTING_ENTRY1(28), ROUTING_ENTRY1(29),
	ROUTING_ENTRY1(30), ROUTING_ENTRY1(31),
	ROUTING_ENTRY1(32), ROUTING_ENTRY1(33),
	ROUTING_ENTRY1(34), ROUTING_ENTRY1(35),
	ROUTING_ENTRY1(36), ROUTING_ENTRY1(37),
	ROUTING_ENTRY1(38), ROUTING_ENTRY1(39),
	ROUTING_ENTRY1(40), ROUTING_ENTRY1(41),
	ROUTING_ENTRY1(42), ROUTING_ENTRY1(43),
	ROUTING_ENTRY1(44), ROUTING_ENTRY1(45),
	ROUTING_ENTRY1(46), ROUTING_ENTRY1(47),
#endif
};

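For the first sixteen GSIs, expanding ROUTING_ENTRY2 under CONFIG_X86 yields two array entries per GSI, wiring it to both the IOAPIC and (via SELECT_PIC) a PIC. A hedged expansion of ROUTING_ENTRY2(0), assuming SELECT_PIC() picks the master PIC for pins 0-7 and the slave for 8-15:

/* Hedged expansion of ROUTING_ENTRY2(0) with CONFIG_X86 set: */
{ .gsi = 0, .type = KVM_IRQ_ROUTING_IRQCHIP,
  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = 0 } },
{ .gsi = 0, .type = KVM_IRQ_ROUTING_IRQCHIP,
  .u.irqchip = { .irqchip = KVM_IRQCHIP_PIC_MASTER, .pin = 0 } },

GSIs 16-23 use ROUTING_ENTRY1 and get only the IOAPIC entry, since the legacy PICs have no pins beyond 15.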
int kvm_setup_default_irq_routing(struct kvm *kvm)
{
	return kvm_set_irq_routing(kvm, default_routing,
				   ARRAY_SIZE(default_routing), 0);
}
214
virt/kvm/irqchip.c
Normal file

@@ -0,0 +1,214 @@
/*
 * irqchip.c: Common API for in kernel interrupt controllers
 * Copyright (c) 2007, Intel Corporation.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 * Copyright (c) 2013, Alexander Graf <agraf@suse.de>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * This file is derived from virt/kvm/irq_comm.c.
 *
 * Authors:
 *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
 *   Alexander Graf <agraf@suse.de>
 */

#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/srcu.h>
#include <linux/export.h>
#include <trace/events/kvm.h>
#include "irq.h"

struct kvm_irq_routing_table {
	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
	struct kvm_kernel_irq_routing_entry *rt_entries;
	u32 nr_rt_entries;
	/*
	 * Array indexed by gsi. Each entry contains list of irq chips
	 * the gsi is connected to.
	 */
	struct hlist_head map[0];
};

int kvm_irq_map_gsi(struct kvm *kvm,
		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
{
	struct kvm_irq_routing_table *irq_rt;
	struct kvm_kernel_irq_routing_entry *e;
	int n = 0;

	irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
					lockdep_is_held(&kvm->irq_lock));
	if (gsi < irq_rt->nr_rt_entries) {
		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
			entries[n] = *e;
			++n;
		}
	}

	return n;
}

int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	struct kvm_irq_routing_table *irq_rt;

	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
	return irq_rt->chip[irqchip][pin];
}

int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
{
	struct kvm_kernel_irq_routing_entry route;

	if (!irqchip_in_kernel(kvm) || msi->flags != 0)
		return -EINVAL;

	route.msi.address_lo = msi->address_lo;
	route.msi.address_hi = msi->address_hi;
	route.msi.data = msi->data;

	return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
}

/*
 * Return value:
 *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
 *  = 0   Interrupt was coalesced (previous irq is still pending)
 *  > 0   Number of CPUs interrupt was delivered to
 */
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
		bool line_status)
{
	struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS];
	int ret = -1, i, idx;

	trace_kvm_set_irq(irq, level, irq_source_id);

	/* Not possible to detect if the guest uses the PIC or the
	 * IOAPIC.  So set the bit in both.  The guest will ignore
	 * writes to the unused one.
	 */
	idx = srcu_read_lock(&kvm->irq_srcu);
	i = kvm_irq_map_gsi(kvm, irq_set, irq);
	srcu_read_unlock(&kvm->irq_srcu, idx);

	while (i--) {
		int r;
		r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level,
				   line_status);
		if (r < 0)
			continue;

		ret = r + ((ret < 0) ? 0 : ret);
	}

	return ret;
}

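A brief worked example of the aggregation above, with the per-chip results assumed for illustration:

/*
 * Worked example (assumed results): GSI 4 routed to both PIC and IOAPIC.
 *   PIC .set()    ->  0   (coalesced: previous irq still pending)
 *   IOAPIC .set() ->  1   (delivered to one CPU)
 * ret starts at -1, becomes 0 after the coalesced PIC result, then
 * 1 + 0 = 1 after the IOAPIC, so the caller sees "delivered to 1 CPU"
 * rather than "coalesced". Negative per-chip results are skipped and
 * never overwrite a non-negative ret.
 */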
void kvm_free_irq_routing(struct kvm *kvm)
{
	/* Called only during vm destruction. Nobody can use the pointer
	   at this stage */
	kfree(kvm->irq_routing);
}

static int setup_routing_entry(struct kvm_irq_routing_table *rt,
			       struct kvm_kernel_irq_routing_entry *e,
			       const struct kvm_irq_routing_entry *ue)
{
	int r = -EINVAL;
	struct kvm_kernel_irq_routing_entry *ei;

	/*
	 * Do not allow GSI to be mapped to the same irqchip more than once.
	 * Allow only a one-to-one mapping between GSI and MSI.
	 */
	hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
		if (ei->type == KVM_IRQ_ROUTING_MSI ||
		    ue->type == KVM_IRQ_ROUTING_MSI ||
		    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
			return r;

	e->gsi = ue->gsi;
	e->type = ue->type;
	r = kvm_set_routing_entry(e, ue);
	if (r)
		goto out;
	if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
		rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi;

	hlist_add_head(&e->link, &rt->map[e->gsi]);
	r = 0;
out:
	return r;
}

int kvm_set_irq_routing(struct kvm *kvm,
			const struct kvm_irq_routing_entry *ue,
			unsigned nr,
			unsigned flags)
{
	struct kvm_irq_routing_table *new, *old;
	u32 i, j, nr_rt_entries = 0;
	int r;

	for (i = 0; i < nr; ++i) {
		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
			return -EINVAL;
		nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
	}

	nr_rt_entries += 1;

	new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
		      + (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
		      GFP_KERNEL);

	if (!new)
		return -ENOMEM;

	new->rt_entries = (void *)&new->map[nr_rt_entries];

	new->nr_rt_entries = nr_rt_entries;
	for (i = 0; i < KVM_NR_IRQCHIPS; i++)
		for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++)
			new->chip[i][j] = -1;

	for (i = 0; i < nr; ++i) {
		r = -EINVAL;
		if (ue->flags)
			goto out;
		r = setup_routing_entry(new, &new->rt_entries[i], ue);
		if (r)
			goto out;
		++ue;
	}

	mutex_lock(&kvm->irq_lock);
	old = kvm->irq_routing;
	rcu_assign_pointer(kvm->irq_routing, new);
	kvm_irq_routing_update(kvm);
	mutex_unlock(&kvm->irq_lock);

	synchronize_srcu_expedited(&kvm->irq_srcu);

	new = old;
	r = 0;

out:
	kfree(new);
	return r;
}
3341
virt/kvm/kvm_main.c
Normal file

File diff suppressed because it is too large
290
virt/kvm/vfio.c
Normal file

@@ -0,0 +1,290 @@
/*
 * VFIO-KVM bridge pseudo device
 *
 * Copyright (C) 2013 Red Hat, Inc.  All rights reserved.
 *	Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/errno.h>
#include <linux/file.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include "vfio.h"

struct kvm_vfio_group {
	struct list_head node;
	struct vfio_group *vfio_group;
};

struct kvm_vfio {
	struct list_head group_list;
	struct mutex lock;
	bool noncoherent;
};

static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
{
	struct vfio_group *vfio_group;
	struct vfio_group *(*fn)(struct file *);

	fn = symbol_get(vfio_group_get_external_user);
	if (!fn)
		return ERR_PTR(-EINVAL);

	vfio_group = fn(filep);

	symbol_put(vfio_group_get_external_user);

	return vfio_group;
}

static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
{
	void (*fn)(struct vfio_group *);

	fn = symbol_get(vfio_group_put_external_user);
	if (!fn)
		return;

	fn(vfio_group);

	symbol_put(vfio_group_put_external_user);
}

static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group)
{
	long (*fn)(struct vfio_group *, unsigned long);
	long ret;

	fn = symbol_get(vfio_external_check_extension);
	if (!fn)
		return false;

	ret = fn(vfio_group, VFIO_DMA_CC_IOMMU);

	symbol_put(vfio_external_check_extension);

	return ret > 0;
}

/*
 * Groups can use the same or different IOMMU domains.  If the same then
 * adding a new group may change the coherency of groups we've previously
 * been told about.  We don't want to care about any of that so we retest
 * each group and bail as soon as we find one that's noncoherent.  This
 * means we only ever [un]register_noncoherent_dma once for the whole device.
 */
static void kvm_vfio_update_coherency(struct kvm_device *dev)
{
	struct kvm_vfio *kv = dev->private;
	bool noncoherent = false;
	struct kvm_vfio_group *kvg;

	mutex_lock(&kv->lock);

	list_for_each_entry(kvg, &kv->group_list, node) {
		if (!kvm_vfio_group_is_coherent(kvg->vfio_group)) {
			noncoherent = true;
			break;
		}
	}

	if (noncoherent != kv->noncoherent) {
		kv->noncoherent = noncoherent;

		if (kv->noncoherent)
			kvm_arch_register_noncoherent_dma(dev->kvm);
		else
			kvm_arch_unregister_noncoherent_dma(dev->kvm);
	}

	mutex_unlock(&kv->lock);
}

static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
{
	struct kvm_vfio *kv = dev->private;
	struct vfio_group *vfio_group;
	struct kvm_vfio_group *kvg;
	int32_t __user *argp = (int32_t __user *)(unsigned long)arg;
	struct fd f;
	int32_t fd;
	int ret;

	switch (attr) {
	case KVM_DEV_VFIO_GROUP_ADD:
		if (get_user(fd, argp))
			return -EFAULT;

		f = fdget(fd);
		if (!f.file)
			return -EBADF;

		vfio_group = kvm_vfio_group_get_external_user(f.file);
		fdput(f);

		if (IS_ERR(vfio_group))
			return PTR_ERR(vfio_group);

		mutex_lock(&kv->lock);

		list_for_each_entry(kvg, &kv->group_list, node) {
			if (kvg->vfio_group == vfio_group) {
				mutex_unlock(&kv->lock);
				kvm_vfio_group_put_external_user(vfio_group);
				return -EEXIST;
			}
		}

		kvg = kzalloc(sizeof(*kvg), GFP_KERNEL);
		if (!kvg) {
			mutex_unlock(&kv->lock);
			kvm_vfio_group_put_external_user(vfio_group);
			return -ENOMEM;
		}

		list_add_tail(&kvg->node, &kv->group_list);
		kvg->vfio_group = vfio_group;

		mutex_unlock(&kv->lock);

		kvm_vfio_update_coherency(dev);

		return 0;

	case KVM_DEV_VFIO_GROUP_DEL:
		if (get_user(fd, argp))
			return -EFAULT;

		f = fdget(fd);
		if (!f.file)
			return -EBADF;

		vfio_group = kvm_vfio_group_get_external_user(f.file);
		fdput(f);

		if (IS_ERR(vfio_group))
			return PTR_ERR(vfio_group);

		ret = -ENOENT;

		mutex_lock(&kv->lock);

		list_for_each_entry(kvg, &kv->group_list, node) {
			if (kvg->vfio_group != vfio_group)
				continue;

			list_del(&kvg->node);
			kvm_vfio_group_put_external_user(kvg->vfio_group);
			kfree(kvg);
			ret = 0;
			break;
		}

		mutex_unlock(&kv->lock);

		kvm_vfio_group_put_external_user(vfio_group);

		kvm_vfio_update_coherency(dev);

		return ret;
	}

	return -ENXIO;
}

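From userspace, the flow that reaches this handler is KVM_CREATE_DEVICE with KVM_DEV_TYPE_VFIO followed by KVM_SET_DEVICE_ATTR carrying a pointer to an open VFIO group fd. A hedged userspace sketch; vm_fd is assumed to be an open KVM VM fd, the group path is made up, and error checks are elided:

#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void example_attach_group(int vm_fd)
{
	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_VFIO };
	struct kvm_device_attr attr;
	int32_t group_fd;

	ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);	/* cd.fd is the vfio device */

	group_fd = open("/dev/vfio/26", O_RDWR);	/* assumed group */
	attr.flags = 0;
	attr.group = KVM_DEV_VFIO_GROUP;
	attr.attr  = KVM_DEV_VFIO_GROUP_ADD;
	attr.addr  = (uint64_t)(uintptr_t)&group_fd;	/* ptr to int32 fd */
	ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
}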
static int kvm_vfio_set_attr(struct kvm_device *dev,
			     struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_VFIO_GROUP:
		return kvm_vfio_set_group(dev, attr->attr, attr->addr);
	}

	return -ENXIO;
}

static int kvm_vfio_has_attr(struct kvm_device *dev,
			     struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_VFIO_GROUP:
		switch (attr->attr) {
		case KVM_DEV_VFIO_GROUP_ADD:
		case KVM_DEV_VFIO_GROUP_DEL:
			return 0;
		}

		break;
	}

	return -ENXIO;
}

static void kvm_vfio_destroy(struct kvm_device *dev)
{
	struct kvm_vfio *kv = dev->private;
	struct kvm_vfio_group *kvg, *tmp;

	list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
		kvm_vfio_group_put_external_user(kvg->vfio_group);
		list_del(&kvg->node);
		kfree(kvg);
	}

	kvm_vfio_update_coherency(dev);

	kfree(kv);
	kfree(dev);	/* alloc by kvm_ioctl_create_device, free by .destroy */
}

static int kvm_vfio_create(struct kvm_device *dev, u32 type);

static struct kvm_device_ops kvm_vfio_ops = {
	.name = "kvm-vfio",
	.create = kvm_vfio_create,
	.destroy = kvm_vfio_destroy,
	.set_attr = kvm_vfio_set_attr,
	.has_attr = kvm_vfio_has_attr,
};

static int kvm_vfio_create(struct kvm_device *dev, u32 type)
{
	struct kvm_device *tmp;
	struct kvm_vfio *kv;

	/* Only one VFIO "device" per VM */
	list_for_each_entry(tmp, &dev->kvm->devices, vm_node)
		if (tmp->ops == &kvm_vfio_ops)
			return -EBUSY;

	kv = kzalloc(sizeof(*kv), GFP_KERNEL);
	if (!kv)
		return -ENOMEM;

	INIT_LIST_HEAD(&kv->group_list);
	mutex_init(&kv->lock);

	dev->private = kv;

	return 0;
}

int kvm_vfio_ops_init(void)
{
	return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
}

void kvm_vfio_ops_exit(void)
{
	kvm_unregister_device_ops(KVM_DEV_TYPE_VFIO);
}
17
virt/kvm/vfio.h
Normal file

@@ -0,0 +1,17 @@
#ifndef __KVM_VFIO_H
#define __KVM_VFIO_H

#ifdef CONFIG_KVM_VFIO
int kvm_vfio_ops_init(void);
void kvm_vfio_ops_exit(void);
#else
static inline int kvm_vfio_ops_init(void)
{
	return 0;
}
static inline void kvm_vfio_ops_exit(void)
{
}
#endif

#endif
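Because the !CONFIG_KVM_VFIO branch supplies no-op stubs, generic code can call these hooks unconditionally with no #ifdefs at the call site. A hedged usage sketch (the caller function is illustrative, not from this tree):

/* Hedged sketch: an init path that needs no config guard of its own. */
static int __init example_init(void)
{
	return kvm_vfio_ops_init();	/* simply 0 when VFIO is compiled out */
}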