int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log)
{
int r;
int n;
struct kvm_memory_slot *memslot;
int is_dirty = 0;
down_write(&kvm->slots_lock);
r = kvm_get_dirty_log(kvm, log, &is_dirty);
if (r)
goto out;
/* If nothing is dirty, don't bother messing with page tables. */
if (is_dirty) {
spin_lock(&kvm->mmu_lock);
kvm_mmu_slot_remove_write_access(kvm, log->slot);
spin_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm);
memslot = &kvm->memslots[log->slot];
n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
memset(memslot->dirty_bitmap, 0, n);
}
r = 0;
out:
up_write(&kvm->slots_lock);
return r;
}
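For context, a minimal userspace sketch of how the KVM_GET_DIRTY_LOG ioctl backed by this handler is typically consumed; this is an illustration, not part of the listing above. The caller supplies a bitmap whose size mirrors the ALIGN(memslot->npages, BITS_PER_LONG) / 8 computation, then walks it bit by bit. vm_fd, the slot number, and npages are placeholders.

/* Illustrative userspace sketch (assumptions: vm_fd is an existing KVM VM fd,
 * slot `slot` spans `npages` guest pages). */
#include <linux/kvm.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>

static int dump_dirty_pages(int vm_fd, uint32_t slot, uint64_t npages)
{
	size_t bits = (npages + 63) & ~63ULL;	/* round up to 64-bit words */
	uint64_t *bitmap = calloc(bits / 64, sizeof(uint64_t));
	struct kvm_dirty_log log = { .slot = slot };

	if (!bitmap)
		return -1;
	log.dirty_bitmap = bitmap;
	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0) {
		free(bitmap);
		return -1;
	}
	for (uint64_t i = 0; i < npages; i++)
		if (bitmap[i / 64] & (1ULL << (i % 64)))
			printf("page %llu dirty\n", (unsigned long long)i);
	free(bitmap);
	return 0;
}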
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
int r = -EINVAL;
/*
* This union makes it completely explicit to gcc-3.x
* that these two variables' stack usage should be
* combined, not added together.
*/
union {
struct kvm_pit_state ps;
struct kvm_memory_alias alias;
} u;
switch (ioctl) {
case KVM_SET_TSS_ADDR:
r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
if (r < 0)
goto out;
break;
case KVM_SET_MEMORY_REGION: {
struct kvm_memory_region kvm_mem;
struct kvm_userspace_memory_region kvm_userspace_mem;
r = -EFAULT;
if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
goto out;
kvm_userspace_mem.slot = kvm_mem.slot;
kvm_userspace_mem.flags = kvm_mem.flags;
kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
kvm_userspace_mem.memory_size = kvm_mem.memory_size;
r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
if (r)
goto out;
break;
}
case KVM_SET_NR_MMU_PAGES:
r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
if (r)
goto out;
break;
case KVM_GET_NR_MMU_PAGES:
r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
break;
case KVM_SET_MEMORY_ALIAS:
r = -EFAULT;
if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias)))
goto out;
r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias);
if (r)
goto out;
break;
case KVM_CREATE_IRQCHIP:
r = -ENOMEM;
kvm->arch.vpic = kvm_create_pic(kvm);
if (kvm->arch.vpic) {
r = kvm_ioapic_init(kvm);
if (r) {
kfree(kvm->arch.vpic);
kvm->arch.vpic = NULL;
goto out;
}
} else
goto out;
r = kvm_setup_default_irq_routing(kvm);
if (r) {
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
goto out;
}
break;
case KVM_CREATE_PIT:
mutex_lock(&kvm->lock);
r = -EEXIST;
if (kvm->arch.vpit)
goto create_pit_unlock;
r = -ENOMEM;
kvm->arch.vpit = kvm_create_pit(kvm);
if (kvm->arch.vpit)
r = 0;
create_pit_unlock:
mutex_unlock(&kvm->lock);
break;
case KVM_IRQ_LINE_STATUS:
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
r = -EFAULT;
if (copy_from_user(&irq_event, argp, sizeof irq_event))
goto out;
if (irqchip_in_kernel(kvm)) {
__s32 status;
status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
irq_event.irq, irq_event.level);
if (ioctl == KVM_IRQ_LINE_STATUS) {
irq_event.status = status;
if (copy_to_user(argp, &irq_event,
sizeof irq_event))
goto out;
}
r = 0;
}
break;
}
case KVM_GET_IRQCHIP: {
/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
r = -ENOMEM;
if (!chip)
goto out;
r = -EFAULT;
if (copy_from_user(chip, argp, sizeof *chip))
goto get_irqchip_out;
r = -ENXIO;
if (!irqchip_in_kernel(kvm))
goto get_irqchip_out;
r = kvm_vm_ioctl_get_irqchip(kvm, chip);
if (r)
goto get_irqchip_out;
r = -EFAULT;
if (copy_to_user(argp, chip, sizeof *chip))
goto get_irqchip_out;
r = 0;
get_irqchip_out:
kfree(chip);
if (r)
goto out;
break;
}
case KVM_SET_IRQCHIP: {
/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
r = -ENOMEM;
if (!chip)
goto out;
r = -EFAULT;
if (copy_from_user(chip, argp, sizeof *chip))
goto set_irqchip_out;
r = -ENXIO;
if (!irqchip_in_kernel(kvm))
goto set_irqchip_out;
r = kvm_vm_ioctl_set_irqchip(kvm, chip);
if (r)
goto set_irqchip_out;
r = 0;
set_irqchip_out:
kfree(chip);
if (r)
goto out;
break;
}
case KVM_GET_PIT: {
r = -EFAULT;
if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
goto out;
r = -ENXIO;
if (!kvm->arch.vpit)
goto out;
r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
if (r)
goto out;
r = -EFAULT;
if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
goto out;
r = 0;
break;
}
case KVM_SET_PIT: {
r = -EFAULT;
if (copy_from_user(&u.ps, argp, sizeof u.ps))
goto out;
r = -ENXIO;
if (!kvm->arch.vpit)
goto out;
r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
if (r)
goto out;
r = 0;
break;
}
case KVM_REINJECT_CONTROL: {
struct kvm_reinject_control control;
r = -EFAULT;
if (copy_from_user(&control, argp, sizeof(control)))
goto out;
r = kvm_vm_ioctl_reinject(kvm, &control);
if (r)
goto out;
r = 0;
break;
}
default:
;
}
out:
return r;
}
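As an aside, a hedged userspace sketch of driving two of the ioctls this handler dispatches, KVM_CREATE_IRQCHIP and KVM_IRQ_LINE; vm_fd is assumed to be an already-created VM file descriptor, and IRQ 4 is an arbitrary example line.

/* Illustrative only: create the in-kernel PIC/IOAPIC, then pulse IRQ 4. */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int pulse_irq4(int vm_fd)
{
	struct kvm_irq_level irq = { .irq = 4, .level = 1 };

	if (ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0) < 0)
		return -1;
	if (ioctl(vm_fd, KVM_IRQ_LINE, &irq) < 0)	/* assert the line */
		return -1;
	irq.level = 0;
	return ioctl(vm_fd, KVM_IRQ_LINE, &irq);	/* deassert it */
}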
static void kvm_init_msr_list(void)
{
u32 dummy[2];
unsigned i, j;
for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
continue;
if (j < i)
msrs_to_save[j] = msrs_to_save[i];
j++;
}
num_msrs_to_save = j;
}
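The loop above is the usual in-place compaction idiom: MSRs that rdmsr_safe() rejects are skipped and the survivors are packed to the front, with j ending up as the new length. A standalone sketch of the same pattern; nothing here is KVM-specific, the predicate and data are made up.

#include <stdio.h>

/* Keep only entries that pass a predicate, preserving order, in place. */
static unsigned compact(unsigned *v, unsigned n, int (*keep)(unsigned))
{
	unsigned i, j;

	for (i = j = 0; i < n; i++) {
		if (!keep(v[i]))
			continue;
		if (j < i)
			v[j] = v[i];
		j++;
	}
	return j;	/* new length */
}

static int is_even(unsigned x) { return (x & 1) == 0; }

int main(void)
{
	unsigned v[] = { 1, 2, 3, 4, 6, 7, 8 };
	unsigned n = compact(v, 7, is_even);

	for (unsigned i = 0; i < n; i++)
		printf("%u ", v[i]);	/* prints: 2 4 6 8 */
	printf("\n");
	return 0;
}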
/*
* Only the APIC needs an MMIO device hook, so take a shortcut for now.
*/
static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
gpa_t addr, int len,
int is_write)
{
struct kvm_io_device *dev;
if (vcpu->arch.apic) {
dev = &vcpu->arch.apic->dev;
if (dev->in_range(dev, addr, len, is_write))
return dev;
}
return NULL;
}
static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
gpa_t addr, int len,
int is_write)
{
struct kvm_io_device *dev;
dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write);
if (dev == NULL)
dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len,
is_write);
return dev;
}
static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
struct kvm_vcpu *vcpu)
{
void *data = val;
int r = X86EMUL_CONTINUE;
while (bytes) {
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
unsigned offset = addr & (PAGE_SIZE-1);
unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
if (gpa == UNMAPPED_GVA) {
r = X86EMUL_PROPAGATE_FAULT;
goto out;
}
ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
if (ret < 0) {
r = X86EMUL_UNHANDLEABLE;
goto out;
}
bytes -= toread;
data += toread;
addr += toread;
}
out:
return r;
}
static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
struct kvm_vcpu *vcpu)
{
void *data = val;
int r = X86EMUL_CONTINUE;
while (bytes) {
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
unsigned offset = addr & (PAGE_SIZE-1);
unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
if (gpa == UNMAPPED_GVA) {
r = X86EMUL_PROPAGATE_FAULT;
goto out;
}
ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
if (ret < 0) {
r = X86EMUL_UNHANDLEABLE;
goto out;
}
bytes -= towrite;
data += towrite;
addr += towrite;
}
out:
return r;
}
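Both guest-virtual copy helpers above split the transfer so that no chunk crosses a page boundary, since each guest page may map to a different physical frame. A standalone sketch of that split with made-up numbers (not kernel code):

#include <stdio.h>

#define PAGE_SIZE 4096u

int main(void)
{
	unsigned long addr = 0x1ff0;	/* 16 bytes before a page boundary */
	unsigned bytes = 0x30;

	while (bytes) {
		unsigned offset = addr & (PAGE_SIZE - 1);
		unsigned chunk = bytes < PAGE_SIZE - offset ? bytes : PAGE_SIZE - offset;

		printf("copy %#x bytes at %#lx\n", chunk, addr);	/* 0x10, then 0x20 */
		bytes -= chunk;
		addr += chunk;
	}
	return 0;
}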
static int emulator_read_emulated(unsigned long addr,
void *val,
unsigned int bytes,
struct kvm_vcpu *vcpu)
{
struct kvm_io_device *mmio_dev;
gpa_t gpa;
if (vcpu->mmio_read_completed) {
memcpy(val, vcpu->mmio_data, bytes);
vcpu->mmio_read_completed = 0;
return X86EMUL_CONTINUE;
}
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
/* For APIC access vmexit */
if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
goto mmio;
if (kvm_read_guest_virt(addr, val, bytes, vcpu)
== X86EMUL_CONTINUE)
return X86EMUL_CONTINUE;
if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
mmio:
/*
* Is this MMIO handled locally?
*/
mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
if (mmio_dev) {
kvm_iodevice_read(mmio_dev, gpa, bytes, val);
return X86EMUL_CONTINUE;
}
vcpu->mmio_needed = 1;
vcpu->mmio_phys_addr = gpa;
vcpu->mmio_size = bytes;
vcpu->mmio_is_write = 0;
return X86EMUL_UNHANDLEABLE;
}
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes)
{
int ret;
ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
if (ret < 0)
return 0;
kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
return 1;
}
static int emulator_write_emulated_onepage(unsigned long addr,
const void *val,
unsigned int bytes,
struct kvm_vcpu *vcpu)
{
struct kvm_io_device *mmio_dev;
gpa_t gpa;
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
if (gpa == UNMAPPED_GVA) {
kvm_inject_page_fault(vcpu, addr, 2);
return X86EMUL_PROPAGATE_FAULT;
}
/* For APIC access vmexit */
if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
goto mmio;
if (emulator_write_phys(vcpu, gpa, val, bytes))
return X86EMUL_CONTINUE;
mmio:
/*
* Is this MMIO handled locally?
*/
mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
if (mmio_dev) {
kvm_iodevice_write(mmio_dev, gpa, bytes, val);
return X86EMUL_CONTINUE;
}
vcpu->mmio_needed = 1;
vcpu->mmio_phys_addr = gpa;
vcpu->mmio_size = bytes;
vcpu->mmio_is_write = 1;
memcpy(vcpu->mmio_data, val, bytes);
return X86EMUL_CONTINUE;
}
int emulator_write_emulated(unsigned long addr,
const void *val,
unsigned int bytes,
struct kvm_vcpu *vcpu)
{
/* Crossing a page boundary? */
if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
int rc, now;
now = -addr & ~PAGE_MASK;
rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
if (rc != X86EMUL_CONTINUE)
return rc;
addr += now;
val += now;
bytes -= now;
}
return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
}
EXPORT_SYMBOL_GPL(emulator_write_emulated);
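The page-boundary test and the now = -addr & ~PAGE_MASK expression above deserve a second look: with PAGE_MASK defined as ~(PAGE_SIZE - 1), negating the address and masking with the page-offset bits yields the distance to the next page boundary, so a straddling write is cut in two. A small self-checking sketch (not kernel code):

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE 4096ul
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long addr = 0x1ffe;		/* 2 bytes before a boundary */
	unsigned long now = -addr & ~PAGE_MASK;	/* bytes left in this page */

	assert(now == 2);
	assert((-0x2000ul & ~PAGE_MASK) == 0);	/* aligned address: no split */
	printf("first chunk: %lu bytes, rest starts at %#lx\n", now, addr + now);
	return 0;
}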
static int emulator_cmpxchg_emulated(unsigned long addr,
const void *old,
const void *new,
unsigned int bytes,
struct kvm_vcpu *vcpu)
{
static int reported;
if (!reported) {
reported = 1;
printk(KERN_WARNING "kvm: emulating exchange as write\n");
}
#ifndef CONFIG_X86_64
/* guests cmpxchg8b have to be emulated atomically */
if (bytes == 8) {
gpa_t gpa;
struct page *page;
char *kaddr;
u64 val;
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
if (gpa == UNMAPPED_GVA ||
(gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
goto emul_write;
if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
goto emul_write;
val = *(u64 *)new;
page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
kaddr = kmap_atomic(page, KM_USER0);
set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
kunmap_atomic(kaddr, KM_USER0);
kvm_release_page_dirty(page);
}
emul_write:
#endif
return emulator_write_emulated(addr, new, bytes, vcpu);
}
static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
return kvm_x86_ops->get_segment_base(vcpu, seg);
}
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
{
return X86EMUL_CONTINUE;
}
int emulate_clts(struct kvm_vcpu *vcpu)
{
kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
return X86EMUL_CONTINUE;
}
int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
{
struct kvm_vcpu *vcpu = ctxt->vcpu;
switch (dr) {
case 0 ... 3:
*dest = kvm_x86_ops->get_dr(vcpu, dr);
return X86EMUL_CONTINUE;
default:
pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
return X86EMUL_UNHANDLEABLE;
}
}
int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
{
unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
int exception;
kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
if (exception) {
/* FIXME: better handling */
return X86EMUL_UNHANDLEABLE;
}
return X86EMUL_CONTINUE;
}
void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
{
u8 opcodes[4];
unsigned long rip = kvm_rip_read(vcpu);
unsigned long rip_linear;
if (!printk_ratelimit())
return;
rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu);
printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
}
EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
static struct x86_emulate_ops emulate_ops = {
.read_std = kvm_read_guest_virt,
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
.cmpxchg_emulated = emulator_cmpxchg_emulated,
};
static void cache_all_regs(struct kvm_vcpu *vcpu)
{
kvm_register_read(vcpu, VCPU_REGS_RAX);
kvm_register_read(vcpu, VCPU_REGS_RSP);
kvm_register_read(vcpu, VCPU_REGS_RIP);
vcpu->arch.regs_dirty = ~0;
}
int emulate_instruction(struct kvm_vcpu *vcpu,
struct kvm_run *run,
unsigned long cr2,
u16 error_code,
int emulation_type)
{
int r, shadow_mask;
struct decode_cache *c;
kvm_clear_exception_queue(vcpu);
vcpu->arch.mmio_fault_cr2 = cr2;
/*
* TODO: fix x86_emulate.c to use guest_read/write_register
* instead of direct ->regs accesses, can save hundred cycles
* on Intel for instructions that don't read/change RSP,
* for example.
*/
cache_all_regs(vcpu);
vcpu->mmio_is_write = 0;
vcpu->arch.pio.string = 0;
if (!(emulation_type & EMULTYPE_NO_DECODE)) {
int cs_db, cs_l;
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
vcpu->arch.emulate_ctxt.vcpu = vcpu;
vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
vcpu->arch.emulate_ctxt.mode =
(vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
? X86EMUL_MODE_REAL : cs_l
? X86EMUL_MODE_PROT64 : cs_db
? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
/* Reject instructions other than VMCALL/VMMCALL when
* trying to emulate an invalid opcode */
c = &vcpu->arch.emulate_ctxt.decode;
if ((emulation_type & EMULTYPE_TRAP_UD) &&
(!(c->twobyte && c->b == 0x01 &&
(c->modrm_reg == 0 || c->modrm_reg == 3) &&
c->modrm_mod == 3 && c->modrm_rm == 1)))
return EMULATE_FAIL;
if (r) {
++vcpu->stat.insn_emulation_fail;
if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
return EMULATE_DONE;
return EMULATE_FAIL;
}
}
if (emulation_type & EMULTYPE_SKIP) {
kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip);
return EMULATE_DONE;
}
r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
if (r == 0)
kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask);
if (vcpu->arch.pio.string)
return EMULATE_DO_MMIO;
if ((r || vcpu->mmio_is_write) && run) {
run->exit_reason = KVM_EXIT_MMIO;
run->mmio.phys_addr = vcpu->mmio_phys_addr;
memcpy(run->mmio.data, vcpu->mmio_data, 8);
run->mmio.len = vcpu->mmio_size;
run->mmio.is_write = vcpu->mmio_is_write;
}
if (r) {
if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
return EMULATE_DONE;
if (!vcpu->mmio_needed) {
kvm_report_emulation_failure(vcpu, "mmio");
return EMULATE_FAIL;
}
return EMULATE_DO_MMIO;
}
kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
if (vcpu->mmio_is_write) {
vcpu->mmio_needed = 0;
return EMULATE_DO_MMIO;
}
return EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(emulate_instruction);
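The nested conditional that picks emulate_ctxt.mode reads more easily when flattened out: EFLAGS.VM forces real mode, otherwise CS.L selects 64-bit mode, otherwise CS.D picks 32- versus 16-bit protected mode. A standalone restatement with sanity checks, illustrative only:

#include <assert.h>
#include <string.h>

static const char *emul_mode(int eflags_vm, int cs_l, int cs_db)
{
	return eflags_vm ? "REAL"
	     : cs_l      ? "PROT64"
	     : cs_db     ? "PROT32"
	                 : "PROT16";
}

int main(void)
{
	assert(!strcmp(emul_mode(1, 1, 1), "REAL"));	/* vm86 overrides all */
	assert(!strcmp(emul_mode(0, 1, 0), "PROT64"));
	assert(!strcmp(emul_mode(0, 0, 1), "PROT32"));
	assert(!strcmp(emul_mode(0, 0, 0), "PROT16"));
	return 0;
}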
static int pio_copy_data(struct kvm_vcpu *vcpu)
{
void *p = vcpu->arch.pio_data;
gva_t q = vcpu->arch.pio.guest_gva;
unsigned bytes;
int ret;
bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
if (vcpu->arch.pio.in)
ret = kvm_read_guest_virt(q, p, bytes, vcpu);
else
ret = kvm_write_guest_virt(q, p, bytes, vcpu);
return ret;
}
int complete_pio(struct kvm_vcpu *vcpu)
{
struct kvm_pio_request *io = &vcpu->arch.pio;
long delta;
int r;
unsigned long val;
if (!io->string) {
if (io->in) {
val = kvm_register_read(vcpu, VCPU_REGS_RAX);
memcpy(&val, vcpu->arch.pio_data, io->size);
kvm_register_write(vcpu, VCPU_REGS_RAX, val);
}
} else {
if (io->in) {
r = pio_copy_data(vcpu);
if (r)
return r;
}
delta = 1;
if (io->rep) {
delta *= io->cur_count;
/*
* The size of the register should really depend on
* current address size.
*/
val = kvm_register_read(vcpu, VCPU_REGS_RCX);
val -= delta;
kvm_register_write(vcpu, VCPU_REGS_RCX, val);
}
if (io->down)
delta = -delta;
delta *= io->size;
if (io->in) {
val = kvm_register_read(vcpu, VCPU_REGS_RDI);
val += delta;
kvm_register_write(vcpu, VCPU_REGS_RDI, val);
} else {
val = kvm_register_read(vcpu, VCPU_REGS_RSI);
val += delta;
kvm_register_write(vcpu, VCPU_REGS_RSI, val);
}
}
io->count -= io->cur_count;
io->cur_count = 0;
return 0;
}
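For string I/O, complete_pio() above adjusts RCX by the repeat count and RSI or RDI by count * size bytes, negated when the direction flag (io->down) is set. A worked example with made-up register values showing the arithmetic for rep outsw (not kernel code):

#include <assert.h>

int main(void)
{
	long size = 2, cur_count = 3, down = 0;	/* outsw, DF = 0 */
	long rcx = 3, rsi = 0x1000;
	long delta = 1;

	delta *= cur_count;	/* rep prefix: one step per element */
	rcx -= delta;
	if (down)
		delta = -delta;
	delta *= size;		/* byte distance moved by RSI */
	rsi += delta;

	assert(rcx == 0 && rsi == 0x1006);
	return 0;
}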
static void kernel_pio(struct kvm_io_device *pio_dev,
struct kvm_vcpu *vcpu,
void *pd)
{
/* TODO: String I/O for in kernel device */
mutex_lock(&vcpu->kvm->lock);
if (vcpu->arch.pio.in)
kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
vcpu->arch.pio.size,
pd);
else
kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
vcpu->arch.pio.size,
pd);
mutex_unlock(&vcpu->kvm->lock);
}
static void pio_string_write(struct kvm_io_device *pio_dev,
struct kvm_vcpu *vcpu)
{
struct kvm_pio_request *io = &vcpu->arch.pio;
void *pd = vcpu->arch.pio_data;
int i;
mutex_lock(&vcpu->kvm->lock);
for (i = 0; i < io->cur_count; i++) {
kvm_iodevice_write(pio_dev, io->port,
io->size,
pd);
pd += io->size;
}
mutex_unlock(&vcpu->kvm->lock);
}
static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
gpa_t addr, int len,
int is_write)
{
return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
}
int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned port)
{
unsigned long val;
struct kvm_io_device *pio_dev;
vcpu->run->exit_reason = KVM_EXIT_IO;
vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
vcpu->run->io.size = vcpu->arch.pio.size = size;
vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1;
vcpu->run->io.port = vcpu->arch.pio.port = port;
vcpu->arch.pio.in = in;
vcpu->arch.pio.string = 0;
vcpu->arch.pio.down = 0;
vcpu->arch.pio.rep = 0;
if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
handler);
else
KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
handler);
val = kvm_register_read(vcpu, VCPU_REGS_RAX);
memcpy(vcpu->arch.pio_data, &val, 4);
pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
if (pio_dev) {
kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
complete_pio(vcpu);
return 1;
}
return 0;
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio);
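On the other side of this exit, userspace finds the PIO data inside the mmapped kvm_run page at run->io.data_offset (KVM_PIO_PAGE_OFFSET * PAGE_SIZE, as set above). A hedged sketch of a typical handler; the serial-port check is just an example, and run is assumed to be the vcpu's mmapped struct kvm_run.

#include <linux/kvm.h>
#include <stdint.h>
#include <stdio.h>

static void handle_io_exit(struct kvm_run *run)
{
	uint8_t *data = (uint8_t *)run + run->io.data_offset;

	if (run->io.direction == KVM_EXIT_IO_OUT && run->io.port == 0x3f8)
		for (uint32_t i = 0; i < run->io.count; i++)
			putchar(data[i * run->io.size]);	/* emulated serial byte */
}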
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned long count, int down,
gva_t address, int rep, unsigned port)
{
unsigned now, in_page;
int ret = 0;
struct kvm_io_device *pio_dev;
vcpu->run->exit_reason = KVM_EXIT_IO;
vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
vcpu->run->io.size = vcpu->arch.pio.size = size;
vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count;
vcpu->run->io.port = vcpu->arch.pio.port = port;
vcpu->arch.pio.in = in;
vcpu->arch.pio.string = 1;
vcpu->arch.pio.down = down;
vcpu->arch.pio.rep = rep;
if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
handler);
else
KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
handler);
if (!count) {
kvm_x86_ops->skip_emulated_instruction(vcpu);
return 1;
}
if (!down)
in_page = PAGE_SIZE - offset_in_page(address);
else
in_page = offset_in_page(address) + size;
now = min(count, (unsigned long)in_page / size);
if (!now)
now = 1;
if (down) {
/*
* String I/O in reverse. Yuck. Kill the guest, fix later.
*/
pr_unimpl(vcpu, "guest string pio down\n");
kvm_inject_gp(vcpu, 0);
return 1;
}
vcpu->run->io.count = now;
vcpu->arch.pio.cur_count = now;
if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
kvm_x86_ops->skip_emulated_instruction(vcpu);
vcpu->arch.pio.guest_gva = address;
pio_dev = vcpu_find_pio_dev(vcpu, port,
vcpu->arch.pio.cur_count,
!vcpu->arch.pio.in);
if (!vcpu->arch.pio.in) {
/* string PIO write */
ret = pio_copy_data(vcpu);
if (ret == X86EMUL_PROPAGATE_FAULT) {
kvm_inject_gp(vcpu, 0);
return 1;
}
if (ret == 0 && pio_dev) {
pio_string_write(pio_dev, vcpu);
complete_pio(vcpu);
if (vcpu->arch.pio.count == 0)
ret = 1;
}
} else if (pio_dev)
pr_unimpl(vcpu, "no string pio read support yet, "
"port %x size %d count %ld\n",
port, size, count);
return ret;
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
static void bounce_off(void *info)
{
/* nothing */
}
static unsigned int ref_freq;
static unsigned long tsc_khz_ref;
static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
struct cpufreq_freqs *freq = data;
struct kvm *kvm;
struct kvm_vcpu *vcpu;
int i, send_ipi = 0;
if (!ref_freq)
ref_freq = freq->old;
if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
return 0;
if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
return 0;
per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
vcpu = kvm->vcpus[i];
if (!vcpu)
continue;
if (vcpu->cpu != freq->cpu)
continue;
if (!kvm_request_guest_time_update(vcpu))
continue;
if (vcpu->cpu != smp_processor_id())
send_ipi++;
}
}
spin_unlock(&kvm_lock);
if (freq->old < freq->new && send_ipi) {
/*
* We upscale the frequency. We must make sure the guest
* doesn't see old kvmclock values while running with
* the new frequency, otherwise we risk the guest seeing
* time go backwards.
*
* In case we update the frequency for another cpu
* (which might be in guest context) send an interrupt
* to kick the cpu out of guest context. Next time
* guest context is entered kvmclock will be updated,
* so the guest will not see stale values.
*/
smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
}
return 0;
}
static struct notifier_block kvmclock_cpufreq_notifier_block = {
.notifier_call = kvmclock_cpufreq_notifier
};
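cpufreq_scale(tsc_khz_ref, ref_freq, freq->new) above rescales the reference TSC rate in proportion to the frequency change, and per_cpu(cpu_tsc_khz, ...) then carries the new value. A back-of-the-envelope model of that scaling; the real helper uses shift-based fixed point, but the proportion is the same.

#include <assert.h>

static unsigned long scale(unsigned long old, unsigned int div, unsigned int mult)
{
	return (unsigned long)((unsigned long long)old * mult / div);
}

int main(void)
{
	unsigned long tsc_khz_ref = 2800000;	/* TSC rate at the reference freq */
	unsigned int ref_freq = 2800000;	/* kHz */
	unsigned int new_freq = 1400000;	/* CPU halves its clock */

	assert(scale(tsc_khz_ref, ref_freq, new_freq) == 1400000);
	return 0;
}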
int kvm_arch_init(void *opaque)
{
int r;
struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
if (kvm_x86_ops) {
printk(KERN_ERR "kvm: already loaded the other module\n");
r = -EEXIST;
goto out;
}
if (!ops->cpu_has_kvm_support()) {
printk(KERN_ERR "kvm: no hardware support\n");
r = -EOPNOTSUPP;
goto out;
}
if (ops->disabled_by_bios()) {
printk(KERN_ERR "kvm: disabled by bios\n");
r = -EOPNOTSUPP;
goto out;
}
r = kvm_mmu_module_init();
if (r)
goto out;
kvm_init_msr_list();
kvm_x86_ops = ops;
kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0);