Newer
Older
if (!reported) {
reported = 1;
printk(KERN_WARNING "kvm: emulating exchange as write\n");
}
#ifndef CONFIG_X86_64
/* guests cmpxchg8b have to be emulated atomically */
if (bytes == 8) {
struct page *page;
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
if (gpa == UNMAPPED_GVA ||
(gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
goto emul_write;
if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
goto emul_write;
val = *(u64 *)new;
down_read(¤t->mm->mmap_sem);
page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
up_read(¤t->mm->mmap_sem);
kaddr = kmap_atomic(page, KM_USER0);
set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
kunmap_atomic(kaddr, KM_USER0);
kvm_release_page_dirty(page);
}
return emulator_write_emulated(addr, new, bytes, vcpu);
}
static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
return kvm_x86_ops->get_segment_base(vcpu, seg);
}
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
{
return X86EMUL_CONTINUE;
}
int emulate_clts(struct kvm_vcpu *vcpu)
{
kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
return X86EMUL_CONTINUE;
}
int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
{
struct kvm_vcpu *vcpu = ctxt->vcpu;
switch (dr) {
case 0 ... 3:
*dest = kvm_x86_ops->get_dr(vcpu, dr);
return X86EMUL_CONTINUE;
default:
pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
return X86EMUL_UNHANDLEABLE;
}
}
int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
{
unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
int exception;
kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
if (exception) {
/* FIXME: better handling */
return X86EMUL_UNHANDLEABLE;
}
return X86EMUL_CONTINUE;
}
void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
{
u8 opcodes[4];
unsigned long rip = kvm_rip_read(vcpu);
unsigned long rip_linear;
if (!printk_ratelimit())
return;
rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
}
EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
static struct x86_emulate_ops emulate_ops = {
.read_std = emulator_read_std,
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
.cmpxchg_emulated = emulator_cmpxchg_emulated,
};
static void cache_all_regs(struct kvm_vcpu *vcpu)
{
kvm_register_read(vcpu, VCPU_REGS_RAX);
kvm_register_read(vcpu, VCPU_REGS_RSP);
kvm_register_read(vcpu, VCPU_REGS_RIP);
vcpu->arch.regs_dirty = ~0;
}
int emulate_instruction(struct kvm_vcpu *vcpu,
struct kvm_run *run,
unsigned long cr2,
u16 error_code,
int emulation_type)
{
int r;
struct decode_cache *c;
kvm_clear_exception_queue(vcpu);
vcpu->arch.mmio_fault_cr2 = cr2;
/*
* TODO: fix x86_emulate.c to use guest_read/write_register
* instead of direct ->regs accesses, can save hundred cycles
* on Intel for instructions that don't read/change RSP, for
* for example.
*/
cache_all_regs(vcpu);
vcpu->mmio_is_write = 0;
if (!(emulation_type & EMULTYPE_NO_DECODE)) {
int cs_db, cs_l;
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
vcpu->arch.emulate_ctxt.vcpu = vcpu;
vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
vcpu->arch.emulate_ctxt.mode =
(vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
? X86EMUL_MODE_REAL : cs_l
? X86EMUL_MODE_PROT64 : cs_db
? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
/* Reject the instructions other than VMCALL/VMMCALL when
* try to emulate invalid opcode */
c = &vcpu->arch.emulate_ctxt.decode;
if ((emulation_type & EMULTYPE_TRAP_UD) &&
(!(c->twobyte && c->b == 0x01 &&
(c->modrm_reg == 0 || c->modrm_reg == 3) &&
c->modrm_mod == 3 && c->modrm_rm == 1)))
return EMULATE_FAIL;
if (r) {
++vcpu->stat.insn_emulation_fail;
if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
return EMULATE_DONE;
return EMULATE_FAIL;
}
}
r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
return EMULATE_DO_MMIO;
if ((r || vcpu->mmio_is_write) && run) {
run->exit_reason = KVM_EXIT_MMIO;
run->mmio.phys_addr = vcpu->mmio_phys_addr;
memcpy(run->mmio.data, vcpu->mmio_data, 8);
run->mmio.len = vcpu->mmio_size;
run->mmio.is_write = vcpu->mmio_is_write;
}
if (r) {
if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
return EMULATE_DONE;
if (!vcpu->mmio_needed) {
kvm_report_emulation_failure(vcpu, "mmio");
return EMULATE_FAIL;
}
return EMULATE_DO_MMIO;
}
kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
if (vcpu->mmio_is_write) {
vcpu->mmio_needed = 0;
return EMULATE_DO_MMIO;
}
return EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(emulate_instruction);
static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
{
int i;
for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i)
if (vcpu->arch.pio.guest_pages[i]) {
kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]);
vcpu->arch.pio.guest_pages[i] = NULL;
}
}
static int pio_copy_data(struct kvm_vcpu *vcpu)
{
void *p = vcpu->arch.pio_data;
void *q;
unsigned bytes;
int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1;
q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
PAGE_KERNEL);
if (!q) {
free_pio_guest_pages(vcpu);
return -ENOMEM;
}
q += vcpu->arch.pio.guest_page_offset;
bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
if (vcpu->arch.pio.in)
memcpy(q, p, bytes);
else
memcpy(p, q, bytes);
q -= vcpu->arch.pio.guest_page_offset;
vunmap(q);
free_pio_guest_pages(vcpu);
return 0;
}
int complete_pio(struct kvm_vcpu *vcpu)
{
struct kvm_pio_request *io = &vcpu->arch.pio;
long delta;
int r;
if (!io->string) {
if (io->in) {
val = kvm_register_read(vcpu, VCPU_REGS_RAX);
memcpy(&val, vcpu->arch.pio_data, io->size);
kvm_register_write(vcpu, VCPU_REGS_RAX, val);
}
} else {
if (io->in) {
r = pio_copy_data(vcpu);
return r;
}
delta = 1;
if (io->rep) {
delta *= io->cur_count;
/*
* The size of the register should really depend on
* current address size.
*/
val = kvm_register_read(vcpu, VCPU_REGS_RCX);
val -= delta;
kvm_register_write(vcpu, VCPU_REGS_RCX, val);
}
if (io->down)
delta = -delta;
delta *= io->size;
if (io->in) {
val = kvm_register_read(vcpu, VCPU_REGS_RDI);
val += delta;
kvm_register_write(vcpu, VCPU_REGS_RDI, val);
} else {
val = kvm_register_read(vcpu, VCPU_REGS_RSI);
val += delta;
kvm_register_write(vcpu, VCPU_REGS_RSI, val);
}
}
io->count -= io->cur_count;
io->cur_count = 0;
return 0;
}
static void kernel_pio(struct kvm_io_device *pio_dev,
struct kvm_vcpu *vcpu,
void *pd)
{
/* TODO: String I/O for in kernel device */
mutex_lock(&vcpu->kvm->lock);
if (vcpu->arch.pio.in)
kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
vcpu->arch.pio.size,
kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
vcpu->arch.pio.size,
pd);
mutex_unlock(&vcpu->kvm->lock);
}
static void pio_string_write(struct kvm_io_device *pio_dev,
struct kvm_vcpu *vcpu)
{
struct kvm_pio_request *io = &vcpu->arch.pio;
void *pd = vcpu->arch.pio_data;
int i;
mutex_lock(&vcpu->kvm->lock);
for (i = 0; i < io->cur_count; i++) {
kvm_iodevice_write(pio_dev, io->port,
io->size,
pd);
pd += io->size;
}
mutex_unlock(&vcpu->kvm->lock);
}
static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
gpa_t addr, int len,
int is_write)
return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
}
int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned port)
{
struct kvm_io_device *pio_dev;
vcpu->run->exit_reason = KVM_EXIT_IO;
vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
vcpu->run->io.size = vcpu->arch.pio.size = size;
vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1;
vcpu->run->io.port = vcpu->arch.pio.port = port;
vcpu->arch.pio.in = in;
vcpu->arch.pio.string = 0;
vcpu->arch.pio.down = 0;
vcpu->arch.pio.guest_page_offset = 0;
vcpu->arch.pio.rep = 0;
if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
handler);
else
KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
handler);
val = kvm_register_read(vcpu, VCPU_REGS_RAX);
memcpy(vcpu->arch.pio_data, &val, 4);
kvm_x86_ops->skip_emulated_instruction(vcpu);
pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
complete_pio(vcpu);
return 1;
}
return 0;
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio);
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned long count, int down,
gva_t address, int rep, unsigned port)
{
unsigned now, in_page;
int i, ret = 0;
int nr_pages = 1;
struct page *page;
struct kvm_io_device *pio_dev;
vcpu->run->exit_reason = KVM_EXIT_IO;
vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
vcpu->run->io.size = vcpu->arch.pio.size = size;
vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count;
vcpu->run->io.port = vcpu->arch.pio.port = port;
vcpu->arch.pio.in = in;
vcpu->arch.pio.string = 1;
vcpu->arch.pio.down = down;
vcpu->arch.pio.guest_page_offset = offset_in_page(address);
vcpu->arch.pio.rep = rep;
if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
handler);
else
KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
handler);
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
if (!count) {
kvm_x86_ops->skip_emulated_instruction(vcpu);
return 1;
}
if (!down)
in_page = PAGE_SIZE - offset_in_page(address);
else
in_page = offset_in_page(address) + size;
now = min(count, (unsigned long)in_page / size);
if (!now) {
/*
* String I/O straddles page boundary. Pin two guest pages
* so that we satisfy atomicity constraints. Do just one
* transaction to avoid complexity.
*/
nr_pages = 2;
now = 1;
}
if (down) {
/*
* String I/O in reverse. Yuck. Kill the guest, fix later.
*/
pr_unimpl(vcpu, "guest string pio down\n");
kvm_inject_gp(vcpu, 0);
return 1;
}
vcpu->run->io.count = now;
vcpu->arch.pio.cur_count = now;
if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
kvm_x86_ops->skip_emulated_instruction(vcpu);
for (i = 0; i < nr_pages; ++i) {
page = gva_to_page(vcpu, address + i * PAGE_SIZE);
vcpu->arch.pio.guest_pages[i] = page;
kvm_inject_gp(vcpu, 0);
free_pio_guest_pages(vcpu);
return 1;
}
}
pio_dev = vcpu_find_pio_dev(vcpu, port,
vcpu->arch.pio.cur_count,
!vcpu->arch.pio.in);
/* string PIO write */
ret = pio_copy_data(vcpu);
if (ret >= 0 && pio_dev) {
pio_string_write(pio_dev, vcpu);
complete_pio(vcpu);
if (vcpu->arch.pio.count == 0)
ret = 1;
}
} else if (pio_dev)
pr_unimpl(vcpu, "no string pio read support yet, "
"port %x size %d count %ld\n",
port, size, count);
return ret;
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
int kvm_arch_init(void *opaque)
struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
if (kvm_x86_ops) {
printk(KERN_ERR "kvm: already loaded the other module\n");
r = -EEXIST;
goto out;
}
if (!ops->cpu_has_kvm_support()) {
printk(KERN_ERR "kvm: no hardware support\n");
r = -EOPNOTSUPP;
goto out;
}
if (ops->disabled_by_bios()) {
printk(KERN_ERR "kvm: disabled by bios\n");
r = -EOPNOTSUPP;
goto out;
}
r = kvm_mmu_module_init();
if (r)
goto out;
kvm_init_msr_list();
kvm_x86_ops = ops;
kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0);
return 0;
out:
return r;
void kvm_arch_exit(void)
{
kvm_x86_ops = NULL;
kvm_mmu_module_exit();
}
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
++vcpu->stat.halt_exits;
if (irqchip_in_kernel(vcpu->kvm)) {
vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
kvm_vcpu_block(vcpu);
if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
return -EINTR;
return 1;
} else {
vcpu->run->exit_reason = KVM_EXIT_HLT;
return 0;
}
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
unsigned long a1)
{
if (is_long_mode(vcpu))
return a0;
else
return a0 | ((gpa_t)a1 << 32);
}
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
if (!is_long_mode(vcpu)) {
nr &= 0xFFFFFFFF;
a0 &= 0xFFFFFFFF;
a1 &= 0xFFFFFFFF;
a2 &= 0xFFFFFFFF;
a3 &= 0xFFFFFFFF;
}
switch (nr) {
case KVM_HC_VAPIC_POLL_IRQ:
ret = 0;
break;
case KVM_HC_MMU_OP:
r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
break;
default:
ret = -KVM_ENOSYS;
break;
}
kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
{
char instruction[3];
int ret = 0;
unsigned long rip = kvm_rip_read(vcpu);
/*
* Blow out the MMU to ensure that no other VCPU has an active mapping
* to ensure that the updated hypercall appears atomically across all
* VCPUs.
*/
kvm_mmu_zap_all(vcpu->kvm);
kvm_x86_ops->patch_hypercall(vcpu, instruction);
if (emulator_write_emulated(rip, instruction, 3, vcpu)
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
!= X86EMUL_CONTINUE)
ret = -EFAULT;
return ret;
}
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
}
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
struct descriptor_table dt = { limit, base };
kvm_x86_ops->set_gdt(vcpu, &dt);
}
void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
struct descriptor_table dt = { limit, base };
kvm_x86_ops->set_idt(vcpu, &dt);
}
void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
unsigned long *rflags)
{
kvm_lmsw(vcpu, msw);
*rflags = kvm_x86_ops->get_rflags(vcpu);
}
unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
{
kvm_x86_ops->decache_cr4_guest_bits(vcpu);
switch (cr) {
case 0:
case 2:
case 3:
case 4:
default:
vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
return 0;
}
KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value,
(u32)((u64)value >> 32), handler);
return value;
}
void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
unsigned long *rflags)
{
KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val,
(u32)((u64)val >> 32), handler);
switch (cr) {
case 0:
kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
*rflags = kvm_x86_ops->get_rflags(vcpu);
break;
case 2:
break;
case 3:
kvm_set_cr3(vcpu, val);
break;
case 4:
kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
break;
kvm_set_cr8(vcpu, val & 0xfUL);
default:
vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
}
}
static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
{
struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
int j, nent = vcpu->arch.cpuid_nent;
e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
/* when no next entry is found, the current entry[i] is reselected */
for (j = i + 1; j == i; j = (j + 1) % nent) {
struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
if (ej->function == e->function) {
ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
return j;
}
}
return 0; /* silence gcc, even though control never reaches here */
}
/* find an entry with matching function, matching index (if needed), and that
* should be read next (if it's stateful) */
static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
u32 function, u32 index)
{
if (e->function != function)
return 0;
if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
return 0;
if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
!(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
return 0;
return 1;
}
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
int i;
u32 function, index;
struct kvm_cpuid_entry2 *e, *best;
function = kvm_register_read(vcpu, VCPU_REGS_RAX);
index = kvm_register_read(vcpu, VCPU_REGS_RCX);
kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
best = NULL;
for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
e = &vcpu->arch.cpuid_entries[i];
if (is_matching_cpuid_entry(e, function, index)) {
if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
move_to_next_stateful_cpuid_entry(vcpu, i);
best = e;
break;
}
/*
* Both basic or both extended?
*/
if (((e->function ^ function) & 0x80000000) == 0)
if (!best || e->function > best->function)
best = e;
}
if (best) {
kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
}
kvm_x86_ops->skip_emulated_instruction(vcpu);
(u32)kvm_register_read(vcpu, VCPU_REGS_RAX),
(u32)kvm_register_read(vcpu, VCPU_REGS_RBX),
(u32)kvm_register_read(vcpu, VCPU_REGS_RCX),
(u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler);
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
/*
* Check if userspace requested an interrupt window, and that the
* interrupt window is open.
*
* No need to exit to userspace if we already have an interrupt queued.
*/
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
return (!vcpu->arch.irq_summary &&
kvm_run->request_interrupt_window &&
vcpu->arch.interrupt_window_open &&
(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
}
static void post_kvm_run_save(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
if (irqchip_in_kernel(vcpu->kvm))
kvm_run->ready_for_interrupt_injection = 1;
else
kvm_run->ready_for_interrupt_injection =
(vcpu->arch.interrupt_window_open &&
vcpu->arch.irq_summary == 0);
static void vapic_enter(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
struct page *page;
if (!apic || !apic->vapic_addr)
return;
down_read(¤t->mm->mmap_sem);
page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
vcpu->arch.apic->vapic_page = page;
}
static void vapic_exit(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
if (!apic || !apic->vapic_addr)
return;
down_read(&vcpu->kvm->slots_lock);
kvm_release_page_dirty(apic->vapic_page);
mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
up_read(&vcpu->kvm->slots_lock);
static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
pr_debug("vcpu %d received sipi with vector # %x\n",
vcpu->vcpu_id, vcpu->arch.sipi_vector);
kvm_lapic_reset(vcpu);
r = kvm_x86_ops->vcpu_reset(vcpu);
if (r)
return r;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
preempted:
if (vcpu->guest_debug.enabled)
kvm_x86_ops->guest_debug_pre(vcpu);
again:
if (vcpu->requests)
if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
kvm_mmu_unload(vcpu);
r = kvm_mmu_reload(vcpu);
if (unlikely(r))
goto out;
if (vcpu->requests) {
if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
kvm_x86_ops->tlb_flush(vcpu);
if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
&vcpu->requests)) {
kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
r = 0;
goto out;
}
if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
r = 0;
goto out;
}
clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
kvm_inject_pending_timer_irqs(vcpu);
preempt_disable();
kvm_x86_ops->prepare_guest_switch(vcpu);
kvm_load_guest_fpu(vcpu);
local_irq_disable();
if (vcpu->requests || need_resched()) {
local_irq_enable();
preempt_enable();
r = 1;
goto out;
}
if (signal_pending(current)) {
local_irq_enable();
preempt_enable();
r = -EINTR;
kvm_run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.signal_exits;
goto out;
}
vcpu->guest_mode = 1;
/*
* Make sure that guest_mode assignment won't happen after
* testing the pending IRQ vector bitmap.
*/
smp_wmb();
if (vcpu->arch.exception.pending)
__queue_exception(vcpu);
else if (irqchip_in_kernel(vcpu->kvm))
kvm_x86_ops->inject_pending_irq(vcpu);
kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
up_read(&vcpu->kvm->slots_lock);
kvm_guest_enter();
kvm_x86_ops->run(vcpu, kvm_run);
vcpu->guest_mode = 0;
local_irq_enable();
++vcpu->stat.exits;
/*
* We must have an instruction between local_irq_enable() and
* kvm_guest_exit(), so the timer interrupt isn't delayed by
* the interrupt shadow. The stat.exits increment will do nicely.
* But we need to prevent reordering, hence this barrier():
*/
barrier();
kvm_guest_exit();
preempt_enable();
down_read(&vcpu->kvm->slots_lock);
/*
* Profile KVM exit RIPs:
*/
if (unlikely(prof_on == KVM_PROFILING)) {
unsigned long rip = kvm_rip_read(vcpu);
profile_hit(KVM_PROFILING, (void *)rip);
if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu))
vcpu->arch.exception.pending = false;
r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
if (r > 0) {
if (dm_request_for_irq_injection(vcpu, kvm_run)) {
r = -EINTR;
kvm_run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.request_irq_exits;
goto out;
}
if (!need_resched())
goto again;
}
out:
if (r > 0) {
kvm_resched(vcpu);
goto preempted;
}
post_kvm_run_save(vcpu, kvm_run);
return r;
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
sigset_t sigsaved;
vcpu_load(vcpu);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
kvm_vcpu_block(vcpu);
r = -EAGAIN;
goto out;
}
/* re-sync apic's tpr */
if (!irqchip_in_kernel(vcpu->kvm))
kvm_set_cr8(vcpu, kvm_run->cr8);
if (vcpu->arch.pio.cur_count) {
r = complete_pio(vcpu);
if (r)
goto out;
}
#if CONFIG_HAS_IOMEM
if (vcpu->mmio_needed) {