Newer
Older
exception);
EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception)
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val,
unsigned int bytes,
struct x86_exception *exception)
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
void *data = val;
int r = X86EMUL_CONTINUE;
while (bytes) {
gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
PFERR_WRITE_MASK,
exception);
unsigned offset = addr & (PAGE_SIZE-1);
unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
if (ret < 0) {

Gleb Natapov
committed
r = X86EMUL_IO_NEEDED;
goto out;
}
bytes -= towrite;
data += towrite;
addr += towrite;
}
out:
return r;
}
EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
gpa_t *gpa, struct x86_exception *exception,
bool write)
{
u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
| (write ? PFERR_WRITE_MASK : 0);
if (vcpu_match_mmio_gva(vcpu, gva)
&& !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
(gva & (PAGE_SIZE - 1));
trace_vcpu_match_mmio(gva, *gpa, write, false);
*gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
if (*gpa == UNMAPPED_GVA)
return -1;
/* For APIC access vmexit */
if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
return 1;
if (vcpu_match_mmio_gpa(vcpu, *gpa)) {
trace_vcpu_match_mmio(gva, *gpa, write, true);
return 0;
}
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes)
{
int ret;
ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
return 0;
kvm_mmu_pte_write(vcpu, gpa, val, bytes);
return 1;
}
struct read_write_emulator_ops {
int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
int bytes);
int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes);
int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
int bytes, void *val);
int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes);
bool write;
};
static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
{
if (vcpu->mmio_read_completed) {
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
vcpu->mmio_read_completed = 0;
return 1;
}
return 0;
}
static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
return !kvm_read_guest(vcpu->kvm, gpa, val, bytes);
}
static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
return emulator_write_phys(vcpu, gpa, val, bytes);
}
static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
{
trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
return vcpu_mmio_write(vcpu, gpa, bytes, val);
}
static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
return X86EMUL_IO_NEEDED;
}
static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
return X86EMUL_CONTINUE;
}
static const struct read_write_emulator_ops read_emultor = {
.read_write_prepare = read_prepare,
.read_write_emulate = read_emulate,
.read_write_mmio = vcpu_mmio_read,
.read_write_exit_mmio = read_exit_mmio,
};
static const struct read_write_emulator_ops write_emultor = {
.read_write_emulate = write_emulate,
.read_write_mmio = write_mmio,
.read_write_exit_mmio = write_exit_mmio,
.write = true,
};
static int emulator_read_write_onepage(unsigned long addr, void *val,
unsigned int bytes,
struct x86_exception *exception,
struct kvm_vcpu *vcpu,
const struct read_write_emulator_ops *ops)
gpa_t gpa;
int handled, ret;
bool write = ops->write;
ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
if (ret < 0)
return X86EMUL_PROPAGATE_FAULT;
/* For APIC access vmexit */
goto mmio;
if (ops->read_write_emulate(vcpu, gpa, val, bytes))
return X86EMUL_CONTINUE;
mmio:
/*
* Is this MMIO handled locally?
*/
handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
if (handled == bytes)
return X86EMUL_CONTINUE;
gpa += handled;
bytes -= handled;
val += handled;
WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
frag->gpa = gpa;
frag->data = val;
frag->len = bytes;
}
int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
void *val, unsigned int bytes,
struct x86_exception *exception,
const struct read_write_emulator_ops *ops)
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
gpa_t gpa;
int rc;
if (ops->read_write_prepare &&
ops->read_write_prepare(vcpu, val, bytes))
return X86EMUL_CONTINUE;
vcpu->mmio_nr_fragments = 0;
/* Crossing a page boundary? */
if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
now = -addr & ~PAGE_MASK;
rc = emulator_read_write_onepage(addr, val, now, exception,
vcpu, ops);
if (rc != X86EMUL_CONTINUE)
return rc;
addr += now;
val += now;
bytes -= now;
}
rc = emulator_read_write_onepage(addr, val, bytes, exception,
vcpu, ops);
if (rc != X86EMUL_CONTINUE)
return rc;
if (!vcpu->mmio_nr_fragments)
return rc;
gpa = vcpu->mmio_fragments[0].gpa;
vcpu->mmio_needed = 1;
vcpu->mmio_cur_fragment = 0;
vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
vcpu->run->exit_reason = KVM_EXIT_MMIO;
vcpu->run->mmio.phys_addr = gpa;
return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
}
static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
unsigned long addr,
void *val,
unsigned int bytes,
struct x86_exception *exception)
{
return emulator_read_write(ctxt, addr, val, bytes,
exception, &read_emultor);
}
int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
unsigned long addr,
const void *val,
unsigned int bytes,
struct x86_exception *exception)
{
return emulator_read_write(ctxt, addr, (void *)val, bytes,
exception, &write_emultor);
}
#define CMPXCHG_TYPE(t, ptr, old, new) \
(cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
#ifdef CONFIG_X86_64
# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
#else
# define CMPXCHG64(ptr, old, new) \
(cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
unsigned long addr,
const void *old,
const void *new,
unsigned int bytes,
struct x86_exception *exception)
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
gpa_t gpa;
struct page *page;
char *kaddr;
bool exchanged;
/* guests cmpxchg8b have to be emulated atomically */
if (bytes > 8 || (bytes & (bytes - 1)))
goto emul_write;
gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
if (gpa == UNMAPPED_GVA ||
(gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
goto emul_write;
if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
goto emul_write;
page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
kaddr = kmap_atomic(page);
kaddr += offset_in_page(gpa);
switch (bytes) {
case 1:
exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
break;
case 2:
exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
break;
case 4:
exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
break;
case 8:
exchanged = CMPXCHG64(kaddr, old, new);
break;
default:
BUG();
kvm_release_page_dirty(page);
if (!exchanged)
return X86EMUL_CMPXCHG_FAILED;
kvm_mmu_pte_write(vcpu, gpa, new, bytes);
return X86EMUL_CONTINUE;
printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
return emulator_write_emulated(ctxt, addr, new, bytes, exception);
}
static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
{
/* TODO: String I/O for in kernel device */
int r;
if (vcpu->arch.pio.in)
r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
vcpu->arch.pio.size, pd);
else
r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
vcpu->arch.pio.port, vcpu->arch.pio.size,
pd);
return r;
}
static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
unsigned short port, void *val,
unsigned int count, bool in)
trace_kvm_pio(!in, port, size, count);
vcpu->arch.pio.port = port;
vcpu->arch.pio.count = count;
vcpu->arch.pio.size = size;
if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
vcpu->arch.pio.count = 0;
return 1;
}
vcpu->run->exit_reason = KVM_EXIT_IO;
vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
vcpu->run->io.size = size;
vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
vcpu->run->io.count = count;
vcpu->run->io.port = port;
return 0;
}
static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
int size, unsigned short port, void *val,
unsigned int count)
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
if (vcpu->arch.pio.count)
goto data_avail;
ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
if (ret) {
data_avail:
memcpy(val, vcpu->arch.pio_data, size * count);
vcpu->arch.pio.count = 0;
return 1;
}
return 0;
}
static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
int size, unsigned short port,
const void *val, unsigned int count)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
memcpy(vcpu->arch.pio_data, val, size * count);
return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
}
static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
return kvm_x86_ops->get_segment_base(vcpu, seg);
}
static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
}
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
{
if (!need_emulate_wbinvd(vcpu))
return X86EMUL_CONTINUE;
if (kvm_x86_ops->has_wbinvd_exit()) {
int cpu = get_cpu();
cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
wbinvd_ipi, NULL, 1);
cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
return X86EMUL_CONTINUE;
}
EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
{
kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
}
int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
}
int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
}
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
unsigned long value;
switch (cr) {
case 0:
value = kvm_read_cr0(vcpu);
break;
case 2:
value = vcpu->arch.cr2;
break;
case 3:
value = kvm_read_cr3(vcpu);
break;
case 4:
value = kvm_read_cr4(vcpu);
break;
case 8:
value = kvm_get_cr8(vcpu);
break;
default:
kvm_err("%s: unexpected cr %u\n", __func__, cr);
return 0;
}
return value;
}
static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
int res = 0;
switch (cr) {
case 0:
res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
break;
case 2:
vcpu->arch.cr2 = val;
break;
case 3:
break;
case 4:
res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
break;
case 8:
break;
default:
kvm_err("%s: unexpected cr %u\n", __func__, cr);
res = -1;
return res;
}
static void emulator_set_rflags(struct x86_emulate_ctxt *ctxt, ulong val)
{
kvm_set_rflags(emul_to_vcpu(ctxt), val);
}
static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
{
kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
}
static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
{
kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
}
static unsigned long emulator_get_cached_segment_base(
struct x86_emulate_ctxt *ctxt, int seg)
{
return get_segment_base(emul_to_vcpu(ctxt), seg);
}
static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
struct desc_struct *desc, u32 *base3,
int seg)
{
struct kvm_segment var;
kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
*selector = var.selector;
if (var.unusable) {
memset(desc, 0, sizeof(*desc));
return false;
}
if (var.g)
var.limit >>= 12;
set_desc_limit(desc, var.limit);
set_desc_base(desc, (unsigned long)var.base);
#ifdef CONFIG_X86_64
if (base3)
*base3 = var.base >> 32;
#endif
desc->type = var.type;
desc->s = var.s;
desc->dpl = var.dpl;
desc->p = var.present;
desc->avl = var.avl;
desc->l = var.l;
desc->d = var.db;
desc->g = var.g;
return true;
}
static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
struct desc_struct *desc, u32 base3,
int seg)
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
struct kvm_segment var;
var.base = get_desc_base(desc);
#ifdef CONFIG_X86_64
var.base |= ((u64)base3) << 32;
#endif
var.limit = get_desc_limit(desc);
if (desc->g)
var.limit = (var.limit << 12) | 0xfff;
var.type = desc->type;
var.present = desc->p;
var.dpl = desc->dpl;
var.db = desc->d;
var.s = desc->s;
var.l = desc->l;
var.g = desc->g;
var.avl = desc->avl;
var.present = desc->p;
var.unusable = !var.present;
var.padding = 0;
kvm_set_segment(vcpu, &var, seg);
return;
}
static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
u32 msr_index, u64 *pdata)
{
return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
}
static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
u32 msr_index, u64 data)
{
struct msr_data msr;
msr.data = data;
msr.index = msr_index;
msr.host_initiated = false;
return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
u32 pmc, u64 *pdata)
{
return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata);
}
static void emulator_halt(struct x86_emulate_ctxt *ctxt)
{
emul_to_vcpu(ctxt)->arch.halt_request = 1;
}

Avi Kivity
committed
static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
{
preempt_disable();
kvm_load_guest_fpu(emul_to_vcpu(ctxt));

Avi Kivity
committed
/*
* CR0.TS may reference the host fpu state, not the guest fpu state,
* so it may be clear at this point.
*/
clts();
}
static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
{
preempt_enable();
}
static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
struct x86_instruction_info *info,
enum x86_intercept_stage stage)
{
return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
{
kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
{
return kvm_register_read(emul_to_vcpu(ctxt), reg);
}
static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
{
kvm_register_write(emul_to_vcpu(ctxt), reg, val);
}
static const struct x86_emulate_ops emulate_ops = {
.read_gpr = emulator_read_gpr,
.write_gpr = emulator_write_gpr,
.read_std = kvm_read_guest_virt_system,
.write_std = kvm_write_guest_virt_system,
.fetch = kvm_fetch_guest_virt,
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
.cmpxchg_emulated = emulator_cmpxchg_emulated,
.invlpg = emulator_invlpg,
.pio_in_emulated = emulator_pio_in_emulated,
.pio_out_emulated = emulator_pio_out_emulated,
.get_segment = emulator_get_segment,
.set_segment = emulator_set_segment,
.get_cached_segment_base = emulator_get_cached_segment_base,
.get_gdt = emulator_get_gdt,
.get_idt = emulator_get_idt,
.set_gdt = emulator_set_gdt,
.set_idt = emulator_set_idt,
.get_cr = emulator_get_cr,
.set_cr = emulator_set_cr,
.set_rflags = emulator_set_rflags,
.cpl = emulator_get_cpl,
.get_dr = emulator_get_dr,
.set_dr = emulator_set_dr,
.set_msr = emulator_set_msr,
.get_msr = emulator_get_msr,
.fix_hypercall = emulator_fix_hypercall,

Avi Kivity
committed
.get_fpu = emulator_get_fpu,
.put_fpu = emulator_put_fpu,
.intercept = emulator_intercept,
.get_cpuid = emulator_get_cpuid,
};
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
{
u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
/*
* an sti; sti; sequence only disable interrupts for the first
* instruction. So, if the last instruction, be it emulated or
* not, left the system with the INT_STI flag enabled, it
* means that the last instruction is an sti. We should not
* leave the flag on in this case. The same goes for mov ss
*/
if (!(int_shadow & mask))
kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
}
static void inject_emulated_exception(struct kvm_vcpu *vcpu)
{
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
if (ctxt->exception.vector == PF_VECTOR)
kvm_propagate_fault(vcpu, &ctxt->exception);
else if (ctxt->exception.error_code_valid)
kvm_queue_exception_e(vcpu, ctxt->exception.vector,
ctxt->exception.error_code);
kvm_queue_exception(vcpu, ctxt->exception.vector);
static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
memset(&ctxt->twobyte, 0,
(void *)&ctxt->_regs - (void *)&ctxt->twobyte);
ctxt->fetch.start = 0;
ctxt->fetch.end = 0;
ctxt->io_read.pos = 0;
ctxt->io_read.end = 0;
ctxt->mem_read.pos = 0;
ctxt->mem_read.end = 0;
static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
{
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
int cs_db, cs_l;
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
ctxt->eflags = kvm_get_rflags(vcpu);
ctxt->eip = kvm_rip_read(vcpu);
ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
(ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
cs_l ? X86EMUL_MODE_PROT64 :
cs_db ? X86EMUL_MODE_PROT32 :
X86EMUL_MODE_PROT16;
ctxt->guest_mode = is_guest_mode(vcpu);
vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
}
int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
int ret;
init_emulate_ctxt(vcpu);
ctxt->op_bytes = 2;
ctxt->ad_bytes = 2;
ctxt->_eip = ctxt->eip + inc_eip;
ret = emulate_int_real(ctxt, irq);
if (ret != X86EMUL_CONTINUE)
return EMULATE_FAIL;
ctxt->eip = ctxt->_eip;
kvm_rip_write(vcpu, ctxt->eip);
kvm_set_rflags(vcpu, ctxt->eflags);
if (irq == NMI_VECTOR)
else
vcpu->arch.interrupt.pending = false;
return EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
static int handle_emulation_failure(struct kvm_vcpu *vcpu)
{
int r = EMULATE_DONE;
++vcpu->stat.insn_emulation_fail;
trace_kvm_emulate_insn_failed(vcpu);
if (!is_guest_mode(vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
r = EMULATE_FAIL;
}
kvm_queue_exception(vcpu, UD_VECTOR);
return r;
}
static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,

Gleb Natapov
committed
bool write_fault_to_shadow_pgtable,
int emulation_type)

Gleb Natapov
committed
{
pfn_t pfn;

Gleb Natapov
committed

Gleb Natapov
committed
if (emulation_type & EMULTYPE_NO_REEXECUTE)
return false;
if (!vcpu->arch.mmu.direct_map) {
/*
* Write permission should be allowed since only
* write access need to be emulated.
*/
gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);

Gleb Natapov
committed
/*
* If the mapping is invalid in guest, let cpu retry
* it to generate fault.
*/
if (gpa == UNMAPPED_GVA)
return true;
}

Gleb Natapov
committed
/*
* Do not retry the unhandleable instruction if it faults on the
* readonly host memory, otherwise it will goto a infinite loop:
* retry instruction -> write #PF -> emulation fail -> retry
* instruction -> ...
*/
pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
4872
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
/*
* If the instruction failed on the error pfn, it can not be fixed,
* report the error to userspace.
*/
if (is_error_noslot_pfn(pfn))
return false;
kvm_release_pfn_clean(pfn);
/* The instructions are well-emulated on direct mmu. */
if (vcpu->arch.mmu.direct_map) {
unsigned int indirect_shadow_pages;
spin_lock(&vcpu->kvm->mmu_lock);
indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
spin_unlock(&vcpu->kvm->mmu_lock);
if (indirect_shadow_pages)
kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));

Gleb Natapov
committed
return true;
}

Gleb Natapov
committed
/*
* if emulation was due to access to shadowed page table
* and it failed try to unshadow page and re-enter the
* guest to let CPU execute the instruction.
*/
kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
/*
* If the access faults on its page table, it can not
* be fixed by unprotecting shadow page and it should
* be reported to userspace.
*/
return !write_fault_to_shadow_pgtable;

Gleb Natapov
committed
}
4911
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946
4947
4948
4949
static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
unsigned long cr2, int emulation_type)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
last_retry_eip = vcpu->arch.last_retry_eip;
last_retry_addr = vcpu->arch.last_retry_addr;
/*
* If the emulation is caused by #PF and it is non-page_table
* writing instruction, it means the VM-EXIT is caused by shadow
* page protected, we can zap the shadow page and retry this
* instruction directly.
*
* Note: if the guest uses a non-page-table modifying instruction
* on the PDE that points to the instruction, then we will unmap
* the instruction and go to an infinite loop. So, we cache the
* last retried eip and the last fault address, if we meet the eip
* and the address again, we can break out of the potential infinite
* loop.
*/
vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
if (!(emulation_type & EMULTYPE_RETRY))
return false;
if (x86_page_table_writing_insn(ctxt))
return false;
if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
return false;
vcpu->arch.last_retry_eip = ctxt->eip;
vcpu->arch.last_retry_addr = cr2;
if (!vcpu->arch.mmu.direct_map)
gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
return true;
}
static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
static int complete_emulated_pio(struct kvm_vcpu *vcpu);
int x86_emulate_instruction(struct kvm_vcpu *vcpu,
unsigned long cr2,
int emulation_type,
void *insn,
int insn_len)
int r;
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
bool writeback = true;
bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
/*
* Clear write_fault_to_shadow_pgtable here to ensure it is
* never reused.
*/
vcpu->arch.write_fault_to_shadow_pgtable = false;
kvm_clear_exception_queue(vcpu);
if (!(emulation_type & EMULTYPE_NO_DECODE)) {
init_emulate_ctxt(vcpu);
ctxt->interruptibility = 0;
ctxt->have_exception = false;
ctxt->perm_ok = false;
ctxt->only_vendor_specific_insn
= emulation_type & EMULTYPE_TRAP_UD;
r = x86_decode_insn(ctxt, insn, insn_len);
if (r != EMULATION_OK) {
if (emulation_type & EMULTYPE_TRAP_UD)
return EMULATE_FAIL;

Gleb Natapov
committed
if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
emulation_type))
return EMULATE_DONE;
if (emulation_type & EMULTYPE_SKIP)
return EMULATE_FAIL;
return handle_emulation_failure(vcpu);