EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
    
static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
				      gva_t addr, void *val, unsigned int bytes,
				      struct x86_exception *exception)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);

	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
}

int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
				gva_t addr, void *val, unsigned int bytes,
				struct x86_exception *exception)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
	void *data = val;
	int r = X86EMUL_CONTINUE;

    	while (bytes) {
    
		gpa_t gpa =  vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
							     PFERR_WRITE_MASK,
							     exception);
		unsigned offset = addr & (PAGE_SIZE-1);
		unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
		int ret;

		if (gpa == UNMAPPED_GVA)
			return X86EMUL_PROPAGATE_FAULT;
    		ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
		if (ret < 0) {
			r = X86EMUL_IO_NEEDED;
			goto out;
    		}
    
    		bytes -= towrite;
    		data += towrite;
    		addr += towrite;
    	}
    out:
    	return r;
    }
    
    EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
    
    static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
    				gpa_t *gpa, struct x86_exception *exception,
    				bool write)
    {
    
    	u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
    		| (write ? PFERR_WRITE_MASK : 0);
    
    	if (vcpu_match_mmio_gva(vcpu, gva)
    	    && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
    
    		*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
    					(gva & (PAGE_SIZE - 1));
    
		trace_vcpu_match_mmio(gva, *gpa, write, false);
		return 1;
	}

    	*gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
    
    	if (*gpa == UNMAPPED_GVA)
    		return -1;
    
    	/* For APIC access vmexit */
    	if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
    		return 1;
    
    
	if (vcpu_match_mmio_gpa(vcpu, *gpa)) {
		trace_vcpu_match_mmio(gva, *gpa, write, true);
		return 1;
	}

	return 0;
}

int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
			const void *val, int bytes)
{
	int ret;

	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
	if (ret < 0)
		return 0;
	kvm_mmu_pte_write(vcpu, gpa, val, bytes);
	return 1;
}

    struct read_write_emulator_ops {
    	int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
    				  int bytes);
    	int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
    				  void *val, int bytes);
    	int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
    			       int bytes, void *val);
    	int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
    				    void *val, int bytes);
    	bool write;
    };
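
/*
 * The read_emultor and write_emultor instances further down supply these
 * callbacks for the read and write directions of an emulated guest access.
 */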
    
    static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
    {
    	if (vcpu->mmio_read_completed) {
		trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
			       vcpu->mmio_fragments[0].gpa, *(u64 *)val);
    
    		vcpu->mmio_read_completed = 0;
    		return 1;
    	}
    
    	return 0;
    }
    
    static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
    			void *val, int bytes)
    {
    	return !kvm_read_guest(vcpu->kvm, gpa, val, bytes);
    }
    
    static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
    			 void *val, int bytes)
    {
    	return emulator_write_phys(vcpu, gpa, val, bytes);
    }
    
    static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
    {
    	trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
    	return vcpu_mmio_write(vcpu, gpa, bytes, val);
    }
    
    static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
    			  void *val, int bytes)
    {
    	trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
    	return X86EMUL_IO_NEEDED;
    }
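
/*
 * For the write path the data to emit is already latched in the first mmio
 * fragment; write_exit_mmio() below only copies it into vcpu->run before the
 * exit to userspace (see complete_emulated_mmio(), forward-declared later).
 */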
    
    static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
    			   void *val, int bytes)
{
	struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];

	memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
	return X86EMUL_CONTINUE;
}

    static const struct read_write_emulator_ops read_emultor = {
    
    	.read_write_prepare = read_prepare,
    	.read_write_emulate = read_emulate,
    	.read_write_mmio = vcpu_mmio_read,
    	.read_write_exit_mmio = read_exit_mmio,
    };
    
    
    static const struct read_write_emulator_ops write_emultor = {
    
    	.read_write_emulate = write_emulate,
    	.read_write_mmio = write_mmio,
    	.read_write_exit_mmio = write_exit_mmio,
    	.write = true,
    };
    
    
static int emulator_read_write_onepage(unsigned long addr, void *val,
				       unsigned int bytes,
				       struct x86_exception *exception,
				       struct kvm_vcpu *vcpu,
				       const struct read_write_emulator_ops *ops)
{
	gpa_t gpa;
	int handled, ret;
	bool write = ops->write;
	struct kvm_mmio_fragment *frag;

	ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);

	if (ret < 0)
		return X86EMUL_PROPAGATE_FAULT;

	/* For APIC access vmexit */
	if (ret)
		goto mmio;

	if (ops->read_write_emulate(vcpu, gpa, val, bytes))
		return X86EMUL_CONTINUE;

mmio:
	/*
	 * Is this MMIO handled locally?
	 */
	handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
	if (handled == bytes)
		return X86EMUL_CONTINUE;

	gpa += handled;
	bytes -= handled;
	val += handled;

	WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
	frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
	frag->gpa = gpa;
	frag->data = val;
	frag->len = bytes;
	return X86EMUL_CONTINUE;
}

    int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
    			void *val, unsigned int bytes,
    			struct x86_exception *exception,
    
			const struct read_write_emulator_ops *ops)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
	gpa_t gpa;
	int rc;
    
    	if (ops->read_write_prepare &&
    		  ops->read_write_prepare(vcpu, val, bytes))
    		return X86EMUL_CONTINUE;
    
    	vcpu->mmio_nr_fragments = 0;
    
    	/* Crossing a page boundary? */
	if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
		int now;

		now = -addr & ~PAGE_MASK;
		rc = emulator_read_write_onepage(addr, val, now, exception,
						 vcpu, ops);
    
    
    		if (rc != X86EMUL_CONTINUE)
    			return rc;
    		addr += now;
    		val += now;
    		bytes -= now;
    	}
    
    	rc = emulator_read_write_onepage(addr, val, bytes, exception,
    					 vcpu, ops);
    	if (rc != X86EMUL_CONTINUE)
    		return rc;
    
    	if (!vcpu->mmio_nr_fragments)
    		return rc;
    
    	gpa = vcpu->mmio_fragments[0].gpa;
    
    	vcpu->mmio_needed = 1;
    	vcpu->mmio_cur_fragment = 0;
    
    
    	vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
    
    	vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
    	vcpu->run->exit_reason = KVM_EXIT_MMIO;
    	vcpu->run->mmio.phys_addr = gpa;
    
    	return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
    
    }
    
    static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
    				  unsigned long addr,
    				  void *val,
    				  unsigned int bytes,
    				  struct x86_exception *exception)
    {
    	return emulator_read_write(ctxt, addr, val, bytes,
    				   exception, &read_emultor);
    }
    
    int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
    			    unsigned long addr,
    			    const void *val,
    			    unsigned int bytes,
    			    struct x86_exception *exception)
    {
    	return emulator_read_write(ctxt, addr, (void *)val, bytes,
				   exception, &write_emultor);
}

    #define CMPXCHG_TYPE(t, ptr, old, new) \
    	(cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
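/* True iff the value at ptr still equalled *old and was atomically replaced with *new. */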
    
    #ifdef CONFIG_X86_64
    #  define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
    #else
#  define CMPXCHG64(ptr, old, new) \
	(cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
#endif

    static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
    				     unsigned long addr,
    
    				     const void *old,
    				     const void *new,
    				     unsigned int bytes,
    
				     struct x86_exception *exception)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
    
    	gpa_t gpa;
    	struct page *page;
    	char *kaddr;
    	bool exchanged;
    
    	/* guests cmpxchg8b have to be emulated atomically */
    	if (bytes > 8 || (bytes & (bytes - 1)))
    		goto emul_write;
    
    	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
    
    	if (gpa == UNMAPPED_GVA ||
    	    (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
    		goto emul_write;
    
    	if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
    		goto emul_write;
    
    	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
    
	if (is_error_page(page))
		goto emul_write;

	kaddr = kmap_atomic(page);
    
    	kaddr += offset_in_page(gpa);
    	switch (bytes) {
    	case 1:
    		exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
    		break;
    	case 2:
    		exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
    		break;
    	case 4:
    		exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
    		break;
    	case 8:
    		exchanged = CMPXCHG64(kaddr, old, new);
    		break;
    	default:
		BUG();
	}

	kunmap_atomic(kaddr);
    
    	kvm_release_page_dirty(page);
    
    	if (!exchanged)
    		return X86EMUL_CMPXCHG_FAILED;
    
    
	kvm_mmu_pte_write(vcpu, gpa, new, bytes);

	return X86EMUL_CONTINUE;

emul_write:
	printk_once(KERN_WARNING "kvm: emulating exchange as write\n");

	return emulator_write_emulated(ctxt, addr, new, bytes, exception);
}

    static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
    {
    	/* TODO: String I/O for in kernel device */
    	int r;
    
    	if (vcpu->arch.pio.in)
    		r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
    				    vcpu->arch.pio.size, pd);
    	else
    		r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
    				     vcpu->arch.pio.port, vcpu->arch.pio.size,
    				     pd);
    	return r;
    }
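
/*
 * emulator_pio_in_out() below returns 1 when the port access was fully
 * handled by an in-kernel device model, and 0 when a KVM_EXIT_IO exit to
 * userspace is still required.
 */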
    
    
    static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
    			       unsigned short port, void *val,
			       unsigned int count, bool in)
{
	trace_kvm_pio(!in, port, size, count);
    
    
    	vcpu->arch.pio.port = port;
    
    	vcpu->arch.pio.in = in;
    
    	vcpu->arch.pio.count  = count;
    
    	vcpu->arch.pio.size = size;
    
	if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
		vcpu->arch.pio.count = 0;
		return 1;
    	}
    
    	vcpu->run->exit_reason = KVM_EXIT_IO;
    
    	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
    
    	vcpu->run->io.size = size;
    	vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
    	vcpu->run->io.count = count;
    	vcpu->run->io.port = port;
    
    	return 0;
    }
    
    
    static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
    				    int size, unsigned short port, void *val,
				    unsigned int count)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
	int ret;

	if (vcpu->arch.pio.count)
		goto data_avail;

	ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
	if (ret) {
data_avail:
		memcpy(val, vcpu->arch.pio_data, size * count);
		vcpu->arch.pio.count = 0;
		return 1;
	}

	return 0;
}

    static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
    				     int size, unsigned short port,
    				     const void *val, unsigned int count)
    {
    	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
    
    	memcpy(vcpu->arch.pio_data, val, size * count);
    	return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
    }
    
    
    static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
    {
    	return kvm_x86_ops->get_segment_base(vcpu, seg);
    }
    
    
static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
{
	kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
}

    int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
    {
    	if (!need_emulate_wbinvd(vcpu))
    		return X86EMUL_CONTINUE;
    
    	if (kvm_x86_ops->has_wbinvd_exit()) {
    
    		int cpu = get_cpu();
    
    		cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
    
    		smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
    				wbinvd_ipi, NULL, 1);
    
    		put_cpu();
    
    		cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
    
    	} else
    		wbinvd();
    
    	return X86EMUL_CONTINUE;
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
    
    
    static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
    {
    	kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
    }
    
    
int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
{
	return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
}

int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
{
	return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
}

static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
	return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
}

static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
	unsigned long value;
    
    	switch (cr) {
    	case 0:
    		value = kvm_read_cr0(vcpu);
    		break;
    	case 2:
    		value = vcpu->arch.cr2;
    		break;
    	case 3:
    
    		value = kvm_read_cr3(vcpu);
    
    		break;
    	case 4:
    		value = kvm_read_cr4(vcpu);
    		break;
    	case 8:
    		value = kvm_get_cr8(vcpu);
    		break;
	default:
		kvm_err("%s: unexpected cr %u\n", __func__, cr);
		return 0;
	}

	return value;
}

static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
	int res = 0;

	switch (cr) {
	case 0:
    		res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
    
    		break;
    	case 2:
    		vcpu->arch.cr2 = val;
    		break;
	case 3:
		res = kvm_set_cr3(vcpu, val);
		break;
	case 4:
		res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
		break;
	case 8:
		res = kvm_set_cr8(vcpu, val);
		break;
	default:
		kvm_err("%s: unexpected cr %u\n", __func__, cr);
		res = -1;
	}

	return res;
}

    static void emulator_set_rflags(struct x86_emulate_ctxt *ctxt, ulong val)
    {
    	kvm_set_rflags(emul_to_vcpu(ctxt), val);
    }
    
    
static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
{
	return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
}

static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
{
	kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
}

static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
{
	kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
}

    static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
    {
    	kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
    }
    
    static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
    {
    	kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
    }
    
    
static unsigned long emulator_get_cached_segment_base(
	struct x86_emulate_ctxt *ctxt, int seg)
{
	return get_segment_base(emul_to_vcpu(ctxt), seg);
}

    static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
    				 struct desc_struct *desc, u32 *base3,
				 int seg)
{
	struct kvm_segment var;

	kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
	*selector = var.selector;

	if (var.unusable) {
		memset(desc, 0, sizeof(*desc));
		return false;
	}

    	if (var.g)
    		var.limit >>= 12;
    	set_desc_limit(desc, var.limit);
    	set_desc_base(desc, (unsigned long)var.base);
    
    #ifdef CONFIG_X86_64
    	if (base3)
    		*base3 = var.base >> 32;
    #endif
    
    	desc->type = var.type;
    	desc->s = var.s;
    	desc->dpl = var.dpl;
    	desc->p = var.present;
    	desc->avl = var.avl;
    	desc->l = var.l;
    	desc->d = var.db;
    	desc->g = var.g;
    
    	return true;
    }
    
    
    static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
    				 struct desc_struct *desc, u32 base3,
				 int seg)
{
	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
	struct kvm_segment var;

	var.selector = selector;
    	var.base = get_desc_base(desc);
    
    #ifdef CONFIG_X86_64
    	var.base |= ((u64)base3) << 32;
    #endif
    
    	var.limit = get_desc_limit(desc);
    	if (desc->g)
    		var.limit = (var.limit << 12) | 0xfff;
    	var.type = desc->type;
    	var.present = desc->p;
    	var.dpl = desc->dpl;
    	var.db = desc->d;
    	var.s = desc->s;
    	var.l = desc->l;
    	var.g = desc->g;
    	var.avl = desc->avl;
    	var.present = desc->p;
    	var.unusable = !var.present;
    	var.padding = 0;
    
    	kvm_set_segment(vcpu, &var, seg);
    	return;
    }
    
    
    static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
    			    u32 msr_index, u64 *pdata)
    {
    	return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
    }
    
    static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
    			    u32 msr_index, u64 data)
    {
    
    	struct msr_data msr;
    
    	msr.data = data;
    	msr.index = msr_index;
    	msr.host_initiated = false;
	return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
}

    static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
    			     u32 pmc, u64 *pdata)
    {
    	return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata);
    }
    
    
    static void emulator_halt(struct x86_emulate_ctxt *ctxt)
    {
    	emul_to_vcpu(ctxt)->arch.halt_request = 1;
    }
    
    
    static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
    {
    	preempt_disable();
    
    	kvm_load_guest_fpu(emul_to_vcpu(ctxt));
    
    	/*
    	 * CR0.TS may reference the host fpu state, not the guest fpu state,
    	 * so it may be clear at this point.
    	 */
    	clts();
    }
    
    static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
    {
    	preempt_enable();
    }
    
    
    static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
    
    			      struct x86_instruction_info *info,
    
    			      enum x86_intercept_stage stage)
    {
    
	return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
}

    static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
    
    			       u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
    {
    
	kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
}

    static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
    {
    	return kvm_register_read(emul_to_vcpu(ctxt), reg);
    }
    
    static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
    {
    	kvm_register_write(emul_to_vcpu(ctxt), reg, val);
    }
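
/*
 * emulate_ops below wires the generic x86 instruction emulator's callbacks
 * to the KVM-specific helpers defined above.
 */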
    
    
    static const struct x86_emulate_ops emulate_ops = {
    
    	.read_gpr            = emulator_read_gpr,
    	.write_gpr           = emulator_write_gpr,
    
    	.read_std            = kvm_read_guest_virt_system,
    
    	.write_std           = kvm_write_guest_virt_system,
    
    	.fetch               = kvm_fetch_guest_virt,
    
    	.read_emulated       = emulator_read_emulated,
    	.write_emulated      = emulator_write_emulated,
    	.cmpxchg_emulated    = emulator_cmpxchg_emulated,
    
    	.invlpg              = emulator_invlpg,
    
    	.pio_in_emulated     = emulator_pio_in_emulated,
    	.pio_out_emulated    = emulator_pio_out_emulated,
    
    	.get_segment         = emulator_get_segment,
    	.set_segment         = emulator_set_segment,
    
    	.get_cached_segment_base = emulator_get_cached_segment_base,
    
    	.get_gdt             = emulator_get_gdt,
    
    	.get_idt	     = emulator_get_idt,
    
    	.set_gdt             = emulator_set_gdt,
    	.set_idt	     = emulator_set_idt,
    
    	.get_cr              = emulator_get_cr,
    	.set_cr              = emulator_set_cr,
    
    	.set_rflags          = emulator_set_rflags,
    
    	.get_dr              = emulator_get_dr,
    	.set_dr              = emulator_set_dr,
    
    	.set_msr             = emulator_set_msr,
    	.get_msr             = emulator_get_msr,
    
    	.read_pmc            = emulator_read_pmc,
    
    	.halt                = emulator_halt,
    
    	.wbinvd              = emulator_wbinvd,
    
    	.fix_hypercall       = emulator_fix_hypercall,
    
    	.get_fpu             = emulator_get_fpu,
    	.put_fpu             = emulator_put_fpu,
    
    	.intercept           = emulator_intercept,
    
	.get_cpuid           = emulator_get_cpuid,
};

    static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
    {
    	u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
    	/*
	 * An sti; sti sequence only disables interrupts for the first
	 * instruction. So, if the last instruction, be it emulated or
	 * not, left the system with the INT_STI flag enabled, it
	 * means that the last instruction was an sti. We should not
	 * leave the flag on in this case. The same goes for mov ss.
    	 */
    	if (!(int_shadow & mask))
    		kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
    }
    
    
    static void inject_emulated_exception(struct kvm_vcpu *vcpu)
    {
    	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
    
	if (ctxt->exception.vector == PF_VECTOR)
		kvm_propagate_fault(vcpu, &ctxt->exception);
	else if (ctxt->exception.error_code_valid)
		kvm_queue_exception_e(vcpu, ctxt->exception.vector,
				      ctxt->exception.error_code);
	else
		kvm_queue_exception(vcpu, ctxt->exception.vector);
}

static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
	memset(&ctxt->twobyte, 0,
	       (void *)&ctxt->_regs - (void *)&ctxt->twobyte);
    
    	ctxt->fetch.start = 0;
    	ctxt->fetch.end = 0;
    	ctxt->io_read.pos = 0;
    	ctxt->io_read.end = 0;
    	ctxt->mem_read.pos = 0;
	ctxt->mem_read.end = 0;
}

    static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
    {
    
    	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
    
    	int cs_db, cs_l;
    
    	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
    
    
    	ctxt->eflags = kvm_get_rflags(vcpu);
    	ctxt->eip = kvm_rip_read(vcpu);
    	ctxt->mode = (!is_protmode(vcpu))		? X86EMUL_MODE_REAL :
    		     (ctxt->eflags & X86_EFLAGS_VM)	? X86EMUL_MODE_VM86 :
    		     cs_l				? X86EMUL_MODE_PROT64 :
    		     cs_db				? X86EMUL_MODE_PROT32 :
    							  X86EMUL_MODE_PROT16;
    	ctxt->guest_mode = is_guest_mode(vcpu);
    
    
    	init_decode_cache(ctxt);
    
	vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
}

int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
{
	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
	int ret;

	init_emulate_ctxt(vcpu);

	ctxt->op_bytes = 2;
	ctxt->ad_bytes = 2;
	ctxt->_eip = ctxt->eip + inc_eip;
    
    	ret = emulate_int_real(ctxt, irq);
    
    
    	if (ret != X86EMUL_CONTINUE)
    		return EMULATE_FAIL;
    
    
	ctxt->eip = ctxt->_eip;
	kvm_rip_write(vcpu, ctxt->eip);
	kvm_set_rflags(vcpu, ctxt->eflags);

	if (irq == NMI_VECTOR)
		vcpu->arch.nmi_pending = 0;
    
    	else
    		vcpu->arch.interrupt.pending = false;
    
    	return EMULATE_DONE;
    }
    EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
    
    
    static int handle_emulation_failure(struct kvm_vcpu *vcpu)
    {
    
	int r = EMULATE_DONE;

	++vcpu->stat.insn_emulation_fail;
	trace_kvm_emulate_insn_failed(vcpu);

	if (!is_guest_mode(vcpu)) {
		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
		vcpu->run->internal.ndata = 0;
		r = EMULATE_FAIL;
	}

	kvm_queue_exception(vcpu, UD_VECTOR);

	return r;
}

static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
				  bool write_fault_to_shadow_pgtable,
				  int emulation_type)
{
	gpa_t gpa = cr2;
	pfn_t pfn;

	if (emulation_type & EMULTYPE_NO_REEXECUTE)
		return false;

    	if (!vcpu->arch.mmu.direct_map) {
    		/*
		 * Write permission should be allowed since only
		 * write access needs to be emulated.
    		 */
    		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
    
    		/*
    		 * If the mapping is invalid in guest, let cpu retry
    		 * it to generate fault.
    		 */
    		if (gpa == UNMAPPED_GVA)
    			return true;
    	}
    
    	/*
    	 * Do not retry the unhandleable instruction if it faults on the
	 * readonly host memory, otherwise it will go into an infinite loop:
    	 * retry instruction -> write #PF -> emulation fail -> retry
    	 * instruction -> ...
    	 */
    	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
    
    
    	/*
    	 * If the instruction failed on the error pfn, it can not be fixed,
    	 * report the error to userspace.
    	 */
    	if (is_error_noslot_pfn(pfn))
    		return false;
    
    	kvm_release_pfn_clean(pfn);
    
    	/* The instructions are well-emulated on direct mmu. */
    	if (vcpu->arch.mmu.direct_map) {
    		unsigned int indirect_shadow_pages;
    
    		spin_lock(&vcpu->kvm->mmu_lock);
    		indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
    		spin_unlock(&vcpu->kvm->mmu_lock);
    
    		if (indirect_shadow_pages)
			kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));

		return true;
	}

    	/*
    	 * if emulation was due to access to shadowed page table
    	 * and it failed try to unshadow page and re-enter the
    	 * guest to let CPU execute the instruction.
    	 */
    	kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
    
    
    	/*
    	 * If the access faults on its page table, it can not
    	 * be fixed by unprotecting shadow page and it should
    	 * be reported to userspace.
    	 */
	return !write_fault_to_shadow_pgtable;
}

    static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
    			      unsigned long cr2,  int emulation_type)
    {
    	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
    	unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
    
    	last_retry_eip = vcpu->arch.last_retry_eip;
    	last_retry_addr = vcpu->arch.last_retry_addr;
    
    	/*
    	 * If the emulation is caused by #PF and it is non-page_table
    	 * writing instruction, it means the VM-EXIT is caused by shadow
    	 * page protected, we can zap the shadow page and retry this
    	 * instruction directly.
    	 *
    	 * Note: if the guest uses a non-page-table modifying instruction
    	 * on the PDE that points to the instruction, then we will unmap
    	 * the instruction and go to an infinite loop. So, we cache the
    	 * last retried eip and the last fault address, if we meet the eip
    	 * and the address again, we can break out of the potential infinite
    	 * loop.
    	 */
    	vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
    
    	if (!(emulation_type & EMULTYPE_RETRY))
    		return false;
    
    	if (x86_page_table_writing_insn(ctxt))
    		return false;
    
    	if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
    		return false;
    
    	vcpu->arch.last_retry_eip = ctxt->eip;
    	vcpu->arch.last_retry_addr = cr2;
    
    	if (!vcpu->arch.mmu.direct_map)
    		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
    
    
	kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));

	return true;
}

    static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
    static int complete_emulated_pio(struct kvm_vcpu *vcpu);
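
/*
 * The two completion callbacks declared above (and defined later in this
 * file) run once userspace has finished the MMIO or PIO access that forced
 * the exit, letting the interrupted emulation pick up where it left off.
 */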
    
    
    int x86_emulate_instruction(struct kvm_vcpu *vcpu,
    			    unsigned long cr2,
    
			    int emulation_type,
			    void *insn,
			    int insn_len)
{
	int r;
	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
	bool writeback = true;
	bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
    
    	/*
    	 * Clear write_fault_to_shadow_pgtable here to ensure it is
    	 * never reused.
    	 */
    	vcpu->arch.write_fault_to_shadow_pgtable = false;
    
    	kvm_clear_exception_queue(vcpu);
    
	if (!(emulation_type & EMULTYPE_NO_DECODE)) {
		init_emulate_ctxt(vcpu);
		ctxt->interruptibility = 0;
    		ctxt->have_exception = false;
    		ctxt->perm_ok = false;
    
    		ctxt->only_vendor_specific_insn
    
    			= emulation_type & EMULTYPE_TRAP_UD;
    
    
    		r = x86_decode_insn(ctxt, insn, insn_len);
    
    		trace_kvm_emulate_insn_start(vcpu);
    
		++vcpu->stat.insn_emulation;
		if (r != EMULATION_OK)  {
			if (emulation_type & EMULTYPE_TRAP_UD)
				return EMULATE_FAIL;
			if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
						emulation_type))
				return EMULATE_DONE;
    			if (emulation_type & EMULTYPE_SKIP)
    				return EMULATE_FAIL;
    			return handle_emulation_failure(vcpu);