  • 	if (!reported) {
    		reported = 1;
    		printk(KERN_WARNING "kvm: emulating exchange as write\n");
    	}
    
    #ifndef CONFIG_X86_64
    	/* guests cmpxchg8b have to be emulated atomically */
    	if (bytes == 8) {
    
		gpa_t gpa;
		struct page *page;
		char *kaddr;
		u64 val;
    
    		gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
    
    
    		if (gpa == UNMAPPED_GVA ||
    		   (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
    			goto emul_write;
    
    		if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
    			goto emul_write;
    
    		val = *(u64 *)new;
    
    		page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
    
    		kaddr = kmap_atomic(page, KM_USER0);
    		set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
    		kunmap_atomic(kaddr, KM_USER0);
    
    		kvm_release_page_dirty(page);
	}
#endif

emul_write:
    
    	return emulator_write_emulated(addr, new, bytes, vcpu);
    }
    
    static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
    {
    	return kvm_x86_ops->get_segment_base(vcpu, seg);
    }
    
    int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
    {
    	return X86EMUL_CONTINUE;
    }
    
    int emulate_clts(struct kvm_vcpu *vcpu)
    {
    
    	KVMTRACE_0D(CLTS, vcpu, handler);
    
    	kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
    
    	return X86EMUL_CONTINUE;
    }
    
    int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
    {
    	struct kvm_vcpu *vcpu = ctxt->vcpu;
    
    	switch (dr) {
    	case 0 ... 3:
    		*dest = kvm_x86_ops->get_dr(vcpu, dr);
    		return X86EMUL_CONTINUE;
    	default:
    
    		pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
    
    		return X86EMUL_UNHANDLEABLE;
    	}
    }
    
    int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
    {
    	unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
    	int exception;
    
    	kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
    	if (exception) {
    		/* FIXME: better handling */
    		return X86EMUL_UNHANDLEABLE;
    	}
    	return X86EMUL_CONTINUE;
    }
    
    void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
    {
    	u8 opcodes[4];
    
	unsigned long rip = kvm_rip_read(vcpu);
	unsigned long rip_linear;

	rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
    
    
    	emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
    
    	printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
    	       context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
    }
    EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
    
    
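/*
 * Callback table handed to the x86 emulator; guest memory reads and
 * writes are routed back through the MMIO/PIO emulation helpers above.
 */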
    static struct x86_emulate_ops emulate_ops = {
    
    	.read_std            = emulator_read_std,
    	.read_emulated       = emulator_read_emulated,
    	.write_emulated      = emulator_write_emulated,
    	.cmpxchg_emulated    = emulator_cmpxchg_emulated,
    };
    
    
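/*
 * Read RAX, RSP and RIP so that every register in the cache is valid
 * (some are only fetched from hardware on demand), then mark them all
 * dirty: the emulator pokes vcpu->arch.regs[] directly, so its changes
 * must be written back on the next guest entry.
 */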
    static void cache_all_regs(struct kvm_vcpu *vcpu)
    {
    	kvm_register_read(vcpu, VCPU_REGS_RAX);
    	kvm_register_read(vcpu, VCPU_REGS_RSP);
    	kvm_register_read(vcpu, VCPU_REGS_RIP);
    	vcpu->arch.regs_dirty = ~0;
    }
    
    
    int emulate_instruction(struct kvm_vcpu *vcpu,
    			struct kvm_run *run,
    			unsigned long cr2,
			u16 error_code,
			int emulation_type)
{
	int r;
	struct decode_cache *c;

	kvm_clear_exception_queue(vcpu);
    
    	vcpu->arch.mmio_fault_cr2 = cr2;
    
    	/*
    	 * TODO: fix x86_emulate.c to use guest_read/write_register
    	 * instead of direct ->regs accesses, can save hundred cycles
	 * on Intel for instructions that don't read/change RSP,
	 * for example.
    	 */
    	cache_all_regs(vcpu);
    
    	vcpu->arch.pio.string = 0;
    
    	if (!(emulation_type & EMULTYPE_NO_DECODE)) {
    
    		int cs_db, cs_l;
    		kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
    
    
    		vcpu->arch.emulate_ctxt.vcpu = vcpu;
    		vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
    		vcpu->arch.emulate_ctxt.mode =
    			(vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
    
    			? X86EMUL_MODE_REAL : cs_l
    			? X86EMUL_MODE_PROT64 :	cs_db
    			? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
    
    
    		r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
    
    
		/* Reject instructions other than VMCALL/VMMCALL when
		 * trying to emulate an invalid opcode (#UD). */
    		c = &vcpu->arch.emulate_ctxt.decode;
    		if ((emulation_type & EMULTYPE_TRAP_UD) &&
    		    (!(c->twobyte && c->b == 0x01 &&
    		      (c->modrm_reg == 0 || c->modrm_reg == 3) &&
    		       c->modrm_mod == 3 && c->modrm_rm == 1)))
    			return EMULATE_FAIL;
    
    
		++vcpu->stat.insn_emulation;
		if (r) {
			++vcpu->stat.insn_emulation_fail;
    
    			if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
    				return EMULATE_DONE;
    			return EMULATE_FAIL;
    		}
    	}
    
    
    	r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
    
    	if (vcpu->arch.pio.string)
    
    		return EMULATE_DO_MMIO;
    
    	if ((r || vcpu->mmio_is_write) && run) {
    		run->exit_reason = KVM_EXIT_MMIO;
    		run->mmio.phys_addr = vcpu->mmio_phys_addr;
    		memcpy(run->mmio.data, vcpu->mmio_data, 8);
    		run->mmio.len = vcpu->mmio_size;
    		run->mmio.is_write = vcpu->mmio_is_write;
    	}
    
    	if (r) {
    		if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
    			return EMULATE_DONE;
    		if (!vcpu->mmio_needed) {
    			kvm_report_emulation_failure(vcpu, "mmio");
    			return EMULATE_FAIL;
    		}
    		return EMULATE_DO_MMIO;
    	}
    
    
    	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
    
    
    	if (vcpu->mmio_is_write) {
    		vcpu->mmio_needed = 0;
    		return EMULATE_DO_MMIO;
    	}
    
    	return EMULATE_DONE;
    }
    EXPORT_SYMBOL_GPL(emulate_instruction);
    
    
    static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
    {
    	int i;
    
    
    	for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i)
    		if (vcpu->arch.pio.guest_pages[i]) {
    			kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]);
    			vcpu->arch.pio.guest_pages[i] = NULL;
    
    		}
    }
    
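/*
 * Map the pinned guest pages and copy the PIO payload: for an IN the
 * data in the shared pio_data page is copied out to guest memory, for
 * an OUT the guest buffer is copied into pio_data.
 */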
    static int pio_copy_data(struct kvm_vcpu *vcpu)
    {
    
	void *p = vcpu->arch.pio_data;
	void *q;
	unsigned bytes;
	int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1;
    
    	q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
    
    		 PAGE_KERNEL);
    	if (!q) {
    		free_pio_guest_pages(vcpu);
    		return -ENOMEM;
    	}
    
    	q += vcpu->arch.pio.guest_page_offset;
    	bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
    	if (vcpu->arch.pio.in)
    
    		memcpy(q, p, bytes);
    	else
    		memcpy(p, q, bytes);
    
    	q -= vcpu->arch.pio.guest_page_offset;
    
    	vunmap(q);
    	free_pio_guest_pages(vcpu);
    	return 0;
    }
    
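/*
 * Finish a (possibly string) PIO operation: copy IN data back to the
 * guest (RAX for a single transfer, guest memory for string I/O) and
 * step RCX/RSI/RDI past the elements already transferred.
 */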
    int complete_pio(struct kvm_vcpu *vcpu)
    {
    
	struct kvm_pio_request *io = &vcpu->arch.pio;
	long delta;
	int r;
	unsigned long val;

	if (!io->string) {
		if (io->in) {
    			val = kvm_register_read(vcpu, VCPU_REGS_RAX);
    			memcpy(&val, vcpu->arch.pio_data, io->size);
    			kvm_register_write(vcpu, VCPU_REGS_RAX, val);
    		}
    
    	} else {
    		if (io->in) {
			r = pio_copy_data(vcpu);
			if (r)
				return r;
    		}
    
    		delta = 1;
    		if (io->rep) {
    			delta *= io->cur_count;
    			/*
    			 * The size of the register should really depend on
    			 * current address size.
    			 */
    
    			val = kvm_register_read(vcpu, VCPU_REGS_RCX);
    			val -= delta;
    			kvm_register_write(vcpu, VCPU_REGS_RCX, val);
    
    		}
    		if (io->down)
    			delta = -delta;
    		delta *= io->size;
    
    		if (io->in) {
    			val = kvm_register_read(vcpu, VCPU_REGS_RDI);
    			val += delta;
    			kvm_register_write(vcpu, VCPU_REGS_RDI, val);
    		} else {
    			val = kvm_register_read(vcpu, VCPU_REGS_RSI);
    			val += delta;
    			kvm_register_write(vcpu, VCPU_REGS_RSI, val);
    		}
    
    	}
    
    	io->count -= io->cur_count;
    	io->cur_count = 0;
    
    	return 0;
    }
    
    static void kernel_pio(struct kvm_io_device *pio_dev,
    		       struct kvm_vcpu *vcpu,
    		       void *pd)
    {
    	/* TODO: String I/O for in kernel device */
    
    	mutex_lock(&vcpu->kvm->lock);
    
    	if (vcpu->arch.pio.in)
    		kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
				  vcpu->arch.pio.size,
				  pd);
	else
		kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
    				   vcpu->arch.pio.size,
    
    				   pd);
    	mutex_unlock(&vcpu->kvm->lock);
    }
    
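/*
 * Feed a string OUT to an in-kernel device, one io->size chunk of the
 * pio_data buffer per iteration.
 */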
    static void pio_string_write(struct kvm_io_device *pio_dev,
    			     struct kvm_vcpu *vcpu)
    {
    
    	struct kvm_pio_request *io = &vcpu->arch.pio;
    	void *pd = vcpu->arch.pio_data;
    
    	int i;
    
    	mutex_lock(&vcpu->kvm->lock);
    	for (i = 0; i < io->cur_count; i++) {
    		kvm_iodevice_write(pio_dev, io->port,
    				   io->size,
    				   pd);
    		pd += io->size;
    	}
    	mutex_unlock(&vcpu->kvm->lock);
    }
    
static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
					       gpa_t addr, int len,
					       int is_write)
{
	return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
    
    }
    
    int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
    		  int size, unsigned port)
    {
    	struct kvm_io_device *pio_dev;
    
    	unsigned long val;
    
    
    	vcpu->run->exit_reason = KVM_EXIT_IO;
    	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
    
    	vcpu->run->io.size = vcpu->arch.pio.size = size;
    
    	vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
    
    	vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1;
    	vcpu->run->io.port = vcpu->arch.pio.port = port;
    	vcpu->arch.pio.in = in;
    	vcpu->arch.pio.string = 0;
    	vcpu->arch.pio.down = 0;
    	vcpu->arch.pio.guest_page_offset = 0;
    	vcpu->arch.pio.rep = 0;
    
    	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
    		KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
    			    handler);
    	else
    		KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
    			    handler);
    
    
    	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	memcpy(vcpu->arch.pio_data, &val, 4);
    
    
    	kvm_x86_ops->skip_emulated_instruction(vcpu);
    
    
	pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
	if (pio_dev) {
		kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
    
    		complete_pio(vcpu);
    		return 1;
    	}
    	return 0;
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_pio);
    
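/*
 * Emulate INS/OUTS: fill in the kvm_run I/O exit, pin the guest buffer
 * pages (two if the transfer straddles a page boundary), and complete
 * writes to in-kernel devices without returning to userspace.
 */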
    int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
    		  int size, unsigned long count, int down,
    		  gva_t address, int rep, unsigned port)
    {
    	unsigned now, in_page;
    	int i, ret = 0;
    	int nr_pages = 1;
    	struct page *page;
    	struct kvm_io_device *pio_dev;
    
    	vcpu->run->exit_reason = KVM_EXIT_IO;
    	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
    
    	vcpu->run->io.size = vcpu->arch.pio.size = size;
    
    	vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
    
    	vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count;
    	vcpu->run->io.port = vcpu->arch.pio.port = port;
    	vcpu->arch.pio.in = in;
    	vcpu->arch.pio.string = 1;
    	vcpu->arch.pio.down = down;
    	vcpu->arch.pio.guest_page_offset = offset_in_page(address);
    	vcpu->arch.pio.rep = rep;
    
    	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
    		KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
    			    handler);
    	else
    		KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
    			    handler);
    
    
    	if (!count) {
    		kvm_x86_ops->skip_emulated_instruction(vcpu);
    		return 1;
    	}
    
    	if (!down)
    		in_page = PAGE_SIZE - offset_in_page(address);
    	else
    		in_page = offset_in_page(address) + size;
    	now = min(count, (unsigned long)in_page / size);
    	if (!now) {
    		/*
    		 * String I/O straddles page boundary.  Pin two guest pages
    		 * so that we satisfy atomicity constraints.  Do just one
    		 * transaction to avoid complexity.
    		 */
    		nr_pages = 2;
    		now = 1;
    	}
    	if (down) {
    		/*
    		 * String I/O in reverse.  Yuck.  Kill the guest, fix later.
    		 */
		pr_unimpl(vcpu, "guest string pio down\n");
		kvm_inject_gp(vcpu, 0);
		return 1;
    	}
    	vcpu->run->io.count = now;
    
    	vcpu->arch.pio.cur_count = now;
    
    	if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
    
    		kvm_x86_ops->skip_emulated_instruction(vcpu);
    
    	for (i = 0; i < nr_pages; ++i) {
    		page = gva_to_page(vcpu, address + i * PAGE_SIZE);
    
		vcpu->arch.pio.guest_pages[i] = page;
		if (!page) {
			kvm_inject_gp(vcpu, 0);
			free_pio_guest_pages(vcpu);
			return 1;
		}
	}

    	pio_dev = vcpu_find_pio_dev(vcpu, port,
    				    vcpu->arch.pio.cur_count,
    				    !vcpu->arch.pio.in);
    
    	if (!vcpu->arch.pio.in) {
    
    		/* string PIO write */
    		ret = pio_copy_data(vcpu);
    		if (ret >= 0 && pio_dev) {
    			pio_string_write(pio_dev, vcpu);
    			complete_pio(vcpu);
    
    			if (vcpu->arch.pio.count == 0)
    
    				ret = 1;
    		}
    	} else if (pio_dev)
    		pr_unimpl(vcpu, "no string pio read support yet, "
    		       "port %x size %d count %ld\n",
    			port, size, count);
    
    	return ret;
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
    
    
int kvm_arch_init(void *opaque)
{
	int r;
	struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
    
    	if (kvm_x86_ops) {
		printk(KERN_ERR "kvm: already loaded the other module\n");
		r = -EEXIST;
		goto out;
	}
    
    	if (!ops->cpu_has_kvm_support()) {
		printk(KERN_ERR "kvm: no hardware support\n");
		r = -EOPNOTSUPP;
		goto out;
	}
    	if (ops->disabled_by_bios()) {
		printk(KERN_ERR "kvm: disabled by bios\n");
		r = -EOPNOTSUPP;
		goto out;
	}

    	r = kvm_mmu_module_init();
    	if (r)
    		goto out;
    
    	kvm_init_msr_list();
    
    
	kvm_x86_ops = ops;
	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
    
    	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
    	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
			PT_DIRTY_MASK, PT64_NX_MASK, 0);
	return 0;

out:
	return r;
}

    void kvm_arch_exit(void)
    {
	kvm_x86_ops = NULL;
	kvm_mmu_module_exit();
}

    int kvm_emulate_halt(struct kvm_vcpu *vcpu)
    {
    	++vcpu->stat.halt_exits;
    
    	KVMTRACE_0D(HLT, vcpu, handler);
    
    	if (irqchip_in_kernel(vcpu->kvm)) {
    
    		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
    
		up_read(&vcpu->kvm->slots_lock);
		kvm_vcpu_block(vcpu);
		down_read(&vcpu->kvm->slots_lock);
    
    		if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
    
    			return -EINTR;
    		return 1;
    	} else {
    		vcpu->run->exit_reason = KVM_EXIT_HLT;
    		return 0;
    	}
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_halt);
    
    
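/*
 * Outside long mode hypercall arguments are passed as 32-bit halves;
 * reassemble a0/a1 into a single 64-bit guest physical address.
 */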
    static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
    			   unsigned long a1)
    {
    	if (is_long_mode(vcpu))
    		return a0;
    	else
    		return a0 | ((gpa_t)a1 << 32);
    }
    
    
    int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
    {
	unsigned long nr, a0, a1, a2, a3, ret;
	int r = 1;

    	nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
    	a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
    	a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
    
    	KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
    
    
    	if (!is_long_mode(vcpu)) {
    		nr &= 0xFFFFFFFF;
    		a0 &= 0xFFFFFFFF;
    		a1 &= 0xFFFFFFFF;
    		a2 &= 0xFFFFFFFF;
    		a3 &= 0xFFFFFFFF;
    	}
    
    	switch (nr) {
    
    	case KVM_HC_VAPIC_POLL_IRQ:
    		ret = 0;
    		break;
    
    	case KVM_HC_MMU_OP:
    		r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
		break;
	default:
		ret = -KVM_ENOSYS;
		break;
	}

	kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
    
	++vcpu->stat.hypercalls;
	return r;
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
    
    int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
    {
    	char instruction[3];
    	int ret = 0;
    
    	unsigned long rip = kvm_rip_read(vcpu);
    
    
    
    	/*
    	 * Blow out the MMU to ensure that no other VCPU has an active mapping
    	 * to ensure that the updated hypercall appears atomically across all
    	 * VCPUs.
    	 */
    	kvm_mmu_zap_all(vcpu->kvm);
    
    	kvm_x86_ops->patch_hypercall(vcpu, instruction);
    
    	if (emulator_write_emulated(rip, instruction, 3, vcpu)
    
    	    != X86EMUL_CONTINUE)
    		ret = -EFAULT;
    
    	return ret;
    }
    
    static u64 mk_cr_64(u64 curr_cr, u32 new_val)
    {
    	return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
    }
    
    void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
    {
    	struct descriptor_table dt = { limit, base };
    
    	kvm_x86_ops->set_gdt(vcpu, &dt);
    }
    
    void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
    {
    	struct descriptor_table dt = { limit, base };
    
    	kvm_x86_ops->set_idt(vcpu, &dt);
    }
    
    void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
    		   unsigned long *rflags)
    {
    
	kvm_lmsw(vcpu, msw);
	*rflags = kvm_x86_ops->get_rflags(vcpu);
    }
    
    unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
    {
    
    	unsigned long value;
    
    
    	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
    	switch (cr) {
    	case 0:
    
    		value = vcpu->arch.cr0;
		break;
	case 2:
		value = vcpu->arch.cr2;
		break;
	case 3:
		value = vcpu->arch.cr3;
		break;
	case 4:
		value = vcpu->arch.cr4;
		break;
	case 8:
		value = kvm_get_cr8(vcpu);
		break;
	default:
		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
		return 0;
	}

    	KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value,
    		    (u32)((u64)value >> 32), handler);
    
    	return value;
    
    }
    
    void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
    		     unsigned long *rflags)
    {
    
    	KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val,
		    (u32)((u64)val >> 32), handler);
	switch (cr) {
	case 0:
		kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
		*rflags = kvm_x86_ops->get_rflags(vcpu);
		break;
	case 2:
		vcpu->arch.cr2 = val;
		break;
	case 3:
		kvm_set_cr3(vcpu, val);
		break;
	case 4:
		kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
		break;
	case 8:
		kvm_set_cr8(vcpu, val & 0xfUL);
		break;
	default:
		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
	}
}

    static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
    {
    
    	struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
    	int j, nent = vcpu->arch.cpuid_nent;
    
    
    	e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
    	/* when no next entry is found, the current entry[i] is reselected */
	for (j = i + 1; ; j = (j + 1) % nent) {
    
    		struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
    
    		if (ej->function == e->function) {
    			ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
    			return j;
    		}
    	}
    	return 0; /* silence gcc, even though control never reaches here */
    }
    
    /* find an entry with matching function, matching index (if needed), and that
     * should be read next (if it's stateful) */
    static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
    	u32 function, u32 index)
    {
    	if (e->function != function)
    		return 0;
    	if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
    		return 0;
    	if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
    		!(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
    		return 0;
    	return 1;
    }
    
    
    void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
    {
    	int i;
    
    	u32 function, index;
    	struct kvm_cpuid_entry2 *e, *best;
    
    	function = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	index = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
    	kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
    	kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
	kvm_register_write(vcpu, VCPU_REGS_RDX, 0);

	best = NULL;
    	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
    		e = &vcpu->arch.cpuid_entries[i];
    
    		if (is_matching_cpuid_entry(e, function, index)) {
    			if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
    				move_to_next_stateful_cpuid_entry(vcpu, i);
    
    			best = e;
    			break;
    		}
    		/*
    		 * Both basic or both extended?
    		 */
    		if (((e->function ^ function) & 0x80000000) == 0)
    			if (!best || e->function > best->function)
    				best = e;
    	}
    	if (best) {
    
    		kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
    		kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
    		kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
    		kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
    
    	}
    	kvm_x86_ops->skip_emulated_instruction(vcpu);
    
    	KVMTRACE_5D(CPUID, vcpu, function,
    
    		    (u32)kvm_register_read(vcpu, VCPU_REGS_RAX),
    		    (u32)kvm_register_read(vcpu, VCPU_REGS_RBX),
    		    (u32)kvm_register_read(vcpu, VCPU_REGS_RCX),
    		    (u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler);
    
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
    
    /*
     * Check if userspace requested an interrupt window, and that the
     * interrupt window is open.
     *
     * No need to exit to userspace if we already have an interrupt queued.
     */
    static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
    					  struct kvm_run *kvm_run)
    {
    
    	return (!vcpu->arch.irq_summary &&
    
    		kvm_run->request_interrupt_window &&
    
    		vcpu->arch.interrupt_window_open &&
    
    		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
    }
    
    static void post_kvm_run_save(struct kvm_vcpu *vcpu,
    			      struct kvm_run *kvm_run)
    {
	kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
	kvm_run->cr8 = kvm_get_cr8(vcpu);
    	kvm_run->apic_base = kvm_get_apic_base(vcpu);
    	if (irqchip_in_kernel(vcpu->kvm))
    		kvm_run->ready_for_interrupt_injection = 1;
    	else
    		kvm_run->ready_for_interrupt_injection =
    
    					(vcpu->arch.interrupt_window_open &&
					 vcpu->arch.irq_summary == 0);
}

    static void vapic_enter(struct kvm_vcpu *vcpu)
    {
    	struct kvm_lapic *apic = vcpu->arch.apic;
    	struct page *page;
    
    	if (!apic || !apic->vapic_addr)
    		return;
    
    
    	down_read(&current->mm->mmap_sem);
    
    	page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
    
	up_read(&current->mm->mmap_sem);

	vcpu->arch.apic->vapic_page = page;
    }
    
    static void vapic_exit(struct kvm_vcpu *vcpu)
    {
    	struct kvm_lapic *apic = vcpu->arch.apic;
    
    	if (!apic || !apic->vapic_addr)
    		return;
    
    
    	down_read(&vcpu->kvm->slots_lock);
    
    	kvm_release_page_dirty(apic->vapic_page);
    	mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
    
	up_read(&vcpu->kvm->slots_lock);
}

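/*
 * Main vcpu execution loop: service pending requests, inject queued
 * exceptions and interrupts, enter the guest, and handle the exit,
 * looping until userspace attention is required.
 */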
    static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
    {
    	int r;
    
    
    	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
    
    		pr_debug("vcpu %d received sipi with vector # %x\n",
    
    		       vcpu->vcpu_id, vcpu->arch.sipi_vector);
    
    		kvm_lapic_reset(vcpu);
    		r = kvm_x86_ops->vcpu_reset(vcpu);
    		if (r)
    			return r;
    
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
	}

    	down_read(&vcpu->kvm->slots_lock);
    
    	vapic_enter(vcpu);
    
    
    preempted:
    	if (vcpu->guest_debug.enabled)
    		kvm_x86_ops->guest_debug_pre(vcpu);
    
    again:
    
    	if (vcpu->requests)
    		if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
    			kvm_mmu_unload(vcpu);
    
    
    	r = kvm_mmu_reload(vcpu);
    	if (unlikely(r))
    		goto out;
    
    
    	if (vcpu->requests) {
    		if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
    
			__kvm_migrate_timers(vcpu);
    
    		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
    			kvm_x86_ops->tlb_flush(vcpu);
    
    		if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
    				       &vcpu->requests)) {
    			kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
    			r = 0;
    			goto out;
    		}
    
    		if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
    			kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
    			r = 0;
    			goto out;
		}
	}

    	clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
    
    	kvm_inject_pending_timer_irqs(vcpu);
    
    	preempt_disable();
    
    	kvm_x86_ops->prepare_guest_switch(vcpu);
    	kvm_load_guest_fpu(vcpu);
    
    	local_irq_disable();
    
    
    	if (vcpu->requests || need_resched()) {
    
    		local_irq_enable();
    		preempt_enable();
    		r = 1;
    		goto out;
    	}
    
    
    	if (signal_pending(current)) {
    		local_irq_enable();
    		preempt_enable();
    		r = -EINTR;
    		kvm_run->exit_reason = KVM_EXIT_INTR;
    		++vcpu->stat.signal_exits;
    		goto out;
    	}
    
    
    	vcpu->guest_mode = 1;
    	/*
    	 * Make sure that guest_mode assignment won't happen after
    	 * testing the pending IRQ vector bitmap.
    	 */
    	smp_wmb();
    
    
    	if (vcpu->arch.exception.pending)
    
    		__queue_exception(vcpu);
    	else if (irqchip_in_kernel(vcpu->kvm))
    
		kvm_x86_ops->inject_pending_irq(vcpu);
	else
    		kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
    
    
    	kvm_lapic_sync_to_vapic(vcpu);
    
    
    	up_read(&vcpu->kvm->slots_lock);
    
    
    	KVMTRACE_0D(VMENTRY, vcpu, entryexit);
    
    	kvm_x86_ops->run(vcpu, kvm_run);
    
    	vcpu->guest_mode = 0;
    	local_irq_enable();
    
    	++vcpu->stat.exits;
    
    	/*
    	 * We must have an instruction between local_irq_enable() and
    	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
    	 * the interrupt shadow.  The stat.exits increment will do nicely.
    	 * But we need to prevent reordering, hence this barrier():
    	 */
    	barrier();
    
    	kvm_guest_exit();
    
    	preempt_enable();
    
    
    	down_read(&vcpu->kvm->slots_lock);
    
    
    	/*
    	 * Profile KVM exit RIPs:
    	 */
    	if (unlikely(prof_on == KVM_PROFILING)) {
    
    		unsigned long rip = kvm_rip_read(vcpu);
		profile_hit(KVM_PROFILING, (void *)rip);
	}

    	if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu))
    		vcpu->arch.exception.pending = false;
    
    	kvm_lapic_sync_from_vapic(vcpu);
    
    
    	r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
    
    	if (r > 0) {
    		if (dm_request_for_irq_injection(vcpu, kvm_run)) {
    			r = -EINTR;
    			kvm_run->exit_reason = KVM_EXIT_INTR;
    			++vcpu->stat.request_irq_exits;
    			goto out;
		}
		if (!need_resched())
			goto again;
	}

out:
    	up_read(&vcpu->kvm->slots_lock);
    
    	if (r > 0) {
    		kvm_resched(vcpu);
    
    		down_read(&vcpu->kvm->slots_lock);
    
    		goto preempted;
    	}
    
    	post_kvm_run_save(vcpu, kvm_run);
    
    
    	vapic_exit(vcpu);
    
    
    	return r;
    }
    
    int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
    {
    	int r;
    	sigset_t sigsaved;
    
    	vcpu_load(vcpu);
    
    
    	if (vcpu->sigset_active)
    		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
    
    
	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
		kvm_vcpu_block(vcpu);
		r = -EAGAIN;
		goto out;
	}
    
    	/* re-sync apic's tpr */
	if (!irqchip_in_kernel(vcpu->kvm))
		kvm_set_cr8(vcpu, kvm_run->cr8);

    	if (vcpu->arch.pio.cur_count) {
    
    		r = complete_pio(vcpu);
    		if (r)
    			goto out;
    	}
    #if CONFIG_HAS_IOMEM
    	if (vcpu->mmio_needed) {