		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
    
    			kvm_mmu_unload(vcpu);
    
    		if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
    
    			__kvm_migrate_timers(vcpu);
    
    		if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu)) {
    			r = kvm_write_guest_time(vcpu);
    			if (unlikely(r))
    				goto out;
    		}
    
    		if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
    
    			kvm_mmu_sync_roots(vcpu);
    
    		if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
    
    			kvm_x86_ops->tlb_flush(vcpu);
    
    		if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
    
    			vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
    
    			r = 0;
    			goto out;
    		}
    
    		if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
    
			vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
			r = 0;
			goto out;
		}
		if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
    
    			vcpu->fpu_active = 0;
    			kvm_x86_ops->fpu_deactivate(vcpu);
    		}
    
    	r = kvm_mmu_reload(vcpu);
    	if (unlikely(r))
    		goto out;
    
    
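	/*
	 * From here until the VM exit has been processed, preemption stays
	 * disabled so the guest state prepared below cannot be migrated or
	 * clobbered by another task.
	 */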
    	preempt_disable();
    
    	kvm_x86_ops->prepare_guest_switch(vcpu);
    
    	if (vcpu->fpu_active)
    		kvm_load_guest_fpu(vcpu);
    
    	kvm_load_guest_xcr0(vcpu);
    
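	/*
	 * Flag the vcpu as about to run in guest mode before the final
	 * check for pending work below; kvm_vcpu_kick() consumes this flag
	 * (via atomic_xchg) to decide whether an IPI is needed to force a
	 * VM exit.
	 */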
    	atomic_set(&vcpu->guest_mode, 1);
    	smp_wmb();
    
    	local_irq_disable();
    
    	if (!atomic_read(&vcpu->guest_mode) || vcpu->requests
    	    || need_resched() || signal_pending(current)) {
    		atomic_set(&vcpu->guest_mode, 0);
    		smp_wmb();
    
    		local_irq_enable();
    		preempt_enable();
    		r = 1;
    		goto out;
    	}
    
    
    	inject_pending_event(vcpu);
    
    	/* enable NMI/IRQ window open exits if needed */
    	if (vcpu->arch.nmi_pending)
    		kvm_x86_ops->enable_nmi_window(vcpu);
    	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
    		kvm_x86_ops->enable_irq_window(vcpu);
    
    
    	if (kvm_lapic_enabled(vcpu)) {
    
    		update_cr8_intercept(vcpu);
		kvm_lapic_sync_to_vapic(vcpu);
	}

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
    
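	/*
	 * Arm the guest's hardware breakpoints: clear dr7 first so no stale
	 * enable bits can fire while dr0-dr3 are being rewritten with the
	 * guest's effective values.
	 */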
    	if (unlikely(vcpu->arch.switch_db_regs)) {
    		set_debugreg(0, 7);
    		set_debugreg(vcpu->arch.eff_db[0], 0);
    		set_debugreg(vcpu->arch.eff_db[1], 1);
    		set_debugreg(vcpu->arch.eff_db[2], 2);
    		set_debugreg(vcpu->arch.eff_db[3], 3);
    	}
    
    	trace_kvm_entry(vcpu->vcpu_id);
    
    	kvm_x86_ops->run(vcpu);
    
    	/*
    	 * If the guest has used debug registers, at least dr7
    	 * will be disabled while returning to the host.
    	 * If we don't have active breakpoints in the host, we don't
    	 * care about the messed up debug address registers. But if
    	 * we have some of them active, restore the old state.
	 */
	if (hw_breakpoint_active())
		hw_breakpoint_restore();

	kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
    
    
    	atomic_set(&vcpu->guest_mode, 0);
    	smp_wmb();
    
    	local_irq_enable();
    
    	++vcpu->stat.exits;
    
    	/*
    	 * We must have an instruction between local_irq_enable() and
    	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
    	 * the interrupt shadow.  The stat.exits increment will do nicely.
    	 * But we need to prevent reordering, hence this barrier():
    	 */
    	barrier();
    
    	kvm_guest_exit();
    
    	preempt_enable();
    
    
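	/*
	 * Re-take the SRCU read lock that was dropped before entering the
	 * guest; exit handling below may dereference memslots.
	 */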
    	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
    
    	/*
    	 * Profile KVM exit RIPs:
    	 */
    	if (unlikely(prof_on == KVM_PROFILING)) {
    
    		unsigned long rip = kvm_rip_read(vcpu);
		profile_hit(KVM_PROFILING, (void *)rip);
	}

	kvm_lapic_sync_from_vapic(vcpu);
    
    
	r = kvm_x86_ops->handle_exit(vcpu);
out:
	return r;
}

static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int r;
	struct kvm *kvm = vcpu->kvm;
    
    
    	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
    
    		pr_debug("vcpu %d received sipi with vector # %x\n",
    			 vcpu->vcpu_id, vcpu->arch.sipi_vector);
    
		kvm_lapic_reset(vcpu);
		r = kvm_arch_vcpu_reset(vcpu);
		if (r)
			return r;
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
	}

	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
    
    	vapic_enter(vcpu);
    
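	/*
	 * Main run loop: enter the guest while the vcpu is runnable,
	 * otherwise block until an event (interrupt, signal, request)
	 * makes it runnable again.  r <= 0 means return to userspace.
	 */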
    	r = 1;
    	while (r > 0) {
    
		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
			r = vcpu_enter_guest(vcpu);
		else {
			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
    
    			kvm_vcpu_block(vcpu);
    
    			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
    
    			if (kvm_check_request(KVM_REQ_UNHALT, vcpu))
    
    			{
    				switch(vcpu->arch.mp_state) {
    				case KVM_MP_STATE_HALTED:
    
    					vcpu->arch.mp_state =
    
    						KVM_MP_STATE_RUNNABLE;
    				case KVM_MP_STATE_RUNNABLE:
    					break;
    				case KVM_MP_STATE_SIPI_RECEIVED:
    				default:
    					r = -EINTR;
    					break;
    				}
			}
		}

		if (r <= 0)
    			break;
    
    		clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
    		if (kvm_cpu_has_pending_timer(vcpu))
    			kvm_inject_pending_timer_irqs(vcpu);
    
    
		if (dm_request_for_irq_injection(vcpu)) {
			r = -EINTR;
			vcpu->run->exit_reason = KVM_EXIT_INTR;
    
    			++vcpu->stat.request_irq_exits;
    		}
    		if (signal_pending(current)) {
    			r = -EINTR;
    
    			vcpu->run->exit_reason = KVM_EXIT_INTR;
    
    			++vcpu->stat.signal_exits;
    		}
    		if (need_resched()) {
    
			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
			kvm_resched(vcpu);
			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
		}
	}

	srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
    
    	vapic_exit(vcpu);
    
    
    	return r;
    }
    
    int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
    {
    	int r;
    	sigset_t sigsaved;
    
    
    	if (vcpu->sigset_active)
    		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
    
    
	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
		kvm_vcpu_block(vcpu);
		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
		r = -EAGAIN;
		goto out;
	}
    
    	/* re-sync apic's tpr */
	if (!irqchip_in_kernel(vcpu->kvm))
		kvm_set_cr8(vcpu, kvm_run->cr8);
    
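	/*
	 * Complete an interrupted PIO/MMIO operation: userspace has
	 * provided the result in kvm_run, so feed it to the emulator and
	 * resume the instruction that triggered the exit.
	 */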
    	if (vcpu->arch.pio.count || vcpu->mmio_needed) {
    
    		if (vcpu->mmio_needed) {
    			memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
    			vcpu->mmio_read_completed = 1;
			vcpu->mmio_needed = 0;
		}
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE);
		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		if (r != EMULATE_DONE) {
			r = 0;
			goto out;
		}
	}
	if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
    		kvm_register_write(vcpu, VCPU_REGS_RAX,
    				     kvm_run->hypercall.ret);
    
	r = __vcpu_run(vcpu);

out:
	if (vcpu->sigset_active)
    		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
    
    	return r;
    }
    
    int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
    {
    
    	regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
    	regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
    	regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
    	regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
    	regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
    	regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
    
    	regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
    	regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
    	regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
    	regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
    	regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
    	regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
    	regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
    	regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
    
    	regs->rip = kvm_rip_read(vcpu);
    
    	regs->rflags = kvm_get_rflags(vcpu);
    
    
    	return 0;
    }
    
    int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
    {
    
    	kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
    	kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
    	kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
    	kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
    	kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
    	kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
    	kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
    	kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
    
    	kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
    	kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
    	kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
    	kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
    	kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
    	kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
    	kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
    	kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
    
    	kvm_rip_write(vcpu, regs->rip);
    
    	kvm_set_rflags(vcpu, regs->rflags);
    
    	vcpu->arch.exception.pending = false;
    
    
    	return 0;
    }
    
    void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
    {
    	struct kvm_segment cs;
    
    
    	kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
    
    	*db = cs.db;
    	*l = cs.l;
    }
    EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
    
    int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
    				  struct kvm_sregs *sregs)
    {
	struct desc_ptr dt;

    	kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
    	kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
    	kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
    	kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
    	kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
    	kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
    
    	kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
    	kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
    
    
    	kvm_x86_ops->get_idt(vcpu, &dt);
    
    	sregs->idt.limit = dt.size;
    	sregs->idt.base = dt.address;
    
    	kvm_x86_ops->get_gdt(vcpu, &dt);
    
    	sregs->gdt.limit = dt.size;
    	sregs->gdt.base = dt.address;
    
    	sregs->cr0 = kvm_read_cr0(vcpu);
    
    	sregs->cr2 = vcpu->arch.cr2;
    	sregs->cr3 = vcpu->arch.cr3;
    
    	sregs->cr4 = kvm_read_cr4(vcpu);
    
    	sregs->efer = vcpu->arch.efer;
    
    	sregs->apic_base = kvm_get_apic_base(vcpu);
    
    
    	memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
    
    	if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
    
    		set_bit(vcpu->arch.interrupt.nr,
			(unsigned long *)sregs->interrupt_bitmap);

	return 0;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
    				    struct kvm_mp_state *mp_state)
    {
    	mp_state->mp_state = vcpu->arch.mp_state;
    	return 0;
    }
    
    int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
    				    struct kvm_mp_state *mp_state)
    {
    	vcpu->arch.mp_state = mp_state->mp_state;
    	return 0;
    }
    
    
    int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
		    bool has_error_code, u32 error_code)
{
	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
	int ret;

	ret = emulator_task_switch(&vcpu->arch.emulate_ctxt,
    
    				   tss_selector, reason, has_error_code,
    				   error_code);
    
	if (ret)
		return EMULATE_FAIL;
    
    	memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
    
    	kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
    
    	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
    	return EMULATE_DONE;
    
    }
    EXPORT_SYMBOL_GPL(kvm_task_switch);
    
    
    int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
    				  struct kvm_sregs *sregs)
    {
    	int mmu_reset_needed = 0;
    
	int pending_vec, max_bits;
	struct desc_ptr dt;

	dt.size = sregs->idt.limit;
    	dt.address = sregs->idt.base;
    
    	kvm_x86_ops->set_idt(vcpu, &dt);
    
    	dt.size = sregs->gdt.limit;
    	dt.address = sregs->gdt.base;
    
    	kvm_x86_ops->set_gdt(vcpu, &dt);
    
    
    	vcpu->arch.cr2 = sregs->cr2;
    	mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
    
    	vcpu->arch.cr3 = sregs->cr3;
    
    	mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
    
    	kvm_x86_ops->set_efer(vcpu, sregs->efer);
    	kvm_set_apic_base(vcpu, sregs->apic_base);
    
    
    	mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
    
    	kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
    
    	mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
    
    	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
    
    	if (!is_long_mode(vcpu) && is_pae(vcpu)) {
    
		load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3);
		mmu_reset_needed = 1;
	}

    	if (mmu_reset_needed)
    		kvm_mmu_reset_context(vcpu);
    
    
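	/*
	 * Re-queue an external interrupt that was pending when the register
	 * state was saved; KVM_GET_SREGS records it in interrupt_bitmap.
	 */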
    	max_bits = (sizeof sregs->interrupt_bitmap) << 3;
    	pending_vec = find_first_bit(
    		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
    	if (pending_vec < max_bits) {
    
    		kvm_queue_interrupt(vcpu, pending_vec, false);
    
    		pr_debug("Set back pending irq %d\n", pending_vec);
    		if (irqchip_in_kernel(vcpu->kvm))
			kvm_pic_clear_isr_ack(vcpu->kvm);
	}

	kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
    	kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
    	kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
    	kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
    	kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
    	kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
    
    	kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
    	kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
    
    	update_cr8_intercept(vcpu);
    
    
    	/* Older userspace won't unhalt the vcpu on reset. */
    
	if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
	    sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
	    !is_protmode(vcpu))
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;

	return 0;
}

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	unsigned long rflags;
	int i, r;
    
    	if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
    		r = -EBUSY;
		if (vcpu->arch.exception.pending)
			goto out;
		if (dbg->control & KVM_GUESTDBG_INJECT_DB)
    			kvm_queue_exception(vcpu, DB_VECTOR);
    		else
    			kvm_queue_exception(vcpu, BP_VECTOR);
    	}
    
    
    	/*
    	 * Read rflags as long as potentially injected trace flags are still
    	 * filtered out.
    	 */
    	rflags = kvm_get_rflags(vcpu);
    
    
    	vcpu->guest_debug = dbg->control;
    	if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
    		vcpu->guest_debug = 0;
    
    	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
    
    		for (i = 0; i < KVM_NR_DB_REGS; ++i)
    			vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
    		vcpu->arch.switch_db_regs =
    			(dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
    	} else {
    		for (i = 0; i < KVM_NR_DB_REGS; i++)
    			vcpu->arch.eff_db[i] = vcpu->arch.db[i];
    		vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
    	}
    
    
    	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
    		vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
    			get_segment_base(vcpu, VCPU_SREG_CS);
    
    	/*
    	 * Trigger an rflags update that will inject or remove the trace
    	 * flags.
    	 */
    	kvm_set_rflags(vcpu, rflags);
    
	kvm_x86_ops->set_guest_debug(vcpu, dbg);

	r = 0;

out:
	return r;
}

    /*
     * Translate a guest virtual address to a guest physical address.
     */
    int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
    				    struct kvm_translation *tr)
    {
    	unsigned long vaddr = tr->linear_address;
	gpa_t gpa;
	int idx;

	idx = srcu_read_lock(&vcpu->kvm->srcu);
    
    	gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
    
    	srcu_read_unlock(&vcpu->kvm->srcu, idx);
    
    	tr->physical_address = gpa;
    	tr->valid = gpa != UNMAPPED_GVA;
    	tr->writeable = 1;
    	tr->usermode = 0;
    
    	return 0;
    }
    
    
    int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
    {
    
    	struct i387_fxsave_struct *fxsave =
    			&vcpu->arch.guest_fpu.state->fxsave;
    
    
    	memcpy(fpu->fpr, fxsave->st_space, 128);
    	fpu->fcw = fxsave->cwd;
    	fpu->fsw = fxsave->swd;
    	fpu->ftwx = fxsave->twd;
    	fpu->last_opcode = fxsave->fop;
    	fpu->last_ip = fxsave->rip;
    	fpu->last_dp = fxsave->rdp;
    	memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
    
    	return 0;
    }
    
    int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
    {
    
    	struct i387_fxsave_struct *fxsave =
    			&vcpu->arch.guest_fpu.state->fxsave;
    
    
    	memcpy(fxsave->st_space, fpu->fpr, 128);
    	fxsave->cwd = fpu->fcw;
    	fxsave->swd = fpu->fsw;
    	fxsave->twd = fpu->ftwx;
    	fxsave->fop = fpu->last_opcode;
    	fxsave->rip = fpu->last_ip;
    	fxsave->rdp = fpu->last_dp;
    	memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
    
    	return 0;
    }
    
    
int fx_init(struct kvm_vcpu *vcpu)
{
	int err;
    
    	err = fpu_alloc(&vcpu->arch.guest_fpu);
    	if (err)
    		return err;
    
    
    	fpu_finit(&vcpu->arch.guest_fpu);
    
    	/*
    	 * Ensure guest xcr0 is valid for loading
    	 */
    	vcpu->arch.xcr0 = XSTATE_FP;
    
    
	vcpu->arch.cr0 |= X86_CR0_ET;

	return 0;
}

static void fx_free(struct kvm_vcpu *vcpu)
    {
    	fpu_free(&vcpu->arch.guest_fpu);
    }
    
    
    void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
    {
    
	if (vcpu->guest_fpu_loaded)
		return;

	/*
    	 * Restore all possible states in the guest,
    	 * and assume host would use all available bits.
    	 * Guest xcr0 would be loaded later.
    	 */
    	kvm_put_guest_xcr0(vcpu);
    
    	vcpu->guest_fpu_loaded = 1;
    
    	unlazy_fpu(current);
    
    	fpu_restore_checking(&vcpu->arch.guest_fpu);
    
    	trace_kvm_fpu(1);
    
    }
    
    void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
    {
    
    	kvm_put_guest_xcr0(vcpu);
    
    
    	if (!vcpu->guest_fpu_loaded)
    		return;
    
    	vcpu->guest_fpu_loaded = 0;
    
    	fpu_save_init(&vcpu->arch.guest_fpu);
    
    	++vcpu->stat.fpu_reload;
    
    	kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
    
	trace_kvm_fpu(0);
}

void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
    {
    
    	if (vcpu->arch.time_page) {
    		kvm_release_page_dirty(vcpu->arch.time_page);
    		vcpu->arch.time_page = NULL;
    	}
    
    
    	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
    
    	fx_free(vcpu);
    
    	kvm_x86_ops->vcpu_free(vcpu);
    }
    
    struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
    						unsigned int id)
    {
    
    	if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
    		printk_once(KERN_WARNING
    		"kvm: SMP vm created on host with unstable TSC; "
    		"guest TSC will not be reliable\n");
    
    	return kvm_x86_ops->vcpu_create(kvm, id);
    }
    
    int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
    {
    	int r;
    
    	vcpu->arch.mtrr_state.have_fixed = 1;
    
    	vcpu_load(vcpu);
    	r = kvm_arch_vcpu_reset(vcpu);
    	if (r == 0)
    		r = kvm_mmu_setup(vcpu);
    	vcpu_put(vcpu);
    	if (r < 0)
		goto free_vcpu;

	return 0;
free_vcpu:
	kvm_x86_ops->vcpu_free(vcpu);
	return r;
}

    void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
    
    {
    	vcpu_load(vcpu);
    	kvm_mmu_unload(vcpu);
    	vcpu_put(vcpu);
    
    
    	fx_free(vcpu);
    
    	kvm_x86_ops->vcpu_free(vcpu);
    }
    
    int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
    {
    
    	vcpu->arch.nmi_pending = false;
    	vcpu->arch.nmi_injected = false;
    
    
    	vcpu->arch.switch_db_regs = 0;
    	memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
    	vcpu->arch.dr6 = DR6_FIXED_1;
    	vcpu->arch.dr7 = DR7_FIXED_1;
    
    
    	return kvm_x86_ops->vcpu_reset(vcpu);
    }
    
    
int kvm_arch_hardware_enable(void *garbage)
{
	struct kvm *kvm;
    	struct kvm_vcpu *vcpu;
    	int i;
    
    
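	/*
	 * A CPU is coming (back) online: restore the shared user-return
	 * MSRs and refresh kvmclock for any vcpu that last ran here.
	 */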
    	kvm_shared_msr_cpu_online();
    
    	list_for_each_entry(kvm, &vm_list, vm_list)
    		kvm_for_each_vcpu(i, vcpu, kvm)
    			if (vcpu->cpu == smp_processor_id())
    				kvm_request_guest_time_update(vcpu);
    
    	return kvm_x86_ops->hardware_enable(garbage);
    
    }
    
    void kvm_arch_hardware_disable(void *garbage)
    {
    	kvm_x86_ops->hardware_disable(garbage);
    
    	drop_user_return_notifiers(garbage);
    
    }
    
    int kvm_arch_hardware_setup(void)
    {
    	return kvm_x86_ops->hardware_setup();
    }
    
    void kvm_arch_hardware_unsetup(void)
    {
    	kvm_x86_ops->hardware_unsetup();
    }
    
    void kvm_arch_check_processor_compat(void *rtn)
    {
    	kvm_x86_ops->check_processor_compatibility(rtn);
    }
    
    int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
    {
    	struct page *page;
    	struct kvm *kvm;
    	int r;
    
    	BUG_ON(vcpu->kvm == NULL);
    	kvm = vcpu->kvm;
    
    
    	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
    
    	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
    
    	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
    
    	vcpu->arch.mmu.translate_gpa = translate_gpa;
    
    	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
    
    	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
    
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
	else
		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
    
    
    	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
    	if (!page) {
    		r = -ENOMEM;
    		goto fail;
    	}
    
    	vcpu->arch.pio_data = page_address(page);
    
    
    	r = kvm_mmu_create(vcpu);
    	if (r < 0)
    		goto fail_free_pio_data;
    
    	if (irqchip_in_kernel(kvm)) {
    		r = kvm_create_lapic(vcpu);
    		if (r < 0)
    			goto fail_mmu_destroy;
    	}
    
    
    	vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
    				       GFP_KERNEL);
    	if (!vcpu->arch.mce_banks) {
		r = -ENOMEM;
		goto fail_free_lapic;
	}
    	vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
    
    
    	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
		goto fail_free_mce_banks;

	return 0;
fail_free_mce_banks:
    	kfree(vcpu->arch.mce_banks);
    
    fail_free_lapic:
    	kvm_free_lapic(vcpu);
    
    fail_mmu_destroy:
    	kvm_mmu_destroy(vcpu);
    fail_free_pio_data:
    
    	free_page((unsigned long)vcpu->arch.pio_data);
    
    fail:
    	return r;
    }
    
    void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
    {
    
	int idx;

	kfree(vcpu->arch.mce_banks);
	idx = srcu_read_lock(&vcpu->kvm->srcu);
	kvm_mmu_destroy(vcpu);
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	free_page((unsigned long)vcpu->arch.pio_data);
}
    
    
    struct  kvm *kvm_arch_create_vm(void)
    {
    	struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
    
    	if (!kvm)
    		return ERR_PTR(-ENOMEM);
    
    
    	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
    
    	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
    
    	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
    	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
    
    
    	spin_lock_init(&kvm->arch.tsc_write_lock);
    
    
    	return kvm;
    }
    
    static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
    {
    	vcpu_load(vcpu);
    	kvm_mmu_unload(vcpu);
    	vcpu_put(vcpu);
    }
    
    static void kvm_free_vcpus(struct kvm *kvm)
    {
    	unsigned int i;
    
    	struct kvm_vcpu *vcpu;
    
    	kvm_for_each_vcpu(i, vcpu, kvm)
    		kvm_unload_vcpu_mmu(vcpu);
    	kvm_for_each_vcpu(i, vcpu, kvm)
    		kvm_arch_vcpu_free(vcpu);
    
    	mutex_lock(&kvm->lock);
    	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
    		kvm->vcpus[i] = NULL;
    
    	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_sync_events(struct kvm *kvm)
    {
    
	kvm_free_all_assigned_devices(kvm);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
    {
    
    	kvm_iommu_unmap_guest(kvm);
    
    	kfree(kvm->arch.vpic);
    	kfree(kvm->arch.vioapic);
    
    	kvm_free_vcpus(kvm);
    	kvm_free_physmem(kvm);
    
    	if (kvm->arch.apic_access_page)
    		put_page(kvm->arch.apic_access_page);
    
    	if (kvm->arch.ept_identity_pagetable)
    		put_page(kvm->arch.ept_identity_pagetable);
    
	cleanup_srcu_struct(&kvm->srcu);
	kfree(kvm);
}

int kvm_arch_prepare_memory_region(struct kvm *kvm,
    				struct kvm_memory_slot *memslot,
    
				struct kvm_memory_slot old,
				struct kvm_userspace_memory_region *mem,
				int user_alloc)
{
	int npages = memslot->npages;
    
    	int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
    
    	/* Prevent internal slot pages from being moved by fork()/COW. */
    	if (memslot->id >= KVM_MEMORY_SLOTS)
    		map_flags = MAP_SHARED | MAP_ANONYMOUS;
    
    
	/* To keep backward compatibility with older userspace,
	 * x86 needs to handle the !user_alloc case.
	 */
    	if (!user_alloc) {
    		if (npages && !old.rmap) {
    
    			unsigned long userspace_addr;
    
    
    			userspace_addr = do_mmap(NULL, 0,
    						 npages * PAGE_SIZE,
						 PROT_READ | PROT_WRITE,
						 map_flags,
						 0);

			if (IS_ERR((void *)userspace_addr))
    				return PTR_ERR((void *)userspace_addr);
    
			memslot->userspace_addr = userspace_addr;
		}
	}

	return 0;
    }
    
    void kvm_arch_commit_memory_region(struct kvm *kvm,
    				struct kvm_userspace_memory_region *mem,
    				struct kvm_memory_slot old,
    				int user_alloc)
    {
    
    	int npages = mem->memory_size >> PAGE_SHIFT;
    
    	if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
    		int ret;
    
    		down_write(&current->mm->mmap_sem);
    		ret = do_munmap(current->mm, old.userspace_addr,
    				old.npages * PAGE_SIZE);
    		up_write(&current->mm->mmap_sem);
    		if (ret < 0)
    			printk(KERN_WARNING
    			       "kvm_vm_ioctl_set_memory_region: "
    			       "failed to munmap memory\n");
    	}
    
    
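	/*
	 * Unless userspace pinned the number of MMU pages, recompute the
	 * limit for the new memslot layout, then write-protect the slot so
	 * shadow paging and dirty tracking start from a clean state.
	 */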
    	spin_lock(&kvm->mmu_lock);
    
    	if (!kvm->arch.n_requested_mmu_pages) {
    
    		unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
    		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
    	}
    
    	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
    
	spin_unlock(&kvm->mmu_lock);
}

void kvm_arch_flush_shadow(struct kvm *kvm)
    {
    	kvm_mmu_zap_all(kvm);
    
	kvm_reload_remote_mmus(kvm);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
    {
    
    	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
    
    		|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
    		|| vcpu->arch.nmi_pending ||
    		(kvm_arch_interrupt_allowed(vcpu) &&
		 kvm_cpu_has_interrupt(vcpu));
}

void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
    {
    
    	int me;
    	int cpu = vcpu->cpu;
    
    
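	/*
	 * Wake the vcpu if it is blocked in kvm_vcpu_block(); if it is
	 * already executing guest code on another CPU, the reschedule IPI
	 * below forces a VM exit.
	 */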
    	if (waitqueue_active(&vcpu->wq)) {
    		wake_up_interruptible(&vcpu->wq);
    		++vcpu->stat.halt_wakeup;
    	}
    
    
    	me = get_cpu();
    	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
    
    		if (atomic_xchg(&vcpu->guest_mode, 0))
    
			smp_send_reschedule(cpu);
	put_cpu();
}

int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
    {
    	return kvm_x86_ops->interrupt_allowed(vcpu);
    }
    
    bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
    {
    	unsigned long current_rip = kvm_rip_read(vcpu) +
    		get_segment_base(vcpu, VCPU_SREG_CS);
    
    	return current_rip == linear_rip;
    }
    EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
    
    
    unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
    {
    	unsigned long rflags;
    
    	rflags = kvm_x86_ops->get_rflags(vcpu);
    	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
    
    		rflags &= ~X86_EFLAGS_TF;
    
    	return rflags;
    }
    EXPORT_SYMBOL_GPL(kvm_get_rflags);
    
    void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
    {
    	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&