Skip to content
Snippets Groups Projects
x86.c 147 KiB
Newer Older
  • Learn to ignore specific revisions
  • 	err_code = selector & 0xfffc;
    	err_vec = GP_VECTOR;
    
    	/* can't load system descriptor into segment selector */
    	if (seg <= VCPU_SREG_GS && !kvm_seg.s)
    		goto exception;
    
    	if (!kvm_seg.present) {
    		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
    		goto exception;
    	}
    
    	rpl = selector & 3;
    	dpl = kvm_seg.dpl;
    	cpl = kvm_x86_ops->get_cpl(vcpu);
    
    	switch (seg) {
    	case VCPU_SREG_SS:
    		/*
    		 * segment is not a writable data segment or segment
    		 * selector's RPL != CPL or segment descriptor's DPL != CPL
    		 */
    		if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl)
    			goto exception;
    		break;
    	case VCPU_SREG_CS:
    		if (!(kvm_seg.type & 8))
    			goto exception;
    
    		if (kvm_seg.type & 4) {
    			/* conforming */
    			if (dpl > cpl)
    				goto exception;
    		} else {
    			/* nonconforming */
    			if (rpl > cpl || dpl != cpl)
    				goto exception;
    		}
    		/* CS(RPL) <- CPL */
    		selector = (selector & 0xfffc) | cpl;
                break;
    	case VCPU_SREG_TR:
    		if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9))
    			goto exception;
    		break;
    	case VCPU_SREG_LDTR:
    		if (kvm_seg.s || kvm_seg.type != 2)
    			goto exception;
    		break;
    	default: /*  DS, ES, FS, or GS */
    		/*
    		 * segment is not a data or readable code segment or
    		 * ((segment is a data or nonconforming code segment)
    		 * and (both RPL and CPL > DPL))
    		 */
    		if ((kvm_seg.type & 0xa) == 0x8 ||
    		    (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl)))
    			goto exception;
    		break;
    	}
    
    	if (!kvm_seg.unusable && kvm_seg.s) {
    
    		/* mark segment as accessed */
    
    		kvm_seg.type |= 1;
    
    		seg_desc.type |= 1;
    		save_guest_segment_descriptor(vcpu, selector, &seg_desc);
    	}
    
    load:
    	kvm_set_segment(vcpu, &kvm_seg, seg);
    	return X86EMUL_CONTINUE;
    exception:
    	kvm_queue_exception_e(vcpu, err_vec, err_code);
    	return X86EMUL_PROPAGATE_FAULT;
    
    }
    
    /*
     * save_state_to_tss32 - copy the vcpu's current register state into a
     * 32-bit TSS image.
     * @vcpu: vcpu whose registers are read
     * @tss:  in-memory TSS image to fill in
     *
     * Writes cr3, eip, eflags, the eight general purpose registers, the six
     * segment selectors and the LDT selector.  No other field of @tss is
     * modified here.
     */
    static void save_state_to_tss32(struct kvm_vcpu *vcpu,
    				struct tss_segment_32 *tss)
    {
    	/* Control state: page-table base, instruction pointer, flags. */
    	tss->cr3 = vcpu->arch.cr3;
    
    	tss->eip = kvm_rip_read(vcpu);
    
    	tss->eflags = kvm_get_rflags(vcpu);
    
    	/* General purpose registers. */
    	tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
    	tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
    	tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP);
    	tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP);
    	tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI);
    	tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI);
    
    	/* Segment selectors only; descriptors are not part of the image. */
    	tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
    	tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
    	tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
    	tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
    	tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
    	tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
    	tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
    }
    
    
    /*
     * Overwrite only the selector of segment register @seg with @sel.  The
     * segment is read back first so that every other field is written
     * unchanged.
     */
    static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg)
    {
    	struct kvm_segment cur;
    
    	kvm_get_segment(vcpu, &cur, seg);
    
    	cur.selector = sel;
    
    	kvm_set_segment(vcpu, &cur, seg);
    }
    
    
    /*
     * load_state_from_tss32 - install the register state held in a 32-bit TSS
     * image into the vcpu.
     * @vcpu: vcpu to update
     * @tss:  TSS image to load from
     *
     * Loads cr3, eip, eflags (with bit 1 forced on), the general purpose
     * registers, then all segment selectors, and finally the segment
     * descriptors.
     *
     * Returns 0 on success, 1 as soon as loading one segment descriptor fails.
     */
    static int load_state_from_tss32(struct kvm_vcpu *vcpu,
    				  struct tss_segment_32 *tss)
    {
    	kvm_set_cr3(vcpu, tss->cr3);
    
    	kvm_rip_write(vcpu, tss->eip);
    
    	kvm_set_rflags(vcpu, tss->eflags | 2);
    
    	kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
    	kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
    	kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx);
    	kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx);
    	kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp);
    	kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp);
    	kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
    	kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
    
    	/*
    	 * SDM says that segment selectors are loaded before segment
    	 * descriptors
    	 */
    	kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR);
    	kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
    	kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
    	kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
    	kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
    	kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS);
    	kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS);
    
    	/*
    	 * Now load segment descriptors. If a fault happens at this stage
    	 * it is handled in the context of the new task.
    	 *
    	 * Each load is checked on its own: the checks must not be nested,
    	 * otherwise a successful LDTR load would skip every later segment.
    	 */
    	if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR))
    		return 1;
    
    	if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
    		return 1;
    
    	if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
    		return 1;
    
    	if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
    		return 1;
    
    	if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
    		return 1;
    
    	if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS))
    		return 1;
    
    	if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS))
    		return 1;
    
    	return 0;
    }
    
    /*
     * save_state_to_tss16 - copy the vcpu's current register state into a
     * 16-bit TSS image.
     * @vcpu: vcpu whose registers are read
     * @tss:  in-memory TSS image to fill in
     *
     * Writes ip, flag, the eight general purpose registers, the ES/CS/SS/DS
     * selectors and the LDT selector.  No other field of @tss is modified.
     */
    static void save_state_to_tss16(struct kvm_vcpu *vcpu,
    				struct tss_segment_16 *tss)
    {
    
    	tss->ip = kvm_rip_read(vcpu);
    
    	tss->flag = kvm_get_rflags(vcpu);
    
    	/* General purpose registers. */
    	tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
    	tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX);
    	tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP);
    	tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP);
    	tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI);
    	tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI);
    
    
    	/* Segment selectors (FS and GS are not stored by this function). */
    	tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
    	tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
    	tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
    	tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
    	tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
    }
    
    /*
     * load_state_from_tss16 - install the register state held in a 16-bit TSS
     * image into the vcpu.
     * @vcpu: vcpu to update
     * @tss:  TSS image to load from
     *
     * Loads ip, flag (with bit 1 forced on), the general purpose registers,
     * then the LDTR/ES/CS/SS/DS selectors, and finally their descriptors.
     *
     * Returns 0 on success, 1 as soon as loading one segment descriptor fails
     * (same convention as load_state_from_tss32()).
     */
    static int load_state_from_tss16(struct kvm_vcpu *vcpu,
    				 struct tss_segment_16 *tss)
    {
    	kvm_rip_write(vcpu, tss->ip);
    
    	kvm_set_rflags(vcpu, tss->flag | 2);
    
    	kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
    	kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
    	kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
    	kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx);
    	kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp);
    	kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp);
    	kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
    	kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
    
    	/*
    	 * SDM says that segment selectors are loaded before segment
    	 * descriptors
    	 */
    	kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR);
    	kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
    	kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
    	kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
    	kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
    
    	/*
    	 * Now load segment descriptors. If a fault happens at this stage
    	 * it is handled in the context of the new task.
    	 *
    	 * Each load is checked on its own so that one successful load does
    	 * not skip the remaining segments.
    	 */
    	if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR))
    		return 1;
    
    	if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
    		return 1;
    
    	if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
    		return 1;
    
    	if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
    		return 1;
    
    	if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
    		return 1;
    
    	return 0;
    }
    
    /*
     * kvm_task_switch_16 - perform the memory side of a task switch through
     * 16-bit TSS images.
     * @vcpu:         vcpu performing the switch
     * @tss_selector: selector of the new TSS (not used in this function)
     * @old_tss_sel:  selector of the outgoing TSS, or 0xffff when no back link
     *                must be stored in the new TSS
     * @old_tss_base: guest physical address of the outgoing TSS
     * @nseg_desc:    descriptor of the new TSS
     *
     * Saves the current state into the old TSS, reads the new TSS, optionally
     * writes the back link, and loads the new state into the vcpu.
     *
     * Returns 1 on success and 0 if any guest memory access or the state load
     * fails.
     */
    static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
    			      u16 old_tss_sel, u32 old_tss_base,
    			      struct desc_struct *nseg_desc)
    {
    	struct tss_segment_16 tss_segment_16;
    	int ret = 0;
    
    	/* Read-modify-write the outgoing TSS so untouched fields survive. */
    	if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
    			   sizeof tss_segment_16))
    		goto out;
    
    	save_state_to_tss16(vcpu, &tss_segment_16);
    
    	if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
    			    sizeof tss_segment_16))
    		goto out;
    
    	/* From here on the buffer holds the incoming task's TSS. */
    	if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
    			   &tss_segment_16, sizeof tss_segment_16))
    		goto out;
    
    	if (old_tss_sel != 0xffff) {
    		tss_segment_16.prev_task_link = old_tss_sel;
    
    		if (kvm_write_guest(vcpu->kvm,
    				    get_tss_base_addr_write(vcpu, nseg_desc),
    				    &tss_segment_16.prev_task_link,
    				    sizeof tss_segment_16.prev_task_link))
    			goto out;
    	}
    
    	if (load_state_from_tss16(vcpu, &tss_segment_16))
    		goto out;
    
    	ret = 1;
    out:
    	return ret;
    }
    
    
    /*
     * kvm_task_switch_32 - perform the memory side of a task switch through
     * 32-bit TSS images.
     * @vcpu:         vcpu performing the switch
     * @tss_selector: selector of the new TSS (not used in this function)
     * @old_tss_sel:  selector of the outgoing TSS, or 0xffff when no back link
     *                must be stored in the new TSS
     * @old_tss_base: guest physical address of the outgoing TSS
     * @nseg_desc:    descriptor of the new TSS
     *
     * Saves the current state into the old TSS, reads the new TSS, optionally
     * writes the back link, and loads the new state into the vcpu.
     *
     * Returns 1 on success and 0 if any guest memory access or the state load
     * fails.
     */
    static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
    		       u16 old_tss_sel, u32 old_tss_base,
    		       struct desc_struct *nseg_desc)
    {
    	struct tss_segment_32 tss_segment_32;
    	int ret = 0;
    
    	/* Read-modify-write the outgoing TSS so untouched fields survive. */
    	if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
    			   sizeof tss_segment_32))
    		goto out;
    
    	save_state_to_tss32(vcpu, &tss_segment_32);
    
    	if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
    			    sizeof tss_segment_32))
    		goto out;
    
    	/* From here on the buffer holds the incoming task's TSS. */
    	if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
    			   &tss_segment_32, sizeof tss_segment_32))
    		goto out;
    
    	if (old_tss_sel != 0xffff) {
    		tss_segment_32.prev_task_link = old_tss_sel;
    
    		if (kvm_write_guest(vcpu->kvm,
    				    get_tss_base_addr_write(vcpu, nseg_desc),
    				    &tss_segment_32.prev_task_link,
    				    sizeof tss_segment_32.prev_task_link))
    			goto out;
    	}
    
    	if (load_state_from_tss32(vcpu, &tss_segment_32))
    		goto out;
    
    	ret = 1;
    out:
    	return ret;
    }
    
    /*
     * kvm_task_switch - emulate a hardware task switch for the guest.
     * @vcpu:         vcpu performing the switch
     * @tss_selector: selector of the TSS to switch to
     * @reason:       one of the TASK_SWITCH_* values (IRET, JMP, CALL, GATE are
     *                the ones tested here)
     *
     * Returns 1 after queueing a #GP or #TS for an unusable target TSS, 0 if a
     * TSS descriptor cannot be read, and otherwise the result of
     * kvm_task_switch_32()/kvm_task_switch_16() (1 on success, 0 on failure).
     */
    int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
    {
    	struct kvm_segment tr_seg;
    	struct desc_struct cseg_desc;
    	struct desc_struct nseg_desc;
    	int ret = 0;
    	u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
    	u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
    	u32 desc_limit;
    
    	old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
    
    	/* FIXME: Handle errors. Failure to read either TSS or their
    	 * descriptors should generate a pagefault.
    	 */
    	if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
    		goto out;
    
    	if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc))
    		goto out;
    
    	/* Privilege check against the new TSS descriptor, skipped for IRET. */
    	if (reason != TASK_SWITCH_IRET) {
    		int cpl;
    
    		cpl = kvm_x86_ops->get_cpl(vcpu);
    		if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) {
    			kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
    			return 1;
    		}
    	}
    
    	/*
    	 * The new TSS must be present and large enough: limit >= 0x67 when
    	 * type bit 3 is set (the 32-bit path below), limit >= 0x2b otherwise.
    	 */
    	desc_limit = get_desc_limit(&nseg_desc);
    	if (!nseg_desc.p ||
    	    ((desc_limit < 0x67 && (nseg_desc.type & 8)) ||
    	     desc_limit < 0x2b)) {
    		kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
    		return 1;
    	}
    
    	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
    		cseg_desc.type &= ~(1 << 1); //clear the B flag
    		save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc);
    	}
    
    	if (reason == TASK_SWITCH_IRET) {
    		u32 eflags = kvm_get_rflags(vcpu);
    		kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
    	}
    
    	/*
    	 * Only CALL and gate switches store a back link to the previous
    	 * task.  For every other reason pass the 0xffff sentinel so that the
    	 * helpers skip writing prev_task_link.
    	 */
    	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
    		old_tss_sel = 0xffff;
    
    	/* Exactly one of the two TSS formats is used, chosen by type bit 3. */
    	if (nseg_desc.type & 8)
    		ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel,
    					 old_tss_base, &nseg_desc);
    	else
    		ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel,
    					 old_tss_base, &nseg_desc);
    
    	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
    		u32 eflags = kvm_get_rflags(vcpu);
    		kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT);
    	}
    
    	if (reason != TASK_SWITCH_IRET) {
    		/* Set the same type bit that is cleared on the old TSS above. */
    		nseg_desc.type |= (1 << 1);
    		save_guest_segment_descriptor(vcpu, tss_selector,
    					      &nseg_desc);
    	}
    
    	kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS);
    
    	seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
    	tr_seg.type = 11;
    	kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
    
    out:
    	return ret;
    }
    EXPORT_SYMBOL_GPL(kvm_task_switch);
    
    
    /*
     * kvm_arch_vcpu_ioctl_set_sregs - load the special registers supplied in
     * @sregs into @vcpu.
     *
     * Visible steps: IDT and GDT, CR2, CR3, EFER, APIC base, CR0, CR4, an
     * optional MMU context reset, re-queueing of the first pending interrupt
     * found in sregs->interrupt_bitmap, then all segment registers.
     *
     * NOTE(review): this text of the function is incomplete - `dt` is used
     * without a visible declaration, two `{` blocks below are never closed,
     * the final `if` condition is cut off, and neither a return statement nor
     * the closing brace of the function is present.  Restore the missing
     * lines from the authoritative source before building.
     */
    int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
    				  struct kvm_sregs *sregs)
    {
    	/* Accumulates "some paging-related register changed". */
    	int mmu_reset_needed = 0;
    
    	int pending_vec, max_bits;
    
    	dt.size = sregs->idt.limit;
    	dt.address = sregs->idt.base;
    
    	kvm_x86_ops->set_idt(vcpu, &dt);
    
    	dt.size = sregs->gdt.limit;
    	dt.address = sregs->gdt.base;
    
    	kvm_x86_ops->set_gdt(vcpu, &dt);
    
    
    	vcpu->arch.cr2 = sregs->cr2;
    	mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
    
    	vcpu->arch.cr3 = sregs->cr3;
    
    	mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
    
    	kvm_x86_ops->set_efer(vcpu, sregs->efer);
    	kvm_set_apic_base(vcpu, sregs->apic_base);
    
    
    	mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
    
    	kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
    
    	mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
    
    	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
    
    	/* NOTE(review): this block is never closed in the visible text. */
    	if (!is_long_mode(vcpu) && is_pae(vcpu)) {
    
    		load_pdptrs(vcpu, vcpu->arch.cr3);
    
    
    	if (mmu_reset_needed)
    		kvm_mmu_reset_context(vcpu);
    
    
    	/* Bitmap size in bits; re-queue the first set bit, if any. */
    	max_bits = (sizeof sregs->interrupt_bitmap) << 3;
    	pending_vec = find_first_bit(
    		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
    	/* NOTE(review): this block is never closed in the visible text. */
    	if (pending_vec < max_bits) {
    
    		kvm_queue_interrupt(vcpu, pending_vec, false);
    
    		pr_debug("Set back pending irq %d\n", pending_vec);
    		if (irqchip_in_kernel(vcpu->kvm))
    			kvm_pic_clear_isr_ack(vcpu->kvm);
    
    	kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
    	kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
    	kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
    	kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
    	kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
    	kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
    
    	kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
    	kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
    
    	update_cr8_intercept(vcpu);
    
    
    	/* Older userspace won't unhalt the vcpu on reset. */
    
    	/* NOTE(review): the condition below ends in `&&` - its tail is missing. */
    	if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
    
    	    sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
    
    		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
    
    
    /*
     * kvm_arch_vcpu_ioctl_set_guest_debug - apply the debug settings in @dbg
     * to @vcpu.
     *
     * Visible steps: optionally queue a #DB or #BP, record dbg->control in
     * vcpu->guest_debug, select the effective debug registers, record the
     * single-step address, refresh rflags and call the vendor hook.
     *
     * NOTE(review): this text of the function is incomplete - the opening
     * brace, the declarations of `r` and `i`, the `unlock_out` label, the
     * return statement and the closing brace are not present.  Restore them
     * from the authoritative source before building.
     */
    int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
    					struct kvm_guest_debug *dbg)
    
    	unsigned long rflags;
    
    	/* Inject a debug exception on request unless one is already pending. */
    	if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
    		r = -EBUSY;
    		if (vcpu->arch.exception.pending)
    			goto unlock_out;
    		if (dbg->control & KVM_GUESTDBG_INJECT_DB)
    			kvm_queue_exception(vcpu, DB_VECTOR);
    		else
    			kvm_queue_exception(vcpu, BP_VECTOR);
    	}
    
    
    	/*
    	 * Read rflags as long as potentially injected trace flags are still
    	 * filtered out.
    	 */
    	rflags = kvm_get_rflags(vcpu);
    
    
    	/* Without KVM_GUESTDBG_ENABLE every other control bit is dropped. */
    	vcpu->guest_debug = dbg->control;
    	if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
    		vcpu->guest_debug = 0;
    
    	/* Effective debug registers come from @dbg or from the vcpu's own. */
    	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
    
    		for (i = 0; i < KVM_NR_DB_REGS; ++i)
    			vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
    		vcpu->arch.switch_db_regs =
    			(dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
    	} else {
    		for (i = 0; i < KVM_NR_DB_REGS; i++)
    			vcpu->arch.eff_db[i] = vcpu->arch.db[i];
    		vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
    	}
    
    
    	/* Remember the linear address (rip + CS base) single-stepping began at. */
    	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
    		vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
    			get_segment_base(vcpu, VCPU_SREG_CS);
    
    	/*
    	 * Trigger an rflags update that will inject or remove the trace
    	 * flags.
    	 */
    	kvm_set_rflags(vcpu, rflags);
    
    	kvm_x86_ops->set_guest_debug(vcpu, dbg);
    
    /*
     * fxsave fpu state.  Taken from x86_64/processor.h.  To be killed when
     * we have asm/x86/processor.h
     *
     * The get/set FPU ioctls in this file overlay this structure on
     * vcpu->arch.guest_fx_image, so the field order and sizes must not change.
     */
    struct fxsave {
    	u16	cwd;		/* exchanged with kvm_fpu.fcw */
    	u16	swd;		/* exchanged with kvm_fpu.fsw */
    	u16	twd;		/* exchanged with kvm_fpu.ftwx */
    	u16	fop;		/* exchanged with kvm_fpu.last_opcode */
    	u64	rip;		/* exchanged with kvm_fpu.last_ip */
    	u64	rdp;		/* exchanged with kvm_fpu.last_dp */
    	u32	mxcsr;
    	u32	mxcsr_mask;
    	u32	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
    #ifdef CONFIG_X86_64
    	u32	xmm_space[64];	/* 16*16 bytes for each XMM-reg = 256 bytes */
    #else
    	u32	xmm_space[32];	/* 8*16 bytes for each XMM-reg = 128 bytes */
    #endif
    };
    
    
    /*
     * Translate a guest virtual address to a guest physical address.
     *
     * @vcpu: vcpu whose address space is used for the walk
     * @tr:   tr->linear_address is the input; physical_address, valid,
     *        writeable and usermode are filled in on return
     *
     * tr->valid is 0 when the address is unmapped (UNMAPPED_GVA).  writeable
     * and usermode are reported as the fixed values 1 and 0 respectively.
     *
     * Always returns 0.
     */
    int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
    				    struct kvm_translation *tr)
    {
    	unsigned long vaddr = tr->linear_address;
    	gpa_t gpa;
    	int idx;
    
    	/* Paired with the vcpu_put() below. */
    	vcpu_load(vcpu);
    
    	/* The translation runs inside an SRCU read-side section on kvm->srcu. */
    	idx = srcu_read_lock(&vcpu->kvm->srcu);
    	gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
    	srcu_read_unlock(&vcpu->kvm->srcu, idx);
    
    	tr->physical_address = gpa;
    	tr->valid = gpa != UNMAPPED_GVA;
    	tr->writeable = 1;
    	tr->usermode = 0;
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    
    /*
     * Copy the guest FPU image of @vcpu out into the userspace-visible @fpu
     * layout: x87 registers, control/status/tag words, last opcode and
     * pointers, and the XMM registers.  Always returns 0.
     */
    int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
    {
    	struct fxsave *image;
    
    	image = (struct fxsave *)&vcpu->arch.guest_fx_image;
    
    	vcpu_load(vcpu);
    
    	/* Register files. */
    	memcpy(fpu->fpr, image->st_space, 128);
    	memcpy(fpu->xmm, image->xmm_space, sizeof image->xmm_space);
    
    	/* Scalar state. */
    	fpu->fcw = image->cwd;
    	fpu->fsw = image->swd;
    	fpu->ftwx = image->twd;
    	fpu->last_opcode = image->fop;
    	fpu->last_ip = image->rip;
    	fpu->last_dp = image->rdp;
    
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    /*
     * Copy the userspace-supplied @fpu state into the guest FPU image of
     * @vcpu: x87 registers, control/status/tag words, last opcode and
     * pointers, and the XMM registers.  Always returns 0.
     */
    int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
    {
    	struct fxsave *image;
    
    	image = (struct fxsave *)&vcpu->arch.guest_fx_image;
    
    	vcpu_load(vcpu);
    
    	/* Register files. */
    	memcpy(image->st_space, fpu->fpr, 128);
    	memcpy(image->xmm_space, fpu->xmm, sizeof image->xmm_space);
    
    	/* Scalar state. */
    	image->cwd = fpu->fcw;
    	image->swd = fpu->fsw;
    	image->twd = fpu->ftwx;
    	image->fop = fpu->last_opcode;
    	image->rip = fpu->last_ip;
    	image->rdp = fpu->last_dp;
    
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    /*
     * fx_init - build the initial guest FPU image for @vcpu.
     *
     * The host FPU state is saved, the FPU is reset, the reset state is
     * captured as the guest image, and the host state is restored.  The guest
     * image then gets mxcsr 0x1f80 and everything from st_space onwards
     * zeroed.  CR0.ET is set in the cached guest cr0.
     */
    void fx_init(struct kvm_vcpu *vcpu)
    {
    	unsigned after_mxcsr_mask;
    
    	/*
    	 * Touch the fpu the first time in non atomic context as if
    	 * this is the first fpu instruction the exception handler
    	 * will fire before the instruction returns and it'll have to
    	 * allocate ram with GFP_KERNEL.
    	 */
    	if (!used_math())
    		kvm_fx_save(&vcpu->arch.host_fx_image);
    
    	/* Initialize guest FPU by resetting ours and saving into guest's */
    	preempt_disable();
    	kvm_fx_save(&vcpu->arch.host_fx_image);
    	kvm_fx_finit();
    	kvm_fx_save(&vcpu->arch.guest_fx_image);
    	kvm_fx_restore(&vcpu->arch.host_fx_image);
    	/* Balance the preempt_disable() above once the host state is back. */
    	preempt_enable();
    
    	vcpu->arch.cr0 |= X86_CR0_ET;
    
    	/* Byte offset of the first field after mxcsr_mask. */
    	after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
    
    	vcpu->arch.guest_fx_image.mxcsr = 0x1f80;
    	memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask,
    	       0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
    }
    EXPORT_SYMBOL_GPL(fx_init);
    
    /*
     * Swap the guest FPU state onto the CPU: save the host image and restore
     * the guest image.  Does nothing when the guest state is already loaded.
     */
    void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
    {
    	if (!vcpu->guest_fpu_loaded) {
    		vcpu->guest_fpu_loaded = 1;
    
    		kvm_fx_save(&vcpu->arch.host_fx_image);
    		kvm_fx_restore(&vcpu->arch.guest_fx_image);
    
    		trace_kvm_fpu(1);
    	}
    }
    
    /*
     * Swap the guest FPU state off the CPU: save the guest image and restore
     * the host image.  Does nothing when the guest state is not loaded.
     *
     * Also bumps the fpu_reload statistic and raises KVM_REQ_DEACTIVATE_FPU
     * on the vcpu.
     */
    void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
    {
    	if (!vcpu->guest_fpu_loaded)
    		return;
    
    	vcpu->guest_fpu_loaded = 0;
    
    	kvm_fx_save(&vcpu->arch.guest_fx_image);
    	kvm_fx_restore(&vcpu->arch.host_fx_image);
    
    	++vcpu->stat.fpu_reload;
    
    	set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
    
    	trace_kvm_fpu(0);
    }
    
    
    /*
     * Release the vcpu's time page, if one is held, and hand the vcpu to the
     * vendor-specific vcpu_free hook.
     */
    void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
    {
    
    	if (vcpu->arch.time_page) {
    		kvm_release_page_dirty(vcpu->arch.time_page);
    		/* Cleared so the page is not released a second time. */
    		vcpu->arch.time_page = NULL;
    	}
    
    
    	kvm_x86_ops->vcpu_free(vcpu);
    }
    
    /*
     * Create vcpu @id for @kvm by delegating to the vendor-specific
     * vcpu_create hook; its return value is passed through unchanged.
     */
    struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
    						unsigned int id)
    {
    
    	return kvm_x86_ops->vcpu_create(kvm, id);
    }
    
    /*
     * kvm_arch_vcpu_setup - reset a new vcpu and set up its MMU.
     *
     * Returns 0 on success.  On failure the vcpu is released through the
     * vendor vcpu_free hook and the negative error from
     * kvm_arch_vcpu_reset()/kvm_mmu_setup() is returned.
     */
    int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
    {
    	int r;
    
    	/* We do fxsave: this must be aligned. */
    	BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
    
    	vcpu->arch.mtrr_state.have_fixed = 1;
    
    	vcpu_load(vcpu);
    	r = kvm_arch_vcpu_reset(vcpu);
    	if (r == 0)
    		r = kvm_mmu_setup(vcpu);
    	vcpu_put(vcpu);
    	if (r < 0)
    		goto free_vcpu;
    
    	/* Success must not fall through into the error path below. */
    	return 0;
    
    free_vcpu:
    	kvm_x86_ops->vcpu_free(vcpu);
    	return r;
    }
    
    /*
     * Unload the vcpu's MMU (with the vcpu loaded for the duration of the
     * call) and then release the vcpu through the vendor vcpu_free hook.
     */
    void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
    
    {
    	vcpu_load(vcpu);
    	kvm_mmu_unload(vcpu);
    	vcpu_put(vcpu);
    
    	kvm_x86_ops->vcpu_free(vcpu);
    }
    
    /*
     * Put the architecture-independent part of the vcpu into its reset state
     * (no NMI pending or injected, debug registers cleared, DR6/DR7 at their
     * fixed-1 values) and then run the vendor reset hook, whose result is
     * returned.
     */
    int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
    {
    	/* NMI bookkeeping. */
    	vcpu->arch.nmi_injected = false;
    	vcpu->arch.nmi_pending = false;
    
    	/* Debug register state. */
    	vcpu->arch.dr7 = DR7_FIXED_1;
    	vcpu->arch.dr6 = DR6_FIXED_1;
    	memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
    	vcpu->arch.switch_db_regs = 0;
    
    	return kvm_x86_ops->vcpu_reset(vcpu);
    }
    
    
    int kvm_arch_hardware_enable(void *garbage)
    
    	/*
    	 * Since this may be called from a hotplug notifcation,
    	 * we can't get the CPU frequency directly.
    	 */
    	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
    		int cpu = raw_smp_processor_id();
    		per_cpu(cpu_tsc_khz, cpu) = 0;
    	}
    
    
    	kvm_shared_msr_cpu_online();
    
    
    	return kvm_x86_ops->hardware_enable(garbage);
    
    }
    
    /*
     * Disable virtualization support on this CPU through the vendor hook,
     * then drop the user-return notifiers.  @garbage is forwarded unchanged
     * to both calls.
     */
    void kvm_arch_hardware_disable(void *garbage)
    {
    	kvm_x86_ops->hardware_disable(garbage);
    
    	drop_user_return_notifiers(garbage);
    
    }
    
    /* Delegate to the vendor hardware_setup hook and return its result. */
    int kvm_arch_hardware_setup(void)
    {
    	return kvm_x86_ops->hardware_setup();
    }
    
    /* Delegate to the vendor hardware_unsetup hook. */
    void kvm_arch_hardware_unsetup(void)
    {
    	kvm_x86_ops->hardware_unsetup();
    }
    
    /*
     * Delegate to the vendor check_processor_compatibility hook.  @rtn is
     * forwarded unchanged; presumably the hook reports its verdict through
     * it - confirm against the vendor implementations.
     */
    void kvm_arch_check_processor_compat(void *rtn)
    {
    	kvm_x86_ops->check_processor_compatibility(rtn);
    }
    
    int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
    {
    	struct page *page;
    	struct kvm *kvm;
    	int r;
    
    	BUG_ON(vcpu->kvm == NULL);
    	kvm = vcpu->kvm;
    
    
    	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
    
    	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
    
    		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
    
    		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
    
    
    	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
    	if (!page) {
    		r = -ENOMEM;
    		goto fail;
    	}
    
    	vcpu->arch.pio_data = page_address(page);
    
    
    	r = kvm_mmu_create(vcpu);
    	if (r < 0)
    		goto fail_free_pio_data;
    
    	if (irqchip_in_kernel(kvm)) {
    		r = kvm_create_lapic(vcpu);
    		if (r < 0)
    			goto fail_mmu_destroy;
    	}
    
    
    Huang Ying's avatar
    Huang Ying committed
    	vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
    				       GFP_KERNEL);
    	if (!vcpu->arch.mce_banks) {
    		r = -ENOMEM;
    
    Huang Ying's avatar
    Huang Ying committed
    	}
    	vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
    
    
    fail_free_lapic:
    	kvm_free_lapic(vcpu);
    
    fail_mmu_destroy:
    	kvm_mmu_destroy(vcpu);
    fail_free_pio_data:
    
    	free_page((unsigned long)vcpu->arch.pio_data);
    
    fail:
    	return r;
    }
    
    /*
     * kvm_arch_vcpu_uninit - release the per-vcpu architecture resources.
     *
     * Frees the MCE bank array, the local APIC, the MMU and the PIO data
     * page, i.e. the same resources that the unwind labels of
     * kvm_arch_vcpu_init() release.
     */
    void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
    {
    	int idx;
    
    	kfree(vcpu->arch.mce_banks);
    	kvm_free_lapic(vcpu);
    
    	/*
    	 * NOTE(review): the MMU teardown is placed inside the SRCU read-side
    	 * section because the section was otherwise empty - confirm against
    	 * the authoritative source.
    	 */
    	idx = srcu_read_lock(&vcpu->kvm->srcu);
    	kvm_mmu_destroy(vcpu);
    	srcu_read_unlock(&vcpu->kvm->srcu, idx);
    
    	free_page((unsigned long)vcpu->arch.pio_data);
    }
    
    
    /*
     * Allocate and minimally initialise a new VM structure.
     *
     * Returns the new struct kvm, or ERR_PTR(-ENOMEM) when either the
     * structure itself or its alias table cannot be allocated.
     */
    struct  kvm *kvm_arch_create_vm(void)
    {
    	struct kvm *kvm;
    
    	kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
    	if (!kvm)
    		goto out_nomem;
    
    	kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
    	if (!kvm->arch.aliases)
    		goto out_free_kvm;
    
    	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
    	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
    
    	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
    	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
    
    	return kvm;
    
    out_free_kvm:
    	kfree(kvm);
    out_nomem:
    	return ERR_PTR(-ENOMEM);
    }
    
    /*
     * Unload @vcpu's MMU.  The vcpu is loaded around the call and put again
     * afterwards.
     */
    static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
    {
    	vcpu_load(vcpu);
    	kvm_mmu_unload(vcpu);
    	vcpu_put(vcpu);
    }
    
    /*
     * Free every vcpu of @kvm.
     *
     * All MMUs are unloaded in a first pass before any vcpu is freed in a
     * second pass.  Afterwards the vcpu table is cleared and online_vcpus is
     * reset to 0 under kvm->lock.
     */
    static void kvm_free_vcpus(struct kvm *kvm)
    {
    	unsigned int i;
    	struct kvm_vcpu *vcpu;
    
    	kvm_for_each_vcpu(i, vcpu, kvm)
    		kvm_unload_vcpu_mmu(vcpu);
    	kvm_for_each_vcpu(i, vcpu, kvm)
    		kvm_arch_vcpu_free(vcpu);
    
    	mutex_lock(&kvm->lock);
    	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
    		kvm->vcpus[i] = NULL;
    
    	atomic_set(&kvm->online_vcpus, 0);
    	mutex_unlock(&kvm->lock);
    }
    
    /* Release every device that is still assigned to @kvm. */
    void kvm_arch_sync_events(struct kvm *kvm)
    {
    	kvm_free_all_assigned_devices(kvm);
    }
    
    /*
     * kvm_arch_destroy_vm - tear down the architecture state of @kvm.
     *
     * Unmaps the guest from the IOMMU, frees the PIT, PIC and IOAPIC, all
     * vcpus and guest physical memory, drops the references on the APIC
     * access page and the EPT identity page table when present, cleans up the
     * SRCU structure and frees the alias table.
     *
     * NOTE(review): the kvm structure itself is not freed here - confirm
     * that this is handled elsewhere.
     */
    void kvm_arch_destroy_vm(struct kvm *kvm)
    {
    	kvm_iommu_unmap_guest(kvm);
    
    	kvm_free_pit(kvm);
    
    	kfree(kvm->arch.vpic);
    	kfree(kvm->arch.vioapic);
    
    	kvm_free_vcpus(kvm);
    	kvm_free_physmem(kvm);
    
    	if (kvm->arch.apic_access_page)
    		put_page(kvm->arch.apic_access_page);
    
    	if (kvm->arch.ept_identity_pagetable)
    		put_page(kvm->arch.ept_identity_pagetable);
    
    	cleanup_srcu_struct(&kvm->srcu);
    
    	kfree(kvm->arch.aliases);
    }
    
    /*
     * kvm_arch_prepare_memory_region - architecture hook run before a memory
     * region is installed.
     *
     * Visible behaviour: when the region is not user-allocated, has pages and
     * the old slot has no rmap, an anonymous private read/write mapping of
     * npages pages is created and stored in memslot->userspace_addr.
     *
     * NOTE(review): this text of the function is incomplete - the parameter
     * list is cut off (the body uses `old` and `user_alloc`, which are not
     * declared in the visible signature), the opening brace is missing, the
     * do_mmap() call is truncated, and the two nested `if` blocks are not
     * closed.  Restore the missing lines from the authoritative source
     * before building.
     */
    int kvm_arch_prepare_memory_region(struct kvm *kvm,
    				struct kvm_memory_slot *memslot,
    
    				struct kvm_userspace_memory_region *mem,
    
    	int npages = memslot->npages;
    
    
    	/* To keep backward compatibility with older userspace,
    	 * x86 needs to handle the !user_alloc case.
    	 */
    	if (!user_alloc) {
    		if (npages && !old.rmap) {
    
    			unsigned long userspace_addr;
    
    
    			userspace_addr = do_mmap(NULL, 0,
    						 npages * PAGE_SIZE,
    						 PROT_READ | PROT_WRITE,
    
    						 MAP_PRIVATE | MAP_ANONYMOUS,
    
    			/* do_mmap() reports failure as an error-encoded address. */
    			if (IS_ERR((void *)userspace_addr))
    				return PTR_ERR((void *)userspace_addr);
    
    			memslot->userspace_addr = userspace_addr;
    
    
    	return 0;
    }
    
    /*
     * kvm_arch_commit_memory_region - architecture hook run after a memory
     * region change has been committed.
     * @kvm:        VM being modified
     * @mem:        the userspace request describing the new region
     * @old:        copy of the slot as it was before the change
     * @user_alloc: non-zero when userspace supplied the backing memory
     *
     * Unmaps the kernel-created mapping of a deleted, non-user-allocated
     * slot, then, under mmu_lock, recomputes the MMU page budget (unless a
     * fixed number was requested) and removes write access from the slot.
     */
    void kvm_arch_commit_memory_region(struct kvm *kvm,
    				struct kvm_userspace_memory_region *mem,
    				struct kvm_memory_slot old,
    				int user_alloc)
    {
    	int npages = mem->memory_size >> PAGE_SHIFT;
    
    	/* The slot is being deleted (!npages) and its mapping was ours. */
    	if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
    		int ret;
    
    		down_write(&current->mm->mmap_sem);
    		ret = do_munmap(current->mm, old.userspace_addr,
    				old.npages * PAGE_SIZE);
    		up_write(&current->mm->mmap_sem);
    		/* Best effort: a failed unmap is only logged. */
    		if (ret < 0)
    			printk(KERN_WARNING
    			       "kvm_vm_ioctl_set_memory_region: "
    			       "failed to munmap memory\n");
    	}
    
    	spin_lock(&kvm->mmu_lock);
    
    	if (!kvm->arch.n_requested_mmu_pages) {
    		unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
    		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
    	}
    
    	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
    
    	spin_unlock(&kvm->mmu_lock);
    }
    
    /*
     * Zap all MMU pages of @kvm and ask every vcpu to reload its MMU.
     */
    void kvm_arch_flush_shadow(struct kvm *kvm)
    {
    	kvm_mmu_zap_all(kvm);
    
    	kvm_reload_remote_mmus(kvm);
    }
    
    /*
     * Returns non-zero when @vcpu has work to do: it is RUNNABLE, has
     * received a SIPI, has an NMI pending, or has a pending interrupt that
     * it is currently allowed to take.
     */
    int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
    {
    	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
    		|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
    		|| vcpu->arch.nmi_pending ||
    		(kvm_arch_interrupt_allowed(vcpu) &&
    		 kvm_cpu_has_interrupt(vcpu));
    }
    
    
    /*
     * kvm_vcpu_kick - wake a sleeping vcpu and/or nudge the CPU it runs on.
     *
     * Any waiter on the vcpu's wait queue is woken.  If the vcpu last ran on
     * a different, online CPU and no kick is already outstanding
     * (KVM_REQ_KICK), a reschedule IPI is sent to that CPU.
     */
    void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
    {
    	int me;
    	int cpu = vcpu->cpu;
    
    	if (waitqueue_active(&vcpu->wq)) {
    		wake_up_interruptible(&vcpu->wq);
    		++vcpu->stat.halt_wakeup;
    	}
    
    	me = get_cpu();
    	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
    		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
    			smp_send_reschedule(cpu);
    	/* Balance get_cpu() so preemption is re-enabled on return. */
    	put_cpu();
    }
    
    
    /* Delegate to the vendor interrupt_allowed hook and return its result. */
    int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
    {
    	return kvm_x86_ops->interrupt_allowed(vcpu);
    }
    
    bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
    {
    	unsigned long current_rip = kvm_rip_read(vcpu) +
    		get_segment_base(vcpu, VCPU_SREG_CS);
    
    	return current_rip == linear_rip;
    }