		memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
    		vcpu->mmio_read_completed = 1;
    		vcpu->mmio_needed = 0;
    
    
		down_read(&vcpu->kvm->slots_lock);
		r = emulate_instruction(vcpu, kvm_run,
					vcpu->arch.mmio_fault_cr2, 0,
					EMULTYPE_NO_DECODE);
		up_read(&vcpu->kvm->slots_lock);
    
    		if (r == EMULATE_DO_MMIO) {
    			/*
    			 * Read-modify-write.  Back to userspace.
    			 */
    			r = 0;
    			goto out;
    		}
    	}
    #endif
    
    	if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
    		kvm_register_write(vcpu, VCPU_REGS_RAX,
    				     kvm_run->hypercall.ret);
    
    
    	r = __vcpu_run(vcpu, kvm_run);
    
    out:
    	if (vcpu->sigset_active)
    		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
    
    	vcpu_put(vcpu);
    	return r;
    }
    
    int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
    {
    	vcpu_load(vcpu);
    
    
    	regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
    	regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
    	regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
    	regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
    	regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
    	regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
    
    	regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
    	regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
    	regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
    	regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
    	regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
    	regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
    	regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
    	regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
    
    	regs->rip = kvm_rip_read(vcpu);
    
    	regs->rflags = kvm_x86_ops->get_rflags(vcpu);
    
    	/*
    	 * Don't leak debug flags in case they were set for guest debugging
    	 */
    	if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
    		regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
    
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
    {
    	vcpu_load(vcpu);
    
    
    	kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
    	kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
    	kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
    	kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
    	kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
    	kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
    	kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
    	kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
    
    	kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
    	kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
    	kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
    	kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
    	kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
    	kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
    	kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
    	kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
    
    
    	kvm_rip_write(vcpu, regs->rip);
    
    	kvm_x86_ops->set_rflags(vcpu, regs->rflags);
    
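	/*
	 * Userspace has replaced the register state wholesale; drop any
	 * exception that was queued for re-injection so it is not
	 * delivered on top of the new state.
	 */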
	vcpu->arch.exception.pending = false;

	vcpu_put(vcpu);

	return 0;
}

void kvm_get_segment(struct kvm_vcpu *vcpu,
		     struct kvm_segment *var, int seg)
{
	kvm_x86_ops->get_segment(vcpu, var, seg);
}
    
    void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
    {
    	struct kvm_segment cs;
    
    
    	kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
    
    	*db = cs.db;
    	*l = cs.l;
    }
    EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
    
    int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
    				  struct kvm_sregs *sregs)
    {
    	struct descriptor_table dt;
    	int pending_vec;
    
    	vcpu_load(vcpu);
    
    
    	kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
    	kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
    	kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
    	kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
    	kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
    	kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
    
    	kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
    	kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
    
    
    	kvm_x86_ops->get_idt(vcpu, &dt);
    	sregs->idt.limit = dt.limit;
    	sregs->idt.base = dt.base;
    	kvm_x86_ops->get_gdt(vcpu, &dt);
    	sregs->gdt.limit = dt.limit;
    	sregs->gdt.base = dt.base;
    
    	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
    
    	sregs->cr0 = vcpu->arch.cr0;
    	sregs->cr2 = vcpu->arch.cr2;
    	sregs->cr3 = vcpu->arch.cr3;
    	sregs->cr4 = vcpu->arch.cr4;
    
    	sregs->efer = vcpu->arch.shadow_efer;
    
    	sregs->apic_base = kvm_get_apic_base(vcpu);
    
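	/*
	 * With an in-kernel irqchip the pending-interrupt state lives in
	 * the emulated PIC/APIC, so only the vector reported by the
	 * backend is exposed; otherwise the userspace-managed pending
	 * bitmap is copied out as-is.
	 */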
    	if (irqchip_in_kernel(vcpu->kvm)) {
    		memset(sregs->interrupt_bitmap, 0,
    		       sizeof sregs->interrupt_bitmap);
    		pending_vec = kvm_x86_ops->get_irq(vcpu);
    		if (pending_vec >= 0)
    			set_bit(pending_vec,
    				(unsigned long *)sregs->interrupt_bitmap);
	} else
		memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
		       sizeof sregs->interrupt_bitmap);
    
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    
    int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
    				    struct kvm_mp_state *mp_state)
    {
    	vcpu_load(vcpu);
    	mp_state->mp_state = vcpu->arch.mp_state;
    	vcpu_put(vcpu);
    	return 0;
    }
    
    int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
    				    struct kvm_mp_state *mp_state)
    {
    	vcpu_load(vcpu);
    	vcpu->arch.mp_state = mp_state->mp_state;
    	vcpu_put(vcpu);
    	return 0;
    }
    
    
static void kvm_set_segment(struct kvm_vcpu *vcpu,
			struct kvm_segment *var, int seg)
{
	kvm_x86_ops->set_segment(vcpu, var, seg);
}

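/*
 * Unpack a packed x86 segment descriptor (base scattered across
 * base0/base1/base2, 20-bit limit scaled by 4K when the granularity
 * bit is set) into KVM's flat struct kvm_segment representation.
 */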
    static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
    				   struct kvm_segment *kvm_desct)
    {
    	kvm_desct->base = seg_desc->base0;
    	kvm_desct->base |= seg_desc->base1 << 16;
    	kvm_desct->base |= seg_desc->base2 << 24;
    	kvm_desct->limit = seg_desc->limit0;
    	kvm_desct->limit |= seg_desc->limit << 16;
    
    	if (seg_desc->g) {
    		kvm_desct->limit <<= 12;
    		kvm_desct->limit |= 0xfff;
    	}
    
    	kvm_desct->selector = selector;
    	kvm_desct->type = seg_desc->type;
    	kvm_desct->present = seg_desc->p;
    	kvm_desct->dpl = seg_desc->dpl;
    	kvm_desct->db = seg_desc->d;
    	kvm_desct->s = seg_desc->s;
    	kvm_desct->l = seg_desc->l;
    	kvm_desct->g = seg_desc->g;
    	kvm_desct->avl = seg_desc->avl;
    	if (!selector)
    		kvm_desct->unusable = 1;
    	else
    		kvm_desct->unusable = 0;
    	kvm_desct->padding = 0;
    }
    
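/*
 * Bit 2 of a selector is the table indicator (TI) bit: 1 means the
 * selector indexes the LDT, 0 the GDT.
 */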
    static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
    					   u16 selector,
    					   struct descriptor_table *dtable)
    {
	if (selector & 1 << 2) {
		struct kvm_segment kvm_seg;

		kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);

		if (kvm_seg.unusable)
			dtable->limit = 0;
		else
			dtable->limit = kvm_seg.limit;
		dtable->base = kvm_seg.base;
	} else
		kvm_x86_ops->get_gdt(vcpu, dtable);
    }
    
/* allowed only for 8-byte segment descriptors */
    static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
    					 struct desc_struct *seg_desc)
    {
    
	gpa_t gpa;
	struct descriptor_table dtable;
	u16 index = selector >> 3;
    
    	get_segment_descritptor_dtable(vcpu, selector, &dtable);
    
    	if (dtable.limit < index * 8 + 7) {
    		kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
    		return 1;
    	}
    
    	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
    	gpa += index * 8;
    	return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8);
    
    }
    
/* allowed only for 8-byte segment descriptors */
    static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
    					 struct desc_struct *seg_desc)
    {
    
	gpa_t gpa;
	struct descriptor_table dtable;
	u16 index = selector >> 3;
    
    	get_segment_descritptor_dtable(vcpu, selector, &dtable);
    
    	if (dtable.limit < index * 8 + 7)
    		return 1;
    
    	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
    	gpa += index * 8;
    	return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8);
    
    }
    
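/*
 * Reassemble the TSS base from the descriptor's split base fields,
 * then translate it to a guest physical address usable with
 * kvm_read_guest()/kvm_write_guest().
 */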
    static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
    			     struct desc_struct *seg_desc)
    {
    	u32 base_addr;
    
    	base_addr = seg_desc->base0;
    	base_addr |= (seg_desc->base1 << 16);
    	base_addr |= (seg_desc->base2 << 24);
    
    
    	return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr);
    
    }
    
    static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
    {
    	struct kvm_segment kvm_seg;
    
    
    	kvm_get_segment(vcpu, &kvm_seg, seg);
    
    	return kvm_seg.selector;
    }
    
    static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
    						u16 selector,
    						struct kvm_segment *kvm_seg)
    {
    	struct desc_struct seg_desc;
    
    	if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
    		return 1;
    	seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg);
    	return 0;
    }
    
    
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
				int type_bits, int seg)
{
    	struct kvm_segment kvm_seg;
    
    	if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
    		return 1;
    	kvm_seg.type |= type_bits;
    
    	if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS &&
    	    seg != VCPU_SREG_LDTR)
    		if (!kvm_seg.s)
    			kvm_seg.unusable = 1;
    
    
    	kvm_set_segment(vcpu, &kvm_seg, seg);
    
    	return 0;
    }
    
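/*
 * Snapshot the current vcpu state into a 32-bit TSS image, mirroring
 * what a hardware task switch would store; load_state_from_tss32() is
 * the inverse.
 */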
    static void save_state_to_tss32(struct kvm_vcpu *vcpu,
    				struct tss_segment_32 *tss)
    {
    	tss->cr3 = vcpu->arch.cr3;
    
    	tss->eip = kvm_rip_read(vcpu);
    
    	tss->eflags = kvm_x86_ops->get_rflags(vcpu);
    
    	tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
    	tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
    	tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP);
    	tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP);
    	tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI);
    	tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI);
    
    	tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
    	tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
    	tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
    	tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
    	tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
    	tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
    	tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
    	tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
    }
    
    static int load_state_from_tss32(struct kvm_vcpu *vcpu,
    				  struct tss_segment_32 *tss)
    {
    	kvm_set_cr3(vcpu, tss->cr3);
    
    
    	kvm_rip_write(vcpu, tss->eip);
    
    	kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);
    
    
    	kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
    	kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
    	kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx);
    	kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx);
    	kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp);
    	kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp);
    	kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
    	kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
    
	if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
		return 1;
	return 0;
    }
    
    static void save_state_to_tss16(struct kvm_vcpu *vcpu,
    				struct tss_segment_16 *tss)
    {
    
    	tss->ip = kvm_rip_read(vcpu);
    
    	tss->flag = kvm_x86_ops->get_rflags(vcpu);
    
    	tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
    	tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX);
    	tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP);
    	tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP);
    	tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI);
    	tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI);
    
    
    	tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
    	tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
    	tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
    	tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
    	tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
    	tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
    }
    
    static int load_state_from_tss16(struct kvm_vcpu *vcpu,
    				 struct tss_segment_16 *tss)
    {
    
    	kvm_rip_write(vcpu, tss->ip);
    
    	kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);
    
    	kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
    	kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
    	kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
    	kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx);
    	kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp);
    	kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp);
    	kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
    	kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
    
	if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
		return 1;
	return 0;
}

static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
		       u32 old_tss_base,
		       struct desc_struct *nseg_desc)
    {
    	struct tss_segment_16 tss_segment_16;
    	int ret = 0;
    
    
	if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
			   sizeof tss_segment_16))
		goto out;
    
    	save_state_to_tss16(vcpu, &tss_segment_16);
    
    
	if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
			    sizeof tss_segment_16))
		goto out;

    	if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
    			   &tss_segment_16, sizeof tss_segment_16))
    		goto out;
    
    
    	if (load_state_from_tss16(vcpu, &tss_segment_16))
    		goto out;
    
    	ret = 1;
    out:
    	return ret;
    }
    
    
static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
		       u32 old_tss_base,
		       struct desc_struct *nseg_desc)
    {
    	struct tss_segment_32 tss_segment_32;
    	int ret = 0;
    
    
	if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
			   sizeof tss_segment_32))
		goto out;
    
    	save_state_to_tss32(vcpu, &tss_segment_32);
    
    
    	if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
    			    sizeof tss_segment_32))
    		goto out;
    
	if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
			   &tss_segment_32, sizeof tss_segment_32))
		goto out;

    	if (load_state_from_tss32(vcpu, &tss_segment_32))
    		goto out;
    
    	ret = 1;
    out:
    	return ret;
    }
    
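/*
 * Emulate a hardware task switch: fetch and validate the new TSS
 * descriptor (present, large enough), save the outgoing state into the
 * old TSS, clear the old descriptor's busy bit on IRET/JMP, load state
 * from the new TSS, set the new descriptor's busy bit (except on IRET)
 * and maintain EFLAGS.NT for nested tasks.
 */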
    int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
    {
    	struct kvm_segment tr_seg;
    	struct desc_struct cseg_desc;
    	struct desc_struct nseg_desc;
    	int ret = 0;
    
    	u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
    	u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
    
    	old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base);
    
    	/* FIXME: Handle errors. Failure to read either TSS or their
    	 * descriptors should generate a pagefault.
    	 */
    
    	if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
    		goto out;
    
    
	if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc))
		goto out;
    
    	if (reason != TASK_SWITCH_IRET) {
    		int cpl;
    
    		cpl = kvm_x86_ops->get_cpl(vcpu);
    		if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) {
    			kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
    			return 1;
    		}
    	}
    
    	if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) {
    		kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
    		return 1;
    	}
    
	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
		cseg_desc.type &= ~(1 << 1); /* clear the busy (B) flag */
		save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc);
	}
    
    	if (reason == TASK_SWITCH_IRET) {
    		u32 eflags = kvm_x86_ops->get_rflags(vcpu);
    		kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
    	}
    
    	kvm_x86_ops->skip_emulated_instruction(vcpu);
    
	if (nseg_desc.type & 8)
		ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base,
					 &nseg_desc);
	else
		ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base,
					 &nseg_desc);
    
    	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
    		u32 eflags = kvm_x86_ops->get_rflags(vcpu);
    		kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT);
    	}
    
	if (reason != TASK_SWITCH_IRET) {
		nseg_desc.type |= (1 << 1);
		save_guest_segment_descriptor(vcpu, tss_selector,
					      &nseg_desc);
	}
    
    	kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS);
    	seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
    	tr_seg.type = 11;
    
    	kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
    
    out:
    	return ret;
    }
    EXPORT_SYMBOL_GPL(kvm_task_switch);
    
    
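/*
 * A changed CR0, CR3, CR4 or EFER can switch paging modes, so the
 * shadow MMU context is rebuilt via kvm_mmu_reset_context() whenever
 * one of them differs from the current value.
 */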
    int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
    				  struct kvm_sregs *sregs)
    {
    	int mmu_reset_needed = 0;
    	int i, pending_vec, max_bits;
    	struct descriptor_table dt;
    
    	vcpu_load(vcpu);
    
    	dt.limit = sregs->idt.limit;
    	dt.base = sregs->idt.base;
    	kvm_x86_ops->set_idt(vcpu, &dt);
    	dt.limit = sregs->gdt.limit;
    	dt.base = sregs->gdt.base;
    	kvm_x86_ops->set_gdt(vcpu, &dt);
    
    
    	vcpu->arch.cr2 = sregs->cr2;
    	mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
    	vcpu->arch.cr3 = sregs->cr3;
    
    	mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer;
    
    	kvm_x86_ops->set_efer(vcpu, sregs->efer);
    	kvm_set_apic_base(vcpu, sregs->apic_base);
    
    	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
    
    
    	mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
    
    	kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
    
    	mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
    
    	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
	if (!is_long_mode(vcpu) && is_pae(vcpu))
		load_pdptrs(vcpu, vcpu->arch.cr3);

    
    	if (mmu_reset_needed)
    		kvm_mmu_reset_context(vcpu);
    
    	if (!irqchip_in_kernel(vcpu->kvm)) {
    
    		memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap,
    		       sizeof vcpu->arch.irq_pending);
    		vcpu->arch.irq_summary = 0;
    		for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i)
    			if (vcpu->arch.irq_pending[i])
    				__set_bit(i, &vcpu->arch.irq_summary);
    
    	} else {
    		max_bits = (sizeof sregs->interrupt_bitmap) << 3;
    		pending_vec = find_first_bit(
    			(const unsigned long *)sregs->interrupt_bitmap,
    			max_bits);
    		/* Only pending external irq is handled here */
    		if (pending_vec < max_bits) {
    			kvm_x86_ops->set_irq(vcpu, pending_vec);
    			pr_debug("Set back pending irq %d\n",
    				 pending_vec);
    		}
    	}
    
    
    	kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
    	kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
    	kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
    	kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
    	kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
    	kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
    
    	kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
    	kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
    
    
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
    				    struct kvm_debug_guest *dbg)
    {
    	int r;
    
    	vcpu_load(vcpu);
    
    	r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
    
    	vcpu_put(vcpu);
    
    	return r;
    }
    
    
    /*
     * fxsave fpu state.  Taken from x86_64/processor.h.  To be killed when
     * we have asm/x86/processor.h
     */
    struct fxsave {
    	u16	cwd;
    	u16	swd;
    	u16	twd;
    	u16	fop;
    	u64	rip;
    	u64	rdp;
    	u32	mxcsr;
    	u32	mxcsr_mask;
    	u32	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
    #ifdef CONFIG_X86_64
    	u32	xmm_space[64];	/* 16*16 bytes for each XMM-reg = 256 bytes */
    #else
    	u32	xmm_space[32];	/* 8*16 bytes for each XMM-reg = 128 bytes */
    #endif
    };
    
    
    /*
     * Translate a guest virtual address to a guest physical address.
     */
    int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
    				    struct kvm_translation *tr)
    {
    	unsigned long vaddr = tr->linear_address;
    	gpa_t gpa;
    
    	vcpu_load(vcpu);
    
    	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr);
    
    	tr->physical_address = gpa;
    	tr->valid = gpa != UNMAPPED_GVA;
    	tr->writeable = 1;
    	tr->usermode = 0;
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    
    int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
    {
    
    	struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
    
    
    	vcpu_load(vcpu);
    
    	memcpy(fpu->fpr, fxsave->st_space, 128);
    	fpu->fcw = fxsave->cwd;
    	fpu->fsw = fxsave->swd;
    	fpu->ftwx = fxsave->twd;
    	fpu->last_opcode = fxsave->fop;
    	fpu->last_ip = fxsave->rip;
    	fpu->last_dp = fxsave->rdp;
    	memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
    
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
    {
    
    	struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
    
    
    	vcpu_load(vcpu);
    
    	memcpy(fxsave->st_space, fpu->fpr, 128);
    	fxsave->cwd = fpu->fcw;
    	fxsave->swd = fpu->fsw;
    	fxsave->twd = fpu->ftwx;
    	fxsave->fop = fpu->last_opcode;
    	fxsave->rip = fpu->last_ip;
    	fxsave->rdp = fpu->last_dp;
    	memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
    
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    void fx_init(struct kvm_vcpu *vcpu)
    {
    	unsigned after_mxcsr_mask;
    
    
	/*
	 * Touch the FPU the first time in non-atomic context: if this is
	 * the first FPU instruction, the exception handler will fire
	 * before the instruction returns, and it will have to allocate
	 * RAM with GFP_KERNEL.
	 */
	if (!used_math())
		kvm_fx_save(&vcpu->arch.host_fx_image);
    
    	/* Initialize guest FPU by resetting ours and saving into guest's */
	preempt_disable();
	kvm_fx_save(&vcpu->arch.host_fx_image);
	kvm_fx_finit();
	kvm_fx_save(&vcpu->arch.guest_fx_image);
	kvm_fx_restore(&vcpu->arch.host_fx_image);
	preempt_enable();

	vcpu->arch.cr0 |= X86_CR0_ET;
    
    	after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
    
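	/* 0x1f80 is the processor's power-on default MXCSR: all SSE exceptions masked. */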
    	vcpu->arch.guest_fx_image.mxcsr = 0x1f80;
	memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask,
	       0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
    }
    EXPORT_SYMBOL_GPL(fx_init);
    
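/*
 * Lazy FPU switching: the guest FPU image is restored only once a vcpu
 * actually uses the FPU (kvm_load_guest_fpu) and saved back out in
 * kvm_put_guest_fpu, so exits on which the guest never touched the FPU
 * avoid the FXSAVE/FXRSTOR round trip entirely.
 */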
    void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
    {
    	if (!vcpu->fpu_active || vcpu->guest_fpu_loaded)
    		return;
    
    	vcpu->guest_fpu_loaded = 1;
    
    	kvm_fx_save(&vcpu->arch.host_fx_image);
    	kvm_fx_restore(&vcpu->arch.guest_fx_image);
    
    }
    EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
    
    void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
    {
    	if (!vcpu->guest_fpu_loaded)
    		return;
    
    	vcpu->guest_fpu_loaded = 0;
    
    	kvm_fx_save(&vcpu->arch.guest_fx_image);
    	kvm_fx_restore(&vcpu->arch.host_fx_image);
    
    	++vcpu->stat.fpu_reload;
    
    }
    EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
    
    
    void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
    {
    	kvm_x86_ops->vcpu_free(vcpu);
    }
    
    struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
    						unsigned int id)
    {
    
    	return kvm_x86_ops->vcpu_create(kvm, id);
    }
    
    int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
    {
    	int r;
    
    
    	/* We do fxsave: this must be aligned. */
    
    	BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
    
    
    	vcpu_load(vcpu);
    	r = kvm_arch_vcpu_reset(vcpu);
    	if (r == 0)
    		r = kvm_mmu_setup(vcpu);
    	vcpu_put(vcpu);
	if (r < 0)
		goto free_vcpu;

	return 0;
free_vcpu:
	kvm_x86_ops->vcpu_free(vcpu);
	return r;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
    	vcpu_load(vcpu);
    	kvm_mmu_unload(vcpu);
    	vcpu_put(vcpu);
    
    	kvm_x86_ops->vcpu_free(vcpu);
    }
    
    int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
    {
    	return kvm_x86_ops->vcpu_reset(vcpu);
    }
    
    void kvm_arch_hardware_enable(void *garbage)
    {
    	kvm_x86_ops->hardware_enable(garbage);
    }
    
    void kvm_arch_hardware_disable(void *garbage)
    {
    	kvm_x86_ops->hardware_disable(garbage);
    }
    
    int kvm_arch_hardware_setup(void)
    {
    	return kvm_x86_ops->hardware_setup();
    }
    
    void kvm_arch_hardware_unsetup(void)
    {
    	kvm_x86_ops->hardware_unsetup();
    }
    
    void kvm_arch_check_processor_compat(void *rtn)
    {
    	kvm_x86_ops->check_processor_compatibility(rtn);
    }
    
    int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
    {
    	struct page *page;
    	struct kvm *kvm;
    	int r;
    
    	BUG_ON(vcpu->kvm == NULL);
    	kvm = vcpu->kvm;
    
    
    	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
    
	if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
	else
		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
    
    	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
    	if (!page) {
    		r = -ENOMEM;
    		goto fail;
    	}
    
    	vcpu->arch.pio_data = page_address(page);
    
    
    	r = kvm_mmu_create(vcpu);
    	if (r < 0)
    		goto fail_free_pio_data;
    
    	if (irqchip_in_kernel(kvm)) {
    		r = kvm_create_lapic(vcpu);
    		if (r < 0)
    			goto fail_mmu_destroy;
    	}
    
    	return 0;
    
    fail_mmu_destroy:
    	kvm_mmu_destroy(vcpu);
fail_free_pio_data:
	free_page((unsigned long)vcpu->arch.pio_data);
    fail:
    	return r;
    }
    
    void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
    {
	kvm_free_lapic(vcpu);
	down_read(&vcpu->kvm->slots_lock);
	kvm_mmu_destroy(vcpu);
	up_read(&vcpu->kvm->slots_lock);
	free_page((unsigned long)vcpu->arch.pio_data);
}
    
struct kvm *kvm_arch_create_vm(void)
    {
    	struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
    
    	if (!kvm)
    		return ERR_PTR(-ENOMEM);
    
    
    	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
    
    
    	return kvm;
    }
    
    static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
    {
    	vcpu_load(vcpu);
    	kvm_mmu_unload(vcpu);
    	vcpu_put(vcpu);
    }
    
    static void kvm_free_vcpus(struct kvm *kvm)
    {
    	unsigned int i;
    
    	/*
    	 * Unpin any mmu pages first.
    	 */
    	for (i = 0; i < KVM_MAX_VCPUS; ++i)
    		if (kvm->vcpus[i])
    			kvm_unload_vcpu_mmu(kvm->vcpus[i]);
    	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
    		if (kvm->vcpus[i]) {
    			kvm_arch_vcpu_free(kvm->vcpus[i]);
    			kvm->vcpus[i] = NULL;
    		}
    	}
    
    }
    
    void kvm_arch_destroy_vm(struct kvm *kvm)
    {
    
	kvm_free_pit(kvm);
	kfree(kvm->arch.vpic);
	kfree(kvm->arch.vioapic);
	kvm_free_vcpus(kvm);
	kvm_free_physmem(kvm);
	if (kvm->arch.apic_access_page)
		put_page(kvm->arch.apic_access_page);
	if (kvm->arch.ept_identity_pagetable)
		put_page(kvm->arch.ept_identity_pagetable);
	kfree(kvm);
}

    int kvm_arch_set_memory_region(struct kvm *kvm,
    				struct kvm_userspace_memory_region *mem,
    				struct kvm_memory_slot old,
    				int user_alloc)
    {
    	int npages = mem->memory_size >> PAGE_SHIFT;
    	struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
    
	/* To keep backward compatibility with older userspace,
	 * x86 needs to handle the !user_alloc case.
	 */
    	if (!user_alloc) {
		if (npages && !old.rmap) {
			unsigned long userspace_addr;

			down_write(&current->mm->mmap_sem);
			userspace_addr = do_mmap(NULL, 0,
						 npages * PAGE_SIZE,
						 PROT_READ | PROT_WRITE,
						 MAP_SHARED | MAP_ANONYMOUS,
						 0);
			up_write(&current->mm->mmap_sem);

    			if (IS_ERR((void *)userspace_addr))
    				return PTR_ERR((void *)userspace_addr);