    	}
    
    	post_kvm_run_save(vcpu, kvm_run);
    
    	return r;
    }
    
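    /*
     * Top-level handler for the KVM_RUN ioctl.  Any PIO or MMIO emulation
     * left pending by the previous exit is completed first, CR8 is re-synced
     * from userspace when the local APIC is not emulated in the kernel, and
     * the guest is then (re)entered via __vcpu_run() with the caller's
     * signal mask temporarily installed.
     */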
    int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
    {
    	int r;
    	sigset_t sigsaved;
    
    	vcpu_load(vcpu);
    
    	if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
    		kvm_vcpu_block(vcpu);
    		vcpu_put(vcpu);
    		return -EAGAIN;
    	}
    
    	if (vcpu->sigset_active)
    		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
    
    	/* re-sync apic's tpr */
    	if (!irqchip_in_kernel(vcpu->kvm))
    		set_cr8(vcpu, kvm_run->cr8);
    
    	if (vcpu->pio.cur_count) {
    		r = complete_pio(vcpu);
    		if (r)
    			goto out;
    	}
    #ifdef CONFIG_HAS_IOMEM
    	if (vcpu->mmio_needed) {
    		memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
    		vcpu->mmio_read_completed = 1;
    		vcpu->mmio_needed = 0;
    		r = emulate_instruction(vcpu, kvm_run,
    					vcpu->mmio_fault_cr2, 0, 1);
    		if (r == EMULATE_DO_MMIO) {
    			/*
    			 * Read-modify-write.  Back to userspace.
    			 */
    			r = 0;
    			goto out;
    		}
    	}
    #endif
    	if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
    		kvm_x86_ops->cache_regs(vcpu);
    		vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
    		kvm_x86_ops->decache_regs(vcpu);
    	}
    
    	r = __vcpu_run(vcpu, kvm_run);
    
    out:
    	if (vcpu->sigset_active)
    		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
    
    	vcpu_put(vcpu);
    	return r;
    }
    
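    /*
     * KVM_GET_REGS: copy the cached general-purpose registers, RIP and
     * RFLAGS out to userspace.  TF and RF are masked out so that flags set
     * purely for guest single-stepping do not leak into the saved state.
     */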
    int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
    {
    	vcpu_load(vcpu);
    
    	kvm_x86_ops->cache_regs(vcpu);
    
    	regs->rax = vcpu->regs[VCPU_REGS_RAX];
    	regs->rbx = vcpu->regs[VCPU_REGS_RBX];
    	regs->rcx = vcpu->regs[VCPU_REGS_RCX];
    	regs->rdx = vcpu->regs[VCPU_REGS_RDX];
    	regs->rsi = vcpu->regs[VCPU_REGS_RSI];
    	regs->rdi = vcpu->regs[VCPU_REGS_RDI];
    	regs->rsp = vcpu->regs[VCPU_REGS_RSP];
    	regs->rbp = vcpu->regs[VCPU_REGS_RBP];
    #ifdef CONFIG_X86_64
    	regs->r8 = vcpu->regs[VCPU_REGS_R8];
    	regs->r9 = vcpu->regs[VCPU_REGS_R9];
    	regs->r10 = vcpu->regs[VCPU_REGS_R10];
    	regs->r11 = vcpu->regs[VCPU_REGS_R11];
    	regs->r12 = vcpu->regs[VCPU_REGS_R12];
    	regs->r13 = vcpu->regs[VCPU_REGS_R13];
    	regs->r14 = vcpu->regs[VCPU_REGS_R14];
    	regs->r15 = vcpu->regs[VCPU_REGS_R15];
    #endif
    
    	regs->rip = vcpu->rip;
    	regs->rflags = kvm_x86_ops->get_rflags(vcpu);
    
    	/*
    	 * Don't leak debug flags in case they were set for guest debugging
    	 */
    	if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
    		regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
    
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
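    /*
     * KVM_SET_REGS: load the general-purpose registers, RIP and RFLAGS
     * from userspace and write them back through the register cache.
     */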
    int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
    {
    	vcpu_load(vcpu);
    
    	vcpu->regs[VCPU_REGS_RAX] = regs->rax;
    	vcpu->regs[VCPU_REGS_RBX] = regs->rbx;
    	vcpu->regs[VCPU_REGS_RCX] = regs->rcx;
    	vcpu->regs[VCPU_REGS_RDX] = regs->rdx;
    	vcpu->regs[VCPU_REGS_RSI] = regs->rsi;
    	vcpu->regs[VCPU_REGS_RDI] = regs->rdi;
    	vcpu->regs[VCPU_REGS_RSP] = regs->rsp;
    	vcpu->regs[VCPU_REGS_RBP] = regs->rbp;
    #ifdef CONFIG_X86_64
    	vcpu->regs[VCPU_REGS_R8] = regs->r8;
    	vcpu->regs[VCPU_REGS_R9] = regs->r9;
    	vcpu->regs[VCPU_REGS_R10] = regs->r10;
    	vcpu->regs[VCPU_REGS_R11] = regs->r11;
    	vcpu->regs[VCPU_REGS_R12] = regs->r12;
    	vcpu->regs[VCPU_REGS_R13] = regs->r13;
    	vcpu->regs[VCPU_REGS_R14] = regs->r14;
    	vcpu->regs[VCPU_REGS_R15] = regs->r15;
    #endif
    
    	vcpu->rip = regs->rip;
    	kvm_x86_ops->set_rflags(vcpu, regs->rflags);
    
    	kvm_x86_ops->decache_regs(vcpu);
    
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    static void get_segment(struct kvm_vcpu *vcpu,
    			struct kvm_segment *var, int seg)
    {
    	kvm_x86_ops->get_segment(vcpu, var, seg);
    }
    
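    /*
     * Report the D/B and L bits of the current CS descriptor; together they
     * determine the guest's default operand size and whether it is running
     * in 64-bit mode.
     */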
    void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
    {
    	struct kvm_segment cs;
    
    	get_segment(vcpu, &cs, VCPU_SREG_CS);
    	*db = cs.db;
    	*l = cs.l;
    }
    EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
    
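    /*
     * KVM_GET_SREGS: read back the segment registers, descriptor tables,
     * control registers, EFER, the APIC base and pending-interrupt state.
     * With an in-kernel irqchip only the single vector reported by the
     * backend is encoded in interrupt_bitmap; otherwise the software
     * irq_pending array is copied out verbatim.
     */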
    int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
    				  struct kvm_sregs *sregs)
    {
    	struct descriptor_table dt;
    	int pending_vec;
    
    	vcpu_load(vcpu);
    
    	get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
    	get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
    	get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
    	get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
    	get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
    	get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
    
    	get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
    	get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
    
    	kvm_x86_ops->get_idt(vcpu, &dt);
    	sregs->idt.limit = dt.limit;
    	sregs->idt.base = dt.base;
    	kvm_x86_ops->get_gdt(vcpu, &dt);
    	sregs->gdt.limit = dt.limit;
    	sregs->gdt.base = dt.base;
    
    	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
    	sregs->cr0 = vcpu->cr0;
    	sregs->cr2 = vcpu->cr2;
    	sregs->cr3 = vcpu->cr3;
    	sregs->cr4 = vcpu->cr4;
    	sregs->cr8 = get_cr8(vcpu);
    	sregs->efer = vcpu->shadow_efer;
    	sregs->apic_base = kvm_get_apic_base(vcpu);
    
    	if (irqchip_in_kernel(vcpu->kvm)) {
    		memset(sregs->interrupt_bitmap, 0,
    		       sizeof sregs->interrupt_bitmap);
    		pending_vec = kvm_x86_ops->get_irq(vcpu);
    		if (pending_vec >= 0)
    			set_bit(pending_vec,
    				(unsigned long *)sregs->interrupt_bitmap);
    	} else
    		memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
    		       sizeof sregs->interrupt_bitmap);
    
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    static void set_segment(struct kvm_vcpu *vcpu,
    			struct kvm_segment *var, int seg)
    {
    	kvm_x86_ops->set_segment(vcpu, var, seg);
    }
    
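    /*
     * KVM_SET_SREGS: the inverse of the above.  Changes to CR0, CR3, CR4 or
     * EFER force a shadow-MMU context reset, and the PDPTRs are reloaded
     * when the guest is in PAE paging (non-long) mode.  Pending interrupt
     * state is restored either into irq_pending (userspace irqchip) or by
     * re-injecting the first pending vector.
     */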
    int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
    				  struct kvm_sregs *sregs)
    {
    	int mmu_reset_needed = 0;
    	int i, pending_vec, max_bits;
    	struct descriptor_table dt;
    
    	vcpu_load(vcpu);
    
    	dt.limit = sregs->idt.limit;
    	dt.base = sregs->idt.base;
    	kvm_x86_ops->set_idt(vcpu, &dt);
    	dt.limit = sregs->gdt.limit;
    	dt.base = sregs->gdt.base;
    	kvm_x86_ops->set_gdt(vcpu, &dt);
    
    	vcpu->cr2 = sregs->cr2;
    	mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
    	vcpu->cr3 = sregs->cr3;
    
    	set_cr8(vcpu, sregs->cr8);
    
    	mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
    #ifdef CONFIG_X86_64
    	kvm_x86_ops->set_efer(vcpu, sregs->efer);
    #endif
    	kvm_set_apic_base(vcpu, sregs->apic_base);
    
    	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
    
    	mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
    	vcpu->cr0 = sregs->cr0;
    	kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
    
    	mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
    	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
    	if (!is_long_mode(vcpu) && is_pae(vcpu))
    		load_pdptrs(vcpu, vcpu->cr3);
    
    	if (mmu_reset_needed)
    		kvm_mmu_reset_context(vcpu);
    
    	if (!irqchip_in_kernel(vcpu->kvm)) {
    		memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
    		       sizeof vcpu->irq_pending);
    		vcpu->irq_summary = 0;
    		for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i)
    			if (vcpu->irq_pending[i])
    				__set_bit(i, &vcpu->irq_summary);
    	} else {
    		max_bits = (sizeof sregs->interrupt_bitmap) << 3;
    		pending_vec = find_first_bit(
    			(const unsigned long *)sregs->interrupt_bitmap,
    			max_bits);
    		/* Only pending external irq is handled here */
    		if (pending_vec < max_bits) {
    			kvm_x86_ops->set_irq(vcpu, pending_vec);
    			pr_debug("Set back pending irq %d\n",
    				 pending_vec);
    		}
    	}
    
    	set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
    	set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
    	set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
    	set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
    	set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
    	set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
    
    	set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
    	set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
    
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
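    /*
     * KVM_DEBUG_GUEST: hand the guest-debug configuration (breakpoints,
     * single-step) to the hardware backend.
     */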
    int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
    				    struct kvm_debug_guest *dbg)
    {
    	int r;
    
    	vcpu_load(vcpu);
    
    	r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
    
    	vcpu_put(vcpu);
    
    	return r;
    }
    
    
    /*
     * fxsave fpu state.  Taken from x86_64/processor.h.  To be killed when
     * we have asm/x86/processor.h
     */
    struct fxsave {
    	u16	cwd;
    	u16	swd;
    	u16	twd;
    	u16	fop;
    	u64	rip;
    	u64	rdp;
    	u32	mxcsr;
    	u32	mxcsr_mask;
    	u32	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
    #ifdef CONFIG_X86_64
    	u32	xmm_space[64];	/* 16*16 bytes for each XMM-reg = 256 bytes */
    #else
    	u32	xmm_space[32];	/* 8*16 bytes for each XMM-reg = 128 bytes */
    #endif
    };
    
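    /*
     * KVM_GET_FPU / KVM_SET_FPU: convert between the vcpu's guest fxsave
     * image and the flat struct kvm_fpu layout used by the ioctl ABI.
     */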
    int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
    {
    	struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
    
    	vcpu_load(vcpu);
    
    	memcpy(fpu->fpr, fxsave->st_space, 128);
    	fpu->fcw = fxsave->cwd;
    	fpu->fsw = fxsave->swd;
    	fpu->ftwx = fxsave->twd;
    	fpu->last_opcode = fxsave->fop;
    	fpu->last_ip = fxsave->rip;
    	fpu->last_dp = fxsave->rdp;
    	memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
    
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
    {
    	struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
    
    	vcpu_load(vcpu);
    
    	memcpy(fxsave->st_space, fpu->fpr, 128);
    	fxsave->cwd = fpu->fcw;
    	fxsave->swd = fpu->fsw;
    	fxsave->twd = fpu->ftwx;
    	fxsave->fop = fpu->last_opcode;
    	fxsave->rip = fpu->last_ip;
    	fxsave->rdp = fpu->last_dp;
    	memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
    
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
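    /*
     * Reset the guest FPU image: save the host FPU, reinitialize the FPU
     * hardware, capture that clean state as the guest image and restore the
     * host state.  Everything after mxcsr_mask (st_space onwards) is
     * cleared and MXCSR is set to its reset value of 0x1f80.
     */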
    void fx_init(struct kvm_vcpu *vcpu)
    {
    	unsigned after_mxcsr_mask;
    
    	/* Initialize guest FPU by resetting ours and saving into guest's */
    	preempt_disable();
    	fx_save(&vcpu->host_fx_image);
    	fpu_init();
    	fx_save(&vcpu->guest_fx_image);
    	fx_restore(&vcpu->host_fx_image);
    	preempt_enable();
    
    	vcpu->cr0 |= X86_CR0_ET;
    	after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
    	vcpu->guest_fx_image.mxcsr = 0x1f80;
    	memset((void *)&vcpu->guest_fx_image + after_mxcsr_mask,
    	       0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
    }
    EXPORT_SYMBOL_GPL(fx_init);
    
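    /*
     * Lazy FPU switching: kvm_load_guest_fpu() swaps the host FPU state for
     * the guest's before entering the guest, and kvm_put_guest_fpu()
     * restores the host state afterwards.  guest_fpu_loaded guards against
     * redundant switches, and fpu_active lets the backend skip the load
     * until the guest actually touches the FPU.
     */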
    void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
    {
    	if (!vcpu->fpu_active || vcpu->guest_fpu_loaded)
    		return;
    
    	vcpu->guest_fpu_loaded = 1;
    	fx_save(&vcpu->host_fx_image);
    	fx_restore(&vcpu->guest_fx_image);
    }
    EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
    
    void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
    {
    	if (!vcpu->guest_fpu_loaded)
    		return;
    
    	vcpu->guest_fpu_loaded = 0;
    	fx_save(&vcpu->guest_fx_image);
    	fx_restore(&vcpu->host_fx_image);
    }
    EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
    
    
    void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
    {
    	kvm_x86_ops->vcpu_free(vcpu);
    }
    
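    /*
     * Create a vcpu through the hardware backend, then reset it and set up
     * its MMU context.  host_fx_image must be 16-byte aligned because it is
     * used directly by fxsave/fxrstor.
     */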
    struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
    						unsigned int id)
    {
    	int r;
    	struct kvm_vcpu *vcpu = kvm_x86_ops->vcpu_create(kvm, id);
    
    	if (IS_ERR(vcpu)) {
    		r = -ENOMEM;
    		goto fail;
    	}
    
    	/* We do fxsave: this must be aligned. */
    	BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF);
    
    	vcpu_load(vcpu);
    	r = kvm_arch_vcpu_reset(vcpu);
    	if (r == 0)
    		r = kvm_mmu_setup(vcpu);
    	vcpu_put(vcpu);
    	if (r < 0)
    		goto free_vcpu;
    
    	return vcpu;
    free_vcpu:
    	kvm_x86_ops->vcpu_free(vcpu);
    fail:
    	return ERR_PTR(r);
    }
    
    void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
    {
    	vcpu_load(vcpu);
    	kvm_mmu_unload(vcpu);
    	vcpu_put(vcpu);
    
    	kvm_x86_ops->vcpu_free(vcpu);
    }
    
    int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
    {
    	return kvm_x86_ops->vcpu_reset(vcpu);
    }
    
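    /*
     * Thin pass-throughs to the hardware backend (vmx/svm) for per-CPU
     * hardware enable/disable, module-wide setup/teardown and processor
     * compatibility checking.
     */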
    void kvm_arch_hardware_enable(void *garbage)
    {
    	kvm_x86_ops->hardware_enable(garbage);
    }
    
    void kvm_arch_hardware_disable(void *garbage)
    {
    	kvm_x86_ops->hardware_disable(garbage);
    }
    
    int kvm_arch_hardware_setup(void)
    {
    	return kvm_x86_ops->hardware_setup();
    }
    
    void kvm_arch_hardware_unsetup(void)
    {
    	kvm_x86_ops->hardware_unsetup();
    }
    
    void kvm_arch_check_processor_compat(void *rtn)
    {
    	kvm_x86_ops->check_processor_compatibility(rtn);
    }
    
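    /*
     * Per-vcpu state initialization: allocate the PIO bounce page, create
     * the shadow MMU and, with an in-kernel irqchip, the local APIC.  Only
     * vcpu 0 starts out runnable in that case; other vcpus remain
     * uninitialized until they receive an INIT/SIPI.
     */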
    int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
    {
    	struct page *page;
    	struct kvm *kvm;
    	int r;
    
    	BUG_ON(vcpu->kvm == NULL);
    	kvm = vcpu->kvm;
    
    	vcpu->mmu.root_hpa = INVALID_PAGE;
    	if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
    		vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
    	else
    		vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED;
    
    	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
    	if (!page) {
    		r = -ENOMEM;
    		goto fail;
    	}
    	vcpu->pio_data = page_address(page);
    
    	r = kvm_mmu_create(vcpu);
    	if (r < 0)
    		goto fail_free_pio_data;
    
    	if (irqchip_in_kernel(kvm)) {
    		r = kvm_create_lapic(vcpu);
    		if (r < 0)
    			goto fail_mmu_destroy;
    	}
    
    	return 0;
    
    fail_mmu_destroy:
    	kvm_mmu_destroy(vcpu);
    fail_free_pio_data:
    	free_page((unsigned long)vcpu->pio_data);
    fail:
    	return r;
    }
    
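    /*
     * Undo kvm_arch_vcpu_init(): free the local APIC, the shadow MMU and
     * the PIO bounce page.
     */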
    void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
    {
    	kvm_free_lapic(vcpu);
    	kvm_mmu_destroy(vcpu);
    	free_page((unsigned long)vcpu->pio_data);
    }