    	u32 function, index;
    	struct kvm_cpuid_entry2 *best;
    
    	function = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	index = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
    	kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
    	kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
    	kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
    	best = kvm_find_cpuid_entry(vcpu, function, index);
    	if (best) {
    		kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
    		kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
    		kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
    		kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
    
    	}
    	kvm_x86_ops->skip_emulated_instruction(vcpu);
    
    	KVMTRACE_5D(CPUID, vcpu, function,
    		    (u32)kvm_register_read(vcpu, VCPU_REGS_RAX),
    		    (u32)kvm_register_read(vcpu, VCPU_REGS_RBX),
    		    (u32)kvm_register_read(vcpu, VCPU_REGS_RCX),
    		    (u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler);
    
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
    
    /*
     * Check if userspace requested an interrupt window, and that the
     * interrupt window is open.
     *
     * No need to exit to userspace if we already have an interrupt queued.
     */
    static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
    					  struct kvm_run *kvm_run)
    {
    
    	return (!vcpu->arch.irq_summary &&
    		kvm_run->request_interrupt_window &&
    		vcpu->arch.interrupt_window_open &&
    		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
    }
    
    static void post_kvm_run_save(struct kvm_vcpu *vcpu,
    			      struct kvm_run *kvm_run)
    {
    	kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
    
    	kvm_run->apic_base = kvm_get_apic_base(vcpu);
    
    	if (irqchip_in_kernel(vcpu->kvm))
    		kvm_run->ready_for_interrupt_injection = 1;
    	else
    		kvm_run->ready_for_interrupt_injection =
    					(vcpu->arch.interrupt_window_open &&
    					 vcpu->arch.irq_summary == 0);
    }

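    /*
     * Pin the guest page that backs the virtual-APIC area so it can be
     * accessed while the vcpu is in guest mode.
     */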
    static void vapic_enter(struct kvm_vcpu *vcpu)
    {
    	struct kvm_lapic *apic = vcpu->arch.apic;
    	struct page *page;
    
    	if (!apic || !apic->vapic_addr)
    		return;
    
    	page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);

    	vcpu->arch.apic->vapic_page = page;
    }
    
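    /*
     * Unpin the virtual-APIC page and mark it dirty now that the guest may
     * have modified it.
     */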
    static void vapic_exit(struct kvm_vcpu *vcpu)
    {
    	struct kvm_lapic *apic = vcpu->arch.apic;
    
    	if (!apic || !apic->vapic_addr)
    		return;
    
    
    	down_read(&vcpu->kvm->slots_lock);
    
    	kvm_release_page_dirty(apic->vapic_page);
    	mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
    
    	up_read(&vcpu->kvm->slots_lock);
    }

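    /*
     * Enter the guest and run it until the next exit.  Returns a positive
     * value if __vcpu_run() should loop and re-enter the guest, and zero or
     * a negative value if control has to go back to userspace.
     */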
    static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
    {
    	int r;

    	if (vcpu->requests)
    		if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
    			kvm_mmu_unload(vcpu);
    
    
    	r = kvm_mmu_reload(vcpu);
    	if (unlikely(r))
    		goto out;
    
    
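    	/* Handle requests posted to the vcpu while it was out of guest mode. */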
    	if (vcpu->requests) {
    		if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
    
    			__kvm_migrate_timers(vcpu);
    
    		if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
    			kvm_write_guest_time(vcpu);
    
    		if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
    			kvm_mmu_sync_roots(vcpu);
    
    		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
    			kvm_x86_ops->tlb_flush(vcpu);
    
    		if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
    				       &vcpu->requests)) {
    			kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
    			r = 0;
    			goto out;
    		}
    
    		if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
    			kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
    			r = 0;
    			goto out;
    		}
    	}

    	clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
    
    	kvm_inject_pending_timer_irqs(vcpu);
    
    	preempt_disable();
    
    	kvm_x86_ops->prepare_guest_switch(vcpu);
    	kvm_load_guest_fpu(vcpu);
    
    	local_irq_disable();
    
    
    	if (vcpu->requests || need_resched() || signal_pending(current)) {
    
    		local_irq_enable();
    		preempt_enable();
    		r = 1;
    		goto out;
    	}
    
    
    	vcpu->guest_mode = 1;
    	/*
    	 * Make sure that guest_mode assignment won't happen after
    	 * testing the pending IRQ vector bitmap.
    	 */
    	smp_wmb();
    
    
    	if (vcpu->arch.exception.pending)
    		__queue_exception(vcpu);
    	else if (irqchip_in_kernel(vcpu->kvm))
    		kvm_x86_ops->inject_pending_irq(vcpu);
    	else
    		kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
    
    
    	kvm_lapic_sync_to_vapic(vcpu);
    
    
    	up_read(&vcpu->kvm->slots_lock);
    
    
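    	/*
    	 * Save the host debug registers and, if the guest uses hardware
    	 * breakpoints, install its effective DR0-DR3 values for this run.
    	 */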
    	get_debugreg(vcpu->arch.host_dr6, 6);
    	get_debugreg(vcpu->arch.host_dr7, 7);
    	if (unlikely(vcpu->arch.switch_db_regs)) {
    		get_debugreg(vcpu->arch.host_db[0], 0);
    		get_debugreg(vcpu->arch.host_db[1], 1);
    		get_debugreg(vcpu->arch.host_db[2], 2);
    		get_debugreg(vcpu->arch.host_db[3], 3);
    
    		set_debugreg(0, 7);
    		set_debugreg(vcpu->arch.eff_db[0], 0);
    		set_debugreg(vcpu->arch.eff_db[1], 1);
    		set_debugreg(vcpu->arch.eff_db[2], 2);
    		set_debugreg(vcpu->arch.eff_db[3], 3);
    	}
    
    	KVMTRACE_0D(VMENTRY, vcpu, entryexit);
    
    	kvm_x86_ops->run(vcpu, kvm_run);
    
    
    	if (unlikely(vcpu->arch.switch_db_regs)) {
    		set_debugreg(0, 7);
    		set_debugreg(vcpu->arch.host_db[0], 0);
    		set_debugreg(vcpu->arch.host_db[1], 1);
    		set_debugreg(vcpu->arch.host_db[2], 2);
    		set_debugreg(vcpu->arch.host_db[3], 3);
    	}
    	set_debugreg(vcpu->arch.host_dr6, 6);
    	set_debugreg(vcpu->arch.host_dr7, 7);
    
    
    	vcpu->guest_mode = 0;
    	local_irq_enable();
    
    	++vcpu->stat.exits;
    
    	/*
    	 * We must have an instruction between local_irq_enable() and
    	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
    	 * the interrupt shadow.  The stat.exits increment will do nicely.
    	 * But we need to prevent reordering, hence this barrier():
    	 */
    	barrier();
    
    	kvm_guest_exit();
    
    	preempt_enable();
    
    
    	down_read(&vcpu->kvm->slots_lock);
    
    
    	/*
    	 * Profile KVM exit RIPs:
    	 */
    	if (unlikely(prof_on == KVM_PROFILING)) {
    
    		unsigned long rip = kvm_rip_read(vcpu);
    		profile_hit(KVM_PROFILING, (void *)rip);
    	}

    	if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu))
    		vcpu->arch.exception.pending = false;
    
    	kvm_lapic_sync_from_vapic(vcpu);
    
    
    	r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
    out:
    	return r;
    }

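    /*
     * Outer run loop: keep entering the guest (or blocking while the vcpu is
     * halted) until an exit has to be delivered to userspace or a signal is
     * pending.
     */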
    static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
    {
    	int r;
    
    	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
    
    		pr_debug("vcpu %d received sipi with vector # %x\n",
    			 vcpu->vcpu_id, vcpu->arch.sipi_vector);
    
    		kvm_lapic_reset(vcpu);
    		r = kvm_arch_vcpu_reset(vcpu);
    		if (r)
    			return r;
    		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
    	}

    	down_read(&vcpu->kvm->slots_lock);
    	vapic_enter(vcpu);
    
    	r = 1;
    	while (r > 0) {
    
    		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
    			r = vcpu_enter_guest(vcpu, kvm_run);
    		else {
    		else {
    			up_read(&vcpu->kvm->slots_lock);
    			kvm_vcpu_block(vcpu);
    			down_read(&vcpu->kvm->slots_lock);
    			if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
    				if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
    					vcpu->arch.mp_state =
    							KVM_MP_STATE_RUNNABLE;
    			if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
    				r = -EINTR;
    		}
    
    		if (r > 0) {
    			if (dm_request_for_irq_injection(vcpu, kvm_run)) {
    				r = -EINTR;
    				kvm_run->exit_reason = KVM_EXIT_INTR;
    				++vcpu->stat.request_irq_exits;
    			}
    			if (signal_pending(current)) {
    				r = -EINTR;
    				kvm_run->exit_reason = KVM_EXIT_INTR;
    				++vcpu->stat.signal_exits;
    			}
    			if (need_resched()) {
    				up_read(&vcpu->kvm->slots_lock);
    				kvm_resched(vcpu);
    				down_read(&vcpu->kvm->slots_lock);
    			}
    		}
    	}

    	up_read(&vcpu->kvm->slots_lock);
    
    	post_kvm_run_save(vcpu, kvm_run);
    
    
    	vapic_exit(vcpu);
    
    
    	return r;
    }
    
    int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
    {
    	int r;
    	sigset_t sigsaved;
    
    	vcpu_load(vcpu);
    
    
    	if (vcpu->sigset_active)
    		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
    
    
    	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
    		kvm_vcpu_block(vcpu);
    		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
    		r = -EAGAIN;
    		goto out;
    	}
    
    	/* re-sync apic's tpr */
    	if (!irqchip_in_kernel(vcpu->kvm))
    		kvm_set_cr8(vcpu, kvm_run->cr8);

    	if (vcpu->arch.pio.cur_count) {
    
    		r = complete_pio(vcpu);
    		if (r)
    			goto out;
    	}
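    	/* Complete an MMIO read whose data was supplied by userspace. */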
    #if CONFIG_HAS_IOMEM
    	if (vcpu->mmio_needed) {
    		memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
    		vcpu->mmio_read_completed = 1;
    		vcpu->mmio_needed = 0;
    
    
    		down_read(&vcpu->kvm->slots_lock);
    
    		r = emulate_instruction(vcpu, kvm_run,
    					vcpu->arch.mmio_fault_cr2, 0,
    					EMULTYPE_NO_DECODE);
    
    		up_read(&vcpu->kvm->slots_lock);
    
    		if (r == EMULATE_DO_MMIO) {
    			/*
    			 * Read-modify-write.  Back to userspace.
    			 */
    			r = 0;
    			goto out;
    		}
    	}
    #endif
    
    	if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
    		kvm_register_write(vcpu, VCPU_REGS_RAX,
    				     kvm_run->hypercall.ret);
    
    
    	r = __vcpu_run(vcpu, kvm_run);
    
    out:
    	if (vcpu->sigset_active)
    		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
    
    	vcpu_put(vcpu);
    	return r;
    }
    
    int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
    {
    	vcpu_load(vcpu);
    
    
    	regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
    	regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
    	regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
    	regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
    	regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
    	regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
    
    	regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
    	regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
    	regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
    	regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
    	regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
    	regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
    	regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
    	regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
    
    	regs->rip = kvm_rip_read(vcpu);
    
    	regs->rflags = kvm_x86_ops->get_rflags(vcpu);
    
    	/*
    	 * Don't leak debug flags in case they were set for guest debugging
    	 */
    
    	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
    
    		regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
    
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
    {
    	vcpu_load(vcpu);
    
    
    	kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
    	kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
    	kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
    	kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
    	kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
    	kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
    	kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
    	kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
    
    	kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
    	kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
    	kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
    	kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
    	kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
    	kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
    	kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
    	kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
    
    
    	kvm_rip_write(vcpu, regs->rip);
    
    	kvm_x86_ops->set_rflags(vcpu, regs->rflags);
    
    
    
    	vcpu->arch.exception.pending = false;

    	vcpu_put(vcpu);

    	return 0;
    }

    void kvm_get_segment(struct kvm_vcpu *vcpu,
    		     struct kvm_segment *var, int seg)
    {
    	kvm_x86_ops->get_segment(vcpu, var, seg);
    
    }
    
    void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
    {
    	struct kvm_segment cs;
    
    
    	kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
    
    	*db = cs.db;
    	*l = cs.l;
    }
    EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
    
    int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
    				  struct kvm_sregs *sregs)
    {
    	struct descriptor_table dt;
    	int pending_vec;
    
    	vcpu_load(vcpu);
    
    
    	kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
    	kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
    	kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
    	kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
    	kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
    	kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
    
    	kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
    	kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
    
    
    	kvm_x86_ops->get_idt(vcpu, &dt);
    	sregs->idt.limit = dt.limit;
    	sregs->idt.base = dt.base;
    	kvm_x86_ops->get_gdt(vcpu, &dt);
    	sregs->gdt.limit = dt.limit;
    	sregs->gdt.base = dt.base;
    
    	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
    
    	sregs->cr0 = vcpu->arch.cr0;
    	sregs->cr2 = vcpu->arch.cr2;
    	sregs->cr3 = vcpu->arch.cr3;
    	sregs->cr4 = vcpu->arch.cr4;
    
    	sregs->efer = vcpu->arch.shadow_efer;
    
    	sregs->apic_base = kvm_get_apic_base(vcpu);
    
    	if (irqchip_in_kernel(vcpu->kvm)) {
    		memset(sregs->interrupt_bitmap, 0,
    		       sizeof sregs->interrupt_bitmap);
    		pending_vec = kvm_x86_ops->get_irq(vcpu);
    		if (pending_vec >= 0)
    			set_bit(pending_vec,
    				(unsigned long *)sregs->interrupt_bitmap);
    	} else
    
    		memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
    		       sizeof sregs->interrupt_bitmap);
    
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    
    int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
    				    struct kvm_mp_state *mp_state)
    {
    	vcpu_load(vcpu);
    	mp_state->mp_state = vcpu->arch.mp_state;
    	vcpu_put(vcpu);
    	return 0;
    }
    
    int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
    				    struct kvm_mp_state *mp_state)
    {
    	vcpu_load(vcpu);
    	vcpu->arch.mp_state = mp_state->mp_state;
    	vcpu_put(vcpu);
    	return 0;
    }
    
    
    static void kvm_set_segment(struct kvm_vcpu *vcpu,
    			struct kvm_segment *var, int seg)
    {
    
    	kvm_x86_ops->set_segment(vcpu, var, seg);
    }

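    /*
     * Unpack a raw architectural segment descriptor into the struct
     * kvm_segment representation used throughout KVM.
     */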
    static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
    				   struct kvm_segment *kvm_desct)
    {
    	kvm_desct->base = seg_desc->base0;
    	kvm_desct->base |= seg_desc->base1 << 16;
    	kvm_desct->base |= seg_desc->base2 << 24;
    	kvm_desct->limit = seg_desc->limit0;
    	kvm_desct->limit |= seg_desc->limit << 16;
    
    	if (seg_desc->g) {
    		kvm_desct->limit <<= 12;
    		kvm_desct->limit |= 0xfff;
    	}
    
    	kvm_desct->selector = selector;
    	kvm_desct->type = seg_desc->type;
    	kvm_desct->present = seg_desc->p;
    	kvm_desct->dpl = seg_desc->dpl;
    	kvm_desct->db = seg_desc->d;
    	kvm_desct->s = seg_desc->s;
    	kvm_desct->l = seg_desc->l;
    	kvm_desct->g = seg_desc->g;
    	kvm_desct->avl = seg_desc->avl;
    	if (!selector)
    		kvm_desct->unusable = 1;
    	else
    		kvm_desct->unusable = 0;
    	kvm_desct->padding = 0;
    }
    
    
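    /*
     * Return the descriptor table a selector refers to: the LDT if its
     * table-indicator bit (bit 2) is set, otherwise the GDT.
     */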
    static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
    					  u16 selector,
    					  struct descriptor_table *dtable)
    
    {
    	if (selector & 1 << 2) {
    		struct kvm_segment kvm_seg;
    
    
    		kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
    
    
    		if (kvm_seg.unusable)
    			dtable->limit = 0;
    		else
    			dtable->limit = kvm_seg.limit;
    		dtable->base = kvm_seg.base;
    	}
    	else
    		kvm_x86_ops->get_gdt(vcpu, dtable);
    }
    
    /* allowed only for 8-byte segment descriptors */
    static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
    					 struct desc_struct *seg_desc)
    {
    
    	gpa_t gpa;
    	struct descriptor_table dtable;
    	u16 index = selector >> 3;
    
    
    	get_segment_descriptor_dtable(vcpu, selector, &dtable);
    
    
    	if (dtable.limit < index * 8 + 7) {
    		kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
    		return 1;
    	}
    
    	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
    	gpa += index * 8;
    	return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8);
    
    }
    
    /* allowed only for 8-byte segment descriptors */
    static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
    					 struct desc_struct *seg_desc)
    {
    
    	gpa_t gpa;
    	struct descriptor_table dtable;
    	u16 index = selector >> 3;
    
    
    	get_segment_descriptor_dtable(vcpu, selector, &dtable);
    
    
    	if (dtable.limit < index * 8 + 7)
    		return 1;
    
    	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
    	gpa += index * 8;
    	return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8);
    
    }
    
    static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
    			     struct desc_struct *seg_desc)
    {
    	u32 base_addr;
    
    	base_addr = seg_desc->base0;
    	base_addr |= (seg_desc->base1 << 16);
    	base_addr |= (seg_desc->base2 << 24);
    
    
    	return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr);
    
    }
    
    static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
    {
    	struct kvm_segment kvm_seg;
    
    
    	kvm_get_segment(vcpu, &kvm_seg, seg);
    
    	return kvm_seg.selector;
    }
    
    static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
    						u16 selector,
    						struct kvm_segment *kvm_seg)
    {
    	struct desc_struct seg_desc;
    
    	if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
    		return 1;
    	seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg);
    	return 0;
    }
    
    
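    /*
     * Real-mode segment load: no descriptor tables are consulted; the base
     * is simply selector * 16 with a fixed 64K limit.
     */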
    static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
    
    {
    	struct kvm_segment segvar = {
    		.base = selector << 4,
    		.limit = 0xffff,
    		.selector = selector,
    		.type = 3,
    		.present = 1,
    		.dpl = 3,
    		.db = 0,
    		.s = 1,
    		.l = 0,
    		.g = 0,
    		.avl = 0,
    		.unusable = 0,
    	};
    	kvm_x86_ops->set_segment(vcpu, &segvar, seg);
    	return 0;
    }
    
    
    int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
    				int type_bits, int seg)
    
    {
    	struct kvm_segment kvm_seg;
    
    
    	if (!(vcpu->arch.cr0 & X86_CR0_PE))
    		return kvm_load_realmode_segment(vcpu, selector, seg);
    
    	if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
    		return 1;
    	kvm_seg.type |= type_bits;
    
    	if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS &&
    	    seg != VCPU_SREG_LDTR)
    		if (!kvm_seg.s)
    			kvm_seg.unusable = 1;
    
    
    	kvm_set_segment(vcpu, &kvm_seg, seg);
    
    	return 0;
    }
    
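    /*
     * Snapshot the vcpu's registers and segment selectors into an in-memory
     * 32-bit TSS image; load_state_from_tss32() performs the reverse.
     */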
    static void save_state_to_tss32(struct kvm_vcpu *vcpu,
    				struct tss_segment_32 *tss)
    {
    	tss->cr3 = vcpu->arch.cr3;
    
    	tss->eip = kvm_rip_read(vcpu);
    
    	tss->eflags = kvm_x86_ops->get_rflags(vcpu);
    
    	tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
    	tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
    	tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP);
    	tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP);
    	tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI);
    	tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI);
    
    	tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
    	tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
    	tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
    	tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
    	tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
    	tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
    	tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
    	tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
    }
    
    static int load_state_from_tss32(struct kvm_vcpu *vcpu,
    				  struct tss_segment_32 *tss)
    {
    	kvm_set_cr3(vcpu, tss->cr3);
    
    
    	kvm_rip_write(vcpu, tss->eip);
    
    	kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);
    
    
    	kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
    	kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
    	kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx);
    	kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx);
    	kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp);
    	kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp);
    	kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
    	kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
    
    	if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
    		return 1;

    	if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
    		return 1;

    	if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
    		return 1;

    	if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
    		return 1;

    	if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
    		return 1;

    	if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
    		return 1;

    	if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
    		return 1;
    	return 0;
    }
    
    static void save_state_to_tss16(struct kvm_vcpu *vcpu,
    				struct tss_segment_16 *tss)
    {
    
    	tss->ip = kvm_rip_read(vcpu);
    
    	tss->flag = kvm_x86_ops->get_rflags(vcpu);
    
    	tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
    	tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX);
    	tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP);
    	tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP);
    	tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI);
    	tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI);
    
    
    	tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
    	tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
    	tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
    	tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
    	tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
    	tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
    }
    
    static int load_state_from_tss16(struct kvm_vcpu *vcpu,
    				 struct tss_segment_16 *tss)
    {
    
    	kvm_rip_write(vcpu, tss->ip);
    
    	kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);
    
    	kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
    	kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
    	kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
    	kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx);
    	kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp);
    	kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp);
    	kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
    	kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
    
    	if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
    		return 1;

    	if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
    		return 1;

    	if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
    		return 1;

    	if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
    		return 1;

    	if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
    		return 1;

    	return 0;
    }

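    /*
     * Memory side of a 16-bit task switch: write the outgoing state into the
     * old TSS and load the incoming state from the TSS described by nseg_desc.
     */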
    static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
    		       u32 old_tss_base,
    		       struct desc_struct *nseg_desc)
    {
    	struct tss_segment_16 tss_segment_16;
    	int ret = 0;
    
    
    	if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
    			   sizeof tss_segment_16))
    
    		goto out;
    
    	save_state_to_tss16(vcpu, &tss_segment_16);
    
    
    	if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
    			    sizeof tss_segment_16))
    		goto out;

    	if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
    			   &tss_segment_16, sizeof tss_segment_16))
    		goto out;
    
    
    	if (load_state_from_tss16(vcpu, &tss_segment_16))
    		goto out;
    
    	ret = 1;
    out:
    	return ret;
    }
    
    
    static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
    		       u32 old_tss_base,
    		       struct desc_struct *nseg_desc)
    {
    	struct tss_segment_32 tss_segment_32;
    	int ret = 0;
    
    
    	if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
    			   sizeof tss_segment_32))
    
    		goto out;
    
    	save_state_to_tss32(vcpu, &tss_segment_32);
    
    
    	if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
    			    sizeof tss_segment_32))
    		goto out;
    
    	if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
    			   &tss_segment_32, sizeof tss_segment_32))
    		goto out;

    	if (load_state_from_tss32(vcpu, &tss_segment_32))
    		goto out;
    
    	ret = 1;
    out:
    	return ret;
    }
    
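    /*
     * Emulate a hardware task switch: check privilege and TSS limits, save
     * state into the outgoing TSS, load state from the incoming one, and
     * update the busy bits, the NT flag and TR as the switch reason requires.
     */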
    int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
    {
    	struct kvm_segment tr_seg;
    	struct desc_struct cseg_desc;
    	struct desc_struct nseg_desc;
    	int ret = 0;
    
    	u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
    	u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
    
    	old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base);
    
    	/* FIXME: Handle errors. Failure to read either TSS or their
    	 * descriptors should generate a pagefault.
    	 */
    
    	if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
    		goto out;
    
    
    	if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc))
    
    		goto out;
    
    	if (reason != TASK_SWITCH_IRET) {
    		int cpl;
    
    		cpl = kvm_x86_ops->get_cpl(vcpu);
    		if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) {
    			kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
    			return 1;
    		}
    	}
    
    	if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) {
    		kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
    		return 1;
    	}
    
    	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
    
    		cseg_desc.type &= ~(1 << 1); /* clear the B flag */
    
    		save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc);
    
    	}
    
    	if (reason == TASK_SWITCH_IRET) {
    		u32 eflags = kvm_x86_ops->get_rflags(vcpu);
    		kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
    	}
    
    	kvm_x86_ops->skip_emulated_instruction(vcpu);
    
    	if (nseg_desc.type & 8)
    		ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base,
    					 &nseg_desc);
    	else
    		ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base,
    					 &nseg_desc);
    
    	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
    		u32 eflags = kvm_x86_ops->get_rflags(vcpu);
    		kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT);
    	}
    
    	if (reason != TASK_SWITCH_IRET) {
    
    		nseg_desc.type |= (1 << 1);
    
    		save_guest_segment_descriptor(vcpu, tss_selector,
    					      &nseg_desc);
    	}
    
    	kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS);
    	seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
    	tr_seg.type = 11;
    
    	kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
    
    out:
    	return ret;
    }
    EXPORT_SYMBOL_GPL(kvm_task_switch);
    
    
    int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
    				  struct kvm_sregs *sregs)
    {
    	int mmu_reset_needed = 0;
    	int i, pending_vec, max_bits;
    	struct descriptor_table dt;
    
    	vcpu_load(vcpu);
    
    	dt.limit = sregs->idt.limit;
    	dt.base = sregs->idt.base;
    	kvm_x86_ops->set_idt(vcpu, &dt);
    	dt.limit = sregs->gdt.limit;
    	dt.base = sregs->gdt.base;
    	kvm_x86_ops->set_gdt(vcpu, &dt);
    
    
    	vcpu->arch.cr2 = sregs->cr2;
    	mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
    	vcpu->arch.cr3 = sregs->cr3;
    
    	mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer;
    
    	kvm_x86_ops->set_efer(vcpu, sregs->efer);
    	kvm_set_apic_base(vcpu, sregs->apic_base);
    
    	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
    
    
    	mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
    
    	kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
    
    	mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
    
    	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
    	if (!is_long_mode(vcpu) && is_pae(vcpu))
    
    		load_pdptrs(vcpu, vcpu->arch.cr3);
    
    
    	if (mmu_reset_needed)
    		kvm_mmu_reset_context(vcpu);
    
    	if (!irqchip_in_kernel(vcpu->kvm)) {
    
    		memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap,
    		       sizeof vcpu->arch.irq_pending);
    		vcpu->arch.irq_summary = 0;
    		for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i)
    			if (vcpu->arch.irq_pending[i])
    				__set_bit(i, &vcpu->arch.irq_summary);
    
    	} else {
    		max_bits = (sizeof sregs->interrupt_bitmap) << 3;
    		pending_vec = find_first_bit(
    			(const unsigned long *)sregs->interrupt_bitmap,
    			max_bits);
    		/* Only pending external irq is handled here */
    		if (pending_vec < max_bits) {
    			kvm_x86_ops->set_irq(vcpu, pending_vec);
    			pr_debug("Set back pending irq %d\n",
    				 pending_vec);
    		}
    
    		kvm_pic_clear_isr_ack(vcpu->kvm);
    	}

    	kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
    	kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
    	kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
    	kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
    	kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
    	kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
    
    	kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
    	kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
    
    	/* Older userspace won't unhalt the vcpu on reset. */
    	if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 &&
    	    sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
    	    !(vcpu->arch.cr0 & X86_CR0_PE))
    		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;

    	vcpu_put(vcpu);

    	return 0;
    }

    int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
    					struct kvm_guest_debug *dbg)
    {
    	int i;

    	if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
    	    (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
    		for (i = 0; i < KVM_NR_DB_REGS; ++i)
    			vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
    		vcpu->arch.switch_db_regs =
    			(dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
    	} else {
    		for (i = 0; i < KVM_NR_DB_REGS; i++)
    			vcpu->arch.eff_db[i] = vcpu->arch.db[i];