static int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu)
{
	return __kvm_set_dr(vcpu, dr, value);
}

static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
	return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
}
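
/*
 * For example, mk_cr_64(0x00000001ffff0010ULL, 0x80000011) keeps the upper
 * 32 bits (0x00000001) and replaces the lower 32 bits, giving
 * 0x0000000180000011ULL.
 */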
    
static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
{
	unsigned long value;
    
    	switch (cr) {
    	case 0:
    		value = kvm_read_cr0(vcpu);
    		break;
    	case 2:
    		value = vcpu->arch.cr2;
    		break;
    	case 3:
    		value = vcpu->arch.cr3;
    		break;
    	case 4:
    		value = kvm_read_cr4(vcpu);
    		break;
    	case 8:
    		value = kvm_get_cr8(vcpu);
    		break;
    	default:
    		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
    		return 0;
    	}
    
    	return value;
    }
    
    
static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
{
	int res = 0;

	switch (cr) {
	case 0:
		res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
		break;
	case 2:
		vcpu->arch.cr2 = val;
		break;
	case 3:
		res = kvm_set_cr3(vcpu, val);
		break;
	case 4:
		res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
		break;
	case 8:
		res = __kvm_set_cr8(vcpu, val & 0xfUL);
		break;
	default:
		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
		res = -1;
	}

	return res;
}

    static int emulator_get_cpl(struct kvm_vcpu *vcpu)
    {
    	return kvm_x86_ops->get_cpl(vcpu);
    }
    
    
    static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
    {
    	kvm_x86_ops->get_gdt(vcpu, dt);
    }
    
    
    static void emulator_get_idt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
    {
    	kvm_x86_ops->get_idt(vcpu, dt);
    }
    
    
    static unsigned long emulator_get_cached_segment_base(int seg,
    						      struct kvm_vcpu *vcpu)
    {
    	return get_segment_base(vcpu, seg);
    }
    
    
    static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg,
    					   struct kvm_vcpu *vcpu)
    {
    	struct kvm_segment var;
    
    	kvm_get_segment(vcpu, &var, seg);
    
    	if (var.unusable)
    		return false;
    
    	if (var.g)
    		var.limit >>= 12;
    	set_desc_limit(desc, var.limit);
    	set_desc_base(desc, (unsigned long)var.base);
    	desc->type = var.type;
    	desc->s = var.s;
    	desc->dpl = var.dpl;
    	desc->p = var.present;
    	desc->avl = var.avl;
    	desc->l = var.l;
    	desc->d = var.db;
    	desc->g = var.g;
    
    	return true;
    }
    
    static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg,
    					   struct kvm_vcpu *vcpu)
    {
    	struct kvm_segment var;
    
    	/* needed to preserve selector */
    	kvm_get_segment(vcpu, &var, seg);
    
    	var.base = get_desc_base(desc);
    	var.limit = get_desc_limit(desc);
    	if (desc->g)
    		var.limit = (var.limit << 12) | 0xfff;
    	var.type = desc->type;
    	var.present = desc->p;
    	var.dpl = desc->dpl;
    	var.db = desc->d;
    	var.s = desc->s;
    	var.l = desc->l;
    	var.g = desc->g;
    	var.avl = desc->avl;
    	var.unusable = !var.present;
    	var.padding = 0;
    
    	kvm_set_segment(vcpu, &var, seg);
    	return;
    }
    
    static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu)
    {
    	struct kvm_segment kvm_seg;
    
    	kvm_get_segment(vcpu, &kvm_seg, seg);
    	return kvm_seg.selector;
    }
    
    static void emulator_set_segment_selector(u16 sel, int seg,
    					  struct kvm_vcpu *vcpu)
    {
    	struct kvm_segment kvm_seg;
    
    	kvm_get_segment(vcpu, &kvm_seg, seg);
    	kvm_seg.selector = sel;
    	kvm_set_segment(vcpu, &kvm_seg, seg);
    }
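
/*
 * Callback table handed to the x86 instruction emulator; it routes the
 * emulator's memory, I/O, segment, control/debug register and MSR accesses
 * back into the rest of KVM.
 */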
    
    
    static struct x86_emulate_ops emulate_ops = {
    
    	.read_std            = kvm_read_guest_virt_system,
    
    	.write_std           = kvm_write_guest_virt_system,
    
    	.fetch               = kvm_fetch_guest_virt,
    
    	.read_emulated       = emulator_read_emulated,
    	.write_emulated      = emulator_write_emulated,
    	.cmpxchg_emulated    = emulator_cmpxchg_emulated,
    
    	.pio_in_emulated     = emulator_pio_in_emulated,
    	.pio_out_emulated    = emulator_pio_out_emulated,
    
    	.get_cached_descriptor = emulator_get_cached_descriptor,
    	.set_cached_descriptor = emulator_set_cached_descriptor,
    	.get_segment_selector = emulator_get_segment_selector,
    	.set_segment_selector = emulator_set_segment_selector,
    
    	.get_cached_segment_base = emulator_get_cached_segment_base,
    
    	.get_gdt             = emulator_get_gdt,
    
    	.get_idt	     = emulator_get_idt,
    
    	.get_cr              = emulator_get_cr,
    	.set_cr              = emulator_set_cr,
    
    	.get_dr              = emulator_get_dr,
    	.set_dr              = emulator_set_dr,
    
    	.set_msr             = kvm_set_msr,
	.get_msr             = kvm_get_msr,
};
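
/*
 * The emulator accesses vcpu->arch.regs[] directly (see the TODO in
 * emulate_instruction()), so before emulation the lazy register cache is
 * filled in by touching RAX, RSP and RIP, and regs_dirty is set so that
 * every register is written back afterwards.
 */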
    
    static void cache_all_regs(struct kvm_vcpu *vcpu)
    {
    	kvm_register_read(vcpu, VCPU_REGS_RAX);
    	kvm_register_read(vcpu, VCPU_REGS_RSP);
    	kvm_register_read(vcpu, VCPU_REGS_RIP);
    	vcpu->arch.regs_dirty = ~0;
    }
    
    
    static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
    {
    	u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
	/*
	 * An "sti; sti" sequence only disables interrupts for the first
	 * instruction.  So, if the last instruction, be it emulated or
	 * not, left the system with the INT_STI flag enabled, it means
	 * that the last instruction was an sti.  We should not leave the
	 * flag on in this case.  The same goes for mov ss.
	 */
    	if (!(int_shadow & mask))
    		kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
    }
    
    
    static void inject_emulated_exception(struct kvm_vcpu *vcpu)
    {
    	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
	if (ctxt->exception == PF_VECTOR)
		kvm_propagate_fault(vcpu);
    	else if (ctxt->error_code_valid)
    		kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code);
    	else
    		kvm_queue_exception(vcpu, ctxt->exception);
    }
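
/*
 * init_emulate_ctxt() snapshots what the emulator needs before decoding:
 * RFLAGS, RIP and the execution mode, which is derived from CR0.PE (real
 * mode), EFLAGS.VM (vm86), CS.L (64-bit) and CS.D (32- vs 16-bit default
 * operand size).
 */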
    
    
    static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
    {
    	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
    	int cs_db, cs_l;
    
    	cache_all_regs(vcpu);
    
    	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
    
    	vcpu->arch.emulate_ctxt.vcpu = vcpu;
    	vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
    	vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
    	vcpu->arch.emulate_ctxt.mode =
    		(!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
    		(vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
    		? X86EMUL_MODE_VM86 : cs_l
    		? X86EMUL_MODE_PROT64 :	cs_db
    		? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
    	memset(c, 0, sizeof(struct decode_cache));
    	memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
    }
    
    
    int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq)
    {
    	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
    	int ret;
    
    	init_emulate_ctxt(vcpu);
    
    	vcpu->arch.emulate_ctxt.decode.op_bytes = 2;
    	vcpu->arch.emulate_ctxt.decode.ad_bytes = 2;
    	vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip;
    	ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq);
    
    	if (ret != X86EMUL_CONTINUE)
    		return EMULATE_FAIL;
    
    	vcpu->arch.emulate_ctxt.eip = c->eip;
    	memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
    	kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
    	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
    
    	if (irq == NMI_VECTOR)
    		vcpu->arch.nmi_pending = false;
    	else
    		vcpu->arch.interrupt.pending = false;
    
    	return EMULATE_DONE;
    }
    EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
    
    
    static int handle_emulation_failure(struct kvm_vcpu *vcpu)
    {
    	++vcpu->stat.insn_emulation_fail;
    	trace_kvm_emulate_insn_failed(vcpu);
    	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
    	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
    	vcpu->run->internal.ndata = 0;
    	kvm_queue_exception(vcpu, UD_VECTOR);
    	return EMULATE_FAIL;
    }
    
    
    static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
    {
    	gpa_t gpa;
    
    
	/*
	 * If emulation was due to an access to a shadowed page table and
	 * it failed, try to unshadow the page and re-enter the guest to
	 * let the CPU execute the instruction.
	 */
    	if (kvm_mmu_unprotect_page_virt(vcpu, gva))
    		return true;
    
    	gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
    
    	if (gpa == UNMAPPED_GVA)
    		return true; /* let cpu generate fault */
    
    	if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT)))
    		return true;
    
    	return false;
    }
    
    
int emulate_instruction(struct kvm_vcpu *vcpu,
			unsigned long cr2,
			u16 error_code,
			int emulation_type)
{
	int r;
	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;

	kvm_clear_exception_queue(vcpu);
	vcpu->arch.mmio_fault_cr2 = cr2;
	/*
	 * TODO: fix emulate.c to use guest_read/write_register
	 * instead of direct ->regs accesses; that can save hundreds of
	 * cycles on Intel for instructions that don't read/change RSP,
	 * for example.
	 */
	cache_all_regs(vcpu);
    
	if (!(emulation_type & EMULTYPE_NO_DECODE)) {
		init_emulate_ctxt(vcpu);
		vcpu->arch.emulate_ctxt.interruptibility = 0;
		vcpu->arch.emulate_ctxt.exception = -1;
		vcpu->arch.emulate_ctxt.perm_ok = false;

		r = x86_decode_insn(&vcpu->arch.emulate_ctxt);
    
    		if (r == X86EMUL_PROPAGATE_FAULT)
    			goto done;
    
    		trace_kvm_emulate_insn_start(vcpu);
    
    		/* Only allow emulation of specific instructions on #UD
    		 * (namely VMMCALL, sysenter, sysexit, syscall)*/
    		if (emulation_type & EMULTYPE_TRAP_UD) {
    			if (!c->twobyte)
    				return EMULATE_FAIL;
    			switch (c->b) {
    			case 0x01: /* VMMCALL */
    				if (c->modrm_mod != 3 || c->modrm_rm != 1)
    					return EMULATE_FAIL;
    				break;
    			case 0x34: /* sysenter */
    			case 0x35: /* sysexit */
    				if (c->modrm_mod != 0 || c->modrm_rm != 0)
    					return EMULATE_FAIL;
    				break;
    			case 0x05: /* syscall */
    				if (c->modrm_mod != 0 || c->modrm_rm != 0)
    					return EMULATE_FAIL;
    				break;
    			default:
    				return EMULATE_FAIL;
    			}
    
    			if (!(c->modrm_reg == 0 || c->modrm_reg == 3))
    				return EMULATE_FAIL;
    		}
    
		++vcpu->stat.insn_emulation;
		if (r) {
			if (emulation_type & EMULTYPE_SKIP)
				return EMULATE_FAIL;
			return handle_emulation_failure(vcpu);
		}
	}

    	if (emulation_type & EMULTYPE_SKIP) {
    		kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip);
    		return EMULATE_DONE;
    	}
    
    
	/* this is needed for the vmware backdoor interface to work since it
	   changes register values during the IO operation */
	memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);

restart:
	r = x86_emulate_insn(&vcpu->arch.emulate_ctxt);
    
	if (r == EMULATION_FAILED) {
		if (reexecute_instruction(vcpu, cr2))
			return EMULATE_DONE;

		return handle_emulation_failure(vcpu);
	}

done:
	if (vcpu->arch.emulate_ctxt.exception >= 0) {
    		inject_emulated_exception(vcpu);
    
    		r = EMULATE_DONE;
    	} else if (vcpu->arch.pio.count) {
    
    		if (!vcpu->arch.pio.in)
    			vcpu->arch.pio.count = 0;
    
    		r = EMULATE_DO_MMIO;
	} else if (vcpu->mmio_needed) {
		if (vcpu->mmio_is_write)
			vcpu->mmio_needed = 0;
		r = EMULATE_DO_MMIO;
	} else if (r == EMULATION_RESTART)
		goto restart;
	else
		r = EMULATE_DONE;

    	toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility);
    	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
    
    	kvm_make_request(KVM_REQ_EVENT, vcpu);
    
    	memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
    	kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
    
	return r;
}
    EXPORT_SYMBOL_GPL(emulate_instruction);
    
int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
{
	unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
	int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu);
	/* do not return to the emulator after returning from userspace */
	vcpu->arch.pio.count = 0;
	return ret;
}
    EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
    
    static void tsc_bad(void *info)
    {
    	__get_cpu_var(cpu_tsc_khz) = 0;
    }
    
static void tsc_khz_changed(void *data)
{
	struct cpufreq_freqs *freq = data;
    	unsigned long khz = 0;
    
    	if (data)
    		khz = freq->new;
    	else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
    		khz = cpufreq_quick_get(raw_smp_processor_id());
    	if (!khz)
    		khz = tsc_khz;
    	__get_cpu_var(cpu_tsc_khz) = khz;
    
    }
    
    static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
    				     void *data)
    {
    	struct cpufreq_freqs *freq = data;
    	struct kvm *kvm;
    	struct kvm_vcpu *vcpu;
    	int i, send_ipi = 0;
    
    
    	/*
    	 * We allow guests to temporarily run on slowing clocks,
    	 * provided we notify them after, or to run on accelerating
    	 * clocks, provided we notify them before.  Thus time never
    	 * goes backwards.
    	 *
    	 * However, we have a problem.  We can't atomically update
    	 * the frequency of a given CPU from this function; it is
    	 * merely a notifier, which can be called from any CPU.
    	 * Changing the TSC frequency at arbitrary points in time
    	 * requires a recomputation of local variables related to
    	 * the TSC for each VCPU.  We must flag these local variables
    	 * to be updated and be sure the update takes place with the
    	 * new frequency before any guests proceed.
    	 *
    	 * Unfortunately, the combination of hotplug CPU and frequency
    	 * change creates an intractable locking scenario; the order
    	 * of when these callouts happen is undefined with respect to
    	 * CPU hotplug, and they can race with each other.  As such,
    	 * merely setting per_cpu(cpu_tsc_khz) = X during a hotadd is
    	 * undefined; you can actually have a CPU frequency change take
    	 * place in between the computation of X and the setting of the
    	 * variable.  To protect against this problem, all updates of
    	 * the per_cpu tsc_khz variable are done in an interrupt
    	 * protected IPI, and all callers wishing to update the value
    	 * must wait for a synchronous IPI to complete (which is trivial
    	 * if the caller is on the CPU already).  This establishes the
    	 * necessary total order on variable updates.
    	 *
    	 * Note that because a guest time update may take place
    	 * anytime after the setting of the VCPU's request bit, the
    	 * correct TSC value must be set before the request.  However,
    	 * to ensure the update actually makes it to any guest which
    	 * starts running in hardware virtualization between the set
    	 * and the acquisition of the spinlock, we must also ping the
    	 * CPU after setting the request bit.
    	 *
    	 */
    
    
    	if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
    		return 0;
    	if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
    		return 0;
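
	/*
	 * In other words: act on PRECHANGE only when the frequency is about
	 * to go up, and on POSTCHANGE only when it has gone down, which
	 * implements the "notify before speed-up, after slow-down" rule
	 * described above.
	 */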
    
    
    	smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
    
    
    	spin_lock(&kvm_lock);
    	list_for_each_entry(kvm, &vm_list, vm_list) {
    
    		kvm_for_each_vcpu(i, vcpu, kvm) {
    
    			if (vcpu->cpu != freq->cpu)
    				continue;
    
    			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
    
			if (vcpu->cpu != smp_processor_id())
				send_ipi = 1;
    		}
    	}
    	spin_unlock(&kvm_lock);
    
    	if (freq->old < freq->new && send_ipi) {
		/*
		 * We are scaling the frequency up.  We must make sure the
		 * guest doesn't see old kvmclock values while running with
		 * the new frequency; otherwise we risk the guest seeing
		 * time go backwards.
		 *
		 * In case we update the frequency for another cpu (which
		 * might be in guest context), send an interrupt to kick
		 * the cpu out of guest context.  The next time guest
		 * context is entered, kvmclock will be updated, so the
		 * guest will not see stale values.
		 */
    
    		smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
    
    	}
    	return 0;
    }
    
    static struct notifier_block kvmclock_cpufreq_notifier_block = {
    
    	.notifier_call  = kvmclock_cpufreq_notifier
    };
    
    static int kvmclock_cpu_notifier(struct notifier_block *nfb,
    					unsigned long action, void *hcpu)
    {
    	unsigned int cpu = (unsigned long)hcpu;
    
    	switch (action) {
    		case CPU_ONLINE:
    		case CPU_DOWN_FAILED:
    			smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
    			break;
    		case CPU_DOWN_PREPARE:
    			smp_call_function_single(cpu, tsc_bad, NULL, 1);
    			break;
    	}
    	return NOTIFY_OK;
    }
    
    static struct notifier_block kvmclock_cpu_notifier_block = {
    	.notifier_call  = kvmclock_cpu_notifier,
	.priority = -INT_MAX
};

static void kvm_timer_init(void)
{
	int cpu;

	max_tsc_khz = tsc_khz;
    
    	register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
    
    	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
    
    #ifdef CONFIG_CPU_FREQ
    		struct cpufreq_policy policy;
    		memset(&policy, 0, sizeof(policy));
    
    		cpu = get_cpu();
    		cpufreq_get_policy(&policy, cpu);
    
		if (policy.cpuinfo.max_freq)
			max_tsc_khz = policy.cpuinfo.max_freq;
		put_cpu();
#endif
    
    		cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
    					  CPUFREQ_TRANSITION_NOTIFIER);
    	}
    
    	pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
    
    	for_each_online_cpu(cpu)
		smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
}

    static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
    
    static int kvm_is_in_guest(void)
    {
    	return percpu_read(current_vcpu) != NULL;
    }
    
    static int kvm_is_user_mode(void)
    {
    	int user_mode = 3;
    
    	if (percpu_read(current_vcpu))
    		user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu));
    
    	return user_mode != 0;
    }
    
    static unsigned long kvm_get_guest_ip(void)
    {
    	unsigned long ip = 0;
    
    	if (percpu_read(current_vcpu))
    		ip = kvm_rip_read(percpu_read(current_vcpu));
    
    	return ip;
    }
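
/*
 * perf uses these callbacks to decide whether a PMI sample hit guest code:
 * current_vcpu is non-NULL only between kvm_before_handle_nmi() and
 * kvm_after_handle_nmi(), i.e. while an NMI that fired in guest mode is
 * being handled.
 */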
    
    static struct perf_guest_info_callbacks kvm_guest_cbs = {
    	.is_in_guest		= kvm_is_in_guest,
    	.is_user_mode		= kvm_is_user_mode,
    	.get_guest_ip		= kvm_get_guest_ip,
    };
    
    void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
    {
    	percpu_write(current_vcpu, vcpu);
    }
    EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
    
    void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
    {
    	percpu_write(current_vcpu, NULL);
    }
    EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
    
    
int kvm_arch_init(void *opaque)
{
	int r;
	struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;

	if (kvm_x86_ops) {
		printk(KERN_ERR "kvm: already loaded the other module\n");
		r = -EEXIST;
		goto out;
	}
    
	if (!ops->cpu_has_kvm_support()) {
		printk(KERN_ERR "kvm: no hardware support\n");
		r = -EOPNOTSUPP;
		goto out;
	}
	if (ops->disabled_by_bios()) {
		printk(KERN_ERR "kvm: disabled by bios\n");
		r = -EOPNOTSUPP;
		goto out;
	}

    	r = kvm_mmu_module_init();
    	if (r)
    		goto out;
    
    	kvm_init_msr_list();
    
    
	kvm_x86_ops = ops;
	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
    
    	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
    	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
    
    			PT_DIRTY_MASK, PT64_NX_MASK, 0);
    
	kvm_timer_init();

	perf_register_guest_info_callbacks(&kvm_guest_cbs);
    
    
	if (cpu_has_xsave)
		host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);

	return 0;

out:
	return r;
}

void kvm_arch_exit(void)
{
	perf_unregister_guest_info_callbacks(&kvm_guest_cbs);

    	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
    		cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
    					    CPUFREQ_TRANSITION_NOTIFIER);
    
	unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
	kvm_x86_ops = NULL;
	kvm_mmu_module_exit();
}

    int kvm_emulate_halt(struct kvm_vcpu *vcpu)
    {
    	++vcpu->stat.halt_exits;
    	if (irqchip_in_kernel(vcpu->kvm)) {
    
    		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
    
    		return 1;
    	} else {
    		vcpu->run->exit_reason = KVM_EXIT_HLT;
    		return 0;
    	}
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_halt);
    
    
    static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
    			   unsigned long a1)
    {
    	if (is_long_mode(vcpu))
    		return a0;
    	else
    		return a0 | ((gpa_t)a1 << 32);
    }
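
/*
 * For a 32-bit guest the 64-bit hypercall GPA is split across two
 * registers: e.g. a0 == 0x12345678 and a1 == 0x9abc combine to the GPA
 * 0x00009abc12345678.
 */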
    
    
    int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
    {
    	u64 param, ingpa, outgpa, ret;
    	uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
    	bool fast, longmode;
    	int cs_db, cs_l;
    
	/*
	 * Per the Hyper-V spec, a hypercall generates #UD when issued from
	 * non-zero CPL or from real mode.
	 */
    
    	if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
    
    		kvm_queue_exception(vcpu, UD_VECTOR);
    		return 0;
    	}
    
    	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
    	longmode = is_long_mode(vcpu) && cs_l == 1;
    
    	if (!longmode) {
    
    		param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
    			(kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
    		ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
    			(kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
    		outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
    			(kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
    
    	}
    #ifdef CONFIG_X86_64
    	else {
    		param = kvm_register_read(vcpu, VCPU_REGS_RCX);
    		ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
    		outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
    	}
    #endif
    
    	code = param & 0xffff;
    	fast = (param >> 16) & 0x1;
    	rep_cnt = (param >> 32) & 0xfff;
    	rep_idx = (param >> 48) & 0xfff;
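
	/*
	 * Example: param == 0x000a000500010002 decodes to code 0x0002,
	 * fast == 1, rep_cnt == 5 and rep_idx == 10, following the Hyper-V
	 * hypercall input value layout.
	 */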
    
    	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
    
    
    	switch (code) {
    	case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
    		kvm_vcpu_on_spin(vcpu);
    		break;
    	default:
    		res = HV_STATUS_INVALID_HYPERCALL_CODE;
    		break;
    	}
    
    
    	ret = res | (((u64)rep_done & 0xfff) << 32);
    	if (longmode) {
    		kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
    	} else {
    		kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
    		kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
    	}
    
    	return 1;
    }
    
    
    int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
    {
	unsigned long nr, a0, a1, a2, a3, ret;
	int r = 1;

    	if (kvm_hv_hypercall_enabled(vcpu->kvm))
    		return kvm_hv_hypercall(vcpu);
    
    
    	nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
    	a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
    	a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
    
    	trace_kvm_hypercall(nr, a0, a1, a2, a3);
    
    	if (!is_long_mode(vcpu)) {
    		nr &= 0xFFFFFFFF;
    		a0 &= 0xFFFFFFFF;
    		a1 &= 0xFFFFFFFF;
    		a2 &= 0xFFFFFFFF;
    		a3 &= 0xFFFFFFFF;
    	}
    
    
    	if (kvm_x86_ops->get_cpl(vcpu) != 0) {
    		ret = -KVM_EPERM;
    		goto out;
    	}
    
    
	switch (nr) {
	case KVM_HC_VAPIC_POLL_IRQ:
    		ret = 0;
    		break;
    
    	case KVM_HC_MMU_OP:
    		r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
		break;
	default:
		ret = -KVM_ENOSYS;
		break;
	}
out:
	kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
    
	++vcpu->stat.hypercalls;
	return r;
}
    EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
    
    int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
    {
    	char instruction[3];
    
    	unsigned long rip = kvm_rip_read(vcpu);
    
    
	/*
	 * Blow out the MMU so that no other VCPU has an active mapping,
	 * ensuring that the updated hypercall appears atomically across
	 * all VCPUs.
	 */
    	kvm_mmu_zap_all(vcpu->kvm);
    
    	kvm_x86_ops->patch_hypercall(vcpu, instruction);
    
    
    	return emulator_write_emulated(rip, instruction, 3, NULL, vcpu);
    
    }
    
    void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
    {
    
    	struct desc_ptr dt = { limit, base };
    
    
    	kvm_x86_ops->set_gdt(vcpu, &dt);
    }
    
    void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
    {
    
	struct desc_ptr dt = { limit, base };

	kvm_x86_ops->set_idt(vcpu, &dt);
}

    static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
    {
    
    	struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
    	int j, nent = vcpu->arch.cpuid_nent;
    
    
    	e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
    	/* when no next entry is found, the current entry[i] is reselected */
    
    	for (j = i + 1; ; j = (j + 1) % nent) {
    
    		struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
    
    		if (ej->function == e->function) {
    			ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
    			return j;
    		}
    	}
    	return 0; /* silence gcc, even though control never reaches here */
    }
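
/*
 * Stateful CPUID functions (such as the classic cache-descriptor leaf 2)
 * return a different entry on successive invocations.  KVM models this with
 * multiple entries for the same function and a KVM_CPUID_FLAG_STATE_READ_NEXT
 * marker that the helper above advances to the next matching entry.
 */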
    
    /* find an entry with matching function, matching index (if needed), and that
     * should be read next (if it's stateful) */
    static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
    	u32 function, u32 index)
    {
    	if (e->function != function)
    		return 0;
    	if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
    		return 0;
    	if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
    
	    !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
		return 0;
	return 1;
}

    struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
					      u32 function, u32 index)
{
	int i;
	struct kvm_cpuid_entry2 *best = NULL;
    
	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
		struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];

    
    		if (is_matching_cpuid_entry(e, function, index)) {
    			if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
    				move_to_next_stateful_cpuid_entry(vcpu, i);
    
    			best = e;
    			break;
    		}
    		/*
    		 * Both basic or both extended?
    		 */
    		if (((e->function ^ function) & 0x80000000) == 0)
    			if (!best || e->function > best->function)
    				best = e;
	}
	return best;
}
    EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
    
    int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
    {
    	struct kvm_cpuid_entry2 *best;
    
    
    	best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
    	if (!best || best->eax < 0x80000008)
    		goto not_found;
    
    	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
    	if (best)
		return best->eax & 0xff;
not_found:
	return 36;
}

    void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
    {
    	u32 function, index;
    	struct kvm_cpuid_entry2 *best;
    
    	function = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	index = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
    	kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
    	kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
    	kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
    	best = kvm_find_cpuid_entry(vcpu, function, index);
    
	if (best) {
		kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
    		kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
    		kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
    		kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
    
    	}
    	kvm_x86_ops->skip_emulated_instruction(vcpu);
    
    	trace_kvm_cpuid(function,
    			kvm_register_read(vcpu, VCPU_REGS_RAX),
    			kvm_register_read(vcpu, VCPU_REGS_RBX),
    			kvm_register_read(vcpu, VCPU_REGS_RCX),
    			kvm_register_read(vcpu, VCPU_REGS_RDX));
    
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
    
    /*
     * Check if userspace requested an interrupt window, and that the
     * interrupt window is open.
     *
     * No need to exit to userspace if we already have an interrupt queued.
     */
    
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
{
	return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
		vcpu->run->request_interrupt_window &&
		kvm_arch_interrupt_allowed(vcpu));
}
static void post_kvm_run_save(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
	kvm_run->cr8 = kvm_get_cr8(vcpu);
	kvm_run->apic_base = kvm_get_apic_base(vcpu);
	if (irqchip_in_kernel(vcpu->kvm))
		kvm_run->ready_for_interrupt_injection = 1;
	else
		kvm_run->ready_for_interrupt_injection =
			kvm_arch_interrupt_allowed(vcpu) &&
			!kvm_cpu_has_interrupt(vcpu) &&
			!kvm_event_needs_reinjection(vcpu);
}
    
    static void vapic_enter(struct kvm_vcpu *vcpu)
    {
    	struct kvm_lapic *apic = vcpu->arch.apic;
    	struct page *page;
    
    	if (!apic || !apic->vapic_addr)
    		return;
    
	page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);

	vcpu->arch.apic->vapic_page = page;
}
    
    static void vapic_exit(struct kvm_vcpu *vcpu)
    {
	struct kvm_lapic *apic = vcpu->arch.apic;
	int idx;
    
    
    	if (!apic || !apic->vapic_addr)
    		return;
    
    
    	idx = srcu_read_lock(&vcpu->kvm->srcu);
    
    	kvm_release_page_dirty(apic->vapic_page);
    	mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
    
    	srcu_read_unlock(&vcpu->kvm->srcu, idx);