    			case 0x01: /* VMMCALL */
    				if (c->modrm_mod != 3 || c->modrm_rm != 1)
    					return EMULATE_FAIL;
    				break;
    			case 0x34: /* sysenter */
    			case 0x35: /* sysexit */
    				if (c->modrm_mod != 0 || c->modrm_rm != 0)
    					return EMULATE_FAIL;
    				break;
    			case 0x05: /* syscall */
    				if (c->modrm_mod != 0 || c->modrm_rm != 0)
    					return EMULATE_FAIL;
    				break;
    			default:
    				return EMULATE_FAIL;
    			}
    
    			if (!(c->modrm_reg == 0 || c->modrm_reg == 3))
    				return EMULATE_FAIL;
    		}
    
    		++vcpu->stat.insn_emulation;
    		if (r) {
    			++vcpu->stat.insn_emulation_fail;
    			if (emulation_type & EMULTYPE_SKIP)
    				return EMULATE_FAIL;
    			return handle_emulation_failure(vcpu);
    		}
    	}
    
    	if (emulation_type & EMULTYPE_SKIP) {
    		kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip);
    		return EMULATE_DONE;
    	}
    
    
    	/* this is needed for the vmware backdoor interface to work since it
    	   changes register values during IO operations */
    	memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
    
    
    restart:
    	r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);

    	if (r)
    		return handle_emulation_failure(vcpu);
    
    	toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility);
    
    	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
    
    	memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
    
    	kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
    
    	if (vcpu->arch.emulate_ctxt.exception >= 0) {
    		inject_emulated_exception(vcpu);
    		return EMULATE_DONE;
    	}
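
    	/*
    	 * If the emulator stopped for port I/O or MMIO we return
    	 * EMULATE_DO_MMIO so the caller exits to userspace to complete the
    	 * access.  For writes there is nothing to feed back into the
    	 * emulator, so the pending count/flag is cleared here and the
    	 * emulator is not re-entered for them.
    	 */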
    
    
    	if (vcpu->arch.pio.count) {
    		if (!vcpu->arch.pio.in)
    			vcpu->arch.pio.count = 0;
    		return EMULATE_DO_MMIO;
    	}
    
    	if (vcpu->mmio_needed) {
    		if (vcpu->mmio_is_write)
    			vcpu->mmio_needed = 0;
    		return EMULATE_DO_MMIO;
    	}
    
    
    	if (vcpu->arch.emulate_ctxt.restart)
    		goto restart;
    
    	return EMULATE_DONE;
    }
    EXPORT_SYMBOL_GPL(emulate_instruction);
    
    int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
    {
    	unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu);
    	/* do not return to emulator after return from userspace */
    	vcpu->arch.pio.count = 0;
    	return ret;
    }
    EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
    
    static void bounce_off(void *info)
    {
    	/* nothing */
    }
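
    /*
     * bounce_off() is deliberately empty: smp_call_function_single() in
     * kvmclock_cpufreq_notifier() below uses it purely to interrupt a remote
     * CPU that may be running guest code, so that kvmclock is refreshed on
     * the next guest entry after a cpufreq change (see the comment inside
     * that function).
     */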
    
    static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
    				     void *data)
    {
    	struct cpufreq_freqs *freq = data;
    	struct kvm *kvm;
    	struct kvm_vcpu *vcpu;
    	int i, send_ipi = 0;
    
    	if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
    		return 0;
    	if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
    		return 0;
    
    	per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
    
    
    	spin_lock(&kvm_lock);
    	list_for_each_entry(kvm, &vm_list, vm_list) {
    
    		kvm_for_each_vcpu(i, vcpu, kvm) {
    
    			if (vcpu->cpu != freq->cpu)
    				continue;
    			if (!kvm_request_guest_time_update(vcpu))
    				continue;
    			if (vcpu->cpu != smp_processor_id())
    				send_ipi++;
    		}
    	}
    	spin_unlock(&kvm_lock);
    
    	if (freq->old < freq->new && send_ipi) {
    		/*
    		 * We upscale the frequency.  We must make sure the guest
    		 * doesn't see old kvmclock values while running with the new
    		 * frequency, otherwise we risk the guest seeing time go
    		 * backwards.
    		 *
    		 * In case we update the frequency for another cpu
    		 * (which might be in guest context) send an interrupt
    		 * to kick the cpu out of guest context.  Next time
    		 * guest context is entered kvmclock will be updated,
    		 * so the guest will not see stale values.
    		 */
    		smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
    	}
    	return 0;
    }
    
    static struct notifier_block kvmclock_cpufreq_notifier_block = {
            .notifier_call  = kvmclock_cpufreq_notifier
    };
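
    /*
     * kvm_timer_init() seeds the per-cpu TSC frequency table: when the TSC
     * rate can vary with cpufreq (no X86_FEATURE_CONSTANT_TSC) it registers
     * the notifier above and queries cpufreq for each online CPU; otherwise
     * every possible CPU simply uses the boot-time tsc_khz.
     */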
    
    
    static void kvm_timer_init(void)
    {
    	int cpu;
    
    	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
    		cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
    					  CPUFREQ_TRANSITION_NOTIFIER);
    
    		for_each_online_cpu(cpu) {
    			unsigned long khz = cpufreq_get(cpu);
    			if (!khz)
    				khz = tsc_khz;
    			per_cpu(cpu_tsc_khz, cpu) = khz;
    		}
    
    	} else {
    		for_each_possible_cpu(cpu)
    			per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
    	}
    }
    
    static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
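
    /*
     * current_vcpu and the callbacks below let perf attribute PMU samples
     * taken during an NMI to the guest: kvm_before_handle_nmi() and
     * kvm_after_handle_nmi() bracket NMI handling with the running vcpu, and
     * the callbacks report guest mode, guest CPL and the guest RIP to perf.
     */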
    
    static int kvm_is_in_guest(void)
    {
    	return percpu_read(current_vcpu) != NULL;
    }
    
    static int kvm_is_user_mode(void)
    {
    	int user_mode = 3;
    
    	if (percpu_read(current_vcpu))
    		user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu));
    
    	return user_mode != 0;
    }
    
    static unsigned long kvm_get_guest_ip(void)
    {
    	unsigned long ip = 0;
    
    	if (percpu_read(current_vcpu))
    		ip = kvm_rip_read(percpu_read(current_vcpu));
    
    	return ip;
    }
    
    static struct perf_guest_info_callbacks kvm_guest_cbs = {
    	.is_in_guest		= kvm_is_in_guest,
    	.is_user_mode		= kvm_is_user_mode,
    	.get_guest_ip		= kvm_get_guest_ip,
    };
    
    void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
    {
    	percpu_write(current_vcpu, vcpu);
    }
    EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
    
    void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
    {
    	percpu_write(current_vcpu, NULL);
    }
    EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
    
    
    int kvm_arch_init(void *opaque)
    {
    	int r;
    	struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;

    	if (kvm_x86_ops) {
    		printk(KERN_ERR "kvm: already loaded the other module\n");
    		r = -EEXIST;
    		goto out;
    	}
    
    	if (!ops->cpu_has_kvm_support()) {
    		printk(KERN_ERR "kvm: no hardware support\n");
    		r = -EOPNOTSUPP;
    		goto out;
    	}
    	if (ops->disabled_by_bios()) {
    		printk(KERN_ERR "kvm: disabled by bios\n");
    		r = -EOPNOTSUPP;
    		goto out;
    	}

    	r = kvm_mmu_module_init();
    	if (r)
    		goto out;
    
    	kvm_init_msr_list();
    
    
    	kvm_x86_ops = ops;
    	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
    
    	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
    	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
    
    			PT_DIRTY_MASK, PT64_NX_MASK, 0);
    
    	kvm_timer_init();

    	perf_register_guest_info_callbacks(&kvm_guest_cbs);
    
    
    	if (cpu_has_xsave)
    		host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);

    	return 0;

    out:
    	return r;
    }

    void kvm_arch_exit(void)
    {
    	perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
    
    
    	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
    		cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
    					    CPUFREQ_TRANSITION_NOTIFIER);
    	kvm_x86_ops = NULL;
    	kvm_mmu_module_exit();
    }
    
    int kvm_emulate_halt(struct kvm_vcpu *vcpu)
    {
    	++vcpu->stat.halt_exits;
    	if (irqchip_in_kernel(vcpu->kvm)) {
    
    		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
    
    		return 1;
    	} else {
    		vcpu->run->exit_reason = KVM_EXIT_HLT;
    		return 0;
    	}
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_halt);
    
    
    static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
    			   unsigned long a1)
    {
    	if (is_long_mode(vcpu))
    		return a0;
    	else
    		return a0 | ((gpa_t)a1 << 32);
    }
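
    /*
     * Hyper-V hypercalls pass the hypercall input value in RCX and the
     * input/output GPAs in RDX/R8 when the guest is in 64-bit mode; 32-bit
     * guests split each of them across EDX:EAX, EBX:ECX and EDI:ESI instead.
     * The result is returned in RAX (or EDX:EAX), as implemented below.
     */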
    
    
    int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
    {
    	u64 param, ingpa, outgpa, ret;
    	uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
    	bool fast, longmode;
    	int cs_db, cs_l;
    
    	/*
    	 * Per the Hyper-V spec, a hypercall raises #UD when issued from
    	 * non-zero CPL or from real mode.
    	 */
    
    	if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
    
    		kvm_queue_exception(vcpu, UD_VECTOR);
    		return 0;
    	}
    
    	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
    	longmode = is_long_mode(vcpu) && cs_l == 1;
    
    	if (!longmode) {
    
    		param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
    			(kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
    		ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
    			(kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
    		outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
    			(kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
    
    	}
    #ifdef CONFIG_X86_64
    	else {
    		param = kvm_register_read(vcpu, VCPU_REGS_RCX);
    		ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
    		outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
    	}
    #endif
    
    	code = param & 0xffff;
    	fast = (param >> 16) & 0x1;
    	rep_cnt = (param >> 32) & 0xfff;
    	rep_idx = (param >> 48) & 0xfff;
    
    	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
    
    
    	switch (code) {
    	case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
    		kvm_vcpu_on_spin(vcpu);
    		break;
    	default:
    		res = HV_STATUS_INVALID_HYPERCALL_CODE;
    		break;
    	}
    
    
    	ret = res | (((u64)rep_done & 0xfff) << 32);
    	if (longmode) {
    		kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
    	} else {
    		kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
    		kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
    	}
    
    	return 1;
    }
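
    /*
     * kvm_emulate_hypercall() implements KVM's native hypercall ABI as read
     * below: the hypercall number arrives in RAX, up to four arguments in
     * RBX, RCX, RDX and RSI, and the result is written back to RAX; calls
     * from CPL > 0 fail with -KVM_EPERM.  A guest-side invocation would look
     * roughly like this (illustrative sketch only, not part of this file):
     *
     *	static long kvm_hypercall1(unsigned int nr, unsigned long p1)
     *	{
     *		long ret;
     *
     *		// "vmcall" is patched to the vendor's native form via
     *		// kvm_fix_hypercall() below if needed.
     *		asm volatile("vmcall"
     *			     : "=a"(ret)
     *			     : "a"(nr), "b"(p1)
     *			     : "memory");
     *		return ret;
     *	}
     */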
    
    
    int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
    {
    	unsigned long nr, a0, a1, a2, a3, ret;
    	int r = 1;
    
    	if (kvm_hv_hypercall_enabled(vcpu->kvm))
    		return kvm_hv_hypercall(vcpu);
    
    
    	nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
    	a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
    	a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
    
    	trace_kvm_hypercall(nr, a0, a1, a2, a3);
    
    	if (!is_long_mode(vcpu)) {
    		nr &= 0xFFFFFFFF;
    		a0 &= 0xFFFFFFFF;
    		a1 &= 0xFFFFFFFF;
    		a2 &= 0xFFFFFFFF;
    		a3 &= 0xFFFFFFFF;
    	}
    
    
    	if (kvm_x86_ops->get_cpl(vcpu) != 0) {
    		ret = -KVM_EPERM;
    		goto out;
    	}
    
    
    	switch (nr) {
    	case KVM_HC_VAPIC_POLL_IRQ:
    		ret = 0;
    		break;
    	case KVM_HC_MMU_OP:
    		r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
    		break;
    	default:
    		ret = -KVM_ENOSYS;
    		break;
    	}
    out:
    	kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
    
    	++vcpu->stat.hypercalls;
    
    	return r;
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
    
    int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
    {
    	char instruction[3];
    
    	unsigned long rip = kvm_rip_read(vcpu);
    
    
    	/*
    	 * Blow out the MMU so that no other VCPU keeps an active mapping;
    	 * this ensures the updated hypercall appears atomically across all
    	 * VCPUs.
    	 */
    	kvm_mmu_zap_all(vcpu->kvm);
    
    	kvm_x86_ops->patch_hypercall(vcpu, instruction);
    
    
    	return emulator_write_emulated(rip, instruction, 3, NULL, vcpu);
    
    }
    
    void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
    {
    
    	struct desc_ptr dt = { limit, base };
    
    
    	kvm_x86_ops->set_gdt(vcpu, &dt);
    }
    
    void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
    {
    
    	struct desc_ptr dt = { limit, base };

    	kvm_x86_ops->set_idt(vcpu, &dt);
    }
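
    /*
     * Some CPUID leaves are stateful (KVM_CPUID_FLAG_STATEFUL_FUNC): they
     * return different data on successive reads.  The helper below advances
     * the KVM_CPUID_FLAG_STATE_READ_NEXT marker to the next entry with the
     * same function number, wrapping around when it reaches the end.
     */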
    
    static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
    {
    
    	struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
    	int j, nent = vcpu->arch.cpuid_nent;
    
    
    	e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
    	/* when no next entry is found, the current entry[i] is reselected */
    
    	for (j = i + 1; ; j = (j + 1) % nent) {
    
    		struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
    
    		if (ej->function == e->function) {
    			ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
    			return j;
    		}
    	}
    	return 0; /* silence gcc, even though control never reaches here */
    }
    
    /* find an entry with matching function, matching index (if needed), and that
     * should be read next (if it's stateful) */
    static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
    	u32 function, u32 index)
    {
    	if (e->function != function)
    		return 0;
    	if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
    		return 0;
    	if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
    
    	    !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
    		return 0;
    	return 1;
    }
    
    struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
    					      u32 function, u32 index)
    {
    	int i;
    	struct kvm_cpuid_entry2 *best = NULL;
    
    	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
    
    		struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
    
    		if (is_matching_cpuid_entry(e, function, index)) {
    			if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
    				move_to_next_stateful_cpuid_entry(vcpu, i);
    
    			best = e;
    			break;
    		}
    		/*
    		 * Both basic or both extended?
    		 */
    		if (((e->function ^ function) & 0x80000000) == 0)
    			if (!best || e->function > best->function)
    				best = e;
    	}
    	return best;
    }
    
    EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
    
    int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
    {
    	struct kvm_cpuid_entry2 *best;
    
    
    	best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
    	if (!best || best->eax < 0x80000008)
    		goto not_found;
    
    	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
    	if (best)
    		return best->eax & 0xff;
    not_found:
    	return 36;
    }
    
    void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
    {
    	u32 function, index;
    	struct kvm_cpuid_entry2 *best;
    
    	function = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	index = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
    	kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
    	kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
    	kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
    	best = kvm_find_cpuid_entry(vcpu, function, index);
    	if (best) {
    		kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
    		kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
    		kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
    		kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
    
    	}
    	kvm_x86_ops->skip_emulated_instruction(vcpu);
    
    	trace_kvm_cpuid(function,
    			kvm_register_read(vcpu, VCPU_REGS_RAX),
    			kvm_register_read(vcpu, VCPU_REGS_RBX),
    			kvm_register_read(vcpu, VCPU_REGS_RCX),
    			kvm_register_read(vcpu, VCPU_REGS_RDX));
    
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
    
    /*
     * Check if userspace requested an interrupt window, and that the
     * interrupt window is open.
     *
     * No need to exit to userspace if we already have an interrupt queued.
     */
    
    static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
    {
    	return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
    		vcpu->run->request_interrupt_window &&
    		kvm_arch_interrupt_allowed(vcpu));
    }
    
    static void post_kvm_run_save(struct kvm_vcpu *vcpu)
    {
    	struct kvm_run *kvm_run = vcpu->run;
    
    
    	kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
    
    	kvm_run->apic_base = kvm_get_apic_base(vcpu);
    
    	if (irqchip_in_kernel(vcpu->kvm))
    
    		kvm_run->ready_for_interrupt_injection = 1;
    	else
    		kvm_run->ready_for_interrupt_injection =
    
    			kvm_arch_interrupt_allowed(vcpu) &&
    			!kvm_cpu_has_interrupt(vcpu) &&
    			!kvm_event_needs_reinjection(vcpu);
    }
    
    static void vapic_enter(struct kvm_vcpu *vcpu)
    {
    	struct kvm_lapic *apic = vcpu->arch.apic;
    	struct page *page;
    
    	if (!apic || !apic->vapic_addr)
    		return;
    
    	page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);

    	vcpu->arch.apic->vapic_page = page;
    }
    
    static void vapic_exit(struct kvm_vcpu *vcpu)
    {
    	struct kvm_lapic *apic = vcpu->arch.apic;
    	int idx;
    
    
    	if (!apic || !apic->vapic_addr)
    		return;
    
    
    	idx = srcu_read_lock(&vcpu->kvm->srcu);
    
    	kvm_release_page_dirty(apic->vapic_page);
    	mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
    
    	srcu_read_unlock(&vcpu->kvm->srcu, idx);
    }
    
    static void update_cr8_intercept(struct kvm_vcpu *vcpu)
    {
    	int max_irr, tpr;
    
    	if (!kvm_x86_ops->update_cr8_intercept)
    		return;
    
    
    	if (!vcpu->arch.apic->vapic_addr)
    		max_irr = kvm_lapic_find_highest_irr(vcpu);
    	else
    		max_irr = -1;
    
    
    	if (max_irr != -1)
    		max_irr >>= 4;
    
    	tpr = kvm_lapic_get_cr8(vcpu);
    
    	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
    }
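
    /*
     * inject_pending_event() first re-injects whatever was already being
     * delivered when the last exit interrupted it (a queued exception, then
     * an NMI, then an external interrupt), and only if nothing needs
     * re-injection does it consider delivering a new NMI or interrupt,
     * gated by nmi_allowed()/interrupt_allowed().
     */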
    
    
    static void inject_pending_event(struct kvm_vcpu *vcpu)
    {
    	/* try to reinject previous events if any */
    
    	if (vcpu->arch.exception.pending) {
    
    		trace_kvm_inj_exception(vcpu->arch.exception.nr,
    					vcpu->arch.exception.has_error_code,
    					vcpu->arch.exception.error_code);
    
    		kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
    					  vcpu->arch.exception.has_error_code,
    
    					  vcpu->arch.exception.error_code,
    					  vcpu->arch.exception.reinject);
    		return;
    	}
    
    	if (vcpu->arch.nmi_injected) {
    		kvm_x86_ops->set_nmi(vcpu);
    		return;
    	}
    
    	if (vcpu->arch.interrupt.pending) {
    
    		kvm_x86_ops->set_irq(vcpu);
    
    		return;
    	}
    
    	/* try to inject new event if pending */
    	if (vcpu->arch.nmi_pending) {
    		if (kvm_x86_ops->nmi_allowed(vcpu)) {
    			vcpu->arch.nmi_pending = false;
    			vcpu->arch.nmi_injected = true;
    			kvm_x86_ops->set_nmi(vcpu);
    		}
    	} else if (kvm_cpu_has_interrupt(vcpu)) {
    		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
    
    			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
    					    false);
    			kvm_x86_ops->set_irq(vcpu);
    		}
    	}
    }
    
    static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
    {
    	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
    			!vcpu->guest_xcr0_loaded) {
    		/* kvm_set_xcr() also depends on this */
    		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
    		vcpu->guest_xcr0_loaded = 1;
    	}
    }
    
    static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
    {
    	if (vcpu->guest_xcr0_loaded) {
    		if (vcpu->arch.xcr0 != host_xcr0)
    			xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
    		vcpu->guest_xcr0_loaded = 0;
    	}
    }
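
    /*
     * vcpu_enter_guest() performs one guest entry/exit cycle: service any
     * pending KVM_REQ_* requests, disable preemption and interrupts, set
     * guest_mode and re-check for late requests or signals (backing out if
     * any arrived), inject pending events, run the guest via
     * kvm_x86_ops->run(), and finally hand the exit to
     * kvm_x86_ops->handle_exit().
     */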
    
    
    static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
    {
    	int r;
    	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
    		vcpu->run->request_interrupt_window;

    	if (vcpu->requests) {
    		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
    
    			kvm_mmu_unload(vcpu);
    
    		if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
    
    			__kvm_migrate_timers(vcpu);
    
    		if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu))
    
    			kvm_write_guest_time(vcpu);
    
    		if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
    
    			kvm_mmu_sync_roots(vcpu);
    
    		if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
    
    			kvm_x86_ops->tlb_flush(vcpu);
    
    		if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
    
    			vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
    
    			r = 0;
    			goto out;
    		}
    
    		if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
    
    			vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
    			r = 0;
    			goto out;
    		}
    
    		if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
    
    			vcpu->fpu_active = 0;
    			kvm_x86_ops->fpu_deactivate(vcpu);
    		}
    	}
    
    	r = kvm_mmu_reload(vcpu);
    	if (unlikely(r))
    		goto out;
    
    
    	preempt_disable();
    
    	kvm_x86_ops->prepare_guest_switch(vcpu);
    
    	if (vcpu->fpu_active)
    		kvm_load_guest_fpu(vcpu);
    
    	kvm_load_guest_xcr0(vcpu);
    
    	atomic_set(&vcpu->guest_mode, 1);
    	smp_wmb();
    
    	local_irq_disable();
    
    	if (!atomic_read(&vcpu->guest_mode) || vcpu->requests
    	    || need_resched() || signal_pending(current)) {
    		atomic_set(&vcpu->guest_mode, 0);
    		smp_wmb();
    
    		local_irq_enable();
    		preempt_enable();
    		r = 1;
    		goto out;
    	}
    
    
    	inject_pending_event(vcpu);
    
    	/* enable NMI/IRQ window open exits if needed */
    	if (vcpu->arch.nmi_pending)
    		kvm_x86_ops->enable_nmi_window(vcpu);
    	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
    		kvm_x86_ops->enable_irq_window(vcpu);
    
    
    	if (kvm_lapic_enabled(vcpu)) {
    
    		update_cr8_intercept(vcpu);
    		kvm_lapic_sync_to_vapic(vcpu);
    	}
    
    	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);

    	kvm_guest_enter();
    
    	if (unlikely(vcpu->arch.switch_db_regs)) {
    		set_debugreg(0, 7);
    		set_debugreg(vcpu->arch.eff_db[0], 0);
    		set_debugreg(vcpu->arch.eff_db[1], 1);
    		set_debugreg(vcpu->arch.eff_db[2], 2);
    		set_debugreg(vcpu->arch.eff_db[3], 3);
    	}
    
    	trace_kvm_entry(vcpu->vcpu_id);
    
    	kvm_x86_ops->run(vcpu);
    
    	/*
    	 * If the guest has used debug registers, at least dr7
    	 * will be disabled while returning to the host.
    	 * If we don't have active breakpoints in the host, we don't
    	 * care about the messed up debug address registers. But if
    	 * we have some of them active, restore the old state.
    	 */
    	if (hw_breakpoint_active())
    		hw_breakpoint_restore();
    
    	atomic_set(&vcpu->guest_mode, 0);
    	smp_wmb();
    
    	local_irq_enable();
    
    	++vcpu->stat.exits;
    
    	/*
    	 * We must have an instruction between local_irq_enable() and
    	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
    	 * the interrupt shadow.  The stat.exits increment will do nicely.
    	 * But we need to prevent reordering, hence this barrier():
    	 */
    	barrier();
    
    	kvm_guest_exit();
    
    	preempt_enable();
    
    
    	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
    
    	/*
    	 * Profile KVM exit RIPs:
    	 */
    	if (unlikely(prof_on == KVM_PROFILING)) {
    
    		unsigned long rip = kvm_rip_read(vcpu);
    		profile_hit(KVM_PROFILING, (void *)rip);
    	}
    
    	kvm_lapic_sync_from_vapic(vcpu);
    
    
    	r = kvm_x86_ops->handle_exit(vcpu);
    out:
    	return r;
    }
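
    /*
     * __vcpu_run() is the outer vcpu loop: it keeps calling
     * vcpu_enter_guest() while the vcpu is RUNNABLE, blocks in
     * kvm_vcpu_block() when it is halted, and leaves the loop for userspace
     * exits, pending signals or interrupt-window requests.
     */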
    
    static int __vcpu_run(struct kvm_vcpu *vcpu)
    {
    	int r;
    	struct kvm *kvm = vcpu->kvm;
    
    
    	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
    
    		pr_debug("vcpu %d received sipi with vector # %x\n",
    			 vcpu->vcpu_id, vcpu->arch.sipi_vector);
    
    		kvm_lapic_reset(vcpu);
    		r = kvm_arch_vcpu_reset(vcpu);
    		if (r)
    			return r;
    		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
    	}
    
    	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
    
    	vapic_enter(vcpu);
    
    	r = 1;
    	while (r > 0) {
    
    		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
    
    			r = vcpu_enter_guest(vcpu);
    		else {
    
    			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
    
    			kvm_vcpu_block(vcpu);
    
    			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
    
    			if (kvm_check_request(KVM_REQ_UNHALT, vcpu))
    
    			{
    				switch(vcpu->arch.mp_state) {
    				case KVM_MP_STATE_HALTED:
    
    					vcpu->arch.mp_state =
    
    						KVM_MP_STATE_RUNNABLE;
    				case KVM_MP_STATE_RUNNABLE:
    					break;
    				case KVM_MP_STATE_SIPI_RECEIVED:
    				default:
    					r = -EINTR;
    					break;
    				}
    			}
    		}
    
    		if (r <= 0)
    			break;
    
    		clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
    		if (kvm_cpu_has_pending_timer(vcpu))
    			kvm_inject_pending_timer_irqs(vcpu);
    
    
    		if (dm_request_for_irq_injection(vcpu)) {
    
    			r = -EINTR;
    			vcpu->run->exit_reason = KVM_EXIT_INTR;
    
    			++vcpu->stat.request_irq_exits;
    		}
    		if (signal_pending(current)) {
    			r = -EINTR;
    
    			vcpu->run->exit_reason = KVM_EXIT_INTR;
    
    			++vcpu->stat.signal_exits;
    		}
    		if (need_resched()) {
    
    			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
    			kvm_resched(vcpu);
    			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
    		}
    	}
    
    	srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
    
    	vapic_exit(vcpu);
    
    
    	return r;
    }
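
    /*
     * kvm_arch_vcpu_ioctl_run() backs the KVM_RUN ioctl: it installs the
     * vcpu's signal mask, completes any emulation that was waiting on a
     * userspace PIO/MMIO exit, runs the vcpu loop above and then restores
     * the signal mask before returning to userspace.
     */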
    
    int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
    {
    	int r;
    	sigset_t sigsaved;
    
    
    	if (vcpu->sigset_active)
    		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
    
    
    	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
    
    		kvm_vcpu_block(vcpu);
    		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
    		r = -EAGAIN;
    		goto out;
    
    	}
    
    	/* re-sync apic's tpr */
    	if (!irqchip_in_kernel(vcpu->kvm))
    		kvm_set_cr8(vcpu, kvm_run->cr8);
    
    	if (vcpu->arch.pio.count || vcpu->mmio_needed ||
    	    vcpu->arch.emulate_ctxt.restart) {
    		if (vcpu->mmio_needed) {
    			memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
    			vcpu->mmio_read_completed = 1;
    			vcpu->mmio_needed = 0;
    		}
    
    		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
    
    		r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE);
    
    		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
    		if (r != EMULATE_DONE) {
    			r = 0;
    			goto out;
    		}
    	}
    
    	if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
    		kvm_register_write(vcpu, VCPU_REGS_RAX,
    				     kvm_run->hypercall.ret);
    
    	r = __vcpu_run(vcpu);
    
    out:
    	post_kvm_run_save(vcpu);
    	if (vcpu->sigset_active)
    		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
    
    	return r;
    }
    
    int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
    {
    
    	regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
    	regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
    	regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
    	regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
    	regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
    	regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
    
    	regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
    	regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
    	regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
    	regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
    	regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
    	regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
    	regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
    	regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
    
    	regs->rip = kvm_rip_read(vcpu);
    
    	regs->rflags = kvm_get_rflags(vcpu);
    
    
    	return 0;
    }
    
    int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
    {
    
    	kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
    	kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
    	kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
    	kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
    	kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
    	kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
    	kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
    	kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
    
    	kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
    	kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
    	kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
    	kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
    	kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
    	kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
    	kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
    	kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
    
    	kvm_rip_write(vcpu, regs->rip);
    
    	kvm_set_rflags(vcpu, regs->rflags);
    
    	vcpu->arch.exception.pending = false;
    
    
    	return 0;
    }
    
    void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
    {
    	struct kvm_segment cs;
    
    
    	kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
    
    	*db = cs.db;
    	*l = cs.l;
    }
    EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
    
    int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
    				  struct kvm_sregs *sregs)
    {
    
    	kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
    	kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
    	kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
    	kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
    	kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
    	kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
    
    	kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
    	kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);