int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	if (type)
		return -EINVAL;
    
    
    	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
    
    	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
    
    	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
    
    	atomic_set(&kvm->arch.noncoherent_dma_count, 0);
    
    	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
    	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
    
    	/* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
    	set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
    		&kvm->arch.irq_sources_bitmap);
    
    	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
    
    	mutex_init(&kvm->arch.apic_map_lock);
    
    	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
    
	pvclock_update_vm_gtod_copy(kvm);

	return 0;
}
    
    static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
    {
    
    	int r;
    	r = vcpu_load(vcpu);
    	BUG_ON(r);
    
    	kvm_mmu_unload(vcpu);
    	vcpu_put(vcpu);
    }
    
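/*
 * Tear down every vCPU: unload each vCPU's MMU first so no shadow pages
 * stay pinned, then free the vCPU structures and clear the kvm->vcpus[]
 * slots under kvm->lock.
 */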
    static void kvm_free_vcpus(struct kvm *kvm)
    {
    	unsigned int i;
    
    	struct kvm_vcpu *vcpu;
    
    	kvm_for_each_vcpu(i, vcpu, kvm) {
    		kvm_clear_async_pf_completion_queue(vcpu);
    
		kvm_unload_vcpu_mmu(vcpu);
	}

    	kvm_for_each_vcpu(i, vcpu, kvm)
    		kvm_arch_vcpu_free(vcpu);
    
    	mutex_lock(&kvm->lock);
    	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
    		kvm->vcpus[i] = NULL;
    
    	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

    void kvm_arch_sync_events(struct kvm *kvm)
    {
    
	kvm_free_all_assigned_devices(kvm);
}

    void kvm_arch_destroy_vm(struct kvm *kvm)
    {
    
    	if (current->mm == kvm->mm) {
    		/*
    		 * Free memory regions allocated on behalf of userspace,
		 * unless the memory map has changed due to process exit
    		 * or fd copying.
    		 */
    		struct kvm_userspace_memory_region mem;
    		memset(&mem, 0, sizeof(mem));
    		mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
    		kvm_set_memory_region(kvm, &mem);
    
    		mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
    		kvm_set_memory_region(kvm, &mem);
    
    		mem.slot = TSS_PRIVATE_MEMSLOT;
    		kvm_set_memory_region(kvm, &mem);
    	}
    
    	kvm_iommu_unmap_guest(kvm);
    
    	kfree(kvm->arch.vpic);
    	kfree(kvm->arch.vioapic);
    
    	if (kvm->arch.apic_access_page)
    		put_page(kvm->arch.apic_access_page);
    
    	if (kvm->arch.ept_identity_pagetable)
    		put_page(kvm->arch.ept_identity_pagetable);
    
	kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
}

    void kvm_arch_free_memslot(struct kvm_memory_slot *free,
    			   struct kvm_memory_slot *dont)
    {
    	int i;
    
    
    	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
    		if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
    			kvm_kvfree(free->arch.rmap[i]);
			kvm_kvfree(free->arch.rmap[i]);
			free->arch.rmap[i] = NULL;
		}

    		if (i == 0)
    			continue;
    
    		if (!dont || free->arch.lpage_info[i - 1] !=
    			     dont->arch.lpage_info[i - 1]) {
    			kvm_kvfree(free->arch.lpage_info[i - 1]);
    			free->arch.lpage_info[i - 1] = NULL;
    
    		}
    	}
    }
    
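/*
 * Allocate the per-memslot MMU bookkeeping: one rmap array per page-size
 * level and, for the huge-page levels, an lpage_info array.  Entries whose
 * write_count is made nonzero (unaligned head/tail pages, or every page when
 * large pages are disabled or gfn and hva are misaligned) will not be mapped
 * with huge pages.
 */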
    int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
    {
    	int i;
    
    
	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
		unsigned long ugfn;
		int lpages;
		int level = i + 1;

    		lpages = gfn_to_index(slot->base_gfn + npages - 1,
    				      slot->base_gfn, level) + 1;
    
    
    		slot->arch.rmap[i] =
    			kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
		if (!slot->arch.rmap[i])
			goto out_free;

    		if (i == 0)
    			continue;
    
    		slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages *
    					sizeof(*slot->arch.lpage_info[i - 1]));
		if (!slot->arch.lpage_info[i - 1])
			goto out_free;
    
		if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
			slot->arch.lpage_info[i - 1][0].write_count = 1;
		if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
			slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1;
    
    		ugfn = slot->userspace_addr >> PAGE_SHIFT;
    		/*
    		 * If the gfn and userspace address are not aligned wrt each
    		 * other, or if explicitly asked to, disable large page
    		 * support for this slot
    		 */
    		if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
    		    !kvm_largepages_enabled()) {
    			unsigned long j;
    
			for (j = 0; j < lpages; ++j)
				slot->arch.lpage_info[i - 1][j].write_count = 1;
		}
	}

	return 0;

out_free:
	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
    		kvm_kvfree(slot->arch.rmap[i]);
    		slot->arch.rmap[i] = NULL;
    		if (i == 0)
    			continue;
    
    		kvm_kvfree(slot->arch.lpage_info[i - 1]);
		slot->arch.lpage_info[i - 1] = NULL;
	}
	return -ENOMEM;
}

    void kvm_arch_memslots_updated(struct kvm *kvm)
    {
    
    	/*
    	 * memslots->generation has been incremented.
    	 * mmio generation may have reached its maximum value.
    	 */
	kvm_mmu_invalidate_mmio_sptes(kvm);
}

    int kvm_arch_prepare_memory_region(struct kvm *kvm,
    				struct kvm_memory_slot *memslot,
				struct kvm_userspace_memory_region *mem,
				enum kvm_mr_change change)
{
    	/*
    	 * Only private memory slots need to be mapped here since
	 * KVM_SET_MEMORY_REGION ioctl is no longer supported.
	 */
	if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) {
    
    		unsigned long userspace_addr;
    
    		/*
    		 * MAP_SHARED to prevent internal slot pages from being moved
    		 * by fork()/COW.
    		 */
    
		userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE,
					 PROT_READ | PROT_WRITE,
					 MAP_SHARED | MAP_ANONYMOUS, 0);
    
    		if (IS_ERR((void *)userspace_addr))
    			return PTR_ERR((void *)userspace_addr);
    
		memslot->userspace_addr = userspace_addr;
	}

	return 0;
    }
    
    void kvm_arch_commit_memory_region(struct kvm *kvm,
				struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				enum kvm_mr_change change)
{
	int nr_mmu_pages = 0;

	if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) {
		int ret;

		ret = vm_munmap(old->userspace_addr,
    				old->npages * PAGE_SIZE);
    
    		if (ret < 0)
    			printk(KERN_WARNING
    			       "kvm_vm_ioctl_set_memory_region: "
    			       "failed to munmap memory\n");
    	}
    
    
    	if (!kvm->arch.n_requested_mmu_pages)
    		nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
    
	if (nr_mmu_pages)
		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
    
    	/*
    	 * Write protect all pages for dirty logging.
    	 * Existing largepage mappings are destroyed here and new ones will
    	 * not be created until the end of the logging.
    	 */
    
	if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
		kvm_mmu_slot_remove_write_access(kvm, mem->slot);
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	kvm_mmu_invalidate_zap_all_pages(kvm);
}

    void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
    				   struct kvm_memory_slot *slot)
    {
    
	kvm_mmu_invalidate_zap_all_pages(kvm);
}

    int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
    {
    
	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
		!vcpu->arch.apf.halted)
		|| !list_empty_careful(&vcpu->async_pf.done)
		|| kvm_apic_has_events(vcpu)
		|| atomic_read(&vcpu->arch.nmi_queued) ||
		(kvm_arch_interrupt_allowed(vcpu) &&
		 kvm_cpu_has_interrupt(vcpu));
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
    
    
    int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
    {
    	return kvm_x86_ops->interrupt_allowed(vcpu);
    }
    
    bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
    {
    	unsigned long current_rip = kvm_rip_read(vcpu) +
    		get_segment_base(vcpu, VCPU_SREG_CS);
    
    	return current_rip == linear_rip;
    }
    EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
    
    
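/*
 * When userspace single-steps the guest (KVM_GUESTDBG_SINGLESTEP), TF is set
 * on the guest's behalf without its knowledge: kvm_get_rflags() hides that
 * TF from readers, and kvm_set_rflags() re-applies it while we are still
 * stepping over the saved linear RIP.
 */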
    unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
    {
    	unsigned long rflags;
    
    	rflags = kvm_x86_ops->get_rflags(vcpu);
	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
		rflags &= ~X86_EFLAGS_TF;
    
    	return rflags;
    }
    EXPORT_SYMBOL_GPL(kvm_get_rflags);
    
    void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
    {
	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
	    kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
		rflags |= X86_EFLAGS_TF;
    
    	kvm_x86_ops->set_rflags(vcpu, rflags);
    
    	kvm_make_request(KVM_REQ_EVENT, vcpu);
    
    }
    EXPORT_SYMBOL_GPL(kvm_set_rflags);
    
    
    void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
    {
    	int r;
    
    
	if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
	      work->wakeup_all)
		return;
    
    	r = kvm_mmu_reload(vcpu);
    	if (unlikely(r))
    		return;
    
    
    	if (!vcpu->arch.mmu.direct_map &&
    	      work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
    		return;
    
    
    	vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
    }
    
    
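/*
 * Outstanding async page faults are tracked per vCPU in a small
 * open-addressed, linearly probed hash table of gfns (arch.apf.gfns),
 * sized to the power of two covering ASYNC_PF_PER_VCPU; ~0 marks a free
 * slot.
 */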
    static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
    {
    	return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
    }
    
    static inline u32 kvm_async_pf_next_probe(u32 key)
    {
    	return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
    }
    
    static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
    {
    	u32 key = kvm_async_pf_hash_fn(gfn);
    
    	while (vcpu->arch.apf.gfns[key] != ~0)
    		key = kvm_async_pf_next_probe(key);
    
    	vcpu->arch.apf.gfns[key] = gfn;
    }
    
    static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
    {
    	int i;
    	u32 key = kvm_async_pf_hash_fn(gfn);
    
	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
		     (vcpu->arch.apf.gfns[key] != gfn &&
		      vcpu->arch.apf.gfns[key] != ~0); i++)
		key = kvm_async_pf_next_probe(key);
    
    	return key;
    }
    
    bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
    {
    	return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
    }
    
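/*
 * Removing an entry from the open-addressed table must not simply clear the
 * slot: a later lookup could stop at the hole before reaching a colliding
 * gfn that had been probed past it.  So this is the classic backward-shift
 * deletion: walk the probe chain and move back any entry whose home bucket
 * does not lie in the cyclic range ]i, j], re-filling the freed slot.
 */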
    static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
    {
    	u32 i, j, k;
    
    	i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
    	while (true) {
    		vcpu->arch.apf.gfns[i] = ~0;
    		do {
    			j = kvm_async_pf_next_probe(j);
    			if (vcpu->arch.apf.gfns[j] == ~0)
    				return;
    			k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
    			/*
    			 * k lies cyclically in ]i,j]
    			 * |    i.k.j |
    			 * |....j i.k.| or  |.k..j i...|
    			 */
    		} while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
    		vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
    		i = j;
    	}
    }
    
    
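/*
 * Write the 32-bit fault reason into the per-vCPU area the guest registered
 * for async page fault delivery, using the cached mapping in arch.apf.data.
 */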
    static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
    {
    
    	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
    				      sizeof(val));
    }
    
    
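/*
 * A guest page is not immediately available.  Record the gfn so the later
 * completion can be matched, then either halt the vCPU (async PF delivery
 * not enabled, or the guest asked for user-mode-only delivery while it is
 * running at CPL 0) or inject a synthetic "page not present" fault whose
 * address is the async-PF token rather than a real faulting address.
 */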
    void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
    				     struct kvm_async_pf *work)
{
	struct x86_exception fault;

	trace_kvm_async_pf_not_present(work->arch.token, work->gva);
    
    	kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
    
    
	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
	    (vcpu->arch.apf.send_user_only &&
	     kvm_x86_ops->get_cpl(vcpu) == 0))
		kvm_make_request(KVM_REQ_APF_HALT, vcpu);
    	else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
    
    		fault.vector = PF_VECTOR;
    		fault.error_code_valid = true;
    		fault.error_code = 0;
    		fault.nested_page_fault = false;
    		fault.address = work->arch.token;
		kvm_inject_page_fault(vcpu, &fault);
	}
}
    
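/*
 * The page behind an async fault is now available.  Drop the gfn from the
 * tracking table (or broadcast with token ~0 for a wakeup-all), inject a
 * "page ready" fault carrying the token if delivery is enabled, and make
 * the vCPU runnable again.
 */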
    void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
    				 struct kvm_async_pf *work)
{
	struct x86_exception fault;

	trace_kvm_async_pf_ready(work->arch.token, work->gva);
    
	if (work->wakeup_all)
		work->arch.token = ~0; /* broadcast wakeup */
    	else
    		kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
    
    	if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
    	    !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
    
    		fault.vector = PF_VECTOR;
    		fault.error_code_valid = true;
    		fault.error_code = 0;
    		fault.nested_page_fault = false;
    		fault.address = work->arch.token;
		kvm_inject_page_fault(vcpu, &fault);
	}
	vcpu->arch.apf.halted = false;
    
    	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
    
    }
    
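/*
 * A "page ready" notification is injected as an exception, so it can only be
 * delivered when nothing else needs reinjection and interrupts are allowed.
 * With async PF delivery disabled, completion merely wakes the vCPU, which
 * is always possible.
 */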
    bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
    {
    	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
    		return true;
    	else
    		return !kvm_event_needs_reinjection(vcpu) &&
			kvm_x86_ops->interrupt_allowed(vcpu);
}

    void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
    {
    	atomic_inc(&kvm->arch.noncoherent_dma_count);
    }
    EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
    
    void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
    {
    	atomic_dec(&kvm->arch.noncoherent_dma_count);
    }
    EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
    
    bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
    {
    	return atomic_read(&kvm->arch.noncoherent_dma_count);
    }
    EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
    
    
    EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
    EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
    EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
    EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
    EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
    
    EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
    
    EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
    
    EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
    
    EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
    
    EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
    
    EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
    
    EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
    
    EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);