Newer
Older
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (type)
return -EINVAL;
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
atomic_set(&kvm->arch.noncoherent_dma_count, 0);
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
/* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
&kvm->arch.irq_sources_bitmap);
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock);
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
pvclock_update_vm_gtod_copy(kvm);
}
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
int r;
r = vcpu_load(vcpu);
BUG_ON(r);
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
unsigned int i;
/*
* Unpin any mmu pages first.
*/
kvm_for_each_vcpu(i, vcpu, kvm) {
kvm_clear_async_pf_completion_queue(vcpu);
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_arch_vcpu_free(vcpu);
mutex_lock(&kvm->lock);
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
kvm->vcpus[i] = NULL;
atomic_set(&kvm->online_vcpus, 0);
mutex_unlock(&kvm->lock);
}
void kvm_arch_sync_events(struct kvm *kvm)
{
kvm_free_all_assigned_devices(kvm);
void kvm_arch_destroy_vm(struct kvm *kvm)
{
if (current->mm == kvm->mm) {
/*
* Free memory regions allocated on behalf of userspace,
* unless the the memory map has changed due to process exit
* or fd copying.
*/
struct kvm_userspace_memory_region mem;
memset(&mem, 0, sizeof(mem));
mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
kvm_set_memory_region(kvm, &mem);
mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
kvm_set_memory_region(kvm, &mem);
mem.slot = TSS_PRIVATE_MEMSLOT;
kvm_set_memory_region(kvm, &mem);
}
kvm_iommu_unmap_guest(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
if (kvm->arch.apic_access_page)
put_page(kvm->arch.apic_access_page);
if (kvm->arch.ept_identity_pagetable)
put_page(kvm->arch.ept_identity_pagetable);
kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
int i;
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
kvm_kvfree(free->arch.rmap[i]);
free->arch.rmap[i] = NULL;
if (i == 0)
continue;
if (!dont || free->arch.lpage_info[i - 1] !=
dont->arch.lpage_info[i - 1]) {
kvm_kvfree(free->arch.lpage_info[i - 1]);
free->arch.lpage_info[i - 1] = NULL;
}
}
}
int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
{
int i;
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
unsigned long ugfn;
int lpages;
lpages = gfn_to_index(slot->base_gfn + npages - 1,
slot->base_gfn, level) + 1;
slot->arch.rmap[i] =
kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
if (!slot->arch.rmap[i])
goto out_free;
slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages *
sizeof(*slot->arch.lpage_info[i - 1]));
if (!slot->arch.lpage_info[i - 1])
goto out_free;
if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
slot->arch.lpage_info[i - 1][0].write_count = 1;
if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1;
ugfn = slot->userspace_addr >> PAGE_SHIFT;
/*
* If the gfn and userspace address are not aligned wrt each
* other, or if explicitly asked to, disable large page
* support for this slot
*/
if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
!kvm_largepages_enabled()) {
unsigned long j;
for (j = 0; j < lpages; ++j)
slot->arch.lpage_info[i - 1][j].write_count = 1;
}
}
return 0;
out_free:
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
kvm_kvfree(slot->arch.rmap[i]);
slot->arch.rmap[i] = NULL;
if (i == 0)
continue;
kvm_kvfree(slot->arch.lpage_info[i - 1]);
slot->arch.lpage_info[i - 1] = NULL;
}
return -ENOMEM;
}
void kvm_arch_memslots_updated(struct kvm *kvm)
{
/*
* memslots->generation has been incremented.
* mmio generation may have reached its maximum value.
*/
kvm_mmu_invalidate_mmio_sptes(kvm);
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
/*
* Only private memory slots need to be mapped here since
* KVM_SET_MEMORY_REGION ioctl is no longer supported.
*/
if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) {
unsigned long userspace_addr;
/*
* MAP_SHARED to prevent internal slot pages from being moved
* by fork()/COW.
*/
userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE,
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, 0);
if (IS_ERR((void *)userspace_addr))
return PTR_ERR((void *)userspace_addr);
memslot->userspace_addr = userspace_addr;
}
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
enum kvm_mr_change change)
int nr_mmu_pages = 0;
if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) {
int ret;
ret = vm_munmap(old->userspace_addr,
old->npages * PAGE_SIZE);
if (ret < 0)
printk(KERN_WARNING
"kvm_vm_ioctl_set_memory_region: "
"failed to munmap memory\n");
}
if (!kvm->arch.n_requested_mmu_pages)
nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
if (nr_mmu_pages)
kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
/*
* Write protect all pages for dirty logging.
* Existing largepage mappings are destroyed here and new ones will
* not be created until the end of the logging.
*/
if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
kvm_mmu_slot_remove_write_access(kvm, mem->slot);
}
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
kvm_mmu_invalidate_zap_all_pages(kvm);
}
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
kvm_mmu_invalidate_zap_all_pages(kvm);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted)
|| !list_empty_careful(&vcpu->async_pf.done)

Srivatsa Vaddagiri
committed
|| vcpu->arch.pv.pv_unhalted
(kvm_arch_interrupt_allowed(vcpu) &&
kvm_cpu_has_interrupt(vcpu));
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
return kvm_x86_ops->interrupt_allowed(vcpu);
}
bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
{
unsigned long current_rip = kvm_rip_read(vcpu) +
get_segment_base(vcpu, VCPU_SREG_CS);
return current_rip == linear_rip;
}
EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
{
unsigned long rflags;
rflags = kvm_x86_ops->get_rflags(vcpu);
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
rflags &= ~X86_EFLAGS_TF;
return rflags;
}
EXPORT_SYMBOL_GPL(kvm_get_rflags);
void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
rflags |= X86_EFLAGS_TF;
kvm_x86_ops->set_rflags(vcpu, rflags);
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_set_rflags);
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
{
int r;
if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
return;
r = kvm_mmu_reload(vcpu);
if (unlikely(r))
return;
if (!vcpu->arch.mmu.direct_map &&
work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
return;
vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
}
7326
7327
7328
7329
7330
7331
7332
7333
7334
7335
7336
7337
7338
7339
7340
7341
7342
7343
7344
7345
7346
7347
7348
7349
7350
7351
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
{
return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
}
static inline u32 kvm_async_pf_next_probe(u32 key)
{
return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
}
static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
u32 key = kvm_async_pf_hash_fn(gfn);
while (vcpu->arch.apf.gfns[key] != ~0)
key = kvm_async_pf_next_probe(key);
vcpu->arch.apf.gfns[key] = gfn;
}
static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
{
int i;
u32 key = kvm_async_pf_hash_fn(gfn);
for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
(vcpu->arch.apf.gfns[key] != gfn &&
vcpu->arch.apf.gfns[key] != ~0); i++)
7354
7355
7356
7357
7358
7359
7360
7361
7362
7363
7364
7365
7366
7367
7368
7369
7370
7371
7372
7373
7374
7375
7376
7377
7378
7379
7380
7381
7382
7383
7384
7385
7386
key = kvm_async_pf_next_probe(key);
return key;
}
bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
}
static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
u32 i, j, k;
i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
while (true) {
vcpu->arch.apf.gfns[i] = ~0;
do {
j = kvm_async_pf_next_probe(j);
if (vcpu->arch.apf.gfns[j] == ~0)
return;
k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
/*
* k lies cyclically in ]i,j]
* | i.k.j |
* |....j i.k.| or |.k..j i...|
*/
} while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
i = j;
}
}
static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
{
return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
sizeof(val));
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
struct x86_exception fault;
trace_kvm_async_pf_not_present(work->arch.token, work->gva);
kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
(vcpu->arch.apf.send_user_only &&
kvm_x86_ops->get_cpl(vcpu) == 0))
kvm_make_request(KVM_REQ_APF_HALT, vcpu);
else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
fault.vector = PF_VECTOR;
fault.error_code_valid = true;
fault.error_code = 0;
fault.nested_page_fault = false;
fault.address = work->arch.token;
kvm_inject_page_fault(vcpu, &fault);
}
}
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
struct x86_exception fault;
trace_kvm_async_pf_ready(work->arch.token, work->gva);
work->arch.token = ~0; /* broadcast wakeup */
else
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
fault.vector = PF_VECTOR;
fault.error_code_valid = true;
fault.error_code = 0;
fault.nested_page_fault = false;
fault.address = work->arch.token;
kvm_inject_page_fault(vcpu, &fault);
}
vcpu->arch.apf.halted = false;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
return true;
else
return !kvm_event_needs_reinjection(vcpu) &&
kvm_x86_ops->interrupt_allowed(vcpu);
void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
{
atomic_inc(&kvm->arch.noncoherent_dma_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
{
atomic_dec(&kvm->arch.noncoherent_dma_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
{
return atomic_read(&kvm->arch.noncoherent_dma_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);