Skip to content
Snippets Groups Projects
x86.c 12.7 KiB
Newer Older
  • Learn to ignore specific revisions
    /*
     * Kernel-based Virtual Machine driver for Linux
     *
     * derived from drivers/kvm/kvm_main.c
     *
     * Copyright (C) 2006 Qumranet, Inc.
     *
     * Authors:
     *   Avi Kivity   <avi@qumranet.com>
     *   Yaniv Kamay  <yaniv@qumranet.com>
     *
     * This work is licensed under the terms of the GNU GPL, version 2.  See
     * the COPYING file in the top-level directory.
     *
     */
    
    
    #include "kvm.h"
    
    #include "segment_descriptor.h"
    
    #include "irq.h"
    
    #include <linux/kvm.h>
    #include <linux/fs.h>
    #include <linux/vmalloc.h>
    
    #include <linux/module.h>
    
    
    #include <asm/uaccess.h>
    
    
    #define MAX_IO_MSRS 256
    
    
    /*
     * Return the base address encoded in the descriptor that @selector names,
     * as found in the descriptor tables currently loaded on this cpu.
     *
     * A zero selector yields 0.  When bit 2 of the selector is set the
     * descriptor is looked up in the LDT, whose own base is obtained by
     * recursing on the current LDT selector; otherwise the GDT base is used.
     */
    unsigned long segment_base(u16 selector)
    {
    	struct descriptor_table gdt;
    	struct segment_descriptor *desc;
    	unsigned long tbl;
    	unsigned long base;
    
    	if (!selector)
    		return 0;
    
    	/* The GDT base is read unconditionally; it is the default table. */
    	asm("sgdt %0" : "=m"(gdt));
    	tbl = gdt.base;
    
    	if (selector & 4) {
    		/* Table-indicator bit set: descriptor lives in the LDT. */
    		u16 ldt_sel;
    
    		asm("sldt %0" : "=g"(ldt_sel));
    		tbl = segment_base(ldt_sel);
    	}
    
    	/* Masking the low three bits leaves the byte offset into the table. */
    	desc = (struct segment_descriptor *)(tbl + (selector & ~7));
    
    	base = desc->base_low;
    	base |= (unsigned long)desc->base_mid << 16;
    	base |= (unsigned long)desc->base_high << 24;
    #ifdef CONFIG_X86_64
    	/*
    	 * System descriptors of type 2, 9 or 11 are read through the wider
    	 * segment_descriptor_64 layout, which carries bits 63:32 of the base.
    	 */
    	if (desc->system == 0) {
    		switch (desc->type) {
    		case 2:
    		case 9:
    		case 11:
    			base |= ((unsigned long)
    				 ((struct segment_descriptor_64 *)desc)->base_higher) << 32;
    			break;
    		default:
    			break;
    		}
    	}
    #endif
    	return base;
    }
    EXPORT_SYMBOL_GPL(segment_base);
    
    
    /*
     * List of msr numbers which we expose to userspace through KVM_GET_MSRS
     * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
     *
     * This list is modified at module load time to reflect the
     * capabilities of the host cpu: kvm_init_msr_list() probes every entry
     * with rdmsr_safe(), compacts the entries that could be read to the front
     * of the array, and records how many there are in num_msrs_to_save.
     * Entries at or beyond num_msrs_to_save must therefore not be used.
     */
    static u32 msrs_to_save[] = {
    	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
    	MSR_K6_STAR,
    #ifdef CONFIG_X86_64
    	/* Only part of the table on 64-bit builds. */
    	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
    #endif
    	MSR_IA32_TIME_STAMP_COUNTER,
    };
    
    /*
     * Number of valid leading entries in msrs_to_save[]; written by
     * kvm_init_msr_list() and read by KVM_GET_MSR_INDEX_LIST.
     */
    static unsigned num_msrs_to_save;
    
    /*
     * Additional msr numbers reported by KVM_GET_MSR_INDEX_LIST after the
     * msrs_to_save[] entries.  Unlike msrs_to_save[], this table is not probed
     * against the host cpu by kvm_init_msr_list(); all entries are always
     * reported.
     */
    static u32 emulated_msrs[] = {
    	MSR_IA32_MISC_ENABLE,
    };
    
    
    /*
     * Wrapper giving kvm_set_msr() the (vcpu, index, u64 *) shape that
     * msr_io() expects of its callback: the value to write is read through
     * @data and passed on by value.
     */
    static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
    {
    	u64 value = *data;
    
    	return kvm_set_msr(vcpu, index, value);
    }
    
    /*
     * Apply @do_msr to each of the msrs->nmsrs entries in @entries, in order,
     * with the vcpu loaded.  All parameters are kernel addresses.
     *
     * Processing stops at the first entry for which @do_msr returns non-zero.
     *
     * @return number of entries handled successfully before stopping.
     */
    static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
    		    struct kvm_msr_entry *entries,
    		    int (*do_msr)(struct kvm_vcpu *vcpu,
    				  unsigned index, u64 *data))
    {
    	int done = 0;
    
    	vcpu_load(vcpu);
    
    	while (done < msrs->nmsrs) {
    		struct kvm_msr_entry *cur = &entries[done];
    
    		if (do_msr(vcpu, cur->index, &cur->data) != 0)
    			break;
    		++done;
    	}
    
    	vcpu_put(vcpu);
    
    	return done;
    }
    
    /*
     * Read or write a batch of msrs on behalf of userspace.  @user_msrs is a
     * user address: its header is copied in, then its entry array is copied
     * into a temporary kernel buffer, run through __msr_io() with @do_msr, and
     * (when @writeback is non-zero) copied back out to userspace.
     *
     * The request is rejected with -E2BIG unless nmsrs is below MAX_IO_MSRS.
     *
     * @return number of msrs processed successfully, or a negative errno
     *         (-EFAULT, -E2BIG, -ENOMEM).
     */
    static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
    		  int (*do_msr)(struct kvm_vcpu *vcpu,
    				unsigned index, u64 *data),
    		  int writeback)
    {
    	struct kvm_msrs hdr;
    	struct kvm_msr_entry *buf;
    	unsigned bytes;
    	int ret, handled;
    
    	if (copy_from_user(&hdr, user_msrs, sizeof hdr))
    		return -EFAULT;
    
    	if (hdr.nmsrs >= MAX_IO_MSRS)
    		return -E2BIG;
    
    	bytes = sizeof(struct kvm_msr_entry) * hdr.nmsrs;
    	buf = vmalloc(bytes);
    	if (!buf)
    		return -ENOMEM;
    
    	if (copy_from_user(buf, user_msrs->entries, bytes)) {
    		ret = -EFAULT;
    		goto free_buf;
    	}
    
    	handled = __msr_io(vcpu, &hdr, buf, do_msr);
    	ret = handled;
    	if (handled < 0)
    		goto free_buf;
    
    	/* A failed copy-out overrides the count with -EFAULT. */
    	if (writeback && copy_to_user(user_msrs->entries, buf, bytes))
    		ret = -EFAULT;
    
    free_buf:
    	vfree(buf);
    	return ret;
    }
    
    
    /*
     * Handle x86-specific ioctls issued on the kvm device node.
     *
     * KVM_GET_MSR_INDEX_LIST: @arg is a user pointer to a struct kvm_msr_list.
     * On entry its nmsrs field holds the capacity of the caller's indices[]
     * array.  The total number of indices (saved + emulated msrs) is always
     * written back to nmsrs, so a caller that receives -E2BIG can retry with a
     * large enough buffer.  On success indices[] holds the num_msrs_to_save
     * valid entries of msrs_to_save[] followed by all of emulated_msrs[].
     *
     * Returns 0 on success, -EFAULT on a failed user copy, -E2BIG when the
     * caller's array is too small, and -EINVAL for any other ioctl.
     */
    long kvm_arch_dev_ioctl(struct file *filp,
    			unsigned int ioctl, unsigned long arg)
    {
    	void __user *argp = (void __user *)arg;
    	long r;
    
    	switch (ioctl) {
    	case KVM_GET_MSR_INDEX_LIST: {
    		struct kvm_msr_list __user *user_msr_list = argp;
    		struct kvm_msr_list msr_list;
    		unsigned n;
    
    		r = -EFAULT;
    		if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
    			goto out;
    		n = msr_list.nmsrs;
    		msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
    		if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
    			goto out;
    		/*
    		 * The caller's array must hold everything written below,
    		 * i.e. the saved msrs AND the emulated ones; checking only
    		 * against num_msrs_to_save would let the second copy run
    		 * past the end of the user buffer.
    		 */
    		r = -E2BIG;
    		if (n < msr_list.nmsrs)
    			goto out;
    		r = -EFAULT;
    		if (copy_to_user(user_msr_list->indices, &msrs_to_save,
    				 num_msrs_to_save * sizeof(u32)))
    			goto out;
    		/*
    		 * Pointer arithmetic on indices already scales by the
    		 * element size, so the offset is an element count, not a
    		 * byte count.
    		 */
    		if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
    				 &emulated_msrs,
    				 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
    			goto out;
    		r = 0;
    		break;
    	}
    	default:
    		r = -EINVAL;
    	}
    out:
    	return r;
    }
    
    
    /*
     * Arch hook for loading a vcpu: forwards @vcpu and @cpu unchanged to the
     * vcpu_load callback of the active kvm_x86_ops.
     */
    void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
    {
    	kvm_x86_ops->vcpu_load(vcpu, cpu);
    }
    
    /*
     * Arch hook for releasing a vcpu: forwards @vcpu to the vcpu_put callback
     * of the active kvm_x86_ops.
     */
    void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
    {
    	kvm_x86_ops->vcpu_put(vcpu);
    }
    
    /*
     * Clear bit 20 of edx in the vcpu's cpuid entry for function 0x80000001
     * when that bit is set but the host's EFER msr does not have EFER_NX set.
     * Only the first matching cpuid entry is considered.  Logs a message when
     * the bit is cleared.
     */
    static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
    {
    	struct kvm_cpuid_entry *ext = NULL;
    	u64 host_efer;
    	int idx;
    
    	rdmsrl(MSR_EFER, host_efer);
    
    	for (idx = 0; idx < vcpu->cpuid_nent; ++idx) {
    		if (vcpu->cpuid_entries[idx].function == 0x80000001) {
    			ext = &vcpu->cpuid_entries[idx];
    			break;
    		}
    	}
    
    	/* Nothing to do unless the guest advertises the bit ... */
    	if (!ext)
    		return;
    	if (!(ext->edx & (1 << 20)))
    		return;
    	/* ... and the host does not have it enabled. */
    	if (host_efer & EFER_NX)
    		return;
    
    	ext->edx &= ~(1 << 20);
    	printk(KERN_INFO "kvm: guest NX capability removed\n");
    }
    
    /*
     * Install a new cpuid table for @vcpu from the user array @entries, whose
     * length is given by cpuid->nent, then let cpuid_fix_nx_cap() adjust it.
     *
     * Returns 0 on success, -E2BIG when nent exceeds KVM_MAX_CPUID_ENTRIES,
     * or -EFAULT when the entries cannot be copied from userspace (in which
     * case cpuid_nent is left unchanged).
     */
    static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
    				    struct kvm_cpuid *cpuid,
    				    struct kvm_cpuid_entry __user *entries)
    {
    	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
    		return -E2BIG;
    
    	if (copy_from_user(&vcpu->cpuid_entries, entries,
    			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
    		return -EFAULT;
    
    	vcpu->cpuid_nent = cpuid->nent;
    	cpuid_fix_nx_cap(vcpu);
    
    	return 0;
    }
    
    static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
    				    struct kvm_lapic_state *s)
    {
    	vcpu_load(vcpu);
    	memcpy(s->regs, vcpu->apic->regs, sizeof *s);
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
    				    struct kvm_lapic_state *s)
    {
    	vcpu_load(vcpu);
    	memcpy(vcpu->apic->regs, s->regs, sizeof *s);
    	kvm_apic_post_state_restore(vcpu);
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    long kvm_arch_vcpu_ioctl(struct file *filp,
    			 unsigned int ioctl, unsigned long arg)
    {
    	struct kvm_vcpu *vcpu = filp->private_data;
    	void __user *argp = (void __user *)arg;
    	int r;
    
    	switch (ioctl) {
    	case KVM_GET_LAPIC: {
    		struct kvm_lapic_state lapic;
    
    		memset(&lapic, 0, sizeof lapic);
    		r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
    		if (r)
    			goto out;
    		r = -EFAULT;
    		if (copy_to_user(argp, &lapic, sizeof lapic))
    			goto out;
    		r = 0;
    		break;
    	}
    	case KVM_SET_LAPIC: {
    		struct kvm_lapic_state lapic;
    
    		r = -EFAULT;
    		if (copy_from_user(&lapic, argp, sizeof lapic))
    			goto out;
    		r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);;
    		if (r)
    			goto out;
    		r = 0;
    		break;
    	}
    	case KVM_SET_CPUID: {
    		struct kvm_cpuid __user *cpuid_arg = argp;
    		struct kvm_cpuid cpuid;
    
    		r = -EFAULT;
    		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
    			goto out;
    		r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
    		if (r)
    			goto out;
    		break;
    	}
    	case KVM_GET_MSRS:
    		r = msr_io(vcpu, argp, kvm_get_msr, 1);
    		break;
    	case KVM_SET_MSRS:
    		r = msr_io(vcpu, argp, do_set_msr, 0);
    		break;
    	default:
    		r = -EINVAL;
    	}
    out:
    	return r;
    }
    
    
    static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
    {
    	int ret;
    
    	if (addr > (unsigned int)(-3 * PAGE_SIZE))
    		return -1;
    	ret = kvm_x86_ops->set_tss_addr(kvm, addr);
    	return ret;
    }
    
    /*
     * Change the number of mmu pages for @kvm to @kvm_nr_mmu_pages and record
     * the value as the requested amount, both under kvm->lock.
     *
     * Returns 0 on success, or -EINVAL when the request is below
     * KVM_MIN_ALLOC_MMU_PAGES (nothing is changed in that case).
     */
    static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
    					  u32 kvm_nr_mmu_pages)
    {
    	int ret = -EINVAL;
    
    	if (kvm_nr_mmu_pages >= KVM_MIN_ALLOC_MMU_PAGES) {
    		mutex_lock(&kvm->lock);
    		kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
    		kvm->n_requested_mmu_pages = kvm_nr_mmu_pages;
    		mutex_unlock(&kvm->lock);
    		ret = 0;
    	}
    
    	return ret;
    }
    
    /*
     * Return kvm->n_alloc_mmu_pages.  Note this is a different field from
     * n_requested_mmu_pages, which kvm_vm_ioctl_set_nr_mmu_pages() records.
     * The field is read without taking kvm->lock.
     */
    static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
    {
    	return kvm->n_alloc_mmu_pages;
    }
    
    /*
     * Set a new alias region.  Aliases map a portion of physical memory into
     * another portion.  This is useful for memory windows, for example the PC
     * VGA region.
     *
     * The request is validated (page-aligned size and guest address, slot in
     * range, no wrap-around of either address range), the chosen slot is
     * rewritten, kvm->naliases is recomputed as one past the highest slot with
     * a non-zero page count, and all mmu mappings are zapped.
     *
     * Returns 0 on success or -EINVAL when validation fails.
     */
    static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
    					 struct kvm_memory_alias *alias)
    {
    	struct kvm_mem_alias *slot;
    	int used;
    
    	/* Size and guest address must both be page aligned. */
    	if (alias->memory_size & (PAGE_SIZE - 1))
    		return -EINVAL;
    	if (alias->guest_phys_addr & (PAGE_SIZE - 1))
    		return -EINVAL;
    
    	if (alias->slot >= KVM_ALIAS_SLOTS)
    		return -EINVAL;
    
    	/* Neither the guest range nor the target range may wrap around. */
    	if (alias->guest_phys_addr + alias->memory_size
    	    < alias->guest_phys_addr)
    		return -EINVAL;
    	if (alias->target_phys_addr + alias->memory_size
    	    < alias->target_phys_addr)
    		return -EINVAL;
    
    	mutex_lock(&kvm->lock);
    
    	slot = &kvm->aliases[alias->slot];
    	slot->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
    	slot->npages = alias->memory_size >> PAGE_SHIFT;
    	slot->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
    
    	/* Scan downwards for the highest slot still in use. */
    	used = KVM_ALIAS_SLOTS;
    	while (used > 0 && !kvm->aliases[used - 1].npages)
    		--used;
    	kvm->naliases = used;
    
    	kvm_mmu_zap_all(kvm);
    
    	mutex_unlock(&kvm->lock);
    
    	return 0;
    }
    
    /*
     * Copy the state of the in-kernel interrupt controller selected by
     * chip->chip_id (master PIC, slave PIC or IOAPIC) into @chip.
     *
     * Returns 0 on success or -EINVAL for an unknown chip_id, in which case
     * @chip is left untouched.
     */
    static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
    {
    	switch (chip->chip_id) {
    	case KVM_IRQCHIP_PIC_MASTER:
    		memcpy(&chip->chip.pic, &pic_irqchip(kvm)->pics[0],
    		       sizeof(struct kvm_pic_state));
    		return 0;
    	case KVM_IRQCHIP_PIC_SLAVE:
    		memcpy(&chip->chip.pic, &pic_irqchip(kvm)->pics[1],
    		       sizeof(struct kvm_pic_state));
    		return 0;
    	case KVM_IRQCHIP_IOAPIC:
    		memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
    		       sizeof(struct kvm_ioapic_state));
    		return 0;
    	default:
    		return -EINVAL;
    	}
    }
    
    /*
     * Overwrite the state of the in-kernel interrupt controller selected by
     * chip->chip_id (master PIC, slave PIC or IOAPIC) with the contents of
     * @chip, then call kvm_pic_update_irq() on the PIC.
     *
     * kvm_pic_update_irq() runs on every path, including the one where the
     * chip_id is not recognised.
     *
     * Returns 0 on success or -EINVAL for an unknown chip_id.
     */
    static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
    {
    	int status = 0;
    
    	switch (chip->chip_id) {
    	case KVM_IRQCHIP_PIC_MASTER:
    		memcpy(&pic_irqchip(kvm)->pics[0], &chip->chip.pic,
    		       sizeof(struct kvm_pic_state));
    		break;
    	case KVM_IRQCHIP_PIC_SLAVE:
    		memcpy(&pic_irqchip(kvm)->pics[1], &chip->chip.pic,
    		       sizeof(struct kvm_pic_state));
    		break;
    	case KVM_IRQCHIP_IOAPIC:
    		memcpy(ioapic_irqchip(kvm), &chip->chip.ioapic,
    		       sizeof(struct kvm_ioapic_state));
    		break;
    	default:
    		status = -EINVAL;
    		break;
    	}
    
    	kvm_pic_update_irq(pic_irqchip(kvm));
    
    	return status;
    }
    
    long kvm_arch_vm_ioctl(struct file *filp,
    		       unsigned int ioctl, unsigned long arg)
    {
    	struct kvm *kvm = filp->private_data;
    	void __user *argp = (void __user *)arg;
    	int r = -EINVAL;
    
    	switch (ioctl) {
    	case KVM_SET_TSS_ADDR:
    		r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
    		if (r < 0)
    			goto out;
    		break;
    	case KVM_SET_MEMORY_REGION: {
    		struct kvm_memory_region kvm_mem;
    		struct kvm_userspace_memory_region kvm_userspace_mem;
    
    		r = -EFAULT;
    		if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
    			goto out;
    		kvm_userspace_mem.slot = kvm_mem.slot;
    		kvm_userspace_mem.flags = kvm_mem.flags;
    		kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
    		kvm_userspace_mem.memory_size = kvm_mem.memory_size;
    		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
    		if (r)
    			goto out;
    		break;
    	}
    	case KVM_SET_NR_MMU_PAGES:
    		r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
    		if (r)
    			goto out;
    		break;
    	case KVM_GET_NR_MMU_PAGES:
    		r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
    		break;
    	case KVM_SET_MEMORY_ALIAS: {
    		struct kvm_memory_alias alias;
    
    		r = -EFAULT;
    		if (copy_from_user(&alias, argp, sizeof alias))
    			goto out;
    		r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
    		if (r)
    			goto out;
    		break;
    	}
    	case KVM_CREATE_IRQCHIP:
    		r = -ENOMEM;
    		kvm->vpic = kvm_create_pic(kvm);
    		if (kvm->vpic) {
    			r = kvm_ioapic_init(kvm);
    			if (r) {
    				kfree(kvm->vpic);
    				kvm->vpic = NULL;
    				goto out;
    			}
    		} else
    			goto out;
    		break;
    	case KVM_IRQ_LINE: {
    		struct kvm_irq_level irq_event;
    
    		r = -EFAULT;
    		if (copy_from_user(&irq_event, argp, sizeof irq_event))
    			goto out;
    		if (irqchip_in_kernel(kvm)) {
    			mutex_lock(&kvm->lock);
    			if (irq_event.irq < 16)
    				kvm_pic_set_irq(pic_irqchip(kvm),
    					irq_event.irq,
    					irq_event.level);
    			kvm_ioapic_set_irq(kvm->vioapic,
    					irq_event.irq,
    					irq_event.level);
    			mutex_unlock(&kvm->lock);
    			r = 0;
    		}
    		break;
    	}
    	case KVM_GET_IRQCHIP: {
    		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
    		struct kvm_irqchip chip;
    
    		r = -EFAULT;
    		if (copy_from_user(&chip, argp, sizeof chip))
    			goto out;
    		r = -ENXIO;
    		if (!irqchip_in_kernel(kvm))
    			goto out;
    		r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
    		if (r)
    			goto out;
    		r = -EFAULT;
    		if (copy_to_user(argp, &chip, sizeof chip))
    			goto out;
    		r = 0;
    		break;
    	}
    	case KVM_SET_IRQCHIP: {
    		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
    		struct kvm_irqchip chip;
    
    		r = -EFAULT;
    		if (copy_from_user(&chip, argp, sizeof chip))
    			goto out;
    		r = -ENXIO;
    		if (!irqchip_in_kernel(kvm))
    			goto out;
    		r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
    		if (r)
    			goto out;
    		r = 0;
    		break;
    	}
    	default:
    		;
    	}
    out:
    	return r;
    }
    
    
    /*
     * Filter msrs_to_save[] down to the msrs that rdmsr_safe() can read on
     * this cpu.  Readable entries are compacted to the front of the array in
     * their original order and their count is stored in num_msrs_to_save.
     */
    static __init void kvm_init_msr_list(void)
    {
    	u32 scratch[2];
    	unsigned src;
    	unsigned dst = 0;
    
    	for (src = 0; src < ARRAY_SIZE(msrs_to_save); src++) {
    		/* The value read is discarded; only success matters. */
    		if (rdmsr_safe(msrs_to_save[src], &scratch[0], &scratch[1]) >= 0) {
    			if (dst != src)
    				msrs_to_save[dst] = msrs_to_save[src];
    			dst++;
    		}
    	}
    
    	num_msrs_to_save = dst;
    }
    
    /*
     * Arch-specific init: the only work done here is building the list of
     * msrs that can be saved on this cpu (see kvm_init_msr_list()).
     */
    __init void kvm_arch_init(void)
    {
    	kvm_init_msr_list();
    }