Skip to content
Snippets Groups Projects
x86.c 97.1 KiB
Newer Older
  • Learn to ignore specific revisions
  • 		e = &vcpu->arch.cpuid_entries[i];
    
    		if (e->function == 0x80000001) {
    			entry = e;
    			break;
    		}
    	}
    
    	if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
    
    		entry->edx &= ~(1 << 20);
    		printk(KERN_INFO "kvm: guest NX capability removed\n");
    	}
    }
    
    
    /* when an old userspace process fills a new kernel module */
    
    /*
     * Install a CPUID table handed in through the legacy kvm_cpuid layout.
     *
     * The user entries are first copied into a temporary vmalloc() buffer and
     * then converted one by one into vcpu->arch.cpuid_entries; the fields the
     * legacy layout does not carry (index, flags, padding) are cleared.
     *
     * Returns 0 on success, -E2BIG if more than KVM_MAX_CPUID_ENTRIES entries
     * are requested, -ENOMEM if the temporary buffer cannot be allocated and
     * -EFAULT if the user buffer cannot be read.
     */
    static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
    				    struct kvm_cpuid *cpuid,
    				    struct kvm_cpuid_entry __user *entries)
    {
    	struct kvm_cpuid_entry *staged;
    	int idx;
    	int ret;

    	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
    		return -E2BIG;

    	staged = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
    	if (!staged)
    		return -ENOMEM;

    	if (copy_from_user(staged, entries,
    			   cpuid->nent * sizeof(struct kvm_cpuid_entry))) {
    		ret = -EFAULT;
    		goto release;
    	}

    	for (idx = 0; idx < cpuid->nent; idx++) {
    		const struct kvm_cpuid_entry *src = &staged[idx];

    		vcpu->arch.cpuid_entries[idx].function = src->function;
    		vcpu->arch.cpuid_entries[idx].eax = src->eax;
    		vcpu->arch.cpuid_entries[idx].ebx = src->ebx;
    		vcpu->arch.cpuid_entries[idx].ecx = src->ecx;
    		vcpu->arch.cpuid_entries[idx].edx = src->edx;
    		/* Not present in the legacy layout: always zero. */
    		vcpu->arch.cpuid_entries[idx].index = 0;
    		vcpu->arch.cpuid_entries[idx].flags = 0;
    		vcpu->arch.cpuid_entries[idx].padding[0] = 0;
    		vcpu->arch.cpuid_entries[idx].padding[1] = 0;
    		vcpu->arch.cpuid_entries[idx].padding[2] = 0;
    	}
    	vcpu->arch.cpuid_nent = cpuid->nent;

    	cpuid_fix_nx_cap(vcpu);
    	ret = 0;

    release:
    	vfree(staged);
    	return ret;
    }
    
    /*
     * Install a CPUID table supplied in the kvm_cpuid_entry2 layout by copying
     * it straight from user space into vcpu->arch.cpuid_entries.
     *
     * Returns 0 on success, -E2BIG if more than KVM_MAX_CPUID_ENTRIES entries
     * are requested and -EFAULT if the user buffer cannot be read.
     */
    static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
    				    struct kvm_cpuid2 *cpuid,
    				    struct kvm_cpuid_entry2 __user *entries)
    {
    	int r;

    	r = -E2BIG;
    	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
    		goto out;
    	r = -EFAULT;
    	if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
    			   cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
    		goto out;
    	/* Only publish the new entry count once the copy has succeeded. */
    	vcpu->arch.cpuid_nent = cpuid->nent;
    	return 0;

    out:
    	return r;
    }
    
    /*
     * Copy the vcpu's current CPUID table out to user space.
     *
     * cpuid->nent is the capacity of the user buffer on entry and is set to
     * the number of entries the vcpu holds before returning.
     *
     * Returns 0 on success, -E2BIG if the user buffer is too small and
     * -EFAULT if the user buffer cannot be written.
     */
    static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
    				    struct kvm_cpuid2 *cpuid,
    				    struct kvm_cpuid_entry2 __user *entries)
    {
    	int r;

    	r = -E2BIG;
    	if (cpuid->nent < vcpu->arch.cpuid_nent)
    		goto out;
    	r = -EFAULT;
    	if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
    			   vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
    		goto out;
    	r = 0;

    out:
    	cpuid->nent = vcpu->arch.cpuid_nent;
    	return r;
    }
    
    /*
     * Return a 32-bit mask with only bit (bitno mod 32) set.
     *
     * The shift is done on an unsigned constant so that bit 31 does not
     * overflow a signed int.
     */
    static inline u32 bit(int bitno)
    {
    	return 1U << (bitno & 31);
    }
    
    /*
     * Fill a single entry: record the requested leaf/sub-leaf, clear the
     * flags and store the four registers returned by cpuid_count().
     */
    static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
    			  u32 index)
    {
    	entry->function = function;
    	entry->index = index;
    	entry->flags = 0;
    	cpuid_count(function, index,
    		    &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
    }
    
    /*
     * Fill @entry (and, for leaves that expand to several entries, the slots
     * following it) with the values cpuid_count() returns for @function,
     * masked down to the feature bits listed below.
     *
     * @entry:    first free slot of the output array
     * @function: CPUID leaf to query
     * @index:    sub-leaf used for the first query
     * @nent:     running count of filled entries; incremented once per entry
     *            written here
     * @maxnent:  capacity of the output array; additional entries are no
     *            longer appended once *nent reaches it
     */
    static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
    			 u32 index, int *nent, int maxnent)
    {
    	const u32 kvm_supported_word0_x86_features = bit(X86_FEATURE_FPU) |
    		bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
    		bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
    		bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
    		bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
    		bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) |
    		bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
    		bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) |
    		bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) |
    		bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP);
    	const u32 kvm_supported_word1_x86_features = bit(X86_FEATURE_FPU) |
    		bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
    		bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
    		bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
    		bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
    		bit(X86_FEATURE_PGE) |
    		bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
    		bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) |
    		bit(X86_FEATURE_SYSCALL) |
    		/*
    		 * Select the NX mask bit itself when EFER.NX is on.  A logical
    		 * AND here would collapse to 0/1 and never set the NX bit.
    		 */
    		(is_efer_nx() ? bit(X86_FEATURE_NX) : 0) |
    #ifdef CONFIG_X86_64
    		bit(X86_FEATURE_LM) |
    #endif
    		bit(X86_FEATURE_MMXEXT) |
    		bit(X86_FEATURE_3DNOWEXT) |
    		bit(X86_FEATURE_3DNOW);
    	const u32 kvm_supported_word3_x86_features =
    		bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
    	const u32 kvm_supported_word6_x86_features =
    		bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY);

    	/* all func 2 cpuid_count() should be called on the same cpu */
    	get_cpu();
    	do_cpuid_1_ent(entry, function, index);
    	++*nent;

    	switch (function) {
    	case 0:
    		/* Cap the highest basic leaf that is reported. */
    		entry->eax = min(entry->eax, (u32)0xb);
    		break;
    	case 1:
    		entry->edx &= kvm_supported_word0_x86_features;
    		entry->ecx &= kvm_supported_word3_x86_features;
    		break;
    	/* function 2 entries are STATEFUL. That is, repeated cpuid commands
    	 * may return different values. This forces us to get_cpu() before
    	 * issuing the first command, and also to emulate this annoying behavior
    	 * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
    	case 2: {
    		/* The low byte of EAX gives the number of reads to perform. */
    		int t, times = entry->eax & 0xff;

    		entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
    		for (t = 1; t < times && *nent < maxnent; ++t) {
    			do_cpuid_1_ent(&entry[t], function, 0);
    			entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
    			++*nent;
    		}
    		break;
    	}
    	/* function 4 and 0xb have additional index. */
    	case 4: {
    		int i, cache_type;

    		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
    		/* read more entries until cache_type is zero */
    		for (i = 1; *nent < maxnent; ++i) {
    			cache_type = entry[i - 1].eax & 0x1f;
    			if (!cache_type)
    				break;
    			do_cpuid_1_ent(&entry[i], function, i);
    			entry[i].flags |=
    			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
    			++*nent;
    		}
    		break;
    	}
    	case 0xb: {
    		int i, level_type;

    		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
    		/*
    		 * read more entries until level_type is zero; the level type
    		 * is ECX[15:8] (ECX[7:0] is the level number, which is zero
    		 * for the very first sub-leaf and must not end the walk).
    		 */
    		for (i = 1; *nent < maxnent; ++i) {
    			level_type = entry[i - 1].ecx & 0xff00;
    			if (!level_type)
    				break;
    			do_cpuid_1_ent(&entry[i], function, i);
    			entry[i].flags |=
    			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
    			++*nent;
    		}
    		break;
    	}
    	case 0x80000000:
    		/* Cap the highest extended leaf that is reported. */
    		entry->eax = min(entry->eax, 0x8000001a);
    		break;
    	case 0x80000001:
    		entry->edx &= kvm_supported_word1_x86_features;
    		entry->ecx &= kvm_supported_word6_x86_features;
    		break;
    	}
    	put_cpu();
    }
    
    
    /*
     * Report the CPUID entries produced by do_cpuid_ent() for all basic and
     * extended leaves to user space.
     *
     * cpuid->nent is the capacity of the user buffer on entry (clamped to
     * KVM_MAX_CPUID_ENTRIES) and the number of entries written on success.
     *
     * Returns 0 on success, -E2BIG if the buffer is empty or is filled by the
     * basic leaves alone, -ENOMEM if the scratch buffer cannot be allocated
     * and -EFAULT if the user buffer cannot be written.
     */
    static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
    				    struct kvm_cpuid_entry2 __user *entries)
    {
    	struct kvm_cpuid_entry2 *cpuid_entries;
    	int limit, nent = 0, r = -E2BIG;
    	u32 func;

    	if (cpuid->nent < 1)
    		goto out;
    	/*
    	 * nent comes from user space: bound it so the allocation size below
    	 * cannot overflow or grow without limit.
    	 */
    	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
    		cpuid->nent = KVM_MAX_CPUID_ENTRIES;
    	r = -ENOMEM;
    	cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
    	if (!cpuid_entries)
    		goto out;
    	/*
    	 * The fill helpers do not write every byte of an entry (padding),
    	 * so clear the buffer before it is copied out to user space.
    	 */
    	memset(cpuid_entries, 0,
    	       sizeof(struct kvm_cpuid_entry2) * cpuid->nent);

    	/* Basic leaves: leaf 0 tells us the highest one to walk. */
    	do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent);
    	limit = cpuid_entries[0].eax;
    	for (func = 1; func <= limit && nent < cpuid->nent; ++func)
    		do_cpuid_ent(&cpuid_entries[nent], func, 0,
    				&nent, cpuid->nent);
    	r = -E2BIG;
    	if (nent >= cpuid->nent)
    		goto out_free;

    	/* Extended leaves: leaf 0x80000000 gives the highest one. */
    	do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent);
    	limit = cpuid_entries[nent - 1].eax;
    	for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
    		do_cpuid_ent(&cpuid_entries[nent], func, 0,
    			       &nent, cpuid->nent);
    	r = -EFAULT;
    	if (copy_to_user(entries, cpuid_entries,
    			nent * sizeof(struct kvm_cpuid_entry2)))
    		goto out_free;
    	cpuid->nent = nent;
    	r = 0;

    out_free:
    	vfree(cpuid_entries);
    out:
    	return r;
    }
    
    
    /*
     * Copy the vcpu's local APIC register block into @s.  The copy is done
     * between vcpu_load() and vcpu_put().  Always returns 0.
     */
    static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
    				    struct kvm_lapic_state *s)
    {
    	vcpu_load(vcpu);
    	memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
    	vcpu_put(vcpu);

    	return 0;
    }
    
    /*
     * Overwrite the vcpu's local APIC register block with the contents of @s
     * and then call kvm_apic_post_state_restore().  The work is done between
     * vcpu_load() and vcpu_put().  Always returns 0.
     */
    static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
    				    struct kvm_lapic_state *s)
    {
    	vcpu_load(vcpu);
    	memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
    	kvm_apic_post_state_restore(vcpu);
    	vcpu_put(vcpu);

    	return 0;
    }
    
    
    /*
     * Queue an interrupt vector on a vcpu that has no in-kernel irqchip.
     *
     * Returns 0 on success, -EINVAL if the vector is outside 0..255 and
     * -ENXIO if the VM uses the in-kernel irqchip.
     */
    static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
    				    struct kvm_interrupt *irq)
    {
    	if (irq->irq < 0 || irq->irq >= 256)
    		return -EINVAL;
    	if (irqchip_in_kernel(vcpu->kvm))
    		return -ENXIO;
    	vcpu_load(vcpu);

    	/* Mark the vector pending and flag its word in the summary mask. */
    	set_bit(irq->irq, vcpu->arch.irq_pending);
    	set_bit(irq->irq / BITS_PER_LONG, &vcpu->arch.irq_summary);

    	/* Balance the vcpu_load() above, as the lapic helpers do. */
    	vcpu_put(vcpu);

    	return 0;
    }
    
    /*
     * Switch TPR access reporting on or off for a vcpu.  No flags are
     * accepted: any non-zero tac->flags yields -EINVAL.  The stored setting
     * is normalised to 0 or 1.  Returns 0 on success.
     */
    static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
    					   struct kvm_tpr_access_ctl *tac)
    {
    	if (tac->flags != 0)
    		return -EINVAL;

    	vcpu->arch.tpr_access_reporting = tac->enabled ? 1 : 0;
    	return 0;
    }
    
    
    long kvm_arch_vcpu_ioctl(struct file *filp,
    			 unsigned int ioctl, unsigned long arg)
    {
    	struct kvm_vcpu *vcpu = filp->private_data;
    	void __user *argp = (void __user *)arg;
    	int r;
    
    	switch (ioctl) {
    	case KVM_GET_LAPIC: {
    		struct kvm_lapic_state lapic;
    
    		memset(&lapic, 0, sizeof lapic);
    		r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
    		if (r)
    			goto out;
    		r = -EFAULT;
    		if (copy_to_user(argp, &lapic, sizeof lapic))
    			goto out;
    		r = 0;
    		break;
    	}
    	case KVM_SET_LAPIC: {
    		struct kvm_lapic_state lapic;
    
    		r = -EFAULT;
    		if (copy_from_user(&lapic, argp, sizeof lapic))
    			goto out;
    		r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);;
    		if (r)
    			goto out;
    		r = 0;
    		break;
    	}
    
    	case KVM_INTERRUPT: {
    		struct kvm_interrupt irq;
    
    		r = -EFAULT;
    		if (copy_from_user(&irq, argp, sizeof irq))
    			goto out;
    		r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
    		if (r)
    			goto out;
    		r = 0;
    		break;
    	}
    
    	case KVM_SET_CPUID: {
    		struct kvm_cpuid __user *cpuid_arg = argp;
    		struct kvm_cpuid cpuid;
    
    		r = -EFAULT;
    		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
    			goto out;
    		r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
    		if (r)
    			goto out;
    		break;
    	}
    
    	case KVM_SET_CPUID2: {
    		struct kvm_cpuid2 __user *cpuid_arg = argp;
    		struct kvm_cpuid2 cpuid;
    
    		r = -EFAULT;
    		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
    			goto out;
    		r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
    				cpuid_arg->entries);
    		if (r)
    			goto out;
    		break;
    	}
    	case KVM_GET_CPUID2: {
    		struct kvm_cpuid2 __user *cpuid_arg = argp;
    		struct kvm_cpuid2 cpuid;
    
    		r = -EFAULT;
    		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
    			goto out;
    		r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
    				cpuid_arg->entries);
    		if (r)
    			goto out;
    		r = -EFAULT;
    		if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
    			goto out;
    		r = 0;
    		break;
    	}
    
    	case KVM_GET_MSRS:
    		r = msr_io(vcpu, argp, kvm_get_msr, 1);
    		break;
    	case KVM_SET_MSRS:
    		r = msr_io(vcpu, argp, do_set_msr, 0);
    		break;
    
    	case KVM_TPR_ACCESS_REPORTING: {
    		struct kvm_tpr_access_ctl tac;
    
    		r = -EFAULT;
    		if (copy_from_user(&tac, argp, sizeof tac))
    			goto out;
    		r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
    		if (r)
    			goto out;
    		r = -EFAULT;
    		if (copy_to_user(argp, &tac, sizeof tac))
    			goto out;
    		r = 0;
    		break;
    	};
    
    	case KVM_SET_VAPIC_ADDR: {
    		struct kvm_vapic_addr va;
    
    		r = -EINVAL;
    		if (!irqchip_in_kernel(vcpu->kvm))
    			goto out;
    		r = -EFAULT;
    		if (copy_from_user(&va, argp, sizeof va))
    			goto out;
    		r = 0;
    		kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
    		break;
    	}
    
    static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
    {
    	int ret;
    
    	if (addr > (unsigned int)(-3 * PAGE_SIZE))
    		return -1;
    	ret = kvm_x86_ops->set_tss_addr(kvm, addr);
    	return ret;
    }
    
    /*
     * Resize the MMU page pool to @kvm_nr_mmu_pages and remember the value
     * as the requested size.  Requests below KVM_MIN_ALLOC_MMU_PAGES are
     * refused with -EINVAL; otherwise returns 0.
     *
     * NOTE(review): no locking is visible around the resize here - confirm
     * that callers serialise this.
     */
    static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
    					  u32 kvm_nr_mmu_pages)
    {
    	if (kvm_nr_mmu_pages >= KVM_MIN_ALLOC_MMU_PAGES) {
    		kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
    		kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
    		return 0;
    	}

    	return -EINVAL;
    }
    
    /* Return the VM's n_alloc_mmu_pages counter. */
    static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
    {
    	return kvm->arch.n_alloc_mmu_pages;
    }
    
    /*
     * Translate @gfn through the VM's alias table.  The first alias whose
     * [base_gfn, base_gfn + npages) range contains @gfn wins and the frame is
     * rebased onto that alias's target; a frame that matches no alias is
     * returned unchanged.
     */
    gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
    {
    	int slot = 0;

    	while (slot < kvm->arch.naliases) {
    		struct kvm_mem_alias *cur = &kvm->arch.aliases[slot++];

    		if (gfn < cur->base_gfn)
    			continue;
    		if (gfn >= cur->base_gfn + cur->npages)
    			continue;
    		return cur->target_gfn + gfn - cur->base_gfn;
    	}

    	return gfn;
    }
    
    
    /*
     * Set a new alias region.  Aliases map a portion of physical memory into
     * another portion.  This is useful for memory windows, for example the PC
     * VGA region.
     */
    /*
     * Program alias slot alias->slot and recompute how many alias slots are
     * in use.
     *
     * Returns 0 on success, or -EINVAL if the size or guest address is not
     * page aligned, the slot number is out of range, or either address range
     * wraps around.
     */
    static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
    					 struct kvm_memory_alias *alias)
    {
    	int r, n;
    	struct kvm_mem_alias *p;

    	r = -EINVAL;
    	/* General sanity checks */
    	if (alias->memory_size & (PAGE_SIZE - 1))
    		goto out;
    	if (alias->guest_phys_addr & (PAGE_SIZE - 1))
    		goto out;
    	if (alias->slot >= KVM_ALIAS_SLOTS)
    		goto out;
    	if (alias->guest_phys_addr + alias->memory_size
    	    < alias->guest_phys_addr)
    		goto out;
    	if (alias->target_phys_addr + alias->memory_size
    	    < alias->target_phys_addr)
    		goto out;

    	spin_lock(&kvm->mmu_lock);

    	p = &kvm->arch.aliases[alias->slot];
    	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
    	p->npages = alias->memory_size >> PAGE_SHIFT;
    	p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;

    	/*
    	 * Find the highest slot that still maps pages; everything up to it
    	 * is what unalias_gfn() has to scan.
    	 */
    	for (n = KVM_ALIAS_SLOTS; n > 0; --n)
    		if (kvm->arch.aliases[n - 1].npages)
    			break;
    	kvm->arch.naliases = n;

    	/* Drop the lock exactly once, regardless of what the scan found. */
    	spin_unlock(&kvm->mmu_lock);

    	kvm_mmu_zap_all(kvm);

    	return 0;

    out:
    	return r;
    }
    
    /*
     * Copy the state of the interrupt controller selected by chip->chip_id
     * (master PIC, slave PIC or IOAPIC) into @chip.  Returns 0 on success or
     * -EINVAL for an unknown chip id.
     */
    static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
    {
    	if (chip->chip_id == KVM_IRQCHIP_PIC_MASTER) {
    		memcpy(&chip->chip.pic, &pic_irqchip(kvm)->pics[0],
    		       sizeof(struct kvm_pic_state));
    		return 0;
    	}
    	if (chip->chip_id == KVM_IRQCHIP_PIC_SLAVE) {
    		memcpy(&chip->chip.pic, &pic_irqchip(kvm)->pics[1],
    		       sizeof(struct kvm_pic_state));
    		return 0;
    	}
    	if (chip->chip_id == KVM_IRQCHIP_IOAPIC) {
    		memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
    		       sizeof(struct kvm_ioapic_state));
    		return 0;
    	}

    	return -EINVAL;
    }
    
    /*
     * Load the state carried in @chip into the interrupt controller selected
     * by chip->chip_id (master PIC, slave PIC or IOAPIC).  The PIC interrupt
     * state is refreshed afterwards on every path, including the error one.
     * Returns 0 on success or -EINVAL for an unknown chip id.
     */
    static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
    {
    	int status = 0;

    	if (chip->chip_id == KVM_IRQCHIP_PIC_MASTER)
    		memcpy(&pic_irqchip(kvm)->pics[0], &chip->chip.pic,
    		       sizeof(struct kvm_pic_state));
    	else if (chip->chip_id == KVM_IRQCHIP_PIC_SLAVE)
    		memcpy(&pic_irqchip(kvm)->pics[1], &chip->chip.pic,
    		       sizeof(struct kvm_pic_state));
    	else if (chip->chip_id == KVM_IRQCHIP_IOAPIC)
    		memcpy(ioapic_irqchip(kvm), &chip->chip.ioapic,
    		       sizeof(struct kvm_ioapic_state));
    	else
    		status = -EINVAL;

    	kvm_pic_update_irq(pic_irqchip(kvm));
    	return status;
    }
    
    
    /* Copy the VM's PIT state into @ps.  Always returns 0. */
    static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
    {
    	memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));

    	return 0;
    }
    
    /*
     * Replace the VM's PIT state with @ps and reload channel 0 with its
     * count from the new state.  Always returns 0.
     */
    static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
    {
    	memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
    	kvm_pit_load_count(kvm, 0, ps->channels[0].count);

    	return 0;
    }
    
    
    /*
     * Get (and clear) the dirty memory log for a memory slot.
     */
    int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
    				      struct kvm_dirty_log *log)
    {
    	int r;
    	int n;
    	struct kvm_memory_slot *memslot;
    	int is_dirty = 0;

    	/* Fetch the log; is_dirty is set by the helper. */
    	r = kvm_get_dirty_log(kvm, log, &is_dirty);
    	if (r)
    		goto out;

    	/* If nothing is dirty, don't bother messing with page tables. */
    	if (is_dirty) {
    		kvm_mmu_slot_remove_write_access(kvm, log->slot);
    		kvm_flush_remote_tlbs(kvm);
    		memslot = &kvm->memslots[log->slot];
    		/* Bitmap size in bytes, rounded up to whole longs. */
    		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
    		memset(memslot->dirty_bitmap, 0, n);
    	}
    	r = 0;
    out:
    	return r;
    }
    
    long kvm_arch_vm_ioctl(struct file *filp,
    		       unsigned int ioctl, unsigned long arg)
    {
    	struct kvm *kvm = filp->private_data;
    	void __user *argp = (void __user *)arg;
    	int r = -EINVAL;
    
    	switch (ioctl) {
    	case KVM_SET_TSS_ADDR:
    		r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
    		if (r < 0)
    			goto out;
    		break;
    	case KVM_SET_MEMORY_REGION: {
    		struct kvm_memory_region kvm_mem;
    		struct kvm_userspace_memory_region kvm_userspace_mem;
    
    		r = -EFAULT;
    		if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
    			goto out;
    		kvm_userspace_mem.slot = kvm_mem.slot;
    		kvm_userspace_mem.flags = kvm_mem.flags;
    		kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
    		kvm_userspace_mem.memory_size = kvm_mem.memory_size;
    		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
    		if (r)
    			goto out;
    		break;
    	}
    	case KVM_SET_NR_MMU_PAGES:
    		r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
    		if (r)
    			goto out;
    		break;
    	case KVM_GET_NR_MMU_PAGES:
    		r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
    		break;
    	case KVM_SET_MEMORY_ALIAS: {
    		struct kvm_memory_alias alias;
    
    		r = -EFAULT;
    		if (copy_from_user(&alias, argp, sizeof alias))
    			goto out;
    		r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
    		if (r)
    			goto out;
    		break;
    	}
    	case KVM_CREATE_IRQCHIP:
    		r = -ENOMEM;
    
    		kvm->arch.vpic = kvm_create_pic(kvm);
    		if (kvm->arch.vpic) {
    
    			r = kvm_ioapic_init(kvm);
    			if (r) {
    
    				kfree(kvm->arch.vpic);
    				kvm->arch.vpic = NULL;
    
    				goto out;
    			}
    		} else
    			goto out;
    		break;
    
    Sheng Yang's avatar
    Sheng Yang committed
    	case KVM_CREATE_PIT:
    		r = -ENOMEM;
    		kvm->arch.vpit = kvm_create_pit(kvm);
    		if (kvm->arch.vpit)
    			r = 0;
    		break;
    
    	case KVM_IRQ_LINE: {
    		struct kvm_irq_level irq_event;
    
    		r = -EFAULT;
    		if (copy_from_user(&irq_event, argp, sizeof irq_event))
    			goto out;
    		if (irqchip_in_kernel(kvm)) {
    			mutex_lock(&kvm->lock);
    			if (irq_event.irq < 16)
    				kvm_pic_set_irq(pic_irqchip(kvm),
    					irq_event.irq,
    					irq_event.level);
    
    			kvm_ioapic_set_irq(kvm->arch.vioapic,
    
    					irq_event.irq,
    					irq_event.level);
    			mutex_unlock(&kvm->lock);
    			r = 0;
    		}
    		break;
    	}
    	case KVM_GET_IRQCHIP: {
    		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
    		struct kvm_irqchip chip;
    
    		r = -EFAULT;
    		if (copy_from_user(&chip, argp, sizeof chip))
    			goto out;
    		r = -ENXIO;
    		if (!irqchip_in_kernel(kvm))
    			goto out;
    		r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
    		if (r)
    			goto out;
    		r = -EFAULT;
    		if (copy_to_user(argp, &chip, sizeof chip))
    			goto out;
    		r = 0;
    		break;
    	}
    	case KVM_SET_IRQCHIP: {
    		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
    		struct kvm_irqchip chip;
    
    		r = -EFAULT;
    		if (copy_from_user(&chip, argp, sizeof chip))
    			goto out;
    		r = -ENXIO;
    		if (!irqchip_in_kernel(kvm))
    			goto out;
    		r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
    		if (r)
    			goto out;
    		r = 0;
    		break;
    	}
    
    	case KVM_GET_PIT: {
    		struct kvm_pit_state ps;
    		r = -EFAULT;
    		if (copy_from_user(&ps, argp, sizeof ps))
    			goto out;
    		r = -ENXIO;
    		if (!kvm->arch.vpit)
    			goto out;
    		r = kvm_vm_ioctl_get_pit(kvm, &ps);
    		if (r)
    			goto out;
    		r = -EFAULT;
    		if (copy_to_user(argp, &ps, sizeof ps))
    			goto out;
    		r = 0;
    		break;
    	}
    	case KVM_SET_PIT: {
    		struct kvm_pit_state ps;
    		r = -EFAULT;
    		if (copy_from_user(&ps, argp, sizeof ps))
    			goto out;
    		r = -ENXIO;
    		if (!kvm->arch.vpit)
    			goto out;
    		r = kvm_vm_ioctl_set_pit(kvm, &ps);
    		if (r)
    			goto out;
    		r = 0;
    		break;
    	}
    
    /*
     * Compact msrs_to_save in place so that it only holds the MSRs that
     * rdmsr_safe() can read without failing, and record how many survived
     * in num_msrs_to_save.  The relative order of the kept entries is
     * preserved.
     */
    static void kvm_init_msr_list(void)
    {
    	u32 dummy[2];
    	unsigned src;
    	unsigned kept = 0;

    	for (src = 0; src < ARRAY_SIZE(msrs_to_save); src++) {
    		if (rdmsr_safe(msrs_to_save[src], &dummy[0], &dummy[1]) >= 0) {
    			/* Only move when an earlier entry was dropped. */
    			if (kept < src)
    				msrs_to_save[kept] = msrs_to_save[src];
    			kept++;
    		}
    	}
    	num_msrs_to_save = kept;
    }
    
    
    /*
     * Only the APIC needs an MMIO device hook, so take a shortcut for now.
     */
    static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
    
    	if (vcpu->arch.apic) {
    		dev = &vcpu->arch.apic->dev;
    
    		if (dev->in_range(dev, addr, len, is_write))
    
    			return dev;
    	}
    	return NULL;
    }
    
    
    static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
    
    	dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write);
    
    		dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len,
    					  is_write);
    
    	return dev;
    }
    
    int emulator_read_std(unsigned long addr,
    			     void *val,
    			     unsigned int bytes,
    			     struct kvm_vcpu *vcpu)
    {
    	void *data = val;
    
    	int r = X86EMUL_CONTINUE;
    
    		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
    
    		unsigned offset = addr & (PAGE_SIZE-1);
    		unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
    		int ret;
    
    
    		if (gpa == UNMAPPED_GVA) {
    			r = X86EMUL_PROPAGATE_FAULT;
    			goto out;
    		}
    
    		ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy);
    
    		if (ret < 0) {
    			r = X86EMUL_UNHANDLEABLE;
    			goto out;
    		}
    
    }
    EXPORT_SYMBOL_GPL(emulator_read_std);
    
    /*
     * Emulated read of @bytes bytes at guest virtual address @addr.
     *
     * Order of attempts: a previously completed MMIO read, an ordinary guest
     * memory read, an in-kernel MMIO device, and finally a request that is
     * recorded in the vcpu's mmio_* fields.
     *
     * Returns X86EMUL_CONTINUE when @val has been filled,
     * X86EMUL_PROPAGATE_FAULT when the address is unmapped, and
     * X86EMUL_UNHANDLEABLE after recording the MMIO request in the vcpu.
     */
    static int emulator_read_emulated(unsigned long addr,
    				  void *val,
    				  unsigned int bytes,
    				  struct kvm_vcpu *vcpu)
    {
    	struct kvm_io_device *mmio_dev;
    	gpa_t                 gpa;

    	/* Data from an earlier MMIO round trip is waiting: consume it. */
    	if (vcpu->mmio_read_completed) {
    		memcpy(val, vcpu->mmio_data, bytes);
    		vcpu->mmio_read_completed = 0;
    		return X86EMUL_CONTINUE;
    	}

    	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);

    	/* For APIC access vmexit */
    	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
    		goto mmio;

    	if (emulator_read_std(addr, val, bytes, vcpu)
    			== X86EMUL_CONTINUE)
    		return X86EMUL_CONTINUE;
    	if (gpa == UNMAPPED_GVA)
    		return X86EMUL_PROPAGATE_FAULT;

    mmio:
    	/*
    	 * Is this MMIO handled locally?
    	 */
    	mutex_lock(&vcpu->kvm->lock);
    	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
    	if (mmio_dev) {
    		kvm_iodevice_read(mmio_dev, gpa, bytes, val);
    		mutex_unlock(&vcpu->kvm->lock);
    		/* Served by an in-kernel device: the read is complete. */
    		return X86EMUL_CONTINUE;
    	}
    	mutex_unlock(&vcpu->kvm->lock);

    	/* No in-kernel device claimed it: record the pending MMIO read. */
    	vcpu->mmio_needed = 1;
    	vcpu->mmio_phys_addr = gpa;
    	vcpu->mmio_size = bytes;
    	vcpu->mmio_is_write = 0;

    	return X86EMUL_UNHANDLEABLE;
    }
    
    
    /*
     * Write @bytes bytes from @val to guest physical address @gpa and notify
     * the MMU of the write via kvm_mmu_pte_write().
     *
     * Returns 1 if the write went to guest memory, 0 if kvm_write_guest()
     * failed (in which case the MMU is not notified).
     */
    int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
    			  const void *val, int bytes)
    {
    	int ret;

    	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
    	if (ret < 0)
    		return 0;
    	kvm_mmu_pte_write(vcpu, gpa, val, bytes);
    	return 1;
    }
    
    static int emulator_write_emulated_onepage(unsigned long addr,
    					   const void *val,
    					   unsigned int bytes,
    					   struct kvm_vcpu *vcpu)
    {
    	struct kvm_io_device *mmio_dev;
    
    	gpa_t                 gpa;
    
    	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
    
    		kvm_inject_page_fault(vcpu, addr, 2);
    
    		return X86EMUL_PROPAGATE_FAULT;
    	}
    
    	/* For APIC access vmexit */
    	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
    		goto mmio;
    
    	if (emulator_write_phys(vcpu, gpa, val, bytes))
    		return X86EMUL_CONTINUE;
    
    mmio:
    	/*
    	 * Is this MMIO handled locally?
    	 */
    
    	mutex_lock(&vcpu->kvm->lock);
    
    	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
    
    	if (mmio_dev) {
    		kvm_iodevice_write(mmio_dev, gpa, bytes, val);
    
    		mutex_unlock(&vcpu->kvm->lock);
    
    	mutex_unlock(&vcpu->kvm->lock);
    
    
    	vcpu->mmio_needed = 1;
    	vcpu->mmio_phys_addr = gpa;
    	vcpu->mmio_size = bytes;
    	vcpu->mmio_is_write = 1;
    	memcpy(vcpu->mmio_data, val, bytes);
    
    	return X86EMUL_CONTINUE;
    }
    
    /*
     * Emulated guest write.  A write that straddles a page boundary is split
     * in two: the part up to the end of the first page is written first and,
     * only if that returns X86EMUL_CONTINUE, the remainder follows.  Returns
     * the result of the last single-page write that was attempted.
     */
    int emulator_write_emulated(unsigned long addr,
    				   const void *val,
    				   unsigned int bytes,
    				   struct kvm_vcpu *vcpu)
    {
    	unsigned long last = addr + bytes - 1;

    	/* First and last byte on different pages? */
    	if ((last ^ addr) & PAGE_MASK) {
    		/* Bytes left before the end of the first page. */
    		int head = -addr & ~PAGE_MASK;
    		int rc = emulator_write_emulated_onepage(addr, val, head, vcpu);

    		if (rc != X86EMUL_CONTINUE)
    			return rc;
    		addr += head;
    		val += head;
    		bytes -= head;
    	}

    	return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
    }
    EXPORT_SYMBOL_GPL(emulator_write_emulated);
    
    static int emulator_cmpxchg_emulated(unsigned long addr,
    				     const void *old,
    				     const void *new,
    				     unsigned int bytes,
    				     struct kvm_vcpu *vcpu)
    {
    	static int reported;