  • }
    
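/*
 * Guest WBINVD must actually be emulated (rather than treated as a nop)
 * when the VM has an assigned device behind a non-coherent IOMMU mapping,
 * so that guest cache flushes reach memory the device may observe.
 */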
    static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
    {
    	return vcpu->kvm->arch.iommu_domain &&
    		!(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
    }
    
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/* Address WBINVD may be executed by guest */
	if (need_emulate_wbinvd(vcpu)) {
		if (kvm_x86_ops->has_wbinvd_exit())
			cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
		else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
			smp_call_function_single(vcpu->cpu,
					wbinvd_ipi, NULL, 1);
	}

	kvm_x86_ops->vcpu_load(vcpu, cpu);
	if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
		/* Make sure TSC doesn't go backwards */
		s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
				native_read_tsc() - vcpu->arch.last_host_tsc;
		if (tsc_delta < 0)
			mark_tsc_unstable("KVM discovered backwards TSC");
		if (check_tsc_unstable())
			kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
		kvm_migrate_timers(vcpu);
		vcpu->cpu = cpu;
	}
}
    
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops->vcpu_put(vcpu);
	kvm_put_guest_fpu(vcpu);
	vcpu->arch.last_host_tsc = native_read_tsc();
}

static int is_efer_nx(void)
{
	unsigned long long efer = 0;

	rdmsrl_safe(MSR_EFER, &efer);
	return efer & EFER_NX;
}
    
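/*
 * If the host lacks EFER.NX, mask the NX bit (CPUID 0x80000001 EDX bit 20)
 * out of the guest's CPUID so it does not rely on a feature we cannot run.
 */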
static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_cpuid_entry2 *e, *entry;

	entry = NULL;
	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
		e = &vcpu->arch.cpuid_entries[i];
		if (e->function == 0x80000001) {
			entry = e;
			break;
		}
	}
	if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
		entry->edx &= ~(1 << 20);
		printk(KERN_INFO "kvm: guest NX capability removed\n");
	}
}
    
/* when an old userspace process fills a new kernel module */
static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
				    struct kvm_cpuid *cpuid,
				    struct kvm_cpuid_entry __user *entries)
{
    	int r, i;
    	struct kvm_cpuid_entry *cpuid_entries;
    
    	r = -E2BIG;
    	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
    		goto out;
    	r = -ENOMEM;
    	cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
    	if (!cpuid_entries)
    		goto out;
    	r = -EFAULT;
    	if (copy_from_user(cpuid_entries, entries,
    			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
    		goto out_free;
	for (i = 0; i < cpuid->nent; i++) {
		vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
		vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
		vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
		vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
		vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
		vcpu->arch.cpuid_entries[i].index = 0;
		vcpu->arch.cpuid_entries[i].flags = 0;
		vcpu->arch.cpuid_entries[i].padding[0] = 0;
		vcpu->arch.cpuid_entries[i].padding[1] = 0;
		vcpu->arch.cpuid_entries[i].padding[2] = 0;
	}
	vcpu->arch.cpuid_nent = cpuid->nent;
	cpuid_fix_nx_cap(vcpu);
	r = 0;
	kvm_apic_set_version(vcpu);
	kvm_x86_ops->cpuid_update(vcpu);
	update_cpuid(vcpu);

out_free:
	vfree(cpuid_entries);
out:
	return r;
}
    
static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
				     struct kvm_cpuid2 *cpuid,
				     struct kvm_cpuid_entry2 __user *entries)
{
	int r;

	r = -E2BIG;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		goto out;
	r = -EFAULT;
	if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
			   cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
		goto out;
	vcpu->arch.cpuid_nent = cpuid->nent;
	kvm_apic_set_version(vcpu);
	kvm_x86_ops->cpuid_update(vcpu);
	update_cpuid(vcpu);
	return 0;

out:
	return r;
}

static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
				     struct kvm_cpuid2 *cpuid,
				     struct kvm_cpuid_entry2 __user *entries)
{
	int r;

	r = -E2BIG;
	if (cpuid->nent < vcpu->arch.cpuid_nent)
		goto out;
	r = -EFAULT;
	if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
			 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
		goto out;
	r = 0;

out:
	cpuid->nent = vcpu->arch.cpuid_nent;
	return r;
}
    
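/* Fill one CPUID entry straight from the host's CPUID instruction. */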
static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
			   u32 index)
{
	entry->function = function;
	entry->index = index;
	cpuid_count(entry->function, entry->index,
		    &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
	entry->flags = 0;
}

#define F(x) bit(X86_FEATURE_##x)

static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
			 u32 index, int *nent, int maxnent)
{
	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
#ifdef CONFIG_X86_64
	unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
				? F(GBPAGES) : 0;
	unsigned f_lm = F(LM);
#else
	unsigned f_gbpages = 0;
	unsigned f_lm = 0;
#endif
	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
    
	/* cpuid 1.edx */
	const u32 kvm_supported_word0_x86_features =
		F(FPU) | F(VME) | F(DE) | F(PSE) |
		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) |
		0 /* Reserved, DS, ACPI */ | F(MMX) |
		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
		0 /* HTT, TM, Reserved, PBE */;
	/* cpuid 0x80000001.edx */
	const u32 kvm_supported_word1_x86_features =
		F(FPU) | F(VME) | F(DE) | F(PSE) |
		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
		F(PAT) | F(PSE36) | 0 /* Reserved */ |
		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
		F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
	/* cpuid 1.ecx */
	const u32 kvm_supported_word4_x86_features =
		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
		0 /* DS-CPL, VMX, SMX, EST */ |
		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
		0 /* Reserved, DCA */ | F(XMM4_1) |
		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
		0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX);
	/* cpuid 0x80000001.ecx */
	const u32 kvm_supported_word6_x86_features =
		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
		F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) |
		0 /* SKINIT */ | 0 /* WDT */;
    
	/* all calls to cpuid_count() should be made on the same cpu */
	get_cpu();
    	do_cpuid_1_ent(entry, function, index);
    	++*nent;
    
    	switch (function) {
	case 0:
		entry->eax = min(entry->eax, (u32)0xd);
		break;
	case 1:
		entry->edx &= kvm_supported_word0_x86_features;
		entry->ecx &= kvm_supported_word4_x86_features;
		/* we support x2apic emulation even if host does not support
		 * it since we emulate x2apic in software */
		entry->ecx |= F(X2APIC);
		break;
    	/* function 2 entries are STATEFUL. That is, repeated cpuid commands
    	 * may return different values. This forces us to get_cpu() before
    	 * issuing the first command, and also to emulate this annoying behavior
    	 * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
    	case 2: {
		int t, times = entry->eax & 0xff;

		entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
		entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
		for (t = 1; t < times && *nent < maxnent; ++t) {
    			do_cpuid_1_ent(&entry[t], function, 0);
    			entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
    			++*nent;
    		}
    		break;
    	}
    	/* function 4 and 0xb have additional index. */
    	case 4: {
		int i, cache_type;

		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		/* read more entries until cache_type is zero */
		for (i = 1; *nent < maxnent; ++i) {
			cache_type = entry[i - 1].eax & 0x1f;
			if (!cache_type)
				break;
			do_cpuid_1_ent(&entry[i], function, i);
			entry[i].flags |=
			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
			++*nent;
		}
    		break;
    	}
    	case 0xb: {
		int i, level_type;

		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		/* read more entries until level_type is zero */
		for (i = 1; *nent < maxnent; ++i) {
			level_type = entry[i - 1].ecx & 0xff00;
			if (!level_type)
				break;
			do_cpuid_1_ent(&entry[i], function, i);
			entry[i].flags |=
			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
			++*nent;
		}
    		break;
    	}
    
    	case 0xd: {
    		int i;
    
    		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
    		for (i = 1; *nent < maxnent; ++i) {
    			if (entry[i - 1].eax == 0 && i != 2)
    				break;
    			do_cpuid_1_ent(&entry[i], function, i);
    			entry[i].flags |=
    			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
    			++*nent;
    		}
    		break;
    	}
    
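	/*
	 * Hypervisor signature leaf: guests detect KVM by reading the
	 * "KVMKVMKVM" string back in ebx/ecx/edx.
	 */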
    	case KVM_CPUID_SIGNATURE: {
    		char signature[12] = "KVMKVMKVM\0\0";
    		u32 *sigptr = (u32 *)signature;
    		entry->eax = 0;
    		entry->ebx = sigptr[0];
    		entry->ecx = sigptr[1];
    		entry->edx = sigptr[2];
    		break;
    	}
    	case KVM_CPUID_FEATURES:
		entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
			     (1 << KVM_FEATURE_NOP_IO_DELAY) |
			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
		entry->ebx = 0;
    		entry->ecx = 0;
    		entry->edx = 0;
    		break;
    
    	case 0x80000000:
    		entry->eax = min(entry->eax, 0x8000001a);
    		break;
    	case 0x80000001:
    		entry->edx &= kvm_supported_word1_x86_features;
    		entry->ecx &= kvm_supported_word6_x86_features;
    		break;
    	}

	kvm_x86_ops->set_supported_cpuid(function, entry);

	put_cpu();
}

#undef F

static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
					     struct kvm_cpuid_entry2 __user *entries)
{
    	struct kvm_cpuid_entry2 *cpuid_entries;
    	int limit, nent = 0, r = -E2BIG;
    	u32 func;
    
    	if (cpuid->nent < 1)
    		goto out;
    
    	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
    		cpuid->nent = KVM_MAX_CPUID_ENTRIES;
    
    	r = -ENOMEM;
    	cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
    	if (!cpuid_entries)
    		goto out;
    
    	do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent);
    	limit = cpuid_entries[0].eax;
	for (func = 1; func <= limit && nent < cpuid->nent; ++func)
		do_cpuid_ent(&cpuid_entries[nent], func, 0,
			     &nent, cpuid->nent);
    	r = -E2BIG;
    	if (nent >= cpuid->nent)
    		goto out_free;
    
    	do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent);
    	limit = cpuid_entries[nent - 1].eax;
	for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
		do_cpuid_ent(&cpuid_entries[nent], func, 0,
			     &nent, cpuid->nent);

    	r = -E2BIG;
    	if (nent >= cpuid->nent)
    		goto out_free;
    
    	do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent,
    		     cpuid->nent);
    
    	r = -E2BIG;
    	if (nent >= cpuid->nent)
    		goto out_free;
    
    	do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent,
    		     cpuid->nent);

	r = -E2BIG;
	if (nent >= cpuid->nent)
		goto out_free;

    	r = -EFAULT;
	if (copy_to_user(entries, cpuid_entries,
			 nent * sizeof(struct kvm_cpuid_entry2)))
		goto out_free;
    	cpuid->nent = nent;
    	r = 0;
    
    out_free:
    	vfree(cpuid_entries);
    out:
    	return r;
    }
    
    
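/* Copy the local APIC register page to/from userspace. */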
    static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
    				    struct kvm_lapic_state *s)
{
	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);

	return 0;
}
    
    static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
    				    struct kvm_lapic_state *s)
{
	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
	kvm_apic_post_state_restore(vcpu);

	return 0;
}

    static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
    				    struct kvm_interrupt *irq)
    {
    	if (irq->irq < 0 || irq->irq >= 256)
    		return -EINVAL;
    	if (irqchip_in_kernel(vcpu->kvm))
    		return -ENXIO;

	kvm_queue_interrupt(vcpu, irq->irq, false);

	return 0;
}

    static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
    {
    	kvm_inject_nmi(vcpu);
    
    	return 0;
    }
    
    
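/* Enable or disable reporting of guest TPR accesses to userspace. */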
    static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
    					   struct kvm_tpr_access_ctl *tac)
    {
    	if (tac->flags)
    		return -EINVAL;
    	vcpu->arch.tpr_access_reporting = !!tac->enabled;
    	return 0;
    }
    
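/*
 * mcg_cap: bits 7:0 are the MCE bank count, bits 23:16 the extended
 * register count; remaining bits must be within KVM_MCE_CAP_SUPPORTED.
 */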
    static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
    					u64 mcg_cap)
    {
    	int r;
    	unsigned bank_num = mcg_cap & 0xff, bank;
    
	r = -EINVAL;
	if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
		goto out;
    	if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
    		goto out;
    	r = 0;
    	vcpu->arch.mcg_cap = mcg_cap;
    	/* Init IA32_MCG_CTL to all 1s */
    	if (mcg_cap & MCG_CTL_P)
    		vcpu->arch.mcg_ctl = ~(u64)0;
    	/* Init IA32_MCi_CTL to all 1s */
    	for (bank = 0; bank < bank_num; bank++)
    		vcpu->arch.mce_banks[bank*4] = ~(u64)0;
    out:
    	return r;
    }
    
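/*
 * Inject a machine check: honor the MCG_CTL/MCi_CTL enables and the
 * architectural overflow rules when a previous event is still pending.
 */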
    static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
    				      struct kvm_x86_mce *mce)
    {
    	u64 mcg_cap = vcpu->arch.mcg_cap;
    	unsigned bank_num = mcg_cap & 0xff;
    	u64 *banks = vcpu->arch.mce_banks;
    
    	if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
    		return -EINVAL;
    	/*
    	 * if IA32_MCG_CTL is not all 1s, the uncorrected error
    	 * reporting is disabled
    	 */
    	if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
    	    vcpu->arch.mcg_ctl != ~(u64)0)
    		return 0;
    	banks += 4 * mce->bank;
    	/*
    	 * if IA32_MCi_CTL is not all 1s, the uncorrected error
    	 * reporting is disabled for the bank
    	 */
    	if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
    		return 0;
    	if (mce->status & MCI_STATUS_UC) {
		if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
		    !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
			printk(KERN_DEBUG "kvm: set_mce: "
			       "injects mce exception while "
			       "previous one is in progress!\n");
			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
			return 0;
		}
    		if (banks[1] & MCI_STATUS_VAL)
    			mce->status |= MCI_STATUS_OVER;
    		banks[2] = mce->addr;
    		banks[3] = mce->misc;
    		vcpu->arch.mcg_status = mce->mcg_status;
    		banks[1] = mce->status;
    		kvm_queue_exception(vcpu, MC_VECTOR);
    	} else if (!(banks[1] & MCI_STATUS_VAL)
    		   || !(banks[1] & MCI_STATUS_UC)) {
    		if (banks[1] & MCI_STATUS_VAL)
    			mce->status |= MCI_STATUS_OVER;
    		banks[2] = mce->addr;
    		banks[3] = mce->misc;
    		banks[1] = mce->status;
    	} else
    		banks[1] |= MCI_STATUS_OVER;
    	return 0;
    }
    
    
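/* Snapshot pending exception/interrupt/NMI state, e.g. for migration. */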
    static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
    					       struct kvm_vcpu_events *events)
{
	events->exception.injected =
		vcpu->arch.exception.pending &&
		!kvm_exception_is_soft(vcpu->arch.exception.nr);
	events->exception.nr = vcpu->arch.exception.nr;
	events->exception.has_error_code = vcpu->arch.exception.has_error_code;
	events->exception.error_code = vcpu->arch.exception.error_code;

	events->interrupt.injected =
		vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
	events->interrupt.nr = vcpu->arch.interrupt.nr;
	events->interrupt.soft = 0;
	events->interrupt.shadow =
		kvm_x86_ops->get_interrupt_shadow(vcpu,
			KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);

	events->nmi.injected = vcpu->arch.nmi_injected;
	events->nmi.pending = vcpu->arch.nmi_pending;
	events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);

	events->sipi_vector = vcpu->arch.sipi_vector;

	events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
			 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
			 | KVM_VCPUEVENT_VALID_SHADOW);
}
    
    static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
    					      struct kvm_vcpu_events *events)
    {
	if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
			      | KVM_VCPUEVENT_VALID_SIPI_VECTOR
			      | KVM_VCPUEVENT_VALID_SHADOW))
		return -EINVAL;

	vcpu->arch.exception.pending = events->exception.injected;
	vcpu->arch.exception.nr = events->exception.nr;
	vcpu->arch.exception.has_error_code = events->exception.has_error_code;
	vcpu->arch.exception.error_code = events->exception.error_code;

	vcpu->arch.interrupt.pending = events->interrupt.injected;
	vcpu->arch.interrupt.nr = events->interrupt.nr;
	vcpu->arch.interrupt.soft = events->interrupt.soft;
	if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
		kvm_pic_clear_isr_ack(vcpu->kvm);
	if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
		kvm_x86_ops->set_interrupt_shadow(vcpu,
						  events->interrupt.shadow);

	vcpu->arch.nmi_injected = events->nmi.injected;
	if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
		vcpu->arch.nmi_pending = events->nmi.pending;
	kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);

	if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
		vcpu->arch.sipi_vector = events->sipi_vector;

	return 0;
}

    static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
    					     struct kvm_debugregs *dbgregs)
    {
    	memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
    	dbgregs->dr6 = vcpu->arch.dr6;
    	dbgregs->dr7 = vcpu->arch.dr7;
    	dbgregs->flags = 0;
    }
    
    static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
    					    struct kvm_debugregs *dbgregs)
    {
    	if (dbgregs->flags)
    		return -EINVAL;
    
    	memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
    	vcpu->arch.dr6 = dbgregs->dr6;
    	vcpu->arch.dr7 = dbgregs->dr7;
    
    	return 0;
    }
    
    
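/*
 * Export the guest FPU in XSAVE format; without host XSAVE support, fall
 * back to the FXSAVE image and mark only the FP/SSE state as valid.
 */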
static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
					 struct kvm_xsave *guest_xsave)
{
	if (cpu_has_xsave)
		memcpy(guest_xsave->region,
			&vcpu->arch.guest_fpu.state->xsave,
			xstate_size);
	else {
		memcpy(guest_xsave->region,
			&vcpu->arch.guest_fpu.state->fxsave,
			sizeof(struct i387_fxsave_struct));
		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
			XSTATE_FPSSE;
	}
}
    
    static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
    					struct kvm_xsave *guest_xsave)
    {
    	u64 xstate_bv =
    		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
    
	if (cpu_has_xsave)
		memcpy(&vcpu->arch.guest_fpu.state->xsave,
			guest_xsave->region, xstate_size);
    	else {
    		if (xstate_bv & ~XSTATE_FPSSE)
    			return -EINVAL;
    		memcpy(&vcpu->arch.guest_fpu.state->fxsave,
    			guest_xsave->region, sizeof(struct i387_fxsave_struct));
    	}
    	return 0;
    }
    
    static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
    					struct kvm_xcrs *guest_xcrs)
    {
    	if (!cpu_has_xsave) {
    		guest_xcrs->nr_xcrs = 0;
    		return;
    	}
    
    	guest_xcrs->nr_xcrs = 1;
    	guest_xcrs->flags = 0;
    	guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
    	guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
    }
    
    static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
    				       struct kvm_xcrs *guest_xcrs)
    {
    	int i, r = 0;
    
    	if (!cpu_has_xsave)
    		return -EINVAL;
    
    	if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
    		return -EINVAL;
    
    	for (i = 0; i < guest_xcrs->nr_xcrs; i++)
    		/* Only support XCR0 currently */
    		if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) {
    			r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
    				guest_xcrs->xcrs[0].value);
    			break;
    		}
    	if (r)
    		r = -EINVAL;
    	return r;
    }
    
    
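/*
 * Per-vCPU ioctls. u.buffer points at whichever temporary state buffer a
 * case allocates; it is freed once at the out: label.
 */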
    long kvm_arch_vcpu_ioctl(struct file *filp,
    			 unsigned int ioctl, unsigned long arg)
    {
    	struct kvm_vcpu *vcpu = filp->private_data;
    	void __user *argp = (void __user *)arg;
    	int r;
    
    	union {
    		struct kvm_lapic_state *lapic;
    		struct kvm_xsave *xsave;
    		struct kvm_xcrs *xcrs;
    		void *buffer;
    	} u;
    
    	u.buffer = NULL;
    
    	switch (ioctl) {
    	case KVM_GET_LAPIC: {
		u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
		r = -ENOMEM;
		if (!u.lapic)
			goto out;

		r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
    		if (r)
    			goto out;
    		r = -EFAULT;
    
		if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
			goto out;
    		r = 0;
    		break;
    	}
    	case KVM_SET_LAPIC: {
		u.lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
		r = -ENOMEM;
		if (!u.lapic)
			goto out;
		r = -EFAULT;
		if (copy_from_user(u.lapic, argp, sizeof(struct kvm_lapic_state)))
			goto out;
		r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
    
    		if (r)
    			goto out;
    		r = 0;
    		break;
    	}
    
    	case KVM_INTERRUPT: {
    		struct kvm_interrupt irq;
    
    		r = -EFAULT;
    		if (copy_from_user(&irq, argp, sizeof irq))
    			goto out;
    		r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
    		if (r)
    			goto out;
    		r = 0;
    		break;
    	}
    
    	case KVM_NMI: {
    		r = kvm_vcpu_ioctl_nmi(vcpu);
    		if (r)
    			goto out;
    		r = 0;
    		break;
    	}
    
    	case KVM_SET_CPUID: {
    		struct kvm_cpuid __user *cpuid_arg = argp;
    		struct kvm_cpuid cpuid;
    
    		r = -EFAULT;
    		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
    			goto out;
    		r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
    		if (r)
    			goto out;
    		break;
    	}
    
    	case KVM_SET_CPUID2: {
    		struct kvm_cpuid2 __user *cpuid_arg = argp;
    		struct kvm_cpuid2 cpuid;
    
    		r = -EFAULT;
    		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
    			goto out;
		r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
					      cpuid_arg->entries);
    
    		if (r)
    			goto out;
    		break;
    	}
    	case KVM_GET_CPUID2: {
    		struct kvm_cpuid2 __user *cpuid_arg = argp;
    		struct kvm_cpuid2 cpuid;
    
    		r = -EFAULT;
    		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
    			goto out;
		r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
					      cpuid_arg->entries);
    
    		if (r)
    			goto out;
    		r = -EFAULT;
    		if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
    			goto out;
    		r = 0;
    		break;
    	}
    
    	case KVM_GET_MSRS:
    		r = msr_io(vcpu, argp, kvm_get_msr, 1);
    		break;
    	case KVM_SET_MSRS:
    		r = msr_io(vcpu, argp, do_set_msr, 0);
    		break;
    
    	case KVM_TPR_ACCESS_REPORTING: {
    		struct kvm_tpr_access_ctl tac;
    
    		r = -EFAULT;
    		if (copy_from_user(&tac, argp, sizeof tac))
    			goto out;
    		r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
    		if (r)
    			goto out;
    		r = -EFAULT;
    		if (copy_to_user(argp, &tac, sizeof tac))
    			goto out;
    		r = 0;
    		break;
	}
    
    	case KVM_SET_VAPIC_ADDR: {
    		struct kvm_vapic_addr va;
    
    		r = -EINVAL;
    		if (!irqchip_in_kernel(vcpu->kvm))
    			goto out;
    		r = -EFAULT;
    		if (copy_from_user(&va, argp, sizeof va))
    			goto out;
    		r = 0;
    		kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
    		break;
    	}
    
    	case KVM_X86_SETUP_MCE: {
    		u64 mcg_cap;
    
    		r = -EFAULT;
    		if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
    			goto out;
    		r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
    		break;
    	}
    	case KVM_X86_SET_MCE: {
    		struct kvm_x86_mce mce;
    
    		r = -EFAULT;
    		if (copy_from_user(&mce, argp, sizeof mce))
    			goto out;
    		r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
    		break;
    	}
    
    	case KVM_GET_VCPU_EVENTS: {
    		struct kvm_vcpu_events events;
    
    		kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
    
    		r = -EFAULT;
    		if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
    			break;
    		r = 0;
    		break;
    	}
    	case KVM_SET_VCPU_EVENTS: {
    		struct kvm_vcpu_events events;
    
    		r = -EFAULT;
    		if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
    			break;
    
    		r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
    		break;
    	}
    
    	case KVM_GET_DEBUGREGS: {
    		struct kvm_debugregs dbgregs;
    
    		kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
    
    		r = -EFAULT;
    		if (copy_to_user(argp, &dbgregs,
    				 sizeof(struct kvm_debugregs)))
    			break;
    		r = 0;
    		break;
    	}
    	case KVM_SET_DEBUGREGS: {
    		struct kvm_debugregs dbgregs;
    
    		r = -EFAULT;
    		if (copy_from_user(&dbgregs, argp,
    				   sizeof(struct kvm_debugregs)))
    			break;
    
    		r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
    		break;
    	}
    
    	case KVM_GET_XSAVE: {
		u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
		r = -ENOMEM;
		if (!u.xsave)
			break;

		kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);

		r = -EFAULT;
		if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
			break;
		r = 0;
		break;
	}
	case KVM_SET_XSAVE: {
		u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
		r = -ENOMEM;
		if (!u.xsave)
			break;

		r = -EFAULT;
		if (copy_from_user(u.xsave, argp, sizeof(struct kvm_xsave)))
			break;

		r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
		break;
    	}
    	case KVM_GET_XCRS: {
		u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
		r = -ENOMEM;
		if (!u.xcrs)
			break;

		kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);

		r = -EFAULT;
		if (copy_to_user(argp, u.xcrs,
				 sizeof(struct kvm_xcrs)))
			break;
		r = 0;
		break;
	}
	case KVM_SET_XCRS: {
		u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
		r = -ENOMEM;
		if (!u.xcrs)
			break;

		r = -EFAULT;
		if (copy_from_user(u.xcrs, argp,
				   sizeof(struct kvm_xcrs)))
			break;

		r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
		break;
	}
	default:
		r = -EINVAL;
	}
out:
	kfree(u.buffer);
	return r;
}
    
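/* The TSS sits on three pages; leave room for them below the 4G boundary. */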
    static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
    {
    	int ret;
    
    	if (addr > (unsigned int)(-3 * PAGE_SIZE))
    		return -1;
    	ret = kvm_x86_ops->set_tss_addr(kvm, addr);
    	return ret;
    }
    
    
    static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
    					      u64 ident_addr)
    {
    	kvm->arch.ept_identity_map_addr = ident_addr;
    	return 0;
    }
    
    
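/*
 * Resizing the shadow-page cache takes slots_lock (slot accounting) and
 * then mmu_lock (page-table state), in that order.
 */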
    static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
    					  u32 kvm_nr_mmu_pages)
    {
	if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);
	spin_lock(&kvm->mmu_lock);

	kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
	kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;

	spin_unlock(&kvm->mmu_lock);
	mutex_unlock(&kvm->slots_lock);
	return 0;
    }
    
    static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
{
	return kvm->arch.n_max_mmu_pages;
}
    
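/* Read the in-kernel irqchip (PIC master/slave or IOAPIC) state. */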
    static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
    {
    	int r;
    
    	r = 0;
    	switch (chip->chip_id) {
    	case KVM_IRQCHIP_PIC_MASTER:
    		memcpy(&chip->chip.pic,
    			&pic_irqchip(kvm)->pics[0],
    			sizeof(struct kvm_pic_state));
    		break;
    	case KVM_IRQCHIP_PIC_SLAVE:
    		memcpy(&chip->chip.pic,
    			&pic_irqchip(kvm)->pics[1],
    			sizeof(struct kvm_pic_state));
    		break;
    	case KVM_IRQCHIP_IOAPIC:
		r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
    		break;
    	default:
    		r = -EINVAL;