Skip to content
Snippets Groups Projects
x86.c 120 KiB
Newer Older
  • Learn to ignore specific revisions
  • Sheng Yang's avatar
    Sheng Yang committed
    		is_mtrr_mask = msr - 0x200 - 2 * idx;
    		if (!is_mtrr_mask)
    			pt =
    			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
    		else
    			pt =
    			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
    		*pdata = *pt;
    	}
    
    
    Avi Kivity's avatar
    Avi Kivity committed
    	return 0;
    }
    
    
    Huang Ying's avatar
    Huang Ying committed
    /*
     * Read one of the emulated machine-check MSRs of @vcpu.
     *
     * @msr:   index of the MSR to read.
     * @pdata: receives the MSR value on success.
     *
     * Returns 0 on success.  Returns 1 when the MSR cannot be read: either
     * IA32_MCG_CTL while MCG_CTL_P is clear in mcg_cap, or an index that is
     * not one of the bank registers configured in mcg_cap.
     */
    static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
    {
    	u64 data;
    	u64 mcg_cap = vcpu->arch.mcg_cap;
    	/* the low byte of mcg_cap is the number of banks */
    	unsigned bank_num = mcg_cap & 0xff;
    
    	switch (msr) {
    	case MSR_IA32_P5_MC_ADDR:
    	case MSR_IA32_P5_MC_TYPE:
    		data = 0;
    		break;
    	case MSR_IA32_MCG_CAP:
    		data = vcpu->arch.mcg_cap;
    		break;
    	case MSR_IA32_MCG_CTL:
    		/* IA32_MCG_CTL only exists when mcg_cap advertises it */
    		if (!(mcg_cap & MCG_CTL_P))
    			return 1;
    		data = vcpu->arch.mcg_ctl;
    		break;
    	case MSR_IA32_MCG_STATUS:
    		data = vcpu->arch.mcg_status;
    		break;
    	default:
    		/* four consecutive MSRs per bank, starting at MC0_CTL */
    		if (msr >= MSR_IA32_MC0_CTL &&
    		    msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
    			u32 offset = msr - MSR_IA32_MC0_CTL;
    			data = vcpu->arch.mce_banks[offset];
    			break;
    		}
    		return 1;
    	}
    	*pdata = data;
    	return 0;
    }
    
    int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
    {
    	u64 data;
    
    	switch (msr) {
    	case MSR_IA32_PLATFORM_ID:
    
    	case MSR_IA32_UCODE_REV:
    	case MSR_IA32_EBL_CR_POWERON:
    
    	case MSR_IA32_DEBUGCTLMSR:
    	case MSR_IA32_LASTBRANCHFROMIP:
    	case MSR_IA32_LASTBRANCHTOIP:
    	case MSR_IA32_LASTINTFROMIP:
    	case MSR_IA32_LASTINTTOIP:
    
    	case MSR_K8_SYSCFG:
    	case MSR_K7_HWCR:
    
    	case MSR_VM_HSAVE_PA:
    
    	case MSR_P6_PERFCTR0:
    	case MSR_P6_PERFCTR1:
    
    	case MSR_P6_EVNTSEL0:
    	case MSR_P6_EVNTSEL1:
    
    	case MSR_K7_EVNTSEL0:
    
    	case MSR_K7_PERFCTR0:
    
    	case MSR_K8_INT_PENDING_MSG:
    
    	case MSR_AMD64_NB_CFG:
    
    	case MSR_FAM10H_MMIO_CONF_BASE:
    
    Avi Kivity's avatar
    Avi Kivity committed
    	case MSR_MTRRcap:
    		data = 0x500 | KVM_NR_VAR_MTRR;
    		break;
    	case 0x200 ... 0x2ff:
    		return get_msr_mtrr(vcpu, msr, pdata);
    
    	case 0xcd: /* fsb frequency */
    		data = 3;
    		break;
    	case MSR_IA32_APICBASE:
    		data = kvm_get_apic_base(vcpu);
    		break;
    
    	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
    		return kvm_x2apic_msr_read(vcpu, msr, pdata);
    		break;
    
    	case MSR_IA32_MISC_ENABLE:
    
    		data = vcpu->arch.ia32_misc_enable_msr;
    
    	case MSR_IA32_PERF_STATUS:
    		/* TSC increment by tick */
    		data = 1000ULL;
    		/* CPU multiplier */
    		data |= (((uint64_t)4ULL) << 40);
    		break;
    
    		data = vcpu->arch.shadow_efer;
    
    	case MSR_KVM_WALL_CLOCK:
    		data = vcpu->kvm->arch.wall_clock;
    		break;
    	case MSR_KVM_SYSTEM_TIME:
    		data = vcpu->arch.time;
    		break;
    
    Huang Ying's avatar
    Huang Ying committed
    	case MSR_IA32_P5_MC_ADDR:
    	case MSR_IA32_P5_MC_TYPE:
    	case MSR_IA32_MCG_CAP:
    	case MSR_IA32_MCG_CTL:
    	case MSR_IA32_MCG_STATUS:
    	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
    		return get_msr_mce(vcpu, msr, pdata);
    
    		if (!ignore_msrs) {
    			pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
    			return 1;
    		} else {
    			pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
    			data = 0;
    		}
    		break;
    
    	}
    	*pdata = data;
    	return 0;
    }
    EXPORT_SYMBOL_GPL(kvm_get_msr_common);
    
    
    /*
     * Apply @do_msr to each entry of a kernel-resident MSR array, in order,
     * stopping at the first entry for which @do_msr returns non-zero.
     *
     * The vcpu is loaded and slots_lock is held for reading for the whole
     * walk.
     *
     * @return number of entries processed successfully.
     */
    static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
    		    struct kvm_msr_entry *entries,
    		    int (*do_msr)(struct kvm_vcpu *vcpu,
    				  unsigned index, u64 *data))
    {
    	int done = 0;
    
    	vcpu_load(vcpu);
    	down_read(&vcpu->kvm->slots_lock);
    
    	while (done < msrs->nmsrs) {
    		struct kvm_msr_entry *cur = &entries[done];
    
    		if (do_msr(vcpu, cur->index, &cur->data))
    			break;
    		++done;
    	}
    
    	up_read(&vcpu->kvm->slots_lock);
    	vcpu_put(vcpu);
    
    	return done;
    }
    
    /*
     * Userspace front end for __msr_io(): copy the kvm_msrs header and its
     * entry array in from @user_msrs, run @do_msr over the entries, and, when
     * @writeback is non-zero, copy the entry array back out.
     *
     * @return number of entries processed successfully, or a negative errno:
     *         -EFAULT for a failed user copy, -E2BIG when nmsrs is not below
     *         MAX_IO_MSRS, -ENOMEM when the bounce buffer cannot be allocated.
     */
    static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
    		  int (*do_msr)(struct kvm_vcpu *vcpu,
    				unsigned index, u64 *data),
    		  int writeback)
    {
    	struct kvm_msrs hdr;
    	struct kvm_msr_entry *buf;
    	unsigned nbytes;
    	int ret;
    
    	if (copy_from_user(&hdr, user_msrs, sizeof hdr))
    		return -EFAULT;
    
    	if (hdr.nmsrs >= MAX_IO_MSRS)
    		return -E2BIG;
    
    	nbytes = sizeof(struct kvm_msr_entry) * hdr.nmsrs;
    	buf = vmalloc(nbytes);
    	if (!buf)
    		return -ENOMEM;
    
    	if (copy_from_user(buf, user_msrs->entries, nbytes)) {
    		ret = -EFAULT;
    		goto out_free;
    	}
    
    	ret = __msr_io(vcpu, &hdr, buf, do_msr);
    
    	/* a failed write-back replaces the count with -EFAULT */
    	if (ret >= 0 && writeback &&
    	    copy_to_user(user_msrs->entries, buf, nbytes))
    		ret = -EFAULT;
    
    out_free:
    	vfree(buf);
    	return ret;
    }
    
    
    int kvm_dev_ioctl_check_extension(long ext)
    {
    	int r;
    
    	switch (ext) {
    	case KVM_CAP_IRQCHIP:
    	case KVM_CAP_HLT:
    	case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
    	case KVM_CAP_SET_TSS_ADDR:
    
    	case KVM_CAP_EXT_CPUID:
    
    	case KVM_CAP_CLOCKSOURCE:
    
    Sheng Yang's avatar
    Sheng Yang committed
    	case KVM_CAP_PIT:
    
    	case KVM_CAP_NOP_IO_DELAY:
    
    	case KVM_CAP_MP_STATE:
    
    	case KVM_CAP_SYNC_MMU:
    
    	case KVM_CAP_REINJECT_CONTROL:
    
    	case KVM_CAP_IRQ_INJECT_STATUS:
    
    	case KVM_CAP_ASSIGN_DEV_IRQ:
    
    Gregory Haskins's avatar
    Gregory Haskins committed
    	case KVM_CAP_IRQFD:
    
    	case KVM_CAP_IOEVENTFD:
    
    	case KVM_CAP_PIT2:
    
    	case KVM_CAP_PIT_STATE2:
    
    	case KVM_CAP_SET_IDENTITY_MAP_ADDR:
    
    	case KVM_CAP_COALESCED_MMIO:
    		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
    		break;
    
    	case KVM_CAP_VAPIC:
    		r = !kvm_x86_ops->cpu_has_accelerated_tpr();
    		break;
    
    	case KVM_CAP_NR_VCPUS:
    		r = KVM_MAX_VCPUS;
    		break;
    
    	case KVM_CAP_NR_MEMSLOTS:
    		r = KVM_MEMORY_SLOTS;
    		break;
    
    	case KVM_CAP_PV_MMU:
    		r = !tdp_enabled;
    		break;
    
    	case KVM_CAP_IOMMU:
    
    		r = iommu_found();
    
    Huang Ying's avatar
    Huang Ying committed
    	case KVM_CAP_MCE:
    		r = KVM_MAX_MCE_BANKS;
    		break;
    
    long kvm_arch_dev_ioctl(struct file *filp,
    			unsigned int ioctl, unsigned long arg)
    {
    	void __user *argp = (void __user *)arg;
    	long r;
    
    	switch (ioctl) {
    	case KVM_GET_MSR_INDEX_LIST: {
    		struct kvm_msr_list __user *user_msr_list = argp;
    		struct kvm_msr_list msr_list;
    		unsigned n;
    
    		r = -EFAULT;
    		if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
    			goto out;
    		n = msr_list.nmsrs;
    		msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
    		if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
    			goto out;
    		r = -E2BIG;
    
    		if (n < msr_list.nmsrs)
    
    			goto out;
    		r = -EFAULT;
    		if (copy_to_user(user_msr_list->indices, &msrs_to_save,
    				 num_msrs_to_save * sizeof(u32)))
    			goto out;
    
    		if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
    
    				 &emulated_msrs,
    				 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
    			goto out;
    		r = 0;
    		break;
    	}
    
    	case KVM_GET_SUPPORTED_CPUID: {
    		struct kvm_cpuid2 __user *cpuid_arg = argp;
    		struct kvm_cpuid2 cpuid;
    
    		r = -EFAULT;
    		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
    			goto out;
    		r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
    
    						      cpuid_arg->entries);
    
    		if (r)
    			goto out;
    
    		r = -EFAULT;
    		if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
    			goto out;
    		r = 0;
    		break;
    	}
    
    Huang Ying's avatar
    Huang Ying committed
    	case KVM_X86_GET_MCE_CAP_SUPPORTED: {
    		u64 mce_cap;
    
    		mce_cap = KVM_MCE_CAP_SUPPORTED;
    		r = -EFAULT;
    		if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
    			goto out;
    		r = 0;
    		break;
    	}
    
    /*
     * Arch hook run when @vcpu is loaded on physical cpu @cpu: forwards to the
     * vendor vcpu_load callback in kvm_x86_ops, then requests a guest time
     * update for the vcpu.
     */
    void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
    {
    	kvm_x86_ops->vcpu_load(vcpu, cpu);
    
    	kvm_request_guest_time_update(vcpu);
    
    }
    
    void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
    {
    	kvm_x86_ops->vcpu_put(vcpu);
    
    static int is_efer_nx(void)
    
    	unsigned long long efer = 0;
    
    	rdmsrl_safe(MSR_EFER, &efer);
    
    	return efer & EFER_NX;
    }
    
    static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
    {
    	int i;
    	struct kvm_cpuid_entry2 *e, *entry;
    
    
    	entry = NULL;
    
    	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
    		e = &vcpu->arch.cpuid_entries[i];
    
    		if (e->function == 0x80000001) {
    			entry = e;
    			break;
    		}
    	}
    
    	if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
    
    		entry->edx &= ~(1 << 20);
    		printk(KERN_INFO "kvm: guest NX capability removed\n");
    	}
    }
    
    
    /*
     * Legacy CPUID setter, used when an old userspace process talks to a new
     * kernel module: takes entries in the struct kvm_cpuid_entry layout and
     * stores them in the vcpu's cpuid_entries table with index, flags and
     * padding zeroed.
     *
     * Returns 0 on success, -E2BIG when nent exceeds KVM_MAX_CPUID_ENTRIES,
     * -ENOMEM when the temporary buffer cannot be allocated, -EFAULT when the
     * entries cannot be copied from userspace.
     */
    static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
    				    struct kvm_cpuid *cpuid,
    				    struct kvm_cpuid_entry __user *entries)
    {
    	struct kvm_cpuid_entry *legacy;
    	int ret, n;
    
    	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
    		return -E2BIG;
    
    	legacy = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
    	if (!legacy)
    		return -ENOMEM;
    
    	ret = -EFAULT;
    	if (copy_from_user(legacy, entries,
    			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
    		goto out_free;
    
    	for (n = 0; n < cpuid->nent; n++) {
    		const struct kvm_cpuid_entry *src = &legacy[n];
    		struct kvm_cpuid_entry2 *dst = &vcpu->arch.cpuid_entries[n];
    
    		dst->function = src->function;
    		dst->eax = src->eax;
    		dst->ebx = src->ebx;
    		dst->ecx = src->ecx;
    		dst->edx = src->edx;
    		/* fields the legacy layout does not carry */
    		dst->index = 0;
    		dst->flags = 0;
    		dst->padding[0] = 0;
    		dst->padding[1] = 0;
    		dst->padding[2] = 0;
    	}
    	vcpu->arch.cpuid_nent = cpuid->nent;
    
    	cpuid_fix_nx_cap(vcpu);
    	ret = 0;
    	kvm_apic_set_version(vcpu);
    
    out_free:
    	vfree(legacy);
    	return ret;
    }
    
    static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
    
    				     struct kvm_cpuid2 *cpuid,
    				     struct kvm_cpuid_entry2 __user *entries)
    
    {
    	int r;
    
    	r = -E2BIG;
    	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
    		goto out;
    	r = -EFAULT;
    
    	if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
    
    			   cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
    
    	vcpu->arch.cpuid_nent = cpuid->nent;
    
    	kvm_apic_set_version(vcpu);
    
    static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
    
    				     struct kvm_cpuid2 *cpuid,
    				     struct kvm_cpuid_entry2 __user *entries)
    
    	if (cpuid->nent < vcpu->arch.cpuid_nent)
    
    		goto out;
    	r = -EFAULT;
    
    	if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
    
    			 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
    
    	cpuid->nent = vcpu->arch.cpuid_nent;
    
    	return r;
    }
    
    static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
    
    {
    	entry->function = function;
    	entry->index = index;
    	cpuid_count(entry->function, entry->index,
    
    		    &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
    
    static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
    			 u32 index, int *nent, int maxnent)
    {
    
    	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
    
    	unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0;
    
    #ifdef CONFIG_X86_64
    
    	unsigned f_lm = F(LM);
    #else
    	unsigned f_lm = 0;
    
    
    	/* cpuid 1.edx */
    	const u32 kvm_supported_word0_x86_features =
    		F(FPU) | F(VME) | F(DE) | F(PSE) |
    		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
    		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
    		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
    		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) |
    		0 /* Reserved, DS, ACPI */ | F(MMX) |
    		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
    		0 /* HTT, TM, Reserved, PBE */;
    	/* cpuid 0x80000001.edx */
    	const u32 kvm_supported_word1_x86_features =
    		F(FPU) | F(VME) | F(DE) | F(PSE) |
    		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
    		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
    		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
    		F(PAT) | F(PSE36) | 0 /* Reserved */ |
    		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
    
    		F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ |
    
    		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
    	/* cpuid 1.ecx */
    	const u32 kvm_supported_word4_x86_features =
    
    		F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ |
    		0 /* DS-CPL, VMX, SMX, EST */ |
    		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
    		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
    		0 /* Reserved, DCA */ | F(XMM4_1) |
    
    		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
    
    		0 /* Reserved, XSAVE, OSXSAVE */;
    
    	const u32 kvm_supported_word6_x86_features =
    
    		F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
    		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
    		F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) |
    		0 /* SKINIT */ | 0 /* WDT */;
    
    	/* all calls to cpuid_count() should be made on the same cpu */
    
    	get_cpu();
    	do_cpuid_1_ent(entry, function, index);
    	++*nent;
    
    	switch (function) {
    	case 0:
    		entry->eax = min(entry->eax, (u32)0xb);
    		break;
    	case 1:
    		entry->edx &= kvm_supported_word0_x86_features;
    
    		entry->ecx &= kvm_supported_word4_x86_features;
    
    		/* we support x2apic emulation even if host does not support
    		 * it since we emulate x2apic in software */
    		entry->ecx |= F(X2APIC);
    
    		break;
    	/* function 2 entries are STATEFUL. That is, repeated cpuid commands
    	 * may return different values. This forces us to get_cpu() before
    	 * issuing the first command, and also to emulate this annoying behavior
    	 * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
    	case 2: {
    		int t, times = entry->eax & 0xff;
    
    		entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
    
    		entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
    
    		for (t = 1; t < times && *nent < maxnent; ++t) {
    			do_cpuid_1_ent(&entry[t], function, 0);
    			entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
    			++*nent;
    		}
    		break;
    	}
    	/* function 4 and 0xb have additional index. */
    	case 4: {
    
    		int i, cache_type;
    
    
    		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
    		/* read more entries until cache_type is zero */
    
    		for (i = 1; *nent < maxnent; ++i) {
    			cache_type = entry[i - 1].eax & 0x1f;
    
    			if (!cache_type)
    				break;
    
    			do_cpuid_1_ent(&entry[i], function, i);
    			entry[i].flags |=
    
    			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
    			++*nent;
    		}
    		break;
    	}
    	case 0xb: {
    
    		int i, level_type;
    
    
    		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
    		/* read more entries until level_type is zero */
    
    		for (i = 1; *nent < maxnent; ++i) {
    
    			level_type = entry[i - 1].ecx & 0xff00;
    
    			if (!level_type)
    				break;
    
    			do_cpuid_1_ent(&entry[i], function, i);
    			entry[i].flags |=
    
    			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
    			++*nent;
    		}
    		break;
    	}
    	case 0x80000000:
    		entry->eax = min(entry->eax, 0x8000001a);
    		break;
    	case 0x80000001:
    		entry->edx &= kvm_supported_word1_x86_features;
    		entry->ecx &= kvm_supported_word6_x86_features;
    		break;
    	}
    	put_cpu();
    }
    
    
    /*
     * Collect CPUID entries via do_cpuid_ent() for the basic leaves (from 0)
     * and the extended leaves (from 0x80000000) and copy them to userspace.
     *
     * @cpuid:   in: nent is the capacity of @entries; out: nent is the number
     *           of entries written.
     * @entries: userspace destination array.
     *
     * Returns 0 on success, -E2BIG when nent is 0 or the table fills up,
     * -ENOMEM when the scratch table cannot be allocated, -EFAULT when the
     * copy to userspace fails.
     */
    static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
    				     struct kvm_cpuid_entry2 __user *entries)
    {
    	struct kvm_cpuid_entry2 *cpuid_entries;
    	int limit, nent = 0, r = -E2BIG;
    	u32 func;
    
    	if (cpuid->nent < 1)
    		goto out;
    	/*
    	 * nent is supplied by userspace.  Cap it so the size computed for
    	 * vmalloc() below can neither overflow nor request an arbitrarily
    	 * large allocation.
    	 */
    	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
    		cpuid->nent = KVM_MAX_CPUID_ENTRIES;
    	r = -ENOMEM;
    	cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
    	if (!cpuid_entries)
    		goto out;
    
    	/* basic leaves: leaf 0's eax is the highest basic function */
    	do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent);
    	limit = cpuid_entries[0].eax;
    	for (func = 1; func <= limit && nent < cpuid->nent; ++func)
    		do_cpuid_ent(&cpuid_entries[nent], func, 0,
    			     &nent, cpuid->nent);
    
    	/* a full table means entries may have been dropped */
    	r = -E2BIG;
    	if (nent >= cpuid->nent)
    		goto out_free;
    
    	/* extended leaves: 0x80000000's eax is the highest extended function */
    	do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent);
    	limit = cpuid_entries[nent - 1].eax;
    	for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
    		do_cpuid_ent(&cpuid_entries[nent], func, 0,
    			     &nent, cpuid->nent);
    
    	r = -E2BIG;
    	if (nent >= cpuid->nent)
    		goto out_free;
    
    	r = -EFAULT;
    	if (copy_to_user(entries, cpuid_entries,
    			 nent * sizeof(struct kvm_cpuid_entry2)))
    		goto out_free;
    	cpuid->nent = nent;
    	r = 0;
    
    out_free:
    	vfree(cpuid_entries);
    out:
    	return r;
    }
    
    
    /*
     * Copy the vcpu's local APIC register area into @s.  The copy length is
     * the size of struct kvm_lapic_state and is done with the vcpu loaded.
     *
     * Always returns 0.
     */
    static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
    				    struct kvm_lapic_state *s)
    {
    	const void *apic_regs;
    
    	vcpu_load(vcpu);
    	apic_regs = vcpu->arch.apic->regs;
    	memcpy(s->regs, apic_regs, sizeof(struct kvm_lapic_state));
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
    				    struct kvm_lapic_state *s)
    {
    	vcpu_load(vcpu);
    
    	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
    
    	kvm_apic_post_state_restore(vcpu);
    
    static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
    				    struct kvm_interrupt *irq)
    {
    	if (irq->irq < 0 || irq->irq >= 256)
    		return -EINVAL;
    	if (irqchip_in_kernel(vcpu->kvm))
    		return -ENXIO;
    	vcpu_load(vcpu);
    
    
    	kvm_queue_interrupt(vcpu, irq->irq, false);
    
    /*
     * Inject an NMI into @vcpu.  The vcpu is loaded around the call to
     * kvm_inject_nmi().  Always returns 0.
     */
    static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
    {
    	vcpu_load(vcpu);
    	kvm_inject_nmi(vcpu);
    	vcpu_put(vcpu);
    
    	return 0;
    }
    
    
    /*
     * Turn TPR access reporting on or off for @vcpu according to
     * @tac->enabled.
     *
     * Returns 0 on success, -EINVAL when any bit of @tac->flags is set.
     */
    static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
    					   struct kvm_tpr_access_ctl *tac)
    {
    	if (tac->flags != 0)
    		return -EINVAL;
    
    	/* any non-zero request value counts as "enabled" */
    	vcpu->arch.tpr_access_reporting = tac->enabled ? 1 : 0;
    
    	return 0;
    }
    
    
    Huang Ying's avatar
    Huang Ying committed
    /*
     * Set up machine-check emulation for @vcpu from a userspace-supplied
     * IA32_MCG_CAP value.
     *
     * @mcg_cap: requested capability word; the low byte is the bank count.
     *
     * Returns 0 on success.  Returns -EINVAL when the bank count is zero or
     * exceeds KVM_MAX_MCE_BANKS, or when @mcg_cap has bits set outside
     * KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000.
     */
    static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
    					u64 mcg_cap)
    {
    	int r;
    	unsigned bank_num = mcg_cap & 0xff, bank;
    
    	r = -EINVAL;
    	/*
    	 * bank_num comes from userspace and can be as large as 255; without
    	 * an upper bound the init loop below would write past mce_banks.
    	 * NOTE(review): assumes mce_banks holds 4 * KVM_MAX_MCE_BANKS
    	 * entries, which is the MSR range the read dispatcher forwards to
    	 * get_msr_mce() -- confirm against the allocation site.
    	 */
    	if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
    		goto out;
    	if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
    		goto out;
    	r = 0;
    	vcpu->arch.mcg_cap = mcg_cap;
    	/* Init IA32_MCG_CTL to all 1s */
    	if (mcg_cap & MCG_CTL_P)
    		vcpu->arch.mcg_ctl = ~(u64)0;
    	/* Init IA32_MCi_CTL to all 1s; it is the first of 4 words per bank */
    	for (bank = 0; bank < bank_num; bank++)
    		vcpu->arch.mce_banks[bank*4] = ~(u64)0;
    out:
    	return r;
    }
    
    /*
     * Record a machine-check event described by @mce in the vcpu's emulated
     * MCE bank registers and, for an uncorrected error, queue MC_VECTOR.
     *
     * Each bank occupies four u64 slots in mce_banks; as used here slot 0 is
     * the bank's CTL word, slot 1 its status, slot 2 the address and slot 3
     * the misc value.
     *
     * Returns -EINVAL when @mce->bank is not below the bank count in mcg_cap
     * or @mce->status lacks MCI_STATUS_VAL; returns 0 in every other case,
     * including when the event is dropped because reporting is disabled.
     */
    static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
    				      struct kvm_x86_mce *mce)
    {
    	u64 mcg_cap = vcpu->arch.mcg_cap;
    	unsigned nr_banks = mcg_cap & 0xff;
    	u64 *bank;
    
    	if (!(mce->status & MCI_STATUS_VAL) || mce->bank >= nr_banks)
    		return -EINVAL;
    
    	bank = vcpu->arch.mce_banks + 4 * mce->bank;
    
    	if (!(mce->status & MCI_STATUS_UC)) {
    		/*
    		 * Corrected error: it must not replace a valid uncorrected
    		 * record already held by the bank; only flag the overflow.
    		 */
    		if ((bank[1] & MCI_STATUS_VAL) && (bank[1] & MCI_STATUS_UC)) {
    			bank[1] |= MCI_STATUS_OVER;
    			return 0;
    		}
    		if (bank[1] & MCI_STATUS_VAL)
    			mce->status |= MCI_STATUS_OVER;
    		bank[2] = mce->addr;
    		bank[3] = mce->misc;
    		bank[1] = mce->status;
    		return 0;
    	}
    
    	/*
    	 * Uncorrected error from here on.
    	 *
    	 * if IA32_MCG_CTL is not all 1s, the uncorrected error
    	 * reporting is disabled
    	 */
    	if ((mcg_cap & MCG_CTL_P) && vcpu->arch.mcg_ctl != ~(u64)0)
    		return 0;
    	/*
    	 * if IA32_MCi_CTL is not all 1s, the uncorrected error
    	 * reporting is disabled for the bank
    	 */
    	if (bank[0] != ~(u64)0)
    		return 0;
    
    	/* MCIP already set, or CR4.MCE clear: request a triple fault */
    	if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
    	    !(vcpu->arch.cr4 & X86_CR4_MCE)) {
    		printk(KERN_DEBUG "kvm: set_mce: "
    		       "injects mce exception while "
    		       "previous one is in progress!\n");
    		set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
    		return 0;
    	}
    
    	if (bank[1] & MCI_STATUS_VAL)
    		mce->status |= MCI_STATUS_OVER;
    	bank[2] = mce->addr;
    	bank[3] = mce->misc;
    	vcpu->arch.mcg_status = mce->mcg_status;
    	bank[1] = mce->status;
    	kvm_queue_exception(vcpu, MC_VECTOR);
    
    	return 0;
    }
    
    
    long kvm_arch_vcpu_ioctl(struct file *filp,
    			 unsigned int ioctl, unsigned long arg)
    {
    	struct kvm_vcpu *vcpu = filp->private_data;
    	void __user *argp = (void __user *)arg;
    	int r;
    
    	struct kvm_lapic_state *lapic = NULL;
    
    
    	switch (ioctl) {
    	case KVM_GET_LAPIC: {
    
    		lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
    
    		r = -ENOMEM;
    		if (!lapic)
    			goto out;
    		r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic);
    
    		if (r)
    			goto out;
    		r = -EFAULT;
    
    		if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state)))
    
    			goto out;
    		r = 0;
    		break;
    	}
    	case KVM_SET_LAPIC: {
    
    		lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
    		r = -ENOMEM;
    		if (!lapic)
    			goto out;
    
    		r = -EFAULT;
    
    		if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state)))
    
    		r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic);
    
    		if (r)
    			goto out;
    		r = 0;
    		break;
    	}
    
    	case KVM_INTERRUPT: {
    		struct kvm_interrupt irq;
    
    		r = -EFAULT;
    		if (copy_from_user(&irq, argp, sizeof irq))
    			goto out;
    		r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
    		if (r)
    			goto out;
    		r = 0;
    		break;
    	}
    
    	case KVM_NMI: {
    		r = kvm_vcpu_ioctl_nmi(vcpu);
    		if (r)
    			goto out;
    		r = 0;
    		break;
    	}
    
    	case KVM_SET_CPUID: {
    		struct kvm_cpuid __user *cpuid_arg = argp;
    		struct kvm_cpuid cpuid;
    
    		r = -EFAULT;
    		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
    			goto out;
    		r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
    		if (r)
    			goto out;
    		break;
    	}
    
    	case KVM_SET_CPUID2: {
    		struct kvm_cpuid2 __user *cpuid_arg = argp;
    		struct kvm_cpuid2 cpuid;
    
    		r = -EFAULT;
    		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
    			goto out;
    		r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
    
    					      cpuid_arg->entries);
    
    		if (r)
    			goto out;
    		break;
    	}
    	case KVM_GET_CPUID2: {
    		struct kvm_cpuid2 __user *cpuid_arg = argp;
    		struct kvm_cpuid2 cpuid;
    
    		r = -EFAULT;
    		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
    			goto out;
    		r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
    
    					      cpuid_arg->entries);
    
    		if (r)
    			goto out;
    		r = -EFAULT;
    		if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
    			goto out;
    		r = 0;
    		break;
    	}
    
    	case KVM_GET_MSRS:
    		r = msr_io(vcpu, argp, kvm_get_msr, 1);
    		break;
    	case KVM_SET_MSRS:
    		r = msr_io(vcpu, argp, do_set_msr, 0);
    		break;
    
    	case KVM_TPR_ACCESS_REPORTING: {
    		struct kvm_tpr_access_ctl tac;
    
    		r = -EFAULT;
    		if (copy_from_user(&tac, argp, sizeof tac))
    			goto out;
    		r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
    		if (r)
    			goto out;
    		r = -EFAULT;
    		if (copy_to_user(argp, &tac, sizeof tac))
    			goto out;
    		r = 0;
    		break;
    	};
    
    	case KVM_SET_VAPIC_ADDR: {
    		struct kvm_vapic_addr va;
    
    		r = -EINVAL;
    		if (!irqchip_in_kernel(vcpu->kvm))
    			goto out;
    		r = -EFAULT;
    		if (copy_from_user(&va, argp, sizeof va))
    			goto out;
    		r = 0;
    		kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
    		break;
    	}
    
    Huang Ying's avatar
    Huang Ying committed
    	case KVM_X86_SETUP_MCE: {
    		u64 mcg_cap;
    
    		r = -EFAULT;
    		if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
    			goto out;
    		r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
    		break;
    	}
    	case KVM_X86_SET_MCE: {
    		struct kvm_x86_mce mce;
    
    		r = -EFAULT;
    		if (copy_from_user(&mce, argp, sizeof mce))
    			goto out;
    		r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
    		break;
    	}
    
    	default:
    		r = -EINVAL;
    	}
    out:
    
    static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
    {
    	int ret;
    
    	if (addr > (unsigned int)(-3 * PAGE_SIZE))
    		return -1;
    	ret = kvm_x86_ops->set_tss_addr(kvm, addr);
    	return ret;
    }
    
    
    /*
     * Store @ident_addr as the VM's EPT identity map address.  The value is
     * recorded as given, with no validation here.  Always returns 0.
     */
    static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
    					      u64 ident_addr)
    {
    	kvm->arch.ept_identity_map_addr = ident_addr;
    	return 0;
    }
    
    
    /*
     * Change the number of MMU pages for @kvm to @kvm_nr_mmu_pages and record
     * that value as the requested count.  Both steps run under mmu_lock.
     *
     * Returns 0 on success, -EINVAL when the request is below
     * KVM_MIN_ALLOC_MMU_PAGES (nothing is changed in that case).
     */
    static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
    					  u32 kvm_nr_mmu_pages)
    {
    	int too_small = kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES;
    
    	if (!too_small) {
    		spin_lock(&kvm->mmu_lock);
    		kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
    		kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
    		spin_unlock(&kvm->mmu_lock);
    	}
    
    	return too_small ? -EINVAL : 0;
    }
    
    /*
     * Return the VM's n_alloc_mmu_pages counter.
     */
    static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
    {
    	return kvm->arch.n_alloc_mmu_pages;
    }
    
    /*
     * Translate @gfn through the VM's alias table.
     *
     * The first alias whose range [base_gfn, base_gfn + npages) contains @gfn
     * decides the result: the frame at the same offset from that alias's
     * target_gfn.  A @gfn covered by no alias is returned unchanged.
     */
    gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
    {
    	struct kvm_mem_alias *slot = &kvm->arch.aliases[0];
    	int remaining;
    
    	for (remaining = kvm->arch.naliases; remaining > 0;
    	     --remaining, ++slot) {
    		if (gfn < slot->base_gfn)
    			continue;
    		if (gfn < slot->base_gfn + slot->npages)
    			return slot->target_gfn + gfn - slot->base_gfn;
    	}
    
    	return gfn;
    }
    
    
    /*
     * Set a new alias region.  Aliases map a portion of physical memory into
     * another portion.  This is useful for memory windows, for example the PC
     * VGA region.
     */
    static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
    					 struct kvm_memory_alias *alias)
    {
    	int r, n;
    	struct kvm_mem_alias *p;
    
    	r = -EINVAL;
    	/* General sanity checks */
    	if (alias->memory_size & (PAGE_SIZE - 1))
    		goto out;
    	if (alias->guest_phys_addr & (PAGE_SIZE - 1))
    		goto out;
    	if (alias->slot >= KVM_ALIAS_SLOTS)
    		goto out;
    	if (alias->guest_phys_addr + alias->memory_size
    	    < alias->guest_phys_addr)
    		goto out;
    	if (alias->target_phys_addr + alias->memory_size
    	    < alias->target_phys_addr)
    		goto out;
    
    
    	spin_lock(&kvm->mmu_lock);