		return kvm_x2apic_msr_write(vcpu, msr, data);
    
    	case MSR_IA32_TSCDEADLINE:
    		kvm_set_lapic_tscdeadline_msr(vcpu, data);
    		break;
    
    	case MSR_IA32_TSC_ADJUST:
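		/*
		 * A guest write moves the TSC offset by the delta from the
		 * previously written value; host-initiated writes only
		 * update the saved MSR value.
		 */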
    		if (guest_cpuid_has_tsc_adjust(vcpu)) {
    			if (!msr_info->host_initiated) {
    				u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
    				kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
    			}
    			vcpu->arch.ia32_tsc_adjust_msr = data;
    		}
    		break;
    
    	case MSR_IA32_MISC_ENABLE:
    
		vcpu->arch.ia32_misc_enable_msr = data;
		break;
    
    	case MSR_KVM_WALL_CLOCK_NEW:
    
    	case MSR_KVM_WALL_CLOCK:
    		vcpu->kvm->arch.wall_clock = data;
    		kvm_write_wall_clock(vcpu->kvm, data);
    		break;
    
    	case MSR_KVM_SYSTEM_TIME_NEW:
    
    	case MSR_KVM_SYSTEM_TIME: {
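		/*
		 * The guest writes the guest-physical address of its
		 * pvclock_vcpu_time_info area here; bit 0 is the enable
		 * flag.
		 */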
    
		kvmclock_reset(vcpu);

		vcpu->arch.time = data;
		kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);

		/* we verify if the enable bit is set... */
		if (!(data & 1))
			break;

		if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
		     &vcpu->arch.pv_time, data & ~1ULL,
		     sizeof(struct pvclock_vcpu_time_info)))
			vcpu->arch.pv_time_enabled = false;
		else
			vcpu->arch.pv_time_enabled = true;

		break;
	}
    
    	case MSR_KVM_ASYNC_PF_EN:
    		if (kvm_pv_enable_async_pf(vcpu, data))
    			return 1;
    		break;
    
    	case MSR_KVM_STEAL_TIME:
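		/*
		 * The guest registers the guest-physical address of its
		 * kvm_steal_time area; KVM_MSR_ENABLED (bit 0) turns steal
		 * time reporting on.
		 */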
    
    		if (unlikely(!sched_info_on()))
    			return 1;
    
    		if (data & KVM_STEAL_RESERVED_MASK)
    			return 1;
    
    		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
    
    						data & KVM_STEAL_VALID_BITS,
    						sizeof(struct kvm_steal_time)))
    
    			return 1;
    
    		vcpu->arch.st.msr_val = data;
    
    		if (!(data & KVM_MSR_ENABLED))
    			break;
    
    		vcpu->arch.st.last_steal = current->sched_info.run_delay;
    
    		preempt_disable();
    		accumulate_steal_time(vcpu);
    		preempt_enable();
    
    		kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
    
    		break;
    
    	case MSR_KVM_PV_EOI_EN:
    		if (kvm_lapic_enable_pv_eoi(vcpu, data))
    			return 1;
    		break;
    
    	case MSR_IA32_MCG_CTL:
    	case MSR_IA32_MCG_STATUS:
    	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
    		return set_msr_mce(vcpu, msr, data);
    
    
    	/* Performance counters are not protected by a CPUID bit,
    	 * so we should check all of them in the generic path for the sake of
    	 * cross vendor migration.
    	 * Writing a zero into the event select MSRs disables them,
    	 * which we perfectly emulate ;-). Any other value should be at least
    	 * reported, some guests depend on them.
    	 */
    	case MSR_K7_EVNTSEL0:
    	case MSR_K7_EVNTSEL1:
    	case MSR_K7_EVNTSEL2:
    	case MSR_K7_EVNTSEL3:
    		if (data != 0)
    
    			vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
    				    "0x%x data 0x%llx\n", msr, data);
    
    		break;
    	/* at least RHEL 4 unconditionally writes to the perfctr registers,
    	 * so we ignore writes to make it happy.
    	 */
    	case MSR_K7_PERFCTR0:
    	case MSR_K7_PERFCTR1:
    	case MSR_K7_PERFCTR2:
    	case MSR_K7_PERFCTR3:
    
    		vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
    			    "0x%x data 0x%llx\n", msr, data);
    
    	case MSR_P6_PERFCTR0:
    	case MSR_P6_PERFCTR1:
    		pr = true;
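		/* fall through: the PERFCTR and EVNTSEL MSRs share the handling below */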
    	case MSR_P6_EVNTSEL0:
    	case MSR_P6_EVNTSEL1:
		if (kvm_pmu_msr(vcpu, msr))
			return kvm_pmu_set_msr(vcpu, msr_info);

		if (pr || data != 0)
			vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
				    "0x%x data 0x%llx\n", msr, data);
		break;
    	case MSR_K7_CLK_CTL:
    		/*
    		 * Ignore all writes to this no longer documented MSR.
    		 * Writes are only relevant for old K7 processors,
		 * all pre-dating SVM, but a recommended workaround from
		 * AMD for these chips. It is possible to specify the
    		 * affected processor models on the command line, hence
    		 * the need to ignore the workaround.
    		 */
    		break;
    
    	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
    		if (kvm_hv_msr_partition_wide(msr)) {
    			int r;
    			mutex_lock(&vcpu->kvm->lock);
    			r = set_msr_hyperv_pw(vcpu, msr, data);
    			mutex_unlock(&vcpu->kvm->lock);
    			return r;
    		} else
    			return set_msr_hyperv(vcpu, msr, data);
    		break;
    
    	case MSR_IA32_BBL_CR_CTL3:
    		/* Drop writes to this legacy MSR -- see rdmsr
    		 * counterpart for further detail.
    		 */
    
    		vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
    
    	case MSR_AMD64_OSVW_ID_LENGTH:
    		if (!guest_cpuid_has_osvw(vcpu))
    			return 1;
    		vcpu->arch.osvw.length = data;
    		break;
    	case MSR_AMD64_OSVW_STATUS:
    		if (!guest_cpuid_has_osvw(vcpu))
    			return 1;
    		vcpu->arch.osvw.status = data;
    		break;
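	/*
	 * Unknown MSRs end up here: the Xen HVM config MSR and PMU MSRs are
	 * forwarded, everything else is either rejected or silently ignored
	 * depending on the ignore_msrs module parameter.
	 */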
    
	default:
		if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
			return xen_hvm_config(vcpu, data);
		if (kvm_pmu_msr(vcpu, msr))
			return kvm_pmu_set_msr(vcpu, msr_info);
		if (!ignore_msrs) {
			vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
				    msr, data);
			return 1;
		} else {
			vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
				    msr, data);
			break;
		}
    
    	}
    	return 0;
    }
    EXPORT_SYMBOL_GPL(kvm_set_msr_common);
    
    
    /*
     * Reads an msr value (of 'msr_index') into 'pdata'.
     * Returns 0 on success, non-0 otherwise.
     * Assumes vcpu_load() was already called.
     */
    int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
    {
    	return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
    }
    
    
    static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
    {
    
    	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
    
    
    	if (!msr_mtrr_valid(msr))
    		return 1;
    
    
    	if (msr == MSR_MTRRdefType)
    		*pdata = vcpu->arch.mtrr_state.def_type +
    			 (vcpu->arch.mtrr_state.enabled << 10);
    	else if (msr == MSR_MTRRfix64K_00000)
    		*pdata = p[0];
    	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
    		*pdata = p[1 + msr - MSR_MTRRfix16K_80000];
    	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
    		*pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
    	else if (msr == MSR_IA32_CR_PAT)
    		*pdata = vcpu->arch.pat;
    	else {	/* Variable MTRRs */
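		/*
		 * Variable-range MTRRs come as base/mask pairs starting at
		 * MSR 0x200: even MSRs select the base register, odd MSRs
		 * the matching mask.
		 */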
    		int idx, is_mtrr_mask;
    		u64 *pt;
    
    		idx = (msr - 0x200) / 2;
    		is_mtrr_mask = msr - 0x200 - 2 * idx;
    		if (!is_mtrr_mask)
    			pt =
    			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
    		else
    			pt =
    			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
    		*pdata = *pt;
    	}
    
    
    	return 0;
    }
    
    
static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 data;
    	u64 mcg_cap = vcpu->arch.mcg_cap;
    	unsigned bank_num = mcg_cap & 0xff;
    
    
    	switch (msr) {
    	case MSR_IA32_P5_MC_ADDR:
    	case MSR_IA32_P5_MC_TYPE:
    
    		data = 0;
    		break;
    
	case MSR_IA32_MCG_CAP:
    		data = vcpu->arch.mcg_cap;
    		break;
    
    	case MSR_IA32_MCG_CTL:
    
    		if (!(mcg_cap & MCG_CTL_P))
    			return 1;
    		data = vcpu->arch.mcg_ctl;
    		break;
    	case MSR_IA32_MCG_STATUS:
    		data = vcpu->arch.mcg_status;
    		break;
    	default:
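		/*
		 * Each MCE bank exposes four MSRs (CTL, STATUS, ADDR, MISC),
		 * so the bank registers span MC0_CTL .. MC0_CTL + 4 * bank_num - 1.
		 */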
    		if (msr >= MSR_IA32_MC0_CTL &&
    		    msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
    			u32 offset = msr - MSR_IA32_MC0_CTL;
    			data = vcpu->arch.mce_banks[offset];
    			break;
    		}
    		return 1;
    	}
    	*pdata = data;
    	return 0;
    }
    
    
    static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
    {
    	u64 data = 0;
    	struct kvm *kvm = vcpu->kvm;
    
    	switch (msr) {
    	case HV_X64_MSR_GUEST_OS_ID:
    		data = kvm->arch.hv_guest_os_id;
    		break;
    	case HV_X64_MSR_HYPERCALL:
    		data = kvm->arch.hv_hypercall;
    		break;
    	default:
    
    		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
    
    		return 1;
    	}
    
    	*pdata = data;
    	return 0;
    }
    
    static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
    {
    	u64 data = 0;
    
    	switch (msr) {
    	case HV_X64_MSR_VP_INDEX: {
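		/*
		 * Report the vcpu's position in the kvm->vcpus array as its
		 * Hyper-V virtual processor index.
		 */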
    		int r;
    		struct kvm_vcpu *v;
    		kvm_for_each_vcpu(r, v, vcpu->kvm)
    			if (v == vcpu)
    				data = r;
    		break;
    	}
    
    	case HV_X64_MSR_EOI:
    		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
    	case HV_X64_MSR_ICR:
    		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
    	case HV_X64_MSR_TPR:
    		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
    
    	case HV_X64_MSR_APIC_ASSIST_PAGE:
    
    		data = vcpu->arch.hv_vapic;
    		break;
    
    		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
    
    Huang Ying's avatar
    Huang Ying committed
    int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
    {
    	u64 data;
    
    	switch (msr) {
    	case MSR_IA32_PLATFORM_ID:
    
    	case MSR_IA32_EBL_CR_POWERON:
    
    	case MSR_IA32_DEBUGCTLMSR:
    	case MSR_IA32_LASTBRANCHFROMIP:
    	case MSR_IA32_LASTBRANCHTOIP:
    	case MSR_IA32_LASTINTFROMIP:
    	case MSR_IA32_LASTINTTOIP:
    
    	case MSR_K8_SYSCFG:
    	case MSR_K7_HWCR:
    
    	case MSR_VM_HSAVE_PA:
    
    	case MSR_K7_EVNTSEL0:
    
    	case MSR_K7_PERFCTR0:
    
    	case MSR_K8_INT_PENDING_MSG:
    
    	case MSR_AMD64_NB_CFG:
    
    	case MSR_FAM10H_MMIO_CONF_BASE:
    
    	case MSR_P6_PERFCTR0:
    	case MSR_P6_PERFCTR1:
    	case MSR_P6_EVNTSEL0:
    	case MSR_P6_EVNTSEL1:
    		if (kvm_pmu_msr(vcpu, msr))
    			return kvm_pmu_get_msr(vcpu, msr, pdata);
    		data = 0;
    		break;
    
    	case MSR_IA32_UCODE_REV:
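		/* the microcode revision is reported in the high 32 bits */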
    		data = 0x100000000ULL;
    		break;
    
    	case MSR_MTRRcap:
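		/* fixed-range and WC support plus the variable-range count */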
    		data = 0x500 | KVM_NR_VAR_MTRR;
    		break;
    	case 0x200 ... 0x2ff:
    		return get_msr_mtrr(vcpu, msr, pdata);
    
    	case 0xcd: /* fsb frequency */
    		data = 3;
    		break;
    
    		/*
    		 * MSR_EBC_FREQUENCY_ID
    		 * Conservative value valid for even the basic CPU models.
    		 * Models 0,1: 000 in bits 23:21 indicating a bus speed of
    		 * 100MHz, model 2 000 in bits 18:16 indicating 100MHz,
    		 * and 266MHz for model 3, or 4. Set Core Clock
    		 * Frequency to System Bus Frequency Ratio to 1 (bits
    		 * 31:24) even though these are only valid for CPU
    		 * models > 2, however guests may end up dividing or
    		 * multiplying by zero otherwise.
    		 */
    	case MSR_EBC_FREQUENCY_ID:
    		data = 1 << 24;
    		break;
    
    	case MSR_IA32_APICBASE:
    		data = kvm_get_apic_base(vcpu);
    		break;
    
    	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
    		return kvm_x2apic_msr_read(vcpu, msr, pdata);
    		break;
    
    	case MSR_IA32_TSCDEADLINE:
    		data = kvm_get_lapic_tscdeadline_msr(vcpu);
    		break;
    
    	case MSR_IA32_TSC_ADJUST:
    		data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
    		break;
    
    	case MSR_IA32_MISC_ENABLE:
    
		data = vcpu->arch.ia32_misc_enable_msr;
		break;
    
    	case MSR_IA32_PERF_STATUS:
    		/* TSC increment by tick */
    		data = 1000ULL;
    		/* CPU multiplier */
    		data |= (((uint64_t)4ULL) << 40);
    		break;
    
	case MSR_EFER:
		data = vcpu->arch.efer;
		break;
    
    	case MSR_KVM_WALL_CLOCK:
    
    	case MSR_KVM_WALL_CLOCK_NEW:
    
    		data = vcpu->kvm->arch.wall_clock;
    		break;
    	case MSR_KVM_SYSTEM_TIME:
    
    	case MSR_KVM_SYSTEM_TIME_NEW:
    
    		data = vcpu->arch.time;
    		break;
    
    	case MSR_KVM_ASYNC_PF_EN:
    		data = vcpu->arch.apf.msr_val;
    		break;
    
    	case MSR_KVM_STEAL_TIME:
    		data = vcpu->arch.st.msr_val;
    		break;
    
    	case MSR_KVM_PV_EOI_EN:
    		data = vcpu->arch.pv_eoi.msr_val;
    		break;
    
    	case MSR_IA32_P5_MC_ADDR:
    	case MSR_IA32_P5_MC_TYPE:
    	case MSR_IA32_MCG_CAP:
    	case MSR_IA32_MCG_CTL:
    	case MSR_IA32_MCG_STATUS:
    	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
    		return get_msr_mce(vcpu, msr, pdata);
    
    	case MSR_K7_CLK_CTL:
    		/*
    		 * Provide expected ramp-up count for K7. All other
    		 * are set to zero, indicating minimum divisors for
    		 * every field.
    		 *
    		 * This prevents guest kernels on AMD host with CPU
    		 * type 6, model 8 and higher from exploding due to
    		 * the rdmsr failing.
    		 */
    		data = 0x20000000;
    		break;
    
    	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
    		if (kvm_hv_msr_partition_wide(msr)) {
    			int r;
    			mutex_lock(&vcpu->kvm->lock);
    			r = get_msr_hyperv_pw(vcpu, msr, pdata);
    			mutex_unlock(&vcpu->kvm->lock);
    			return r;
    		} else
    			return get_msr_hyperv(vcpu, msr, pdata);
    		break;
    
    	case MSR_IA32_BBL_CR_CTL3:
    		/* This legacy MSR exists but isn't fully documented in current
    		 * silicon.  It is however accessed by winxp in very narrow
    		 * scenarios where it sets bit #19, itself documented as
    		 * a "reserved" bit.  Best effort attempt to source coherent
    		 * read data here should the balance of the register be
    		 * interpreted by the guest:
    		 *
    		 * L2 cache control register 3: 64GB range, 256KB size,
    		 * enabled, latency 0x1, configured
    		 */
    		data = 0xbe702111;
    		break;
    
    	case MSR_AMD64_OSVW_ID_LENGTH:
    		if (!guest_cpuid_has_osvw(vcpu))
    			return 1;
    		data = vcpu->arch.osvw.length;
    		break;
    	case MSR_AMD64_OSVW_STATUS:
    		if (!guest_cpuid_has_osvw(vcpu))
    			return 1;
    		data = vcpu->arch.osvw.status;
    		break;
    
	default:
		if (kvm_pmu_msr(vcpu, msr))
			return kvm_pmu_get_msr(vcpu, msr, pdata);
		if (!ignore_msrs) {
			vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
			return 1;
		} else {
			vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
			data = 0;
		}
		break;
    	}
    	*pdata = data;
    	return 0;
    }
    EXPORT_SYMBOL_GPL(kvm_get_msr_common);
    
    
    /*
     * Read or write a bunch of msrs. All parameters are kernel addresses.
     *
     * @return number of msrs set successfully.
     */
    static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
    		    struct kvm_msr_entry *entries,
    		    int (*do_msr)(struct kvm_vcpu *vcpu,
    				  unsigned index, u64 *data))
{
	int i, idx;

	idx = srcu_read_lock(&vcpu->kvm->srcu);
    
    	for (i = 0; i < msrs->nmsrs; ++i)
    		if (do_msr(vcpu, entries[i].index, &entries[i].data))
    			break;
    
    	srcu_read_unlock(&vcpu->kvm->srcu, idx);
    
    
    	return i;
    }
    
    /*
     * Read or write a bunch of msrs. Parameters are user addresses.
     *
     * @return number of msrs set successfully.
     */
    static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
    		  int (*do_msr)(struct kvm_vcpu *vcpu,
    				unsigned index, u64 *data),
    		  int writeback)
    {
    	struct kvm_msrs msrs;
    	struct kvm_msr_entry *entries;
    	int r, n;
    	unsigned size;
    
    	r = -EFAULT;
    	if (copy_from_user(&msrs, user_msrs, sizeof msrs))
    		goto out;
    
    	r = -E2BIG;
    	if (msrs.nmsrs >= MAX_IO_MSRS)
    		goto out;
    
    	size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
    
    	entries = memdup_user(user_msrs->entries, size);
    	if (IS_ERR(entries)) {
		r = PTR_ERR(entries);
		goto out;
	}

    	r = n = __msr_io(vcpu, &msrs, entries, do_msr);
    	if (r < 0)
    		goto out_free;
    
    	r = -EFAULT;
    	if (writeback && copy_to_user(user_msrs->entries, entries, size))
    		goto out_free;
    
    	r = n;
    
out_free:
	kfree(entries);
out:
	return r;
}

    int kvm_dev_ioctl_check_extension(long ext)
    {
    	int r;
    
    	switch (ext) {
    	case KVM_CAP_IRQCHIP:
    	case KVM_CAP_HLT:
    	case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
    	case KVM_CAP_SET_TSS_ADDR:
    
    	case KVM_CAP_EXT_CPUID:
    
    	case KVM_CAP_CLOCKSOURCE:
    
    	case KVM_CAP_PIT:
    
    	case KVM_CAP_NOP_IO_DELAY:
    
    	case KVM_CAP_MP_STATE:
    
    	case KVM_CAP_SYNC_MMU:
    
    	case KVM_CAP_REINJECT_CONTROL:
    
    	case KVM_CAP_IRQ_INJECT_STATUS:
    
    	case KVM_CAP_IRQFD:
    
    	case KVM_CAP_IOEVENTFD:
    
    	case KVM_CAP_PIT2:
    
    	case KVM_CAP_PIT_STATE2:
    
    	case KVM_CAP_SET_IDENTITY_MAP_ADDR:
    
    	case KVM_CAP_XEN_HVM:
    
    	case KVM_CAP_ADJUST_CLOCK:
    
    	case KVM_CAP_VCPU_EVENTS:
    
    	case KVM_CAP_HYPERV:
    
    	case KVM_CAP_HYPERV_VAPIC:
    
    	case KVM_CAP_HYPERV_SPIN:
    
    	case KVM_CAP_PCI_SEGMENT:
    
    	case KVM_CAP_X86_ROBUST_SINGLESTEP:
    
    	case KVM_CAP_XSAVE:
    
    	case KVM_CAP_KVMCLOCK_CTRL:
    
    	case KVM_CAP_READONLY_MEM:
    
    #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
    	case KVM_CAP_ASSIGN_DEV_IRQ:
    	case KVM_CAP_PCI_2_3:
#endif
		r = 1;
		break;
    	case KVM_CAP_COALESCED_MMIO:
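		/* reports the page offset of the coalesced MMIO ring, not a boolean */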
    		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
    		break;
    
    	case KVM_CAP_VAPIC:
    		r = !kvm_x86_ops->cpu_has_accelerated_tpr();
    		break;
    
	case KVM_CAP_NR_VCPUS:
		r = KVM_SOFT_MAX_VCPUS;
		break;
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_PV_MMU:	/* obsolete */
		r = 0;
		break;
    #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
    
    	case KVM_CAP_IOMMU:
    
		r = iommu_present(&pci_bus_type);
		break;
#endif
    	case KVM_CAP_MCE:
    		r = KVM_MAX_MCE_BANKS;
    		break;
    
    	case KVM_CAP_XCRS:
    		r = cpu_has_xsave;
    		break;
    
    	case KVM_CAP_TSC_CONTROL:
    		r = kvm_has_tsc_control;
    		break;
    
    	case KVM_CAP_TSC_DEADLINE_TIMER:
    		r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
		break;
	default:
		r = 0;
		break;
	}
	return r;
}

    long kvm_arch_dev_ioctl(struct file *filp,
    			unsigned int ioctl, unsigned long arg)
    {
    	void __user *argp = (void __user *)arg;
    	long r;
    
    	switch (ioctl) {
    	case KVM_GET_MSR_INDEX_LIST: {
    		struct kvm_msr_list __user *user_msr_list = argp;
    		struct kvm_msr_list msr_list;
    		unsigned n;
    
    		r = -EFAULT;
    		if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
    			goto out;
    		n = msr_list.nmsrs;
    		msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
    		if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
    			goto out;
    		r = -E2BIG;
    
    		if (n < msr_list.nmsrs)
    
    			goto out;
    		r = -EFAULT;
    		if (copy_to_user(user_msr_list->indices, &msrs_to_save,
    				 num_msrs_to_save * sizeof(u32)))
    			goto out;
    
    		if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
    
    				 &emulated_msrs,
    				 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
    			goto out;
    		r = 0;
    		break;
    	}
    
    	case KVM_GET_SUPPORTED_CPUID: {
    		struct kvm_cpuid2 __user *cpuid_arg = argp;
    		struct kvm_cpuid2 cpuid;
    
    		r = -EFAULT;
    		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
    			goto out;
    		r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
    
    						      cpuid_arg->entries);
    
    		if (r)
    			goto out;
    
    		r = -EFAULT;
    		if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
    			goto out;
    		r = 0;
    		break;
    	}
    
    	case KVM_X86_GET_MCE_CAP_SUPPORTED: {
    		u64 mce_cap;
    
    		mce_cap = KVM_MCE_CAP_SUPPORTED;
    		r = -EFAULT;
    		if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
    			goto out;
    		r = 0;
    		break;
	}
	default:
		r = -EINVAL;
	}
out:
	return r;
}

    static void wbinvd_ipi(void *garbage)
    {
    	wbinvd();
    }
    
    static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
    {
    	return vcpu->kvm->arch.iommu_domain &&
    		!(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
    }
    
    
    void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
    {
    
    	/* Address WBINVD may be executed by guest */
    	if (need_emulate_wbinvd(vcpu)) {
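		/*
		 * Either track the physical CPUs this vcpu runs on (when
		 * WBINVD exits are available) or flush the previous CPU's
		 * caches with an IPI before it moves.
		 */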
    		if (kvm_x86_ops->has_wbinvd_exit())
    			cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
    		else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
    			smp_call_function_single(vcpu->cpu,
    					wbinvd_ipi, NULL, 1);
    	}
    
    
    	kvm_x86_ops->vcpu_load(vcpu, cpu);
    
    	/* Apply any externally detected TSC adjustments (due to suspend) */
    	if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
    		adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
    		vcpu->arch.tsc_offset_adjustment = 0;
    		set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
    	}
    
    	if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
    
    		s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
    				native_read_tsc() - vcpu->arch.last_host_tsc;
    
    		if (tsc_delta < 0)
    			mark_tsc_unstable("KVM discovered backwards TSC");
    
    		if (check_tsc_unstable()) {
    
    			u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu,
    						vcpu->arch.last_guest_tsc);
    			kvm_x86_ops->write_tsc_offset(vcpu, offset);
    
    			vcpu->arch.tsc_catchup = 1;
    		}
    
    		/*
    		 * On a host with synchronized TSC, there is no need to update
    		 * kvmclock on vcpu->cpu migration
    		 */
    		if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
    
    			kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
    
    		if (vcpu->cpu != cpu)
    			kvm_migrate_timers(vcpu);
    
		vcpu->cpu = cpu;
	}

    	accumulate_steal_time(vcpu);
    	kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
    
    }
    
    void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
    {
    
    	kvm_x86_ops->vcpu_put(vcpu);
    
    	kvm_put_guest_fpu(vcpu);
    
    	vcpu->arch.last_host_tsc = native_read_tsc();
    
    }
    
    static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
    				    struct kvm_lapic_state *s)
    {
    
    	kvm_x86_ops->sync_pir_to_irr(vcpu);
    
    	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
    
    
    	return 0;
    }
    
    static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
    				    struct kvm_lapic_state *s)
    {
    
	kvm_apic_post_state_restore(vcpu, s);

	return 0;
}

    static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
    				    struct kvm_interrupt *irq)
    {
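	/* userspace may only inject interrupts when it emulates the irqchip */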
    
    	if (irq->irq >= KVM_NR_INTERRUPTS)
    
    		return -EINVAL;
    	if (irqchip_in_kernel(vcpu->kvm))
    		return -ENXIO;
    
    
    	kvm_queue_interrupt(vcpu, irq->irq, false);
    
	kvm_make_request(KVM_REQ_EVENT, vcpu);

	return 0;
}
    
    static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
    {
    	kvm_inject_nmi(vcpu);
    
    	return 0;
    }
    
    
    static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
    					   struct kvm_tpr_access_ctl *tac)
    {
    	if (tac->flags)
    		return -EINVAL;
    	vcpu->arch.tpr_access_reporting = !!tac->enabled;
    	return 0;
    }
    
    
    static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
    					u64 mcg_cap)
    {
    	int r;
    	unsigned bank_num = mcg_cap & 0xff, bank;
    
    	r = -EINVAL;
    
	if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
		goto out;
    	if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
    		goto out;
    	r = 0;
    	vcpu->arch.mcg_cap = mcg_cap;
    	/* Init IA32_MCG_CTL to all 1s */
    	if (mcg_cap & MCG_CTL_P)
    		vcpu->arch.mcg_ctl = ~(u64)0;
    	/* Init IA32_MCi_CTL to all 1s */
    	for (bank = 0; bank < bank_num; bank++)
    		vcpu->arch.mce_banks[bank*4] = ~(u64)0;
    out:
    	return r;
    }
    
    static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
    				      struct kvm_x86_mce *mce)
    {
    	u64 mcg_cap = vcpu->arch.mcg_cap;
    	unsigned bank_num = mcg_cap & 0xff;
    	u64 *banks = vcpu->arch.mce_banks;
    
    	if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
    		return -EINVAL;
    	/*
    	 * if IA32_MCG_CTL is not all 1s, the uncorrected error
    	 * reporting is disabled
    	 */
    	if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
    	    vcpu->arch.mcg_ctl != ~(u64)0)
    		return 0;
    	banks += 4 * mce->bank;
    	/*
    	 * if IA32_MCi_CTL is not all 1s, the uncorrected error
    	 * reporting is disabled for the bank
    	 */
    	if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
    		return 0;
    	if (mce->status & MCI_STATUS_UC) {
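		/*
		 * Uncorrected errors are delivered as a machine check
		 * exception; if CR4.MCE is clear or a machine check is
		 * already in progress, fall back to a triple fault.
		 */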
    		if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
    
    		    !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
    
    			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
    
    			return 0;
    		}
    		if (banks[1] & MCI_STATUS_VAL)
    			mce->status |= MCI_STATUS_OVER;
    		banks[2] = mce->addr;
    		banks[3] = mce->misc;
    		vcpu->arch.mcg_status = mce->mcg_status;
    		banks[1] = mce->status;
    		kvm_queue_exception(vcpu, MC_VECTOR);
    	} else if (!(banks[1] & MCI_STATUS_VAL)
    		   || !(banks[1] & MCI_STATUS_UC)) {
    		if (banks[1] & MCI_STATUS_VAL)
    			mce->status |= MCI_STATUS_OVER;
    		banks[2] = mce->addr;
    		banks[3] = mce->misc;
    		banks[1] = mce->status;
    	} else
    		banks[1] |= MCI_STATUS_OVER;
    	return 0;
    }
    
    
    static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
    					       struct kvm_vcpu_events *events)
    {
    
    	process_nmi(vcpu);
    
    	events->exception.injected =
    		vcpu->arch.exception.pending &&
    		!kvm_exception_is_soft(vcpu->arch.exception.nr);
    
    	events->exception.nr = vcpu->arch.exception.nr;
    	events->exception.has_error_code = vcpu->arch.exception.has_error_code;
    
    	events->exception.pad = 0;
    
    	events->exception.error_code = vcpu->arch.exception.error_code;
    
    
    	events->interrupt.injected =
    		vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
    
    	events->interrupt.nr = vcpu->arch.interrupt.nr;
    
    	events->interrupt.soft = 0;
    
    	events->interrupt.shadow =
    		kvm_x86_ops->get_interrupt_shadow(vcpu,
    			KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
    
    
    	events->nmi.injected = vcpu->arch.nmi_injected;
    
    	events->nmi.pending = vcpu->arch.nmi_pending != 0;
    
    	events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
    
    	events->nmi.pad = 0;
    
    	events->sipi_vector = 0; /* never valid when reporting to user space */
    
    	events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
    
    			 | KVM_VCPUEVENT_VALID_SHADOW);
    
    	memset(&events->reserved, 0, sizeof(events->reserved));
    
    }
    
    static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
    					      struct kvm_vcpu_events *events)
    {
    
    	if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
    
    			      | KVM_VCPUEVENT_VALID_SIPI_VECTOR
    			      | KVM_VCPUEVENT_VALID_SHADOW))
    
    		return -EINVAL;
    
    
    	process_nmi(vcpu);
    
    	vcpu->arch.exception.pending = events->exception.injected;
    	vcpu->arch.exception.nr = events->exception.nr;
    	vcpu->arch.exception.has_error_code = events->exception.has_error_code;
    	vcpu->arch.exception.error_code = events->exception.error_code;
    
    	vcpu->arch.interrupt.pending = events->interrupt.injected;
    	vcpu->arch.interrupt.nr = events->interrupt.nr;
    	vcpu->arch.interrupt.soft = events->interrupt.soft;
    
    	if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
    		kvm_x86_ops->set_interrupt_shadow(vcpu,
    						  events->interrupt.shadow);
    
    
    	vcpu->arch.nmi_injected = events->nmi.injected;
    
    	if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
    		vcpu->arch.nmi_pending = events->nmi.pending;
    
    	kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
    
    
    	if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
    	    kvm_vcpu_has_lapic(vcpu))
    		vcpu->arch.apic->sipi_vector = events->sipi_vector;
    
	kvm_make_request(KVM_REQ_EVENT, vcpu);

	return 0;
}
    
    
    static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
    					     struct kvm_debugregs *dbgregs)
    {
    	memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
    	dbgregs->dr6 = vcpu->arch.dr6;
    	dbgregs->dr7 = vcpu->arch.dr7;
    	dbgregs->flags = 0;
    
    	memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
    
    }
    
    static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
    					    struct kvm_debugregs *dbgregs)
    {
    	if (dbgregs->flags)
    		return -EINVAL;
    
    	memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
    	vcpu->arch.dr6 = dbgregs->dr6;
    	vcpu->arch.dr7 = dbgregs->dr7;
    
    	return 0;
    }
    
    
    static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
    					 struct kvm_xsave *guest_xsave)
    {
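	/*
	 * With XSAVE available, copy the full extended state image;
	 * otherwise synthesize one from the FXSAVE area and flag only
	 * FP/SSE state in the XSTATE_BV header word.
	 */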
    	if (cpu_has_xsave)
    		memcpy(guest_xsave->region,
			&vcpu->arch.guest_fpu.state->xsave,
			xstate_size);
    	else {
    		memcpy(guest_xsave->region,
    			&vcpu->arch.guest_fpu.state->fxsave,
    			sizeof(struct i387_fxsave_struct));
    		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
    			XSTATE_FPSSE;
    	}
    }
    
    static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
    					struct kvm_xsave *guest_xsave)
    {
    	u64 xstate_bv =
    		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
    
    	if (cpu_has_xsave)
    		memcpy(&vcpu->arch.guest_fpu.state->xsave,
    
    			guest_xsave->region, xstate_size);
    
    	else {
    		if (xstate_bv & ~XSTATE_FPSSE)
    			return -EINVAL;
    		memcpy(&vcpu->arch.guest_fpu.state->fxsave,
    			guest_xsave->region, sizeof(struct i387_fxsave_struct));