Newer
Older
return kvm_x2apic_msr_write(vcpu, msr, data);
case MSR_IA32_TSCDEADLINE:
kvm_set_lapic_tscdeadline_msr(vcpu, data);
break;
case MSR_IA32_TSC_ADJUST:
if (guest_cpuid_has_tsc_adjust(vcpu)) {
if (!msr_info->host_initiated) {
u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
}
vcpu->arch.ia32_tsc_adjust_msr = data;
}
break;
case MSR_IA32_MISC_ENABLE:
vcpu->arch.ia32_misc_enable_msr = data;
case MSR_KVM_WALL_CLOCK:
vcpu->kvm->arch.wall_clock = data;
kvm_write_wall_clock(vcpu->kvm, data);
break;
case MSR_KVM_SYSTEM_TIME_NEW:
case MSR_KVM_SYSTEM_TIME: {

Andy Honig
committed
u64 gpa_offset;
vcpu->arch.time = data;
kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
/* we verify if the enable bit is set... */
if (!(data & 1))
break;

Andy Honig
committed
gpa_offset = data & ~(PAGE_MASK | 1);

Andy Honig
committed
if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.pv_time, data & ~1ULL,
sizeof(struct pvclock_vcpu_time_info)))

Andy Honig
committed
vcpu->arch.pv_time_enabled = false;
else
vcpu->arch.pv_time_enabled = true;
case MSR_KVM_ASYNC_PF_EN:
if (kvm_pv_enable_async_pf(vcpu, data))
return 1;
break;
case MSR_KVM_STEAL_TIME:
if (unlikely(!sched_info_on()))
return 1;
if (data & KVM_STEAL_RESERVED_MASK)
return 1;
if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
data & KVM_STEAL_VALID_BITS,
sizeof(struct kvm_steal_time)))
return 1;
vcpu->arch.st.msr_val = data;
if (!(data & KVM_MSR_ENABLED))
break;
vcpu->arch.st.last_steal = current->sched_info.run_delay;
preempt_disable();
accumulate_steal_time(vcpu);
preempt_enable();
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
break;
case MSR_KVM_PV_EOI_EN:
if (kvm_lapic_enable_pv_eoi(vcpu, data))
return 1;
break;
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
return set_msr_mce(vcpu, msr, data);
/* Performance counters are not protected by a CPUID bit,
* so we should check all of them in the generic path for the sake of
* cross vendor migration.
* Writing a zero into the event select MSRs disables them,
* which we perfectly emulate ;-). Any other value should be at least
* reported, some guests depend on them.
*/
case MSR_K7_EVNTSEL0:
case MSR_K7_EVNTSEL1:
case MSR_K7_EVNTSEL2:
case MSR_K7_EVNTSEL3:
if (data != 0)
vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
"0x%x data 0x%llx\n", msr, data);
break;
/* at least RHEL 4 unconditionally writes to the perfctr registers,
* so we ignore writes to make it happy.
*/
case MSR_K7_PERFCTR0:
case MSR_K7_PERFCTR1:
case MSR_K7_PERFCTR2:
case MSR_K7_PERFCTR3:
vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
"0x%x data 0x%llx\n", msr, data);
break;
case MSR_P6_PERFCTR0:
case MSR_P6_PERFCTR1:
pr = true;
case MSR_P6_EVNTSEL0:
case MSR_P6_EVNTSEL1:
if (kvm_pmu_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
if (pr || data != 0)
vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
"0x%x data 0x%llx\n", msr, data);
case MSR_K7_CLK_CTL:
/*
* Ignore all writes to this no longer documented MSR.
* Writes are only relevant for old K7 processors,
* all pre-dating SVM, but a recommended workaround from
* AMD for these chips. It is possible to specify the
* affected processor models on the command line, hence
* the need to ignore the workaround.
*/
break;
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
if (kvm_hv_msr_partition_wide(msr)) {
int r;
mutex_lock(&vcpu->kvm->lock);
r = set_msr_hyperv_pw(vcpu, msr, data);
mutex_unlock(&vcpu->kvm->lock);
return r;
} else
return set_msr_hyperv(vcpu, msr, data);
break;
case MSR_IA32_BBL_CR_CTL3:
/* Drop writes to this legacy MSR -- see rdmsr
* counterpart for further detail.
*/
vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
case MSR_AMD64_OSVW_ID_LENGTH:
if (!guest_cpuid_has_osvw(vcpu))
return 1;
vcpu->arch.osvw.length = data;
break;
case MSR_AMD64_OSVW_STATUS:
if (!guest_cpuid_has_osvw(vcpu))
return 1;
vcpu->arch.osvw.status = data;
break;
if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
return xen_hvm_config(vcpu, data);
if (kvm_pmu_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
if (!ignore_msrs) {
vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
msr, data);
return 1;
} else {
vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
msr, data);
break;
}
}
return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_msr_common);
/*
* Reads an msr value (of 'msr_index') into 'pdata'.
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
{
return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
}
static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
if (msr == MSR_MTRRdefType)
*pdata = vcpu->arch.mtrr_state.def_type +
(vcpu->arch.mtrr_state.enabled << 10);
else if (msr == MSR_MTRRfix64K_00000)
*pdata = p[0];
else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
*pdata = p[1 + msr - MSR_MTRRfix16K_80000];
else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
*pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
else if (msr == MSR_IA32_CR_PAT)
*pdata = vcpu->arch.pat;
else { /* Variable MTRRs */
int idx, is_mtrr_mask;
u64 *pt;
idx = (msr - 0x200) / 2;
is_mtrr_mask = msr - 0x200 - 2 * idx;
if (!is_mtrr_mask)
pt =
(u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
else
pt =
(u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
*pdata = *pt;
}
static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 data;
u64 mcg_cap = vcpu->arch.mcg_cap;
unsigned bank_num = mcg_cap & 0xff;
switch (msr) {
case MSR_IA32_P5_MC_ADDR:
case MSR_IA32_P5_MC_TYPE:
case MSR_IA32_MCG_CAP:
if (!(mcg_cap & MCG_CTL_P))
return 1;
data = vcpu->arch.mcg_ctl;
break;
case MSR_IA32_MCG_STATUS:
data = vcpu->arch.mcg_status;
break;
default:
if (msr >= MSR_IA32_MC0_CTL &&
msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
u32 offset = msr - MSR_IA32_MC0_CTL;
data = vcpu->arch.mce_banks[offset];
break;
}
return 1;
}
*pdata = data;
return 0;
}
static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 data = 0;
struct kvm *kvm = vcpu->kvm;
switch (msr) {
case HV_X64_MSR_GUEST_OS_ID:
data = kvm->arch.hv_guest_os_id;
break;
case HV_X64_MSR_HYPERCALL:
data = kvm->arch.hv_hypercall;
break;
default:
vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
return 1;
}
*pdata = data;
return 0;
}
static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 data = 0;
switch (msr) {
case HV_X64_MSR_VP_INDEX: {
int r;
struct kvm_vcpu *v;
kvm_for_each_vcpu(r, v, vcpu->kvm)
if (v == vcpu)
data = r;
break;
}
case HV_X64_MSR_EOI:
return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
case HV_X64_MSR_ICR:
return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
case HV_X64_MSR_TPR:
return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
case HV_X64_MSR_APIC_ASSIST_PAGE:
data = vcpu->arch.hv_vapic;
break;
vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
return 1;
}
*pdata = data;
return 0;
}
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 data;
switch (msr) {
case MSR_IA32_PLATFORM_ID:
case MSR_IA32_EBL_CR_POWERON:
case MSR_IA32_DEBUGCTLMSR:
case MSR_IA32_LASTBRANCHFROMIP:
case MSR_IA32_LASTBRANCHTOIP:
case MSR_IA32_LASTINTFROMIP:
case MSR_IA32_LASTINTTOIP:
case MSR_K8_SYSCFG:
case MSR_K7_HWCR:
case MSR_K8_INT_PENDING_MSG:
case MSR_AMD64_BU_CFG2:
data = 0;
break;
case MSR_P6_PERFCTR0:
case MSR_P6_PERFCTR1:
case MSR_P6_EVNTSEL0:
case MSR_P6_EVNTSEL1:
if (kvm_pmu_msr(vcpu, msr))
return kvm_pmu_get_msr(vcpu, msr, pdata);
data = 0;
break;
case MSR_IA32_UCODE_REV:
data = 0x100000000ULL;
break;
case MSR_MTRRcap:
data = 0x500 | KVM_NR_VAR_MTRR;
break;
case 0x200 ... 0x2ff:
return get_msr_mtrr(vcpu, msr, pdata);
case 0xcd: /* fsb frequency */
data = 3;
break;
/*
* MSR_EBC_FREQUENCY_ID
* Conservative value valid for even the basic CPU models.
* Models 0,1: 000 in bits 23:21 indicating a bus speed of
* 100MHz, model 2 000 in bits 18:16 indicating 100MHz,
* and 266MHz for model 3, or 4. Set Core Clock
* Frequency to System Bus Frequency Ratio to 1 (bits
* 31:24) even though these are only valid for CPU
* models > 2, however guests may end up dividing or
* multiplying by zero otherwise.
*/
case MSR_EBC_FREQUENCY_ID:
data = 1 << 24;
break;
case MSR_IA32_APICBASE:
data = kvm_get_apic_base(vcpu);
break;
case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
return kvm_x2apic_msr_read(vcpu, msr, pdata);
break;
case MSR_IA32_TSCDEADLINE:
data = kvm_get_lapic_tscdeadline_msr(vcpu);
break;
case MSR_IA32_TSC_ADJUST:
data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
break;
case MSR_IA32_MISC_ENABLE:
data = vcpu->arch.ia32_misc_enable_msr;
case MSR_IA32_PERF_STATUS:
/* TSC increment by tick */
data = 1000ULL;
/* CPU multiplier */
data |= (((uint64_t)4ULL) << 40);
break;
case MSR_KVM_WALL_CLOCK:
data = vcpu->kvm->arch.wall_clock;
break;
case MSR_KVM_SYSTEM_TIME:
case MSR_KVM_SYSTEM_TIME_NEW:
data = vcpu->arch.time;
break;
case MSR_KVM_ASYNC_PF_EN:
data = vcpu->arch.apf.msr_val;
break;
case MSR_KVM_STEAL_TIME:
data = vcpu->arch.st.msr_val;
break;
case MSR_KVM_PV_EOI_EN:
data = vcpu->arch.pv_eoi.msr_val;
break;
case MSR_IA32_P5_MC_ADDR:
case MSR_IA32_P5_MC_TYPE:
case MSR_IA32_MCG_CAP:
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
return get_msr_mce(vcpu, msr, pdata);
case MSR_K7_CLK_CTL:
/*
* Provide expected ramp-up count for K7. All other
* are set to zero, indicating minimum divisors for
* every field.
*
* This prevents guest kernels on AMD host with CPU
* type 6, model 8 and higher from exploding due to
* the rdmsr failing.
*/
data = 0x20000000;
break;
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
if (kvm_hv_msr_partition_wide(msr)) {
int r;
mutex_lock(&vcpu->kvm->lock);
r = get_msr_hyperv_pw(vcpu, msr, pdata);
mutex_unlock(&vcpu->kvm->lock);
return r;
} else
return get_msr_hyperv(vcpu, msr, pdata);
break;
case MSR_IA32_BBL_CR_CTL3:
/* This legacy MSR exists but isn't fully documented in current
* silicon. It is however accessed by winxp in very narrow
* scenarios where it sets bit #19, itself documented as
* a "reserved" bit. Best effort attempt to source coherent
* read data here should the balance of the register be
* interpreted by the guest:
*
* L2 cache control register 3: 64GB range, 256KB size,
* enabled, latency 0x1, configured
*/
data = 0xbe702111;
break;
case MSR_AMD64_OSVW_ID_LENGTH:
if (!guest_cpuid_has_osvw(vcpu))
return 1;
data = vcpu->arch.osvw.length;
break;
case MSR_AMD64_OSVW_STATUS:
if (!guest_cpuid_has_osvw(vcpu))
return 1;
data = vcpu->arch.osvw.status;
break;
if (kvm_pmu_msr(vcpu, msr))
return kvm_pmu_get_msr(vcpu, msr, pdata);
if (!ignore_msrs) {
vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
return 1;
} else {
vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
data = 0;
}
break;
}
*pdata = data;
return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_msr_common);
/*
* Read or write a bunch of msrs. All parameters are kernel addresses.
*
* @return number of msrs set successfully.
*/
static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
struct kvm_msr_entry *entries,
int (*do_msr)(struct kvm_vcpu *vcpu,
unsigned index, u64 *data))
{
idx = srcu_read_lock(&vcpu->kvm->srcu);
for (i = 0; i < msrs->nmsrs; ++i)
if (do_msr(vcpu, entries[i].index, &entries[i].data))
break;
srcu_read_unlock(&vcpu->kvm->srcu, idx);
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
return i;
}
/*
* Read or write a bunch of msrs. Parameters are user addresses.
*
* @return number of msrs set successfully.
*/
static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
int (*do_msr)(struct kvm_vcpu *vcpu,
unsigned index, u64 *data),
int writeback)
{
struct kvm_msrs msrs;
struct kvm_msr_entry *entries;
int r, n;
unsigned size;
r = -EFAULT;
if (copy_from_user(&msrs, user_msrs, sizeof msrs))
goto out;
r = -E2BIG;
if (msrs.nmsrs >= MAX_IO_MSRS)
goto out;
size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
entries = memdup_user(user_msrs->entries, size);
if (IS_ERR(entries)) {
r = PTR_ERR(entries);
r = n = __msr_io(vcpu, &msrs, entries, do_msr);
if (r < 0)
goto out_free;
r = -EFAULT;
if (writeback && copy_to_user(user_msrs->entries, entries, size))
goto out_free;
r = n;
out_free:
kfree(entries);
int kvm_dev_ioctl_check_extension(long ext)
{
int r;
switch (ext) {
case KVM_CAP_IRQCHIP:
case KVM_CAP_HLT:
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_SYNC_MMU:
case KVM_CAP_USER_NMI:
case KVM_CAP_REINJECT_CONTROL:
case KVM_CAP_IRQ_INJECT_STATUS:
case KVM_CAP_SET_IDENTITY_MAP_ADDR:
case KVM_CAP_ADJUST_CLOCK:
case KVM_CAP_HYPERV_SPIN:
case KVM_CAP_PCI_SEGMENT:
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
case KVM_CAP_ASYNC_PF:
case KVM_CAP_GET_TSC_KHZ:
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
case KVM_CAP_ASSIGN_DEV_IRQ:
case KVM_CAP_PCI_2_3:
#endif
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
case KVM_CAP_VAPIC:
r = !kvm_x86_ops->cpu_has_accelerated_tpr();
break;
case KVM_CAP_NR_VCPUS:
r = KVM_SOFT_MAX_VCPUS;
break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
case KVM_CAP_NR_MEMSLOTS:
r = KVM_USER_MEM_SLOTS;
case KVM_CAP_PV_MMU: /* obsolete */
r = 0;
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
r = iommu_present(&pci_bus_type);
case KVM_CAP_MCE:
r = KVM_MAX_MCE_BANKS;
break;
case KVM_CAP_XCRS:
r = cpu_has_xsave;
break;
case KVM_CAP_TSC_CONTROL:
r = kvm_has_tsc_control;
break;
case KVM_CAP_TSC_DEADLINE_TIMER:
r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
break;
default:
r = 0;
break;
}
return r;
}
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
void __user *argp = (void __user *)arg;
long r;
switch (ioctl) {
case KVM_GET_MSR_INDEX_LIST: {
struct kvm_msr_list __user *user_msr_list = argp;
struct kvm_msr_list msr_list;
unsigned n;
r = -EFAULT;
if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
goto out;
n = msr_list.nmsrs;
msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
goto out;
r = -E2BIG;
goto out;
r = -EFAULT;
if (copy_to_user(user_msr_list->indices, &msrs_to_save,
num_msrs_to_save * sizeof(u32)))
goto out;
if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
&emulated_msrs,
ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
goto out;
r = 0;
break;
}
case KVM_GET_SUPPORTED_CPUID: {
struct kvm_cpuid2 __user *cpuid_arg = argp;
struct kvm_cpuid2 cpuid;
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
if (r)
goto out;
r = -EFAULT;
if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
goto out;
r = 0;
break;
}
case KVM_X86_GET_MCE_CAP_SUPPORTED: {
u64 mce_cap;
mce_cap = KVM_MCE_CAP_SUPPORTED;
r = -EFAULT;
if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
goto out;
r = 0;
break;
}
default:
r = -EINVAL;
}
out:
return r;
}
static void wbinvd_ipi(void *garbage)
{
wbinvd();
}
static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
{
return vcpu->kvm->arch.iommu_domain &&
!(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
/* Address WBINVD may be executed by guest */
if (need_emulate_wbinvd(vcpu)) {
if (kvm_x86_ops->has_wbinvd_exit())
cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
smp_call_function_single(vcpu->cpu,
wbinvd_ipi, NULL, 1);
}
kvm_x86_ops->vcpu_load(vcpu, cpu);
/* Apply any externally detected TSC adjustments (due to suspend) */
if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
vcpu->arch.tsc_offset_adjustment = 0;
set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
}
if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
native_read_tsc() - vcpu->arch.last_host_tsc;
if (tsc_delta < 0)
mark_tsc_unstable("KVM discovered backwards TSC");
u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu,
vcpu->arch.last_guest_tsc);
kvm_x86_ops->write_tsc_offset(vcpu, offset);
/*
* On a host with synchronized TSC, there is no need to update
* kvmclock on vcpu->cpu migration
*/
if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
if (vcpu->cpu != cpu)
kvm_migrate_timers(vcpu);
accumulate_steal_time(vcpu);
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
vcpu->arch.last_host_tsc = native_read_tsc();
}
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
kvm_x86_ops->sync_pir_to_irr(vcpu);
memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
return 0;
}
static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
kvm_apic_post_state_restore(vcpu, s);
update_cr8_intercept(vcpu);
static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
if (irq->irq >= KVM_NR_INTERRUPTS)
return -EINVAL;
if (irqchip_in_kernel(vcpu->kvm))
return -ENXIO;
kvm_queue_interrupt(vcpu, irq->irq, false);
kvm_make_request(KVM_REQ_EVENT, vcpu);
return 0;
}
static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
{
kvm_inject_nmi(vcpu);
return 0;
}
static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
struct kvm_tpr_access_ctl *tac)
{
if (tac->flags)
return -EINVAL;
vcpu->arch.tpr_access_reporting = !!tac->enabled;
return 0;
}
static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
u64 mcg_cap)
{
int r;
unsigned bank_num = mcg_cap & 0xff, bank;
r = -EINVAL;
if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
goto out;
if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
goto out;
r = 0;
vcpu->arch.mcg_cap = mcg_cap;
/* Init IA32_MCG_CTL to all 1s */
if (mcg_cap & MCG_CTL_P)
vcpu->arch.mcg_ctl = ~(u64)0;
/* Init IA32_MCi_CTL to all 1s */
for (bank = 0; bank < bank_num; bank++)
vcpu->arch.mce_banks[bank*4] = ~(u64)0;
out:
return r;
}
static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
struct kvm_x86_mce *mce)
{
u64 mcg_cap = vcpu->arch.mcg_cap;
unsigned bank_num = mcg_cap & 0xff;
u64 *banks = vcpu->arch.mce_banks;
if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
return -EINVAL;
/*
* if IA32_MCG_CTL is not all 1s, the uncorrected error
* reporting is disabled
*/
if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
vcpu->arch.mcg_ctl != ~(u64)0)
return 0;
banks += 4 * mce->bank;
/*
* if IA32_MCi_CTL is not all 1s, the uncorrected error
* reporting is disabled for the bank
*/
if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
return 0;
if (mce->status & MCI_STATUS_UC) {
if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
!kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
return 0;
}
if (banks[1] & MCI_STATUS_VAL)
mce->status |= MCI_STATUS_OVER;
banks[2] = mce->addr;
banks[3] = mce->misc;
vcpu->arch.mcg_status = mce->mcg_status;
banks[1] = mce->status;
kvm_queue_exception(vcpu, MC_VECTOR);
} else if (!(banks[1] & MCI_STATUS_VAL)
|| !(banks[1] & MCI_STATUS_UC)) {
if (banks[1] & MCI_STATUS_VAL)
mce->status |= MCI_STATUS_OVER;
banks[2] = mce->addr;
banks[3] = mce->misc;
banks[1] = mce->status;
} else
banks[1] |= MCI_STATUS_OVER;
return 0;
}
static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
events->exception.injected =
vcpu->arch.exception.pending &&
!kvm_exception_is_soft(vcpu->arch.exception.nr);
events->exception.nr = vcpu->arch.exception.nr;
events->exception.has_error_code = vcpu->arch.exception.has_error_code;
events->exception.pad = 0;
events->exception.error_code = vcpu->arch.exception.error_code;
events->interrupt.injected =
vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
events->interrupt.nr = vcpu->arch.interrupt.nr;
events->interrupt.soft = 0;
events->interrupt.shadow =
kvm_x86_ops->get_interrupt_shadow(vcpu,
KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
events->nmi.injected = vcpu->arch.nmi_injected;
events->nmi.pending = vcpu->arch.nmi_pending != 0;
events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
events->sipi_vector = 0; /* never valid when reporting to user space */
events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
| KVM_VCPUEVENT_VALID_SHADOW);
memset(&events->reserved, 0, sizeof(events->reserved));
}
static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
| KVM_VCPUEVENT_VALID_SIPI_VECTOR
| KVM_VCPUEVENT_VALID_SHADOW))
vcpu->arch.exception.pending = events->exception.injected;
vcpu->arch.exception.nr = events->exception.nr;
vcpu->arch.exception.has_error_code = events->exception.has_error_code;
vcpu->arch.exception.error_code = events->exception.error_code;
vcpu->arch.interrupt.pending = events->interrupt.injected;
vcpu->arch.interrupt.nr = events->interrupt.nr;
vcpu->arch.interrupt.soft = events->interrupt.soft;
if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
kvm_x86_ops->set_interrupt_shadow(vcpu,
events->interrupt.shadow);
vcpu->arch.nmi_injected = events->nmi.injected;
if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
vcpu->arch.nmi_pending = events->nmi.pending;
kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
kvm_vcpu_has_lapic(vcpu))
vcpu->arch.apic->sipi_vector = events->sipi_vector;
kvm_make_request(KVM_REQ_EVENT, vcpu);
static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
struct kvm_debugregs *dbgregs)
{
memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
dbgregs->dr6 = vcpu->arch.dr6;
dbgregs->dr7 = vcpu->arch.dr7;
dbgregs->flags = 0;
memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
}
static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
struct kvm_debugregs *dbgregs)
{
if (dbgregs->flags)
return -EINVAL;
memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
vcpu->arch.dr6 = dbgregs->dr6;
vcpu->arch.dr7 = dbgregs->dr7;
return 0;
}
static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
if (cpu_has_xsave)
memcpy(guest_xsave->region,
&vcpu->arch.guest_fpu.state->xsave,
xstate_size);
else {
memcpy(guest_xsave->region,
&vcpu->arch.guest_fpu.state->fxsave,
sizeof(struct i387_fxsave_struct));
*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
XSTATE_FPSSE;
}
}
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
u64 xstate_bv =
*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
if (cpu_has_xsave)
memcpy(&vcpu->arch.guest_fpu.state->xsave,
guest_xsave->region, xstate_size);
else {
if (xstate_bv & ~XSTATE_FPSSE)
return -EINVAL;
memcpy(&vcpu->arch.guest_fpu.state->fxsave,
guest_xsave->region, sizeof(struct i387_fxsave_struct));