Newer
Older
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
{
struct kvm *kvm = vcpu->kvm;
int lm = is_long_mode(vcpu);
u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
: (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
: kvm->arch.xen_hvm_config.blob_size_32;
u32 page_num = data & ~PAGE_MASK;
u64 page_addr = data & PAGE_MASK;
u8 *page;
int r;
r = -E2BIG;
if (page_num >= blob_size)
goto out;
r = -ENOMEM;
page = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!page)
goto out;
r = -EFAULT;
if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE))
goto out_free;
if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
goto out_free;
r = 0;
out_free:
kfree(page);
out:
return r;
}
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
}
static bool kvm_hv_msr_partition_wide(u32 msr)
{
bool r = false;
switch (msr) {
case HV_X64_MSR_GUEST_OS_ID:
case HV_X64_MSR_HYPERCALL:
r = true;
break;
}
return r;
}
static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
struct kvm *kvm = vcpu->kvm;
switch (msr) {
case HV_X64_MSR_GUEST_OS_ID:
kvm->arch.hv_guest_os_id = data;
/* setting guest os id to zero disables hypercall page */
if (!kvm->arch.hv_guest_os_id)
kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
break;
case HV_X64_MSR_HYPERCALL: {
u64 gfn;
unsigned long addr;
u8 instructions[4];
/* if guest os id is not set hypercall should remain disabled */
if (!kvm->arch.hv_guest_os_id)
break;
if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
kvm->arch.hv_hypercall = data;
break;
}
gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
addr = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(addr))
return 1;
kvm_x86_ops->patch_hypercall(vcpu, instructions);
((unsigned char *)instructions)[3] = 0xc3; /* ret */
if (copy_to_user((void __user *)addr, instructions, 4))
return 1;
kvm->arch.hv_hypercall = data;
break;
}
default:
pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
"data 0x%llx\n", msr, data);
return 1;
}
return 0;
}
static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
switch (msr) {
case HV_X64_MSR_APIC_ASSIST_PAGE: {
unsigned long addr;
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
vcpu->arch.hv_vapic = data;
break;
}
addr = gfn_to_hva(vcpu->kvm, data >>
HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
if (kvm_is_error_hva(addr))
return 1;
if (clear_user((void __user *)addr, PAGE_SIZE))
return 1;
vcpu->arch.hv_vapic = data;
break;
}
case HV_X64_MSR_EOI:
return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
case HV_X64_MSR_ICR:
return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
case HV_X64_MSR_TPR:
return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
default:
pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
"data 0x%llx\n", msr, data);
return 1;
}
return 0;
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
switch (msr) {
case MSR_EFER:
set_efer(vcpu, data);
break;
case MSR_K7_HWCR:
data &= ~(u64)0x40; /* ignore flush filter disable */
data &= ~(u64)0x100; /* ignore ignne emulation enable */
if (data != 0) {
pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
data);
return 1;
}
case MSR_FAM10H_MMIO_CONF_BASE:
if (data != 0) {
pr_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
"0x%llx\n", data);
return 1;
}
case MSR_IA32_DEBUGCTLMSR:
if (!data) {
/* We support the non-activated case already */
break;
} else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
/* Values other than LBR and BTF are vendor-specific,
thus reserved and should throw a #GP */
return 1;
}
pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
__func__, data);
break;
case MSR_IA32_UCODE_REV:
case MSR_IA32_UCODE_WRITE:
case 0x200 ... 0x2ff:
return set_msr_mtrr(vcpu, msr, data);
case MSR_IA32_APICBASE:
kvm_set_apic_base(vcpu, data);
break;
case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
return kvm_x2apic_msr_write(vcpu, msr, data);
case MSR_IA32_MISC_ENABLE:
vcpu->arch.ia32_misc_enable_msr = data;
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
case MSR_KVM_WALL_CLOCK:
vcpu->kvm->arch.wall_clock = data;
kvm_write_wall_clock(vcpu->kvm, data);
break;
case MSR_KVM_SYSTEM_TIME: {
if (vcpu->arch.time_page) {
kvm_release_page_dirty(vcpu->arch.time_page);
vcpu->arch.time_page = NULL;
}
vcpu->arch.time = data;
/* we verify if the enable bit is set... */
if (!(data & 1))
break;
/* ...but clean it before doing the actual write */
vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
vcpu->arch.time_page =
gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
if (is_error_page(vcpu->arch.time_page)) {
kvm_release_page_clean(vcpu->arch.time_page);
vcpu->arch.time_page = NULL;
}
kvm_request_guest_time_update(vcpu);
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
return set_msr_mce(vcpu, msr, data);
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
/* Performance counters are not protected by a CPUID bit,
* so we should check all of them in the generic path for the sake of
* cross vendor migration.
* Writing a zero into the event select MSRs disables them,
* which we perfectly emulate ;-). Any other value should be at least
* reported, some guests depend on them.
*/
case MSR_P6_EVNTSEL0:
case MSR_P6_EVNTSEL1:
case MSR_K7_EVNTSEL0:
case MSR_K7_EVNTSEL1:
case MSR_K7_EVNTSEL2:
case MSR_K7_EVNTSEL3:
if (data != 0)
pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
"0x%x data 0x%llx\n", msr, data);
break;
/* at least RHEL 4 unconditionally writes to the perfctr registers,
* so we ignore writes to make it happy.
*/
case MSR_P6_PERFCTR0:
case MSR_P6_PERFCTR1:
case MSR_K7_PERFCTR0:
case MSR_K7_PERFCTR1:
case MSR_K7_PERFCTR2:
case MSR_K7_PERFCTR3:
pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
"0x%x data 0x%llx\n", msr, data);
break;
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
if (kvm_hv_msr_partition_wide(msr)) {
int r;
mutex_lock(&vcpu->kvm->lock);
r = set_msr_hyperv_pw(vcpu, msr, data);
mutex_unlock(&vcpu->kvm->lock);
return r;
} else
return set_msr_hyperv(vcpu, msr, data);
break;
if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
return xen_hvm_config(vcpu, data);
if (!ignore_msrs) {
pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
msr, data);
return 1;
} else {
pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
msr, data);
break;
}
}
return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_msr_common);
/*
* Reads an msr value (of 'msr_index') into 'pdata'.
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
{
return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
}
static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
if (msr == MSR_MTRRdefType)
*pdata = vcpu->arch.mtrr_state.def_type +
(vcpu->arch.mtrr_state.enabled << 10);
else if (msr == MSR_MTRRfix64K_00000)
*pdata = p[0];
else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
*pdata = p[1 + msr - MSR_MTRRfix16K_80000];
else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
*pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
else if (msr == MSR_IA32_CR_PAT)
*pdata = vcpu->arch.pat;
else { /* Variable MTRRs */
int idx, is_mtrr_mask;
u64 *pt;
idx = (msr - 0x200) / 2;
is_mtrr_mask = msr - 0x200 - 2 * idx;
if (!is_mtrr_mask)
pt =
(u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
else
pt =
(u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
*pdata = *pt;
}
static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 data;
u64 mcg_cap = vcpu->arch.mcg_cap;
unsigned bank_num = mcg_cap & 0xff;
switch (msr) {
case MSR_IA32_P5_MC_ADDR:
case MSR_IA32_P5_MC_TYPE:
case MSR_IA32_MCG_CAP:
if (!(mcg_cap & MCG_CTL_P))
return 1;
data = vcpu->arch.mcg_ctl;
break;
case MSR_IA32_MCG_STATUS:
data = vcpu->arch.mcg_status;
break;
default:
if (msr >= MSR_IA32_MC0_CTL &&
msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
u32 offset = msr - MSR_IA32_MC0_CTL;
data = vcpu->arch.mce_banks[offset];
break;
}
return 1;
}
*pdata = data;
return 0;
}
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 data = 0;
struct kvm *kvm = vcpu->kvm;
switch (msr) {
case HV_X64_MSR_GUEST_OS_ID:
data = kvm->arch.hv_guest_os_id;
break;
case HV_X64_MSR_HYPERCALL:
data = kvm->arch.hv_hypercall;
break;
default:
pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
return 1;
}
*pdata = data;
return 0;
}
static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 data = 0;
switch (msr) {
case HV_X64_MSR_VP_INDEX: {
int r;
struct kvm_vcpu *v;
kvm_for_each_vcpu(r, v, vcpu->kvm)
if (v == vcpu)
data = r;
break;
}
case HV_X64_MSR_EOI:
return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
case HV_X64_MSR_ICR:
return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
case HV_X64_MSR_TPR:
return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
default:
pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
return 1;
}
*pdata = data;
return 0;
}
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 data;
switch (msr) {
case MSR_IA32_PLATFORM_ID:
case MSR_IA32_UCODE_REV:
case MSR_IA32_EBL_CR_POWERON:
case MSR_IA32_DEBUGCTLMSR:
case MSR_IA32_LASTBRANCHFROMIP:
case MSR_IA32_LASTBRANCHTOIP:
case MSR_IA32_LASTINTFROMIP:
case MSR_IA32_LASTINTTOIP:
case MSR_K8_SYSCFG:
case MSR_K7_HWCR:
case MSR_P6_PERFCTR0:
case MSR_P6_PERFCTR1:
case MSR_P6_EVNTSEL0:
case MSR_P6_EVNTSEL1:
case MSR_K8_INT_PENDING_MSG:
data = 0;
break;
case MSR_MTRRcap:
data = 0x500 | KVM_NR_VAR_MTRR;
break;
case 0x200 ... 0x2ff:
return get_msr_mtrr(vcpu, msr, pdata);
case 0xcd: /* fsb frequency */
data = 3;
break;
case MSR_IA32_APICBASE:
data = kvm_get_apic_base(vcpu);
break;
case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
return kvm_x2apic_msr_read(vcpu, msr, pdata);
break;
case MSR_IA32_MISC_ENABLE:
data = vcpu->arch.ia32_misc_enable_msr;
case MSR_IA32_PERF_STATUS:
/* TSC increment by tick */
data = 1000ULL;
/* CPU multiplier */
data |= (((uint64_t)4ULL) << 40);
break;
case MSR_KVM_WALL_CLOCK:
data = vcpu->kvm->arch.wall_clock;
break;
case MSR_KVM_SYSTEM_TIME:
data = vcpu->arch.time;
break;
case MSR_IA32_P5_MC_ADDR:
case MSR_IA32_P5_MC_TYPE:
case MSR_IA32_MCG_CAP:
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
return get_msr_mce(vcpu, msr, pdata);
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
if (kvm_hv_msr_partition_wide(msr)) {
int r;
mutex_lock(&vcpu->kvm->lock);
r = get_msr_hyperv_pw(vcpu, msr, pdata);
mutex_unlock(&vcpu->kvm->lock);
return r;
} else
return get_msr_hyperv(vcpu, msr, pdata);
break;
if (!ignore_msrs) {
pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
return 1;
} else {
pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
data = 0;
}
break;
}
*pdata = data;
return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_msr_common);
/*
* Read or write a bunch of msrs. All parameters are kernel addresses.
*
* @return number of msrs set successfully.
*/
static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
struct kvm_msr_entry *entries,
int (*do_msr)(struct kvm_vcpu *vcpu,
unsigned index, u64 *data))
{
idx = srcu_read_lock(&vcpu->kvm->srcu);
for (i = 0; i < msrs->nmsrs; ++i)
if (do_msr(vcpu, entries[i].index, &entries[i].data))
break;
srcu_read_unlock(&vcpu->kvm->srcu, idx);
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
vcpu_put(vcpu);
return i;
}
/*
* Read or write a bunch of msrs. Parameters are user addresses.
*
* @return number of msrs set successfully.
*/
static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
int (*do_msr)(struct kvm_vcpu *vcpu,
unsigned index, u64 *data),
int writeback)
{
struct kvm_msrs msrs;
struct kvm_msr_entry *entries;
int r, n;
unsigned size;
r = -EFAULT;
if (copy_from_user(&msrs, user_msrs, sizeof msrs))
goto out;
r = -E2BIG;
if (msrs.nmsrs >= MAX_IO_MSRS)
goto out;
r = -ENOMEM;
size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
entries = vmalloc(size);
if (!entries)
goto out;
r = -EFAULT;
if (copy_from_user(entries, user_msrs->entries, size))
goto out_free;
r = n = __msr_io(vcpu, &msrs, entries, do_msr);
if (r < 0)
goto out_free;
r = -EFAULT;
if (writeback && copy_to_user(user_msrs->entries, entries, size))
goto out_free;
r = n;
out_free:
vfree(entries);
out:
return r;
}
int kvm_dev_ioctl_check_extension(long ext)
{
int r;
switch (ext) {
case KVM_CAP_IRQCHIP:
case KVM_CAP_HLT:
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_SYNC_MMU:
case KVM_CAP_REINJECT_CONTROL:
case KVM_CAP_IRQ_INJECT_STATUS:
case KVM_CAP_SET_IDENTITY_MAP_ADDR:
case KVM_CAP_ADJUST_CLOCK:
case KVM_CAP_HYPERV_SPIN:
case KVM_CAP_PCI_SEGMENT:
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
case KVM_CAP_VAPIC:
r = !kvm_x86_ops->cpu_has_accelerated_tpr();
break;
case KVM_CAP_NR_VCPUS:
r = KVM_MAX_VCPUS;
break;
case KVM_CAP_NR_MEMSLOTS:
r = KVM_MEMORY_SLOTS;
break;
case KVM_CAP_PV_MMU: /* obsolete */
r = 0;
case KVM_CAP_MCE:
r = KVM_MAX_MCE_BANKS;
break;
default:
r = 0;
break;
}
return r;
}
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
void __user *argp = (void __user *)arg;
long r;
switch (ioctl) {
case KVM_GET_MSR_INDEX_LIST: {
struct kvm_msr_list __user *user_msr_list = argp;
struct kvm_msr_list msr_list;
unsigned n;
r = -EFAULT;
if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
goto out;
n = msr_list.nmsrs;
msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
goto out;
r = -E2BIG;
goto out;
r = -EFAULT;
if (copy_to_user(user_msr_list->indices, &msrs_to_save,
num_msrs_to_save * sizeof(u32)))
goto out;
if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
&emulated_msrs,
ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
goto out;
r = 0;
break;
}
case KVM_GET_SUPPORTED_CPUID: {
struct kvm_cpuid2 __user *cpuid_arg = argp;
struct kvm_cpuid2 cpuid;
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
if (r)
goto out;
r = -EFAULT;
if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
goto out;
r = 0;
break;
}
case KVM_X86_GET_MCE_CAP_SUPPORTED: {
u64 mce_cap;
mce_cap = KVM_MCE_CAP_SUPPORTED;
r = -EFAULT;
if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
goto out;
r = 0;
break;
}
default:
r = -EINVAL;
}
out:
return r;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
kvm_x86_ops->vcpu_load(vcpu, cpu);
if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
unsigned long khz = cpufreq_quick_get(cpu);
if (!khz)
khz = tsc_khz;
per_cpu(cpu_tsc_khz, cpu) = khz;
}
kvm_request_guest_time_update(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
kvm_put_guest_fpu(vcpu);
unsigned long long efer = 0;
rdmsrl_safe(MSR_EFER, &efer);
return efer & EFER_NX;
}
static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
{
int i;
struct kvm_cpuid_entry2 *e, *entry;
for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
e = &vcpu->arch.cpuid_entries[i];
if (e->function == 0x80000001) {
entry = e;
break;
}
}
if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
entry->edx &= ~(1 << 20);
printk(KERN_INFO "kvm: guest NX capability removed\n");
}
}
/* when an old userspace process fills a new kernel module */
static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid *cpuid,
struct kvm_cpuid_entry __user *entries)
{
int r, i;
struct kvm_cpuid_entry *cpuid_entries;
r = -E2BIG;
if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
goto out;
r = -ENOMEM;
cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
if (!cpuid_entries)
goto out;
r = -EFAULT;
if (copy_from_user(cpuid_entries, entries,
cpuid->nent * sizeof(struct kvm_cpuid_entry)))
goto out_free;
for (i = 0; i < cpuid->nent; i++) {
vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
vcpu->arch.cpuid_entries[i].index = 0;
vcpu->arch.cpuid_entries[i].flags = 0;
vcpu->arch.cpuid_entries[i].padding[0] = 0;
vcpu->arch.cpuid_entries[i].padding[1] = 0;
vcpu->arch.cpuid_entries[i].padding[2] = 0;
}
vcpu->arch.cpuid_nent = cpuid->nent;
cpuid_fix_nx_cap(vcpu);
r = 0;
kvm_apic_set_version(vcpu);
kvm_x86_ops->cpuid_update(vcpu);
out_free:
vfree(cpuid_entries);
out:
return r;
}
static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries)
{
int r;
r = -E2BIG;
if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
goto out;
r = -EFAULT;
if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
vcpu->arch.cpuid_nent = cpuid->nent;
kvm_apic_set_version(vcpu);
kvm_x86_ops->cpuid_update(vcpu);
return 0;
out:
return r;
}
static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries)
{
int r;
r = -E2BIG;
if (cpuid->nent < vcpu->arch.cpuid_nent)
if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
goto out;
return 0;
out:
cpuid->nent = vcpu->arch.cpuid_nent;
return r;
}
static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
{
entry->function = function;
entry->index = index;
cpuid_count(entry->function, entry->index,
&entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
#define F(x) bit(X86_FEATURE_##x)
static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
u32 index, int *nent, int maxnent)
{
unsigned f_nx = is_efer_nx() ? F(NX) : 0;
unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
? F(GBPAGES) : 0;
unsigned f_lm = F(LM);
#else
unsigned f_gbpages = 0;
unsigned f_lm = 0;
unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
/* cpuid 1.edx */
const u32 kvm_supported_word0_x86_features =
F(FPU) | F(VME) | F(DE) | F(PSE) |
F(TSC) | F(MSR) | F(PAE) | F(MCE) |
F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) |
0 /* Reserved, DS, ACPI */ | F(MMX) |
F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
0 /* HTT, TM, Reserved, PBE */;
/* cpuid 0x80000001.edx */
const u32 kvm_supported_word1_x86_features =
F(FPU) | F(VME) | F(DE) | F(PSE) |
F(TSC) | F(MSR) | F(PAE) | F(MCE) |
F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
F(PAT) | F(PSE36) | 0 /* Reserved */ |
f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
/* cpuid 1.ecx */
const u32 kvm_supported_word4_x86_features =
F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ |
0 /* DS-CPL, VMX, SMX, EST */ |
0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
0 /* Reserved, DCA */ | F(XMM4_1) |
F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
/* cpuid 0x80000001.ecx */
const u32 kvm_supported_word6_x86_features =
F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) |
0 /* SKINIT */ | 0 /* WDT */;
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
do_cpuid_1_ent(entry, function, index);
++*nent;
switch (function) {
case 0:
entry->eax = min(entry->eax, (u32)0xb);
break;
case 1:
entry->edx &= kvm_supported_word0_x86_features;
entry->ecx &= kvm_supported_word4_x86_features;
/* we support x2apic emulation even if host does not support
* it since we emulate x2apic in software */
entry->ecx |= F(X2APIC);
break;
/* function 2 entries are STATEFUL. That is, repeated cpuid commands
* may return different values. This forces us to get_cpu() before
* issuing the first command, and also to emulate this annoying behavior
* in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
case 2: {
int t, times = entry->eax & 0xff;
entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
for (t = 1; t < times && *nent < maxnent; ++t) {
do_cpuid_1_ent(&entry[t], function, 0);
entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
++*nent;
}
break;
}
/* function 4 and 0xb have additional index. */
case 4: {
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until cache_type is zero */
for (i = 1; *nent < maxnent; ++i) {
cache_type = entry[i - 1].eax & 0x1f;
do_cpuid_1_ent(&entry[i], function, i);
entry[i].flags |=
KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
++*nent;
}
break;
}
case 0xb: {
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until level_type is zero */
level_type = entry[i - 1].ecx & 0xff00;
do_cpuid_1_ent(&entry[i], function, i);
entry[i].flags |=
KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
++*nent;
}
break;
}
case 0x80000000:
entry->eax = min(entry->eax, 0x8000001a);
break;
case 0x80000001:
entry->edx &= kvm_supported_word1_x86_features;
entry->ecx &= kvm_supported_word6_x86_features;
break;
}
kvm_x86_ops->set_supported_cpuid(function, entry);
#undef F
static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries)
{
struct kvm_cpuid_entry2 *cpuid_entries;
int limit, nent = 0, r = -E2BIG;
u32 func;
if (cpuid->nent < 1)
goto out;
if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
cpuid->nent = KVM_MAX_CPUID_ENTRIES;
r = -ENOMEM;
cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
if (!cpuid_entries)
goto out;
do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent);
limit = cpuid_entries[0].eax;
for (func = 1; func <= limit && nent < cpuid->nent; ++func)
do_cpuid_ent(&cpuid_entries[nent], func, 0,
r = -E2BIG;
if (nent >= cpuid->nent)
goto out_free;
do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent);
limit = cpuid_entries[nent - 1].eax;
for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
do_cpuid_ent(&cpuid_entries[nent], func, 0,