Newer
Older
vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1;
vcpu->run->io.port = vcpu->arch.pio.port = port;
vcpu->arch.pio.in = in;
vcpu->arch.pio.string = 0;
vcpu->arch.pio.down = 0;
vcpu->arch.pio.guest_page_offset = 0;
vcpu->arch.pio.rep = 0;
kvm_x86_ops->cache_regs(vcpu);
memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4);
kvm_x86_ops->decache_regs(vcpu);
kvm_x86_ops->skip_emulated_instruction(vcpu);
pio_dev = vcpu_find_pio_dev(vcpu, port);
if (pio_dev) {
kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
complete_pio(vcpu);
return 1;
}
return 0;
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio);
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned long count, int down,
gva_t address, int rep, unsigned port)
{
unsigned now, in_page;
int i, ret = 0;
int nr_pages = 1;
struct page *page;
struct kvm_io_device *pio_dev;
vcpu->run->exit_reason = KVM_EXIT_IO;
vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
vcpu->run->io.size = vcpu->arch.pio.size = size;
vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count;
vcpu->run->io.port = vcpu->arch.pio.port = port;
vcpu->arch.pio.in = in;
vcpu->arch.pio.string = 1;
vcpu->arch.pio.down = down;
vcpu->arch.pio.guest_page_offset = offset_in_page(address);
vcpu->arch.pio.rep = rep;
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
if (!count) {
kvm_x86_ops->skip_emulated_instruction(vcpu);
return 1;
}
if (!down)
in_page = PAGE_SIZE - offset_in_page(address);
else
in_page = offset_in_page(address) + size;
now = min(count, (unsigned long)in_page / size);
if (!now) {
/*
* String I/O straddles page boundary. Pin two guest pages
* so that we satisfy atomicity constraints. Do just one
* transaction to avoid complexity.
*/
nr_pages = 2;
now = 1;
}
if (down) {
/*
* String I/O in reverse. Yuck. Kill the guest, fix later.
*/
pr_unimpl(vcpu, "guest string pio down\n");
kvm_inject_gp(vcpu, 0);
return 1;
}
vcpu->run->io.count = now;
vcpu->arch.pio.cur_count = now;
if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
kvm_x86_ops->skip_emulated_instruction(vcpu);
for (i = 0; i < nr_pages; ++i) {
mutex_lock(&vcpu->kvm->lock);
page = gva_to_page(vcpu, address + i * PAGE_SIZE);
vcpu->arch.pio.guest_pages[i] = page;
mutex_unlock(&vcpu->kvm->lock);
if (!page) {
kvm_inject_gp(vcpu, 0);
free_pio_guest_pages(vcpu);
return 1;
}
}
pio_dev = vcpu_find_pio_dev(vcpu, port);
/* string PIO write */
ret = pio_copy_data(vcpu);
if (ret >= 0 && pio_dev) {
pio_string_write(pio_dev, vcpu);
complete_pio(vcpu);
if (vcpu->arch.pio.count == 0)
ret = 1;
}
} else if (pio_dev)
pr_unimpl(vcpu, "no string pio read support yet, "
"port %x size %d count %ld\n",
port, size, count);
return ret;
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
int kvm_arch_init(void *opaque)
struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
r = kvm_mmu_module_init();
if (r)
goto out_fail;
if (kvm_x86_ops) {
printk(KERN_ERR "kvm: already loaded the other module\n");
r = -EEXIST;
goto out;
}
if (!ops->cpu_has_kvm_support()) {
printk(KERN_ERR "kvm: no hardware support\n");
r = -EOPNOTSUPP;
goto out;
}
if (ops->disabled_by_bios()) {
printk(KERN_ERR "kvm: disabled by bios\n");
r = -EOPNOTSUPP;
goto out;
}
kvm_x86_ops = ops;
kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
return 0;
out:
kvm_mmu_module_exit();
out_fail:
return r;
void kvm_arch_exit(void)
{
kvm_x86_ops = NULL;
kvm_mmu_module_exit();
}
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
++vcpu->stat.halt_exits;
if (irqchip_in_kernel(vcpu->kvm)) {
vcpu->arch.mp_state = VCPU_MP_STATE_HALTED;
kvm_vcpu_block(vcpu);
if (vcpu->arch.mp_state != VCPU_MP_STATE_RUNNABLE)
return -EINTR;
return 1;
} else {
vcpu->run->exit_reason = KVM_EXIT_HLT;
return 0;
}
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
kvm_x86_ops->cache_regs(vcpu);
nr = vcpu->arch.regs[VCPU_REGS_RAX];
a0 = vcpu->arch.regs[VCPU_REGS_RBX];
a1 = vcpu->arch.regs[VCPU_REGS_RCX];
a2 = vcpu->arch.regs[VCPU_REGS_RDX];
a3 = vcpu->arch.regs[VCPU_REGS_RSI];
if (!is_long_mode(vcpu)) {
nr &= 0xFFFFFFFF;
a0 &= 0xFFFFFFFF;
a1 &= 0xFFFFFFFF;
a2 &= 0xFFFFFFFF;
a3 &= 0xFFFFFFFF;
}
switch (nr) {
default:
ret = -KVM_ENOSYS;
break;
}
vcpu->arch.regs[VCPU_REGS_RAX] = ret;
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
kvm_x86_ops->decache_regs(vcpu);
return 0;
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
{
char instruction[3];
int ret = 0;
mutex_lock(&vcpu->kvm->lock);
/*
* Blow out the MMU to ensure that no other VCPU has an active mapping
* to ensure that the updated hypercall appears atomically across all
* VCPUs.
*/
kvm_mmu_zap_all(vcpu->kvm);
kvm_x86_ops->cache_regs(vcpu);
kvm_x86_ops->patch_hypercall(vcpu, instruction);
if (emulator_write_emulated(vcpu->arch.rip, instruction, 3, vcpu)
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
!= X86EMUL_CONTINUE)
ret = -EFAULT;
mutex_unlock(&vcpu->kvm->lock);
return ret;
}
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
}
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
struct descriptor_table dt = { limit, base };
kvm_x86_ops->set_gdt(vcpu, &dt);
}
void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
struct descriptor_table dt = { limit, base };
kvm_x86_ops->set_idt(vcpu, &dt);
}
void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
unsigned long *rflags)
{
lmsw(vcpu, msw);
*rflags = kvm_x86_ops->get_rflags(vcpu);
}
unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
{
kvm_x86_ops->decache_cr4_guest_bits(vcpu);
switch (cr) {
case 0:
case 2:
case 3:
case 4:
case 8:
return get_cr8(vcpu);
default:
vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
return 0;
}
}
void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
unsigned long *rflags)
{
switch (cr) {
case 0:
set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
*rflags = kvm_x86_ops->get_rflags(vcpu);
break;
case 2:
break;
case 3:
set_cr3(vcpu, val);
break;
case 4:
set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
break;
case 8:
set_cr8(vcpu, val & 0xfUL);
break;
default:
vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
}
}
static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
{
struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
int j, nent = vcpu->arch.cpuid_nent;
e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
/* when no next entry is found, the current entry[i] is reselected */
for (j = i + 1; j == i; j = (j + 1) % nent) {
struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
if (ej->function == e->function) {
ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
return j;
}
}
return 0; /* silence gcc, even though control never reaches here */
}
/* find an entry with matching function, matching index (if needed), and that
* should be read next (if it's stateful) */
static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
u32 function, u32 index)
{
if (e->function != function)
return 0;
if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
return 0;
if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
!(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
return 0;
return 1;
}
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
int i;
u32 function, index;
struct kvm_cpuid_entry2 *e, *best;
kvm_x86_ops->cache_regs(vcpu);
function = vcpu->arch.regs[VCPU_REGS_RAX];
index = vcpu->arch.regs[VCPU_REGS_RCX];
vcpu->arch.regs[VCPU_REGS_RAX] = 0;
vcpu->arch.regs[VCPU_REGS_RBX] = 0;
vcpu->arch.regs[VCPU_REGS_RCX] = 0;
vcpu->arch.regs[VCPU_REGS_RDX] = 0;
best = NULL;
for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
e = &vcpu->arch.cpuid_entries[i];
if (is_matching_cpuid_entry(e, function, index)) {
if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
move_to_next_stateful_cpuid_entry(vcpu, i);
best = e;
break;
}
/*
* Both basic or both extended?
*/
if (((e->function ^ function) & 0x80000000) == 0)
if (!best || e->function > best->function)
best = e;
}
if (best) {
vcpu->arch.regs[VCPU_REGS_RAX] = best->eax;
vcpu->arch.regs[VCPU_REGS_RBX] = best->ebx;
vcpu->arch.regs[VCPU_REGS_RCX] = best->ecx;
vcpu->arch.regs[VCPU_REGS_RDX] = best->edx;
}
kvm_x86_ops->decache_regs(vcpu);
kvm_x86_ops->skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
/*
* Check if userspace requested an interrupt window, and that the
* interrupt window is open.
*
* No need to exit to userspace if we already have an interrupt queued.
*/
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
return (!vcpu->arch.irq_summary &&
kvm_run->request_interrupt_window &&
vcpu->arch.interrupt_window_open &&
(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
}
static void post_kvm_run_save(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
kvm_run->cr8 = get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
if (irqchip_in_kernel(vcpu->kvm))
kvm_run->ready_for_interrupt_injection = 1;
else
kvm_run->ready_for_interrupt_injection =
(vcpu->arch.interrupt_window_open &&
vcpu->arch.irq_summary == 0);
}
static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
pr_debug("vcpu %d received sipi with vector # %x\n",
vcpu->vcpu_id, vcpu->arch.sipi_vector);
kvm_lapic_reset(vcpu);
r = kvm_x86_ops->vcpu_reset(vcpu);
if (r)
return r;
vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
}
preempted:
if (vcpu->guest_debug.enabled)
kvm_x86_ops->guest_debug_pre(vcpu);
again:
r = kvm_mmu_reload(vcpu);
if (unlikely(r))
goto out;
kvm_inject_pending_timer_irqs(vcpu);
preempt_disable();
kvm_x86_ops->prepare_guest_switch(vcpu);
kvm_load_guest_fpu(vcpu);
local_irq_disable();
if (signal_pending(current)) {
local_irq_enable();
preempt_enable();
r = -EINTR;
kvm_run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.signal_exits;
goto out;
}
if (vcpu->arch.exception.pending)
__queue_exception(vcpu);
else if (irqchip_in_kernel(vcpu->kvm))
kvm_x86_ops->inject_pending_irq(vcpu);
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
vcpu->guest_mode = 1;
kvm_guest_enter();
if (vcpu->requests)
if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
kvm_x86_ops->tlb_flush(vcpu);
kvm_x86_ops->run(vcpu, kvm_run);
vcpu->guest_mode = 0;
local_irq_enable();
++vcpu->stat.exits;
/*
* We must have an instruction between local_irq_enable() and
* kvm_guest_exit(), so the timer interrupt isn't delayed by
* the interrupt shadow. The stat.exits increment will do nicely.
* But we need to prevent reordering, hence this barrier():
*/
barrier();
kvm_guest_exit();
preempt_enable();
/*
* Profile KVM exit RIPs:
*/
if (unlikely(prof_on == KVM_PROFILING)) {
kvm_x86_ops->cache_regs(vcpu);
profile_hit(KVM_PROFILING, (void *)vcpu->arch.rip);
if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu))
vcpu->arch.exception.pending = false;
r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
if (r > 0) {
if (dm_request_for_irq_injection(vcpu, kvm_run)) {
r = -EINTR;
kvm_run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.request_irq_exits;
goto out;
}
if (!need_resched())
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
goto again;
}
out:
if (r > 0) {
kvm_resched(vcpu);
goto preempted;
}
post_kvm_run_save(vcpu, kvm_run);
return r;
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
sigset_t sigsaved;
vcpu_load(vcpu);
if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
kvm_vcpu_block(vcpu);
vcpu_put(vcpu);
return -EAGAIN;
}
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
/* re-sync apic's tpr */
if (!irqchip_in_kernel(vcpu->kvm))
set_cr8(vcpu, kvm_run->cr8);
if (vcpu->arch.pio.cur_count) {
r = complete_pio(vcpu);
if (r)
goto out;
}
#if CONFIG_HAS_IOMEM
if (vcpu->mmio_needed) {
memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
vcpu->mmio_read_completed = 1;
vcpu->mmio_needed = 0;
r = emulate_instruction(vcpu, kvm_run,
vcpu->arch.mmio_fault_cr2, 0, 1);
if (r == EMULATE_DO_MMIO) {
/*
* Read-modify-write. Back to userspace.
*/
r = 0;
goto out;
}
}
#endif
if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
kvm_x86_ops->cache_regs(vcpu);
vcpu->arch.regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
kvm_x86_ops->decache_regs(vcpu);
}
r = __vcpu_run(vcpu, kvm_run);
out:
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
vcpu_put(vcpu);
return r;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
vcpu_load(vcpu);
kvm_x86_ops->cache_regs(vcpu);
regs->rax = vcpu->arch.regs[VCPU_REGS_RAX];
regs->rbx = vcpu->arch.regs[VCPU_REGS_RBX];
regs->rcx = vcpu->arch.regs[VCPU_REGS_RCX];
regs->rdx = vcpu->arch.regs[VCPU_REGS_RDX];
regs->rsi = vcpu->arch.regs[VCPU_REGS_RSI];
regs->rdi = vcpu->arch.regs[VCPU_REGS_RDI];
regs->rsp = vcpu->arch.regs[VCPU_REGS_RSP];
regs->rbp = vcpu->arch.regs[VCPU_REGS_RBP];
#ifdef CONFIG_X86_64
regs->r8 = vcpu->arch.regs[VCPU_REGS_R8];
regs->r9 = vcpu->arch.regs[VCPU_REGS_R9];
regs->r10 = vcpu->arch.regs[VCPU_REGS_R10];
regs->r11 = vcpu->arch.regs[VCPU_REGS_R11];
regs->r12 = vcpu->arch.regs[VCPU_REGS_R12];
regs->r13 = vcpu->arch.regs[VCPU_REGS_R13];
regs->r14 = vcpu->arch.regs[VCPU_REGS_R14];
regs->r15 = vcpu->arch.regs[VCPU_REGS_R15];
regs->rflags = kvm_x86_ops->get_rflags(vcpu);
/*
* Don't leak debug flags in case they were set for guest debugging
*/
if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
vcpu_load(vcpu);
vcpu->arch.regs[VCPU_REGS_RAX] = regs->rax;
vcpu->arch.regs[VCPU_REGS_RBX] = regs->rbx;
vcpu->arch.regs[VCPU_REGS_RCX] = regs->rcx;
vcpu->arch.regs[VCPU_REGS_RDX] = regs->rdx;
vcpu->arch.regs[VCPU_REGS_RSI] = regs->rsi;
vcpu->arch.regs[VCPU_REGS_RDI] = regs->rdi;
vcpu->arch.regs[VCPU_REGS_RSP] = regs->rsp;
vcpu->arch.regs[VCPU_REGS_RBP] = regs->rbp;
#ifdef CONFIG_X86_64
vcpu->arch.regs[VCPU_REGS_R8] = regs->r8;
vcpu->arch.regs[VCPU_REGS_R9] = regs->r9;
vcpu->arch.regs[VCPU_REGS_R10] = regs->r10;
vcpu->arch.regs[VCPU_REGS_R11] = regs->r11;
vcpu->arch.regs[VCPU_REGS_R12] = regs->r12;
vcpu->arch.regs[VCPU_REGS_R13] = regs->r13;
vcpu->arch.regs[VCPU_REGS_R14] = regs->r14;
vcpu->arch.regs[VCPU_REGS_R15] = regs->r15;
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
kvm_x86_ops->set_rflags(vcpu, regs->rflags);
kvm_x86_ops->decache_regs(vcpu);
vcpu_put(vcpu);
return 0;
}
static void get_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
{
return kvm_x86_ops->get_segment(vcpu, var, seg);
}
void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
{
struct kvm_segment cs;
get_segment(vcpu, &cs, VCPU_SREG_CS);
*db = cs.db;
*l = cs.l;
}
EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
struct descriptor_table dt;
int pending_vec;
vcpu_load(vcpu);
get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
kvm_x86_ops->get_idt(vcpu, &dt);
sregs->idt.limit = dt.limit;
sregs->idt.base = dt.base;
kvm_x86_ops->get_gdt(vcpu, &dt);
sregs->gdt.limit = dt.limit;
sregs->gdt.base = dt.base;
kvm_x86_ops->decache_cr4_guest_bits(vcpu);
sregs->cr0 = vcpu->arch.cr0;
sregs->cr2 = vcpu->arch.cr2;
sregs->cr3 = vcpu->arch.cr3;
sregs->cr4 = vcpu->arch.cr4;
sregs->cr8 = get_cr8(vcpu);
sregs->efer = vcpu->arch.shadow_efer;
sregs->apic_base = kvm_get_apic_base(vcpu);
if (irqchip_in_kernel(vcpu->kvm)) {
memset(sregs->interrupt_bitmap, 0,
sizeof sregs->interrupt_bitmap);
pending_vec = kvm_x86_ops->get_irq(vcpu);
if (pending_vec >= 0)
set_bit(pending_vec,
(unsigned long *)sregs->interrupt_bitmap);
} else
memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
sizeof sregs->interrupt_bitmap);
vcpu_put(vcpu);
return 0;
}
static void set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
{
return kvm_x86_ops->set_segment(vcpu, var, seg);
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
int mmu_reset_needed = 0;
int i, pending_vec, max_bits;
struct descriptor_table dt;
vcpu_load(vcpu);
dt.limit = sregs->idt.limit;
dt.base = sregs->idt.base;
kvm_x86_ops->set_idt(vcpu, &dt);
dt.limit = sregs->gdt.limit;
dt.base = sregs->gdt.base;
kvm_x86_ops->set_gdt(vcpu, &dt);
vcpu->arch.cr2 = sregs->cr2;
mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
vcpu->arch.cr3 = sregs->cr3;
set_cr8(vcpu, sregs->cr8);
mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer;
#ifdef CONFIG_X86_64
kvm_x86_ops->set_efer(vcpu, sregs->efer);
#endif
kvm_set_apic_base(vcpu, sregs->apic_base);
kvm_x86_ops->decache_cr4_guest_bits(vcpu);
mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
vcpu->arch.cr0 = sregs->cr0;
kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
if (!is_long_mode(vcpu) && is_pae(vcpu))
load_pdptrs(vcpu, vcpu->arch.cr3);
if (mmu_reset_needed)
kvm_mmu_reset_context(vcpu);
if (!irqchip_in_kernel(vcpu->kvm)) {
memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap,
sizeof vcpu->arch.irq_pending);
vcpu->arch.irq_summary = 0;
for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i)
if (vcpu->arch.irq_pending[i])
__set_bit(i, &vcpu->arch.irq_summary);
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
} else {
max_bits = (sizeof sregs->interrupt_bitmap) << 3;
pending_vec = find_first_bit(
(const unsigned long *)sregs->interrupt_bitmap,
max_bits);
/* Only pending external irq is handled here */
if (pending_vec < max_bits) {
kvm_x86_ops->set_irq(vcpu, pending_vec);
pr_debug("Set back pending irq %d\n",
pending_vec);
}
}
set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
struct kvm_debug_guest *dbg)
{
int r;
vcpu_load(vcpu);
r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
vcpu_put(vcpu);
return r;
}
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
/*
* fxsave fpu state. Taken from x86_64/processor.h. To be killed when
* we have asm/x86/processor.h
*/
struct fxsave {
u16 cwd;
u16 swd;
u16 twd;
u16 fop;
u64 rip;
u64 rdp;
u32 mxcsr;
u32 mxcsr_mask;
u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
#ifdef CONFIG_X86_64
u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
#else
u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
#endif
};
/*
* Translate a guest virtual address to a guest physical address.
*/
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
unsigned long vaddr = tr->linear_address;
gpa_t gpa;
vcpu_load(vcpu);
mutex_lock(&vcpu->kvm->lock);
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr);
tr->physical_address = gpa;
tr->valid = gpa != UNMAPPED_GVA;
tr->writeable = 1;
tr->usermode = 0;
mutex_unlock(&vcpu->kvm->lock);
vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
vcpu_load(vcpu);
memcpy(fpu->fpr, fxsave->st_space, 128);
fpu->fcw = fxsave->cwd;
fpu->fsw = fxsave->swd;
fpu->ftwx = fxsave->twd;
fpu->last_opcode = fxsave->fop;
fpu->last_ip = fxsave->rip;
fpu->last_dp = fxsave->rdp;
memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
vcpu_load(vcpu);
memcpy(fxsave->st_space, fpu->fpr, 128);
fxsave->cwd = fpu->fcw;
fxsave->swd = fpu->fsw;
fxsave->twd = fpu->ftwx;
fxsave->fop = fpu->last_opcode;
fxsave->rip = fpu->last_ip;
fxsave->rdp = fpu->last_dp;
memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
vcpu_put(vcpu);
return 0;
}
void fx_init(struct kvm_vcpu *vcpu)
{
unsigned after_mxcsr_mask;
/* Initialize guest FPU by resetting ours and saving into guest's */
preempt_disable();
fx_save(&vcpu->arch.host_fx_image);
fx_save(&vcpu->arch.guest_fx_image);
fx_restore(&vcpu->arch.host_fx_image);
preempt_enable();
vcpu->arch.cr0 |= X86_CR0_ET;
after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
vcpu->arch.guest_fx_image.mxcsr = 0x1f80;
memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask,
0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
}
EXPORT_SYMBOL_GPL(fx_init);
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
if (!vcpu->fpu_active || vcpu->guest_fpu_loaded)
return;
vcpu->guest_fpu_loaded = 1;
fx_save(&vcpu->arch.host_fx_image);
fx_restore(&vcpu->arch.guest_fx_image);
}
EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
if (!vcpu->guest_fpu_loaded)
return;
vcpu->guest_fpu_loaded = 0;
fx_save(&vcpu->arch.guest_fx_image);
fx_restore(&vcpu->arch.host_fx_image);
}
EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
kvm_x86_ops->vcpu_free(vcpu);
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
unsigned int id)
{
return kvm_x86_ops->vcpu_create(kvm, id);
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
int r;
/* We do fxsave: this must be aligned. */
BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
vcpu_load(vcpu);
r = kvm_arch_vcpu_reset(vcpu);
if (r == 0)
r = kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
if (r < 0)
goto free_vcpu;
return 0;
free_vcpu:
kvm_x86_ops->vcpu_free(vcpu);
return r;
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
{
vcpu_load(vcpu);
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);
kvm_x86_ops->vcpu_free(vcpu);
}
int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
{
return kvm_x86_ops->vcpu_reset(vcpu);
}
void kvm_arch_hardware_enable(void *garbage)
{
kvm_x86_ops->hardware_enable(garbage);
}
void kvm_arch_hardware_disable(void *garbage)
{
kvm_x86_ops->hardware_disable(garbage);
}
int kvm_arch_hardware_setup(void)
{
return kvm_x86_ops->hardware_setup();
}
void kvm_arch_hardware_unsetup(void)
{
kvm_x86_ops->hardware_unsetup();
}
void kvm_arch_check_processor_compat(void *rtn)
{
kvm_x86_ops->check_processor_compatibility(rtn);
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
struct page *page;