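/*
 * Recompute the CR8/TPR intercept threshold from the highest pending
 * interrupt priority, so the guest only exits on TPR writes that matter.
 */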
static void update_cr8_intercept(struct kvm_vcpu *vcpu)
{
int max_irr, tpr;
if (!kvm_x86_ops->update_cr8_intercept)
return;
if (!vcpu->arch.apic)
return;
if (!vcpu->arch.apic->vapic_addr)
max_irr = kvm_lapic_find_highest_irr(vcpu);
else
max_irr = -1;
if (max_irr != -1)
max_irr >>= 4;
tpr = kvm_lapic_get_cr8(vcpu);
kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
}
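/*
 * Re-inject any previously pending exception, NMI or interrupt first;
 * otherwise inject a new NMI or external interrupt if the guest can
 * accept one right now.
 */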
static void inject_pending_event(struct kvm_vcpu *vcpu)
{
/* try to reinject previous events if any */
if (vcpu->arch.exception.pending) {
trace_kvm_inj_exception(vcpu->arch.exception.nr,
vcpu->arch.exception.has_error_code,
vcpu->arch.exception.error_code);
kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
vcpu->arch.exception.has_error_code,
vcpu->arch.exception.error_code,
vcpu->arch.exception.reinject);
return;
}
if (vcpu->arch.nmi_injected) {
kvm_x86_ops->set_nmi(vcpu);
return;
}
if (vcpu->arch.interrupt.pending) {
kvm_x86_ops->set_irq(vcpu);
return;
}
/* try to inject new event if pending */
if (vcpu->arch.nmi_pending) {
if (kvm_x86_ops->nmi_allowed(vcpu)) {
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = true;
kvm_x86_ops->set_nmi(vcpu);
}
} else if (kvm_cpu_has_interrupt(vcpu)) {
if (kvm_x86_ops->interrupt_allowed(vcpu)) {
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
false);
kvm_x86_ops->set_irq(vcpu);
}
}
}
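/* Switch XCR0 to the guest's value when the guest has OSXSAVE enabled. */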
static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
{
if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
!vcpu->guest_xcr0_loaded) {
/* kvm_set_xcr() also depends on this */
xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
vcpu->guest_xcr0_loaded = 1;
}
}
static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
{
if (vcpu->guest_xcr0_loaded) {
if (vcpu->arch.xcr0 != host_xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
vcpu->guest_xcr0_loaded = 0;
}
}
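/*
 * Perform one guest entry/exit cycle: service pending requests, inject
 * events, switch FPU/debug state, run the guest and hand the exit to
 * the vendor-specific exit handler.
 */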
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
int r;
bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
vcpu->run->request_interrupt_window;
if (vcpu->requests) {
if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
kvm_mmu_unload(vcpu);
if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
__kvm_migrate_timers(vcpu);
if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
r = kvm_guest_time_update(vcpu);
if (unlikely(r))
goto out;
}
if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
kvm_mmu_sync_roots(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
kvm_x86_ops->tlb_flush(vcpu);
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
r = 0;
goto out;
}
if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
r = 0;
goto out;
}
if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
vcpu->fpu_active = 0;
kvm_x86_ops->fpu_deactivate(vcpu);
}
}
r = kvm_mmu_reload(vcpu);
if (unlikely(r))
goto out;
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
inject_pending_event(vcpu);
/* enable NMI/IRQ window open exits if needed */
if (vcpu->arch.nmi_pending)
kvm_x86_ops->enable_nmi_window(vcpu);
else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
kvm_x86_ops->enable_irq_window(vcpu);
if (kvm_lapic_enabled(vcpu)) {
update_cr8_intercept(vcpu);
kvm_lapic_sync_to_vapic(vcpu);
}
}
preempt_disable();
kvm_x86_ops->prepare_guest_switch(vcpu);
if (vcpu->fpu_active)
kvm_load_guest_fpu(vcpu);
kvm_load_guest_xcr0(vcpu);
atomic_set(&vcpu->guest_mode, 1);
smp_wmb();
local_irq_disable();
if (!atomic_read(&vcpu->guest_mode) || vcpu->requests
|| need_resched() || signal_pending(current)) {
atomic_set(&vcpu->guest_mode, 0);
smp_wmb();
local_irq_enable();
preempt_enable();
r = 1;
goto out;
}
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
kvm_guest_enter();
if (unlikely(vcpu->arch.switch_db_regs)) {
set_debugreg(0, 7);
set_debugreg(vcpu->arch.eff_db[0], 0);
set_debugreg(vcpu->arch.eff_db[1], 1);
set_debugreg(vcpu->arch.eff_db[2], 2);
set_debugreg(vcpu->arch.eff_db[3], 3);
}
trace_kvm_entry(vcpu->vcpu_id);
kvm_x86_ops->run(vcpu);
/*
* If the guest has used debug registers, at least dr7
* will be disabled while returning to the host.
* If we don't have active breakpoints in the host, we don't
* care about the messed up debug address registers. But if
* we have some of them active, restore the old state.
*/
if (hw_breakpoint_active())
hw_breakpoint_restore();
kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
atomic_set(&vcpu->guest_mode, 0);
smp_wmb();
local_irq_enable();
++vcpu->stat.exits;
/*
* We must have an instruction between local_irq_enable() and
* kvm_guest_exit(), so the timer interrupt isn't delayed by
* the interrupt shadow. The stat.exits increment will do nicely.
* But we need to prevent reordering, hence this barrier():
*/
barrier();
kvm_guest_exit();
preempt_enable();
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
/*
 * Profile KVM exit RIPs:
 */
if (unlikely(prof_on == KVM_PROFILING)) {
unsigned long rip = kvm_rip_read(vcpu);
profile_hit(KVM_PROFILING, (void *)rip);
}
r = kvm_x86_ops->handle_exit(vcpu);
out:
return r;
}
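/*
 * Outer vcpu loop: keep entering the guest while it is runnable, block
 * when halted, and return to userspace on signals or pending I/O.
 */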
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
int r;
struct kvm *kvm = vcpu->kvm;
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
pr_debug("vcpu %d received sipi with vector # %x\n",
vcpu->vcpu_id, vcpu->arch.sipi_vector);
r = kvm_arch_vcpu_reset(vcpu);
if (r)
return r;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
vapic_enter(vcpu);
r = 1;
while (r > 0) {
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
r = vcpu_enter_guest(vcpu);
else {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
if (kvm_check_request(KVM_REQ_UNHALT, vcpu))
{
switch(vcpu->arch.mp_state) {
case KVM_MP_STATE_HALTED:
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
case KVM_MP_STATE_RUNNABLE:
break;
case KVM_MP_STATE_SIPI_RECEIVED:
default:
r = -EINTR;
break;
}
}
}
if (r <= 0)
break;
clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
if (kvm_cpu_has_pending_timer(vcpu))
kvm_inject_pending_timer_irqs(vcpu);
if (dm_request_for_irq_injection(vcpu)) {
r = -EINTR;
vcpu->run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.request_irq_exits;
}
if (signal_pending(current)) {
r = -EINTR;
vcpu->run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.signal_exits;
}
if (need_resched()) {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
kvm_resched(vcpu);
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
}
}
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
vapic_exit(vcpu);
return r;
}
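/* Entry point for the KVM_RUN ioctl. */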
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
sigset_t sigsaved;
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
kvm_vcpu_block(vcpu);
clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
r = -EAGAIN;
goto out;
}
/* re-sync apic's tpr */
if (!irqchip_in_kernel(vcpu->kvm))
kvm_set_cr8(vcpu, kvm_run->cr8);
if (vcpu->arch.pio.count || vcpu->mmio_needed) {
if (vcpu->mmio_needed) {
memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
vcpu->mmio_read_completed = 1;
vcpu->mmio_needed = 0;
}
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (r != EMULATE_DONE) {
r = 0;
goto out;
}
}
if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
kvm_register_write(vcpu, VCPU_REGS_RAX,
kvm_run->hypercall.ret);
r = __vcpu_run(vcpu);
out:
post_kvm_run_save(vcpu);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
return r;
}
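/* KVM_GET_REGS: copy the guest's general-purpose registers to userspace. */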
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
#ifdef CONFIG_X86_64
regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
#endif
regs->rip = kvm_rip_read(vcpu);
regs->rflags = kvm_get_rflags(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
#ifdef CONFIG_X86_64
kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
#endif
kvm_rip_write(vcpu, regs->rip);
kvm_set_rflags(vcpu, regs->rflags);
vcpu->arch.exception.pending = false;
kvm_make_request(KVM_REQ_EVENT, vcpu);
return 0;
}
void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
{
struct kvm_segment cs;
kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
*db = cs.db;
*l = cs.l;
}
EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
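/* KVM_GET_SREGS: segment, descriptor-table and control registers. */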
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
struct desc_ptr dt;
kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
kvm_x86_ops->get_idt(vcpu, &dt);
sregs->idt.limit = dt.size;
sregs->idt.base = dt.address;
kvm_x86_ops->get_gdt(vcpu, &dt);
sregs->gdt.limit = dt.size;
sregs->gdt.base = dt.address;
sregs->cr0 = kvm_read_cr0(vcpu);
sregs->cr2 = vcpu->arch.cr2;
sregs->cr3 = vcpu->arch.cr3;
sregs->cr4 = kvm_read_cr4(vcpu);
sregs->cr8 = kvm_get_cr8(vcpu);
sregs->apic_base = kvm_get_apic_base(vcpu);
memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
set_bit(vcpu->arch.interrupt.nr,
(unsigned long *)sregs->interrupt_bitmap);
return 0;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
mp_state->mp_state = vcpu->arch.mp_state;
return 0;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
vcpu->arch.mp_state = mp_state->mp_state;
kvm_make_request(KVM_REQ_EVENT, vcpu);
return 0;
}
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
bool has_error_code, u32 error_code)
{
struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
int ret;
init_emulate_ctxt(vcpu);
ret = emulator_task_switch(&vcpu->arch.emulate_ctxt,
tss_selector, reason, has_error_code,
error_code);
if (ret)
return EMULATE_FAIL;
memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
kvm_make_request(KVM_REQ_EVENT, vcpu);
return EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(kvm_task_switch);
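/*
 * KVM_SET_SREGS: load segment, descriptor-table and control registers,
 * reloading the MMU context when paging-related state changed.
 */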
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
int mmu_reset_needed = 0;
int pending_vec, max_bits;
struct desc_ptr dt;
dt.size = sregs->idt.limit;
dt.address = sregs->idt.base;
kvm_x86_ops->set_idt(vcpu, &dt);
dt.size = sregs->gdt.limit;
dt.address = sregs->gdt.base;
kvm_x86_ops->set_gdt(vcpu, &dt);
vcpu->arch.cr2 = sregs->cr2;
mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
vcpu->arch.cr3 = sregs->cr3;
kvm_set_cr8(vcpu, sregs->cr8);
mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
kvm_x86_ops->set_efer(vcpu, sregs->efer);
kvm_set_apic_base(vcpu, sregs->apic_base);
mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
vcpu->arch.cr0 = sregs->cr0;
mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
if (sregs->cr4 & X86_CR4_OSXSAVE)
update_cpuid(vcpu);
if (!is_long_mode(vcpu) && is_pae(vcpu)) {
load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3);
mmu_reset_needed = 1;
}
if (mmu_reset_needed)
kvm_mmu_reset_context(vcpu);
max_bits = (sizeof sregs->interrupt_bitmap) << 3;
pending_vec = find_first_bit(
(const unsigned long *)sregs->interrupt_bitmap, max_bits);
if (pending_vec < max_bits) {
kvm_queue_interrupt(vcpu, pending_vec, false);
pr_debug("Set back pending irq %d\n", pending_vec);
if (irqchip_in_kernel(vcpu->kvm))
kvm_pic_clear_isr_ack(vcpu->kvm);
}
kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
update_cr8_intercept(vcpu);
/* Older userspace won't unhalt the vcpu on reset. */
if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
!is_protmode(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
kvm_make_request(KVM_REQ_EVENT, vcpu);
return 0;
}
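/* KVM_SET_GUEST_DEBUG: configure hardware breakpoints and single-stepping. */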
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
unsigned long rflags;
int i, r;
if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
r = -EBUSY;
if (vcpu->arch.exception.pending)
goto out;
if (dbg->control & KVM_GUESTDBG_INJECT_DB)
kvm_queue_exception(vcpu, DB_VECTOR);
else
kvm_queue_exception(vcpu, BP_VECTOR);
}
/*
* Read rflags as long as potentially injected trace flags are still
* filtered out.
*/
rflags = kvm_get_rflags(vcpu);
vcpu->guest_debug = dbg->control;
if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
vcpu->guest_debug = 0;
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
for (i = 0; i < KVM_NR_DB_REGS; ++i)
vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
vcpu->arch.switch_db_regs =
(dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
} else {
for (i = 0; i < KVM_NR_DB_REGS; i++)
vcpu->arch.eff_db[i] = vcpu->arch.db[i];
vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
}
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
get_segment_base(vcpu, VCPU_SREG_CS);
/*
* Trigger an rflags update that will inject or remove the trace
* flags.
*/
kvm_set_rflags(vcpu, rflags);
kvm_x86_ops->set_guest_debug(vcpu, dbg);
r = 0;
out:
return r;
}
/*
* Translate a guest virtual address to a guest physical address.
*/
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
unsigned long vaddr = tr->linear_address;
gpa_t gpa;
int idx;
idx = srcu_read_lock(&vcpu->kvm->srcu);
gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
tr->physical_address = gpa;
tr->valid = gpa != UNMAPPED_GVA;
tr->writeable = 1;
tr->usermode = 0;
return 0;
}
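/* KVM_GET_FPU: export the guest FPU state in fxsave layout. */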
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
struct i387_fxsave_struct *fxsave =
&vcpu->arch.guest_fpu.state->fxsave;
memcpy(fpu->fpr, fxsave->st_space, 128);
fpu->fcw = fxsave->cwd;
fpu->fsw = fxsave->swd;
fpu->ftwx = fxsave->twd;
fpu->last_opcode = fxsave->fop;
fpu->last_ip = fxsave->rip;
fpu->last_dp = fxsave->rdp;
memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
struct i387_fxsave_struct *fxsave =
&vcpu->arch.guest_fpu.state->fxsave;
memcpy(fxsave->st_space, fpu->fpr, 128);
fxsave->cwd = fpu->fcw;
fxsave->swd = fpu->fsw;
fxsave->twd = fpu->ftwx;
fxsave->fop = fpu->last_opcode;
fxsave->rip = fpu->last_ip;
fxsave->rdp = fpu->last_dp;
memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
return 0;
}
int fx_init(struct kvm_vcpu *vcpu)
{
int err;
err = fpu_alloc(&vcpu->arch.guest_fpu);
if (err)
return err;
/*
* Ensure guest xcr0 is valid for loading
*/
vcpu->arch.xcr0 = XSTATE_FP;
vcpu->arch.cr0 |= X86_CR0_ET;
return 0;
}
EXPORT_SYMBOL_GPL(fx_init);
static void fx_free(struct kvm_vcpu *vcpu)
{
fpu_free(&vcpu->arch.guest_fpu);
}
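/* Load the guest FPU state before entering the guest. */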
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
if (vcpu->guest_fpu_loaded)
return;
/*
* Restore all possible states in the guest,
* and assume host would use all available bits.
* Guest xcr0 would be loaded later.
*/
kvm_put_guest_xcr0(vcpu);
vcpu->guest_fpu_loaded = 1;
}
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
if (!vcpu->guest_fpu_loaded)
return;
vcpu->guest_fpu_loaded = 0;
kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
}
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.time_page) {
kvm_release_page_dirty(vcpu->arch.time_page);
vcpu->arch.time_page = NULL;
}
free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
kvm_x86_ops->vcpu_free(vcpu);
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
unsigned int id)
{
if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
printk_once(KERN_WARNING
"kvm: SMP vm created on host with unstable TSC; "
"guest TSC will not be reliable\n");
return kvm_x86_ops->vcpu_create(kvm, id);
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
int r;
vcpu_load(vcpu);
r = kvm_arch_vcpu_reset(vcpu);
if (r == 0)
r = kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
if (r < 0)
goto free_vcpu;
return 0;
free_vcpu:
kvm_x86_ops->vcpu_free(vcpu);
return r;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
vcpu_load(vcpu);
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);
kvm_x86_ops->vcpu_free(vcpu);
}
int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
{
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = false;
vcpu->arch.switch_db_regs = 0;
memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
vcpu->arch.dr6 = DR6_FIXED_1;
vcpu->arch.dr7 = DR7_FIXED_1;
kvm_make_request(KVM_REQ_EVENT, vcpu);
return kvm_x86_ops->vcpu_reset(vcpu);
}
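/*
 * Called when a CPU is brought (back) online for KVM; vcpus last run on
 * this CPU get a KVM_REQ_CLOCK_UPDATE so their clock is refreshed.
 */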
int kvm_arch_hardware_enable(void *garbage)
{
struct kvm *kvm;
struct kvm_vcpu *vcpu;
int i;
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
if (vcpu->cpu == smp_processor_id())
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
return kvm_x86_ops->hardware_enable(garbage);
}
void kvm_arch_hardware_disable(void *garbage)
{
kvm_x86_ops->hardware_disable(garbage);
drop_user_return_notifiers(garbage);
}
int kvm_arch_hardware_setup(void)
{
return kvm_x86_ops->hardware_setup();
}
void kvm_arch_hardware_unsetup(void)
{
kvm_x86_ops->hardware_unsetup();
}
void kvm_arch_check_processor_compat(void *rtn)
{
kvm_x86_ops->check_processor_compatibility(rtn);
}
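/* Allocate per-vcpu state: pio page, MMU, local APIC and MCE banks. */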
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
struct page *page;
struct kvm *kvm;
int r;
BUG_ON(vcpu->kvm == NULL);
kvm = vcpu->kvm;
vcpu->arch.emulate_ctxt.ops = &emulate_ops;
vcpu->arch.walk_mmu = &vcpu->arch.mmu;
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
vcpu->arch.mmu.translate_gpa = translate_gpa;
vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
else
vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page) {
r = -ENOMEM;
goto fail;
}
vcpu->arch.pio_data = page_address(page);
if (!kvm->arch.virtual_tsc_khz)
kvm_arch_set_tsc_khz(kvm, max_tsc_khz);
r = kvm_mmu_create(vcpu);
if (r < 0)
goto fail_free_pio_data;
if (irqchip_in_kernel(kvm)) {
r = kvm_create_lapic(vcpu);
if (r < 0)
goto fail_mmu_destroy;
}
vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
GFP_KERNEL);
if (!vcpu->arch.mce_banks) {
r = -ENOMEM;
goto fail_free_lapic;
}
if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
goto fail_free_mce_banks;
return 0;
fail_free_mce_banks:
kfree(vcpu->arch.mce_banks);
fail_free_lapic:
kvm_free_lapic(vcpu);
fail_mmu_destroy:
kvm_mmu_destroy(vcpu);
fail_free_pio_data:
free_page((unsigned long)vcpu->arch.pio_data);
fail:
return r;
}
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
int idx;
kfree(vcpu->arch.mce_banks);
kvm_free_lapic(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_mmu_destroy(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
free_page((unsigned long)vcpu->arch.pio_data);
}
struct kvm *kvm_arch_create_vm(void)
{
struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
if (!kvm)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
spin_lock_init(&kvm->arch.tsc_write_lock);
return kvm;
}
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
vcpu_load(vcpu);
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
unsigned int i;
struct kvm_vcpu *vcpu;
/*
* Unpin any mmu pages first.
*/
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_unload_vcpu_mmu(vcpu);
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_arch_vcpu_free(vcpu);
mutex_lock(&kvm->lock);
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
kvm->vcpus[i] = NULL;
atomic_set(&kvm->online_vcpus, 0);
mutex_unlock(&kvm->lock);
}
void kvm_arch_sync_events(struct kvm *kvm)
{
kvm_free_all_assigned_devices(kvm);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_iommu_unmap_guest(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
kvm_free_physmem(kvm);
if (kvm->arch.apic_access_page)
put_page(kvm->arch.apic_access_page);
if (kvm->arch.ept_identity_pagetable)
put_page(kvm->arch.ept_identity_pagetable);
cleanup_srcu_struct(&kvm->srcu);
kfree(kvm);
}
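/* Set up host virtual memory backing a new or changed memory slot. */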
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
struct kvm_userspace_memory_region *mem,
int user_alloc)
{
int npages = memslot->npages;
int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
/* Prevent internal slot pages from being moved by fork()/COW. */
if (memslot->id >= KVM_MEMORY_SLOTS)
map_flags = MAP_SHARED | MAP_ANONYMOUS;
/* To keep backward compatibility with older userspace,
 * x86 needs to handle the !user_alloc case.
 */
if (!user_alloc) {
if (npages && !old.rmap) {
unsigned long userspace_addr;
down_write(&current->mm->mmap_sem);
userspace_addr = do_mmap(NULL, 0,
npages * PAGE_SIZE,
PROT_READ | PROT_WRITE,