err_code = selector & 0xfffc;
err_vec = GP_VECTOR;
/* can't load system descriptor into segment selector */
if (seg <= VCPU_SREG_GS && !kvm_seg.s)
goto exception;
if (!kvm_seg.present) {
err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
goto exception;
}
rpl = selector & 3;
dpl = kvm_seg.dpl;
cpl = kvm_x86_ops->get_cpl(vcpu);
switch (seg) {
case VCPU_SREG_SS:
/*
* segment is not a writable data segment or segment
* selector's RPL != CPL or DPL != CPL
*/
if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl)
goto exception;
break;
case VCPU_SREG_CS:
if (!(kvm_seg.type & 8))
goto exception;
if (kvm_seg.type & 4) {
/* conforming */
if (dpl > cpl)
goto exception;
} else {
/* nonconforming */
if (rpl > cpl || dpl != cpl)
goto exception;
}
/* CS(RPL) <- CPL */
selector = (selector & 0xfffc) | cpl;
break;
case VCPU_SREG_TR:
if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9))
goto exception;
break;
case VCPU_SREG_LDTR:
if (kvm_seg.s || kvm_seg.type != 2)
goto exception;
break;
default: /* DS, ES, FS, or GS */
/*
* segment is not a data or readable code segment or
* ((segment is a data or nonconforming code segment)
* and (both RPL and CPL > DPL))
*/
if ((kvm_seg.type & 0xa) == 0x8 ||
(((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl)))
goto exception;
break;
}
if (!kvm_seg.unusable && kvm_seg.s) {
/* mark segment as accessed */
seg_desc.type |= 1;
save_guest_segment_descriptor(vcpu, selector, &seg_desc);
}
load:
kvm_set_segment(vcpu, &kvm_seg, seg);
return X86EMUL_CONTINUE;
exception:
kvm_queue_exception_e(vcpu, err_vec, err_code);
return X86EMUL_PROPAGATE_FAULT;
}
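/*
 * Dump the outgoing task's state (CR3, EIP, EFLAGS, general purpose
 * registers and segment selectors) into an in-memory 32-bit TSS image.
 */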
static void save_state_to_tss32(struct kvm_vcpu *vcpu,
struct tss_segment_32 *tss)
{
tss->cr3 = vcpu->arch.cr3;
tss->eip = kvm_rip_read(vcpu);
tss->eflags = kvm_get_rflags(vcpu);
tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP);
tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP);
tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI);
tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI);
tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
}
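/*
 * Write only the selector into the cached segment register; the full
 * descriptor is (re)loaded afterwards by kvm_load_segment_descriptor().
 */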
static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg)
{
struct kvm_segment kvm_seg;
kvm_get_segment(vcpu, &kvm_seg, seg);
kvm_seg.selector = sel;
kvm_set_segment(vcpu, &kvm_seg, seg);
}
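/*
 * Restore vcpu state from a 32-bit TSS image: CR3, EIP, EFLAGS (bit 1 is
 * always forced to 1), general purpose registers and finally the segment
 * registers. Returns 1 if loading a segment descriptor faults.
 */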
static int load_state_from_tss32(struct kvm_vcpu *vcpu,
struct tss_segment_32 *tss)
{
kvm_set_cr3(vcpu, tss->cr3);
kvm_rip_write(vcpu, tss->eip);
kvm_set_rflags(vcpu, tss->eflags | 2);
kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx);
kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx);
kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp);
kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp);
kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
/*
* SDM says that segment selectors are loaded before segment
* descriptors
*/
kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR);
kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS);
kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS);
/*
* Now load segment descriptors. If a fault happens at this stage
* it is handled in the context of the new task
*/
if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR))
return 1;
if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
return 1;
if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
return 1;
if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
return 1;
if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
return 1;
if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS))
return 1;
if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS))
return 1;
return 0;
}
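/*
 * Same as save_state_to_tss32() but for the 16-bit TSS format, which has
 * no CR3, FS, GS or ESP fields of its own.
 */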
static void save_state_to_tss16(struct kvm_vcpu *vcpu,
struct tss_segment_16 *tss)
{
tss->ip = kvm_rip_read(vcpu);
tss->flag = kvm_get_rflags(vcpu);
tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX);
tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP);
tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP);
tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI);
tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI);
tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
}
static int load_state_from_tss16(struct kvm_vcpu *vcpu,
struct tss_segment_16 *tss)
{
kvm_rip_write(vcpu, tss->ip);
kvm_set_rflags(vcpu, tss->flag | 2);
kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx);
kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp);
kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp);
kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
/*
* SDM says that segment selectors are loaded before segment
* descriptors
*/
kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR);
kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
/*
* Now load segment descriptors. If a fault happens at this stage
* it is handled in the context of the new task
*/
if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR))
return 1;
if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
return 1;
if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
return 1;
if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
return 1;
if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
return 1;
return 0;
}
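/*
 * Do the memory side of a task switch through a 16-bit TSS: write the
 * outgoing state to the old TSS, read the new TSS, optionally store the
 * back link to the previous task, then load the new state.
 */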
static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
u16 old_tss_sel, u32 old_tss_base,
struct desc_struct *nseg_desc)
{
struct tss_segment_16 tss_segment_16;
int ret = 0;

if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
sizeof tss_segment_16))
goto out;
save_state_to_tss16(vcpu, &tss_segment_16);

if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
sizeof tss_segment_16))
goto out;

if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
&tss_segment_16, sizeof tss_segment_16))
goto out;
if (old_tss_sel != 0xffff) {
tss_segment_16.prev_task_link = old_tss_sel;
if (kvm_write_guest(vcpu->kvm,
get_tss_base_addr_write(vcpu, nseg_desc),
&tss_segment_16.prev_task_link,
sizeof tss_segment_16.prev_task_link))
goto out;
}
if (load_state_from_tss16(vcpu, &tss_segment_16))
goto out;
ret = 1;
out:
return ret;
}
static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
u16 old_tss_sel, u32 old_tss_base,
struct desc_struct *nseg_desc)
{
struct tss_segment_32 tss_segment_32;
int ret = 0;

if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
sizeof tss_segment_32))
goto out;
save_state_to_tss32(vcpu, &tss_segment_32);

if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
sizeof tss_segment_32))
goto out;
if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
&tss_segment_32, sizeof tss_segment_32))
goto out;
if (old_tss_sel != 0xffff) {
tss_segment_32.prev_task_link = old_tss_sel;
if (kvm_write_guest(vcpu->kvm,
get_tss_base_addr_write(vcpu, nseg_desc),
&tss_segment_32.prev_task_link,
sizeof tss_segment_32.prev_task_link))
goto out;
}
if (load_state_from_tss32(vcpu, &tss_segment_32))
goto out;
ret = 1;
out:
return ret;
}
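/*
 * Emulate a task switch (CALL/JMP/IRET through a TSS or task gate, or an
 * interrupt delivered through a task gate). Checks the new TSS descriptor,
 * updates the busy bits and EFLAGS.NT as the switch reason requires,
 * transfers state through the old and new TSS images, sets CR0.TS and
 * finally loads the new task register.
 */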
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
{
struct kvm_segment tr_seg;
struct desc_struct cseg_desc;
struct desc_struct nseg_desc;
int ret = 0;
u32 desc_limit;

u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);

/* FIXME: Handle errors. Failure to read either TSS or their
* descriptors should generate a pagefault.
*/
if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
goto out;

if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc))
goto out;
if (reason != TASK_SWITCH_IRET) {
int cpl;
cpl = kvm_x86_ops->get_cpl(vcpu);
if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) {
kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
return 1;
}
}
desc_limit = get_desc_limit(&nseg_desc);
if (!nseg_desc.p ||
((desc_limit < 0x67 && (nseg_desc.type & 8)) ||
desc_limit < 0x2b)) {
kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
return 1;
}
if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
cseg_desc.type &= ~(1 << 1); //clear the B flag

save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc);
}
if (reason == TASK_SWITCH_IRET) {
u32 eflags = kvm_get_rflags(vcpu);
kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
}
/* set back link to prev task only if NT bit is set in eflags
note that old_tss_sel is not used after this point */
if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
old_tss_sel = 0xffff;
if (nseg_desc.type & 8)
ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel,
old_tss_base, &nseg_desc);
else
ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel,
old_tss_base, &nseg_desc);
if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
u32 eflags = kvm_get_rflags(vcpu);
kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT);
}
if (reason != TASK_SWITCH_IRET) {
nseg_desc.type |= (1 << 1);
save_guest_segment_descriptor(vcpu, tss_selector,
&nseg_desc);
}
kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS);
seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
tr_seg.type = 11;
kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
out:
return ret;
}
EXPORT_SYMBOL_GPL(kvm_task_switch);
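/*
 * KVM_SET_SREGS: load the descriptor tables, control registers and segment
 * registers supplied by userspace, requeue any pending interrupt encoded in
 * the interrupt bitmap, and reset the MMU context if paging-related state
 * changed.
 */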
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
int mmu_reset_needed = 0;
int pending_vec, max_bits;
struct desc_ptr dt;
vcpu_load(vcpu);
dt.size = sregs->idt.limit;
dt.address = sregs->idt.base;
kvm_x86_ops->set_idt(vcpu, &dt);
dt.size = sregs->gdt.limit;
dt.address = sregs->gdt.base;
kvm_x86_ops->set_gdt(vcpu, &dt);
vcpu->arch.cr2 = sregs->cr2;
mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
vcpu->arch.cr3 = sregs->cr3;
kvm_set_cr8(vcpu, sregs->cr8);
mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
kvm_x86_ops->set_efer(vcpu, sregs->efer);
kvm_set_apic_base(vcpu, sregs->apic_base);
mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
vcpu->arch.cr0 = sregs->cr0;
mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
if (!is_long_mode(vcpu) && is_pae(vcpu)) {
load_pdptrs(vcpu, vcpu->arch.cr3);
mmu_reset_needed = 1;
}
if (mmu_reset_needed)
kvm_mmu_reset_context(vcpu);
max_bits = (sizeof sregs->interrupt_bitmap) << 3;
pending_vec = find_first_bit(
(const unsigned long *)sregs->interrupt_bitmap, max_bits);
if (pending_vec < max_bits) {
kvm_queue_interrupt(vcpu, pending_vec, false);
pr_debug("Set back pending irq %d\n", pending_vec);
if (irqchip_in_kernel(vcpu->kvm))
kvm_pic_clear_isr_ack(vcpu->kvm);
}
kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
update_cr8_intercept(vcpu);
/* Older userspace won't unhalt the vcpu on reset. */
if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
!is_protmode(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
unsigned long rflags;
int i, r;
vcpu_load(vcpu);
if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
r = -EBUSY;
if (vcpu->arch.exception.pending)
goto unlock_out;
if (dbg->control & KVM_GUESTDBG_INJECT_DB)
kvm_queue_exception(vcpu, DB_VECTOR);
else
kvm_queue_exception(vcpu, BP_VECTOR);
}
/*
* Read rflags as long as potentially injected trace flags are still
* filtered out.
*/
rflags = kvm_get_rflags(vcpu);
vcpu->guest_debug = dbg->control;
if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
vcpu->guest_debug = 0;
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
for (i = 0; i < KVM_NR_DB_REGS; ++i)
vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
vcpu->arch.switch_db_regs =
(dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
} else {
for (i = 0; i < KVM_NR_DB_REGS; i++)
vcpu->arch.eff_db[i] = vcpu->arch.db[i];
vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
}
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
get_segment_base(vcpu, VCPU_SREG_CS);
/*
* Trigger an rflags update that will inject or remove the trace
* flags.
*/
kvm_set_rflags(vcpu, rflags);
kvm_x86_ops->set_guest_debug(vcpu, dbg);
r = 0;
unlock_out:
vcpu_put(vcpu);
return r;
}
/*
* fxsave fpu state. Taken from x86_64/processor.h. To be killed when
* we have asm/x86/processor.h
*/
struct fxsave {
u16 cwd;
u16 swd;
u16 twd;
u16 fop;
u64 rip;
u64 rdp;
u32 mxcsr;
u32 mxcsr_mask;
u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
#ifdef CONFIG_X86_64
u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
#else
u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
#endif
};
/*
* Translate a guest virtual address to a guest physical address.
*/
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
unsigned long vaddr = tr->linear_address;
gpa_t gpa;
int idx;
vcpu_load(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
tr->physical_address = gpa;
tr->valid = gpa != UNMAPPED_GVA;
tr->writeable = 1;
tr->usermode = 0;
vcpu_put(vcpu);
return 0;
}
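/* KVM_GET_FPU: copy the guest's fxsave image into the userspace kvm_fpu layout. */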
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
vcpu_load(vcpu);
memcpy(fpu->fpr, fxsave->st_space, 128);
fpu->fcw = fxsave->cwd;
fpu->fsw = fxsave->swd;
fpu->ftwx = fxsave->twd;
fpu->last_opcode = fxsave->fop;
fpu->last_ip = fxsave->rip;
fpu->last_dp = fxsave->rdp;
memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
vcpu_load(vcpu);
memcpy(fxsave->st_space, fpu->fpr, 128);
fxsave->cwd = fpu->fcw;
fxsave->swd = fpu->fsw;
fxsave->twd = fpu->ftwx;
fxsave->fop = fpu->last_opcode;
fxsave->rip = fpu->last_ip;
fxsave->rdp = fpu->last_dp;
memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
vcpu_put(vcpu);
return 0;
}
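/*
 * Initialize the guest FPU image: save the host state, reset the FPU with
 * finit, capture that clean state as the guest image, restore the host
 * state, then set a sane MXCSR and clear the register area of the image.
 */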
void fx_init(struct kvm_vcpu *vcpu)
{
unsigned after_mxcsr_mask;
/*
* Touch the fpu the first time in non atomic context as if
* this is the first fpu instruction the exception handler
* will fire before the instruction returns and it'll have to
* allocate ram with GFP_KERNEL.
*/
if (!used_math())
kvm_fx_save(&vcpu->arch.host_fx_image);
/* Initialize guest FPU by resetting ours and saving into guest's */
preempt_disable();
kvm_fx_save(&vcpu->arch.host_fx_image);
kvm_fx_finit();
kvm_fx_save(&vcpu->arch.guest_fx_image);
kvm_fx_restore(&vcpu->arch.host_fx_image);
preempt_enable();
vcpu->arch.cr0 |= X86_CR0_ET;
after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
vcpu->arch.guest_fx_image.mxcsr = 0x1f80;
memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask,
0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
}
EXPORT_SYMBOL_GPL(fx_init);
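/*
 * kvm_load_guest_fpu()/kvm_put_guest_fpu() swap the guest and host fxsave
 * images around guest execution; guest_fpu_loaded tracks which image
 * currently owns the hardware FPU registers.
 */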
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
if (vcpu->guest_fpu_loaded)
return;
vcpu->guest_fpu_loaded = 1;
kvm_fx_save(&vcpu->arch.host_fx_image);
kvm_fx_restore(&vcpu->arch.guest_fx_image);
}
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
if (!vcpu->guest_fpu_loaded)
return;
vcpu->guest_fpu_loaded = 0;
kvm_fx_save(&vcpu->arch.guest_fx_image);
kvm_fx_restore(&vcpu->arch.host_fx_image);
set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
}
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.time_page) {
kvm_release_page_dirty(vcpu->arch.time_page);
vcpu->arch.time_page = NULL;
}
kvm_x86_ops->vcpu_free(vcpu);
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
unsigned int id)
{
return kvm_x86_ops->vcpu_create(kvm, id);
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
int r;
/* We do fxsave: this must be aligned. */
BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
vcpu_load(vcpu);
r = kvm_arch_vcpu_reset(vcpu);
if (r == 0)
r = kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
if (r < 0)
goto free_vcpu;
return 0;
free_vcpu:
kvm_x86_ops->vcpu_free(vcpu);
return r;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
vcpu_load(vcpu);
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);
kvm_x86_ops->vcpu_free(vcpu);
}
int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
{
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = false;
vcpu->arch.switch_db_regs = 0;
memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
vcpu->arch.dr6 = DR6_FIXED_1;
vcpu->arch.dr7 = DR7_FIXED_1;
return kvm_x86_ops->vcpu_reset(vcpu);
}
int kvm_arch_hardware_enable(void *garbage)
{
/*
* Since this may be called from a hotplug notifcation,
* we can't get the CPU frequency directly.
*/
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
int cpu = raw_smp_processor_id();
per_cpu(cpu_tsc_khz, cpu) = 0;
}
return kvm_x86_ops->hardware_enable(garbage);
}
void kvm_arch_hardware_disable(void *garbage)
{
kvm_x86_ops->hardware_disable(garbage);
drop_user_return_notifiers(garbage);
}
int kvm_arch_hardware_setup(void)
{
return kvm_x86_ops->hardware_setup();
}
void kvm_arch_hardware_unsetup(void)
{
kvm_x86_ops->hardware_unsetup();
}
void kvm_arch_check_processor_compat(void *rtn)
{
kvm_x86_ops->check_processor_compatibility(rtn);
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
struct page *page;
struct kvm *kvm;
int r;
BUG_ON(vcpu->kvm == NULL);
kvm = vcpu->kvm;
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
else
vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page) {
r = -ENOMEM;
goto fail;
}
vcpu->arch.pio_data = page_address(page);
r = kvm_mmu_create(vcpu);
if (r < 0)
goto fail_free_pio_data;
if (irqchip_in_kernel(kvm)) {
r = kvm_create_lapic(vcpu);
if (r < 0)
goto fail_mmu_destroy;
}
vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
GFP_KERNEL);
if (!vcpu->arch.mce_banks) {
r = -ENOMEM;
goto fail_free_lapic;
}
return 0;
fail_free_lapic:
kvm_free_lapic(vcpu);
fail_mmu_destroy:
kvm_mmu_destroy(vcpu);
fail_free_pio_data:
free_page((unsigned long)vcpu->arch.pio_data);
fail:
return r;
}
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
int idx;
kfree(vcpu->arch.mce_banks);
kvm_free_lapic(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_mmu_destroy(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
free_page((unsigned long)vcpu->arch.pio_data);
}
struct kvm *kvm_arch_create_vm(void)
{
struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
if (!kvm)
return ERR_PTR(-ENOMEM);
kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
if (!kvm->arch.aliases) {
kfree(kvm);
return ERR_PTR(-ENOMEM);
}
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
rdtscll(kvm->arch.vm_init_tsc);
return kvm;
}
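/* Drop a vcpu's MMU roots so its shadow pages can be freed with the VM. */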
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
vcpu_load(vcpu);
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
unsigned int i;
struct kvm_vcpu *vcpu;
/*
* Unpin any mmu pages first.
*/
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_unload_vcpu_mmu(vcpu);
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_arch_vcpu_free(vcpu);
mutex_lock(&kvm->lock);
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
kvm->vcpus[i] = NULL;
atomic_set(&kvm->online_vcpus, 0);
mutex_unlock(&kvm->lock);
}
void kvm_arch_sync_events(struct kvm *kvm)
{
kvm_free_all_assigned_devices(kvm);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_iommu_unmap_guest(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
kvm_free_physmem(kvm);
if (kvm->arch.apic_access_page)
put_page(kvm->arch.apic_access_page);
if (kvm->arch.ept_identity_pagetable)
put_page(kvm->arch.ept_identity_pagetable);
cleanup_srcu_struct(&kvm->srcu);
kfree(kvm->arch.aliases);
kfree(kvm);
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
struct kvm_userspace_memory_region *mem,
int user_alloc)
{
int npages = memslot->npages;
/* To keep backward compatibility with older userspace,
* x86 needs to handle !user_alloc case.
*/
if (!user_alloc) {
if (npages && !old.rmap) {
unsigned long userspace_addr;
down_write(&current->mm->mmap_sem);
userspace_addr = do_mmap(NULL, 0,
npages * PAGE_SIZE,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
0);
up_write(&current->mm->mmap_sem);
if (IS_ERR((void *)userspace_addr))
return PTR_ERR((void *)userspace_addr);
memslot->userspace_addr = userspace_addr;
}
}
return 0;
}
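/*
 * Second half of a memory slot update: unmap the old userspace mapping when
 * a slot created by the legacy !user_alloc path is deleted, resize the
 * shadow page pool and write-protect the slot's shadow mappings.
 */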
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
int user_alloc)
{
int npages = mem->memory_size >> PAGE_SHIFT;
if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
int ret;
down_write(&current->mm->mmap_sem);
ret = do_munmap(current->mm, old.userspace_addr,
old.npages * PAGE_SIZE);
up_write(&current->mm->mmap_sem);
if (ret < 0)
printk(KERN_WARNING
"kvm_vm_ioctl_set_memory_region: "
"failed to munmap memory\n");
}
spin_lock(&kvm->mmu_lock);
if (!kvm->arch.n_requested_mmu_pages) {
unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
}
kvm_mmu_slot_remove_write_access(kvm, mem->slot);
spin_unlock(&kvm->mmu_lock);
}
void kvm_arch_flush_shadow(struct kvm *kvm)
{
kvm_mmu_zap_all(kvm);
kvm_reload_remote_mmus(kvm);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
|| vcpu->arch.nmi_pending ||
(kvm_arch_interrupt_allowed(vcpu) &&
kvm_cpu_has_interrupt(vcpu));
}
void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
{
int me;
int cpu = vcpu->cpu;
if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
++vcpu->stat.halt_wakeup;
}
me = get_cpu();
if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
smp_send_reschedule(cpu);
put_cpu();
}
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
return kvm_x86_ops->interrupt_allowed(vcpu);
}
bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
{
unsigned long current_rip = kvm_rip_read(vcpu) +
get_segment_base(vcpu, VCPU_SREG_CS);
return current_rip == linear_rip;
}