Newer
Older
u32 function, index;
struct kvm_cpuid_entry2 *best;
function = kvm_register_read(vcpu, VCPU_REGS_RAX);
index = kvm_register_read(vcpu, VCPU_REGS_RCX);
kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
best = kvm_find_cpuid_entry(vcpu, function, index);
if (best) {
kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
}
kvm_x86_ops->skip_emulated_instruction(vcpu);
(u32)kvm_register_read(vcpu, VCPU_REGS_RAX),
(u32)kvm_register_read(vcpu, VCPU_REGS_RBX),
(u32)kvm_register_read(vcpu, VCPU_REGS_RCX),
(u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler);
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
/*
 * Decide whether the run loop should hand control back to userspace so
 * that it can inject an interrupt.
 *
 * An exit is wanted only when no interrupt is already queued
 * (irq_summary is zero), userspace asked for an interrupt window, the
 * window is currently open, and the guest has EFLAGS.IF set.
 *
 * Returns 1 if userspace should be given the chance to inject, else 0.
 */
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
					struct kvm_run *kvm_run)
{
	/* Something is already queued: nothing for userspace to do. */
	if (vcpu->arch.irq_summary)
		return 0;

	if (!kvm_run->request_interrupt_window)
		return 0;

	if (!vcpu->arch.interrupt_window_open)
		return 0;

	return (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
}
/*
 * Publish the vcpu state that userspace reads after a run into the
 * shared kvm_run area: the guest's interrupt flag, CR8, the APIC base
 * and whether an interrupt may be injected right now.
 *
 * With an in-kernel irqchip the vcpu is always reported as ready for
 * injection; otherwise readiness requires an open interrupt window and
 * no interrupt already queued in irq_summary.
 */
static void post_kvm_run_save(struct kvm_vcpu *vcpu,
			      struct kvm_run *kvm_run)
{
	kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
	kvm_run->cr8 = kvm_get_cr8(vcpu);
	kvm_run->apic_base = kvm_get_apic_base(vcpu);

	/*
	 * The two assignments are alternatives: without the "else" the
	 * in-kernel-irqchip value would always be overwritten.
	 */
	if (irqchip_in_kernel(vcpu->kvm))
		kvm_run->ready_for_interrupt_injection = 1;
	else
		kvm_run->ready_for_interrupt_injection =
			(vcpu->arch.interrupt_window_open &&
			 vcpu->arch.irq_summary == 0);
}
/*
 * Look up the page that backs the guest's vapic address and remember it
 * in the local APIC state. Does nothing when the vcpu has no APIC state
 * or no vapic address has been set.
 */
static void vapic_enter(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *lapic = vcpu->arch.apic;

	if (lapic && lapic->vapic_addr)
		lapic->vapic_page = gfn_to_page(vcpu->kvm,
					lapic->vapic_addr >> PAGE_SHIFT);
}
/*
 * Counterpart of vapic_enter(): release the vapic page as dirty and mark
 * the corresponding guest frame dirty. Does nothing when the vcpu has no
 * APIC state or no vapic address has been set.
 *
 * Takes slots_lock for reading around the release, so the caller must
 * not already hold it for writing.
 */
static void vapic_exit(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!apic || !apic->vapic_addr)
		return;

	down_read(&vcpu->kvm->slots_lock);
	kvm_release_page_dirty(apic->vapic_page);
	mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
	up_read(&vcpu->kvm->slots_lock);
}
/*
 * Perform one guest entry/exit cycle for @vcpu: service pending vcpu
 * requests, reload the MMU, queue a pending exception or interrupt,
 * swap debug registers when required, run the guest through
 * kvm_x86_ops->run() and hand the exit to kvm_x86_ops->handle_exit().
 *
 * On the visible paths r is 0 for the TPR-access and triple-fault
 * requests (kvm_run->exit_reason is filled in first), 1 when entry is
 * abandoned because a request, reschedule or signal is pending, and
 * otherwise the result of kvm_mmu_reload() or handle_exit().
 *
 * NOTE(review): this copy of the function looks incomplete - the "out"
 * label targeted by the gotos, the final return and several closing
 * braces are not present, and a few statements seem to have lost a
 * neighbouring line (flagged inline). Verify against the authoritative
 * file before relying on the control flow as shown.
 */
static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
/* Drop the current MMU context first if a reload was requested. */
if (vcpu->requests)
if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
kvm_mmu_unload(vcpu);
r = kvm_mmu_reload(vcpu);
if (unlikely(r))
goto out;
if (vcpu->requests) {
/*
 * NOTE(review): no statement follows the KVM_REQ_MIGRATE_TIMER test, so
 * as written the KVMCLOCK check below is nested under it - a line
 * appears to be missing here.
 */
if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
kvm_write_guest_time(vcpu);
if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
kvm_mmu_sync_roots(vcpu);
if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
kvm_x86_ops->tlb_flush(vcpu);
/* These two requests end the cycle with r = 0 and an exit reason. */
if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
&vcpu->requests)) {
kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
r = 0;
goto out;
}
if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
r = 0;
goto out;
}
/*
 * NOTE(review): the brace opened by the second "if (vcpu->requests) {"
 * is not closed anywhere in this text.
 */
clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
kvm_inject_pending_timer_irqs(vcpu);
preempt_disable();
kvm_x86_ops->prepare_guest_switch(vcpu);
kvm_load_guest_fpu(vcpu);
local_irq_disable();
/*
 * Last check with interrupts off: if anything needs attention, undo the
 * irq/preempt state and report r = 1 without entering the guest.
 */
if (vcpu->requests || need_resched() || signal_pending(current)) {
local_irq_enable();
preempt_enable();
r = 1;
goto out;
}
vcpu->guest_mode = 1;
/*
* Make sure that guest_mode assignment won't happen after
* testing the pending IRQ vector bitmap.
*/
smp_wmb();
if (vcpu->arch.exception.pending)
__queue_exception(vcpu);
else if (irqchip_in_kernel(vcpu->kvm))
kvm_x86_ops->inject_pending_irq(vcpu);
/*
 * NOTE(review): as written this call runs unconditionally after the
 * if/else-if chain; an "else" may have been lost before it.
 */
kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
/* slots_lock is released while the guest runs and re-taken after exit. */
up_read(&vcpu->kvm->slots_lock);
kvm_guest_enter();
/* Save host DR6/DR7; DR0-DR3 are swapped only when switch_db_regs is set. */
get_debugreg(vcpu->arch.host_dr6, 6);
get_debugreg(vcpu->arch.host_dr7, 7);
if (unlikely(vcpu->arch.switch_db_regs)) {
get_debugreg(vcpu->arch.host_db[0], 0);
get_debugreg(vcpu->arch.host_db[1], 1);
get_debugreg(vcpu->arch.host_db[2], 2);
get_debugreg(vcpu->arch.host_db[3], 3);
/* DR7 is zeroed before the address registers are rewritten. */
set_debugreg(0, 7);
set_debugreg(vcpu->arch.eff_db[0], 0);
set_debugreg(vcpu->arch.eff_db[1], 1);
set_debugreg(vcpu->arch.eff_db[2], 2);
set_debugreg(vcpu->arch.eff_db[3], 3);
}
kvm_x86_ops->run(vcpu, kvm_run);
/* Back from the guest: restore the host debug registers. */
if (unlikely(vcpu->arch.switch_db_regs)) {
set_debugreg(0, 7);
set_debugreg(vcpu->arch.host_db[0], 0);
set_debugreg(vcpu->arch.host_db[1], 1);
set_debugreg(vcpu->arch.host_db[2], 2);
set_debugreg(vcpu->arch.host_db[3], 3);
}
set_debugreg(vcpu->arch.host_dr6, 6);
set_debugreg(vcpu->arch.host_dr7, 7);
vcpu->guest_mode = 0;
local_irq_enable();
++vcpu->stat.exits;
/*
* We must have an instruction between local_irq_enable() and
* kvm_guest_exit(), so the timer interrupt isn't delayed by
* the interrupt shadow. The stat.exits increment will do nicely.
* But we need to prevent reordering, hence this barrier():
*/
barrier();
kvm_guest_exit();
preempt_enable();
down_read(&vcpu->kvm->slots_lock);
/*
* Profile KVM exit RIPs:
*/
if (unlikely(prof_on == KVM_PROFILING)) {
unsigned long rip = kvm_rip_read(vcpu);
profile_hit(KVM_PROFILING, (void *)rip);
/*
 * NOTE(review): the profiling block is not closed in this text, so the
 * statements below read as if they were inside it.
 */
/* Clear the pending flag once the backend reports the exception injected. */
if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu))
vcpu->arch.exception.pending = false;
r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
/*
 * NOTE(review): the "out:" label, "return r;" and the closing brace of
 * the function are missing from this text.
 */
static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
pr_debug("vcpu %d received sipi with vector # %x\n",
vcpu->vcpu_id, vcpu->arch.sipi_vector);
r = kvm_arch_vcpu_reset(vcpu);
if (r)
return r;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
down_read(&vcpu->kvm->slots_lock);
vapic_enter(vcpu);
r = 1;
while (r > 0) {
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
r = vcpu_enter_guest(vcpu, kvm_run);
else {
up_read(&vcpu->kvm->slots_lock);
kvm_vcpu_block(vcpu);
down_read(&vcpu->kvm->slots_lock);
if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
vcpu->arch.mp_state =
KVM_MP_STATE_RUNNABLE;
if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
r = -EINTR;
}
if (r > 0) {
if (dm_request_for_irq_injection(vcpu, kvm_run)) {
r = -EINTR;
kvm_run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.request_irq_exits;
}
if (signal_pending(current)) {
r = -EINTR;
kvm_run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.signal_exits;
}
if (need_resched()) {
up_read(&vcpu->kvm->slots_lock);
kvm_resched(vcpu);
down_read(&vcpu->kvm->slots_lock);
}
}
up_read(&vcpu->kvm->slots_lock);
post_kvm_run_save(vcpu, kvm_run);
return r;
}
/*
 * Architecture entry point for running a vcpu on behalf of userspace.
 *
 * Loads the vcpu, applies the vcpu's signal mask if one is active,
 * completes any PIO or MMIO operation left over from the previous exit,
 * propagates a hypercall return value into RAX and then enters the run
 * loop via __vcpu_run().
 *
 * Returns -EAGAIN when the vcpu is still uninitialized (after blocking
 * once), 0 when a read-modify-write MMIO needs another round trip to
 * userspace, a non-zero complete_pio() result, or whatever __vcpu_run()
 * returns. The signal mask and vcpu load are undone on every path.
 */
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int r;
	sigset_t sigsaved;

	vcpu_load(vcpu);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
		kvm_vcpu_block(vcpu);
		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
		r = -EAGAIN;
		goto out;
	}

	/* re-sync apic's tpr */
	if (!irqchip_in_kernel(vcpu->kvm))
		kvm_set_cr8(vcpu, kvm_run->cr8);

	if (vcpu->arch.pio.cur_count) {
		r = complete_pio(vcpu);
		if (r)
			goto out;
	}
#if CONFIG_HAS_IOMEM
	if (vcpu->mmio_needed) {
		memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
		vcpu->mmio_read_completed = 1;
		vcpu->mmio_needed = 0;

		down_read(&vcpu->kvm->slots_lock);
		r = emulate_instruction(vcpu, kvm_run,
					vcpu->arch.mmio_fault_cr2, 0,
					EMULTYPE_NO_DECODE);
		/*
		 * Release the lock before either leaving through "out" or
		 * calling __vcpu_run(), which acquires slots_lock itself.
		 */
		up_read(&vcpu->kvm->slots_lock);
		if (r == EMULATE_DO_MMIO) {
			/*
			 * Read-modify-write.  Back to userspace.
			 */
			r = 0;
			goto out;
		}
	}
#endif
	if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
		kvm_register_write(vcpu, VCPU_REGS_RAX,
				   kvm_run->hypercall.ret);

	r = __vcpu_run(vcpu, kvm_run);

out:
	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu_put(vcpu);
	return r;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
vcpu_load(vcpu);
regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
#ifdef CONFIG_X86_64
regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
regs->rip = kvm_rip_read(vcpu);
regs->rflags = kvm_x86_ops->get_rflags(vcpu);
/*
* Don't leak debug flags in case they were set for guest debugging
*/
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
vcpu_load(vcpu);
kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
#ifdef CONFIG_X86_64
kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
kvm_rip_write(vcpu, regs->rip);
kvm_x86_ops->set_rflags(vcpu, regs->rflags);
vcpu->arch.exception.pending = false;
vcpu_put(vcpu);
return 0;
}
void kvm_get_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
kvm_x86_ops->get_segment(vcpu, var, seg);
}
void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
{
struct kvm_segment cs;
kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
*db = cs.db;
*l = cs.l;
}
EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
struct descriptor_table dt;
int pending_vec;
vcpu_load(vcpu);
kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
kvm_x86_ops->get_idt(vcpu, &dt);
sregs->idt.limit = dt.limit;
sregs->idt.base = dt.base;
kvm_x86_ops->get_gdt(vcpu, &dt);
sregs->gdt.limit = dt.limit;
sregs->gdt.base = dt.base;
kvm_x86_ops->decache_cr4_guest_bits(vcpu);
sregs->cr0 = vcpu->arch.cr0;
sregs->cr2 = vcpu->arch.cr2;
sregs->cr3 = vcpu->arch.cr3;
sregs->cr4 = vcpu->arch.cr4;
sregs->cr8 = kvm_get_cr8(vcpu);
sregs->efer = vcpu->arch.shadow_efer;
sregs->apic_base = kvm_get_apic_base(vcpu);
if (irqchip_in_kernel(vcpu->kvm)) {
memset(sregs->interrupt_bitmap, 0,
sizeof sregs->interrupt_bitmap);
pending_vec = kvm_x86_ops->get_irq(vcpu);
if (pending_vec >= 0)
set_bit(pending_vec,
(unsigned long *)sregs->interrupt_bitmap);
} else
memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
sizeof sregs->interrupt_bitmap);
vcpu_put(vcpu);
return 0;
}
/*
 * Report the vcpu's current multiprocessing state in @mp_state.
 * Always returns 0.
 */
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	vcpu_load(vcpu);

	mp_state->mp_state = vcpu->arch.mp_state;

	vcpu_put(vcpu);

	return 0;
}
/*
 * Replace the vcpu's multiprocessing state with the value supplied in
 * @mp_state. The value is stored as given, without validation.
 * Always returns 0.
 */
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	vcpu_load(vcpu);

	vcpu->arch.mp_state = mp_state->mp_state;

	vcpu_put(vcpu);

	return 0;
}
/*
 * Write @var into segment register @seg of @vcpu by delegating to the
 * backend's set_segment hook.
 */
static void kvm_set_segment(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg)
{
	kvm_x86_ops->set_segment(vcpu, var, seg);
}
/*
 * Convert a raw segment descriptor into KVM's unpacked segment form.
 *
 * @seg_desc:  descriptor as read from a descriptor table
 * @selector:  selector that referenced the descriptor; stored as-is
 * @kvm_desct: output, fully overwritten
 *
 * The base is assembled from base0/base1/base2 and the limit from
 * limit0/limit. When the granularity bit is set the limit is scaled to
 * bytes (shifted left by 12 with the low 12 bits set). A zero selector
 * marks the resulting segment as unusable.
 */
static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
				   struct kvm_segment *kvm_desct)
{
	kvm_desct->base = seg_desc->base0;
	/*
	 * Widen to u32 before shifting so the shift is never done on a
	 * promoted signed int: base2 << 24 could otherwise reach the sign
	 * bit and be sign-extended if base is wider than 32 bits.
	 * NOTE(review): field widths of desc_struct are not visible here;
	 * the casts are harmless if the fields are already wide unsigned.
	 */
	kvm_desct->base |= (u32)seg_desc->base1 << 16;
	kvm_desct->base |= (u32)seg_desc->base2 << 24;

	kvm_desct->limit = seg_desc->limit0;
	kvm_desct->limit |= seg_desc->limit << 16;
	if (seg_desc->g) {
		kvm_desct->limit <<= 12;
		kvm_desct->limit |= 0xfff;
	}

	kvm_desct->selector = selector;
	kvm_desct->type = seg_desc->type;
	kvm_desct->present = seg_desc->p;
	kvm_desct->dpl = seg_desc->dpl;
	kvm_desct->db = seg_desc->d;
	kvm_desct->s = seg_desc->s;
	kvm_desct->l = seg_desc->l;
	kvm_desct->g = seg_desc->g;
	kvm_desct->avl = seg_desc->avl;
	if (!selector)
		kvm_desct->unusable = 1;
	else
		kvm_desct->unusable = 0;
	kvm_desct->padding = 0;
}
/*
 * Find the descriptor table that @selector indexes into.
 *
 * Bit 2 of the selector chooses the table: when clear, the GDT is
 * fetched from the backend; when set, the table is described by the
 * vcpu's LDTR segment (limit forced to 0 if that segment is unusable).
 */
static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
					  u16 selector,
					  struct descriptor_table *dtable)
{
	struct kvm_segment ldt;

	if (!(selector & (1 << 2))) {
		kvm_x86_ops->get_gdt(vcpu, dtable);
		return;
	}

	kvm_get_segment(vcpu, &ldt, VCPU_SREG_LDTR);

	dtable->limit = ldt.unusable ? 0 : ldt.limit;
	dtable->base = ldt.base;
}
/*
 * Read the 8-byte descriptor selected by @selector from guest memory
 * into @seg_desc. Only 8-byte descriptors are supported.
 *
 * If the descriptor does not fit inside the table limit, a #GP carrying
 * the selector (low two bits cleared) is queued and 1 is returned.
 * Otherwise the result of kvm_read_guest() is returned.
 */
static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
					 struct desc_struct *seg_desc)
{
	gpa_t gpa;
	struct descriptor_table dtable;
	u16 index = selector >> 3;

	get_segment_descriptor_dtable(vcpu, selector, &dtable);

	if (dtable.limit < index * 8 + 7) {
		kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
		return 1;
	}
	/* Translate the table base, then step to the selected entry. */
	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
	gpa += index * 8;
	return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8);
}
/*
 * Write the 8-byte descriptor @seg_desc back to the guest table entry
 * selected by @selector. Only 8-byte descriptors are supported.
 *
 * Returns 1 if the entry lies beyond the table limit (no exception is
 * queued on this path), otherwise the result of kvm_write_guest().
 */
static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
					 struct desc_struct *seg_desc)
{
	gpa_t gpa;
	struct descriptor_table dtable;
	u16 index = selector >> 3;

	get_segment_descriptor_dtable(vcpu, selector, &dtable);

	if (dtable.limit < index * 8 + 7)
		return 1;
	/* Translate the table base, then step to the selected entry. */
	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
	gpa += index * 8;
	return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8);
}
/*
 * Assemble the base address stored in @seg_desc from its base0, base1
 * and base2 fields and translate it with the vcpu's gva_to_gpa hook.
 * The translated address is returned truncated to 32 bits.
 */
static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
			     struct desc_struct *seg_desc)
{
	u32 linear = seg_desc->base0 |
		     (seg_desc->base1 << 16) |
		     (seg_desc->base2 << 24);

	return vcpu->arch.mmu.gva_to_gpa(vcpu, linear);
}
static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
{
struct kvm_segment kvm_seg;
kvm_get_segment(vcpu, &kvm_seg, seg);
return kvm_seg.selector;
}
/*
 * Fetch the guest descriptor for @selector and unpack it into @kvm_seg.
 * Returns 1 if the descriptor could not be loaded (in which case
 * @kvm_seg is left untouched), 0 on success.
 */
static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
						u16 selector,
						struct kvm_segment *kvm_seg)
{
	struct desc_struct raw_desc;
	int failed;

	failed = load_guest_segment_descriptor(vcpu, selector, &raw_desc);
	if (!failed)
		seg_desct_to_kvm_desct(&raw_desc, selector, kvm_seg);

	return failed ? 1 : 0;
}
/*
 * Load segment register @seg the way it works without protected mode:
 * base is the selector shifted left by four, limit is 0xffff, and the
 * attributes are fixed (type 3, present, DPL 3, s set). All remaining
 * attribute fields are left at zero.
 *
 * Always returns 0.
 */
static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
{
	/* Members not named here are zero-initialized by the compiler. */
	struct kvm_segment rm_seg = {
		.selector = selector,
		.base = selector << 4,
		.limit = 0xffff,
		.type = 3,
		.s = 1,
		.dpl = 3,
		.present = 1,
	};

	kvm_x86_ops->set_segment(vcpu, &rm_seg, seg);

	return 0;
}
/*
 * Load segment register @seg from @selector.
 *
 * When CR0.PE is clear the real-mode loader is used. Otherwise the
 * descriptor is read from the guest's table, @type_bits are OR-ed into
 * its type, and - for every register except SS, CS and LDTR - a
 * descriptor with the "s" bit clear is marked unusable.
 *
 * Returns 0 on success, 1 if the descriptor could not be loaded.
 */
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
				int type_bits, int seg)
{
	struct kvm_segment seg_var;

	if (!(vcpu->arch.cr0 & X86_CR0_PE))
		return kvm_load_realmode_segment(vcpu, selector, seg);

	if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &seg_var))
		return 1;

	seg_var.type |= type_bits;

	if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS &&
	    seg != VCPU_SREG_LDTR && !seg_var.s)
		seg_var.unusable = 1;

	kvm_set_segment(vcpu, &seg_var, seg);

	return 0;
}
static void save_state_to_tss32(struct kvm_vcpu *vcpu,
struct tss_segment_32 *tss)
{
tss->cr3 = vcpu->arch.cr3;
tss->eip = kvm_rip_read(vcpu);
tss->eflags = kvm_x86_ops->get_rflags(vcpu);
tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP);
tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP);
tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI);
tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI);
tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
}
/*
 * Load the vcpu's state from a 32-bit TSS image: CR3, EIP, EFLAGS (bit 1
 * is always set), the general-purpose registers, and then LDTR, ES, CS,
 * SS, DS, FS and GS from their selectors.
 *
 * Every segment load is checked on its own; the first one that fails
 * aborts the sequence.
 *
 * Returns 0 on success, 1 as soon as any segment fails to load.
 */
static int load_state_from_tss32(struct kvm_vcpu *vcpu,
				 struct tss_segment_32 *tss)
{
	kvm_set_cr3(vcpu, tss->cr3);

	kvm_rip_write(vcpu, tss->eip);
	kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);

	kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
	kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
	kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx);
	kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx);
	kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp);
	kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp);
	kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
	kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);

	/* LDTR first: the loads below may refer to LDT entries. */
	if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
		return 1;

	return 0;
}
/*
 * Snapshot the vcpu's current state into a 16-bit TSS image: IP, FLAGS,
 * the eight general-purpose registers and the ES/CS/SS/DS/LDT
 * selectors. The current TR selector is recorded as prev_task_link.
 */
static void save_state_to_tss16(struct kvm_vcpu *vcpu,
				struct tss_segment_16 *tss)
{
	/* Control state */
	tss->ip = kvm_rip_read(vcpu);
	tss->flag = kvm_x86_ops->get_rflags(vcpu);

	/* General-purpose registers */
	tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
	tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX);
	tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
	tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
	tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI);
	tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI);
	tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP);
	tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP);

	/* Segment selectors */
	tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
	tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
	tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
	tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
	tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
	tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
}
/*
 * Load the vcpu's state from a 16-bit TSS image: IP, FLAGS (bit 1 is
 * always set), the general-purpose registers, and then LDTR, ES, CS, SS
 * and DS from their selectors.
 *
 * Every segment load is checked on its own; the first one that fails
 * aborts the sequence.
 *
 * Returns 0 on success, 1 as soon as any segment fails to load.
 */
static int load_state_from_tss16(struct kvm_vcpu *vcpu,
				 struct tss_segment_16 *tss)
{
	kvm_rip_write(vcpu, tss->ip);
	kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);

	kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
	kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
	kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
	kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx);
	kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp);
	kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp);
	kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
	kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);

	/* LDTR first: the loads below may refer to LDT entries. */
	if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
		return 1;

	if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
		return 1;

	return 0;
}
static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,

Marcelo Tosatti
committed
u32 old_tss_base,
struct desc_struct *nseg_desc)
{
struct tss_segment_16 tss_segment_16;
int ret = 0;

Marcelo Tosatti
committed
if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
sizeof tss_segment_16))
goto out;
save_state_to_tss16(vcpu, &tss_segment_16);

Marcelo Tosatti
committed
if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
sizeof tss_segment_16))

Marcelo Tosatti
committed
if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
&tss_segment_16, sizeof tss_segment_16))
goto out;
if (load_state_from_tss16(vcpu, &tss_segment_16))
goto out;
ret = 1;
out:
return ret;
}
static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,

Marcelo Tosatti
committed
u32 old_tss_base,
struct desc_struct *nseg_desc)
{
struct tss_segment_32 tss_segment_32;
int ret = 0;

Marcelo Tosatti
committed
if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
sizeof tss_segment_32))
goto out;
save_state_to_tss32(vcpu, &tss_segment_32);

Marcelo Tosatti
committed
if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
sizeof tss_segment_32))
goto out;
if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
&tss_segment_32, sizeof tss_segment_32))

Marcelo Tosatti
committed
if (load_state_from_tss32(vcpu, &tss_segment_32))
goto out;
ret = 1;
out:
return ret;
}
/*
 * Emulate a task switch to the TSS named by @tss_selector.
 *
 * @reason is one of the TASK_SWITCH_* values and controls the privilege
 * check, the handling of the busy bit in the old and new TSS
 * descriptors, and the EFLAGS.NT updates.
 *
 * Sequence: load the new and current TSS descriptors; unless returning
 * via IRET, check the selector RPL and CPL against the new descriptor's
 * DPL (#GP on failure); require the new TSS to be present with a limit
 * of at least 0x67 (#TS otherwise); update the old descriptor and NT as
 * the reason requires; skip the emulated instruction; transfer state
 * through the 32-bit or 16-bit helper depending on bit 3 of the new
 * descriptor's type; finally set CR0.TS and load TR.
 *
 * Returns 1 when an exception was queued or the state transfer
 * succeeded, and 0 when a descriptor could not be loaded or the state
 * transfer failed.
 */
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
{
	struct kvm_segment tr_seg;
	struct desc_struct cseg_desc;
	struct desc_struct nseg_desc;
	int ret = 0;
	u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
	u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);

	old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base);

	/* FIXME: Handle errors. Failure to read either TSS or their
	 * descriptors should generate a pagefault.
	 */
	if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
		goto out;

	if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc))
		goto out;

	if (reason != TASK_SWITCH_IRET) {
		int cpl;

		cpl = kvm_x86_ops->get_cpl(vcpu);
		if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) {
			kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
			return 1;
		}
	}

	if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) {
		kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
		return 1;
	}

	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
		cseg_desc.type &= ~(1 << 1); /* clear the B flag */
		save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc);
	}

	if (reason == TASK_SWITCH_IRET) {
		u32 eflags = kvm_x86_ops->get_rflags(vcpu);
		kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
	}

	kvm_x86_ops->skip_emulated_instruction(vcpu);

	/* Bit 3 of the descriptor type picks the 32-bit TSS layout. */
	if (nseg_desc.type & 8)
		ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base,
					 &nseg_desc);
	else
		ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base,
					 &nseg_desc);

	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
		u32 eflags = kvm_x86_ops->get_rflags(vcpu);
		kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT);
	}

	if (reason != TASK_SWITCH_IRET) {
		/* Set the same type bit that was cleared in the old TSS. */
		nseg_desc.type |= (1 << 1);
		save_guest_segment_descriptor(vcpu, tss_selector,
					      &nseg_desc);
	}

	kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS);
	seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
	tr_seg.type = 11;
	kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
out:
	return ret;
}
EXPORT_SYMBOL_GPL(kvm_task_switch);
/*
 * Load the vcpu's special registers from @sregs: IDT/GDT, CR2, CR3,
 * CR8, EFER, APIC base, CR0, CR4, the pending-interrupt state and the
 * eight segment registers.
 *
 * The MMU context is reset if CR0, CR3, CR4 or EFER changed. PDPTRs are
 * reloaded when PAE is on and the vcpu is not in long mode.
 *
 * Without an in-kernel irqchip the interrupt bitmap is copied verbatim
 * and irq_summary is rebuilt from it; with one, only the first set bit
 * is handed to the backend's set_irq hook. The segment registers are
 * written in both cases.
 *
 * Always returns 0.
 */
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int mmu_reset_needed = 0;
	int i, pending_vec, max_bits;
	struct descriptor_table dt;

	vcpu_load(vcpu);

	dt.limit = sregs->idt.limit;
	dt.base = sregs->idt.base;
	kvm_x86_ops->set_idt(vcpu, &dt);
	dt.limit = sregs->gdt.limit;
	dt.base = sregs->gdt.base;
	kvm_x86_ops->set_gdt(vcpu, &dt);

	vcpu->arch.cr2 = sregs->cr2;
	mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
	vcpu->arch.cr3 = sregs->cr3;

	kvm_set_cr8(vcpu, sregs->cr8);

	mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer;
	kvm_x86_ops->set_efer(vcpu, sregs->efer);
	kvm_set_apic_base(vcpu, sregs->apic_base);

	kvm_x86_ops->decache_cr4_guest_bits(vcpu);

	mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
	kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
	vcpu->arch.cr0 = sregs->cr0;

	mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
	if (!is_long_mode(vcpu) && is_pae(vcpu))
		load_pdptrs(vcpu, vcpu->arch.cr3);

	if (mmu_reset_needed)
		kvm_mmu_reset_context(vcpu);

	if (!irqchip_in_kernel(vcpu->kvm)) {
		memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap,
		       sizeof vcpu->arch.irq_pending);
		/* One summary bit per non-zero word of irq_pending. */
		vcpu->arch.irq_summary = 0;
		for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i)
			if (vcpu->arch.irq_pending[i])
				__set_bit(i, &vcpu->arch.irq_summary);
	} else {
		max_bits = (sizeof sregs->interrupt_bitmap) << 3;
		pending_vec = find_first_bit(
			(const unsigned long *)sregs->interrupt_bitmap,
			max_bits);
		/* Only pending external irq is handled here */
		if (pending_vec < max_bits) {
			kvm_x86_ops->set_irq(vcpu, pending_vec);
			pr_debug("Set back pending irq %d\n",
				 pending_vec);
		}
	}

	kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
	kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
	kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
	kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
	kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
	kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);

	kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
	kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);

	/* Older userspace won't unhalt the vcpu on reset. */
	if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 &&
	    sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
	    !(vcpu->arch.cr0 & X86_CR0_PE))
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;

	vcpu_put(vcpu);

	return 0;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
vcpu_load(vcpu);
if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
for (i = 0; i < KVM_NR_DB_REGS; ++i)
vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
vcpu->arch.switch_db_regs =
(dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
} else {
for (i = 0; i < KVM_NR_DB_REGS; i++)
vcpu->arch.eff_db[i] = vcpu->arch.db[i];