Newer
Older
j++;
}
num_msrs_to_save = j;
}
/*
* Only apic need an MMIO device hook, so shortcut now..
*/
static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
gpa_t addr, int len,
int is_write)
{
struct kvm_io_device *dev;
if (vcpu->arch.apic) {
dev = &vcpu->arch.apic->dev;
if (dev->in_range(dev, addr, len, is_write))
return dev;
}
return NULL;
}
static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
gpa_t addr, int len,
int is_write)
{
struct kvm_io_device *dev;
dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write);
if (dev == NULL)
dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len,
is_write);
return dev;
}
int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
struct kvm_vcpu *vcpu)
{
void *data = val;
while (bytes) {
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
unsigned offset = addr & (PAGE_SIZE-1);
unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
if (gpa == UNMAPPED_GVA) {
r = X86EMUL_PROPAGATE_FAULT;
goto out;
}
ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
if (ret < 0) {
r = X86EMUL_UNHANDLEABLE;
goto out;
}
bytes -= toread;
data += toread;
addr += toread;
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
struct kvm_vcpu *vcpu)
{
void *data = val;
int r = X86EMUL_CONTINUE;
while (bytes) {
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
unsigned offset = addr & (PAGE_SIZE-1);
unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
if (gpa == UNMAPPED_GVA) {
r = X86EMUL_PROPAGATE_FAULT;
goto out;
}
ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
if (ret < 0) {
r = X86EMUL_UNHANDLEABLE;
goto out;
}
bytes -= towrite;
data += towrite;
addr += towrite;
}
out:
return r;
}
static int emulator_read_emulated(unsigned long addr,
void *val,
unsigned int bytes,
struct kvm_vcpu *vcpu)
{
struct kvm_io_device *mmio_dev;
gpa_t gpa;
if (vcpu->mmio_read_completed) {
memcpy(val, vcpu->mmio_data, bytes);
vcpu->mmio_read_completed = 0;
return X86EMUL_CONTINUE;
}
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
/* For APIC access vmexit */
if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
goto mmio;
if (kvm_read_guest_virt(addr, val, bytes, vcpu)
== X86EMUL_CONTINUE)
return X86EMUL_CONTINUE;
if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
mmio:
/*
* Is this MMIO handled locally?
*/
mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
if (mmio_dev) {
kvm_iodevice_read(mmio_dev, gpa, bytes, val);
return X86EMUL_CONTINUE;
}
vcpu->mmio_needed = 1;
vcpu->mmio_phys_addr = gpa;
vcpu->mmio_size = bytes;
vcpu->mmio_is_write = 0;
return X86EMUL_UNHANDLEABLE;
}
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes)
{
int ret;
ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
return 0;
kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
return 1;
}
static int emulator_write_emulated_onepage(unsigned long addr,
const void *val,
unsigned int bytes,
struct kvm_vcpu *vcpu)
{
struct kvm_io_device *mmio_dev;
gpa_t gpa;
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
if (gpa == UNMAPPED_GVA) {
kvm_inject_page_fault(vcpu, addr, 2);
return X86EMUL_PROPAGATE_FAULT;
}
/* For APIC access vmexit */
if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
goto mmio;
if (emulator_write_phys(vcpu, gpa, val, bytes))
return X86EMUL_CONTINUE;
mmio:
/*
* Is this MMIO handled locally?
*/
mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
if (mmio_dev) {
kvm_iodevice_write(mmio_dev, gpa, bytes, val);
return X86EMUL_CONTINUE;
}
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
vcpu->mmio_needed = 1;
vcpu->mmio_phys_addr = gpa;
vcpu->mmio_size = bytes;
vcpu->mmio_is_write = 1;
memcpy(vcpu->mmio_data, val, bytes);
return X86EMUL_CONTINUE;
}
int emulator_write_emulated(unsigned long addr,
const void *val,
unsigned int bytes,
struct kvm_vcpu *vcpu)
{
/* Crossing a page boundary? */
if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
int rc, now;
now = -addr & ~PAGE_MASK;
rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
if (rc != X86EMUL_CONTINUE)
return rc;
addr += now;
val += now;
bytes -= now;
}
return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
}
EXPORT_SYMBOL_GPL(emulator_write_emulated);
static int emulator_cmpxchg_emulated(unsigned long addr,
const void *old,
const void *new,
unsigned int bytes,
struct kvm_vcpu *vcpu)
{
static int reported;
if (!reported) {
reported = 1;
printk(KERN_WARNING "kvm: emulating exchange as write\n");
}
#ifndef CONFIG_X86_64
/* guests cmpxchg8b have to be emulated atomically */
if (bytes == 8) {
struct page *page;
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
if (gpa == UNMAPPED_GVA ||
(gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
goto emul_write;
if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
goto emul_write;
val = *(u64 *)new;
page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
kaddr = kmap_atomic(page, KM_USER0);
set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
kunmap_atomic(kaddr, KM_USER0);
kvm_release_page_dirty(page);
}
return emulator_write_emulated(addr, new, bytes, vcpu);
}
static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
return kvm_x86_ops->get_segment_base(vcpu, seg);
}
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
{
return X86EMUL_CONTINUE;
}
int emulate_clts(struct kvm_vcpu *vcpu)
{
kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
return X86EMUL_CONTINUE;
}
int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
{
struct kvm_vcpu *vcpu = ctxt->vcpu;
switch (dr) {
case 0 ... 3:
*dest = kvm_x86_ops->get_dr(vcpu, dr);
return X86EMUL_CONTINUE;
default:
pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
return X86EMUL_UNHANDLEABLE;
}
}
int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
{
unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
int exception;
kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
if (exception) {
/* FIXME: better handling */
return X86EMUL_UNHANDLEABLE;
}
return X86EMUL_CONTINUE;
}
void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
{
u8 opcodes[4];
unsigned long rip = kvm_rip_read(vcpu);
unsigned long rip_linear;
if (!printk_ratelimit())
return;
rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu);
printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
}
EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
static struct x86_emulate_ops emulate_ops = {
.read_std = kvm_read_guest_virt,
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
.cmpxchg_emulated = emulator_cmpxchg_emulated,
};
static void cache_all_regs(struct kvm_vcpu *vcpu)
{
kvm_register_read(vcpu, VCPU_REGS_RAX);
kvm_register_read(vcpu, VCPU_REGS_RSP);
kvm_register_read(vcpu, VCPU_REGS_RIP);
vcpu->arch.regs_dirty = ~0;
}
int emulate_instruction(struct kvm_vcpu *vcpu,
struct kvm_run *run,
unsigned long cr2,
u16 error_code,
int emulation_type)
{
int r;
struct decode_cache *c;
kvm_clear_exception_queue(vcpu);
vcpu->arch.mmio_fault_cr2 = cr2;
/*
* TODO: fix x86_emulate.c to use guest_read/write_register
* instead of direct ->regs accesses, can save hundred cycles
* on Intel for instructions that don't read/change RSP, for
* for example.
*/
cache_all_regs(vcpu);
vcpu->mmio_is_write = 0;
if (!(emulation_type & EMULTYPE_NO_DECODE)) {
int cs_db, cs_l;
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
vcpu->arch.emulate_ctxt.vcpu = vcpu;
vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
vcpu->arch.emulate_ctxt.mode =
(vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
? X86EMUL_MODE_REAL : cs_l
? X86EMUL_MODE_PROT64 : cs_db
? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
/* Reject the instructions other than VMCALL/VMMCALL when
* try to emulate invalid opcode */
c = &vcpu->arch.emulate_ctxt.decode;
if ((emulation_type & EMULTYPE_TRAP_UD) &&
(!(c->twobyte && c->b == 0x01 &&
(c->modrm_reg == 0 || c->modrm_reg == 3) &&
c->modrm_mod == 3 && c->modrm_rm == 1)))
return EMULATE_FAIL;
if (r) {
++vcpu->stat.insn_emulation_fail;
if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
return EMULATE_DONE;
return EMULATE_FAIL;
}
}
r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
return EMULATE_DO_MMIO;
if ((r || vcpu->mmio_is_write) && run) {
run->exit_reason = KVM_EXIT_MMIO;
run->mmio.phys_addr = vcpu->mmio_phys_addr;
memcpy(run->mmio.data, vcpu->mmio_data, 8);
run->mmio.len = vcpu->mmio_size;
run->mmio.is_write = vcpu->mmio_is_write;
}
if (r) {
if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
return EMULATE_DONE;
if (!vcpu->mmio_needed) {
kvm_report_emulation_failure(vcpu, "mmio");
return EMULATE_FAIL;
}
return EMULATE_DO_MMIO;
}
kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
if (vcpu->mmio_is_write) {
vcpu->mmio_needed = 0;
return EMULATE_DO_MMIO;
}
return EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(emulate_instruction);
static int pio_copy_data(struct kvm_vcpu *vcpu)
{
void *p = vcpu->arch.pio_data;
bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
if (vcpu->arch.pio.in)
ret = kvm_read_guest_virt(q, p, bytes, vcpu);
return ret;
}
int complete_pio(struct kvm_vcpu *vcpu)
{
struct kvm_pio_request *io = &vcpu->arch.pio;
long delta;
int r;
if (!io->string) {
if (io->in) {
val = kvm_register_read(vcpu, VCPU_REGS_RAX);
memcpy(&val, vcpu->arch.pio_data, io->size);
kvm_register_write(vcpu, VCPU_REGS_RAX, val);
}
} else {
if (io->in) {
r = pio_copy_data(vcpu);
return r;
}
delta = 1;
if (io->rep) {
delta *= io->cur_count;
/*
* The size of the register should really depend on
* current address size.
*/
val = kvm_register_read(vcpu, VCPU_REGS_RCX);
val -= delta;
kvm_register_write(vcpu, VCPU_REGS_RCX, val);
}
if (io->down)
delta = -delta;
delta *= io->size;
if (io->in) {
val = kvm_register_read(vcpu, VCPU_REGS_RDI);
val += delta;
kvm_register_write(vcpu, VCPU_REGS_RDI, val);
} else {
val = kvm_register_read(vcpu, VCPU_REGS_RSI);
val += delta;
kvm_register_write(vcpu, VCPU_REGS_RSI, val);
}
}
io->count -= io->cur_count;
io->cur_count = 0;
return 0;
}
static void kernel_pio(struct kvm_io_device *pio_dev,
struct kvm_vcpu *vcpu,
void *pd)
{
/* TODO: String I/O for in kernel device */
mutex_lock(&vcpu->kvm->lock);
if (vcpu->arch.pio.in)
kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
vcpu->arch.pio.size,
kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
vcpu->arch.pio.size,
pd);
mutex_unlock(&vcpu->kvm->lock);
}
static void pio_string_write(struct kvm_io_device *pio_dev,
struct kvm_vcpu *vcpu)
{
struct kvm_pio_request *io = &vcpu->arch.pio;
void *pd = vcpu->arch.pio_data;
int i;
mutex_lock(&vcpu->kvm->lock);
for (i = 0; i < io->cur_count; i++) {
kvm_iodevice_write(pio_dev, io->port,
io->size,
pd);
pd += io->size;
}
mutex_unlock(&vcpu->kvm->lock);
}
static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
gpa_t addr, int len,
int is_write)
return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
}
int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned port)
{
struct kvm_io_device *pio_dev;
vcpu->run->exit_reason = KVM_EXIT_IO;
vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
vcpu->run->io.size = vcpu->arch.pio.size = size;
vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1;
vcpu->run->io.port = vcpu->arch.pio.port = port;
vcpu->arch.pio.in = in;
vcpu->arch.pio.string = 0;
vcpu->arch.pio.down = 0;
vcpu->arch.pio.rep = 0;
if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
handler);
else
KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
handler);
val = kvm_register_read(vcpu, VCPU_REGS_RAX);
memcpy(vcpu->arch.pio_data, &val, 4);
pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
complete_pio(vcpu);
return 1;
}
return 0;
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio);
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned long count, int down,
gva_t address, int rep, unsigned port)
{
unsigned now, in_page;
struct kvm_io_device *pio_dev;
vcpu->run->exit_reason = KVM_EXIT_IO;
vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
vcpu->run->io.size = vcpu->arch.pio.size = size;
vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count;
vcpu->run->io.port = vcpu->arch.pio.port = port;
vcpu->arch.pio.in = in;
vcpu->arch.pio.string = 1;
vcpu->arch.pio.down = down;
vcpu->arch.pio.rep = rep;
if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
handler);
else
KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
handler);
if (!count) {
kvm_x86_ops->skip_emulated_instruction(vcpu);
return 1;
}
if (!down)
in_page = PAGE_SIZE - offset_in_page(address);
else
in_page = offset_in_page(address) + size;
now = min(count, (unsigned long)in_page / size);
now = 1;
if (down) {
/*
* String I/O in reverse. Yuck. Kill the guest, fix later.
*/
pr_unimpl(vcpu, "guest string pio down\n");
kvm_inject_gp(vcpu, 0);
return 1;
}
vcpu->run->io.count = now;
vcpu->arch.pio.cur_count = now;
if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
kvm_x86_ops->skip_emulated_instruction(vcpu);
pio_dev = vcpu_find_pio_dev(vcpu, port,
vcpu->arch.pio.cur_count,
!vcpu->arch.pio.in);
/* string PIO write */
ret = pio_copy_data(vcpu);
if (ret == X86EMUL_PROPAGATE_FAULT) {
kvm_inject_gp(vcpu, 0);
return 1;
}
if (ret == 0 && pio_dev) {
pio_string_write(pio_dev, vcpu);
complete_pio(vcpu);
if (vcpu->arch.pio.count == 0)
ret = 1;
}
} else if (pio_dev)
pr_unimpl(vcpu, "no string pio read support yet, "
"port %x size %d count %ld\n",
port, size, count);
return ret;
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
static void bounce_off(void *info)
{
/* nothing */
}
static unsigned int ref_freq;
static unsigned long tsc_khz_ref;
static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
struct cpufreq_freqs *freq = data;
struct kvm *kvm;
struct kvm_vcpu *vcpu;
int i, send_ipi = 0;
if (!ref_freq)
ref_freq = freq->old;
if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
return 0;
if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
return 0;
per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
vcpu = kvm->vcpus[i];
if (!vcpu)
continue;
if (vcpu->cpu != freq->cpu)
continue;
if (!kvm_request_guest_time_update(vcpu))
continue;
if (vcpu->cpu != smp_processor_id())
send_ipi++;
}
}
spin_unlock(&kvm_lock);
if (freq->old < freq->new && send_ipi) {
/*
* We upscale the frequency. Must make the guest
* doesn't see old kvmclock values while running with
* the new frequency, otherwise we risk the guest sees
* time go backwards.
*
* In case we update the frequency for another cpu
* (which might be in guest context) send an interrupt
* to kick the cpu out of guest context. Next time
* guest context is entered kvmclock will be updated,
* so the guest will not see stale values.
*/
smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
}
return 0;
}
static struct notifier_block kvmclock_cpufreq_notifier_block = {
.notifier_call = kvmclock_cpufreq_notifier
};
int kvm_arch_init(void *opaque)
struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
if (kvm_x86_ops) {
printk(KERN_ERR "kvm: already loaded the other module\n");
r = -EEXIST;
goto out;
}
if (!ops->cpu_has_kvm_support()) {
printk(KERN_ERR "kvm: no hardware support\n");
r = -EOPNOTSUPP;
goto out;
}
if (ops->disabled_by_bios()) {
printk(KERN_ERR "kvm: disabled by bios\n");
r = -EOPNOTSUPP;
goto out;
}
r = kvm_mmu_module_init();
if (r)
goto out;
kvm_init_msr_list();
kvm_x86_ops = ops;
kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
for_each_possible_cpu(cpu)
per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
tsc_khz_ref = tsc_khz;
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
}
return 0;
out:
return r;
void kvm_arch_exit(void)
{
kvm_x86_ops = NULL;
kvm_mmu_module_exit();
}
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
++vcpu->stat.halt_exits;
if (irqchip_in_kernel(vcpu->kvm)) {
vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
return 1;
} else {
vcpu->run->exit_reason = KVM_EXIT_HLT;
return 0;
}
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
unsigned long a1)
{
if (is_long_mode(vcpu))
return a0;
else
return a0 | ((gpa_t)a1 << 32);
}
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
if (!is_long_mode(vcpu)) {
nr &= 0xFFFFFFFF;
a0 &= 0xFFFFFFFF;
a1 &= 0xFFFFFFFF;
a2 &= 0xFFFFFFFF;
a3 &= 0xFFFFFFFF;
}
switch (nr) {
case KVM_HC_VAPIC_POLL_IRQ:
ret = 0;
break;
case KVM_HC_MMU_OP:
r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
break;
default:
ret = -KVM_ENOSYS;
break;
}
kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
{
char instruction[3];
int ret = 0;
unsigned long rip = kvm_rip_read(vcpu);
/*
* Blow out the MMU to ensure that no other VCPU has an active mapping
* to ensure that the updated hypercall appears atomically across all
* VCPUs.
*/
kvm_mmu_zap_all(vcpu->kvm);
kvm_x86_ops->patch_hypercall(vcpu, instruction);
if (emulator_write_emulated(rip, instruction, 3, vcpu)
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
!= X86EMUL_CONTINUE)
ret = -EFAULT;
return ret;
}
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
}
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
struct descriptor_table dt = { limit, base };
kvm_x86_ops->set_gdt(vcpu, &dt);
}
void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
struct descriptor_table dt = { limit, base };
kvm_x86_ops->set_idt(vcpu, &dt);
}
void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
unsigned long *rflags)
{
kvm_lmsw(vcpu, msw);
*rflags = kvm_x86_ops->get_rflags(vcpu);
}
unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
{
kvm_x86_ops->decache_cr4_guest_bits(vcpu);
switch (cr) {
case 0:
case 2:
case 3:
case 4:
default:
vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
return 0;
}
KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value,
(u32)((u64)value >> 32), handler);
return value;
}
void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
unsigned long *rflags)
{
KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val,
(u32)((u64)val >> 32), handler);
switch (cr) {
case 0:
kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
*rflags = kvm_x86_ops->get_rflags(vcpu);
break;
case 2:
break;
case 3:
kvm_set_cr3(vcpu, val);
break;
case 4:
kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
break;
kvm_set_cr8(vcpu, val & 0xfUL);
default:
vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
}
}
static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
{
struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
int j, nent = vcpu->arch.cpuid_nent;
e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
/* when no next entry is found, the current entry[i] is reselected */
for (j = i + 1; ; j = (j + 1) % nent) {
struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
if (ej->function == e->function) {
ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
return j;
}
}
return 0; /* silence gcc, even though control never reaches here */
}
/* find an entry with matching function, matching index (if needed), and that
* should be read next (if it's stateful) */
static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
u32 function, u32 index)
{
if (e->function != function)
return 0;
if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
return 0;
if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
!(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
return 0;
return 1;
}
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index)
{
int i;
struct kvm_cpuid_entry2 *best = NULL;
for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
struct kvm_cpuid_entry2 *e;
e = &vcpu->arch.cpuid_entries[i];
if (is_matching_cpuid_entry(e, function, index)) {
if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
move_to_next_stateful_cpuid_entry(vcpu, i);
best = e;
break;
}
/*
* Both basic or both extended?
*/
if (((e->function ^ function) & 0x80000000) == 0)
if (!best || e->function > best->function)
best = e;
}
return best;
}
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{