  • /*
     * Only apic need an MMIO device hook, so shortcut now..
     */
    static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
    
    	if (vcpu->arch.apic) {
    		dev = &vcpu->arch.apic->dev;
    
    		if (dev->in_range(dev, addr, len, is_write))
    
    			return dev;
    	}
    	return NULL;
    }
    
    
static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
						gpa_t addr, int len,
						int is_write)
{
	struct kvm_io_device *dev;

	dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write);
	if (dev == NULL)
		dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len,
					  is_write);
	return dev;
}
    
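/*
 * Copy data from guest-virtual memory one page at a time, translating
 * each gva separately so a fault on any individual page can be reported.
 */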
int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
			struct kvm_vcpu *vcpu)
{
	void *data = val;
	int r = X86EMUL_CONTINUE;

	while (bytes) {
		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
		unsigned offset = addr & (PAGE_SIZE-1);
		unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
		int ret;

		if (gpa == UNMAPPED_GVA) {
			r = X86EMUL_PROPAGATE_FAULT;
			goto out;
		}
		ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
		if (ret < 0) {
			r = X86EMUL_UNHANDLEABLE;
			goto out;
		}

		bytes -= toread;
		data += toread;
		addr += toread;
	}
out:
	return r;
}
    
    int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
    			 struct kvm_vcpu *vcpu)
    {
    	void *data = val;
    	int r = X86EMUL_CONTINUE;
    
    	while (bytes) {
    		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
    		unsigned offset = addr & (PAGE_SIZE-1);
    		unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
    		int ret;
    
    		if (gpa == UNMAPPED_GVA) {
    			r = X86EMUL_PROPAGATE_FAULT;
    			goto out;
    		}
    		ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
    		if (ret < 0) {
    			r = X86EMUL_UNHANDLEABLE;
    			goto out;
    		}
    
    		bytes -= towrite;
    		data += towrite;
    		addr += towrite;
    	}
    out:
    	return r;
    }
    
    
    
    static int emulator_read_emulated(unsigned long addr,
    				  void *val,
    				  unsigned int bytes,
    				  struct kvm_vcpu *vcpu)
    {
    	struct kvm_io_device *mmio_dev;
    	gpa_t                 gpa;
    
    	if (vcpu->mmio_read_completed) {
    		memcpy(val, vcpu->mmio_data, bytes);
    		vcpu->mmio_read_completed = 0;
    		return X86EMUL_CONTINUE;
    	}
    
    
    	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
    
    
    	/* For APIC access vmexit */
    	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
    		goto mmio;
    
    
	if (kvm_read_guest_virt(addr, val, bytes, vcpu)
				== X86EMUL_CONTINUE)
		return X86EMUL_CONTINUE;
    	if (gpa == UNMAPPED_GVA)
    		return X86EMUL_PROPAGATE_FAULT;
    
    mmio:
    	/*
    	 * Is this MMIO handled locally?
    	 */
    
    	mutex_lock(&vcpu->kvm->lock);
    
    	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
    
	if (mmio_dev) {
		kvm_iodevice_read(mmio_dev, gpa, bytes, val);
		mutex_unlock(&vcpu->kvm->lock);
		return X86EMUL_CONTINUE;
	}
	mutex_unlock(&vcpu->kvm->lock);
    
    
    	vcpu->mmio_needed = 1;
    	vcpu->mmio_phys_addr = gpa;
    	vcpu->mmio_size = bytes;
    	vcpu->mmio_is_write = 0;
    
    	return X86EMUL_UNHANDLEABLE;
    }
    
    
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
			const void *val, int bytes)
{
	int ret;

	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
	if (ret < 0)
		return 0;
	kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
	return 1;
}
    
    static int emulator_write_emulated_onepage(unsigned long addr,
    					   const void *val,
    					   unsigned int bytes,
    					   struct kvm_vcpu *vcpu)
    {
	struct kvm_io_device *mmio_dev;
	gpa_t                 gpa;

	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);

	if (gpa == UNMAPPED_GVA) {
		kvm_inject_page_fault(vcpu, addr, 2);
		return X86EMUL_PROPAGATE_FAULT;
	}
    
    	/* For APIC access vmexit */
    	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
    		goto mmio;
    
    	if (emulator_write_phys(vcpu, gpa, val, bytes))
    		return X86EMUL_CONTINUE;
    
    mmio:
    	/*
    	 * Is this MMIO handled locally?
    	 */
    
    	mutex_lock(&vcpu->kvm->lock);
    
    	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
    
	if (mmio_dev) {
		kvm_iodevice_write(mmio_dev, gpa, bytes, val);
		mutex_unlock(&vcpu->kvm->lock);
		return X86EMUL_CONTINUE;
	}
	mutex_unlock(&vcpu->kvm->lock);
    
    
    	vcpu->mmio_needed = 1;
    	vcpu->mmio_phys_addr = gpa;
    	vcpu->mmio_size = bytes;
    	vcpu->mmio_is_write = 1;
    	memcpy(vcpu->mmio_data, val, bytes);
    
    	return X86EMUL_CONTINUE;
    }
    
    int emulator_write_emulated(unsigned long addr,
    				   const void *val,
    				   unsigned int bytes,
    				   struct kvm_vcpu *vcpu)
    {
    	/* Crossing a page boundary? */
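	/*
	 * If so, split the access: -addr & ~PAGE_MASK is the number of bytes
	 * left in the first page (PAGE_SIZE - offset_in_page(addr)); emulate
	 * that piece first and let the remainder fall through below.
	 */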
    	if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
    		int rc, now;
    
    		now = -addr & ~PAGE_MASK;
    		rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
    		if (rc != X86EMUL_CONTINUE)
    			return rc;
    		addr += now;
    		val += now;
    		bytes -= now;
    	}
    	return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
    }
    EXPORT_SYMBOL_GPL(emulator_write_emulated);
    
    static int emulator_cmpxchg_emulated(unsigned long addr,
    				     const void *old,
    				     const void *new,
    				     unsigned int bytes,
    				     struct kvm_vcpu *vcpu)
    {
    	static int reported;
    
    	if (!reported) {
    		reported = 1;
    		printk(KERN_WARNING "kvm: emulating exchange as write\n");
    	}
    
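	/*
	 * A guest cmpxchg8b must update all eight bytes atomically; on a
	 * 32-bit host the generic write path cannot guarantee that, so the
	 * code below maps the guest page and stores the new value with
	 * set_64bit() instead.
	 */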
#ifndef CONFIG_X86_64
	/* guest cmpxchg8b has to be emulated atomically */
	if (bytes == 8) {
		gpa_t gpa;
		struct page *page;
		char *kaddr;
		u64 val;

		gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);

		if (gpa == UNMAPPED_GVA ||
		   (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
			goto emul_write;

		if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
			goto emul_write;

		val = *(u64 *)new;

		page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);

		kaddr = kmap_atomic(page, KM_USER0);
		set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
		kunmap_atomic(kaddr, KM_USER0);
		kvm_release_page_dirty(page);
	}
emul_write:
#endif

    	return emulator_write_emulated(addr, new, bytes, vcpu);
    }
    
    static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
    {
    	return kvm_x86_ops->get_segment_base(vcpu, seg);
    }
    
    int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
{
	kvm_mmu_invlpg(vcpu, address);

	return X86EMUL_CONTINUE;
    }
    
    int emulate_clts(struct kvm_vcpu *vcpu)
    {
    
    	KVMTRACE_0D(CLTS, vcpu, handler);
    
    	kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
    
    	return X86EMUL_CONTINUE;
    }
    
    int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
    {
    	struct kvm_vcpu *vcpu = ctxt->vcpu;
    
    	switch (dr) {
    	case 0 ... 3:
    		*dest = kvm_x86_ops->get_dr(vcpu, dr);
    		return X86EMUL_CONTINUE;
    	default:
    
    		pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
    
    		return X86EMUL_UNHANDLEABLE;
    	}
    }
    
    int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
    {
    	unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
    	int exception;
    
    	kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
    	if (exception) {
    		/* FIXME: better handling */
    		return X86EMUL_UNHANDLEABLE;
    	}
    	return X86EMUL_CONTINUE;
    }
    
    void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
    {
	u8 opcodes[4];
	unsigned long rip = kvm_rip_read(vcpu);
	unsigned long rip_linear;

	rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
    
    
    	kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu);
    
    
    	printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
    	       context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
    }
    EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
    
    
    static struct x86_emulate_ops emulate_ops = {
    
    	.read_std            = kvm_read_guest_virt,
    
    	.read_emulated       = emulator_read_emulated,
    	.write_emulated      = emulator_write_emulated,
    	.cmpxchg_emulated    = emulator_cmpxchg_emulated,
    };
    
    
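/*
 * Populate the register cache (the reads are done purely for their side
 * effect) and mark every register dirty, so the emulator's direct ->regs
 * accesses see current values and are written back on the next sync.
 */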
    static void cache_all_regs(struct kvm_vcpu *vcpu)
    {
    	kvm_register_read(vcpu, VCPU_REGS_RAX);
    	kvm_register_read(vcpu, VCPU_REGS_RSP);
    	kvm_register_read(vcpu, VCPU_REGS_RIP);
    	vcpu->arch.regs_dirty = ~0;
    }
    
    
int emulate_instruction(struct kvm_vcpu *vcpu,
			struct kvm_run *run,
			unsigned long cr2,
			u16 error_code,
			int emulation_type)
{
	int r;
	struct decode_cache *c;

	kvm_clear_exception_queue(vcpu);
    
    	vcpu->arch.mmio_fault_cr2 = cr2;
    
	/*
	 * TODO: fix x86_emulate.c to use guest_read/write_register
	 * instead of direct ->regs accesses; that can save hundreds of
	 * cycles on Intel for instructions that don't read/change RSP,
	 * for example.
	 */
    	cache_all_regs(vcpu);
    
    	vcpu->arch.pio.string = 0;
    
    	if (!(emulation_type & EMULTYPE_NO_DECODE)) {
    
    		int cs_db, cs_l;
    		kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
    
    
    		vcpu->arch.emulate_ctxt.vcpu = vcpu;
    		vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
		vcpu->arch.emulate_ctxt.mode =
			(vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
			? X86EMUL_MODE_REAL : cs_l
			? X86EMUL_MODE_PROT64 : cs_db
			? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
    
    
    		r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
    
    
		/* Reject instructions other than VMCALL/VMMCALL when
		 * trying to emulate an invalid opcode */
    		c = &vcpu->arch.emulate_ctxt.decode;
    		if ((emulation_type & EMULTYPE_TRAP_UD) &&
    		    (!(c->twobyte && c->b == 0x01 &&
    		      (c->modrm_reg == 0 || c->modrm_reg == 3) &&
    		       c->modrm_mod == 3 && c->modrm_rm == 1)))
    			return EMULATE_FAIL;
    
    
		++vcpu->stat.insn_emulation;
		if (r) {
			++vcpu->stat.insn_emulation_fail;
    
    			if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
    				return EMULATE_DONE;
    			return EMULATE_FAIL;
    		}
    	}
    
    
    	r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
    
	if (vcpu->arch.pio.string)
		return EMULATE_DO_MMIO;
    
    	if ((r || vcpu->mmio_is_write) && run) {
    		run->exit_reason = KVM_EXIT_MMIO;
    		run->mmio.phys_addr = vcpu->mmio_phys_addr;
    		memcpy(run->mmio.data, vcpu->mmio_data, 8);
    		run->mmio.len = vcpu->mmio_size;
    		run->mmio.is_write = vcpu->mmio_is_write;
    	}
    
    	if (r) {
    		if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
    			return EMULATE_DONE;
    		if (!vcpu->mmio_needed) {
    			kvm_report_emulation_failure(vcpu, "mmio");
    			return EMULATE_FAIL;
    		}
    		return EMULATE_DO_MMIO;
    	}
    
    
    	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
    
    
    	if (vcpu->mmio_is_write) {
    		vcpu->mmio_needed = 0;
    		return EMULATE_DO_MMIO;
    	}
    
    	return EMULATE_DONE;
    }
    EXPORT_SYMBOL_GPL(emulate_instruction);
    
    
static int pio_copy_data(struct kvm_vcpu *vcpu)
{
	void *p = vcpu->arch.pio_data;
	gva_t q = vcpu->arch.pio.guest_gva;
	unsigned bytes;
	int ret;

	bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
	if (vcpu->arch.pio.in)
		ret = kvm_write_guest_virt(q, p, bytes, vcpu);
	else
		ret = kvm_read_guest_virt(q, p, bytes, vcpu);
	return ret;
}
    
int complete_pio(struct kvm_vcpu *vcpu)
{
	struct kvm_pio_request *io = &vcpu->arch.pio;
	long delta;
	int r;
	unsigned long val;

	if (!io->string) {
    		if (io->in) {
    			val = kvm_register_read(vcpu, VCPU_REGS_RAX);
    			memcpy(&val, vcpu->arch.pio_data, io->size);
    			kvm_register_write(vcpu, VCPU_REGS_RAX, val);
    		}
    
    	} else {
    		if (io->in) {
			r = pio_copy_data(vcpu);
			if (r)
				return r;
    		}
    
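		/*
		 * String I/O: advance RSI/RDI (and decrement RCX for rep) by
		 * the number of elements transferred; io->down means the
		 * direction flag is set, so the index registers walk
		 * backwards through memory.
		 */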
    		delta = 1;
    		if (io->rep) {
    			delta *= io->cur_count;
    			/*
    			 * The size of the register should really depend on
    			 * current address size.
    			 */
    
    			val = kvm_register_read(vcpu, VCPU_REGS_RCX);
    			val -= delta;
    			kvm_register_write(vcpu, VCPU_REGS_RCX, val);
    
    		}
    		if (io->down)
    			delta = -delta;
    		delta *= io->size;
    
    		if (io->in) {
    			val = kvm_register_read(vcpu, VCPU_REGS_RDI);
    			val += delta;
    			kvm_register_write(vcpu, VCPU_REGS_RDI, val);
    		} else {
    			val = kvm_register_read(vcpu, VCPU_REGS_RSI);
    			val += delta;
    			kvm_register_write(vcpu, VCPU_REGS_RSI, val);
    		}
    
    	}
    
    	io->count -= io->cur_count;
    	io->cur_count = 0;
    
    	return 0;
    }
    
    static void kernel_pio(struct kvm_io_device *pio_dev,
    		       struct kvm_vcpu *vcpu,
    		       void *pd)
    {
    	/* TODO: String I/O for in kernel device */
    
    	mutex_lock(&vcpu->kvm->lock);
    
	if (vcpu->arch.pio.in)
		kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
				  vcpu->arch.pio.size,
				  pd);
	else
		kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
				   vcpu->arch.pio.size,
				   pd);
    	mutex_unlock(&vcpu->kvm->lock);
    }
    
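/*
 * Feed each element of a string PIO write to the in-kernel device, one
 * io->size chunk at a time.
 */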
    static void pio_string_write(struct kvm_io_device *pio_dev,
    			     struct kvm_vcpu *vcpu)
    {
    
    	struct kvm_pio_request *io = &vcpu->arch.pio;
    	void *pd = vcpu->arch.pio_data;
    
    	int i;
    
    	mutex_lock(&vcpu->kvm->lock);
    	for (i = 0; i < io->cur_count; i++) {
    		kvm_iodevice_write(pio_dev, io->port,
    				   io->size,
    				   pd);
    		pd += io->size;
    	}
    	mutex_unlock(&vcpu->kvm->lock);
    }
    
static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
					       gpa_t addr, int len,
					       int is_write)
{
	return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
}
    
    int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
    		  int size, unsigned port)
    {
    	struct kvm_io_device *pio_dev;
    
    	unsigned long val;
    
    
    	vcpu->run->exit_reason = KVM_EXIT_IO;
    	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
    
    	vcpu->run->io.size = vcpu->arch.pio.size = size;
    
    	vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
    
    	vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1;
    	vcpu->run->io.port = vcpu->arch.pio.port = port;
    	vcpu->arch.pio.in = in;
    	vcpu->arch.pio.string = 0;
    	vcpu->arch.pio.down = 0;
    	vcpu->arch.pio.rep = 0;
    
    	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
    		KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
    			    handler);
    	else
    		KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
    			    handler);
    
    
    	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	memcpy(vcpu->arch.pio_data, &val, 4);
    
	pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
	if (pio_dev) {
		kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
		complete_pio(vcpu);
		return 1;
	}
    	return 0;
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_pio);
    
    int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
    		  int size, unsigned long count, int down,
    		  gva_t address, int rep, unsigned port)
    {
    	unsigned now, in_page;
    
    	int ret = 0;
    
    	struct kvm_io_device *pio_dev;
    
    	vcpu->run->exit_reason = KVM_EXIT_IO;
    	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
    
    	vcpu->run->io.size = vcpu->arch.pio.size = size;
    
    	vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
    
    	vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count;
    	vcpu->run->io.port = vcpu->arch.pio.port = port;
    	vcpu->arch.pio.in = in;
    	vcpu->arch.pio.string = 1;
    	vcpu->arch.pio.down = down;
    	vcpu->arch.pio.rep = rep;
    
    	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
    		KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
    			    handler);
    	else
    		KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
    			    handler);
    
    
    	if (!count) {
    		kvm_x86_ops->skip_emulated_instruction(vcpu);
    		return 1;
    	}
    
    	if (!down)
    		in_page = PAGE_SIZE - offset_in_page(address);
    	else
    		in_page = offset_in_page(address) + size;
    	now = min(count, (unsigned long)in_page / size);
    
	if (!now)
		now = 1;
    	if (down) {
    		/*
    		 * String I/O in reverse.  Yuck.  Kill the guest, fix later.
    		 */
		pr_unimpl(vcpu, "guest string pio down\n");
		kvm_inject_gp(vcpu, 0);
		return 1;
    	}
    	vcpu->run->io.count = now;
    
    	vcpu->arch.pio.cur_count = now;
    
	if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
		kvm_x86_ops->skip_emulated_instruction(vcpu);
    
    
    	vcpu->arch.pio.guest_gva = address;
    
    	pio_dev = vcpu_find_pio_dev(vcpu, port,
    				    vcpu->arch.pio.cur_count,
    				    !vcpu->arch.pio.in);
    
    	if (!vcpu->arch.pio.in) {
    
    		/* string PIO write */
    		ret = pio_copy_data(vcpu);
    
    		if (ret == X86EMUL_PROPAGATE_FAULT) {
    			kvm_inject_gp(vcpu, 0);
    			return 1;
    		}
    		if (ret == 0 && pio_dev) {
    
    			pio_string_write(pio_dev, vcpu);
    			complete_pio(vcpu);
    
    			if (vcpu->arch.pio.count == 0)
    
    				ret = 1;
    		}
    	} else if (pio_dev)
    		pr_unimpl(vcpu, "no string pio read support yet, "
    		       "port %x size %d count %ld\n",
    			port, size, count);
    
    	return ret;
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
    
    
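/*
 * Dummy IPI target: the interrupt itself is what matters, since it kicks
 * the remote cpu out of guest mode and forces kvmclock to be refreshed on
 * the next guest entry.
 */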
    static void bounce_off(void *info)
    {
    	/* nothing */
    }
    
    static unsigned int  ref_freq;
    static unsigned long tsc_khz_ref;
    
    static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
    				     void *data)
    {
    	struct cpufreq_freqs *freq = data;
    	struct kvm *kvm;
    	struct kvm_vcpu *vcpu;
    	int i, send_ipi = 0;
    
    	if (!ref_freq)
    		ref_freq = freq->old;
    
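	/*
	 * Update cpu_tsc_khz before a frequency increase takes effect and
	 * only after a decrease has completed, so the recorded rate never
	 * falls below what the hardware is actually running at.
	 */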
    	if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
    		return 0;
    	if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
    		return 0;
    	per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
    
    	spin_lock(&kvm_lock);
    	list_for_each_entry(kvm, &vm_list, vm_list) {
    		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
    			vcpu = kvm->vcpus[i];
    			if (!vcpu)
    				continue;
    			if (vcpu->cpu != freq->cpu)
    				continue;
    			if (!kvm_request_guest_time_update(vcpu))
    				continue;
    			if (vcpu->cpu != smp_processor_id())
    				send_ipi++;
    		}
    	}
    	spin_unlock(&kvm_lock);
    
    	if (freq->old < freq->new && send_ipi) {
		/*
		 * We upscale the frequency.  Must make sure the guest
		 * doesn't see old kvmclock values while running with
		 * the new frequency, otherwise we risk the guest seeing
		 * time go backwards.
		 *
		 * In case we update the frequency for another cpu
		 * (which might be in guest context) send an interrupt
		 * to kick the cpu out of guest context.  Next time
		 * guest context is entered kvmclock will be updated,
		 * so the guest will not see stale values.
		 */
    		smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
    	}
    	return 0;
    }
    
    static struct notifier_block kvmclock_cpufreq_notifier_block = {
            .notifier_call  = kvmclock_cpufreq_notifier
    };
    
    
int kvm_arch_init(void *opaque)
{
	int r, cpu;
	struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;

	if (kvm_x86_ops) {
		printk(KERN_ERR "kvm: already loaded the other module\n");
		r = -EEXIST;
		goto out;
	}

	if (!ops->cpu_has_kvm_support()) {
		printk(KERN_ERR "kvm: no hardware support\n");
		r = -EOPNOTSUPP;
		goto out;
	}
	if (ops->disabled_by_bios()) {
		printk(KERN_ERR "kvm: disabled by bios\n");
		r = -EOPNOTSUPP;
		goto out;
	}

	r = kvm_mmu_module_init();
	if (r)
		goto out;

	kvm_init_msr_list();

	kvm_x86_ops = ops;
	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
			PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);

	for_each_possible_cpu(cpu)
		per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
		tsc_khz_ref = tsc_khz;
		cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
					  CPUFREQ_TRANSITION_NOTIFIER);
	}

	return 0;

out:
	return r;
}
    
void kvm_arch_exit(void)
{
	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
					    CPUFREQ_TRANSITION_NOTIFIER);
	kvm_x86_ops = NULL;
	kvm_mmu_module_exit();
}

    int kvm_emulate_halt(struct kvm_vcpu *vcpu)
    {
    	++vcpu->stat.halt_exits;
    
    	KVMTRACE_0D(HLT, vcpu, handler);
    
    	if (irqchip_in_kernel(vcpu->kvm)) {
    
    		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
    
    		return 1;
    	} else {
    		vcpu->run->exit_reason = KVM_EXIT_HLT;
    		return 0;
    	}
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_halt);
    
    
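/*
 * A 32-bit guest passes a 64-bit hypercall gpa split across two registers;
 * a 64-bit guest passes it directly in a0.
 */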
    static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
    			   unsigned long a1)
    {
    	if (is_long_mode(vcpu))
    		return a0;
    	else
    		return a0 | ((gpa_t)a1 << 32);
    }
    
    
    int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
    {
	unsigned long nr, a0, a1, a2, a3, ret;
	int r = 1;
    
    	nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
    	a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
    	a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
    	a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
    
    	KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
    
    
    	if (!is_long_mode(vcpu)) {
    		nr &= 0xFFFFFFFF;
    		a0 &= 0xFFFFFFFF;
    		a1 &= 0xFFFFFFFF;
    		a2 &= 0xFFFFFFFF;
    		a3 &= 0xFFFFFFFF;
    	}
    
    	switch (nr) {
    
    	case KVM_HC_VAPIC_POLL_IRQ:
    		ret = 0;
    		break;
    
    	case KVM_HC_MMU_OP:
    		r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
		break;
	default:
		ret = -KVM_ENOSYS;
		break;
	}
	kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
	++vcpu->stat.hypercalls;
	return r;
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
    
    int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
    {
    	char instruction[3];
    	int ret = 0;
    
    	unsigned long rip = kvm_rip_read(vcpu);
    
    
    
    	/*
    	 * Blow out the MMU to ensure that no other VCPU has an active mapping
    	 * to ensure that the updated hypercall appears atomically across all
    	 * VCPUs.
    	 */
    	kvm_mmu_zap_all(vcpu->kvm);
    
    	kvm_x86_ops->patch_hypercall(vcpu, instruction);
    
    	if (emulator_write_emulated(rip, instruction, 3, vcpu)
    
    	    != X86EMUL_CONTINUE)
    		ret = -EFAULT;
    
    	return ret;
    }
    
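/*
 * Replace the low 32 bits of a control register with new_val, keeping
 * the upper half intact.
 */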
    static u64 mk_cr_64(u64 curr_cr, u32 new_val)
    {
    	return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
    }
    
    void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
    {
    	struct descriptor_table dt = { limit, base };
    
    	kvm_x86_ops->set_gdt(vcpu, &dt);
    }
    
    void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
    {
    	struct descriptor_table dt = { limit, base };
    
    	kvm_x86_ops->set_idt(vcpu, &dt);
    }
    
void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
		   unsigned long *rflags)
{
	kvm_lmsw(vcpu, msw);
	*rflags = kvm_x86_ops->get_rflags(vcpu);
    }
    
unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
{
	unsigned long value;

	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
	switch (cr) {
	case 0:
		value = vcpu->arch.cr0;
		break;
	case 2:
		value = vcpu->arch.cr2;
		break;
	case 3:
		value = vcpu->arch.cr3;
		break;
	case 4:
		value = vcpu->arch.cr4;
		break;
	case 8:
		value = kvm_get_cr8(vcpu);
		break;
	default:
		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
		return 0;
	}
	KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value,
		    (u32)((u64)value >> 32), handler);

	return value;
}
    
void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
		     unsigned long *rflags)
{
	KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val,
		    (u32)((u64)val >> 32), handler);

	switch (cr) {
	case 0:
		kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
		*rflags = kvm_x86_ops->get_rflags(vcpu);
		break;
	case 2:
		vcpu->arch.cr2 = val;
		break;
	case 3:
		kvm_set_cr3(vcpu, val);
		break;
	case 4:
		kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
		break;
	case 8:
		kvm_set_cr8(vcpu, val & 0xfUL);
		break;
	default:
		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
	}
}
    static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
    {
    
    	struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
    	int j, nent = vcpu->arch.cpuid_nent;
    
    
    	e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
    	/* when no next entry is found, the current entry[i] is reselected */
    
    	for (j = i + 1; ; j = (j + 1) % nent) {
    
    		struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
    
    		if (ej->function == e->function) {
    			ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
    			return j;
    		}
    	}
    	return 0; /* silence gcc, even though control never reaches here */
    }
    
    /* find an entry with matching function, matching index (if needed), and that
     * should be read next (if it's stateful) */
    static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
    	u32 function, u32 index)
    {
    	if (e->function != function)
    		return 0;
    	if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
    		return 0;
	if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
	    !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
		return 0;
	return 1;
}

struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
					      u32 function, u32 index)
{
	int i;
	struct kvm_cpuid_entry2 *best = NULL;

	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
		struct kvm_cpuid_entry2 *e;

		e = &vcpu->arch.cpuid_entries[i];
    		if (is_matching_cpuid_entry(e, function, index)) {
    			if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
    				move_to_next_stateful_cpuid_entry(vcpu, i);
    
    			best = e;
    			break;
    		}
    		/*
    		 * Both basic or both extended?
    		 */
    		if (((e->function ^ function) & 0x80000000) == 0)
    			if (!best || e->function > best->function)
    				best = e;
    	}
    
    	return best;
    }
    
    void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
    {