Skip to content
Snippets Groups Projects
x86.c 116 KiB
Newer Older
  • Learn to ignore specific revisions
  • 	int n;
    	struct kvm_memory_slot *memslot;
    	int is_dirty = 0;
    
    
    
    	r = kvm_get_dirty_log(kvm, log, &is_dirty);
    	if (r)
    		goto out;
    
    	/* If nothing is dirty, don't bother messing with page tables. */
    	if (is_dirty) {
    
    		spin_lock(&kvm->mmu_lock);
    
    		kvm_mmu_slot_remove_write_access(kvm, log->slot);
    
    		spin_unlock(&kvm->mmu_lock);
    
    		kvm_flush_remote_tlbs(kvm);
    		memslot = &kvm->memslots[log->slot];
    		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
    		memset(memslot->dirty_bitmap, 0, n);
    	}
    	r = 0;
    out:
    
    long kvm_arch_vm_ioctl(struct file *filp,
    		       unsigned int ioctl, unsigned long arg)
    {
    	struct kvm *kvm = filp->private_data;
    	void __user *argp = (void __user *)arg;
    	int r = -EINVAL;
    
    	/*
    	 * This union makes it completely explicit to gcc-3.x
    	 * that these two variables' stack usage should be
    	 * combined, not added together.
    	 */
    	union {
    		struct kvm_pit_state ps;
    		struct kvm_memory_alias alias;
    	} u;
    
    
    	switch (ioctl) {
    	case KVM_SET_TSS_ADDR:
    		r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
    		if (r < 0)
    			goto out;
    		break;
    	case KVM_SET_MEMORY_REGION: {
    		struct kvm_memory_region kvm_mem;
    		struct kvm_userspace_memory_region kvm_userspace_mem;
    
    		r = -EFAULT;
    		if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
    			goto out;
    		kvm_userspace_mem.slot = kvm_mem.slot;
    		kvm_userspace_mem.flags = kvm_mem.flags;
    		kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
    		kvm_userspace_mem.memory_size = kvm_mem.memory_size;
    		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
    		if (r)
    			goto out;
    		break;
    	}
    	case KVM_SET_NR_MMU_PAGES:
    		r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
    		if (r)
    			goto out;
    		break;
    	case KVM_GET_NR_MMU_PAGES:
    		r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
    		break;
    
    	case KVM_SET_MEMORY_ALIAS:
    
    		if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias)))
    
    		r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias);
    
    		if (r)
    			goto out;
    		break;
    	case KVM_CREATE_IRQCHIP:
    		r = -ENOMEM;
    
    		kvm->arch.vpic = kvm_create_pic(kvm);
    		if (kvm->arch.vpic) {
    
    			r = kvm_ioapic_init(kvm);
    			if (r) {
    
    				kfree(kvm->arch.vpic);
    				kvm->arch.vpic = NULL;
    
    				goto out;
    			}
    		} else
    			goto out;
    
    		r = kvm_setup_default_irq_routing(kvm);
    		if (r) {
    			kfree(kvm->arch.vpic);
    			kfree(kvm->arch.vioapic);
    			goto out;
    		}
    
    Sheng Yang's avatar
    Sheng Yang committed
    	case KVM_CREATE_PIT:
    
    		mutex_lock(&kvm->lock);
    		r = -EEXIST;
    		if (kvm->arch.vpit)
    			goto create_pit_unlock;
    
    Sheng Yang's avatar
    Sheng Yang committed
    		r = -ENOMEM;
    		kvm->arch.vpit = kvm_create_pit(kvm);
    		if (kvm->arch.vpit)
    			r = 0;
    
    	create_pit_unlock:
    		mutex_unlock(&kvm->lock);
    
    Sheng Yang's avatar
    Sheng Yang committed
    		break;
    
    	case KVM_IRQ_LINE_STATUS:
    
    	case KVM_IRQ_LINE: {
    		struct kvm_irq_level irq_event;
    
    		r = -EFAULT;
    		if (copy_from_user(&irq_event, argp, sizeof irq_event))
    			goto out;
    		if (irqchip_in_kernel(kvm)) {
    
    			mutex_lock(&kvm->lock);
    
    			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
    					irq_event.irq, irq_event.level);
    
    			mutex_unlock(&kvm->lock);
    
    			if (ioctl == KVM_IRQ_LINE_STATUS) {
    				irq_event.status = status;
    				if (copy_to_user(argp, &irq_event,
    							sizeof irq_event))
    					goto out;
    			}
    
    			r = 0;
    		}
    		break;
    	}
    	case KVM_GET_IRQCHIP: {
    		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
    
    		struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
    
    		r = -EFAULT;
    		if (copy_from_user(chip, argp, sizeof *chip))
    			goto get_irqchip_out;
    
    		r = -ENXIO;
    		if (!irqchip_in_kernel(kvm))
    
    			goto get_irqchip_out;
    		r = kvm_vm_ioctl_get_irqchip(kvm, chip);
    
    			goto get_irqchip_out;
    
    		if (copy_to_user(argp, chip, sizeof *chip))
    			goto get_irqchip_out;
    
    	get_irqchip_out:
    		kfree(chip);
    		if (r)
    			goto out;
    
    		break;
    	}
    	case KVM_SET_IRQCHIP: {
    		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
    
    		struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
    
    		r = -EFAULT;
    		if (copy_from_user(chip, argp, sizeof *chip))
    			goto set_irqchip_out;
    
    		r = -ENXIO;
    		if (!irqchip_in_kernel(kvm))
    
    			goto set_irqchip_out;
    		r = kvm_vm_ioctl_set_irqchip(kvm, chip);
    
    			goto set_irqchip_out;
    
    	set_irqchip_out:
    		kfree(chip);
    		if (r)
    			goto out;
    
    	case KVM_GET_PIT: {
    		r = -EFAULT;
    
    		if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
    
    			goto out;
    		r = -ENXIO;
    		if (!kvm->arch.vpit)
    			goto out;
    
    		r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
    
    		if (r)
    			goto out;
    		r = -EFAULT;
    
    		if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
    
    			goto out;
    		r = 0;
    		break;
    	}
    	case KVM_SET_PIT: {
    		r = -EFAULT;
    
    		if (copy_from_user(&u.ps, argp, sizeof u.ps))
    
    			goto out;
    		r = -ENXIO;
    		if (!kvm->arch.vpit)
    			goto out;
    
    		r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
    
    	case KVM_REINJECT_CONTROL: {
    		struct kvm_reinject_control control;
    		r =  -EFAULT;
    		if (copy_from_user(&control, argp, sizeof(control)))
    			goto out;
    		r = kvm_vm_ioctl_reinject(kvm, &control);
    		if (r)
    			goto out;
    		r = 0;
    		break;
    	}
    
    static void kvm_init_msr_list(void)
    
    {
    	u32 dummy[2];
    	unsigned i, j;
    
    	for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
    		if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
    			continue;
    		if (j < i)
    			msrs_to_save[j] = msrs_to_save[i];
    		j++;
    	}
    	num_msrs_to_save = j;
    }
    
    
    /*
     * Only apic need an MMIO device hook, so shortcut now..
     */
    static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
    
    	if (vcpu->arch.apic) {
    		dev = &vcpu->arch.apic->dev;
    
    		if (dev->in_range(dev, addr, len, is_write))
    
    			return dev;
    	}
    	return NULL;
    }
    
    
    static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
    
    	dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write);
    
    		dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len,
    					  is_write);
    
    static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
    			       struct kvm_vcpu *vcpu)
    
    	int r = X86EMUL_CONTINUE;
    
    		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
    
    		unsigned offset = addr & (PAGE_SIZE-1);
    
    		unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
    
    		if (gpa == UNMAPPED_GVA) {
    			r = X86EMUL_PROPAGATE_FAULT;
    			goto out;
    		}
    
    		ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
    
    		if (ret < 0) {
    			r = X86EMUL_UNHANDLEABLE;
    			goto out;
    		}
    
    		bytes -= toread;
    		data += toread;
    		addr += toread;
    
    static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
    				struct kvm_vcpu *vcpu)
    
    {
    	void *data = val;
    	int r = X86EMUL_CONTINUE;
    
    	while (bytes) {
    		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
    		unsigned offset = addr & (PAGE_SIZE-1);
    		unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
    		int ret;
    
    		if (gpa == UNMAPPED_GVA) {
    			r = X86EMUL_PROPAGATE_FAULT;
    			goto out;
    		}
    		ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
    		if (ret < 0) {
    			r = X86EMUL_UNHANDLEABLE;
    			goto out;
    		}
    
    		bytes -= towrite;
    		data += towrite;
    		addr += towrite;
    	}
    out:
    	return r;
    }
    
    
    
/*
 * Emulated memory read: satisfy a pending MMIO completion, otherwise try a
 * normal guest-virtual read, and fall back to MMIO dispatch (in-kernel
 * device or exit to userspace).
 */
static int emulator_read_emulated(unsigned long addr,
				  void *val,
				  unsigned int bytes,
				  struct kvm_vcpu *vcpu)
{
	struct kvm_io_device *mmio_dev;
	gpa_t                 gpa;

	/* A previous MMIO exit completed in userspace: consume its data. */
	if (vcpu->mmio_read_completed) {
		memcpy(val, vcpu->mmio_data, bytes);
		vcpu->mmio_read_completed = 0;
		return X86EMUL_CONTINUE;
	}


	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);


	/* For APIC access vmexit */
	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
		goto mmio;


	if (kvm_read_guest_virt(addr, val, bytes, vcpu)
				== X86EMUL_CONTINUE)

		return X86EMUL_CONTINUE;
	if (gpa == UNMAPPED_GVA)
		return X86EMUL_PROPAGATE_FAULT;

mmio:
	/*
	 * Is this MMIO handled locally?
	 */

	mutex_lock(&vcpu->kvm->lock);

	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);

	if (mmio_dev) {
		kvm_iodevice_read(mmio_dev, gpa, bytes, val);

		/* NOTE(review): a "return X86EMUL_CONTINUE;" and the closing
		 * brace of this if-block appear to have been lost in
		 * extraction; as written the unlock below would run twice.
		 * Verify against the upstream source. */
		mutex_unlock(&vcpu->kvm->lock);

	mutex_unlock(&vcpu->kvm->lock);


	/* Not handled in kernel: record the request and exit to userspace. */
	vcpu->mmio_needed = 1;
	vcpu->mmio_phys_addr = gpa;
	vcpu->mmio_size = bytes;
	vcpu->mmio_is_write = 0;

	return X86EMUL_UNHANDLEABLE;
}
    
    
    int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
    
    			  const void *val, int bytes)
    
    {
    	int ret;
    
    	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
    
    	kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
    
    	return 1;
    }
    
/*
 * Emulated memory write confined to a single guest page: translate the GVA,
 * try a direct physical write, and fall back to MMIO dispatch (in-kernel
 * device or exit to userspace).
 */
static int emulator_write_emulated_onepage(unsigned long addr,
					   const void *val,
					   unsigned int bytes,
					   struct kvm_vcpu *vcpu)
{
	struct kvm_io_device *mmio_dev;

	gpa_t                 gpa;

	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);

		/* NOTE(review): the guard "if (gpa == UNMAPPED_GVA) {" above
		 * this injection appears to have been lost in extraction —
		 * the page fault should only be injected for an unmapped
		 * GVA.  Verify against the upstream source. */
		kvm_inject_page_fault(vcpu, addr, 2);

		return X86EMUL_PROPAGATE_FAULT;
	}

	/* For APIC access vmexit */
	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
		goto mmio;

	if (emulator_write_phys(vcpu, gpa, val, bytes))
		return X86EMUL_CONTINUE;

mmio:
	/*
	 * Is this MMIO handled locally?
	 */

	mutex_lock(&vcpu->kvm->lock);

	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);

	if (mmio_dev) {
		kvm_iodevice_write(mmio_dev, gpa, bytes, val);

		/* NOTE(review): a "return X86EMUL_CONTINUE;" and the closing
		 * brace of this if-block appear to be missing here (lost in
		 * extraction); as written the unlock below runs twice. */
		mutex_unlock(&vcpu->kvm->lock);

	mutex_unlock(&vcpu->kvm->lock);


	/* Not handled in kernel: stash the write and exit to userspace. */
	vcpu->mmio_needed = 1;
	vcpu->mmio_phys_addr = gpa;
	vcpu->mmio_size = bytes;
	vcpu->mmio_is_write = 1;
	memcpy(vcpu->mmio_data, val, bytes);

	return X86EMUL_CONTINUE;
}
    
    int emulator_write_emulated(unsigned long addr,
    				   const void *val,
    				   unsigned int bytes,
    				   struct kvm_vcpu *vcpu)
    {
    	/* Crossing a page boundary? */
    	if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
    		int rc, now;
    
    		now = -addr & ~PAGE_MASK;
    		rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
    		if (rc != X86EMUL_CONTINUE)
    			return rc;
    		addr += now;
    		val += now;
    		bytes -= now;
    	}
    	return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
    }
    EXPORT_SYMBOL_GPL(emulator_write_emulated);
    
    static int emulator_cmpxchg_emulated(unsigned long addr,
    				     const void *old,
    				     const void *new,
    				     unsigned int bytes,
    				     struct kvm_vcpu *vcpu)
    {
    	static int reported;
    
    	if (!reported) {
    		reported = 1;
    		printk(KERN_WARNING "kvm: emulating exchange as write\n");
    	}
    
    #ifndef CONFIG_X86_64
    	/* guests cmpxchg8b have to be emulated atomically */
    	if (bytes == 8) {
    
    Andrew Morton's avatar
    Andrew Morton committed
    		char *kaddr;
    
    		gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
    
    
    		if (gpa == UNMAPPED_GVA ||
    		   (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
    			goto emul_write;
    
    		if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
    			goto emul_write;
    
    		val = *(u64 *)new;
    
    		page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
    
    Andrew Morton's avatar
    Andrew Morton committed
    		kaddr = kmap_atomic(page, KM_USER0);
    		set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
    		kunmap_atomic(kaddr, KM_USER0);
    
    		kvm_release_page_dirty(page);
    	}
    
    emul_write:
    
    	return emulator_write_emulated(addr, new, bytes, vcpu);
    }
    
/* Return the base address of segment @seg via the vendor (VMX/SVM) hook. */
static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
	return kvm_x86_ops->get_segment_base(vcpu, seg);
}
    
    int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
    {
    
    Marcelo Tosatti's avatar
    Marcelo Tosatti committed
    	kvm_mmu_invlpg(vcpu, address);
    
    	return X86EMUL_CONTINUE;
    }
    
    int emulate_clts(struct kvm_vcpu *vcpu)
    {
    
    	KVMTRACE_0D(CLTS, vcpu, handler);
    
    	kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
    
    	return X86EMUL_CONTINUE;
    }
    
    int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
    {
    	struct kvm_vcpu *vcpu = ctxt->vcpu;
    
    	switch (dr) {
    	case 0 ... 3:
    		*dest = kvm_x86_ops->get_dr(vcpu, dr);
    		return X86EMUL_CONTINUE;
    	default:
    
    		pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
    
    		return X86EMUL_UNHANDLEABLE;
    	}
    }
    
    int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
    {
    	unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
    	int exception;
    
    	kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
    	if (exception) {
    		/* FIXME: better handling */
    		return X86EMUL_UNHANDLEABLE;
    	}
    	return X86EMUL_CONTINUE;
    }
    
    void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
    {
    	u8 opcodes[4];
    
    	unsigned long rip = kvm_rip_read(vcpu);
    
    	rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
    
    
    	kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu);
    
    
    	printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
    	       context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
    }
    EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
    
    
/* Memory-access callbacks handed to the x86 instruction emulator. */
static struct x86_emulate_ops emulate_ops = {

	.read_std            = kvm_read_guest_virt,

	.read_emulated       = emulator_read_emulated,
	.write_emulated      = emulator_write_emulated,
	.cmpxchg_emulated    = emulator_cmpxchg_emulated,
};
    
    
/*
 * NOTE(review): kvm_register_read() presumably faults each register into
 * the vcpu's register cache — confirm against kvm_cache_regs.h.  The reads
 * are performed for that side effect; the values are discarded.
 */
static void cache_all_regs(struct kvm_vcpu *vcpu)
{
	kvm_register_read(vcpu, VCPU_REGS_RAX);
	kvm_register_read(vcpu, VCPU_REGS_RSP);
	kvm_register_read(vcpu, VCPU_REGS_RIP);
	/* Mark every register dirty so cached values get written back. */
	vcpu->arch.regs_dirty = ~0;
}
    
    
    int emulate_instruction(struct kvm_vcpu *vcpu,
    			struct kvm_run *run,
    			unsigned long cr2,
    			u16 error_code,
    
    	kvm_clear_exception_queue(vcpu);
    
    	vcpu->arch.mmio_fault_cr2 = cr2;
    
    	/*
    	 * TODO: fix x86_emulate.c to use guest_read/write_register
    	 * instead of direct ->regs accesses, can save hundred cycles
    	 * on Intel for instructions that don't read/change RSP, for
    	 * for example.
    	 */
    	cache_all_regs(vcpu);
    
    	vcpu->arch.pio.string = 0;
    
    	if (!(emulation_type & EMULTYPE_NO_DECODE)) {
    
    		int cs_db, cs_l;
    		kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
    
    
    		vcpu->arch.emulate_ctxt.vcpu = vcpu;
    		vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
    		vcpu->arch.emulate_ctxt.mode =
    			(vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
    
    			? X86EMUL_MODE_REAL : cs_l
    			? X86EMUL_MODE_PROT64 :	cs_db
    			? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
    
    
    		r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
    
    
    		/* Reject the instructions other than VMCALL/VMMCALL when
    		 * try to emulate invalid opcode */
    		c = &vcpu->arch.emulate_ctxt.decode;
    		if ((emulation_type & EMULTYPE_TRAP_UD) &&
    		    (!(c->twobyte && c->b == 0x01 &&
    		      (c->modrm_reg == 0 || c->modrm_reg == 3) &&
    		       c->modrm_mod == 3 && c->modrm_rm == 1)))
    			return EMULATE_FAIL;
    
    
    		++vcpu->stat.insn_emulation;
    
    			++vcpu->stat.insn_emulation_fail;
    
    			if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
    				return EMULATE_DONE;
    			return EMULATE_FAIL;
    		}
    	}
    
    
    	if (emulation_type & EMULTYPE_SKIP) {
    		kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip);
    		return EMULATE_DONE;
    	}
    
    
    	r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
    
    	shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
    
    	if (r == 0)
    		kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask);
    
    	if (vcpu->arch.pio.string)
    
    		return EMULATE_DO_MMIO;
    
    	if ((r || vcpu->mmio_is_write) && run) {
    		run->exit_reason = KVM_EXIT_MMIO;
    		run->mmio.phys_addr = vcpu->mmio_phys_addr;
    		memcpy(run->mmio.data, vcpu->mmio_data, 8);
    		run->mmio.len = vcpu->mmio_size;
    		run->mmio.is_write = vcpu->mmio_is_write;
    	}
    
    	if (r) {
    		if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
    			return EMULATE_DONE;
    		if (!vcpu->mmio_needed) {
    			kvm_report_emulation_failure(vcpu, "mmio");
    			return EMULATE_FAIL;
    		}
    		return EMULATE_DO_MMIO;
    	}
    
    
    	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
    
    
    	if (vcpu->mmio_is_write) {
    		vcpu->mmio_needed = 0;
    		return EMULATE_DO_MMIO;
    	}
    
    	return EMULATE_DONE;
    }
    EXPORT_SYMBOL_GPL(emulate_instruction);
    
    
    static int pio_copy_data(struct kvm_vcpu *vcpu)
    {
    
    	void *p = vcpu->arch.pio_data;
    
    Izik Eidus's avatar
    Izik Eidus committed
    	gva_t q = vcpu->arch.pio.guest_gva;
    
    Izik Eidus's avatar
    Izik Eidus committed
    	int ret;
    
    	bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
    	if (vcpu->arch.pio.in)
    
    Izik Eidus's avatar
    Izik Eidus committed
    		ret = kvm_write_guest_virt(q, p, bytes, vcpu);
    
    Izik Eidus's avatar
    Izik Eidus committed
    		ret = kvm_read_guest_virt(q, p, bytes, vcpu);
    	return ret;
    
    }
    
    int complete_pio(struct kvm_vcpu *vcpu)
    {
    
    	struct kvm_pio_request *io = &vcpu->arch.pio;
    
    	unsigned long val;
    
    		if (io->in) {
    			val = kvm_register_read(vcpu, VCPU_REGS_RAX);
    			memcpy(&val, vcpu->arch.pio_data, io->size);
    			kvm_register_write(vcpu, VCPU_REGS_RAX, val);
    		}
    
    	} else {
    		if (io->in) {
    			r = pio_copy_data(vcpu);
    
    				return r;
    		}
    
    		delta = 1;
    		if (io->rep) {
    			delta *= io->cur_count;
    			/*
    			 * The size of the register should really depend on
    			 * current address size.
    			 */
    
    			val = kvm_register_read(vcpu, VCPU_REGS_RCX);
    			val -= delta;
    			kvm_register_write(vcpu, VCPU_REGS_RCX, val);
    
    		}
    		if (io->down)
    			delta = -delta;
    		delta *= io->size;
    
    		if (io->in) {
    			val = kvm_register_read(vcpu, VCPU_REGS_RDI);
    			val += delta;
    			kvm_register_write(vcpu, VCPU_REGS_RDI, val);
    		} else {
    			val = kvm_register_read(vcpu, VCPU_REGS_RSI);
    			val += delta;
    			kvm_register_write(vcpu, VCPU_REGS_RSI, val);
    		}
    
    	}
    
    	io->count -= io->cur_count;
    	io->cur_count = 0;
    
    	return 0;
    }
    
    static void kernel_pio(struct kvm_io_device *pio_dev,
    		       struct kvm_vcpu *vcpu,
    		       void *pd)
    {
    	/* TODO: String I/O for in kernel device */
    
    	mutex_lock(&vcpu->kvm->lock);
    
    	if (vcpu->arch.pio.in)
    		kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
    				  vcpu->arch.pio.size,
    
    		kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
    				   vcpu->arch.pio.size,
    
    				   pd);
    	mutex_unlock(&vcpu->kvm->lock);
    }
    
    static void pio_string_write(struct kvm_io_device *pio_dev,
    			     struct kvm_vcpu *vcpu)
    {
    
    	struct kvm_pio_request *io = &vcpu->arch.pio;
    	void *pd = vcpu->arch.pio_data;
    
    	int i;
    
    	mutex_lock(&vcpu->kvm->lock);
    	for (i = 0; i < io->cur_count; i++) {
    		kvm_iodevice_write(pio_dev, io->port,
    				   io->size,
    				   pd);
    		pd += io->size;
    	}
    	mutex_unlock(&vcpu->kvm->lock);
    }
    
    static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
    
    	return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
    
    }
    
    int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
    		  int size, unsigned port)
    {
    	struct kvm_io_device *pio_dev;
    
    	unsigned long val;
    
    
    	vcpu->run->exit_reason = KVM_EXIT_IO;
    	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
    
    	vcpu->run->io.size = vcpu->arch.pio.size = size;
    
    	vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
    
    	vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1;
    	vcpu->run->io.port = vcpu->arch.pio.port = port;
    	vcpu->arch.pio.in = in;
    	vcpu->arch.pio.string = 0;
    	vcpu->arch.pio.down = 0;
    	vcpu->arch.pio.rep = 0;
    
    Feng (Eric) Liu's avatar
    Feng (Eric) Liu committed
    	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
    		KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
    			    handler);
    	else
    		KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
    			    handler);
    
    
    	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
    	memcpy(vcpu->arch.pio_data, &val, 4);
    
    	pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
    
    		kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
    
    		complete_pio(vcpu);
    		return 1;
    	}
    	return 0;
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_pio);
    
    int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
    		  int size, unsigned long count, int down,
    		  gva_t address, int rep, unsigned port)
    {
    	unsigned now, in_page;
    
    Izik Eidus's avatar
    Izik Eidus committed
    	int ret = 0;
    
    	struct kvm_io_device *pio_dev;
    
    	vcpu->run->exit_reason = KVM_EXIT_IO;
    	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
    
    	vcpu->run->io.size = vcpu->arch.pio.size = size;
    
    	vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
    
    	vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count;
    	vcpu->run->io.port = vcpu->arch.pio.port = port;
    	vcpu->arch.pio.in = in;
    	vcpu->arch.pio.string = 1;
    	vcpu->arch.pio.down = down;
    	vcpu->arch.pio.rep = rep;
    
    Feng (Eric) Liu's avatar
    Feng (Eric) Liu committed
    	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
    		KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
    			    handler);
    	else
    		KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
    			    handler);
    
    
    	if (!count) {
    		kvm_x86_ops->skip_emulated_instruction(vcpu);
    		return 1;
    	}
    
    	if (!down)
    		in_page = PAGE_SIZE - offset_in_page(address);
    	else
    		in_page = offset_in_page(address) + size;
    	now = min(count, (unsigned long)in_page / size);
    
    Izik Eidus's avatar
    Izik Eidus committed
    	if (!now)
    
    		now = 1;
    	if (down) {
    		/*
    		 * String I/O in reverse.  Yuck.  Kill the guest, fix later.
    		 */
    		pr_unimpl(vcpu, "guest string pio down\n");
    
    		return 1;
    	}
    	vcpu->run->io.count = now;
    
    	vcpu->arch.pio.cur_count = now;
    
    	if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
    
    		kvm_x86_ops->skip_emulated_instruction(vcpu);
    
    
    Izik Eidus's avatar
    Izik Eidus committed
    	vcpu->arch.pio.guest_gva = address;
    
    	pio_dev = vcpu_find_pio_dev(vcpu, port,
    				    vcpu->arch.pio.cur_count,
    				    !vcpu->arch.pio.in);
    
    	if (!vcpu->arch.pio.in) {
    
    		/* string PIO write */
    		ret = pio_copy_data(vcpu);
    
    Izik Eidus's avatar
    Izik Eidus committed
    		if (ret == X86EMUL_PROPAGATE_FAULT) {
    			kvm_inject_gp(vcpu, 0);
    			return 1;
    		}
    		if (ret == 0 && pio_dev) {
    
    			pio_string_write(pio_dev, vcpu);
    			complete_pio(vcpu);
    
    			if (vcpu->arch.pio.count == 0)
    
    				ret = 1;
    		}
    	} else if (pio_dev)
    		pr_unimpl(vcpu, "no string pio read support yet, "
    		       "port %x size %d count %ld\n",
    			port, size, count);
    
    	return ret;
    }
    EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
    
    
    static void bounce_off(void *info)
    {
    	/* nothing */
    }
    
    static unsigned int  ref_freq;
    static unsigned long tsc_khz_ref;
    
    static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
    				     void *data)
    {
    	struct cpufreq_freqs *freq = data;
    	struct kvm *kvm;
    	struct kvm_vcpu *vcpu;
    	int i, send_ipi = 0;
    
    	if (!ref_freq)
    		ref_freq = freq->old;
    
    	if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
    		return 0;
    	if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
    		return 0;
    	per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
    
    	spin_lock(&kvm_lock);
    	list_for_each_entry(kvm, &vm_list, vm_list) {
    		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
    			vcpu = kvm->vcpus[i];
    			if (!vcpu)
    				continue;
    			if (vcpu->cpu != freq->cpu)
    				continue;
    			if (!kvm_request_guest_time_update(vcpu))
    				continue;
    			if (vcpu->cpu != smp_processor_id())
    				send_ipi++;
    		}
    	}
    	spin_unlock(&kvm_lock);
    
    	if (freq->old < freq->new && send_ipi) {
    		/*
    		 * We upscale the frequency.  Must make the guest
    		 * doesn't see old kvmclock values while running with
    		 * the new frequency, otherwise we risk the guest sees
    		 * time go backwards.
    		 *
    		 * In case we update the frequency for another cpu
    		 * (which might be in guest context) send an interrupt
    		 * to kick the cpu out of guest context.  Next time
    		 * guest context is entered kvmclock will be updated,
    		 * so the guest will not see stale values.
    		 */
    		smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
    	}
    	return 0;
    }
    
/* Registered with the cpufreq core to receive frequency-change events. */
static struct notifier_block kvmclock_cpufreq_notifier_block = {
        .notifier_call  = kvmclock_cpufreq_notifier
};
    
    
    	struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
    
    	if (kvm_x86_ops) {
    		printk(KERN_ERR "kvm: already loaded the other module\n");
    
    	}
    
    	if (!ops->cpu_has_kvm_support()) {
    		printk(KERN_ERR "kvm: no hardware support\n");
    
    	}
    	if (ops->disabled_by_bios()) {
    		printk(KERN_ERR "kvm: disabled by bios\n");
    
    	r = kvm_mmu_module_init();
    	if (r)
    		goto out;
    
    	kvm_init_msr_list();
    
    
    	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
    
    Sheng Yang's avatar
    Sheng Yang committed
    	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
    	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
    
    			PT_DIRTY_MASK, PT64_NX_MASK, 0);