    /*
     * Local APIC virtualization
     *
     * Copyright (C) 2006 Qumranet, Inc.
     * Copyright (C) 2007 Novell
     * Copyright (C) 2007 Intel
    
     * Copyright 2009 Red Hat, Inc. and/or its affiliates.
    
     *
     * Authors:
     *   Dor Laor <dor.laor@qumranet.com>
     *   Gregory Haskins <ghaskins@novell.com>
     *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
     *
     * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
     *
     * This work is licensed under the terms of the GNU GPL, version 2.  See
     * the COPYING file in the top-level directory.
     */
    
    
    #include <linux/kvm_host.h>
    
    #include <linux/kvm.h>
    #include <linux/mm.h>
    #include <linux/highmem.h>
    #include <linux/smp.h>
    #include <linux/hrtimer.h>
    #include <linux/io.h>
    #include <linux/module.h>
    
    #include <linux/math64.h>
    
    #include <asm/processor.h>
    #include <asm/msr.h>
    #include <asm/page.h>
    #include <asm/current.h>
    #include <asm/apicdef.h>
    
    #include <linux/atomic.h>
    
    #include "kvm_cache_regs.h"
    
    #include "irq.h"
    
    #include "cpuid.h"
    
    #ifndef CONFIG_X86_64
    #define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
    #else
    #define mod_64(x, y) ((x) % (y))
    #endif
    
    
    #define PRId64 "d"
    #define PRIx64 "llx"
    #define PRIu64 "u"
    #define PRIo64 "o"
    
    #define APIC_BUS_CYCLE_NS 1
    
    /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
    #define apic_debug(fmt, arg...)
    
    #define APIC_LVT_NUM			6
    /* 14 is the version for Xeon and Pentium 8.4.8*/
    #define APIC_VERSION			(0x14UL | ((APIC_LVT_NUM - 1) << 16))
    #define LAPIC_MMIO_LENGTH		(1 << 12)
    /* followed define is not in apicdef.h */
    #define APIC_SHORT_MASK			0xc0000
    #define APIC_DEST_NOSHORT		0x0
    #define APIC_DEST_MASK			0x800
    #define MAX_APIC_VECTOR			256
    
    
    #define VEC_POS(v) ((v) & (32 - 1))
    #define REG_POS(v) (((v) >> 5) << 4)
    
    static unsigned int min_timer_period_us = 500;
    module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
    
    
    static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
    {
    	*((u32 *) (apic->regs + reg_off)) = val;
    }
    
    static inline int apic_test_and_set_vector(int vec, void *bitmap)
    {
    	return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
    }
    
    static inline int apic_test_and_clear_vector(int vec, void *bitmap)
    {
    	return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
    }
    
    
    static inline int apic_test_vector(int vec, void *bitmap)
    {
    	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
    }
    
    
    static inline void apic_set_vector(int vec, void *bitmap)
    {
    	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
    }
    
    static inline void apic_clear_vector(int vec, void *bitmap)
    {
    	clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
    }
    
    
    static inline int __apic_test_and_set_vector(int vec, void *bitmap)
    {
    	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
    }
    
    static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
    {
    	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
    }
    
    
    struct static_key_deferred apic_hw_disabled __read_mostly;
    
    struct static_key_deferred apic_sw_disabled __read_mostly;
    
/*
 * Write the Spurious Interrupt Vector register.  The APIC software
 * enable bit is mirrored into the apic_sw_disabled static key so hot
 * paths only pay for the "is any APIC sw-disabled?" check when needed.
 */
static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
{
	/* Only touch the static key when the enable bit actually flips. */
	if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
		if (val & APIC_SPIV_APIC_ENABLED)
			/* deferred dec avoids key thrash on fast toggles */
			static_key_slow_dec_deferred(&apic_sw_disabled);
		else
			static_key_slow_inc(&apic_sw_disabled.key);
	}
	apic_set_reg(apic, APIC_SPIV, val);
}
    
    
    static inline int apic_enabled(struct kvm_lapic *apic)
    {
    
    	return kvm_apic_sw_enabled(apic) &&	kvm_apic_hw_enabled(apic);
    
    #define LVT_MASK	\
    	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
    
    #define LINT_MASK	\
    	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
    	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
    
    static inline int kvm_apic_id(struct kvm_lapic *apic)
    {
    
    	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
    
/*
 * Rebuild the per-VM physical/logical APIC id lookup tables used for
 * fast interrupt delivery.  Called whenever an APIC id, LDR, DFR or
 * APIC mode changes.  Readers access kvm->arch.apic_map under RCU;
 * updates are serialized by apic_map_lock.
 */
static void recalculate_apic_map(struct kvm *kvm)
{
	struct kvm_apic_map *new, *old = NULL;
	struct kvm_vcpu *vcpu;
	int i;

	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);

	mutex_lock(&kvm->arch.apic_map_lock);

	/* On allocation failure we still publish NULL below, so readers
	 * fall back to the slow path instead of using a stale map. */
	if (!new)
		goto out;

	/* xAPIC defaults; may be overridden per-vcpu below. */
	new->ldr_bits = 8;
	/* flat mode is default */
	new->cid_shift = 8;
	new->cid_mask = 0;
	new->lid_mask = 0xff;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvm_lapic *apic = vcpu->arch.apic;
		u16 cid, lid;
		u32 ldr;

		if (!kvm_apic_present(vcpu))
			continue;

		/*
		 * All APICs have to be configured in the same mode by an OS.
		 * We take advantage of this while building the logical id
		 * lookup table. After reset APICs are in xapic/flat mode, so
		 * if we find an apic with a different setting we assume this
		 * is the mode the OS wants all apics to be in; build the
		 * lookup table accordingly.
		 */
		if (apic_x2apic_mode(apic)) {
			new->ldr_bits = 32;
			new->cid_shift = 16;
			new->cid_mask = new->lid_mask = 0xffff;
		} else if (kvm_apic_sw_enabled(apic) &&
				!new->cid_mask /* flat mode */ &&
				kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
			new->cid_shift = 4;
			new->cid_mask = 0xf;
			new->lid_mask = 0xf;
		}

		new->phys_map[kvm_apic_id(apic)] = apic;

		ldr = kvm_apic_get_reg(apic, APIC_LDR);
		cid = apic_cluster_id(new, ldr);
		lid = apic_logical_id(new, ldr);

		if (lid)
			new->logical_map[cid][ffs(lid) - 1] = apic;
	}
out:
	old = rcu_dereference_protected(kvm->arch.apic_map,
			lockdep_is_held(&kvm->arch.apic_map_lock));
	rcu_assign_pointer(kvm->arch.apic_map, new);
	mutex_unlock(&kvm->arch.apic_map_lock);

	/* Free the old map only after all RCU readers are done with it. */
	if (old)
		kfree_rcu(old, rcu);

	/* The ioapic EOI bitmap depends on the map; ask for a refresh. */
	kvm_ioapic_make_eoibitmap_request(kvm);
}
    
/* Set the xAPIC id (bits 31:24 of APIC_ID) and refresh the delivery map. */
static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
{
	apic_set_reg(apic, APIC_ID, id << 24);
	recalculate_apic_map(apic->vcpu->kvm);
}
    
/* Set the Logical Destination Register and refresh the delivery map. */
static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
{
	apic_set_reg(apic, APIC_LDR, id);
	recalculate_apic_map(apic->vcpu->kvm);
}
    
    
    static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
    {
    
    	return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
    
    }
    
    static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
    {
    
    	return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
    
    static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
    {
    
    	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
    
    		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
    }
    
    
    static inline int apic_lvtt_period(struct kvm_lapic *apic)
    {
    
    	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
    
    		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
    }
    
    static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
    {
    
    	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
    
    		apic->lapic_timer.timer_mode_mask) ==
    			APIC_LVT_TIMER_TSCDEADLINE);
    
    static inline int apic_lvt_nmi_mode(u32 lvt_val)
    {
    	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
    }
    
    
/*
 * Publish the APIC version register for @vcpu, advertising directed-EOI
 * support when the guest's CPUID exposes x2APIC.
 */
void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	struct kvm_cpuid_entry2 *feat;
	u32 v = APIC_VERSION;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	/* CPUID leaf 1, ECX bit for x2APIC gates directed-EOI reporting. */
	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
		v |= APIC_LVR_DIRECTED_EOI;
	apic_set_reg(apic, APIC_LVR, v);
}
    
    
/* Writable-bit masks for each LVT register, indexed by LVT number. */
static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
	LVT_MASK ,      /* part LVTT mask, timer mode mask added at runtime */
	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
	LINT_MASK, LINT_MASK,	/* LVT0-1 */
	LVT_MASK		/* LVTERR */
};
    
    static int find_highest_vector(void *bitmap)
    {
    
    	for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
    	     vec >= 0; vec -= APIC_VECTORS_PER_REG) {
    		reg = bitmap + REG_POS(vec);
    		if (*reg)
    			return fls(*reg) - 1 + vec;
    	}
    
    static u8 count_vectors(void *bitmap)
    {
    
    	u8 count = 0;
    
    
    	for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
    		reg = bitmap + REG_POS(vec);
    		count += hweight32(*reg);
    	}
    
    
    	return count;
    }
    
    
/*
 * Atomically set @vec in the IRR; returns nonzero if it was already
 * pending.  irr_pending is only a hint; it is raised before the bit so
 * it can only over-report, never under-report, a pending vector.
 */
static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
{
	apic->irr_pending = true;
	return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
}
    
    
    static inline int apic_search_irr(struct kvm_lapic *apic)
    
    	return find_highest_vector(apic->regs + APIC_IRR);
    
    }
    
    static inline int apic_find_highest_irr(struct kvm_lapic *apic)
    {
    	int result;
    
    
    	/*
    	 * Note that irr_pending is just a hint. It will be always
    	 * true with virtual interrupt delivery enabled.
    	 */
    
    	if (!apic->irr_pending)
    		return -1;
    
    	result = apic_search_irr(apic);
    
    	ASSERT(result == -1 || result >= 16);
    
    	return result;
    }
    
    
/*
 * Clear @vec from the IRR and recompute the irr_pending hint from the
 * remaining bits.  The hint is dropped before the rescan so it errs on
 * the conservative side.
 */
static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
{
	apic->irr_pending = false;
	apic_clear_vector(vec, apic->regs + APIC_IRR);
	if (apic_search_irr(apic) != -1)
		apic->irr_pending = true;
}
    
    
/* Mark @vec in-service: update the ISR, isr_count and highest-ISR cache. */
static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
{
	/* only count a bit that was actually 0 -> 1 */
	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
		++apic->isr_count;
	BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
	/*
	 * ISR (in service register) bit is set when injecting an interrupt.
	 * The highest vector is injected. Thus the latest bit set matches
	 * the highest bit in ISR.
	 */
	apic->highest_isr_cache = vec;
}
    
/* Retire @vec from the ISR; the highest-ISR cache becomes invalid. */
static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
{
	/* only discount a bit that was actually 1 -> 0 */
	if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
		--apic->isr_count;
	BUG_ON(apic->isr_count < 0);
	apic->highest_isr_cache = -1;
}
    
    
    int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
    {
    	int highest_irr;
    
    
    	/* This may race with setting of irr in __apic_accept_irq() and
    	 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
    	 * will cause vmexit immediately and the value will be recalculated
    	 * on the next vmentry.
    	 */
    
    	if (!kvm_vcpu_has_lapic(vcpu))
    
    	highest_irr = apic_find_highest_irr(vcpu->arch.apic);
    
    static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
    			     int vector, int level, int trig_mode);
    
    
    int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
    
    	struct kvm_lapic *apic = vcpu->arch.apic;
    
    	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
    			irq->level, irq->trig_mode);
    
    static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
    {
    
    	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
    				      sizeof(val));
    }
    
    static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
    {
    
    	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
    				      sizeof(*val));
    }
    
    static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
    {
    	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
    }
    
    static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
    {
    	u8 val;
    	if (pv_eoi_get_user(vcpu, &val) < 0)
    		apic_debug("Can't read EOI MSR value: 0x%llx\n",
    			   (unsigned long long)vcpi->arch.pv_eoi.msr_val);
    	return val & 0x1;
    }
    
    static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
    {
    	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
    		apic_debug("Can't set EOI MSR value: 0x%llx\n",
    			   (unsigned long long)vcpi->arch.pv_eoi.msr_val);
    		return;
    	}
    	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
    }
    
    static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
    {
    	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
    		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
    			   (unsigned long long)vcpi->arch.pv_eoi.msr_val);
    		return;
    	}
    	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
    }
    
    
/*
 * Highest in-service vector, or -1 when none.  isr_count short-circuits
 * the bitmap scan and highest_isr_cache avoids rescanning right after
 * apic_set_isr().
 */
static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
	int result;

	/* Note that isr_count is always 1 with vid enabled */
	if (!apic->isr_count)
		return -1;
	if (likely(apic->highest_isr_cache != -1))
		return apic->highest_isr_cache;

	result = find_highest_vector(apic->regs + APIC_ISR);
	/* vectors 0-15 are reserved and never placed in service */
	ASSERT(result == -1 || result >= 16);

	return result;
}
    
    static void apic_update_ppr(struct kvm_lapic *apic)
    {
    
    	u32 tpr, isrv, ppr, old_ppr;
    
    	old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI);
    	tpr = kvm_apic_get_reg(apic, APIC_TASKPRI);
    
    	isr = apic_find_highest_isr(apic);
    	isrv = (isr != -1) ? isr : 0;
    
    	if ((tpr & 0xf0) >= (isrv & 0xf0))
    		ppr = tpr & 0xff;
    	else
    		ppr = isrv & 0xf0;
    
    	apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
    		   apic, ppr, isr, isrv);
    
    
    	if (old_ppr != ppr) {
    		apic_set_reg(apic, APIC_PROCPRI, ppr);
    
    		if (ppr < old_ppr)
    			kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
    
    }
    
/* Guest wrote the Task Priority Register: store it and refresh the PPR. */
static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
{
	apic_set_reg(apic, APIC_TASKPRI, tpr);
	apic_update_ppr(apic);
}
    
    int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
    {
    
    	return dest == 0xff || kvm_apic_id(apic) == dest;
    
    }
    
    int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
    {
    	int result = 0;
    
    	u32 logical_id;
    
    	if (apic_x2apic_mode(apic)) {
    
    		logical_id = kvm_apic_get_reg(apic, APIC_LDR);
    
    		return logical_id & mda;
    	}
    
    	logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR));
    
    	switch (kvm_apic_get_reg(apic, APIC_DFR)) {
    
    	case APIC_DFR_FLAT:
    		if (logical_id & mda)
    			result = 1;
    		break;
    	case APIC_DFR_CLUSTER:
    		if (((logical_id >> 4) == (mda >> 0x4))
    		    && (logical_id & mda & 0xf))
    			result = 1;
    		break;
    	default:
    
    		apic_debug("Bad DFR vcpu %d: %08x\n",
    
    			   apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR));
    
    int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
    
    			   int short_hand, int dest, int dest_mode)
    {
    	int result = 0;
    
    	struct kvm_lapic *target = vcpu->arch.apic;
    
    
    	apic_debug("target %p, source %p, dest 0x%x, "
    
    		   "dest_mode 0x%x, short_hand 0x%x\n",
    
    		   target, source, dest, dest_mode, short_hand);
    
    
    	ASSERT(target);
    
    	switch (short_hand) {
    	case APIC_DEST_NOSHORT:
    
    			/* Physical mode. */
    
    			result = kvm_apic_match_physical_addr(target, dest);
    		else
    
    			/* Logical mode. */
    			result = kvm_apic_match_logical_addr(target, dest);
    		break;
    	case APIC_DEST_SELF:
    
    		result = (target == source);
    
    		break;
    	case APIC_DEST_ALLINC:
    		result = 1;
    		break;
    	case APIC_DEST_ALLBUT:
    
    		result = (target != source);
    
    		break;
    	default:
    
    		apic_debug("kvm: apic: Bad dest shorthand value %x\n",
    			   short_hand);
    
/*
 * Fast-path interrupt delivery using the RCU-protected apic_map.
 * Returns true if delivery was fully handled here (with *r set to the
 * number of vcpus that accepted, or -1 if none matched); false means
 * the caller must fall back to the slow path.
 */
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
		struct kvm_lapic_irq *irq, int *r)
{
	struct kvm_apic_map *map;
	unsigned long bitmap = 1;
	struct kvm_lapic **dst;
	int i;
	bool ret = false;

	*r = -1;

	/* Self-IPI needs no map lookup. */
	if (irq->shorthand == APIC_DEST_SELF) {
		*r = kvm_apic_set_irq(src->vcpu, irq);
		return true;
	}

	/* Other shorthands target multiple vcpus; use the slow path. */
	if (irq->shorthand)
		return false;

	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);

	if (!map)
		goto out;

	if (irq->dest_mode == 0) { /* physical mode */
		/* broadcast and lowest-prio physical go via the slow path */
		if (irq->delivery_mode == APIC_DM_LOWEST ||
				irq->dest_id == 0xff)
			goto out;
		dst = &map->phys_map[irq->dest_id & 0xff];
	} else {
		u32 mda = irq->dest_id << (32 - map->ldr_bits);

		dst = map->logical_map[apic_cluster_id(map, mda)];

		bitmap = apic_logical_id(map, mda);

		if (irq->delivery_mode == APIC_DM_LOWEST) {
			/* pick the single lowest-priority candidate */
			int l = -1;
			for_each_set_bit(i, &bitmap, 16) {
				if (!dst[i])
					continue;
				if (l < 0)
					l = i;
				else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
					l = i;
			}

			bitmap = (l >= 0) ? 1 << l : 0;
		}
	}

	for_each_set_bit(i, &bitmap, 16) {
		if (!dst[i])
			continue;
		if (*r < 0)
			*r = 0;
		*r += kvm_apic_set_irq(dst[i]->vcpu, irq);
	}

	ret = true;
out:
	rcu_read_unlock();
	return ret;
}
    
    
    /*
     * Add a pending IRQ into lapic.
     * Return 1 if successfully added and 0 if discarded.
     */
    static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
    			     int vector, int level, int trig_mode)
    {
    
    	struct kvm_vcpu *vcpu = apic->vcpu;
    
    
    	switch (delivery_mode) {
    	case APIC_DM_LOWEST:
    
    		vcpu->arch.apic_arb_prio++;
    	case APIC_DM_FIXED:
    
    		/* FIXME add logic for vcpu on reset */
    		if (unlikely(!apic_enabled(apic)))
    			break;
    
    
    		if (trig_mode) {
    			apic_debug("level trig mode for vector %d", vector);
    			apic_set_vector(vector, apic->regs + APIC_TMR);
    		} else
    			apic_clear_vector(vector, apic->regs + APIC_TMR);
    
    
    		result = !apic_test_and_set_irr(vector, apic);
    
    		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
    
    					  trig_mode, vector, !result);
    
    		if (!result) {
    			if (trig_mode)
    				apic_debug("level trig mode repeatedly for "
    						"vector %d", vector);
    
    		kvm_make_request(KVM_REQ_EVENT, vcpu);
    
    		kvm_vcpu_kick(vcpu);
    
    		break;
    
    	case APIC_DM_REMRD:
    
    		apic_debug("Ignoring delivery mode 3\n");
    
    		break;
    
    	case APIC_DM_SMI:
    
    		apic_debug("Ignoring guest SMI\n");
    
    		break;
    
    	case APIC_DM_NMI:
    
    		kvm_inject_nmi(vcpu);
    
    Jan Kiszka's avatar
    Jan Kiszka committed
    		kvm_vcpu_kick(vcpu);
    
    		break;
    
    	case APIC_DM_INIT:
    
    			/* assumes that there are only KVM_APIC_INIT/SIPI */
    			apic->pending_events = (1UL << KVM_APIC_INIT);
    			/* make sure pending_events is visible before sending
    			 * the request */
    			smp_wmb();
    
    			kvm_make_request(KVM_REQ_EVENT, vcpu);
    
    			kvm_vcpu_kick(vcpu);
    		} else {
    
    			apic_debug("Ignoring de-assert INIT to vcpu %d\n",
    				   vcpu->vcpu_id);
    
    		break;
    
    	case APIC_DM_STARTUP:
    
    		apic_debug("SIPI to vcpu %d vector 0x%02x\n",
    			   vcpu->vcpu_id, vector);
    
    		result = 1;
    		apic->sipi_vector = vector;
    		/* make sure sipi_vector is visible for the receiver */
    		smp_wmb();
    		set_bit(KVM_APIC_SIPI, &apic->pending_events);
    		kvm_make_request(KVM_REQ_EVENT, vcpu);
    		kvm_vcpu_kick(vcpu);
    
    	case APIC_DM_EXTINT:
    		/*
    		 * Should only be called by kvm_apic_local_deliver() with LVT0,
    		 * before NMI watchdog was enabled. Already handled by
    		 * kvm_apic_accept_pic_intr().
    		 */
    		break;
    
    
    	default:
    		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
    		       delivery_mode);
    		break;
    	}
    	return result;
    }
    
    
    int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
    
    	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
    
/*
 * Propagate an EOI for @vector to the IOAPIC, unless the guest enabled
 * directed EOI (in which case it notifies the IOAPIC itself) or the
 * vector is not routed through the IOAPIC.
 */
static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
{
	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
	    kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
		int trigger_mode;
		/* the TMR records how the interrupt was delivered */
		if (apic_test_vector(vector, apic->regs + APIC_TMR))
			trigger_mode = IOAPIC_LEVEL_TRIG;
		else
			trigger_mode = IOAPIC_EDGE_TRIG;
		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
	}
}
    
    
    static int apic_set_eoi(struct kvm_lapic *apic)
    
    {
    	int vector = apic_find_highest_isr(apic);
    
    
    	trace_kvm_eoi(apic, vector);
    
    
    	/*
    	 * Not every write EOI will has corresponding ISR,
    	 * one example is when Kernel check timer on setup_IO_APIC
    	 */
    	if (vector == -1)
    
    		return vector;
    
    	apic_clear_isr(vector, apic);
    
    	apic_update_ppr(apic);
    
    
    	kvm_ioapic_send_eoi(apic, vector);
    
    	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
    
    	return vector;
    
    /*
     * this interface assumes a trap-like exit, which has already finished
     * desired side effect including vISR and vPPR update.
     */
/*
 * this interface assumes a trap-like exit, which has already finished
 * desired side effect including vISR and vPPR update.
 */
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	trace_kvm_eoi(apic, vector);

	/* only the IOAPIC propagation and event reevaluation remain */
	kvm_ioapic_send_eoi(apic, vector);
	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
    
    
/*
 * Decode ICR/ICR2 into a kvm_lapic_irq and deliver the IPI.  Called
 * when the guest writes the low half of the ICR.
 */
static void apic_send_ipi(struct kvm_lapic *apic)
{
	u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
	u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2);
	struct kvm_lapic_irq irq;

	/* unpack the ICR fields per the xAPIC register layout */
	irq.vector = icr_low & APIC_VECTOR_MASK;
	irq.delivery_mode = icr_low & APIC_MODE_MASK;
	irq.dest_mode = icr_low & APIC_DEST_MASK;
	irq.level = icr_low & APIC_INT_ASSERT;
	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
	irq.shorthand = icr_low & APIC_SHORT_MASK;

	/* x2APIC uses the full 32-bit ICR2 as the destination id */
	if (apic_x2apic_mode(apic))
		irq.dest_id = icr_high;
	else
		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);

	trace_kvm_apic_ipi(icr_low, irq.dest_id);

	apic_debug("icr_high 0x%x, icr_low 0x%x, "
		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
		   icr_high, icr_low, irq.shorthand, irq.dest_id,
		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
		   irq.vector);

	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
}
    
    static u32 apic_get_tmcct(struct kvm_lapic *apic)
    {
    
    	ktime_t remaining;
    	s64 ns;
    
    
    	ASSERT(apic != NULL);
    
    
    	/* if initial count is 0, current count should also be 0 */
    
    	if (kvm_apic_get_reg(apic, APIC_TMICT) == 0)
    
    	remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
    
    	if (ktime_to_ns(remaining) < 0)
    		remaining = ktime_set(0, 0);
    
    
    	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
    	tmcct = div64_u64(ns,
    			 (APIC_BUS_CYCLE_NS * apic->divide_count));
    
/*
 * Record a guest TPR access in the vcpu's run structure and request a
 * userspace exit to report it.
 */
static void __report_tpr_access(struct kvm_lapic *apic, bool write)
{
	struct kvm_vcpu *vcpu = apic->vcpu;
	struct kvm_run *run = vcpu->run;

	kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
	run->tpr_access.rip = kvm_rip_read(vcpu);
	run->tpr_access.is_write = write;
}
    
/* Report a TPR access only if userspace asked for TPR access reporting. */
static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
{
	if (apic->vcpu->arch.tpr_access_reporting)
		__report_tpr_access(apic, write);
}
    
    
    static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
    {
    	u32 val = 0;
    
    	if (offset >= LAPIC_MMIO_LENGTH)
    		return 0;
    
    	switch (offset) {
    
    	case APIC_ID:
    		if (apic_x2apic_mode(apic))
    			val = kvm_apic_id(apic);
    		else
    			val = kvm_apic_id(apic) << 24;
    		break;
    
    	case APIC_ARBPRI:
    
    		apic_debug("Access APIC ARBPRI register which is for P6\n");
    
    		break;
    
    	case APIC_TMCCT:	/* Timer CCR */
    
    		if (apic_lvtt_tscdeadline(apic))
    			return 0;
    
    
    		val = apic_get_tmcct(apic);
    		break;
    
    	case APIC_PROCPRI:
    		apic_update_ppr(apic);
    
    		val = kvm_apic_get_reg(apic, offset);
    
    	case APIC_TASKPRI:
    		report_tpr_access(apic, false);
    		/* fall thru */
    
    	default:
    
    		val = kvm_apic_get_reg(apic, offset);
    
/* Recover the kvm_lapic from its embedded MMIO device descriptor. */
static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
{
	return container_of(dev, struct kvm_lapic, dev);
}
    
    
    static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
    		void *data)
    
    {
    	unsigned char alignment = offset & 0xf;
    	u32 result;
    
    Guo Chao's avatar
    Guo Chao committed
    	/* this bitmask has a bit cleared for each reserved register */
    
    	static const u64 rmask = 0x43ff01ffffffe70cULL;
    
    
    	if ((alignment + len) > 4) {
    
    		apic_debug("KVM_APIC_READ: alignment error %x %d\n",
    			   offset, len);
    
    		return 1;
    
    
    	if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
    
    		apic_debug("KVM_APIC_READ: read reserved register %x\n",
    			   offset);
    
    	result = __apic_read(apic, offset & ~0xf);
    
    
    	trace_kvm_apic_read(offset, result);
    
    
    	switch (len) {
    	case 1:
    	case 2:
    	case 4:
    		memcpy(data, (char *)&result + alignment, len);
    		break;
    	default:
    		printk(KERN_ERR "Local APIC read with len = %x, "
    		       "should be 1,2, or 4 instead\n", len);
    		break;
    	}
    
/*
 * Does @addr fall inside this vcpu's 4K APIC MMIO window?  Only true
 * while the APIC is hardware-enabled.
 */
static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
	return kvm_apic_hw_enabled(apic) &&
	    addr >= apic->base_address &&
	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
}
    
/*
 * MMIO read callback for the APIC page.  Returns -EOPNOTSUPP when the
 * address is outside our window so the bus can offer it to other
 * devices.
 */
static int apic_mmio_read(struct kvm_io_device *this,
			   gpa_t address, int len, void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	u32 offset = address - apic->base_address;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	apic_reg_read(apic, offset, len, data);

	return 0;
}
    
    
/*
 * Recompute the timer divide count from the TDCR: bits 0-1 and bit 3
 * encode the divisor as 2^(value+1), with 0b111 meaning divide-by-1
 * (the 3-bit wrap via "& 0x7" handles that case).
 */
static void update_divide_count(struct kvm_lapic *apic)
{
	u32 tmp1, tmp2, tdcr;

	tdcr = kvm_apic_get_reg(apic, APIC_TDCR);
	tmp1 = tdcr & 0xf;
	/* squeeze bits {3,1,0} into a contiguous 3-bit value */
	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
	apic->divide_count = 0x1 << (tmp2 & 0x7);

	apic_debug("timer divide count is 0x%x\n",
				   apic->divide_count);
}
    
    static void start_apic_timer(struct kvm_lapic *apic)
    {
    
    	atomic_set(&apic->lapic_timer.pending, 0);
    
    	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
    
    		/* lapic timer in oneshot or periodic mode */
    
    		now = apic->lapic_timer.timer.base->get_time();
    
    		apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT)
    
    			    * APIC_BUS_CYCLE_NS * apic->divide_count;
    
    		if (!apic->lapic_timer.period)
    			return;
    		/*
    		 * Do not allow the guest to program periodic timers with small
    		 * interval, since the hrtimers are not throttled by the host
    		 * scheduler.
    		 */
    		if (apic_lvtt_period(apic)) {
    			s64 min_period = min_timer_period_us * 1000LL;
    
    			if (apic->lapic_timer.period < min_period) {
    				pr_info_ratelimited(
    				    "kvm: vcpu %i: requested %lld ns "
    				    "lapic timer period limited to %lld ns\n",
    				    apic->vcpu->vcpu_id,
    				    apic->lapic_timer.period, min_period);
    				apic->lapic_timer.period = min_period;
    			}
    
    		hrtimer_start(&apic->lapic_timer.timer,
    			      ktime_add_ns(now, apic->lapic_timer.period),
    			      HRTIMER_MODE_ABS);