/*
 * Performance events x86 architecture code
 *
     *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
     *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
     *  Copyright (C) 2009 Jaswinder Singh Rajput
     *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
     *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
    
     *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
    
     *  Copyright (C) 2009 Google, Inc., Stephane Eranian
    
     *
     *  For licencing details see kernel-base/COPYING
     */
    
    
#include <linux/perf_event.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/device.h>

#include <asm/apic.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>

#include "perf_event.h"
    
    
    #if 0
    #undef wrmsrl
    #define wrmsrl(msr, val) 					\
    do {								\
    	trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
    			(unsigned long)(val));			\
    	native_write_msr((msr), (u32)((u64)(val)), 		\
    			(u32)((u64)(val) >> 32));		\
    } while (0)
    #endif
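
/*
 * Note: the "#if 0" block above is a debugging aid. Flipping it to "#if 1"
 * replaces wrmsrl() with a variant that emits a trace_printk() line for
 * every counter MSR write before performing the write via native_write_msr().
 */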
    
    
    struct x86_pmu x86_pmu __read_mostly;
    
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
	.enabled = 1,
};

u64 __read_mostly hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];
u64 __read_mostly hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];
    
/*
 * Propagate event elapsed time into the generic event.
 * Can only be executed on the CPU where the event is active.
 *
 * Returns the delta events processed.
 */
    
u64 x86_perf_event_update(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - x86_pmu.cntval_bits;
	u64 prev_raw_count, new_raw_count;
	int idx = hwc->idx;
	s64 delta;
    
    	if (idx == X86_PMC_IDX_FIXED_BTS)
    		return 0;
    
    
	/*
	 * Careful: an NMI might modify the previous event value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic event atomically:
	 */
again:
	prev_raw_count = local64_read(&hwc->prev_count);
    
    	rdmsrl(hwc->event_base, new_raw_count);
    
    	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
    
    					new_raw_count) != prev_raw_count)
    		goto again;
    
    	/*
    	 * Now we have the new raw value and have updated the prev
    	 * timestamp already. We can now calculate the elapsed delta
    
    	 * (event-)time and add that to the generic event.
    
    	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return new_raw_count;
}
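
/*
 * Worked example (illustrative only, not part of the original source),
 * assuming a PMU with 48-bit counters, i.e. cntval_bits = 48 and shift = 16:
 *
 *   prev_raw_count = 0x0000fffffffffff0   (48-bit counter just before wrap)
 *   new_raw_count  = 0x0000000000000010   (read after the counter wrapped)
 *
 * Shifting both values left by 16 moves bit 47 into the sign bit, so the
 * 64-bit subtraction wraps correctly:
 *
 *   delta  = (new << 16) - (prev << 16) = 0x0000000000200000
 *   delta >>= 16                        = 0x20
 *
 * which matches the 0x20 (32) events that occurred across the wrap.
 */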
    
    /*
     * Find and validate any extra registers to set up.
     */
    static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
{
	struct hw_perf_event_extra *reg;
	struct extra_reg *er;

	reg = &event->hw.extra_reg;

	if (!x86_pmu.extra_regs)
		return 0;
    
    	for (er = x86_pmu.extra_regs; er->msr; er++) {
    		if (er->event != (config & er->config_mask))
    			continue;
    		if (event->attr.config1 & ~er->valid_mask)
    			return -EINVAL;
    
    
		reg->idx = er->idx;
		reg->config = event->attr.config1;
		reg->reg = er->msr;
		break;
	}
	return 0;
}
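
/*
 * Example (for illustration only): on Intel CPUs the OFFCORE_RESPONSE
 * events are described by an extra_reg entry, so the filter value the user
 * passes in attr.config1 ends up in reg->config above and is later written
 * to the matching MSR_OFFCORE_RSP_* register by the model-specific code.
 */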
    
    static atomic_t active_events;
    
    static DEFINE_MUTEX(pmc_reserve_mutex);
    
    
#ifdef CONFIG_X86_LOCAL_APIC

static bool reserve_pmc_hardware(void)
    {
    	int i;
    
    
    	for (i = 0; i < x86_pmu.num_counters; i++) {
    
    		if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
    
    			goto perfctr_fail;
    	}
    
    
    	for (i = 0; i < x86_pmu.num_counters; i++) {
    
    		if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
    
    			goto eventsel_fail;
    	}
    
    	return true;
    
    eventsel_fail:
    	for (i--; i >= 0; i--)
    
    		release_evntsel_nmi(x86_pmu_config_addr(i));
    
    	i = x86_pmu.num_counters;
    
    
    perfctr_fail:
    	for (i--; i >= 0; i--)
    
    		release_perfctr_nmi(x86_pmu_event_addr(i));
    
    
    	return false;
    }
    
    static void release_pmc_hardware(void)
    {
    	int i;
    
    
	for (i = 0; i < x86_pmu.num_counters; i++) {
		release_perfctr_nmi(x86_pmu_event_addr(i));
		release_evntsel_nmi(x86_pmu_config_addr(i));
	}
}

    #else
    
    static bool reserve_pmc_hardware(void) { return true; }
    static void release_pmc_hardware(void) {}
    
    #endif
    
    
    static bool check_hw_exists(void)
    {
	u64 val, val_new = 0;
	int i, reg, ret = 0;

    	/*
    	 * Check to see if the BIOS enabled any of the counters, if so
    	 * complain and bail.
    	 */
    	for (i = 0; i < x86_pmu.num_counters; i++) {
    
    		reg = x86_pmu_config_addr(i);
    
    		ret = rdmsrl_safe(reg, &val);
    		if (ret)
    			goto msr_fail;
    		if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
    			goto bios_fail;
    	}
    
    	if (x86_pmu.num_counters_fixed) {
    		reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
    		ret = rdmsrl_safe(reg, &val);
    		if (ret)
    			goto msr_fail;
    		for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
    			if (val & (0x03 << i*4))
    				goto bios_fail;
    		}
    	}
    
    	/*
    	 * Now write a value and read it back to see if it matches,
    	 * this is needed to detect certain hardware emulators (qemu/kvm)
    	 * that don't trap on the MSR access and always return 0s.
    	 */
    
	val = 0xabcdUL;
	ret = wrmsrl_safe(x86_pmu_event_addr(0), val);
	ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
	if (ret || val != val_new)
		goto msr_fail;

	return true;

bios_fail:
	/*
	 * We still allow the PMU driver to operate:
	 */
	printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
	printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);

	return true;

msr_fail:
	printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");

	return false;
}
    
static void hw_perf_event_destroy(struct perf_event *event)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
    
    		release_pmc_hardware();
    
    		release_ds_buffers();
    
    		mutex_unlock(&pmc_reserve_mutex);
    	}
    }
    
    
    static inline int x86_pmu_initialized(void)
    {
    	return x86_pmu.handle_irq != NULL;
    }
    
    
static inline int
set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
    
    	unsigned int cache_type, cache_op, cache_result;
    	u64 config, val;
    
    	config = attr->config;
    
    	cache_type = (config >>  0) & 0xff;
    	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
    		return -EINVAL;
    
    	cache_op = (config >>  8) & 0xff;
    	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
    		return -EINVAL;
    
    	cache_result = (config >> 16) & 0xff;
    	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
    		return -EINVAL;
    
    	val = hw_cache_event_ids[cache_type][cache_op][cache_result];
    
    	if (val == 0)
    		return -ENOENT;
    
    	if (val == -1)
    		return -EINVAL;
    
    	hwc->config |= val;
    
	attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
	return x86_pmu_extra_regs(val, event);
}
    
    int x86_setup_perfctr(struct perf_event *event)
    
    {
    	struct perf_event_attr *attr = &event->attr;
    	struct hw_perf_event *hwc = &event->hw;
    	u64 config;
    
    
    	if (!is_sampling_event(event)) {
    
    		hwc->sample_period = x86_pmu.max_period;
    		hwc->last_period = hwc->sample_period;
    
    		local64_set(&hwc->period_left, hwc->sample_period);
    
    	} else {
    		/*
    		 * If we have a PMU initialized but no APIC
    		 * interrupts, we cannot sample hardware
    		 * events (user-space has to fall back and
    		 * sample via a hrtimer based software event):
    		 */
    		if (!x86_pmu.apic)
    			return -EOPNOTSUPP;
    	}
    
    	if (attr->type == PERF_TYPE_RAW)
    
    		return x86_pmu_extra_regs(event->attr.config, event);
    
    
    	if (attr->type == PERF_TYPE_HW_CACHE)
    
    		return set_ext_hw_attr(hwc, event);
    
    
    	if (attr->config >= x86_pmu.max_events)
    		return -EINVAL;
    
    	/*
    	 * The generic map:
    	 */
    	config = x86_pmu.event_map(attr->config);
    
    	if (config == 0)
    		return -ENOENT;
    
    	if (config == -1LL)
    		return -EINVAL;
    
    	/*
    	 * Branch tracing:
    	 */
    
    	if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
    	    !attr->freq && hwc->sample_period == 1) {
    
		/* BTS is not supported by this architecture. */
		if (!x86_pmu.bts_active)
			return -EOPNOTSUPP;
    
    		/* BTS is currently only allowed for user-mode. */
    		if (!attr->exclude_kernel)
    			return -EOPNOTSUPP;
    	}
    
    	hwc->config |= config;
    
    	return 0;
    }
    
    /*
     * check that branch_sample_type is compatible with
     * settings needed for precise_ip > 1 which implies
     * using the LBR to capture ALL taken branches at the
     * priv levels of the measurement
     */
    static inline int precise_br_compat(struct perf_event *event)
    {
    	u64 m = event->attr.branch_sample_type;
    	u64 b = 0;
    
    	/* must capture all branches */
    	if (!(m & PERF_SAMPLE_BRANCH_ANY))
    		return 0;
    
    	m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
    
    	if (!event->attr.exclude_user)
    		b |= PERF_SAMPLE_BRANCH_USER;
    
    	if (!event->attr.exclude_kernel)
    		b |= PERF_SAMPLE_BRANCH_KERNEL;
    
    	/*
    	 * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
    	 */
    
    	return m == b;
    }
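
/*
 * Worked example (illustrative only): an event created with
 * exclude_kernel = 1 and exclude_user = 0 requires b = PERF_SAMPLE_BRANCH_USER.
 * A branch_sample_type of (PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_USER)
 * therefore passes (m reduces to PERF_SAMPLE_BRANCH_USER == b), while
 * (PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_KERNEL) fails because the
 * requested priv level does not match the event's.
 */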
    
    
int x86_pmu_hw_config(struct perf_event *event)
{
	if (event->attr.precise_ip) {
		int precise = 0;

		/* Support for constant skid */
		if (x86_pmu.pebs_active) {
			precise++;

			/* Support for IP fixup */
			if (x86_pmu.lbr_nr)
				precise++;
		}
    
    
    		if (event->attr.precise_ip > precise)
    			return -EOPNOTSUPP;
    
    		/*
    		 * check that PEBS LBR correction does not conflict with
    		 * whatever the user is asking with attr->branch_sample_type
    		 */
    		if (event->attr.precise_ip > 1) {
    			u64 *br_type = &event->attr.branch_sample_type;
    
    			if (has_branch_stack(event)) {
    				if (!precise_br_compat(event))
    					return -EOPNOTSUPP;
    
    				/* branch_sample_type is compatible */
    
    			} else {
    				/*
    				 * user did not specify  branch_sample_type
    				 *
    				 * For PEBS fixups, we capture all
    				 * the branches at the priv level of the
    				 * event.
    				 */
    				*br_type = PERF_SAMPLE_BRANCH_ANY;
    
    				if (!event->attr.exclude_user)
    					*br_type |= PERF_SAMPLE_BRANCH_USER;
    
    				if (!event->attr.exclude_kernel)
    					*br_type |= PERF_SAMPLE_BRANCH_KERNEL;
    			}
		}
	}

	/*
	 * Generate PMC IRQs:
	 * (keep 'enabled' bit clear for now)
	 */
	event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
    
    
    	/*
    	 * Count user and OS events unless requested not to
    	 */
    
    	if (!event->attr.exclude_user)
    		event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
    	if (!event->attr.exclude_kernel)
    		event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
    
	if (event->attr.type == PERF_TYPE_RAW)
		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;

	return x86_setup_perfctr(event);
}
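
/*
 * Example (for illustration only): on a PMU that reports PEBS support but
 * has no LBR (x86_pmu.lbr_nr == 0), the code above computes precise = 1, so
 * a request with attr.precise_ip = 2 (which needs the LBR based IP fixup) is
 * rejected with -EOPNOTSUPP, while attr.precise_ip = 1 is accepted.
 */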
    
/*
 * Setup the hardware configuration for a given attr_type
 */
static int __x86_pmu_event_init(struct perf_event *event)
{
	int err;
    
    	if (!x86_pmu_initialized())
    		return -ENODEV;
    
    	err = 0;
    
    	if (!atomic_inc_not_zero(&active_events)) {
    
    		mutex_lock(&pmc_reserve_mutex);
    
    		if (atomic_read(&active_events) == 0) {
    
    			if (!reserve_pmc_hardware())
    				err = -EBUSY;
    
			else
				reserve_ds_buffers();
		}
		if (!err)
			atomic_inc(&active_events);
		mutex_unlock(&pmc_reserve_mutex);
    	}
    	if (err)
    		return err;
    
    
    	event->destroy = hw_perf_event_destroy;
    
    	event->hw.idx = -1;
    	event->hw.last_cpu = -1;
    	event->hw.last_tag = ~0ULL;
    
    	/* mark unused */
    	event->hw.extra_reg.idx = EXTRA_REG_NONE;
    
    	event->hw.branch_reg.idx = EXTRA_REG_NONE;
    
    
	return x86_pmu.hw_config(event);
}

void x86_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;
		rdmsrl(x86_pmu_config_addr(idx), val);
		if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
			continue;
		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(x86_pmu_config_addr(idx), val);
	}
}

static void x86_pmu_disable(struct pmu *pmu)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!x86_pmu_initialized())
		return;

	if (!cpuc->enabled)
		return;
    
    	cpuc->n_added = 0;
    	cpuc->enabled = 0;
	barrier();

	x86_pmu.disable_all();
}

void x86_pmu_enable_all(int added)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
    
		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
	}
}

    static struct pmu pmu;
    
    
    static inline int is_x86_event(struct perf_event *event)
    {
    	return event->pmu == &pmu;
    }
    
    
    /*
     * Event scheduler state:
     *
     * Assign events iterating over all events and counters, beginning
     * with events with least weights first. Keep the current iterator
     * state in struct sched_state.
     */
    struct sched_state {
    	int	weight;
    	int	event;		/* event index */
    	int	counter;	/* counter index */
    	int	unassigned;	/* number of events to be assigned left */
    	unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
    };
    
    
    /* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
    #define	SCHED_STATES_MAX	2
    
    
    struct perf_sched {
    	int			max_weight;
    	int			max_events;
    	struct event_constraint	**constraints;
    	struct sched_state	state;
    
    	int			saved_states;
    	struct sched_state	saved[SCHED_STATES_MAX];
    
    };
    
/*
 * Initialize iterator that runs through all events and counters.
 */
    static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
    			    int num, int wmin, int wmax)
    {
    	int idx;
    
    	memset(sched, 0, sizeof(*sched));
    	sched->max_events	= num;
    	sched->max_weight	= wmax;
    	sched->constraints	= c;
    
    	for (idx = 0; idx < num; idx++) {
    		if (c[idx]->weight == wmin)
    			break;
    	}
    
    	sched->state.event	= idx;		/* start with min weight */
    	sched->state.weight	= wmin;
    	sched->state.unassigned	= num;
    }
    
    
    static void perf_sched_save_state(struct perf_sched *sched)
    {
    	if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
    		return;
    
    	sched->saved[sched->saved_states] = sched->state;
    	sched->saved_states++;
    }
    
    static bool perf_sched_restore_state(struct perf_sched *sched)
    {
    	if (!sched->saved_states)
    		return false;
    
    	sched->saved_states--;
    	sched->state = sched->saved[sched->saved_states];
    
    	/* continue with next counter: */
    	clear_bit(sched->state.counter++, sched->state.used);
    
    	return true;
    }
    
    
    /*
     * Select a counter for the current event to schedule. Return true on
     * success.
     */
    
    static bool __perf_sched_find_counter(struct perf_sched *sched)
    
    {
    	struct event_constraint *c;
    	int idx;
    
    	if (!sched->state.unassigned)
    		return false;
    
    	if (sched->state.event >= sched->max_events)
    		return false;
    
    	c = sched->constraints[sched->state.event];
    
    
    	/* Prefer fixed purpose counters */
    	if (x86_pmu.num_counters_fixed) {
    		idx = X86_PMC_IDX_FIXED;
    
    		for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
    
    			if (!__test_and_set_bit(idx, sched->state.used))
    				goto done;
    		}
    	}
    
    	/* Grab the first unused counter starting with idx */
    	idx = sched->state.counter;
    
	for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
		if (!__test_and_set_bit(idx, sched->state.used))
			goto done;
	}

	return false;
    
    done:
    	sched->state.counter = idx;
    
    	if (c->overlap)
    		perf_sched_save_state(sched);
    
    	return true;
    }
    
    static bool perf_sched_find_counter(struct perf_sched *sched)
    {
    	while (!__perf_sched_find_counter(sched)) {
    		if (!perf_sched_restore_state(sched))
    			return false;
    	}
    
    
    	return true;
    }
    
    /*
     * Go through all unassigned events and find the next one to schedule.
     * Take events with the least weight first. Return true on success.
     */
    static bool perf_sched_next_event(struct perf_sched *sched)
    {
    	struct event_constraint *c;
    
    	if (!sched->state.unassigned || !--sched->state.unassigned)
    		return false;
    
    	do {
    		/* next event */
    		sched->state.event++;
    		if (sched->state.event >= sched->max_events) {
    			/* next weight */
    			sched->state.event = 0;
    			sched->state.weight++;
    			if (sched->state.weight > sched->max_weight)
    				return false;
    		}
    		c = sched->constraints[sched->state.event];
    	} while (c->weight != sched->state.weight);
    
    	sched->state.counter = 0;	/* start with first counter */
    
    	return true;
    }
    
    /*
     * Assign a counter for each event.
     */
    static int perf_assign_events(struct event_constraint **constraints, int n,
    			      int wmin, int wmax, int *assign)
    {
    	struct perf_sched sched;
    
    	perf_sched_init(&sched, constraints, n, wmin, wmax);
    
    	do {
    		if (!perf_sched_find_counter(&sched))
    			break;	/* failed */
    		if (assign)
    			assign[sched.state.event] = sched.state.counter;
    	} while (perf_sched_next_event(&sched));
    
    	return sched.state.unassigned;
    }
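
/*
 * Worked example (illustrative only): suppose n = 2 and the constraints are
 *
 *   event A: idxmsk = {0}       (weight 1)
 *   event B: idxmsk = {0,1,2,3} (weight 4)
 *
 * perf_sched_init() starts the iterator at the lowest weight, so A is placed
 * first and takes counter 0; B is scheduled afterwards and finds counter 1
 * free. Visiting the events in the opposite order could have parked B on
 * counter 0 and left A unschedulable, which is exactly what the weight
 * ordering avoids.
 */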
    
    
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
    	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
    
    	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
    
    	int i, wmin, wmax, num = 0;
    
    	struct hw_perf_event *hwc;
    
    	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
    
    
    	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
    
    		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
    		constraints[i] = c;
    
		wmin = min(wmin, c->weight);
		wmax = max(wmax, c->weight);
	}

    	/*
    	 * fastpath, try to reuse previous register
    	 */
    
    	for (i = 0; i < n; i++) {
    
    		hwc = &cpuc->event_list[i]->hw;
    
    		c = constraints[i];
    
    
    		/* never assigned */
    		if (hwc->idx == -1)
    			break;
    
    		/* constraint still honored */
    
    		if (!test_bit(hwc->idx, c->idxmsk))
    
    			break;
    
    		/* not already used */
    		if (test_bit(hwc->idx, used_mask))
    			break;
    
    
		__set_bit(hwc->idx, used_mask);
		if (assign)
			assign[i] = hwc->idx;
	}

    	/* slow path */
    	if (i != n)
    		num = perf_assign_events(constraints, n, wmin, wmax, assign);
    
    	/*
    	 * scheduling failed or is just a simulation,
    	 * free resources if necessary
    	 */
    	if (!assign || num) {
    		for (i = 0; i < n; i++) {
    			if (x86_pmu.put_event_constraints)
    				x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
    		}
    	}
    
    	return num ? -EINVAL : 0;
    
    }
    
    /*
 * dogrp: true if we must collect sibling events (group)
     * returns total number of events and error code
     */
    static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
    {
    	struct perf_event *event;
    	int n, max_count;
    
    
    	max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
    
    
    	/* current number of events already accepted */
    	n = cpuc->n_events;
    
    	if (is_x86_event(leader)) {
		if (n >= max_count)
			return -EINVAL;
		cpuc->event_list[n] = leader;
    		n++;
    	}
    	if (!dogrp)
    		return n;
    
    	list_for_each_entry(event, &leader->sibling_list, group_entry) {
    		if (!is_x86_event(event) ||
    
    		    event->state <= PERF_EVENT_STATE_OFF)
    
    			continue;
    
		if (n >= max_count)
			return -EINVAL;

		cpuc->event_list[n] = event;
    		n++;
    	}
    	return n;
    }
    
    static inline void x86_assign_hw_event(struct perf_event *event,
    
				struct cpu_hw_events *cpuc, int i)
{
	struct hw_perf_event *hwc = &event->hw;
    
    	hwc->idx = cpuc->assign[i];
    	hwc->last_cpu = smp_processor_id();
    	hwc->last_tag = ++cpuc->tags[i];
    
    
    	if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
    		hwc->config_base = 0;
    		hwc->event_base	= 0;
    	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
    		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
    
		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
	} else {
		hwc->config_base = x86_pmu_config_addr(hwc->idx);
		hwc->event_base  = x86_pmu_event_addr(hwc->idx);
	}
}

    static inline int match_prev_assignment(struct hw_perf_event *hwc,
    					struct cpu_hw_events *cpuc,
    					int i)
    {
    	return hwc->idx == cpuc->assign[i] &&
    		hwc->last_cpu == smp_processor_id() &&
    		hwc->last_tag == cpuc->tags[i];
    }
    
    
static void x86_pmu_start(struct perf_event *event, int flags);
static void x86_pmu_stop(struct perf_event *event, int flags);

static void x86_pmu_enable(struct pmu *pmu)
{
    	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
    	struct perf_event *event;
    	struct hw_perf_event *hwc;
    
	int i, added = cpuc->n_added;

	if (cpuc->enabled)
		return;

	if (cpuc->n_added) {
    
    		int n_running = cpuc->n_events - cpuc->n_added;
    
    		/*
    		 * apply assignment obtained either from
    		 * hw_perf_group_sched_in() or x86_pmu_enable()
    		 *
    		 * step1: save events moving to new counters
    		 * step2: reprogram moved events into new counters
    		 */
    
    		for (i = 0; i < n_running; i++) {
    
    			event = cpuc->event_list[i];
    			hwc = &event->hw;
    
    
    			/*
    			 * we can avoid reprogramming counter if:
    			 * - assigned same counter as last time
    			 * - running on same CPU as last time
    			 * - no other event has used the counter since
    			 */
			if (hwc->idx == -1 ||
			    match_prev_assignment(hwc, cpuc, i))
				continue;

    			/*
    			 * Ensure we don't accidentally enable a stopped
    			 * counter simply because we rescheduled.
    			 */
    			if (hwc->state & PERF_HES_STOPPED)
    				hwc->state |= PERF_HES_ARCH;
    
    			x86_pmu_stop(event, PERF_EF_UPDATE);
    
    		}
    
    		for (i = 0; i < cpuc->n_events; i++) {
    			event = cpuc->event_list[i];
    			hwc = &event->hw;
    
    
    			if (!match_prev_assignment(hwc, cpuc, i))
    
    				x86_assign_hw_event(event, cpuc, i);
    
    			else if (i < n_running)
    				continue;
    
    			if (hwc->state & PERF_HES_ARCH)
    				continue;
    
    			x86_pmu_start(event, PERF_EF_RELOAD);
    
    		}
    		cpuc->n_added = 0;
    		perf_events_lapic_init();
    	}
    
	cpuc->enabled = 1;
	barrier();

	x86_pmu.enable_all(added);
}

    static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
    
/*
 * Set the next IRQ period, based on the hwc->period_left value.
 * To be called with the event disabled in hw:
 */
int x86_perf_event_set_period(struct perf_event *event)
{
    	struct hw_perf_event *hwc = &event->hw;
    
    	s64 left = local64_read(&hwc->period_left);
    
    	s64 period = hwc->sample_period;
    
    	int ret = 0, idx = hwc->idx;
    
    	if (idx == X86_PMC_IDX_FIXED_BTS)
    		return 0;
    
    
	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
    	if (unlikely(left <= -period)) {
    		left = period;
    
    		local64_set(&hwc->period_left, left);
    
		hwc->last_period = period;
		ret = 1;
	}
    
    	if (unlikely(left <= 0)) {
    		left += period;
    
    		local64_set(&hwc->period_left, left);
    
		hwc->last_period = period;
		ret = 1;
	}
	/*
	 * Quirk: certain CPUs don't like it if just 1 hw_event is left:
	 */
    	if (unlikely(left < 2))
    		left = 2;
    
    	if (left > x86_pmu.max_period)
    		left = x86_pmu.max_period;
    
    
    	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
    
	/*
	 * The hw event starts counting from this event offset,
	 * mark it to be able to extract future deltas:
	 */
    
    	local64_set(&hwc->prev_count, (u64)-left);
    
    	wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
    
    
    	/*
	 * Due to erratum on certain CPUs we need
    	 * a second write to be sure the register
    	 * is updated properly
    	 */
    	if (x86_pmu.perfctr_second_write) {
    
		wrmsrl(hwc->event_base,
			(u64)(-left) & x86_pmu.cntval_mask);
	}

	perf_event_update_userpage(event);

	return ret;
}
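
/*
 * Worked example (illustrative only), assuming 48-bit counters and
 * hwc->sample_period = 100000 with period_left = 100000: left stays at
 * 100000, prev_count is set to (u64)-100000, and the counter MSR is
 * programmed with (-100000 & cntval_mask) = 0xfffffffe7960. After exactly
 * 100000 increments the counter overflows and raises the PMI that drives
 * the sampling interrupt.
 */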
    
void x86_pmu_enable_event(struct perf_event *event)
{
	if (__this_cpu_read(cpu_hw_events.enabled))
		__x86_pmu_enable_event(&event->hw,
				       ARCH_PERFMON_EVENTSEL_ENABLE);
}
    
/*
 * Add a single event to the PMU.
 *
 * The event is added to the group of enabled events
 * but only if it can be scheduled with existing events.
 */
static int x86_pmu_add(struct perf_event *event, int flags)