Skip to content
Snippets Groups Projects
io_apic.c 70.8 KiB
Newer Older
  • Learn to ignore specific revisions
  • Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     *	Intel IO-APIC support for multi-Pentium hosts.
     *
     *	Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
     *
     *	Many thanks to Stig Venaas for trying out countless experimental
     *	patches and reporting/debugging problems patiently!
     *
     *	(c) 1999, Multiple IO-APIC support, developed by
     *	Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
     *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
     *	further tested and cleaned up by Zach Brown <zab@redhat.com>
     *	and Ingo Molnar <mingo@redhat.com>
     *
     *	Fixes
     *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
     *					thanks to Eric Gilmore
     *					and Rolf G. Tews
     *					for testing these extensively
     *	Paul Diefenbaugh	:	Added full ACPI support
     */
    
    #include <linux/mm.h>
    #include <linux/interrupt.h>
    #include <linux/init.h>
    #include <linux/delay.h>
    #include <linux/sched.h>
    #include <linux/smp_lock.h>
    #include <linux/mc146818rtc.h>
    #include <linux/compiler.h>
    #include <linux/acpi.h>
    
    #include <linux/module.h>
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #include <linux/sysdev.h>
    
    #include <linux/htirq.h>
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #include <asm/io.h>
    #include <asm/smp.h>
    #include <asm/desc.h>
    #include <asm/timer.h>
    
    #include <asm/i8259.h>
    
    #include <asm/nmi.h>
    
    #include <asm/hypertransport.h>
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    #include <mach_apic.h>
    
    #include <mach_apicdef.h>
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    #include "io_ports.h"
    
    /*
     * Optional renumbering hook taking an (ioapic, irq) pair.
     * NOTE(review): neither an assignment nor a caller is visible in this part
     * of the file -- confirm the exact contract against its users.
     */
    int (*ioapic_renumber_irq)(int ioapic, int irq);
    /* NOTE(review): presumably a count of mis-delivered IRQs; not referenced in the visible part of the file. */
    atomic_t irq_mis_count;
    
    
    /* Where, if anywhere, the i8259 is connected in external-int mode (-1 = not found) */
    static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     * Serializes the index/data register sequences on the IO-APICs; taken with
     * spin_lock_irqsave() by the entry read/write and mask/unmask helpers below.
     */
    static DEFINE_SPINLOCK(ioapic_lock);
    
    /* NOTE(review): not used in the visible part of the file; presumably protects vector allocation. */
    static DEFINE_SPINLOCK(vector_lock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    /*
     * Init-time flag, enabled (1) by default.
     * NOTE(review): the consumer is outside the visible part of the file;
     * presumably selects routing the timer interrupt through the 8254 -- confirm.
     */
    int timer_over_8254 __initdata = 1;
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     *	Is the SiS APIC rmw bug present ?
     *	-1 = don't know, 0 = no, 1 = yes
     *
     *	io_apic_modify() tests this for non-zero, so the "don't know" state
     *	is treated like "yes" (the index register is rewritten).
     */
    int sis_apic_bug = -1;
    
    /*
     * # of IRQ routing registers, indexed by IO-APIC number
     */
    int nr_ioapic_registers[MAX_IO_APICS];
    
    
    /* NOTE(review): init-time flag; its users are outside the visible part of the file. */
    static int disable_timer_pin_1 __initdata;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     * Rough estimation of how many shared IRQs there are, can
     * be changed anytime.
     */
    #define MAX_PLUS_SHARED_IRQS NR_IRQS
    #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
    
    /*
     * This is performance-critical, we want to do it O(1)
     *
     * the indexing order of this array favors 1:1 mappings
     * between pins and IRQs.
     *
     * Slots [0, NR_IRQS) are indexed directly by IRQ number.  When an IRQ is
     * wired to more than one pin, add_pin_to_irq() chains extra slots taken
     * from [NR_IRQS, PIN_MAP_SIZE) through 'next' (an index into this array;
     * 0 terminates the chain).  A 'pin' of -1 marks an unused slot.
     * NOTE(review): the code that initializes 'pin' to -1 is not in the
     * visible part of the file.
     */
    
    static struct irq_pin_list {
    	int apic, pin, next;
    } irq_2_pin[PIN_MAP_SIZE];
    
    
    /*
     * Memory-mapped register window of one IO-APIC as used by the accessors
     * below: the register number is written to 'index', then the register
     * contents are read from or written to 'data'.  'unused' pads the gap
     * between the two (with 4-byte unsigned int, 'data' sits at offset 0x10).
     */
    struct io_apic {
    	unsigned int index;
    	unsigned int unused[3];
    	unsigned int data;
    };
    
    /*
     * Return the mapped register window of IO-APIC number @idx: the fixmap
     * page reserved for that IO-APIC plus the in-page offset of its
     * address as recorded in mp_ioapics[].
     */
    static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
    {
    	void __iomem *fixmap_page =
    		(void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx);
    	unsigned long page_offset = mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK;

    	return fixmap_page + page_offset;
    }
    
    static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
    {
    	struct io_apic __iomem *io_apic = io_apic_base(apic);
    	writel(reg, &io_apic->index);
    	return readl(&io_apic->data);
    }
    
    static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
    {
    	struct io_apic __iomem *io_apic = io_apic_base(apic);
    	writel(reg, &io_apic->index);
    	writel(value, &io_apic->data);
    }
    
    /*
     * Re-write a value: to be used for read-modify-write
     * cycles where the read already set up the index register.
     *
     * Older SiS APIC requires we rewrite the index register
     */
    static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
    {
    	volatile struct io_apic *io_apic = io_apic_base(apic);
    	if (sis_apic_bug)
    		writel(reg, &io_apic->index);
    	writel(value, &io_apic->data);
    }
    
    
    /*
     * Overlay of a routing entry with the two 32-bit words that are actually
     * transferred to/from the IO-APIC: w1 goes to register 0x10 + 2*pin and
     * w2 to register 0x11 + 2*pin (see ioapic_read_entry()).
     */
    union entry_union {
    	struct { u32 w1, w2; };
    	struct IO_APIC_route_entry entry;
    };
    
    static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
    {
    	union entry_union eu;
    	unsigned long flags;
    	spin_lock_irqsave(&ioapic_lock, flags);
    	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
    	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
    	spin_unlock_irqrestore(&ioapic_lock, flags);
    	return eu.entry;
    }
    
    
    /*
     * When we write a new IO APIC routing entry, we need to write the high
     * word first! If the mask bit in the low word is clear, we will enable
     * the interrupt, and we need to make sure the entry is fully populated
     * before that happens.
     */
    
    static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
    {
    	unsigned long flags;
    	union entry_union eu;
    	eu.entry = e;
    
    	spin_lock_irqsave(&ioapic_lock, flags);
    	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
    	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
    	spin_unlock_irqrestore(&ioapic_lock, flags);
    }
    
    /*
     * When we mask an IO APIC routing entry, we need to write the low
     * word first, in order to set the mask bit before we change the
     * high bits!
     */
    static void ioapic_mask_entry(int apic, int pin)
    {
    	unsigned long flags;
    	union entry_union eu = { .entry.mask = 1 };
    
    
    	spin_lock_irqsave(&ioapic_lock, flags);
    	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
    	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
    	spin_unlock_irqrestore(&ioapic_lock, flags);
    }
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     * Record that @irq is delivered through (@apic, @pin).
     *
     * The common case is a 1:1 IRQ<->pin mapping, which only fills the
     * IRQ's own slot in irq_2_pin[].  Shared ISA-space IRQs get an extra
     * slot from the overflow area (starting at NR_IRQS) appended to the
     * end of the IRQ's chain; running out of overflow slots panics.
     */
    static void add_pin_to_irq(unsigned int irq, int apic, int pin)
    {
    	static int first_free_entry = NR_IRQS;
    	struct irq_pin_list *tail;

    	/* Walk to the last slot of this IRQ's chain. */
    	for (tail = &irq_2_pin[irq]; tail->next; tail = &irq_2_pin[tail->next])
    		;

    	/* Last slot already holds a pin: link a fresh overflow slot. */
    	if (tail->pin != -1) {
    		tail->next = first_free_entry;
    		tail = &irq_2_pin[tail->next];
    		if (++first_free_entry >= PIN_MAP_SIZE)
    			panic("io_apic.c: whoops");
    	}

    	tail->apic = apic;
    	tail->pin = pin;
    }
    
    /*
     * Reroute an IRQ to a different pin: every slot in @irq's chain that
     * currently names (@oldapic, @oldpin) is rewritten to
     * (@newapic, @newpin).
     */
    static void __init replace_pin_at_irq(unsigned int irq,
    				      int oldapic, int oldpin,
    				      int newapic, int newpin)
    {
    	struct irq_pin_list *slot = &irq_2_pin[irq];
    	int next;

    	do {
    		if (slot->apic == oldapic && slot->pin == oldpin) {
    			slot->apic = newapic;
    			slot->pin = newpin;
    		}
    		/* a next index of 0 ends the chain */
    		next = slot->next;
    		slot = &irq_2_pin[next];
    	} while (next);
    }
    
    /*
     * Read-modify-write the low routing word of every pin chained to @irq:
     * the bits in @disable are cleared, then the bits in @enable are set.
     * Stops at the first unused slot (pin == -1) or at the end of the chain.
     * Does no locking itself; the callers in this file take ioapic_lock.
     */
    static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
    {
    	struct irq_pin_list *slot = &irq_2_pin[irq];
    	int next;

    	do {
    		unsigned int pin = slot->pin;
    		unsigned int low_reg, word;

    		if (pin == -1)
    			return;

    		low_reg = 0x10 + pin*2;
    		word = io_apic_read(slot->apic, low_reg);
    		word = (word & ~disable) | enable;
    		/* the read above already selected low_reg */
    		io_apic_modify(slot->apic, low_reg, word);

    		next = slot->next;
    		slot = &irq_2_pin[next];
    	} while (next);
    }
    
    /*
     * mask = 1
     *
     * Set bit 16 (0x00010000, the mask bit) in the low routing word of every
     * pin of @irq.  Caller holds ioapic_lock (see mask_IO_APIC_irq()).
     */
    static void __mask_IO_APIC_irq (unsigned int irq)
    {
    	__modify_IO_APIC_irq(irq, 0x00010000, 0);
    }
    
    /*
     * mask = 0
     *
     * Clear bit 16 (0x00010000, the mask bit) in the low routing word of
     * every pin of @irq.  Caller holds ioapic_lock (see unmask_IO_APIC_irq()).
     */
    static void __unmask_IO_APIC_irq (unsigned int irq)
    {
    	__modify_IO_APIC_irq(irq, 0, 0x00010000);
    }
    
    /*
     * mask = 1, trigger = 0
     *
     * Set the mask bit (0x00010000) and clear the trigger bit (0x00008000)
     * in the low routing word of every pin of @irq.
     */
    static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
    {
    	__modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
    }
    
    /*
     * mask = 0, trigger = 1
     *
     * Clear the mask bit (0x00010000) and set the trigger bit (0x00008000)
     * in the low routing word of every pin of @irq.
     */
    static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
    {
    	__modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
    }
    
    /* Mask every pin of @irq, holding ioapic_lock with interrupts disabled. */
    static void mask_IO_APIC_irq (unsigned int irq)
    {
    	unsigned long flags;
    
    	spin_lock_irqsave(&ioapic_lock, flags);
    	__mask_IO_APIC_irq(irq);
    	spin_unlock_irqrestore(&ioapic_lock, flags);
    }
    
    /* Unmask every pin of @irq, holding ioapic_lock with interrupts disabled. */
    static void unmask_IO_APIC_irq (unsigned int irq)
    {
    	unsigned long flags;
    
    	spin_lock_irqsave(&ioapic_lock, flags);
    	__unmask_IO_APIC_irq(irq);
    	spin_unlock_irqrestore(&ioapic_lock, flags);
    }
    
    static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
    {
    	struct IO_APIC_route_entry entry;
    	
    	/* Check delivery_mode to be sure we're not clearing an SMI pin */
    
    	entry = ioapic_read_entry(apic, pin);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	if (entry.delivery_mode == dest_SMI)
    		return;
    
    	/*
    	 * Disable it in the IO-APIC irq-routing table:
    	 */
    
    	ioapic_mask_entry(apic, pin);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    }
    
    /*
     * Run clear_IO_APIC_pin() over every routing register of every
     * IO-APIC, in ascending (apic, pin) order.
     */
    static void clear_IO_APIC (void)
    {
    	int apic;

    	for (apic = 0; apic < nr_ioapics; apic++) {
    		int pin = 0;

    		while (pin < nr_ioapic_registers[apic]) {
    			clear_IO_APIC_pin(apic, pin);
    			pin++;
    		}
    	}
    }
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     * Retarget @irq to the CPUs in @cpumask.
     *
     * The requested mask is first restricted to online CPUs; if nothing is
     * left, TARGET_CPUS is used instead.  The resulting mask is converted
     * to an APIC destination id and written into bits 24-31 of the high
     * routing word (register 0x11 + 2*pin) of every pin chained to @irq,
     * under ioapic_lock.
     */
    static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
    {
    	unsigned long flags;
    	int pin;
    	struct irq_pin_list *entry = irq_2_pin + irq;
    	unsigned int apicid_value;
    	cpumask_t tmp;	/* scratch mask: requested CPUs that are online */

    	cpus_and(tmp, cpumask, cpu_online_map);
    	if (cpus_empty(tmp))
    		tmp = TARGET_CPUS;

    	cpus_and(cpumask, tmp, CPU_MASK_ALL);

    	apicid_value = cpu_mask_to_apicid(cpumask);
    	/* Prepare to do the io_apic_write */
    	apicid_value = apicid_value << 24;
    	spin_lock_irqsave(&ioapic_lock, flags);
    	for (;;) {
    		pin = entry->pin;
    		if (pin == -1)
    			break;
    		io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
    		if (!entry->next)
    			break;
    		entry = irq_2_pin + entry->next;
    	}
    	spin_unlock_irqrestore(&ioapic_lock, flags);
    }
    
    #if defined(CONFIG_IRQBALANCE)
    # include <asm/processor.h>	/* kernel_thread() */
    # include <linux/kernel_stat.h>	/* kstat */
    # include <linux/slab.h>		/* kmalloc() */
    # include <linux/timer.h>	/* time_after() */
     
    
    /*
     * Debug output for the IRQ balancer.  With CONFIG_BALANCED_IRQ_DEBUG set,
     * TDprintk()/Dprintk() prefix each message with jiffies, file and line;
     * otherwise both expand to nothing.
     */
    #ifdef CONFIG_BALANCED_IRQ_DEBUG
    #  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
    #  define Dprintk(x...) do { TDprintk(x); } while (0)
    # else
    #  define TDprintk(x...) 
    #  define Dprintk(x...) 
    # endif
    
    /*
     * Initial value of irqbalance_disabled meaning "not decided yet":
     * balanced_irq_init() replaces it with NO_BALANCE_IRQ unless the
     * "noirqbalance" boot option has already set the flag.
     */
    #define IRQBALANCE_CHECK_ARCH -999
    
    /* Bounds and step sizes (in jiffies) for balanced_irq_interval. */
    #define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
    #define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
    #define BALANCED_IRQ_MORE_DELTA		(HZ/10)
    #define BALANCED_IRQ_LESS_DELTA		(HZ)
    
    /* Non-zero disables balancing (see balance_irq() and balanced_irq_init()). */
    static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
    /* Set by balanced_irq_init() when smp_num_siblings > 1 and CPUs numbered 2 or above are online. */
    static int physical_balance __read_mostly;
    /*
     * Time between balancing passes: shortened by BALANCED_IRQ_LESS_DELTA after
     * an IRQ was moved, lengthened by BALANCED_IRQ_MORE_DELTA when nothing was
     * worth moving, clamped to the MIN/MAX values above.
     */
    static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    /*
     * Per-CPU bookkeeping for the IRQ balancer.  The two arrays hold NR_IRQS
     * entries each and are allocated by balanced_irq_init().
     */
    static struct irq_cpu_info {
    	unsigned long * last_irq;	/* per-IRQ interrupt count seen at the previous pass */
    	unsigned long * irq_delta;	/* per-IRQ load accumulated in the current pass */
    	unsigned long irq;		/* total load accumulated in the current pass */
    } irq_cpu_data[NR_CPUS];
    
    /* Accessors for the fields above. */
    #define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
    #define LAST_CPU_IRQ(cpu,irq)   (irq_cpu_data[cpu].last_irq[irq])
    #define IRQ_DELTA(cpu,irq) 	(irq_cpu_data[cpu].irq_delta[irq])
    
    /* True when @cpu is idle and its idle_timestamp is more than one tick older than @now. */
    #define IDLE_ENOUGH(cpu,now) \
    	(idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
    
    /* True when @cpu is a member of @allowed_mask. */
    #define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
    
    /* First CPU in the sibling map of CPU @i; used as the index of its package. */
    #define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
    
    
    /*
     * Per-IRQ mask of CPUs the balancer may move that IRQ to; every IRQ
     * starts out allowed on all CPUs.  Updated via set_balance_irq_affinity().
     */
    static cpumask_t balance_irq_affinity[NR_IRQS] = {
    	[0 ... NR_IRQS-1] = CPU_MASK_ALL
    };
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    /*
     * Restrict the CPUs the balancer may pick for @irq to @mask.
     * @irq indexes balance_irq_affinity[] directly and is not range-checked
     * here, so it must be below NR_IRQS.
     */
    void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
    {
    	balance_irq_affinity[irq] = mask;
    }
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    /*
     * Choose the CPU an IRQ currently on @curr_cpu should move to.
     *
     * Starting at the neighbour of @curr_cpu, CPU numbers are stepped
     * upwards (@direction == 1) or downwards (any other value), wrapping
     * around within [0, NR_CPUS).  A candidate is accepted when it is online
     * and in @allowed_mask; while search_idle is set it must additionally be
     * IDLE_ENOUGH relative to @now.  Once the scan has come back to
     * @curr_cpu without success the idle requirement is dropped.
     *
     * Returns the chosen CPU number (possibly @curr_cpu itself).
     *
     * NOTE(review): if no online CPU is in @allowed_mask the loop condition
     * never becomes false and the function does not return.
     */
    static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
    			unsigned long now, int direction)
    {
    	int search_idle = 1;
    	int cpu = curr_cpu;
    
    	/* Enter the loop at the stepping code so @curr_cpu is not tested first. */
    	goto inside;
    
    	do {
    		/* Completed a full lap: stop insisting on an idle CPU. */
    		if (unlikely(cpu == curr_cpu))
    			search_idle = 0;
    inside:
    		if (direction == 1) {
    			cpu++;
    			if (cpu >= NR_CPUS)
    				cpu = 0;
    		} else {
    			cpu--;
    			if (cpu == -1)
    				cpu = NR_CPUS-1;
    		}
    	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
    			(search_idle && !IDLE_ENOUGH(cpu,now)));
    
    	return cpu;
    }
    
    /*
     * Rotate @irq away from @cpu: ask move() for the next suitable CPU in
     * ascending direction among the online CPUs permitted by
     * balance_irq_affinity[@irq] and, if that differs from @cpu, queue the
     * new destination with set_pending_irq().  Does nothing while
     * balancing is disabled.
     */
    static inline void balance_irq(int cpu, int irq)
    {
    	unsigned long now = jiffies;
    	cpumask_t allowed_mask;
    	unsigned int new_cpu;

    	if (irqbalance_disabled)
    		return;

    	cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
    	new_cpu = move(cpu, allowed_mask, now, 1);
    	if (cpu != new_cpu) {
    		set_pending_irq(irq, cpumask_of_cpu(new_cpu));
    	}
    }
    
    /*
     * Fallback balancing step: for every online CPU, call balance_irq() on
     * each IRQ that has a handler installed and whose accumulated load on
     * the CPU's package is at least @useful_load_threshold.  Afterwards the
     * balancing interval is shortened by BALANCED_IRQ_LESS_DELTA (but not
     * below MIN_BALANCED_IRQ_INTERVAL).
     */
    static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
    {
    	int i, j;
    	Dprintk("Rotating IRQs among CPUs.\n");

    	for_each_online_cpu(i) {
    		for (j = 0; j < NR_IRQS; j++) {
    			if (!irq_desc[j].action)
    				continue;
    			/* Is it a significant load ?  */
    			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
    						useful_load_threshold)
    				continue;
    			balance_irq(i, j);
    		}
    	}
    	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
    		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
    }
    
    static void do_irq_balance(void)
    {
    	int i, j;
    	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
    	unsigned long move_this_load = 0;
    	int max_loaded = 0, min_loaded = 0;
    	int load;
    	unsigned long useful_load_threshold = balanced_irq_interval + 10;
    	int selected_irq;
    	int tmp_loaded, first_attempt = 1;
    	unsigned long tmp_cpu_irq;
    	unsigned long imbalance = 0;
    	cpumask_t allowed_mask, target_cpu_mask, tmp;
    
    
    	for_each_possible_cpu(i) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		int package_index;
    		CPU_IRQ(i) = 0;
    		if (!cpu_online(i))
    			continue;
    		package_index = CPU_TO_PACKAGEINDEX(i);
    		for (j = 0; j < NR_IRQS; j++) {
    			unsigned long value_now, delta;
    			/* Is this an active IRQ? */
    			if (!irq_desc[j].action)
    				continue;
    			if ( package_index == i )
    				IRQ_DELTA(package_index,j) = 0;
    			/* Determine the total count per processor per IRQ */
    			value_now = (unsigned long) kstat_cpu(i).irqs[j];
    
    			/* Determine the activity per processor per IRQ */
    			delta = value_now - LAST_CPU_IRQ(i,j);
    
    			/* Update last_cpu_irq[][] for the next time */
    			LAST_CPU_IRQ(i,j) = value_now;
    
    			/* Ignore IRQs whose rate is less than the clock */
    			if (delta < useful_load_threshold)
    				continue;
    			/* update the load for the processor or package total */
    			IRQ_DELTA(package_index,j) += delta;
    
    			/* Keep track of the higher numbered sibling as well */
    			if (i != package_index)
    				CPU_IRQ(i) += delta;
    			/*
    			 * We have sibling A and sibling B in the package
    			 *
    			 * cpu_irq[A] = load for cpu A + load for cpu B
    			 * cpu_irq[B] = load for cpu B
    			 */
    			CPU_IRQ(package_index) += delta;
    		}
    	}
    	/* Find the least loaded processor package */
    
    	for_each_online_cpu(i) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		if (i != CPU_TO_PACKAGEINDEX(i))
    			continue;
    		if (min_cpu_irq > CPU_IRQ(i)) {
    			min_cpu_irq = CPU_IRQ(i);
    			min_loaded = i;
    		}
    	}
    	max_cpu_irq = ULONG_MAX;
    
    tryanothercpu:
    	/* Look for heaviest loaded processor.
    	 * We may come back to get the next heaviest loaded processor.
    	 * Skip processors with trivial loads.
    	 */
    	tmp_cpu_irq = 0;
    	tmp_loaded = -1;
    
    	for_each_online_cpu(i) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		if (i != CPU_TO_PACKAGEINDEX(i))
    			continue;
    		if (max_cpu_irq <= CPU_IRQ(i)) 
    			continue;
    		if (tmp_cpu_irq < CPU_IRQ(i)) {
    			tmp_cpu_irq = CPU_IRQ(i);
    			tmp_loaded = i;
    		}
    	}
    
    	if (tmp_loaded == -1) {
     	 /* In the case of small number of heavy interrupt sources, 
    	  * loading some of the cpus too much. We use Ingo's original 
    	  * approach to rotate them around.
    	  */
    		if (!first_attempt && imbalance >= useful_load_threshold) {
    			rotate_irqs_among_cpus(useful_load_threshold);
    			return;
    		}
    		goto not_worth_the_effort;
    	}
    	
    	first_attempt = 0;		/* heaviest search */
    	max_cpu_irq = tmp_cpu_irq;	/* load */
    	max_loaded = tmp_loaded;	/* processor */
    	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
    	
    	Dprintk("max_loaded cpu = %d\n", max_loaded);
    	Dprintk("min_loaded cpu = %d\n", min_loaded);
    	Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
    	Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
    	Dprintk("load imbalance = %lu\n", imbalance);
    
    	/* if imbalance is less than approx 10% of max load, then
    	 * observe diminishing returns action. - quit
    	 */
    	if (imbalance < (max_cpu_irq >> 3)) {
    		Dprintk("Imbalance too trivial\n");
    		goto not_worth_the_effort;
    	}
    
    tryanotherirq:
    	/* if we select an IRQ to move that can't go where we want, then
    	 * see if there is another one to try.
    	 */
    	move_this_load = 0;
    	selected_irq = -1;
    	for (j = 0; j < NR_IRQS; j++) {
    		/* Is this an active IRQ? */
    		if (!irq_desc[j].action)
    			continue;
    		if (imbalance <= IRQ_DELTA(max_loaded,j))
    			continue;
    		/* Try to find the IRQ that is closest to the imbalance
    		 * without going over.
    		 */
    		if (move_this_load < IRQ_DELTA(max_loaded,j)) {
    			move_this_load = IRQ_DELTA(max_loaded,j);
    			selected_irq = j;
    		}
    	}
    	if (selected_irq == -1) {
    		goto tryanothercpu;
    	}
    
    	imbalance = move_this_load;
    	
    	/* For physical_balance case, we accumlated both load
    	 * values in the one of the siblings cpu_irq[],
    	 * to use the same code for physical and logical processors
    	 * as much as possible. 
    	 *
    	 * NOTE: the cpu_irq[] array holds the sum of the load for
    	 * sibling A and sibling B in the slot for the lowest numbered
    	 * sibling (A), _AND_ the load for sibling B in the slot for
    	 * the higher numbered sibling.
    	 *
    	 * We seek the least loaded sibling by making the comparison
    	 * (A+B)/2 vs B
    	 */
    	load = CPU_IRQ(min_loaded) >> 1;
    	for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
    		if (load > CPU_IRQ(j)) {
    			/* This won't change cpu_sibling_map[min_loaded] */
    			load = CPU_IRQ(j);
    			min_loaded = j;
    		}
    	}
    
    
    	cpus_and(allowed_mask,
    		cpu_online_map,
    		balance_irq_affinity[selected_irq]);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	target_cpu_mask = cpumask_of_cpu(min_loaded);
    	cpus_and(tmp, target_cpu_mask, allowed_mask);
    
    	if (!cpus_empty(tmp)) {
    
    		Dprintk("irq = %d moved to cpu = %d\n",
    				selected_irq, min_loaded);
    		/* mark for change destination */
    
    		set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		/* Since we made a change, come back sooner to 
    		 * check for more variation.
    		 */
    		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
    			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);	
    		return;
    	}
    	goto tryanotherirq;
    
    not_worth_the_effort:
    	/*
    	 * if we did not find an IRQ to move, then adjust the time interval
    	 * upward
    	 */
    	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
    		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);	
    	Dprintk("IRQ worth rotating not found\n");
    	return;
    }
    
    /*
     * Body of the "kirqd" kernel thread started by balanced_irq_init().
     *
     * All IRQs are first queued for CPU 0 as a starting point.  The thread
     * then sleeps in a loop and, once more than balanced_irq_interval
     * jiffies have passed since the previous pass, runs do_irq_balance()
     * with preemption disabled.  The loop never exits; @unused is ignored.
     */
    static int balanced_irq(void *unused)
    {
    	int i;
    	unsigned long prev_balance_time = jiffies;
    	long time_remaining = balanced_irq_interval;

    	daemonize("kirqd");

    	/* push everything to CPU 0 to give us a starting point.  */
    	for (i = 0 ; i < NR_IRQS ; i++) {
    		irq_desc[i].pending_mask = cpumask_of_cpu(0);
    		set_pending_irq(i, cpumask_of_cpu(0));
    	}

    	for ( ; ; ) {
    		time_remaining = schedule_timeout_interruptible(time_remaining);
    		if (time_after(jiffies,
    				prev_balance_time+balanced_irq_interval)) {
    			preempt_disable();
    			do_irq_balance();
    			prev_balance_time = jiffies;
    			/* do_irq_balance() may have changed the interval */
    			time_remaining = balanced_irq_interval;
    			preempt_enable();
    		}
    	}
    	return 0;
    }
    
    /*
     * Late initcall that sets up and starts the IRQ balancer.
     *
     * Unless the command line already decided, the subarchitecture default
     * (NO_BALANCE_IRQ) is used.  Balancing is also switched off when fewer
     * than two CPUs are online.  Otherwise the per-CPU statistics arrays
     * are allocated and zeroed and the kirqd thread is spawned.  If an
     * allocation or the thread creation fails, every array is freed again.
     *
     * Always returns 0, so a failure here does not fail the initcall.
     */
    static int __init balanced_irq_init(void)
    {
    	int i;
    	cpumask_t tmp;

    	/* tmp is non-empty iff some CPU numbered 2 or above is online */
    	cpus_shift_right(tmp, cpu_online_map, 2);
    	/* When not overwritten by the command line ask subarchitecture. */
    	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
    		irqbalance_disabled = NO_BALANCE_IRQ;
    	if (irqbalance_disabled)
    		return 0;

    	 /* disable irqbalance completely if there is only one processor online */
    	if (num_online_cpus() < 2) {
    		irqbalance_disabled = 1;
    		return 0;
    	}
    	/*
    	 * Enable physical balance only if more than 1 physical processor
    	 * is present
    	 */
    	if (smp_num_siblings > 1 && !cpus_empty(tmp))
    		physical_balance = 1;

    	for_each_online_cpu(i) {
    		irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
    		irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
    		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
    			printk(KERN_ERR "balanced_irq_init: out of memory\n");
    			goto failed;
    		}
    		memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
    		memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
    	}

    	printk(KERN_INFO "Starting balanced_irq\n");
    	if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0)
    		return 0;
    	else
    		printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq\n");
    failed:
    	/* kfree(NULL) is a no-op, so CPUs that never got arrays are fine */
    	for_each_possible_cpu(i) {
    		kfree(irq_cpu_data[i].irq_delta);
    		irq_cpu_data[i].irq_delta = NULL;
    		kfree(irq_cpu_data[i].last_irq);
    		irq_cpu_data[i].last_irq = NULL;
    	}
    	return 0;
    }
    
    /*
     * Handler for the "noirqbalance" boot option: turn the IRQ balancer off.
     * @str is not examined.
     *
     * Returns 1, like the other __setup() handlers in this file, to signal
     * that the option has been handled.
     */
    int __init irqbalance_disable(char *str)
    {
    	irqbalance_disabled = 1;
    	return 1;
    }
    
    /* "noirqbalance" on the kernel command line calls irqbalance_disable(). */
    __setup("noirqbalance", irqbalance_disable);
    
    /* Register balanced_irq_init() as a late initcall. */
    late_initcall(balanced_irq_init);
    #endif /* CONFIG_IRQBALANCE */
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    #ifndef CONFIG_SMP
    /*
     * Send interrupt @vector to the current CPU (fixed delivery mode,
     * "self" destination shorthand).  Only built when CONFIG_SMP is off.
     */
    void fastcall send_IPI_self(int vector)
    {
    	const unsigned int icr_value =
    		APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;

    	/* Wait for idle. */
    	apic_wait_icr_idle();

    	/* Send the IPI. The write to APIC_ICR fires this off. */
    	apic_write_around(APIC_ICR, icr_value);
    }
    #endif /* !CONFIG_SMP */
    
    
    /*
     * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
     * specific CPU-side IRQs.
     */
    
    #define MAX_PIRQS 8
    static int pirq_entries [MAX_PIRQS];
    static int pirqs_enabled;
    int skip_ioapic_setup;
    
    /*
     * Handler for the "noapic" boot option: request that IO-APIC setup be
     * skipped.  @str is not examined.  Returns 1 (option handled).
     */
    static int __init ioapic_setup(char *str)
    {
    	skip_ioapic_setup = 1;
    	return 1;
    }
    
    __setup("noapic", ioapic_setup);
    
    /*
     * Handler for the "pirq=" boot option.
     *
     * @str is parsed with get_options() into up to MAX_PIRQS integers
     * (ints[0] holds the count).  All PIRQ redirections are reset to -1,
     * then the i-th value given is stored for PIRQ slot MAX_PIRQS-1-i.
     * Returns 1 (option handled).
     */
    static int __init ioapic_pirq_setup(char *str)
    {
    	int ints[MAX_PIRQS+1];
    	int idx, count;

    	get_options(str, ARRAY_SIZE(ints), ints);

    	for (idx = 0; idx < MAX_PIRQS; idx++)
    		pirq_entries[idx] = -1;

    	pirqs_enabled = 1;
    	apic_printk(APIC_VERBOSE, KERN_INFO
    			"PIRQ redirection, working around broken MP-BIOS.\n");

    	/* Never consume more values than there are PIRQ slots. */
    	count = (ints[0] < MAX_PIRQS) ? ints[0] : MAX_PIRQS;

    	for (idx = 0; idx < count; idx++) {
    		int irq = ints[idx + 1];

    		apic_printk(APIC_VERBOSE, KERN_DEBUG
    				"... PIRQ%d -> IRQ %d\n", idx, irq);
    		/*
    		 * PIRQs are mapped upside down, usually.
    		 */
    		pirq_entries[MAX_PIRQS - 1 - idx] = irq;
    	}
    	return 1;
    }

    __setup("pirq=", ioapic_pirq_setup);
    
    /*
     * Find the IRQ entry number of a certain pin: the index of the first
     * mp_irqs[] entry of interrupt type @type whose destination is @pin on
     * IO-APIC @apic (or on all IO-APICs).  Returns -1 if there is none.
     */
    static int find_irq_entry(int apic, int pin, int type)
    {
    	int idx;

    	for (idx = 0; idx < mp_irq_entries; idx++) {
    		if (mp_irqs[idx].mpc_irqtype != type)
    			continue;
    		if (mp_irqs[idx].mpc_dstapic != mp_ioapics[apic].mpc_apicid &&
    		    mp_irqs[idx].mpc_dstapic != MP_APIC_ALL)
    			continue;
    		if (mp_irqs[idx].mpc_dstirq == pin)
    			return idx;
    	}

    	return -1;
    }
    
    /*
     * Find the pin to which IRQ[irq] (ISA) is connected
     *
     * Scans mp_irqs[] for the first entry of interrupt type @type whose
     * source bus is ISA, EISA, MCA or NEC98 and whose source bus IRQ is
     * @irq, and returns that entry's destination pin; -1 if none matches.
     */
    static int __init find_isa_irq_pin(int irq, int type)
    {
    	int i;

    	for (i = 0; i < mp_irq_entries; i++) {
    		int lbus = mp_irqs[i].mpc_srcbus;

    		if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
    		     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
    		     mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
    		     mp_bus_id_to_type[lbus] == MP_BUS_NEC98
    		    ) &&
    		    (mp_irqs[i].mpc_irqtype == type) &&
    		    (mp_irqs[i].mpc_srcbusirq == irq))

    			return mp_irqs[i].mpc_dstirq;
    	}
    	return -1;
    }
    
    
    /*
     * Find the IO-APIC to which IRQ[irq] (ISA) is connected: take the first
     * mp_irqs[] entry of interrupt type @type on an ISA, EISA, MCA or NEC98
     * bus with source bus IRQ @irq, and return the index of the IO-APIC
     * whose id equals that entry's destination APIC id.  Returns -1 when
     * no entry matches or no IO-APIC has that id.
     */
    static int __init find_isa_irq_apic(int irq, int type)
    {
    	int idx, apic;

    	for (idx = 0; idx < mp_irq_entries; idx++) {
    		int bus_type = mp_bus_id_to_type[mp_irqs[idx].mpc_srcbus];

    		if (bus_type != MP_BUS_ISA &&
    		    bus_type != MP_BUS_EISA &&
    		    bus_type != MP_BUS_MCA &&
    		    bus_type != MP_BUS_NEC98)
    			continue;
    		if (mp_irqs[idx].mpc_irqtype != type)
    			continue;
    		if (mp_irqs[idx].mpc_srcbusirq != irq)
    			continue;

    		/* Only the first matching entry is ever considered. */
    		for (apic = 0; apic < nr_ioapics; apic++) {
    			if (mp_ioapics[apic].mpc_apicid == mp_irqs[idx].mpc_dstapic)
    				return apic;
    		}
    		return -1;
    	}

    	return -1;
    }
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     * Find a specific PCI IRQ entry.
     * Not an __init, possibly needed by modules
     */
    /* Forward declaration; the definition is outside the visible part of the file. */
    static int pin_2_irq(int idx, int apic, int pin);
    
    /*
     * Look up the IRQ for PCI interrupt pin @pin of device @slot on bus @bus.
     *
     * mp_irqs[] is scanned for entries of interrupt type 0 on a PCI bus whose
     * bus number and slot (bits 2-6 of mpc_srcbusirq) match.  An entry whose
     * pin (bits 0-1 of mpc_srcbusirq) also matches is returned immediately;
     * otherwise the IRQ of the first bus/slot match is returned as a best
     * guess.  Returns -1 when @bus is unknown or nothing matches.
     */
    int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
    {
    	int apic, i, best_guess = -1;
    
    	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
    		"slot:%d, pin:%d.\n", bus, slot, pin);
    	if (mp_bus_id_to_pci_bus[bus] == -1) {
    		printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
    		return -1;
    	}
    	for (i = 0; i < mp_irq_entries; i++) {
    		int lbus = mp_irqs[i].mpc_srcbus;
    
    		/*
    		 * Find the IO-APIC this entry is routed to.
    		 * NOTE(review): if none matches, apic == nr_ioapics after
    		 * the loop and is still passed to pin_2_irq() below.
    		 */
    		for (apic = 0; apic < nr_ioapics; apic++)
    			if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
    			    mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
    				break;
    
    		if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
    		    !mp_irqs[i].mpc_irqtype &&
    		    (bus == lbus) &&
    		    (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
    			int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
    
    			/* On the first IO-APIC, skip IRQs for which IO_APIC_IRQ() is false. */
    			if (!(apic || IO_APIC_IRQ(irq)))
    				continue;
    
    			if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
    				return irq;
    			/*
    			 * Use the first all-but-pin matching entry as a
    			 * best-guess fuzzy result for broken mptables.
    			 */
    			if (best_guess < 0)
    				best_guess = irq;
    		}
    	}
    	return best_guess;
    }
    
    EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    /*
     * This function currently is only a helper for the i386 smp boot process where 
     * we need to reprogram the ioredtbls to cater for the cpus which have come online
     * so mask in all cases should simply be TARGET_CPUS
     */
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     * Point every IO-APIC pin that has an mp_INT entry in the MP table at
     * TARGET_CPUS.  Does nothing when skip_ioapic_setup is 1.
     */
    void __init setup_ioapic_dest(void)
    {
    	int apic_idx, pin_idx;

    	if (skip_ioapic_setup == 1)
    		return;

    	for (apic_idx = 0; apic_idx < nr_ioapics; apic_idx++) {
    		for (pin_idx = 0; pin_idx < nr_ioapic_registers[apic_idx]; pin_idx++) {
    			int mp_idx = find_irq_entry(apic_idx, pin_idx, mp_INT);

    			/* Only pins described by the MP table are retargeted. */
    			if (mp_idx != -1)
    				set_ioapic_affinity_irq(pin_2_irq(mp_idx, apic_idx, pin_idx),
    							TARGET_CPUS);
    		}
    	}
    }
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    /*
     * EISA Edge/Level control register, ELCR
     *
     * Returns the ELCR bit of @irq: the register for IRQs 0-7 is at port
     * 0x4d0 and the one for IRQs 8-15 at 0x4d1, one bit per IRQ.  IRQs of
     * 16 and above are reported and treated as 0.
     */
    static int EISA_ELCR(unsigned int irq)
    {
    	unsigned int port;

    	if (irq >= 16) {
    		apic_printk(APIC_VERBOSE, KERN_INFO
    				"Broken MPtable reports ISA irq %d\n", irq);
    		return 0;
    	}

    	port = 0x4d0 + (irq >> 3);
    	return (inb(port) >> (irq & 7)) & 1;
    }
    
    /*
     * Default trigger mode and polarity per source bus type, for MP table
     * entries listed as "conforming".  @idx is an index into mp_irqs[].
     * Per the descriptions below, trigger 1 = level, 0 = edge.
     */
    
    /* EISA interrupts are always polarity zero and can be edge or level
     * trigger depending on the ELCR value.  If an interrupt is listed as
     * EISA conforming in the MP table, that means its trigger type must
     * be read in from the ELCR */
    
    #define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
    #define default_EISA_polarity(idx)	(0)
    
    /* ISA interrupts are always polarity zero edge triggered,
     * when listed as conforming in the MP table. */
    
    #define default_ISA_trigger(idx)	(0)
    #define default_ISA_polarity(idx)	(0)
    
    /* PCI interrupts are always polarity one level triggered,
     * when listed as conforming in the MP table. */
    
    #define default_PCI_trigger(idx)	(1)
    #define default_PCI_polarity(idx)	(1)
    
    /* MCA interrupts are always polarity zero level triggered,
     * when listed as conforming in the MP table. */
    
    #define default_MCA_trigger(idx)	(1)
    #define default_MCA_polarity(idx)	(0)
    
    /* NEC98 interrupts are always polarity zero edge triggered,
     * when listed as conforming in the MP table. */
    
    #define default_NEC98_trigger(idx)     (0)
    #define default_NEC98_polarity(idx)    (0)
    
    static int __init MPBIOS_polarity(int idx)
    {
    	int bus = mp_irqs[idx].mpc_srcbus;
    	int polarity;