Skip to content
Snippets Groups Projects
io_apic.c 103 KiB
Newer Older
  • Learn to ignore specific revisions
  • 	 * IOAPIC RTE aswell as interrupt-remapping table entry).
    	 * As this gets called during crash dump, keep this simple for now.
    
    	if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) {
    
    		struct IO_APIC_route_entry entry;
    
    		memset(&entry, 0, sizeof(entry));
    		entry.mask            = 0; /* Enabled */
    		entry.trigger         = 0; /* Edge */
    		entry.irr             = 0;
    		entry.polarity        = 0; /* High */
    		entry.delivery_status = 0;
    		entry.dest_mode       = 0; /* Physical */
    
    		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
    
    		entry.dest            = read_apic_id();
    
    
    		/*
    		 * Add it to the IO-APIC irq-routing table:
    		 */
    
    		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
    
    	/*
    	 * Use virtual wire A mode when interrupt remapping is enabled.
    	 */
    
    	if (cpu_has_apic || apic_from_smp_config())
    
    		disconnect_bsp_APIC(!intr_remapping_enabled &&
    				ioapic_i8259.pin != -1);
    
    #ifdef CONFIG_X86_32
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     * function to set the IO-APIC physical IDs based on the
     * values stored in the MPC table.
     *
     * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
     *
     * For every IO-APIC the ID recorded in mp_ioapics[].apicid is checked:
     * an out-of-range ID is replaced by the value currently held in the
     * chip's register 0, an ID that is already in use is replaced by the
     * first free one.  If the ID changed, the matching mp_irqs[].dstapic
     * entries are updated as well.  The resulting ID is then written to
     * register 0 and read back for verification.
     */
    void __init setup_ioapic_ids_from_mpc(void)
    {
    	union IO_APIC_reg_00 reg_00;
    	physid_mask_t phys_id_present_map;
    	int apic_id;
    	int i;
    	unsigned char old_id;
    	unsigned long flags;

    	/*
    	 * Don't check I/O APIC IDs for xAPIC systems.  They have
    	 * no meaning without the serial APIC bus.
    	 */
    	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
    		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
    		return;

    	/*
    	 * This is broken; anything with a real cpu count has to
    	 * circumvent this idiocy regardless.
    	 */
    	apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);

    	/*
    	 * Set the IOAPIC ID to the value stored in the MPC table.
    	 */
    	for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {

    		/* Read the register 0 value */
    		raw_spin_lock_irqsave(&ioapic_lock, flags);
    		reg_00.raw = io_apic_read(apic_id, 0);
    		raw_spin_unlock_irqrestore(&ioapic_lock, flags);

    		/* Remember the MPC value so routing entries can be fixed up. */
    		old_id = mp_ioapics[apic_id].apicid;

    		if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) {
    			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
    				apic_id, mp_ioapics[apic_id].apicid);
    			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
    				reg_00.bits.ID);
    			mp_ioapics[apic_id].apicid = reg_00.bits.ID;
    		}

    		/*
    		 * Sanity check, is the ID really free? Every APIC in a
    		 * system must have a unique ID or we get lots of nice
    		 * 'stuck on smp_invalidate_needed IPI wait' messages.
    		 */
    		if (apic->check_apicid_used(&phys_id_present_map,
    					mp_ioapics[apic_id].apicid)) {
    			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
    				apic_id, mp_ioapics[apic_id].apicid);
    			/* Pick the lowest ID not yet present in the map. */
    			for (i = 0; i < get_physical_broadcast(); i++)
    				if (!physid_isset(i, phys_id_present_map))
    					break;
    			if (i >= get_physical_broadcast())
    				panic("Max APIC ID exceeded!\n");
    			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
    				i);
    			physid_set(i, phys_id_present_map);
    			mp_ioapics[apic_id].apicid = i;
    		} else {
    			physid_mask_t tmp;

    			apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp);
    			apic_printk(APIC_VERBOSE, "Setting %d in the "
    					"phys_id_present_map\n",
    					mp_ioapics[apic_id].apicid);
    			physids_or(phys_id_present_map, phys_id_present_map, tmp);
    		}

    		/*
    		 * We need to adjust the IRQ routing table
    		 * if the ID changed.
    		 */
    		if (old_id != mp_ioapics[apic_id].apicid)
    			for (i = 0; i < mp_irq_entries; i++)
    				if (mp_irqs[i].dstapic == old_id)
    					mp_irqs[i].dstapic
    						= mp_ioapics[apic_id].apicid;

    		/*
    		 * Read the right value from the MPC table and
    		 * write it into the ID register.
    		 */
    		apic_printk(APIC_VERBOSE, KERN_INFO
    			"...changing IO-APIC physical APIC ID to %d ...",
    			mp_ioapics[apic_id].apicid);

    		reg_00.bits.ID = mp_ioapics[apic_id].apicid;
    		raw_spin_lock_irqsave(&ioapic_lock, flags);
    		io_apic_write(apic_id, 0, reg_00.raw);
    		raw_spin_unlock_irqrestore(&ioapic_lock, flags);

    		/*
    		 * Sanity check
    		 */
    		raw_spin_lock_irqsave(&ioapic_lock, flags);
    		reg_00.raw = io_apic_read(apic_id, 0);
    		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
    		if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
    			printk("could not set ID!\n");
    		else
    			apic_printk(APIC_VERBOSE, " ok.\n");
    	}
    }
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    int no_timer_check __initdata;
    
    
    /*
     * Handler for the "no_timer_check" boot option (registered with
     * __setup() right below): sets the no_timer_check flag.  The option's
     * argument string @s is not looked at.  Always returns 1.
     */
    static int __init notimercheck(char *s)
    {
    	no_timer_check = 1;
    	return 1;
    }
    __setup("no_timer_check", notimercheck);
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     * There is a nasty bug in some older SMP boards, their mptable lies
     * about the timer IRQ. We do the following to work around the situation:
     *
     *	- timer IRQ defaults to IO-APIC IRQ
     *	- if this function detects that timer IRQs are defunct, then we fall
     *	  back to ISA timer IRQs
     *
     * Returns 1 if jiffies advanced by more than 4 ticks during a delay of
     * roughly ten ticks, or if the check was disabled with the
     * "no_timer_check" boot option; returns 0 otherwise.
     */
    static int __init timer_irq_works(void)
    {
    	unsigned long t1 = jiffies;
    	unsigned long flags;

    	/* The user asked us to trust the timer without probing it. */
    	if (no_timer_check)
    		return 1;

    	/*
    	 * jiffies only advances while interrupts are enabled; put the
    	 * caller's interrupt state back once the delay is over.
    	 */
    	local_save_flags(flags);
    	local_irq_enable();
    	/* Let ten ticks pass... */
    	mdelay((10 * 1000) / HZ);
    	local_irq_restore(flags);

    	/*
    	 * Expect a few ticks at least, to be sure some possible
    	 * glue logic does not lock up after one or two first
    	 * ticks in a non-ExtINT mode.  Also the local APIC
    	 * might have cached one ExtINT interrupt.  Finally, at
    	 * least one tick may be lost due to delays.
    	 */
    	if (time_after(jiffies, t1 + 4))
    		return 1;
    	return 0;
    }
    
    /*
     * In the SMP+IOAPIC case it might happen that there are an unspecified
     * number of pending IRQ events unhandled. These cases are very rare,
     * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
     * better to do it this way as thus we do not have to be aware of
     * 'pending' interrupts in the IRQ path, except at this point.
     */
    /*
     * Edge triggered needs to resend any interrupt
     * that was delayed but this is now handled in the device
     * independent code.
     */
    
    /*
     * Starting up a edge-triggered IO-APIC interrupt is
     * nasty - we need to make sure that we get the edge.
     * If it is already asserted for some reason, we need
     * return 1 to indicate that is was pending.
     *
     * This is not complete - we should be able to fake
     * an edge even if it isn't on the 8259A...
     */
    
    static unsigned int startup_ioapic_irq(struct irq_data *data)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	int was_pending = 0, irq = data->irq;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	unsigned long flags;
    
    
    	raw_spin_lock_irqsave(&ioapic_lock, flags);
    
    	if (irq < legacy_pic->nr_legacy_irqs) {
    
    		legacy_pic->mask(irq);
    
    		if (legacy_pic->irq_pending(irq))
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			was_pending = 1;
    	}
    
    	__unmask_ioapic(data->chip_data);
    
    	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	return was_pending;
    }
    
    
    static int ioapic_retrigger_irq(struct irq_data *data)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	struct irq_cfg *cfg = data->chip_data;
    
    	raw_spin_lock_irqsave(&vector_lock, flags);
    
    	apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
    
    	raw_spin_unlock_irqrestore(&vector_lock, flags);
    
    /*
     * Level and edge triggered IO-APIC interrupts need different handling,
     * so we use two separate IRQ descriptors. Edge triggered IRQs can be
     * handled with the level-triggered descriptor, but that one has slightly
     * more overhead. Level-triggered interrupts cannot be handled with the
     * edge-triggered handler, without risking IRQ storms and other ugly
     * races.
     */
    
    #ifdef CONFIG_SMP
    
    /*
     * Send IRQ_MOVE_CLEANUP_VECTOR to every online CPU in cfg->old_domain and
     * then clear cfg->move_in_progress.
     *
     * The target set is normally built in a temporary cpumask so that a
     * single send_IPI_mask() call covers it.  If that mask cannot be
     * allocated (GFP_ATOMIC), the IPI is sent to one CPU at a time instead.
     */
    void send_cleanup_vector(struct irq_cfg *cfg)
    {
    	cpumask_var_t cleanup_mask;

    	if (likely(alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
    		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
    		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
    		free_cpumask_var(cleanup_mask);
    	} else {
    		unsigned int cpu;

    		/* No scratch mask available: fall back to per-CPU IPIs. */
    		for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask)
    			apic->send_IPI_mask(cpumask_of(cpu), IRQ_MOVE_CLEANUP_VECTOR);
    	}

    	cfg->move_in_progress = 0;
    }
    
    
    /*
     * Reprogram every IO-APIC pin attached to @irq.
     *
     * For each pin on cfg->irq_2_pin the vector bits of register
     * 0x10 + pin*2 are replaced with cfg->vector.  Unless the irq is
     * remapped, @dest is first written to register 0x11 + pin*2.
     *
     * The function takes no lock itself.
     */
    static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
    {
    	struct irq_pin_list *entry;
    	u8 vector = cfg->vector;

    	for_each_irq_pin(entry, cfg->irq_2_pin) {
    		int ioapic_idx = entry->apic;
    		int rte_low = 0x10 + entry->pin * 2;
    		unsigned int rte;

    		/*
    		 * With interrupt-remapping, destination information comes
    		 * from interrupt-remapping table entry.
    		 */
    		if (!irq_remapped(irq))
    			io_apic_write(ioapic_idx, rte_low + 1, dest);

    		/* Swap in the new vector, leaving the other bits untouched. */
    		rte = io_apic_read(ioapic_idx, rte_low);
    		rte = (rte & ~IO_APIC_REDIR_VECTOR_MASK) | vector;
    		io_apic_modify(ioapic_idx, rte_low, rte);
    	}
    }
    
    /*
     * Either sets desc->affinity to a valid value, and returns
    
     * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
    
     * leaves desc->affinity untouched.
     */
    
    set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask,
    		  unsigned int *dest_id)
    
    {
    	struct irq_cfg *cfg;
    	unsigned int irq;
    
    	if (!cpumask_intersects(mask, cpu_online_mask))
    
    
    	irq = desc->irq;
    
    	cfg = get_irq_desc_chip_data(desc);
    
    	if (assign_irq_vector(irq, cfg, mask))
    
    
    	cpumask_copy(desc->affinity, mask);
    
    
    	*dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
    	return 0;
    
    set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
    {
    	struct irq_cfg *cfg;
    	unsigned long flags;
    	unsigned int dest;
    	unsigned int irq;
    
    	int ret = -1;
    
    
    	irq = desc->irq;
    
    	cfg = get_irq_desc_chip_data(desc);
    
    	raw_spin_lock_irqsave(&ioapic_lock, flags);
    
    	ret = set_desc_affinity(desc, mask, &dest);
    	if (!ret) {
    
    		/* Only the high 8 bits are valid. */
    		dest = SET_APIC_LOGICAL_ID(dest);
    		__target_IO_APIC_irq(irq, dest, cfg);
    	}
    
    	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
    
    set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
    {
    	struct irq_desc *desc;
    
    	desc = irq_to_desc(irq);
    
    
    	return set_ioapic_affinity_irq_desc(desc, mask);
    
    #ifdef CONFIG_INTR_REMAP
    
    /*
     * Migrate the IO-APIC irq in the presence of intr-remapping.
     *
    
     * For both level and edge triggered, irq migration is a simple atomic
     * update(of vector and cpu destination) of IRTE and flush the hardware cache.
    
     * For level triggered, we eliminate the io-apic RTE modification (with the
     * updated vector information), by using a virtual vector (io-apic pin number).
     * Real vector that is used for interrupting cpu will be coming from
     * the interrupt-remapping table entry.
    
    migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
    
    	struct irq_cfg *cfg;
    	struct irte irte;
    	unsigned int dest;
    
    	unsigned int irq;
    
    	if (!cpumask_intersects(mask, cpu_online_mask))
    
    	irq = desc->irq;
    
    	if (get_irte(irq, &irte))
    
    	cfg = get_irq_desc_chip_data(desc);
    
    	if (assign_irq_vector(irq, cfg, mask))
    
    	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
    
    
    	irte.vector = cfg->vector;
    	irte.dest_id = IRTE_DEST(dest);
    
    	/*
    	 * Modified the IRTE and flushes the Interrupt entry cache.
    	 */
    	modify_irte(irq, &irte);
    
    
    	if (cfg->move_in_progress)
    		send_cleanup_vector(cfg);
    
    	cpumask_copy(desc->affinity, mask);
    
    }
    
    /*
     * Migrates the IRQ destination in the process context.
     */
    
    static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
    
    Rusty Russell's avatar
    Rusty Russell committed
    					    const struct cpumask *mask)
    
    	return migrate_ioapic_irq_desc(desc, mask);
    
    static int set_ir_ioapic_affinity_irq(unsigned int irq,
    
    Rusty Russell's avatar
    Rusty Russell committed
    				       const struct cpumask *mask)
    
    {
    	struct irq_desc *desc = irq_to_desc(irq);
    
    
    	return set_ir_ioapic_affinity_irq_desc(desc, mask);
    
    static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
    
    #endif
    
    asmlinkage void smp_irq_move_cleanup_interrupt(void)
    {
    	unsigned vector, me;
    
    	ack_APIC_irq();
    	exit_idle();
    	irq_enter();
    
    	me = smp_processor_id();
    	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
    		unsigned int irq;
    
    		struct irq_desc *desc;
    		struct irq_cfg *cfg;
    		irq = __get_cpu_var(vector_irq)[vector];
    
    
    		desc = irq_to_desc(irq);
    		if (!desc)
    			continue;
    
    		cfg = irq_cfg(irq);
    
    		raw_spin_lock(&desc->lock);
    
    		/*
    		 * Check if the irq migration is in progress. If so, we
    		 * haven't received the cleanup request yet for this irq.
    		 */
    		if (cfg->move_in_progress)
    			goto unlock;
    
    
    		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
    
    		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
    		/*
    		 * Check if the vector that needs to be cleaned up is
    		 * registered at the cpu's IRR. If so, then this is not
    		 * the best time to clean it up. Lets clean it up in the
    		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
    		 * to myself.
    		 */
    		if (irr  & (1 << (vector % 32))) {
    			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
    			goto unlock;
    		}
    
    		__get_cpu_var(vector_irq)[vector] = -1;
    unlock:
    
    		raw_spin_unlock(&desc->lock);
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
    
    	if (likely(!cfg->move_in_progress))
    
    		return;
    
    	me = smp_processor_id();
    
    	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
    
    		send_cleanup_vector(cfg);
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    static void irq_complete_move(struct irq_cfg *cfg)
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
    
    }
    
    void irq_force_complete_move(int irq)
    {
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    	struct irq_cfg *cfg = get_irq_chip_data(irq);
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    	__irq_complete_move(cfg, cfg->vector);
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    static inline void irq_complete_move(struct irq_cfg *cfg) { }
    
    static void ack_apic_edge(struct irq_data *data)
    
    	irq_complete_move(data->chip_data);
    	move_native_irq(data->irq);
    
    Yinghai Lu's avatar
    Yinghai Lu committed
    atomic_t irq_mis_count;
    
    
    /*
     * IO-APIC versions below 0x20 don't support EOI register.
     * For the record, here is the information about various versions:
     *     0Xh     82489DX
     *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
     *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
     *     30h-FFh Reserved
     *
     * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
     * version as 0x2. This is an error with documentation and these ICH chips
     * use io-apic's of version 0x20.
     *
     * For IO-APIC's with EOI register, we use that to do an explicit EOI.
     * Otherwise, we simulate the EOI message manually by changing the trigger
     * mode to edge and then back to level, with RTE being masked during this.
    */
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
    
    {
    	struct irq_pin_list *entry;
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    	unsigned long flags;
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    	raw_spin_lock_irqsave(&ioapic_lock, flags);
    
    	for_each_irq_pin(entry, cfg->irq_2_pin) {
    
    		if (mp_ioapics[entry->apic].apicver >= 0x20) {
    			/*
    			 * Intr-remapping uses pin number as the virtual vector
    			 * in the RTE. Actual vector is programmed in
    			 * intr-remapping table entry. Hence for the io-apic
    			 * EOI we use the pin number.
    			 */
    			if (irq_remapped(irq))
    				io_apic_eoi(entry->apic, entry->pin);
    			else
    				io_apic_eoi(entry->apic, cfg->vector);
    		} else {
    			__mask_and_edge_IO_APIC_irq(entry);
    			__unmask_and_level_IO_APIC_irq(entry);
    		}
    
    	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
    
    static void ack_apic_level(struct irq_data *data)
    
    	struct irq_cfg *cfg = data->chip_data;
    	int i, do_unmask_irq = 0, irq = data->irq;
    
    	struct irq_desc *desc = irq_to_desc(irq);
    
    Yinghai Lu's avatar
    Yinghai Lu committed
    	unsigned long v;
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    	irq_complete_move(cfg);
    
    #ifdef CONFIG_GENERIC_PENDING_IRQ
    
    	/* If we are moving the irq we need to mask it */
    
    	if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
    
    		do_unmask_irq = 1;
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    		mask_ioapic(cfg);
    
    Yinghai Lu's avatar
    Yinghai Lu committed
    	/*
    
    	 * It appears there is an erratum which affects at least version 0x11
    	 * of I/O APIC (that's the 82093AA and cores integrated into various
    	 * chipsets).  Under certain conditions a level-triggered interrupt is
    	 * erroneously delivered as edge-triggered one but the respective IRR
    	 * bit gets set nevertheless.  As a result the I/O unit expects an EOI
    	 * message but it will never arrive and further interrupts are blocked
    	 * from the source.  The exact reason is so far unknown, but the
    	 * phenomenon was observed when two consecutive interrupt requests
    	 * from a given source get delivered to the same CPU and the source is
    	 * temporarily disabled in between.
    	 *
    	 * A workaround is to simulate an EOI message manually.  We achieve it
    	 * by setting the trigger mode to edge and then to level when the edge
    	 * trigger mode gets detected in the TMR of a local APIC for a
    	 * level-triggered interrupt.  We mask the source for the time of the
    	 * operation to prevent an edge-triggered interrupt escaping meanwhile.
    	 * The idea is from Manfred Spraul.  --macro
    
    	 *
    	 * Also in the case when cpu goes offline, fixup_irqs() will forward
    	 * any unhandled interrupt on the offlined cpu to the new cpu
    	 * destination that is handling the corresponding interrupt. This
    	 * interrupt forwarding is done via IPI's. Hence, in this case also
    	 * level-triggered io-apic interrupt will be seen as an edge
    	 * interrupt in the IRR. And we can't rely on the cpu's EOI
    	 * to be broadcasted to the IO-APIC's which will clear the remoteIRR
    	 * corresponding to the level-triggered interrupt. Hence on IO-APIC's
    	 * supporting EOI register, we do an explicit EOI to clear the
    	 * remote IRR and on IO-APIC's which don't have an EOI register,
    	 * we use the above logic (mask+edge followed by unmask+level) from
    	 * Manfred Spraul to clear the remote IRR.
    
    	i = cfg->vector;
    
    Yinghai Lu's avatar
    Yinghai Lu committed
    	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
    
    
    	/*
    	 * We must acknowledge the irq before we move it or the acknowledge will
    	 * not propagate properly.
    	 */
    	ack_APIC_irq();
    
    
    	/*
    	 * Tail end of clearing remote IRR bit (either by delivering the EOI
    	 * message via io-apic EOI register write or simulating it using
    	 * mask+edge followed by unmask+level logic) manually when the
    	 * level triggered interrupt is seen as the edge triggered interrupt
    	 * at the cpu.
    	 */
    
    	if (!(v & (1 << (i & 0x1f)))) {
    		atomic_inc(&irq_mis_count);
    
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    		eoi_ioapic_irq(irq, cfg);
    
    	/* Now we can move and re-enable the irq */
    	if (unlikely(do_unmask_irq)) {
    		/* Only migrate the irq if the ack has been received.
    		 *
    		 * On rare occasions the broadcast level triggered ack gets
    		 * delayed going to ioapics, and if we reprogram the
    		 * vector while Remote IRR is still set the irq will never
    		 * fire again.
    		 *
    		 * To prevent this scenario we read the Remote IRR bit
    		 * of the ioapic.  This has two effects.
    		 * - On any sane system the read of the ioapic will
    		 *   flush writes (and acks) going to the ioapic from
    		 *   this cpu.
    		 * - We get to see if the ACK has actually been delivered.
    		 *
    		 * Based on failed experiments of reprogramming the
    		 * ioapic entry from outside of irq context starting
    		 * with masking the ioapic entry and then polling until
    		 * Remote IRR was clear before reprogramming the
    		 * ioapic I don't trust the Remote IRR bit to be
    		 * completely accurate.
    		 *
    		 * However there appears to be no other way to plug
    		 * this race, so if the Remote IRR bit is not
    		 * accurate and is causing problems then it is a hardware bug
    		 * and you can go talk to the chipset vendor about it.
    		 */
    
    		if (!io_apic_level_ack_pending(cfg))
    
    			move_masked_irq(irq);
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    		unmask_ioapic(cfg);
    
    Yinghai Lu's avatar
    Yinghai Lu committed
    }
    
    static void ir_ack_apic_edge(struct irq_data *data)
    
    static void ir_ack_apic_level(struct irq_data *data)
    
    	eoi_ioapic_irq(data->irq, data->chip_data);
    
    static struct irq_chip ioapic_chip __read_mostly = {
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    	.name		= "IO-APIC",
    
    	.irq_startup	= startup_ioapic_irq,
    
    	.irq_mask	= mask_ioapic_irq,
    	.irq_unmask	= unmask_ioapic_irq,
    	.irq_ack	= ack_apic_edge,
    	.irq_eoi	= ack_apic_level,
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    	.set_affinity	= set_ioapic_affinity_irq,
    
    	.irq_retrigger	= ioapic_retrigger_irq,
    
    static struct irq_chip ir_ioapic_chip __read_mostly = {
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    	.name		= "IR-IO-APIC",
    
    	.irq_startup	= startup_ioapic_irq,
    
    	.irq_mask	= mask_ioapic_irq,
    	.irq_unmask	= unmask_ioapic_irq,
    
    	.irq_ack	= ir_ack_apic_edge,
    	.irq_eoi	= ir_ack_apic_level,
    
    #ifdef CONFIG_SMP
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    	.set_affinity	= set_ir_ioapic_affinity_irq,
    
    	.irq_retrigger	= ioapic_retrigger_irq,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    static inline void init_IO_APIC_traps(void)
    {
    	int irq;
    
    	struct irq_cfg *cfg;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	/*
    	 * NOTE! The local APIC isn't very good at handling
    	 * multiple interrupts at the same interrupt level.
    	 * As the interrupt level is determined by taking the
    	 * vector number and shifting that right by 4, we
    	 * want to spread these out a bit so that they don't
    	 * all fall in the same interrupt level.
    	 *
    	 * Also, we've got to be careful not to trash gate
    	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
    	 */
    
    	for_each_irq_desc(irq, desc) {
    
    		cfg = get_irq_desc_chip_data(desc);
    
    		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			/*
    			 * Hmm.. We don't have an entry for this,
    			 * so default to an old-fashioned 8259
    			 * interrupt if we can..
    			 */
    
    			if (irq < legacy_pic->nr_legacy_irqs)
    				legacy_pic->make_irq(irq);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    				/* Strange. Oh, well.. */
    
    /*
     * The local APIC irq-chip implementation:
     */
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    static void mask_lapic_irq(struct irq_data *data)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	unsigned long v;
    
    	v = apic_read(APIC_LVT0);
    
    	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
    
    static void unmask_lapic_irq(struct irq_data *data)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	v = apic_read(APIC_LVT0);
    
    	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    static void ack_lapic_irq(struct irq_data *data)
    
    static struct irq_chip lapic_chip __read_mostly = {
    
    	.irq_mask	= mask_lapic_irq,
    	.irq_unmask	= unmask_lapic_irq,
    	.irq_ack	= ack_lapic_irq,
    
    static void lapic_register_intr(int irq, struct irq_desc *desc)
    
    	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
    				      "edge");
    }
    
    
    /*
     * Enable NMI delivery through LVT0 via enable_NMI_through_LVT0(),
     * logging progress at APIC_VERBOSE level.
     */
    static void __init setup_nmi(void)
    {
    	/*
    	 * Dirty trick to enable the NMI watchdog ...
    	 * We put the 8259A master into AEOI mode and
    	 * unmask on all local APICs LVT0 as NMI.
    	 *
    	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
    	 * is from Maciej W. Rozycki - so we do not have to EOI from
    	 * the NMI handler or the timer interrupt.
    	 */
    	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");

    	enable_NMI_through_LVT0();

    	apic_printk(APIC_VERBOSE, " done.\n");
    }
    
    /*
     * This looks a bit hackish but it's about the only one way of sending
     * a few INTA cycles to 8259As and any associated glue logic.  ICR does
     * not support the ExtINT mode, unfortunately.  We need to send these
     * cycles as some i82489DX-based boards have glue logic that keeps the
     * 8259A interrupt line asserted until INTA.  --macro
     */
    
    static inline void __init unlock_ExtINT_logic(void)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	struct IO_APIC_route_entry entry0, entry1;
    	unsigned char save_control, save_freq_select;
    
    
    	if (pin == -1) {
    		WARN_ON_ONCE(1);
    		return;
    	}
    
    	if (apic == -1) {
    		WARN_ON_ONCE(1);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		return;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	entry0 = ioapic_read_entry(apic, pin);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	memset(&entry1, 0, sizeof(entry1));
    
    	entry1.dest_mode = 0;			/* physical delivery */
    	entry1.mask = 0;			/* unmask IRQ now */
    
    	entry1.dest = hard_smp_processor_id();
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	entry1.delivery_mode = dest_ExtINT;
    	entry1.polarity = entry0.polarity;
    	entry1.trigger = 0;
    	entry1.vector = 0;
    
    
    	ioapic_write_entry(apic, pin, entry1);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	save_control = CMOS_READ(RTC_CONTROL);
    	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
    	CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
    		   RTC_FREQ_SELECT);
    	CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
    
    	i = 100;
    	while (i-- > 0) {
    		mdelay(10);
    		if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
    			i -= 10;
    	}
    
    	CMOS_WRITE(save_control, RTC_CONTROL);
    	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	ioapic_write_entry(apic, pin, entry0);
    
    static int disable_timer_pin_1 __initdata;
    
    /* Actually the next is obsolete, but keep it for paranoid reasons -AK */
    
    /*
     * Handler for the "disable_timer_pin_1" early parameter (registered with
     * early_param() below): sets the disable_timer_pin_1 flag.  The
     * parameter value @arg is not looked at.  Always returns 0.
     */
    static int __init disable_timer_pin_setup(char *arg)

    {
    	disable_timer_pin_1 = 1;
    	return 0;
    }
    
    early_param("disable_timer_pin_1", disable_timer_pin_setup);
    
    
    int timer_through_8259 __initdata;
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     * This code may look a bit paranoid, but it's supposed to cooperate with
     * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
     * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
     * fanatically on his truly buggy board.
     *
     * FIXME: really need to revamp this for all platforms.
     */
    
    static inline void __init check_timer(void)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	struct irq_desc *desc = irq_to_desc(0);
    
    	struct irq_cfg *cfg = get_irq_desc_chip_data(desc);
    
    	int node = cpu_to_node(0);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	/*
    	 * get/set the timer IRQ vector:
    	 */
    
    	legacy_pic->mask(0);
    
    	assign_irq_vector(0, cfg, apic->target_cpus());
    
    	 * As IRQ0 is to be enabled in the 8259A, the virtual
    	 * wire has to be disabled in the local APIC.  Also
    	 * timer interrupts need to be acknowledged manually in
    	 * the 8259A for the i82489DX when using the NMI
    	 * watchdog as that APIC treats NMIs as level-triggered.
    	 * The AEOI mode will finish them in the 8259A
    	 * automatically.
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	 */
    
    	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
    
    	legacy_pic->init(1);
    
    #ifdef CONFIG_X86_32
    
    Yinghai Lu's avatar
    Yinghai Lu committed
    	{
    		unsigned int ver;
    
    		ver = apic_read(APIC_LVR);
    		ver = GET_APIC_VERSION(ver);
    		timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
    	}
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	pin1  = find_isa_irq_pin(0, mp_INT);
    	apic1 = find_isa_irq_apic(0, mp_INT);
    	pin2  = ioapic_i8259.pin;
    	apic2 = ioapic_i8259.apic;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
    		    "apic1=%d pin1=%d apic2=%d pin2=%d\n",
    
    		    cfg->vector, apic1, pin1, apic2, pin2);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	/*
    	 * Some BIOS writers are clueless and report the ExtINTA
    	 * I/O APIC input from the cascaded 8259A as the timer
    	 * interrupt input.  So just in case, if only one pin
    	 * was found above, try it both directly and through the
    	 * 8259A.
    	 */
    	if (pin1 == -1) {
    
    		if (intr_remapping_enabled)
    			panic("BIOS bug: timer not connected to IO-APIC");
    
    		pin1 = pin2;
    		apic1 = apic2;
    		no_pin1 = 1;
    	} else if (pin2 == -1) {
    		pin2 = pin1;
    		apic2 = apic1;
    	}
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	if (pin1 != -1) {
    		/*
    		 * Ok, does IRQ0 through the IOAPIC work?
    		 */
    
    			add_pin_to_irq_node(cfg, node, apic1, pin1);
    
    			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
    
    Yinghai Lu's avatar
    Yinghai Lu committed
    		} else {
    			/*
    			 * For an edge-triggered timer, setup_IO_APIC_irq()
    			 * already leaves it unmasked, so we only need to
    			 * unmask it if it is level-triggered.  Do we really
    			 * have a level-triggered timer?
    			 */
    			int idx;
    			idx = find_irq_entry(apic1, pin1, mp_INT);
    			if (idx != -1 && irq_trigger(idx))
    
    Thomas Gleixner's avatar
    Thomas Gleixner committed
    				unmask_ioapic(cfg);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		if (timer_irq_works()) {
    			if (nmi_watchdog == NMI_IO_APIC) {
    				setup_nmi();
    
    				legacy_pic->unmask(0);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			}
    
    			if (disable_timer_pin_1 > 0)
    				clear_IO_APIC_pin(0, pin1);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		}
    
    		if (intr_remapping_enabled)
    			panic("timer doesn't work through Interrupt-remapped IO-APIC");
    
    Yinghai Lu's avatar
    Yinghai Lu committed
    		local_irq_disable();
    
    			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
    				    "8254 timer not connected to IO-APIC\n");
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    		apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
    			    "(IRQ0) through the 8259A ...\n");
    		apic_printk(APIC_QUIET, KERN_INFO
    			    "..... (found apic %d pin %d) ...\n", apic2, pin2);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		/*
    		 * legacy devices should be connected to IO APIC #0
    		 */
    
    		replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
    
    		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
    
    		legacy_pic->unmask(0);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		if (timer_irq_works()) {
    
    			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			if (nmi_watchdog == NMI_IO_APIC) {
    
    				legacy_pic->mask(0);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    				setup_nmi();
    
    				legacy_pic->unmask(0);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			}