| From owner-linux-pci@atrey.karlin.mff.cuni.cz Fri Mar 30 11:54:51 2007 |
| From: Mitch Williams <mitch.a.williams@intel.com> |
| Date: Fri, 30 Mar 2007 11:54:08 -0700 |
| Subject: PCI: Flush MSI-X table writes |
| To: linux-pci@atrey.karlin.mff.cuni.cz, akpm@linux-foundation.org |
| Cc: gregkh@suse.de, ebiederm@xmission.com, linux-kernel@vger.kernel.org, <auke-jan.h.kok@intel.com> |
| Message-ID: <1175280848.17652.5.camel@strongmad> |
| |
| |
| This patch fixes a kernel bug which is triggered when using the |
| irqbalance daemon with MSI-X hardware. |
| |
| Because both MSI-X interrupt messages and MSI-X table writes are posted, |
| it's possible for them to cross while in flight. This results in |
| interrupts being received long after the kernel thinks they're disabled, |
| and in interrupts being sent to stale vectors after rebalancing. |
| |
| This patch performs a read flush after writes to the MSI-X table for |
| mask and unmask operations. Since the SMP affinity is set while |
| the interrupt is masked, and since it's unmasked immediately after, |
| no additional flushes are required in the various affinity-setting |
| routines. |
| |
| This patch has been validated with (unreleased) network hardware which |
| uses MSI-X. |
| |
| Revised with input from Eric Biederman. |
| |
| Signed-off-by: Mitch Williams <mitch.a.williams@intel.com> |
| Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> |
| |
| --- |
| drivers/pci/msi.c | 25 +++++++++++++++++++++++++ |
| 1 file changed, 25 insertions(+) |
| |
| --- a/drivers/pci/msi.c |
| +++ b/drivers/pci/msi.c |
| @@ -68,6 +68,29 @@ static void msix_set_enable(struct pci_d |
| } |
| } |
| |
| +static void msix_flush_writes(unsigned int irq) |
| +{ |
| + struct msi_desc *entry; |
| + |
| + entry = get_irq_msi(irq); |
| + BUG_ON(!entry || !entry->dev); |
| + switch (entry->msi_attrib.type) { |
| + case PCI_CAP_ID_MSI: |
| + /* plain MSI has no MSI-X table write to flush */ |
| + break; |
| + case PCI_CAP_ID_MSIX: |
| + { |
| + int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + |
| + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; |
| + readl(entry->mask_base + offset); /* flush posted writes */ |
| + break; |
| + } |
| + default: |
| + BUG(); |
| + break; |
| + } |
| +} |
| + |
| static void msi_set_mask_bit(unsigned int irq, int flag) |
| { |
| struct msi_desc *entry; |
| @@ -187,11 +210,13 @@ void write_msi_msg(unsigned int irq, str |
| void mask_msi_irq(unsigned int irq) |
| { |
| msi_set_mask_bit(irq, 1); |
| + msix_flush_writes(irq); /* read back to flush the mask write */ |
| } |
| |
| void unmask_msi_irq(unsigned int irq) |
| { |
| msi_set_mask_bit(irq, 0); |
| + msix_flush_writes(irq); /* read back to flush the unmask write */ |
| } |
| |
| static int msi_free_irq(struct pci_dev* dev, int irq); |