| From 3a9ad0b4fdcd57f775d3615004c8c64c021a9e7d Mon Sep 17 00:00:00 2001 |
| From: Yinghai Lu <yinghai@kernel.org> |
| Date: Wed, 27 May 2015 17:23:51 -0700 |
| Subject: PCI: Add pci_bus_addr_t |
| |
| From: Yinghai Lu <yinghai@kernel.org> |
| |
| commit 3a9ad0b4fdcd57f775d3615004c8c64c021a9e7d upstream. |
| |
| David Ahern reported that d63e2e1f3df9 ("sparc/PCI: Clip bridge windows |
| to fit in upstream windows") fails to boot on sparc/T5-8: |
| |
| pci 0000:06:00.0: reg 0x184: can't handle BAR above 4GB (bus address 0x110204000) |
| |
| The problem is that sparc64 assumed that dma_addr_t only needed to hold DMA |
| addresses, i.e., bus addresses returned via the DMA API (dma_map_single(), |
| etc.), while the PCI core assumed dma_addr_t could hold *any* bus address, |
| including raw BAR values. On sparc64, all DMA addresses fit in 32 bits, so |
| dma_addr_t is a 32-bit type. However, BAR values can be 64 bits wide, so |
| they don't fit in a dma_addr_t. d63e2e1f3df9 added new checking that |
| tripped over this mismatch. |
| |
| Add pci_bus_addr_t, which is wide enough to hold any PCI bus address, |
| including both raw BAR values and DMA addresses. This will be 64 bits |
| on 64-bit platforms and on platforms with a 64-bit dma_addr_t. Then |
| dma_addr_t only needs to be wide enough to hold addresses from the DMA API. |
| |
| [bhelgaas: changelog, bugzilla, Kconfig to ensure pci_bus_addr_t is at |
| least as wide as dma_addr_t, documentation] |
| Fixes: d63e2e1f3df9 ("sparc/PCI: Clip bridge windows to fit in upstream windows") |
| Fixes: 23b13bc76f35 ("PCI: Fail safely if we can't handle BARs larger than 4GB") |
| Link: http://lkml.kernel.org/r/CAE9FiQU1gJY1LYrxs+ma5LCTEEe4xmtjRG0aXJ9K_Tsu+m9Wuw@mail.gmail.com |
| Link: http://lkml.kernel.org/r/1427857069-6789-1-git-send-email-yinghai@kernel.org |
| Link: https://bugzilla.kernel.org/show_bug.cgi?id=96231 |
| Reported-by: David Ahern <david.ahern@oracle.com> |
| Tested-by: David Ahern <david.ahern@oracle.com> |
| Signed-off-by: Yinghai Lu <yinghai@kernel.org> |
| Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> |
| Acked-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| Documentation/DMA-API-HOWTO.txt | 29 +++++++++++++++++------------ |
| Documentation/DMA-API.txt | 30 +++++++++++++++--------------- |
| drivers/pci/Kconfig | 4 ++++ |
| drivers/pci/bus.c | 10 +++++----- |
| drivers/pci/probe.c | 12 ++++++------ |
| include/linux/pci.h | 12 +++++++++--- |
| include/linux/types.h | 12 ++++++++++-- |
| 7 files changed, 66 insertions(+), 43 deletions(-) |
| |
| --- a/Documentation/DMA-API-HOWTO.txt |
| +++ b/Documentation/DMA-API-HOWTO.txt |
| @@ -25,13 +25,18 @@ physical addresses. These are the addre |
| address is not directly useful to a driver; it must use ioremap() to map |
| the space and produce a virtual address. |
| |
| -I/O devices use a third kind of address: a "bus address" or "DMA address". |
| -If a device has registers at an MMIO address, or if it performs DMA to read |
| -or write system memory, the addresses used by the device are bus addresses. |
| -In some systems, bus addresses are identical to CPU physical addresses, but |
| -in general they are not. IOMMUs and host bridges can produce arbitrary |
| +I/O devices use a third kind of address: a "bus address". If a device has |
| +registers at an MMIO address, or if it performs DMA to read or write system |
| +memory, the addresses used by the device are bus addresses. In some |
| +systems, bus addresses are identical to CPU physical addresses, but in |
| +general they are not. IOMMUs and host bridges can produce arbitrary |
| mappings between physical and bus addresses. |
| |
| +From a device's point of view, DMA uses the bus address space, but it may |
| +be restricted to a subset of that space. For example, even if a system |
| +supports 64-bit addresses for main memory and PCI BARs, it may use an IOMMU |
| +so devices only need to use 32-bit DMA addresses. |
| + |
| Here's a picture and some examples: |
| |
| CPU CPU Bus |
| @@ -72,11 +77,11 @@ can use virtual address X to access the |
| cannot because DMA doesn't go through the CPU virtual memory system. |
| |
| In some simple systems, the device can do DMA directly to physical address |
| -Y. But in many others, there is IOMMU hardware that translates bus |
| +Y. But in many others, there is IOMMU hardware that translates DMA |
| addresses to physical addresses, e.g., it translates Z to Y. This is part |
| of the reason for the DMA API: the driver can give a virtual address X to |
| an interface like dma_map_single(), which sets up any required IOMMU |
| -mapping and returns the bus address Z. The driver then tells the device to |
| +mapping and returns the DMA address Z. The driver then tells the device to |
| do DMA to Z, and the IOMMU maps it to the buffer at address Y in system |
| RAM. |
| |
| @@ -98,7 +103,7 @@ First of all, you should make sure |
| #include <linux/dma-mapping.h> |
| |
| is in your driver, which provides the definition of dma_addr_t. This type |
| -can hold any valid DMA or bus address for the platform and should be used |
| +can hold any valid DMA address for the platform and should be used |
| everywhere you hold a DMA address returned from the DMA mapping functions. |
| |
| What memory is DMA'able? |
| @@ -316,7 +321,7 @@ There are two types of DMA mappings: |
| Think of "consistent" as "synchronous" or "coherent". |
| |
| The current default is to return consistent memory in the low 32 |
| - bits of the bus space. However, for future compatibility you should |
| + bits of the DMA space. However, for future compatibility you should |
| set the consistent mask even if this default is fine for your |
| driver. |
| |
| @@ -403,7 +408,7 @@ dma_alloc_coherent() returns two values: |
| can use to access it from the CPU and dma_handle which you pass to the |
| card. |
| |
| -The CPU virtual address and the DMA bus address are both |
| +The CPU virtual address and the DMA address are both |
| guaranteed to be aligned to the smallest PAGE_SIZE order which |
| is greater than or equal to the requested size. This invariant |
| exists (for example) to guarantee that if you allocate a chunk |
| @@ -645,8 +650,8 @@ PLEASE NOTE: The 'nents' argument to th |
| dma_map_sg call. |
| |
| Every dma_map_{single,sg}() call should have its dma_unmap_{single,sg}() |
| -counterpart, because the bus address space is a shared resource and |
| -you could render the machine unusable by consuming all bus addresses. |
| +counterpart, because the DMA address space is a shared resource and |
| +you could render the machine unusable by consuming all DMA addresses. |
| |
| If you need to use the same streaming DMA region multiple times and touch |
| the data in between the DMA transfers, the buffer needs to be synced |
| --- a/Documentation/DMA-API.txt |
| +++ b/Documentation/DMA-API.txt |
| @@ -18,10 +18,10 @@ Part I - dma_ API |
| To get the dma_ API, you must #include <linux/dma-mapping.h>. This |
| provides dma_addr_t and the interfaces described below. |
| |
| -A dma_addr_t can hold any valid DMA or bus address for the platform. It |
| -can be given to a device to use as a DMA source or target. A CPU cannot |
| -reference a dma_addr_t directly because there may be translation between |
| -its physical address space and the bus address space. |
| +A dma_addr_t can hold any valid DMA address for the platform. It can be |
| +given to a device to use as a DMA source or target. A CPU cannot reference |
| +a dma_addr_t directly because there may be translation between its physical |
| +address space and the DMA address space. |
| |
| Part Ia - Using large DMA-coherent buffers |
| ------------------------------------------ |
| @@ -42,7 +42,7 @@ It returns a pointer to the allocated re |
| address space) or NULL if the allocation failed. |
| |
| It also returns a <dma_handle> which may be cast to an unsigned integer the |
| -same width as the bus and given to the device as the bus address base of |
| +same width as the bus and given to the device as the DMA address base of |
| the region. |
| |
| Note: consistent memory can be expensive on some platforms, and the |
| @@ -193,7 +193,7 @@ dma_map_single(struct device *dev, void |
| enum dma_data_direction direction) |
| |
| Maps a piece of processor virtual memory so it can be accessed by the |
| -device and returns the bus address of the memory. |
| +device and returns the DMA address of the memory. |
| |
| The direction for both APIs may be converted freely by casting. |
| However the dma_ API uses a strongly typed enumerator for its |
| @@ -212,20 +212,20 @@ contiguous piece of memory. For this re |
| this API should be obtained from sources which guarantee it to be |
| physically contiguous (like kmalloc). |
| |
| -Further, the bus address of the memory must be within the |
| +Further, the DMA address of the memory must be within the |
| dma_mask of the device (the dma_mask is a bit mask of the |
| -addressable region for the device, i.e., if the bus address of |
| -the memory ANDed with the dma_mask is still equal to the bus |
| +addressable region for the device, i.e., if the DMA address of |
| +the memory ANDed with the dma_mask is still equal to the DMA |
| address, then the device can perform DMA to the memory). To |
| ensure that the memory allocated by kmalloc is within the dma_mask, |
| the driver may specify various platform-dependent flags to restrict |
| -the bus address range of the allocation (e.g., on x86, GFP_DMA |
| -guarantees to be within the first 16MB of available bus addresses, |
| +the DMA address range of the allocation (e.g., on x86, GFP_DMA |
| +guarantees to be within the first 16MB of available DMA addresses, |
| as required by ISA devices). |
| |
| Note also that the above constraints on physical contiguity and |
| dma_mask may not apply if the platform has an IOMMU (a device which |
| -maps an I/O bus address to a physical memory address). However, to be |
| +maps an I/O DMA address to a physical memory address). However, to be |
| portable, device driver writers may *not* assume that such an IOMMU |
| exists. |
| |
| @@ -296,7 +296,7 @@ reduce current DMA mapping usage or dela |
| dma_map_sg(struct device *dev, struct scatterlist *sg, |
| int nents, enum dma_data_direction direction) |
| |
| -Returns: the number of bus address segments mapped (this may be shorter |
| +Returns: the number of DMA address segments mapped (this may be shorter |
| than <nents> passed in if some elements of the scatter/gather list are |
| physically or virtually adjacent and an IOMMU maps them with a single |
| entry). |
| @@ -340,7 +340,7 @@ must be the same as those and passed in |
| API. |
| |
| Note: <nents> must be the number you passed in, *not* the number of |
| -bus address entries returned. |
| +DMA address entries returned. |
| |
| void |
| dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, |
| @@ -507,7 +507,7 @@ it's asked for coherent memory for this |
| phys_addr is the CPU physical address to which the memory is currently |
| assigned (this will be ioremapped so the CPU can access the region). |
| |
| -device_addr is the bus address the device needs to be programmed |
| +device_addr is the DMA address the device needs to be programmed |
| with to actually address this memory (this will be handed out as the |
| dma_addr_t in dma_alloc_coherent()). |
| |
| --- a/drivers/pci/Kconfig |
| +++ b/drivers/pci/Kconfig |
| @@ -1,6 +1,10 @@ |
| # |
| # PCI configuration |
| # |
| +config PCI_BUS_ADDR_T_64BIT |
| + def_bool y if (ARCH_DMA_ADDR_T_64BIT || 64BIT) |
| + depends on PCI |
| + |
| config PCI_MSI |
| bool "Message Signaled Interrupts (MSI and MSI-X)" |
| depends on PCI |
| --- a/drivers/pci/bus.c |
| +++ b/drivers/pci/bus.c |
| @@ -92,11 +92,11 @@ void pci_bus_remove_resources(struct pci |
| } |
| |
| static struct pci_bus_region pci_32_bit = {0, 0xffffffffULL}; |
| -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT |
| +#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT |
| static struct pci_bus_region pci_64_bit = {0, |
| - (dma_addr_t) 0xffffffffffffffffULL}; |
| -static struct pci_bus_region pci_high = {(dma_addr_t) 0x100000000ULL, |
| - (dma_addr_t) 0xffffffffffffffffULL}; |
| + (pci_bus_addr_t) 0xffffffffffffffffULL}; |
| +static struct pci_bus_region pci_high = {(pci_bus_addr_t) 0x100000000ULL, |
| + (pci_bus_addr_t) 0xffffffffffffffffULL}; |
| #endif |
| |
| /* |
| @@ -200,7 +200,7 @@ int pci_bus_alloc_resource(struct pci_bu |
| resource_size_t), |
| void *alignf_data) |
| { |
| -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT |
| +#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT |
| int rc; |
| |
| if (res->flags & IORESOURCE_MEM_64) { |
| --- a/drivers/pci/probe.c |
| +++ b/drivers/pci/probe.c |
| @@ -253,8 +253,8 @@ int __pci_read_base(struct pci_dev *dev, |
| } |
| |
| if (res->flags & IORESOURCE_MEM_64) { |
| - if ((sizeof(dma_addr_t) < 8 || sizeof(resource_size_t) < 8) && |
| - sz64 > 0x100000000ULL) { |
| + if ((sizeof(pci_bus_addr_t) < 8 || sizeof(resource_size_t) < 8) |
| + && sz64 > 0x100000000ULL) { |
| res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED; |
| res->start = 0; |
| res->end = 0; |
| @@ -263,7 +263,7 @@ int __pci_read_base(struct pci_dev *dev, |
| goto out; |
| } |
| |
| - if ((sizeof(dma_addr_t) < 8) && l) { |
| + if ((sizeof(pci_bus_addr_t) < 8) && l) { |
| /* Above 32-bit boundary; try to reallocate */ |
| res->flags |= IORESOURCE_UNSET; |
| res->start = 0; |
| @@ -398,7 +398,7 @@ static void pci_read_bridge_mmio_pref(st |
| struct pci_dev *dev = child->self; |
| u16 mem_base_lo, mem_limit_lo; |
| u64 base64, limit64; |
| - dma_addr_t base, limit; |
| + pci_bus_addr_t base, limit; |
| struct pci_bus_region region; |
| struct resource *res; |
| |
| @@ -425,8 +425,8 @@ static void pci_read_bridge_mmio_pref(st |
| } |
| } |
| |
| - base = (dma_addr_t) base64; |
| - limit = (dma_addr_t) limit64; |
| + base = (pci_bus_addr_t) base64; |
| + limit = (pci_bus_addr_t) limit64; |
| |
| if (base != base64) { |
| dev_err(&dev->dev, "can't handle bridge window above 4GB (bus address %#010llx)\n", |
| --- a/include/linux/pci.h |
| +++ b/include/linux/pci.h |
| @@ -573,9 +573,15 @@ int raw_pci_read(unsigned int domain, un |
| int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, |
| int reg, int len, u32 val); |
| |
| +#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT |
| +typedef u64 pci_bus_addr_t; |
| +#else |
| +typedef u32 pci_bus_addr_t; |
| +#endif |
| + |
| struct pci_bus_region { |
| - dma_addr_t start; |
| - dma_addr_t end; |
| + pci_bus_addr_t start; |
| + pci_bus_addr_t end; |
| }; |
| |
| struct pci_dynids { |
| @@ -1120,7 +1126,7 @@ int __must_check pci_bus_alloc_resource( |
| |
| int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr); |
| |
| -static inline dma_addr_t pci_bus_address(struct pci_dev *pdev, int bar) |
| +static inline pci_bus_addr_t pci_bus_address(struct pci_dev *pdev, int bar) |
| { |
| struct pci_bus_region region; |
| |
| --- a/include/linux/types.h |
| +++ b/include/linux/types.h |
| @@ -139,12 +139,20 @@ typedef unsigned long blkcnt_t; |
| */ |
| #define pgoff_t unsigned long |
| |
| -/* A dma_addr_t can hold any valid DMA or bus address for the platform */ |
| +/* |
| + * A dma_addr_t can hold any valid DMA address, i.e., any address returned |
| + * by the DMA API. |
| + * |
| + * If the DMA API only uses 32-bit addresses, dma_addr_t need only be 32 |
| + * bits wide. Bus addresses, e.g., PCI BARs, may be wider than 32 bits, |
| + * but drivers do memory-mapped I/O to ioremapped kernel virtual addresses, |
| + * so they don't care about the size of the actual bus addresses. |
| + */ |
| #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT |
| typedef u64 dma_addr_t; |
| #else |
| typedef u32 dma_addr_t; |
| -#endif /* dma_addr_t */ |
| +#endif |
| |
| #ifdef __CHECKER__ |
| #else |