From 7ccd6aa1f0e440c518a608e32d4f580ca6eeb3bd Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 7 Nov 2018 23:06:15 +0000
Subject: arm64: io: Ensure calls to delay routines are ordered against prior
 readX()

[ Upstream commit 6460d32014717686d3b7963595950ba2c6d1bb5e ]

A relatively standard idiom for ensuring that a pair of MMIO writes to a
device arrive at that device with a specified minimum delay between them
is as follows:

        writel_relaxed(42, dev_base + CTL1);
        readl(dev_base + CTL1);
        udelay(10);
        writel_relaxed(42, dev_base + CTL2);

the intention being that the read-back from the device will push the
prior write to CTL1, and the udelay will hold up the write to CTL2 until
at least 10us have elapsed.

Unfortunately, on arm64 where the underlying delay loop is implemented
as a read of the architected counter, the CPU does not guarantee
ordering from the readl() to the delay loop and therefore the delay loop
could in theory be speculated and not provide the desired interval
between the two writes.
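
For reference, the arm64 delay loop polls the architected counter,
roughly along the lines of the simplified sketch below (illustrative
only; the real implementation lives in arch/arm64/lib/delay.c):

        /* simplified sketch of the arm64 delay loop */
        void __delay(unsigned long cycles)
        {
                cycles_t start = get_cycles(); /* counter read behind an ISB */

                while ((get_cycles() - start) < cycles)
                        cpu_relax();
        }

Nothing in this loop consumes the value returned by the earlier readl(),
so the initial counter read is free to be speculated ahead of it.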

Fix this in a similar manner to PowerPC by introducing a dummy control
dependency on the output of readX() which, combined with the ISB in the
read of the architected counter, guarantees that a subsequent delay loop
cannot be executed until the readX() has returned its result.
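
With the control dependency in place, a readl(); udelay(); sequence
boils down to something like the following instruction stream (an
illustrative sketch, not the exact generated code):

        ldr     w0, [x1]        // readl_relaxed()
        dsb     ld              // rmb()
        eor     x2, x0, x0      // always zero, but depends on the load
        cbnz    x2, .           // never-taken branch: control dependency
        isb                     // from the counter read in udelay()
        mrs     x3, cntvct_el0  // first read of the architected counter

The never-taken branch consumes the loaded value, and the ISB prevents
the counter read from being speculated past it, so the delay loop cannot
begin until the readl() has completed.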

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 arch/arm64/include/asm/io.h | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 35b2e50f17fb..b2bc7dbc1fa6 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -106,7 +106,22 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
 }
 
 /* IO barriers */
-#define __iormb() rmb()
+#define __iormb(v) \
+({ \
+        unsigned long tmp; \
+ \
+        rmb(); \
+ \
+        /* \
+         * Create a dummy control dependency from the IO read to any \
+         * later instructions. This ensures that a subsequent call to \
+         * udelay() will be ordered due to the ISB in get_cycles(). \
+         */ \
+        asm volatile("eor %0, %1, %1\n" \
+                     "cbnz %0, ." \
+                     : "=r" (tmp) : "r" (v) : "memory"); \
+})
+
 #define __iowmb() wmb()
 
 #define mmiowb() do { } while (0)
@@ -131,10 +146,10 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
  * following Normal memory access. Writes are ordered relative to any prior
  * Normal memory access.
  */
-#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; })
-#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; })
-#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; })
-#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(); __v; })
+#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(__v); __v; })
+#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(__v); __v; })
+#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
+#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; })
 
 #define writeb(v,c) ({ __iowmb(); writeb_relaxed((v),(c)); })
 #define writew(v,c) ({ __iowmb(); writew_relaxed((v),(c)); })
@@ -185,9 +200,9 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
 /*
  * io{read,write}{16,32,64}be() macros
  */
-#define ioread16be(p) ({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(); __v; })
-#define ioread32be(p) ({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(); __v; })
-#define ioread64be(p) ({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(); __v; })
+#define ioread16be(p) ({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(__v); __v; })
+#define ioread32be(p) ({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(__v); __v; })
+#define ioread64be(p) ({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(__v); __v; })
 
 #define iowrite16be(v,p) ({ __iowmb(); __raw_writew((__force __u16)cpu_to_be16(v), p); })
 #define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); })
-- 
2.19.1
