| From d8b26435b18259639f39aab9ff96cfbf5fdd1076 Mon Sep 17 00:00:00 2001 |
| From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com> |
| Date: Wed, 1 Feb 2017 12:48:45 -0700 |
| Subject: [PATCH 044/103] fpga zynq: Use the scatterlist interface |
| |
| This allows the driver to avoid a high order coherent DMA allocation |
| and memory copy. With this patch it can DMA directly from the kernel |
| pages that the bitfile is stored in. |
| |
| Since this is now a gather DMA operation the driver uses the ISR |
| to feed the chip's DMA queue with each entry from the SGL. |
| |
| Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com> |
| Acked-by: Moritz Fischer <moritz.fischer@ettus.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| drivers/fpga/zynq-fpga.c | 174 ++++++++++++++++++++++++++++++++++++----------- |
| 1 file changed, 135 insertions(+), 39 deletions(-) |
| |
| --- a/drivers/fpga/zynq-fpga.c |
| +++ b/drivers/fpga/zynq-fpga.c |
| @@ -30,6 +30,7 @@ |
| #include <linux/pm.h> |
| #include <linux/regmap.h> |
| #include <linux/string.h> |
| +#include <linux/scatterlist.h> |
| |
| /* Offsets into SLCR regmap */ |
| |
| @@ -80,6 +81,7 @@ |
| |
| /* FPGA init status */ |
| #define STATUS_DMA_Q_F BIT(31) |
| +#define STATUS_DMA_Q_E BIT(30) |
| #define STATUS_PCFG_INIT_MASK BIT(4) |
| |
| /* Interrupt Status/Mask Register Bit definitions */ |
| @@ -98,12 +100,16 @@ |
| #define DMA_INVALID_ADDRESS GENMASK(31, 0) |
| /* Used to unlock the dev */ |
| #define UNLOCK_MASK 0x757bdf0d |
| -/* Timeout for DMA to complete */ |
| -#define DMA_DONE_TIMEOUT msecs_to_jiffies(1000) |
| /* Timeout for polling reset bits */ |
| #define INIT_POLL_TIMEOUT 2500000 |
| /* Delay for polling reset bits */ |
| #define INIT_POLL_DELAY 20 |
| +/* Signal this is the last DMA transfer, wait for the AXI and PCAP before |
| + * interrupting |
| + */ |
| +#define DMA_SRC_LAST_TRANSFER 1 |
| +/* Timeout for DMA completion */ |
| +#define DMA_TIMEOUT_MS 5000 |
| |
| /* Masks for controlling stuff in SLCR */ |
| /* Disable all Level shifters */ |
| @@ -124,6 +130,11 @@ struct zynq_fpga_priv { |
| void __iomem *io_base; |
| struct regmap *slcr; |
| |
| + spinlock_t dma_lock; |
| + unsigned int dma_elm; |
| + unsigned int dma_nelms; |
| + struct scatterlist *cur_sg; |
| + |
| struct completion dma_done; |
| }; |
| |
| @@ -149,13 +160,80 @@ static inline void zynq_fpga_set_irq(str |
| zynq_fpga_write(priv, INT_MASK_OFFSET, ~enable); |
| } |
| |
| +/* Queue SGL entries to the hardware; must be called with dma_lock held */ |
| +static void zynq_step_dma(struct zynq_fpga_priv *priv) |
| +{ |
| + u32 addr; |
| + u32 len; |
| + bool first; |
| + |
| + first = priv->dma_elm == 0; |
| + while (priv->cur_sg) { |
| + /* Feed the DMA queue until it is full. */ |
| + if (zynq_fpga_read(priv, STATUS_OFFSET) & STATUS_DMA_Q_F) |
| + break; |
| + |
| + addr = sg_dma_address(priv->cur_sg); |
| + len = sg_dma_len(priv->cur_sg); |
| + if (priv->dma_elm + 1 == priv->dma_nelms) { |
| + /* The last transfer waits for the PCAP to finish too, |
| + * notice this also changes the irq_mask to ignore |
| + * IXR_DMA_DONE_MASK which ensures we do not trigger |
| + * the completion too early. |
| + */ |
| + addr |= DMA_SRC_LAST_TRANSFER; |
| + priv->cur_sg = NULL; |
| + } else { |
| + priv->cur_sg = sg_next(priv->cur_sg); |
| + priv->dma_elm++; |
| + } |
| + |
| + zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, addr); |
| + zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, DMA_INVALID_ADDRESS); |
| + zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, len / 4); |
| + zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0); |
| + } |
| + |
| + /* Once the first transfer is queued we can turn on the ISR, future |
| + * calls to zynq_step_dma will happen from the ISR context. The |
| + * dma_lock spinlock guarantees this handover is done coherently, the |
| + * ISR enable is put at the end to avoid another CPU spinning in the |
| + * ISR on this lock. |
| + */ |
| + if (first && priv->cur_sg) { |
| + zynq_fpga_set_irq(priv, |
| + IXR_DMA_DONE_MASK | IXR_ERROR_FLAGS_MASK); |
| + } else if (!priv->cur_sg) { |
| + /* The last transfer changes to DMA & PCAP mode since we do |
| + * not want to continue until everything has been flushed into |
| + * the PCAP. |
| + */ |
| + zynq_fpga_set_irq(priv, |
| + IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK); |
| + } |
| +} |
| + |
| static irqreturn_t zynq_fpga_isr(int irq, void *data) |
| { |
| struct zynq_fpga_priv *priv = data; |
| + u32 intr_status; |
| |
| - /* disable DMA and error IRQs */ |
| - zynq_fpga_set_irq(priv, 0); |
| + /* If anything other than DMA completion is reported stop and hand |
| + * control back to zynq_fpga_ops_write, something went wrong, |
| + * otherwise progress the DMA. |
| + */ |
| + spin_lock(&priv->dma_lock); |
| + intr_status = zynq_fpga_read(priv, INT_STS_OFFSET); |
| + if (!(intr_status & IXR_ERROR_FLAGS_MASK) && |
| + (intr_status & IXR_DMA_DONE_MASK) && priv->cur_sg) { |
| + zynq_fpga_write(priv, INT_STS_OFFSET, IXR_DMA_DONE_MASK); |
| + zynq_step_dma(priv); |
| + spin_unlock(&priv->dma_lock); |
| + return IRQ_HANDLED; |
| + } |
| + spin_unlock(&priv->dma_lock); |
| |
| + zynq_fpga_set_irq(priv, 0); |
| complete(&priv->dma_done); |
| |
| return IRQ_HANDLED; |
| @@ -266,10 +344,11 @@ static int zynq_fpga_ops_write_init(stru |
| zynq_fpga_write(priv, CTRL_OFFSET, |
| (CTRL_PCAP_PR_MASK | CTRL_PCAP_MODE_MASK | ctrl)); |
| |
| - /* check that we have room in the command queue */ |
| + /* We expect that the command queue is empty right now. */ |
| status = zynq_fpga_read(priv, STATUS_OFFSET); |
| - if (status & STATUS_DMA_Q_F) { |
| - dev_err(&mgr->dev, "DMA command queue full\n"); |
| + if ((status & STATUS_DMA_Q_F) || |
| + (status & STATUS_DMA_Q_E) != STATUS_DMA_Q_E) { |
| + dev_err(&mgr->dev, "DMA command queue not right\n"); |
| err = -EBUSY; |
| goto out_err; |
| } |
| @@ -288,27 +367,36 @@ out_err: |
| return err; |
| } |
| |
| -static int zynq_fpga_ops_write(struct fpga_manager *mgr, |
| - const char *buf, size_t count) |
| +static int zynq_fpga_ops_write(struct fpga_manager *mgr, struct sg_table *sgt) |
| { |
| struct zynq_fpga_priv *priv; |
| const char *why; |
| int err; |
| - char *kbuf; |
| - size_t in_count; |
| - dma_addr_t dma_addr; |
| - u32 transfer_length; |
| u32 intr_status; |
| + unsigned long timeout; |
| + unsigned long flags; |
| + struct scatterlist *sg; |
| + int i; |
| |
| - in_count = count; |
| priv = mgr->priv; |
| |
| - kbuf = |
| - dma_alloc_coherent(mgr->dev.parent, count, &dma_addr, GFP_KERNEL); |
| - if (!kbuf) |
| - return -ENOMEM; |
| + /* The hardware can only DMA multiples of 4 bytes, and it requires the |
| + * starting addresses to be aligned to 64 bits (UG585 pg 212). |
| + */ |
| + for_each_sg(sgt->sgl, sg, sgt->nents, i) { |
| + if ((sg->offset % 8) || (sg->length % 4)) { |
| + dev_err(&mgr->dev, |
| + "Invalid bitstream, chunks must be aligned\n"); |
| + return -EINVAL; |
| + } |
| + } |
| |
| - memcpy(kbuf, buf, count); |
| + priv->dma_nelms = |
| + dma_map_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE); |
| + if (priv->dma_nelms == 0) { |
| + dev_err(&mgr->dev, "Unable to DMA map (TO_DEVICE)\n"); |
| + return -ENOMEM; |
| + } |
| |
| /* enable clock */ |
| err = clk_enable(priv->clk); |
| @@ -316,28 +404,31 @@ static int zynq_fpga_ops_write(struct fp |
| goto out_free; |
| |
| zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK); |
| - |
| reinit_completion(&priv->dma_done); |
| |
| - /* enable DMA and error IRQs */ |
| - zynq_fpga_set_irq(priv, IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK); |
| - |
| - /* the +1 in the src addr is used to hold off on DMA_DONE IRQ |
| - * until both AXI and PCAP are done ... |
| - */ |
| - zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, (u32)(dma_addr) + 1); |
| - zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, (u32)DMA_INVALID_ADDRESS); |
| - |
| - /* convert #bytes to #words */ |
| - transfer_length = (count + 3) / 4; |
| + /* zynq_step_dma will turn on interrupts */ |
| + spin_lock_irqsave(&priv->dma_lock, flags); |
| + priv->dma_elm = 0; |
| + priv->cur_sg = sgt->sgl; |
| + zynq_step_dma(priv); |
| + spin_unlock_irqrestore(&priv->dma_lock, flags); |
| |
| - zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, transfer_length); |
| - zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0); |
| + timeout = wait_for_completion_timeout(&priv->dma_done, |
| + msecs_to_jiffies(DMA_TIMEOUT_MS)); |
| |
| - wait_for_completion(&priv->dma_done); |
| + spin_lock_irqsave(&priv->dma_lock, flags); |
| + zynq_fpga_set_irq(priv, 0); |
| + priv->cur_sg = NULL; |
| + spin_unlock_irqrestore(&priv->dma_lock, flags); |
| |
| intr_status = zynq_fpga_read(priv, INT_STS_OFFSET); |
| - zynq_fpga_write(priv, INT_STS_OFFSET, intr_status); |
| + zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK); |
| + |
| + /* There doesn't seem to be a way to force cancel any DMA, so if |
| + * something went wrong we are relying on the hardware to have halted |
| + * the DMA before we get here, if there was we could use |
| + * wait_for_completion_interruptible too. |
| + */ |
| |
| if (intr_status & IXR_ERROR_FLAGS_MASK) { |
| why = "DMA reported error"; |
| @@ -345,8 +436,12 @@ static int zynq_fpga_ops_write(struct fp |
| goto out_report; |
| } |
| |
| - if (!((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) { |
| - why = "DMA did not complete"; |
| + if (priv->cur_sg || |
| + !((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) { |
| + if (timeout == 0) |
| + why = "DMA timed out"; |
| + else |
| + why = "DMA did not complete"; |
| err = -EIO; |
| goto out_report; |
| } |
| @@ -369,7 +464,7 @@ out_clk: |
| clk_disable(priv->clk); |
| |
| out_free: |
| - dma_free_coherent(mgr->dev.parent, count, kbuf, dma_addr); |
| + dma_unmap_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE); |
| return err; |
| } |
| |
| @@ -433,7 +528,7 @@ static const struct fpga_manager_ops zyn |
| .initial_header_size = 128, |
| .state = zynq_fpga_ops_state, |
| .write_init = zynq_fpga_ops_write_init, |
| - .write = zynq_fpga_ops_write, |
| + .write_sg = zynq_fpga_ops_write, |
| .write_complete = zynq_fpga_ops_write_complete, |
| }; |
| |
| @@ -447,6 +542,7 @@ static int zynq_fpga_probe(struct platfo |
| priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); |
| if (!priv) |
| return -ENOMEM; |
| + spin_lock_init(&priv->dma_lock); |
| |
| res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
| priv->io_base = devm_ioremap_resource(dev, res); |