| From 4aa30897cdb6fab241d7c9639ba623dee98568c2 Mon Sep 17 00:00:00 2001 |
| From: Geert Uytterhoeven <geert+renesas@glider.be> |
| Date: Mon, 30 Jun 2014 12:10:24 +0200 |
| Subject: spi: sh-msiof: Add DMA support |
| |
| Add DMA support to the MSIOF driver using platform data. |
| |
| As MSIOF DMA is limited to 32-bit words (requiring byte/wordswapping for |
| smaller wordsizes), and the group length is limited to 256 words, DMA is |
| performed on two fixed pages, allocated and mapped at driver initialization |
| time. |
| |
| Performance figures (in Mbps) on r8a7791/koelsch at different SPI clock |
| frequencies for 1024-byte and 4096-byte transfers: |
| |
| 1024 bytes 4096 bytes |
| - 3.25 MHz: PIO 2.1, DMA 2.6 | PIO 2.8, DMA 3.1 |
| - 6.5 MHz: PIO 3.2, DMA 4.4 | PIO 5.0, DMA 5.9 |
| - 13 MHz: PIO 4.2, DMA 6.6 | PIO 8.2, DMA 10.7 |
| - 26 MHz: PIO 5.9, DMA 10.4 | PIO 12.4, DMA 18.4 |
| |
| Note that DMA is only faster than PIO for transfers that exceed the FIFO |
| size (typically 64 words / 256 bytes). |
| |
| Also note that large transfers (larger than the group length for DMA, or |
| larger than the FIFO size for PIO), should use cs-gpio (with the |
| appropriate pinmux setup), as the hardware chipselect will be deasserted in |
| between chunks. |
| |
| Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be> |
| Signed-off-by: Mark Brown <broonie@linaro.org> |
| (cherry picked from commit b0d0ce8b6b91a0f6f99045b6019fc4c824634fb4) |
| Signed-off-by: Simon Horman <horms+renesas@verge.net.au> |
| --- |
| drivers/spi/spi-sh-msiof.c | 348 ++++++++++++++++++++++++++++++++++++++++++- |
| include/linux/spi/sh_msiof.h | 2 + |
| 2 files changed, 343 insertions(+), 7 deletions(-) |
| |
| diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c |
| index 824f44e6bd88..9922ed3a4441 100644 |
| --- a/drivers/spi/spi-sh-msiof.c |
| +++ b/drivers/spi/spi-sh-msiof.c |
| @@ -2,6 +2,7 @@ |
| * SuperH MSIOF SPI Master Interface |
| * |
| * Copyright (c) 2009 Magnus Damm |
| + * Copyright (C) 2014 Glider bvba |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License version 2 as |
| @@ -13,6 +14,8 @@ |
| #include <linux/clk.h> |
| #include <linux/completion.h> |
| #include <linux/delay.h> |
| +#include <linux/dma-mapping.h> |
| +#include <linux/dmaengine.h> |
| #include <linux/err.h> |
| #include <linux/gpio.h> |
| #include <linux/interrupt.h> |
| @@ -23,6 +26,7 @@ |
| #include <linux/of_device.h> |
| #include <linux/platform_device.h> |
| #include <linux/pm_runtime.h> |
| +#include <linux/sh_dma.h> |
| |
| #include <linux/spi/sh_msiof.h> |
| #include <linux/spi/spi.h> |
| @@ -37,6 +41,7 @@ struct sh_msiof_chipdata { |
| }; |
| |
| struct sh_msiof_spi_priv { |
| + struct spi_master *master; |
| void __iomem *mapbase; |
| struct clk *clk; |
| struct platform_device *pdev; |
| @@ -45,6 +50,10 @@ struct sh_msiof_spi_priv { |
| struct completion done; |
| int tx_fifo_size; |
| int rx_fifo_size; |
| + void *tx_dma_page; |
| + void *rx_dma_page; |
| + dma_addr_t tx_dma_addr; |
| + dma_addr_t rx_dma_addr; |
| }; |
| |
| #define TMDR1 0x00 /* Transmit Mode Register 1 */ |
| @@ -84,6 +93,8 @@ struct sh_msiof_spi_priv { |
| #define MDR2_WDLEN1(i) (((i) - 1) << 16) /* Word Count (1-64/256 (SH, A1))) */ |
| #define MDR2_GRPMASK1 0x00000001 /* Group Output Mask 1 (SH, A1) */ |
| |
| +#define MAX_WDLEN 256U |
| + |
| /* TSCR and RSCR */ |
| #define SCR_BRPS_MASK 0x1f00 /* Prescaler Setting (1-32) */ |
| #define SCR_BRPS(i) (((i) - 1) << 8) |
| @@ -282,8 +293,6 @@ static void sh_msiof_spi_set_pin_regs(struct sh_msiof_spi_priv *p, |
| * 1 0 11 11 0 0 |
| * 1 1 11 11 1 1 |
| */ |
| - sh_msiof_write(p, FCTR, 0); |
| - |
| tmp = MDR1_SYNCMD_SPI | 1 << MDR1_FLD_SHIFT | MDR1_XXSTP; |
| tmp |= !cs_high << MDR1_SYNCAC_SHIFT; |
| tmp |= lsb_first << MDR1_BITLSB_SHIFT; |
| @@ -319,8 +328,6 @@ static void sh_msiof_spi_set_mode_regs(struct sh_msiof_spi_priv *p, |
| |
| if (rx_buf) |
| sh_msiof_write(p, RMDR2, dr2); |
| - |
| - sh_msiof_write(p, IER, STR_TEOF | STR_REOF); |
| } |
| |
| static void sh_msiof_reset_str(struct sh_msiof_spi_priv *p) |
| @@ -563,8 +570,12 @@ static int sh_msiof_spi_txrx_once(struct sh_msiof_spi_priv *p, |
| /* the fifo contents need shifting */ |
| fifo_shift = 32 - bits; |
| |
| + /* default FIFO watermarks for PIO */ |
| + sh_msiof_write(p, FCTR, 0); |
| + |
| /* setup msiof transfer mode registers */ |
| sh_msiof_spi_set_mode_regs(p, tx_buf, rx_buf, bits, words); |
| + sh_msiof_write(p, IER, IER_TEOFE | IER_REOFE); |
| |
| /* write tx fifo */ |
| if (tx_buf) |
| @@ -609,11 +620,170 @@ stop_ier: |
| return ret; |
| } |
| |
| +static void sh_msiof_dma_complete(void *arg) |
| +{ |
| + struct sh_msiof_spi_priv *p = arg; |
| + |
| + sh_msiof_write(p, IER, 0); |
| + complete(&p->done); |
| +} |
| + |
| +static int sh_msiof_dma_once(struct sh_msiof_spi_priv *p, const void *tx, |
| + void *rx, unsigned int len) |
| +{ |
| + u32 ier_bits = 0; |
| + struct dma_async_tx_descriptor *desc_tx = NULL, *desc_rx = NULL; |
| + dma_cookie_t cookie; |
| + int ret; |
| + |
| + /* 1 stage FIFO watermarks for DMA */ |
| + sh_msiof_write(p, FCTR, FCTR_TFWM_1 | FCTR_RFWM_1); |
| + |
| + /* setup msiof transfer mode registers (32-bit words) */ |
| + sh_msiof_spi_set_mode_regs(p, tx, rx, 32, len / 4); |
| + |
| + if (tx) { |
| + ier_bits |= IER_TDREQE | IER_TDMAE; |
| + dma_sync_single_for_device(&p->pdev->dev, p->tx_dma_addr, len, |
| + DMA_TO_DEVICE); |
| + desc_tx = dmaengine_prep_slave_single(p->master->dma_tx, |
| + p->tx_dma_addr, len, DMA_TO_DEVICE, |
| + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); |
| + if (!desc_tx) |
| + return -EIO; |
| + } |
| + |
| + if (rx) { |
| + ier_bits |= IER_RDREQE | IER_RDMAE; |
| + desc_rx = dmaengine_prep_slave_single(p->master->dma_rx, |
| + p->rx_dma_addr, len, DMA_FROM_DEVICE, |
| + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); |
| + if (!desc_rx) |
| + return -EIO; |
| + } |
| + sh_msiof_write(p, IER, ier_bits); |
| + |
| + reinit_completion(&p->done); |
| + |
| + if (rx) { |
| + desc_rx->callback = sh_msiof_dma_complete; |
| + desc_rx->callback_param = p; |
| + cookie = dmaengine_submit(desc_rx); |
| + if (dma_submit_error(cookie)) { |
| + ret = cookie; |
| + goto stop_ier; |
| + } |
| + dma_async_issue_pending(p->master->dma_rx); |
| + } |
| + |
| + if (tx) { |
| + if (rx) { |
| + /* No callback */ |
| + desc_tx->callback = NULL; |
| + } else { |
| + desc_tx->callback = sh_msiof_dma_complete; |
| + desc_tx->callback_param = p; |
| + } |
| + cookie = dmaengine_submit(desc_tx); |
| + if (dma_submit_error(cookie)) { |
| + ret = cookie; |
| + goto stop_rx; |
| + } |
| + dma_async_issue_pending(p->master->dma_tx); |
| + } |
| + |
| + ret = sh_msiof_spi_start(p, rx); |
| + if (ret) { |
| + dev_err(&p->pdev->dev, "failed to start hardware\n"); |
| + goto stop_tx; |
| + } |
| + |
| + /* wait for tx fifo to be emptied / rx fifo to be filled */ |
| + ret = wait_for_completion_timeout(&p->done, HZ); |
| + if (!ret) { |
| + dev_err(&p->pdev->dev, "DMA timeout\n"); |
| + ret = -ETIMEDOUT; |
| + goto stop_reset; |
| + } |
| + |
| + /* clear status bits */ |
| + sh_msiof_reset_str(p); |
| + |
| + ret = sh_msiof_spi_stop(p, rx); |
| + if (ret) { |
| + dev_err(&p->pdev->dev, "failed to shut down hardware\n"); |
| + return ret; |
| + } |
| + |
| + if (rx) |
| + dma_sync_single_for_cpu(&p->pdev->dev, p->rx_dma_addr, len, |
| + DMA_FROM_DEVICE); |
| + |
| + return 0; |
| + |
| +stop_reset: |
| + sh_msiof_reset_str(p); |
| + sh_msiof_spi_stop(p, rx); |
| +stop_tx: |
| + if (tx) |
| + dmaengine_terminate_all(p->master->dma_tx); |
| +stop_rx: |
| + if (rx) |
| + dmaengine_terminate_all(p->master->dma_rx); |
| +stop_ier: |
| + sh_msiof_write(p, IER, 0); |
| + return ret; |
| +} |
| + |
| +static void copy_bswap32(u32 *dst, const u32 *src, unsigned int words) |
| +{ |
| + /* src or dst can be unaligned, but not both */ |
| + if ((unsigned long)src & 3) { |
| + while (words--) { |
| + *dst++ = swab32(get_unaligned(src)); |
| + src++; |
| + } |
| + } else if ((unsigned long)dst & 3) { |
| + while (words--) { |
| + put_unaligned(swab32(*src++), dst); |
| + dst++; |
| + } |
| + } else { |
| + while (words--) |
| + *dst++ = swab32(*src++); |
| + } |
| +} |
| + |
| +static void copy_wswap32(u32 *dst, const u32 *src, unsigned int words) |
| +{ |
| + /* src or dst can be unaligned, but not both */ |
| + if ((unsigned long)src & 3) { |
| + while (words--) { |
| + *dst++ = swahw32(get_unaligned(src)); |
| + src++; |
| + } |
| + } else if ((unsigned long)dst & 3) { |
| + while (words--) { |
| + put_unaligned(swahw32(*src++), dst); |
| + dst++; |
| + } |
| + } else { |
| + while (words--) |
| + *dst++ = swahw32(*src++); |
| + } |
| +} |
| + |
| +static void copy_plain32(u32 *dst, const u32 *src, unsigned int words) |
| +{ |
| + memcpy(dst, src, words * 4); |
| +} |
| + |
| static int sh_msiof_transfer_one(struct spi_master *master, |
| struct spi_device *spi, |
| struct spi_transfer *t) |
| { |
| struct sh_msiof_spi_priv *p = spi_master_get_devdata(master); |
| + void (*copy32)(u32 *, const u32 *, unsigned int); |
| void (*tx_fifo)(struct sh_msiof_spi_priv *, const void *, int, int); |
| void (*rx_fifo)(struct sh_msiof_spi_priv *, void *, int, int); |
| const void *tx_buf = t->tx_buf; |
| @@ -624,7 +794,48 @@ static int sh_msiof_transfer_one(struct spi_master *master, |
| unsigned int words; |
| int n; |
| bool swab; |
| + int ret; |
| + |
| + /* setup clocks (clock already enabled in chipselect()) */ |
| + sh_msiof_spi_set_clk_regs(p, clk_get_rate(p->clk), t->speed_hz); |
| + |
| + while (master->dma_tx && len > 15) { |
| + /* |
| + * DMA supports 32-bit words only, hence pack 8-bit and 16-bit |
| + * words, with byte resp. word swapping. |
| + */ |
| + unsigned int l = min(len, MAX_WDLEN * 4); |
| + |
| + if (bits <= 8) { |
| + if (l & 3) |
| + break; |
| + copy32 = copy_bswap32; |
| + } else if (bits <= 16) { |
| + if (l & 1) |
| + break; |
| + copy32 = copy_wswap32; |
| + } else { |
| + copy32 = copy_plain32; |
| + } |
| + |
| + if (tx_buf) |
| + copy32(p->tx_dma_page, tx_buf, l / 4); |
| |
| + ret = sh_msiof_dma_once(p, tx_buf, rx_buf, l); |
| + if (ret) |
| + return ret; |
| + |
| + if (rx_buf) { |
| + copy32(rx_buf, p->rx_dma_page, l / 4); |
| + rx_buf += l; |
| + } |
| + if (tx_buf) |
| + tx_buf += l; |
| + |
| + len -= l; |
| + if (!len) |
| + return 0; |
| + } |
| |
| if (bits <= 8 && len > 15 && !(len & 3)) { |
| bits = 32; |
| @@ -673,9 +884,6 @@ static int sh_msiof_transfer_one(struct spi_master *master, |
| rx_fifo = sh_msiof_spi_read_fifo_32; |
| } |
| |
| - /* setup clocks (clock already enabled in chipselect()) */ |
| - sh_msiof_spi_set_clk_regs(p, clk_get_rate(p->clk), t->speed_hz); |
| - |
| /* transfer in fifo sized chunks */ |
| words = len / bytes_per_word; |
| |
| @@ -745,6 +953,123 @@ static struct sh_msiof_spi_info *sh_msiof_spi_parse_dt(struct device *dev) |
| } |
| #endif |
| |
| +static struct dma_chan *sh_msiof_request_dma_chan(struct device *dev, |
| + enum dma_transfer_direction dir, unsigned int id, dma_addr_t port_addr) |
| +{ |
| + dma_cap_mask_t mask; |
| + struct dma_chan *chan; |
| + struct dma_slave_config cfg; |
| + int ret; |
| + |
| + dma_cap_zero(mask); |
| + dma_cap_set(DMA_SLAVE, mask); |
| + |
| + chan = dma_request_channel(mask, shdma_chan_filter, |
| + (void *)(unsigned long)id); |
| + if (!chan) { |
| + dev_warn(dev, "dma_request_channel failed\n"); |
| + return NULL; |
| + } |
| + |
| + memset(&cfg, 0, sizeof(cfg)); |
| + cfg.slave_id = id; |
| + cfg.direction = dir; |
| + if (dir == DMA_MEM_TO_DEV) |
| + cfg.dst_addr = port_addr; |
| + else |
| + cfg.src_addr = port_addr; |
| + |
| + ret = dmaengine_slave_config(chan, &cfg); |
| + if (ret) { |
| + dev_warn(dev, "dmaengine_slave_config failed %d\n", ret); |
| + dma_release_channel(chan); |
| + return NULL; |
| + } |
| + |
| + return chan; |
| +} |
| + |
| +static int sh_msiof_request_dma(struct sh_msiof_spi_priv *p) |
| +{ |
| + struct platform_device *pdev = p->pdev; |
| + struct device *dev = &pdev->dev; |
| + const struct sh_msiof_spi_info *info = dev_get_platdata(dev); |
| + const struct resource *res; |
| + struct spi_master *master; |
| + |
| + if (!info || !info->dma_tx_id || !info->dma_rx_id) |
| + return 0; /* The driver assumes no error */ |
| + |
| + /* The DMA engine uses the second register set, if present */ |
| + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); |
| + if (!res) |
| + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
| + |
| + master = p->master; |
| + master->dma_tx = sh_msiof_request_dma_chan(dev, DMA_MEM_TO_DEV, |
| + info->dma_tx_id, |
| + res->start + TFDR); |
| + if (!master->dma_tx) |
| + return -ENODEV; |
| + |
| + master->dma_rx = sh_msiof_request_dma_chan(dev, DMA_DEV_TO_MEM, |
| + info->dma_rx_id, |
| + res->start + RFDR); |
| + if (!master->dma_rx) |
| + goto free_tx_chan; |
| + |
| + p->tx_dma_page = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); |
| + if (!p->tx_dma_page) |
| + goto free_rx_chan; |
| + |
| + p->rx_dma_page = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); |
| + if (!p->rx_dma_page) |
| + goto free_tx_page; |
| + |
| + p->tx_dma_addr = dma_map_single(dev, p->tx_dma_page, PAGE_SIZE, |
| + DMA_TO_DEVICE); |
| + if (dma_mapping_error(dev, p->tx_dma_addr)) |
| + goto free_rx_page; |
| + |
| + p->rx_dma_addr = dma_map_single(dev, p->rx_dma_page, PAGE_SIZE, |
| + DMA_FROM_DEVICE); |
| + if (dma_mapping_error(dev, p->rx_dma_addr)) |
| + goto unmap_tx_page; |
| + |
| + dev_info(dev, "DMA available"); |
| + return 0; |
| + |
| +unmap_tx_page: |
| + dma_unmap_single(dev, p->tx_dma_addr, PAGE_SIZE, DMA_TO_DEVICE); |
| +free_rx_page: |
| + free_page((unsigned long)p->rx_dma_page); |
| +free_tx_page: |
| + free_page((unsigned long)p->tx_dma_page); |
| +free_rx_chan: |
| + dma_release_channel(master->dma_rx); |
| +free_tx_chan: |
| + dma_release_channel(master->dma_tx); |
| + master->dma_tx = NULL; |
| + return -ENODEV; |
| +} |
| + |
| +static void sh_msiof_release_dma(struct sh_msiof_spi_priv *p) |
| +{ |
| + struct spi_master *master = p->master; |
| + struct device *dev; |
| + |
| + if (!master->dma_tx) |
| + return; |
| + |
| + dev = &p->pdev->dev; |
| + dma_unmap_single(dev, p->rx_dma_addr, PAGE_SIZE, DMA_FROM_DEVICE); |
| + dma_unmap_single(dev, p->tx_dma_addr, PAGE_SIZE, DMA_TO_DEVICE); |
| + free_page((unsigned long)p->rx_dma_page); |
| + free_page((unsigned long)p->tx_dma_page); |
| + dma_release_channel(master->dma_rx); |
| + dma_release_channel(master->dma_tx); |
| +} |
| + |
| static int sh_msiof_spi_probe(struct platform_device *pdev) |
| { |
| struct resource *r; |
| @@ -763,6 +1088,7 @@ static int sh_msiof_spi_probe(struct platform_device *pdev) |
| p = spi_master_get_devdata(master); |
| |
| platform_set_drvdata(pdev, p); |
| + p->master = master; |
| |
| of_id = of_match_device(sh_msiof_match, &pdev->dev); |
| if (of_id) { |
| @@ -833,6 +1159,10 @@ static int sh_msiof_spi_probe(struct platform_device *pdev) |
| master->auto_runtime_pm = true; |
| master->transfer_one = sh_msiof_transfer_one; |
| |
| + ret = sh_msiof_request_dma(p); |
| + if (ret < 0) |
| + dev_warn(&pdev->dev, "DMA not available, using PIO\n"); |
| + |
| ret = devm_spi_register_master(&pdev->dev, master); |
| if (ret < 0) { |
| dev_err(&pdev->dev, "spi_register_master error.\n"); |
| @@ -842,6 +1172,7 @@ static int sh_msiof_spi_probe(struct platform_device *pdev) |
| return 0; |
| |
| err2: |
| + sh_msiof_release_dma(p); |
| pm_runtime_disable(&pdev->dev); |
| err1: |
| spi_master_put(master); |
| @@ -850,6 +1181,9 @@ static int sh_msiof_spi_probe(struct platform_device *pdev) |
| |
| static int sh_msiof_spi_remove(struct platform_device *pdev) |
| { |
| + struct sh_msiof_spi_priv *p = platform_get_drvdata(pdev); |
| + |
| + sh_msiof_release_dma(p); |
| pm_runtime_disable(&pdev->dev); |
| return 0; |
| } |
| diff --git a/include/linux/spi/sh_msiof.h b/include/linux/spi/sh_msiof.h |
| index 2e8db3d2d2e5..88a14d81c49e 100644 |
| --- a/include/linux/spi/sh_msiof.h |
| +++ b/include/linux/spi/sh_msiof.h |
| @@ -5,6 +5,8 @@ struct sh_msiof_spi_info { |
| int tx_fifo_override; |
| int rx_fifo_override; |
| u16 num_chipselect; |
| + unsigned int dma_tx_id; |
| + unsigned int dma_rx_id; |
| }; |
| |
| #endif /* __SPI_SH_MSIOF_H__ */ |
| -- |
| 2.1.2 |
| |