spi: Add DMA mode support to spi-qcom-qspi
Merge series from Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com>:

There are a large number of QSPI IRQs that fire during boot/init and later on every suspend/resume. This could be made faster by doing DMA instead of PIO.

Below is a comparison of the number of interrupts raised in 2 scenarios: boot-up and stabilise, and suspend/resume (S/R).

Sequence    PIO      DMA
=========================
Boot-up     69088    19284
S/R         5066     3430

Speed test results:

spi-nor read times in sec after 2 min uptime
============================================
PIO - Iterations-1000, min=3.18, max=3.74, avg=3.53
DMA - Iterations-1000, min=1.21, max=2.28, avg=1.79

spi-nor write times in sec after 2 min uptime
=============================================
PIO - Iterations-1000, min=3.20, max=8.24, avg=3.58
DMA - Iterations-1000, min=1.25, max=5.13, avg=1.82

Further testing performed:
a) multiple entries in sgt (simulated by max_dma_len = 1024)
b) fallback to PIO (simulated by DMA setup failure)
This commit is contained in:
commit
dd69654cd3
@ -29,6 +29,9 @@ properties:
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
iommus:
|
||||
maxItems: 1
|
||||
|
||||
interrupts:
|
||||
maxItems: 1
|
||||
|
||||
|
@ -2,6 +2,8 @@
|
||||
// Copyright (c) 2017-2018, The Linux foundation. All rights reserved.
|
||||
|
||||
#include <linux/clk.h>
|
||||
#include <linux/dmapool.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/interconnect.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/io.h>
|
||||
@ -62,6 +64,7 @@
|
||||
#define WR_FIFO_FULL BIT(10)
|
||||
#define WR_FIFO_OVERRUN BIT(11)
|
||||
#define TRANSACTION_DONE BIT(16)
|
||||
#define DMA_CHAIN_DONE BIT(31)
|
||||
#define QSPI_ERR_IRQS (RESP_FIFO_UNDERRUN | HRESP_FROM_NOC_ERR | \
|
||||
WR_FIFO_OVERRUN)
|
||||
#define QSPI_ALL_IRQS (QSPI_ERR_IRQS | RESP_FIFO_RDY | \
|
||||
@ -108,18 +111,34 @@
|
||||
#define RD_FIFO_RESET 0x0030
|
||||
#define RESET_FIFO BIT(0)
|
||||
|
||||
#define NEXT_DMA_DESC_ADDR 0x0040
|
||||
#define CURRENT_DMA_DESC_ADDR 0x0044
|
||||
#define CURRENT_MEM_ADDR 0x0048
|
||||
|
||||
#define CUR_MEM_ADDR 0x0048
|
||||
#define HW_VERSION 0x004c
|
||||
#define RD_FIFO 0x0050
|
||||
#define SAMPLING_CLK_CFG 0x0090
|
||||
#define SAMPLING_CLK_STATUS 0x0094
|
||||
|
||||
#define QSPI_ALIGN_REQ 32
|
||||
|
||||
/*
 * Transfer direction. The value is stored in the 1-bit "direction" field
 * of struct qspi_cmd_desc, so it must fit in a single bit.
 */
enum qspi_dir {
	QSPI_READ = 0,
	QSPI_WRITE = 1,
};
|
||||
|
||||
struct qspi_cmd_desc {
|
||||
u32 data_address;
|
||||
u32 next_descriptor;
|
||||
u32 direction:1;
|
||||
u32 multi_io_mode:3;
|
||||
u32 reserved1:4;
|
||||
u32 fragment:1;
|
||||
u32 reserved2:7;
|
||||
u32 length:16;
|
||||
};
|
||||
|
||||
struct qspi_xfer {
|
||||
union {
|
||||
const void *tx_buf;
|
||||
@ -137,11 +156,23 @@ enum qspi_clocks {
|
||||
QSPI_NUM_CLKS
|
||||
};
|
||||
|
||||
/*
|
||||
* Number of entries in the sgt returned from the SPI framework that
|
||||
* will be supported. Can be modified as required.
|
||||
* In practice, given max_dma_len is 64KB, the number of
|
||||
* entries is not expected to exceed 1.
|
||||
*/
|
||||
#define QSPI_MAX_SG 5
|
||||
|
||||
struct qcom_qspi {
|
||||
void __iomem *base;
|
||||
struct device *dev;
|
||||
struct clk_bulk_data *clks;
|
||||
struct qspi_xfer xfer;
|
||||
struct dma_pool *dma_cmd_pool;
|
||||
dma_addr_t dma_cmd_desc[QSPI_MAX_SG];
|
||||
void *virt_cmd_desc[QSPI_MAX_SG];
|
||||
unsigned int n_cmd_desc;
|
||||
struct icc_path *icc_path_cpu_to_qspi;
|
||||
unsigned long last_speed;
|
||||
/* Lock to protect data accessed by IRQs */
|
||||
@ -153,21 +184,22 @@ static u32 qspi_buswidth_to_iomode(struct qcom_qspi *ctrl,
|
||||
{
|
||||
switch (buswidth) {
|
||||
case 1:
|
||||
return SDR_1BIT << MULTI_IO_MODE_SHFT;
|
||||
return SDR_1BIT;
|
||||
case 2:
|
||||
return SDR_2BIT << MULTI_IO_MODE_SHFT;
|
||||
return SDR_2BIT;
|
||||
case 4:
|
||||
return SDR_4BIT << MULTI_IO_MODE_SHFT;
|
||||
return SDR_4BIT;
|
||||
default:
|
||||
dev_warn_once(ctrl->dev,
|
||||
"Unexpected bus width: %u\n", buswidth);
|
||||
return SDR_1BIT << MULTI_IO_MODE_SHFT;
|
||||
return SDR_1BIT;
|
||||
}
|
||||
}
|
||||
|
||||
static void qcom_qspi_pio_xfer_cfg(struct qcom_qspi *ctrl)
|
||||
{
|
||||
u32 pio_xfer_cfg;
|
||||
u32 iomode;
|
||||
const struct qspi_xfer *xfer;
|
||||
|
||||
xfer = &ctrl->xfer;
|
||||
@ -179,7 +211,8 @@ static void qcom_qspi_pio_xfer_cfg(struct qcom_qspi *ctrl)
|
||||
else
|
||||
pio_xfer_cfg |= TRANSFER_FRAGMENT;
|
||||
pio_xfer_cfg &= ~MULTI_IO_MODE_MSK;
|
||||
pio_xfer_cfg |= qspi_buswidth_to_iomode(ctrl, xfer->buswidth);
|
||||
iomode = qspi_buswidth_to_iomode(ctrl, xfer->buswidth);
|
||||
pio_xfer_cfg |= iomode << MULTI_IO_MODE_SHFT;
|
||||
|
||||
writel(pio_xfer_cfg, ctrl->base + PIO_XFER_CFG);
|
||||
}
|
||||
@ -217,12 +250,22 @@ static void qcom_qspi_pio_xfer(struct qcom_qspi *ctrl)
|
||||
static void qcom_qspi_handle_err(struct spi_master *master,
|
||||
struct spi_message *msg)
|
||||
{
|
||||
u32 int_status;
|
||||
struct qcom_qspi *ctrl = spi_master_get_devdata(master);
|
||||
unsigned long flags;
|
||||
int i;
|
||||
|
||||
spin_lock_irqsave(&ctrl->lock, flags);
|
||||
writel(0, ctrl->base + MSTR_INT_EN);
|
||||
int_status = readl(ctrl->base + MSTR_INT_STATUS);
|
||||
writel(int_status, ctrl->base + MSTR_INT_STATUS);
|
||||
ctrl->xfer.rem_bytes = 0;
|
||||
|
||||
/* free cmd descriptors if they are around (DMA mode) */
|
||||
for (i = 0; i < ctrl->n_cmd_desc; i++)
|
||||
dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[i],
|
||||
ctrl->dma_cmd_desc[i]);
|
||||
ctrl->n_cmd_desc = 0;
|
||||
spin_unlock_irqrestore(&ctrl->lock, flags);
|
||||
}
|
||||
|
||||
@ -242,7 +285,7 @@ static int qcom_qspi_set_speed(struct qcom_qspi *ctrl, unsigned long speed_hz)
|
||||
}
|
||||
|
||||
/*
|
||||
* Set BW quota for CPU as driver supports FIFO mode only.
|
||||
* Set BW quota for CPU.
|
||||
* We don't have explicit peak requirement so keep it equal to avg_bw.
|
||||
*/
|
||||
avg_bw_cpu = Bps_to_icc(speed_hz);
|
||||
@ -258,6 +301,102 @@ static int qcom_qspi_set_speed(struct qcom_qspi *ctrl, unsigned long speed_hz)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int qcom_qspi_alloc_desc(struct qcom_qspi *ctrl, dma_addr_t dma_ptr,
|
||||
uint32_t n_bytes)
|
||||
{
|
||||
struct qspi_cmd_desc *virt_cmd_desc, *prev;
|
||||
dma_addr_t dma_cmd_desc;
|
||||
|
||||
/* allocate for dma cmd descriptor */
|
||||
virt_cmd_desc = dma_pool_alloc(ctrl->dma_cmd_pool, GFP_KERNEL | __GFP_ZERO, &dma_cmd_desc);
|
||||
if (!virt_cmd_desc)
|
||||
return -ENOMEM;
|
||||
|
||||
ctrl->virt_cmd_desc[ctrl->n_cmd_desc] = virt_cmd_desc;
|
||||
ctrl->dma_cmd_desc[ctrl->n_cmd_desc] = dma_cmd_desc;
|
||||
ctrl->n_cmd_desc++;
|
||||
|
||||
/* setup cmd descriptor */
|
||||
virt_cmd_desc->data_address = dma_ptr;
|
||||
virt_cmd_desc->direction = ctrl->xfer.dir;
|
||||
virt_cmd_desc->multi_io_mode = qspi_buswidth_to_iomode(ctrl, ctrl->xfer.buswidth);
|
||||
virt_cmd_desc->fragment = !ctrl->xfer.is_last;
|
||||
virt_cmd_desc->length = n_bytes;
|
||||
|
||||
/* update previous descriptor */
|
||||
if (ctrl->n_cmd_desc >= 2) {
|
||||
prev = (ctrl->virt_cmd_desc)[ctrl->n_cmd_desc - 2];
|
||||
prev->next_descriptor = dma_cmd_desc;
|
||||
prev->fragment = 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int qcom_qspi_setup_dma_desc(struct qcom_qspi *ctrl,
|
||||
struct spi_transfer *xfer)
|
||||
{
|
||||
int ret;
|
||||
struct sg_table *sgt;
|
||||
dma_addr_t dma_ptr_sg;
|
||||
unsigned int dma_len_sg;
|
||||
int i;
|
||||
|
||||
if (ctrl->n_cmd_desc) {
|
||||
dev_err(ctrl->dev, "Remnant dma buffers n_cmd_desc-%d\n", ctrl->n_cmd_desc);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
sgt = (ctrl->xfer.dir == QSPI_READ) ? &xfer->rx_sg : &xfer->tx_sg;
|
||||
if (!sgt->nents || sgt->nents > QSPI_MAX_SG) {
|
||||
dev_warn_once(ctrl->dev, "Cannot handle %d entries in scatter list\n", sgt->nents);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
for (i = 0; i < sgt->nents; i++) {
|
||||
dma_ptr_sg = sg_dma_address(sgt->sgl + i);
|
||||
if (!IS_ALIGNED(dma_ptr_sg, QSPI_ALIGN_REQ)) {
|
||||
dev_warn_once(ctrl->dev, "dma_address not aligned to %d\n", QSPI_ALIGN_REQ);
|
||||
return -EAGAIN;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < sgt->nents; i++) {
|
||||
dma_ptr_sg = sg_dma_address(sgt->sgl + i);
|
||||
dma_len_sg = sg_dma_len(sgt->sgl + i);
|
||||
|
||||
ret = qcom_qspi_alloc_desc(ctrl, dma_ptr_sg, dma_len_sg);
|
||||
if (ret)
|
||||
goto cleanup;
|
||||
}
|
||||
return 0;
|
||||
|
||||
cleanup:
|
||||
for (i = 0; i < ctrl->n_cmd_desc; i++)
|
||||
dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[i],
|
||||
ctrl->dma_cmd_desc[i]);
|
||||
ctrl->n_cmd_desc = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void qcom_qspi_dma_xfer(struct qcom_qspi *ctrl)
|
||||
{
|
||||
/* Setup new interrupts */
|
||||
writel(DMA_CHAIN_DONE, ctrl->base + MSTR_INT_EN);
|
||||
|
||||
/* kick off transfer */
|
||||
writel((u32)((ctrl->dma_cmd_desc)[0]), ctrl->base + NEXT_DMA_DESC_ADDR);
|
||||
}
|
||||
|
||||
/* Switch to DMA if transfer length exceeds this */
|
||||
#define QSPI_MAX_BYTES_FIFO 64
|
||||
|
||||
static bool qcom_qspi_can_dma(struct spi_controller *ctlr,
|
||||
struct spi_device *slv, struct spi_transfer *xfer)
|
||||
{
|
||||
return xfer->len > QSPI_MAX_BYTES_FIFO;
|
||||
}
|
||||
|
||||
static int qcom_qspi_transfer_one(struct spi_master *master,
|
||||
struct spi_device *slv,
|
||||
struct spi_transfer *xfer)
|
||||
@ -266,6 +405,7 @@ static int qcom_qspi_transfer_one(struct spi_master *master,
|
||||
int ret;
|
||||
unsigned long speed_hz;
|
||||
unsigned long flags;
|
||||
u32 mstr_cfg;
|
||||
|
||||
speed_hz = slv->max_speed_hz;
|
||||
if (xfer->speed_hz)
|
||||
@ -276,6 +416,7 @@ static int qcom_qspi_transfer_one(struct spi_master *master,
|
||||
return ret;
|
||||
|
||||
spin_lock_irqsave(&ctrl->lock, flags);
|
||||
mstr_cfg = readl(ctrl->base + MSTR_CONFIG);
|
||||
|
||||
/* We are half duplex, so either rx or tx will be set */
|
||||
if (xfer->rx_buf) {
|
||||
@ -290,10 +431,36 @@ static int qcom_qspi_transfer_one(struct spi_master *master,
|
||||
ctrl->xfer.is_last = list_is_last(&xfer->transfer_list,
|
||||
&master->cur_msg->transfers);
|
||||
ctrl->xfer.rem_bytes = xfer->len;
|
||||
|
||||
if (xfer->rx_sg.nents || xfer->tx_sg.nents) {
|
||||
/* do DMA transfer */
|
||||
if (!(mstr_cfg & DMA_ENABLE)) {
|
||||
mstr_cfg |= DMA_ENABLE;
|
||||
writel(mstr_cfg, ctrl->base + MSTR_CONFIG);
|
||||
}
|
||||
|
||||
ret = qcom_qspi_setup_dma_desc(ctrl, xfer);
|
||||
if (ret != -EAGAIN) {
|
||||
if (!ret)
|
||||
qcom_qspi_dma_xfer(ctrl);
|
||||
goto exit;
|
||||
}
|
||||
dev_warn_once(ctrl->dev, "DMA failure, falling back to PIO");
|
||||
ret = 0; /* We'll retry w/ PIO */
|
||||
}
|
||||
|
||||
if (mstr_cfg & DMA_ENABLE) {
|
||||
mstr_cfg &= ~DMA_ENABLE;
|
||||
writel(mstr_cfg, ctrl->base + MSTR_CONFIG);
|
||||
}
|
||||
qcom_qspi_pio_xfer(ctrl);
|
||||
|
||||
exit:
|
||||
spin_unlock_irqrestore(&ctrl->lock, flags);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* We'll call spi_finalize_current_transfer() when done */
|
||||
return 1;
|
||||
}
|
||||
@ -328,6 +495,16 @@ static int qcom_qspi_prepare_message(struct spi_master *master,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int qcom_qspi_alloc_dma(struct qcom_qspi *ctrl)
|
||||
{
|
||||
ctrl->dma_cmd_pool = dmam_pool_create("qspi cmd desc pool",
|
||||
ctrl->dev, sizeof(struct qspi_cmd_desc), 0, 0);
|
||||
if (!ctrl->dma_cmd_pool)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static irqreturn_t pio_read(struct qcom_qspi *ctrl)
|
||||
{
|
||||
u32 rd_fifo_status;
|
||||
@ -426,6 +603,7 @@ static irqreturn_t qcom_qspi_irq(int irq, void *dev_id)
|
||||
int_status = readl(ctrl->base + MSTR_INT_STATUS);
|
||||
writel(int_status, ctrl->base + MSTR_INT_STATUS);
|
||||
|
||||
/* PIO mode handling */
|
||||
if (ctrl->xfer.dir == QSPI_WRITE) {
|
||||
if (int_status & WR_FIFO_EMPTY)
|
||||
ret = pio_write(ctrl);
|
||||
@ -449,6 +627,22 @@ static irqreturn_t qcom_qspi_irq(int irq, void *dev_id)
|
||||
spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev));
|
||||
}
|
||||
|
||||
/* DMA mode handling */
|
||||
if (int_status & DMA_CHAIN_DONE) {
|
||||
int i;
|
||||
|
||||
writel(0, ctrl->base + MSTR_INT_EN);
|
||||
ctrl->xfer.rem_bytes = 0;
|
||||
|
||||
for (i = 0; i < ctrl->n_cmd_desc; i++)
|
||||
dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[i],
|
||||
ctrl->dma_cmd_desc[i]);
|
||||
ctrl->n_cmd_desc = 0;
|
||||
|
||||
ret = IRQ_HANDLED;
|
||||
spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev));
|
||||
}
|
||||
|
||||
spin_unlock(&ctrl->lock);
|
||||
return ret;
|
||||
}
|
||||
@ -517,7 +711,13 @@ static int qcom_qspi_probe(struct platform_device *pdev)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
|
||||
if (ret)
|
||||
return dev_err_probe(dev, ret, "could not set DMA mask\n");
|
||||
|
||||
master->max_speed_hz = 300000000;
|
||||
master->max_dma_len = 65536; /* as per HPG */
|
||||
master->dma_alignment = QSPI_ALIGN_REQ;
|
||||
master->num_chipselect = QSPI_NUM_CS;
|
||||
master->bus_num = -1;
|
||||
master->dev.of_node = pdev->dev.of_node;
|
||||
@ -528,6 +728,8 @@ static int qcom_qspi_probe(struct platform_device *pdev)
|
||||
master->prepare_message = qcom_qspi_prepare_message;
|
||||
master->transfer_one = qcom_qspi_transfer_one;
|
||||
master->handle_err = qcom_qspi_handle_err;
|
||||
if (of_property_read_bool(pdev->dev.of_node, "iommus"))
|
||||
master->can_dma = qcom_qspi_can_dma;
|
||||
master->auto_runtime_pm = true;
|
||||
|
||||
ret = devm_pm_opp_set_clkname(&pdev->dev, "core");
|
||||
@ -540,6 +742,10 @@ static int qcom_qspi_probe(struct platform_device *pdev)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = qcom_qspi_alloc_dma(ctrl);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
pm_runtime_use_autosuspend(dev);
|
||||
pm_runtime_set_autosuspend_delay(dev, 250);
|
||||
pm_runtime_enable(dev);
|
||||
|
Loading…
x
Reference in New Issue
Block a user