diff options
Diffstat (limited to 'drivers/dma')
-rw-r--r-- | drivers/dma/at_xdmac.c | 42 | ||||
-rw-r--r-- | drivers/dma/dw/core.c | 15 | ||||
-rw-r--r-- | drivers/dma/dw/pci.c | 4 | ||||
-rw-r--r-- | drivers/dma/edma.c | 41 | ||||
-rw-r--r-- | drivers/dma/fsldma.c | 2 | ||||
-rw-r--r-- | drivers/dma/ioat/dma.c | 34 | ||||
-rw-r--r-- | drivers/dma/iop-adma.c | 8 | ||||
-rw-r--r-- | drivers/dma/mv_xor.c | 4 | ||||
-rw-r--r-- | drivers/dma/pxa_dma.c | 8 | ||||
-rw-r--r-- | drivers/dma/qcom/bam_dma.c | 14 |
10 files changed, 137 insertions, 35 deletions
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 64f5d1bdbb48..8e304b1befc5 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -176,6 +176,7 @@ #define AT_XDMAC_MAX_CHAN 0x20 #define AT_XDMAC_MAX_CSIZE 16 /* 16 data */ #define AT_XDMAC_MAX_DWIDTH 8 /* 64 bits */ +#define AT_XDMAC_RESIDUE_MAX_RETRIES 5 #define AT_XDMAC_DMA_BUSWIDTHS\ (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\ @@ -1395,8 +1396,8 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, struct at_xdmac_desc *desc, *_desc; struct list_head *descs_list; enum dma_status ret; - int residue; - u32 cur_nda, mask, value; + int residue, retry; + u32 cur_nda, check_nda, cur_ubc, mask, value; u8 dwidth = 0; unsigned long flags; @@ -1433,7 +1434,42 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, cpu_relax(); } + /* + * When processing the residue, we need to read two registers but we + * can't do it in an atomic way. AT_XDMAC_CNDA is used to find where + * we stand in the descriptor list and AT_XDMAC_CUBC is used + * to know how much data remains for the current descriptor. + * Since the dma channel is not paused, so as not to lose data, the + * descriptor may change between the AT_XDMAC_CNDA and AT_XDMAC_CUBC + * reads. + * For that reason, after reading AT_XDMAC_CUBC, we check if we are + * still using the same descriptor by reading AT_XDMAC_CNDA a second + * time. If AT_XDMAC_CNDA has changed, it means we have to + * read AT_XDMAC_CUBC again. + * Memory barriers are used to ensure the read order of the registers. + * A max number of retries is set because, although unlikely, the loop + * may never end if we are transferring a lot of data with small buffers.
+ */ cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; + rmb(); + cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC); + for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) { + rmb(); + check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; + + if (likely(cur_nda == check_nda)) + break; + + cur_nda = check_nda; + rmb(); + cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC); + } + + if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) { + ret = DMA_ERROR; + goto spin_unlock; + } + /* * Remove size of all microblocks already transferred and the current * one. Then add the remaining size to transfer of the current @@ -1446,7 +1482,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda) break; } - residue += at_xdmac_chan_read(atchan, AT_XDMAC_CUBC) << dwidth; + residue += cur_ubc << dwidth; dma_set_residue(txstate, residue); diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index e893318560db..5ad0ec1f0e29 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -156,7 +156,6 @@ static void dwc_initialize(struct dw_dma_chan *dwc) /* Enable interrupts */ channel_set_bit(dw, MASK.XFER, dwc->mask); - channel_set_bit(dw, MASK.BLOCK, dwc->mask); channel_set_bit(dw, MASK.ERROR, dwc->mask); dwc->initialized = true; @@ -588,6 +587,9 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, spin_unlock_irqrestore(&dwc->lock, flags); } + + /* Re-enable interrupts */ + channel_set_bit(dw, MASK.BLOCK, dwc->mask); } /* ------------------------------------------------------------------------- */ @@ -618,11 +620,8 @@ static void dw_dma_tasklet(unsigned long data) dwc_scan_descriptors(dw, dwc); } - /* - * Re-enable interrupts. 
- */ + /* Re-enable interrupts */ channel_set_bit(dw, MASK.XFER, dw->all_chan_mask); - channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask); channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask); } @@ -1261,6 +1260,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) int dw_dma_cyclic_start(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); unsigned long flags; if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) { @@ -1269,7 +1269,12 @@ int dw_dma_cyclic_start(struct dma_chan *chan) } spin_lock_irqsave(&dwc->lock, flags); + + /* Enable interrupts to perform cyclic transfer */ + channel_set_bit(dw, MASK.BLOCK, dwc->mask); + dwc_dostart(dwc, dwc->cdesc->desc[0]); + spin_unlock_irqrestore(&dwc->lock, flags); return 0; diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c index 4c30fdd092b3..358f9689a3f5 100644 --- a/drivers/dma/dw/pci.c +++ b/drivers/dma/dw/pci.c @@ -108,6 +108,10 @@ static const struct pci_device_id dw_pci_id_table[] = { /* Haswell */ { PCI_VDEVICE(INTEL, 0x9c60) }, + + /* Broadwell */ + { PCI_VDEVICE(INTEL, 0x9ce0) }, + { } }; MODULE_DEVICE_TABLE(pci, dw_pci_id_table); diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 29a7723918d9..ee3463e774f8 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -113,6 +113,9 @@ #define GET_NUM_REGN(x) ((x & 0x300000) >> 20) /* bits 20-21 */ #define CHMAP_EXIST BIT(24) +/* CCSTAT register */ +#define EDMA_CCSTAT_ACTV BIT(4) + /* * Max of 20 segments per channel to conserve PaRAM slots * Also note that MAX_NR_SG should be atleast the no.of periods @@ -1687,9 +1690,20 @@ static void edma_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&echan->vchan.lock, flags); } +/* + * This limit exists to avoid a possible infinite loop when waiting for proof + * that a particular transfer is completed. 
This limit can be hit if there + * are large bursts to/from slow devices or the CPU is never able to catch + * the DMA hardware idle. On an AM335x transferring 48 bytes from the UART + * RX-FIFO, as many as 55 loops have been seen. + */ +#define EDMA_MAX_TR_WAIT_LOOPS 1000 + static u32 edma_residue(struct edma_desc *edesc) { bool dst = edesc->direction == DMA_DEV_TO_MEM; + int loop_count = EDMA_MAX_TR_WAIT_LOOPS; + struct edma_chan *echan = edesc->echan; struct edma_pset *pset = edesc->pset; dma_addr_t done, pos; int i; @@ -1698,7 +1712,32 @@ static u32 edma_residue(struct edma_desc *edesc) * We always read the dst/src position from the first RamPar * pset. That's the one which is active now. */ - pos = edma_get_position(edesc->echan->ecc, edesc->echan->slot[0], dst); + pos = edma_get_position(echan->ecc, echan->slot[0], dst); + + /* + * "pos" may represent a transfer request that is still being + * processed by the EDMACC or EDMATC. We will busy wait until + * any one of these situations occurs: + * 1. the DMA hardware is idle + * 2. a new transfer request is set up + * 3. we hit the loop limit + */ + while (edma_read(echan->ecc, EDMA_CCSTAT) & EDMA_CCSTAT_ACTV) { + /* check if a new transfer request is set up */ + if (edma_get_position(echan->ecc, + echan->slot[0], dst) != pos) { + break; + } + + if (!--loop_count) { + dev_dbg_ratelimited(echan->vchan.chan.device->dev, + "%s: timeout waiting for PaRAM update\n", + __func__); + break; + } + + cpu_relax(); + } /* * Cyclic is simple. Just subtract pset[0].addr from pos.
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 2209f75fdf05..aac85c30c2cf 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -522,6 +522,8 @@ static dma_cookie_t fsldma_run_tx_complete_actions(struct fsldma_chan *chan, chan_dbg(chan, "LD %p callback\n", desc); txd->callback(txd->callback_param); } + + dma_descriptor_unmap(txd); } /* Run any dependencies */ diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 5428746f03fb..bd09961443b1 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -759,32 +759,42 @@ void ioat_timer_event(unsigned long data) return; } + spin_lock_bh(&ioat_chan->cleanup_lock); + + /* handle the no-actives case */ + if (!ioat_ring_active(ioat_chan)) { + spin_lock_bh(&ioat_chan->prep_lock); + check_active(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + return; + } + /* if we haven't made progress and we have already * acknowledged a pending completion once, then be more * forceful with a restart */ - spin_lock_bh(&ioat_chan->cleanup_lock); if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) __cleanup(ioat_chan, phys_complete); else if (test_bit(IOAT_COMPLETION_ACK, &ioat_chan->state)) { + u32 chanerr; + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + dev_warn(to_dev(ioat_chan), "Restarting channel...\n"); + dev_warn(to_dev(ioat_chan), "CHANSTS: %#Lx CHANERR: %#x\n", + status, chanerr); + dev_warn(to_dev(ioat_chan), "Active descriptors: %d\n", + ioat_ring_active(ioat_chan)); + spin_lock_bh(&ioat_chan->prep_lock); ioat_restart_channel(ioat_chan); spin_unlock_bh(&ioat_chan->prep_lock); spin_unlock_bh(&ioat_chan->cleanup_lock); return; - } else { + } else set_bit(IOAT_COMPLETION_ACK, &ioat_chan->state); - mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); - } - - if (ioat_ring_active(ioat_chan)) - mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); - else { - spin_lock_bh(&ioat_chan->prep_lock); - 
check_active(ioat_chan); - spin_unlock_bh(&ioat_chan->prep_lock); - } + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); spin_unlock_bh(&ioat_chan->cleanup_lock); } diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index e4f43125e0fb..f039cfadf17b 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -1300,10 +1300,10 @@ static int iop_adma_probe(struct platform_device *pdev) * note: writecombine gives slightly better performance, but * requires that we explicitly flush the writes */ - adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev, - plat_data->pool_size, - &adev->dma_desc_pool, - GFP_KERNEL); + adev->dma_desc_pool_virt = dma_alloc_wc(&pdev->dev, + plat_data->pool_size, + &adev->dma_desc_pool, + GFP_KERNEL); if (!adev->dma_desc_pool_virt) { ret = -ENOMEM; goto err_free_adev; diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 14091f878f80..3922a5d56806 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -964,8 +964,8 @@ mv_xor_channel_add(struct mv_xor_device *xordev, * requires that we explicitly flush the writes */ mv_chan->dma_desc_pool_virt = - dma_alloc_writecombine(&pdev->dev, MV_XOR_POOL_SIZE, - &mv_chan->dma_desc_pool, GFP_KERNEL); + dma_alloc_wc(&pdev->dev, MV_XOR_POOL_SIZE, &mv_chan->dma_desc_pool, + GFP_KERNEL); if (!mv_chan->dma_desc_pool_virt) return ERR_PTR(-ENOMEM); diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index f2a0310ae771..debca824bed6 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -583,6 +583,8 @@ static void set_updater_desc(struct pxad_desc_sw *sw_desc, (PXA_DCMD_LENGTH & sizeof(u32)); if (flags & DMA_PREP_INTERRUPT) updater->dcmd |= PXA_DCMD_ENDIRQEN; + if (sw_desc->cyclic) + sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr = sw_desc->first; } static bool is_desc_completed(struct virt_dma_desc *vd) @@ -673,6 +675,10 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id) dev_dbg(&chan->vc.chan.dev->device, "%s(): checking txd 
%p[%x]: completed=%d\n", __func__, vd, vd->tx.cookie, is_desc_completed(vd)); + if (to_pxad_sw_desc(vd)->cyclic) { + vchan_cyclic_callback(vd); + break; + } if (is_desc_completed(vd)) { list_del(&vd->node); vchan_cookie_complete(vd); @@ -1080,7 +1086,7 @@ pxad_prep_dma_cyclic(struct dma_chan *dchan, return NULL; pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr); - dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH | period_len); + dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH & period_len); dev_dbg(&chan->vc.chan.dev->device, "%s(): buf_addr=0x%lx len=%zu period=%zu dir=%d flags=%lx\n", __func__, (unsigned long)buf_addr, len, period_len, dir, flags); diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index 2d691a34a0ab..d5e0a9c3ad5d 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -502,8 +502,8 @@ static int bam_alloc_chan(struct dma_chan *chan) return 0; /* allocate FIFO descriptor space, but only if necessary */ - bchan->fifo_virt = dma_alloc_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE, - &bchan->fifo_phys, GFP_KERNEL); + bchan->fifo_virt = dma_alloc_wc(bdev->dev, BAM_DESC_FIFO_SIZE, + &bchan->fifo_phys, GFP_KERNEL); if (!bchan->fifo_virt) { dev_err(bdev->dev, "Failed to allocate desc fifo\n"); @@ -538,8 +538,8 @@ static void bam_free_chan(struct dma_chan *chan) bam_reset_channel(bchan); spin_unlock_irqrestore(&bchan->vc.lock, flags); - dma_free_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt, - bchan->fifo_phys); + dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt, + bchan->fifo_phys); bchan->fifo_virt = NULL; /* mask irq for pipe/channel */ @@ -1234,9 +1234,9 @@ static int bam_dma_remove(struct platform_device *pdev) bam_dma_terminate_all(&bdev->channels[i].vc.chan); tasklet_kill(&bdev->channels[i].vc.task); - dma_free_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE, - bdev->channels[i].fifo_virt, - bdev->channels[i].fifo_phys); + dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE, + bdev->channels[i].fifo_virt, + 
bdev->channels[i].fifo_phys); } tasklet_kill(&bdev->task); |