summaryrefslogtreecommitdiff
path: root/drivers/dma/ioat/dma.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-03-17 12:34:54 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-03-17 12:34:54 -0700
commitb5b131c7473e17275debcdf1c226f452dc3876ed (patch)
treea272e947c38213d4ee989bb3f863a8091d50426b /drivers/dma/ioat/dma.c
parentc7eec380e85a427983782df744f0fb745d867170 (diff)
parent896e041e8e8efb34520d033a693ef25391f9c9f0 (diff)
downloadlinux-b5b131c7473e17275debcdf1c226f452dc3876ed.tar.gz
Merge tag 'dmaengine-4.6-rc1' of git://git.infradead.org/users/vkoul/slave-dma
Pull dmaengine updates from Vinod Koul: "This is smallish update with minor changes to core and new driver and usual updates. Nothing super exciting here.. - We have made slave address as physical to enable driver to do the mapping. - We now expose the maxburst for slave dma as new capability so clients can know this and program accordingly - addition of device synchronize callbacks on omap and edma. - pl330 updates to support DMAFLUSHP for Rockchip platforms. - Updates and improved sg handling in Xilinx VDMA driver. - New hidma qualcomm dma driver, though some bits are still in progress" * tag 'dmaengine-4.6-rc1' of git://git.infradead.org/users/vkoul/slave-dma: (40 commits) dmaengine: IOATDMA: revise channel reset workaround on CB3.3 platforms dmaengine: add Qualcomm Technologies HIDMA channel driver dmaengine: add Qualcomm Technologies HIDMA management driver dmaengine: hidma: Add Device Tree binding dmaengine: qcom_bam_dma: move to qcom directory dmaengine: tegra: Move of_device_id table near to its user dmaengine: xilinx_vdma: Remove unnecessary variable initializations dmaengine: sirf: use __maybe_unused to hide pm functions dmaengine: rcar-dmac: clear pertinence number of channels dmaengine: sh: shdmac: don't open code of_device_get_match_data() dmaengine: tegra: don't open code of_device_get_match_data() dmaengine: qcom_bam_dma: Make driver work for BE dmaengine: sun4i: support module autoloading dma/mic_x100_dma: IS_ERR() vs PTR_ERR() typo dmaengine: xilinx_vdma: Use readl_poll_timeout instead of do while loop's dmaengine: xilinx_vdma: Simplify spin lock handling dmaengine: xilinx_vdma: Fix issues with non-parking mode dmaengine: xilinx_vdma: Improve SG engine handling dmaengine: pl330: fix to support the burst mode dmaengine: make slave address physical ...
Diffstat (limited to 'drivers/dma/ioat/dma.c')
-rw-r--r--drivers/dma/ioat/dma.c268
1 files changed, 72 insertions, 196 deletions
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 21539d5c54c3..bd09961443b1 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -31,6 +31,7 @@
#include <linux/dma-mapping.h>
#include <linux/workqueue.h>
#include <linux/prefetch.h>
+#include <linux/sizes.h>
#include "dma.h"
#include "registers.h"
#include "hw.h"
@@ -290,24 +291,30 @@ static dma_cookie_t ioat_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
}
static struct ioat_ring_ent *
-ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
+ioat_alloc_ring_ent(struct dma_chan *chan, int idx, gfp_t flags)
{
struct ioat_dma_descriptor *hw;
struct ioat_ring_ent *desc;
struct ioatdma_device *ioat_dma;
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+ int chunk;
dma_addr_t phys;
+ u8 *pos;
+ off_t offs;
ioat_dma = to_ioatdma_device(chan->device);
- hw = pci_pool_alloc(ioat_dma->dma_pool, flags, &phys);
- if (!hw)
- return NULL;
+
+ chunk = idx / IOAT_DESCS_PER_2M;
+ idx &= (IOAT_DESCS_PER_2M - 1);
+ offs = idx * IOAT_DESC_SZ;
+ pos = (u8 *)ioat_chan->descs[chunk].virt + offs;
+ phys = ioat_chan->descs[chunk].hw + offs;
+ hw = (struct ioat_dma_descriptor *)pos;
memset(hw, 0, sizeof(*hw));
desc = kmem_cache_zalloc(ioat_cache, flags);
- if (!desc) {
- pci_pool_free(ioat_dma->dma_pool, hw, phys);
+ if (!desc)
return NULL;
- }
dma_async_tx_descriptor_init(&desc->txd, chan);
desc->txd.tx_submit = ioat_tx_submit_unlock;
@@ -318,32 +325,63 @@ ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
{
- struct ioatdma_device *ioat_dma;
-
- ioat_dma = to_ioatdma_device(chan->device);
- pci_pool_free(ioat_dma->dma_pool, desc->hw, desc->txd.phys);
kmem_cache_free(ioat_cache, desc);
}
struct ioat_ring_ent **
ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
struct ioat_ring_ent **ring;
- int descs = 1 << order;
- int i;
-
- if (order > ioat_get_max_alloc_order())
- return NULL;
+ int total_descs = 1 << order;
+ int i, chunks;
/* allocate the array to hold the software ring */
- ring = kcalloc(descs, sizeof(*ring), flags);
+ ring = kcalloc(total_descs, sizeof(*ring), flags);
if (!ring)
return NULL;
- for (i = 0; i < descs; i++) {
- ring[i] = ioat_alloc_ring_ent(c, flags);
+
+ ioat_chan->desc_chunks = chunks = (total_descs * IOAT_DESC_SZ) / SZ_2M;
+
+ for (i = 0; i < chunks; i++) {
+ struct ioat_descs *descs = &ioat_chan->descs[i];
+
+ descs->virt = dma_alloc_coherent(to_dev(ioat_chan),
+ SZ_2M, &descs->hw, flags);
+ if (!descs->virt && (i > 0)) {
+ int idx;
+
+ for (idx = 0; idx < i; idx++) {
+ dma_free_coherent(to_dev(ioat_chan), SZ_2M,
+ descs->virt, descs->hw);
+ descs->virt = NULL;
+ descs->hw = 0;
+ }
+
+ ioat_chan->desc_chunks = 0;
+ kfree(ring);
+ return NULL;
+ }
+ }
+
+ for (i = 0; i < total_descs; i++) {
+ ring[i] = ioat_alloc_ring_ent(c, i, flags);
if (!ring[i]) {
+ int idx;
+
while (i--)
ioat_free_ring_ent(ring[i], c);
+
+ for (idx = 0; idx < ioat_chan->desc_chunks; idx++) {
+ dma_free_coherent(to_dev(ioat_chan),
+ SZ_2M,
+ ioat_chan->descs[idx].virt,
+ ioat_chan->descs[idx].hw);
+ ioat_chan->descs[idx].virt = NULL;
+ ioat_chan->descs[idx].hw = 0;
+ }
+
+ ioat_chan->desc_chunks = 0;
kfree(ring);
return NULL;
}
@@ -351,7 +389,7 @@ ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
}
/* link descs */
- for (i = 0; i < descs-1; i++) {
+ for (i = 0; i < total_descs-1; i++) {
struct ioat_ring_ent *next = ring[i+1];
struct ioat_dma_descriptor *hw = ring[i]->hw;
@@ -362,114 +400,6 @@ ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
return ring;
}
-static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order)
-{
- /* reshape differs from normal ring allocation in that we want
- * to allocate a new software ring while only
- * extending/truncating the hardware ring
- */
- struct dma_chan *c = &ioat_chan->dma_chan;
- const u32 curr_size = ioat_ring_size(ioat_chan);
- const u16 active = ioat_ring_active(ioat_chan);
- const u32 new_size = 1 << order;
- struct ioat_ring_ent **ring;
- u32 i;
-
- if (order > ioat_get_max_alloc_order())
- return false;
-
- /* double check that we have at least 1 free descriptor */
- if (active == curr_size)
- return false;
-
- /* when shrinking, verify that we can hold the current active
- * set in the new ring
- */
- if (active >= new_size)
- return false;
-
- /* allocate the array to hold the software ring */
- ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
- if (!ring)
- return false;
-
- /* allocate/trim descriptors as needed */
- if (new_size > curr_size) {
- /* copy current descriptors to the new ring */
- for (i = 0; i < curr_size; i++) {
- u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1);
- u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-
- ring[new_idx] = ioat_chan->ring[curr_idx];
- set_desc_id(ring[new_idx], new_idx);
- }
-
- /* add new descriptors to the ring */
- for (i = curr_size; i < new_size; i++) {
- u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-
- ring[new_idx] = ioat_alloc_ring_ent(c, GFP_NOWAIT);
- if (!ring[new_idx]) {
- while (i--) {
- u16 new_idx = (ioat_chan->tail+i) &
- (new_size-1);
-
- ioat_free_ring_ent(ring[new_idx], c);
- }
- kfree(ring);
- return false;
- }
- set_desc_id(ring[new_idx], new_idx);
- }
-
- /* hw link new descriptors */
- for (i = curr_size-1; i < new_size; i++) {
- u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
- struct ioat_ring_ent *next =
- ring[(new_idx+1) & (new_size-1)];
- struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
-
- hw->next = next->txd.phys;
- }
- } else {
- struct ioat_dma_descriptor *hw;
- struct ioat_ring_ent *next;
-
- /* copy current descriptors to the new ring, dropping the
- * removed descriptors
- */
- for (i = 0; i < new_size; i++) {
- u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1);
- u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-
- ring[new_idx] = ioat_chan->ring[curr_idx];
- set_desc_id(ring[new_idx], new_idx);
- }
-
- /* free deleted descriptors */
- for (i = new_size; i < curr_size; i++) {
- struct ioat_ring_ent *ent;
-
- ent = ioat_get_ring_ent(ioat_chan, ioat_chan->tail+i);
- ioat_free_ring_ent(ent, c);
- }
-
- /* fix up hardware ring */
- hw = ring[(ioat_chan->tail+new_size-1) & (new_size-1)]->hw;
- next = ring[(ioat_chan->tail+new_size) & (new_size-1)];
- hw->next = next->txd.phys;
- }
-
- dev_dbg(to_dev(ioat_chan), "%s: allocated %d descriptors\n",
- __func__, new_size);
-
- kfree(ioat_chan->ring);
- ioat_chan->ring = ring;
- ioat_chan->alloc_order = order;
-
- return true;
-}
-
/**
* ioat_check_space_lock - verify space and grab ring producer lock
* @ioat: ioat,3 channel (ring) to operate on
@@ -478,9 +408,6 @@ static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order)
int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
__acquires(&ioat_chan->prep_lock)
{
- bool retry;
-
- retry:
spin_lock_bh(&ioat_chan->prep_lock);
/* never allow the last descriptor to be consumed, we need at
* least one free at all times to allow for on-the-fly ring
@@ -493,24 +420,8 @@ int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
ioat_chan->produce = num_descs;
return 0; /* with ioat->prep_lock held */
}
- retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state);
spin_unlock_bh(&ioat_chan->prep_lock);
- /* is another cpu already trying to expand the ring? */
- if (retry)
- goto retry;
-
- spin_lock_bh(&ioat_chan->cleanup_lock);
- spin_lock_bh(&ioat_chan->prep_lock);
- retry = reshape_ring(ioat_chan, ioat_chan->alloc_order + 1);
- clear_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state);
- spin_unlock_bh(&ioat_chan->prep_lock);
- spin_unlock_bh(&ioat_chan->cleanup_lock);
-
- /* if we were able to expand the ring retry the allocation */
- if (retry)
- goto retry;
-
dev_dbg_ratelimited(to_dev(ioat_chan),
"%s: ring full! num_descs: %d (%x:%x:%x)\n",
__func__, num_descs, ioat_chan->head,
@@ -823,19 +734,6 @@ static void check_active(struct ioatdma_chan *ioat_chan)
if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state))
mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
- else if (ioat_chan->alloc_order > ioat_get_alloc_order()) {
- /* if the ring is idle, empty, and oversized try to step
- * down the size
- */
- reshape_ring(ioat_chan, ioat_chan->alloc_order - 1);
-
- /* keep shrinking until we get back to our minimum
- * default size
- */
- if (ioat_chan->alloc_order > ioat_get_alloc_order())
- mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
- }
-
}
void ioat_timer_event(unsigned long data)
@@ -916,40 +814,6 @@ ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie,
return dma_cookie_status(c, cookie, txstate);
}
-static int ioat_irq_reinit(struct ioatdma_device *ioat_dma)
-{
- struct pci_dev *pdev = ioat_dma->pdev;
- int irq = pdev->irq, i;
-
- if (!is_bwd_ioat(pdev))
- return 0;
-
- switch (ioat_dma->irq_mode) {
- case IOAT_MSIX:
- for (i = 0; i < ioat_dma->dma_dev.chancnt; i++) {
- struct msix_entry *msix = &ioat_dma->msix_entries[i];
- struct ioatdma_chan *ioat_chan;
-
- ioat_chan = ioat_chan_by_index(ioat_dma, i);
- devm_free_irq(&pdev->dev, msix->vector, ioat_chan);
- }
-
- pci_disable_msix(pdev);
- break;
- case IOAT_MSI:
- pci_disable_msi(pdev);
- /* fall through */
- case IOAT_INTX:
- devm_free_irq(&pdev->dev, irq, ioat_dma);
- break;
- default:
- return 0;
- }
- ioat_dma->irq_mode = IOAT_NOIRQ;
-
- return ioat_dma_setup_interrupts(ioat_dma);
-}
-
int ioat_reset_hw(struct ioatdma_chan *ioat_chan)
{
/* throw away whatever the channel was doing and get it
@@ -989,9 +853,21 @@ int ioat_reset_hw(struct ioatdma_chan *ioat_chan)
}
}
+ if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+ ioat_dma->msixtba0 = readq(ioat_dma->reg_base + 0x1000);
+ ioat_dma->msixdata0 = readq(ioat_dma->reg_base + 0x1008);
+ ioat_dma->msixpba = readq(ioat_dma->reg_base + 0x1800);
+ }
+
+
err = ioat_reset_sync(ioat_chan, msecs_to_jiffies(200));
- if (!err)
- err = ioat_irq_reinit(ioat_dma);
+ if (!err) {
+ if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+ writeq(ioat_dma->msixtba0, ioat_dma->reg_base + 0x1000);
+ writeq(ioat_dma->msixdata0, ioat_dma->reg_base + 0x1008);
+ writeq(ioat_dma->msixpba, ioat_dma->reg_base + 0x1800);
+ }
+ }
if (err)
dev_err(&pdev->dev, "Failed to reset: %d\n", err);