From e1031dc1f7ba5c8724ba211062134076df292791 Mon Sep 17 00:00:00 2001
From: Maxime Ripard
Date: Thu, 7 May 2015 17:38:07 +0200
Subject: [PATCH 1/3] dmaengine: Support different source and destination stride

In interleaved mode, we can expect to have different source and
destination strides. Add support for such a case to dmaengine.

Signed-off-by: Maxime Ripard
Signed-off-by: Vinod Koul
---
 include/linux/dmaengine.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index ad419757241f..5d63acb09813 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -122,10 +122,18 @@ enum dma_transfer_direction {
  *	 chunk and before first src/dst address for next chunk.
  *	 Ignored for dst(assumed 0), if dst_inc is true and dst_sgl is false.
  *	 Ignored for src(assumed 0), if src_inc is true and src_sgl is false.
+ * @dst_icg: Number of bytes to jump after last dst address of this
+ *	 chunk and before the first dst address for next chunk.
+ *	 Ignored if dst_inc is true and dst_sgl is false.
+ * @src_icg: Number of bytes to jump after last src address of this
+ *	 chunk and before the first src address for next chunk.
+ *	 Ignored if src_inc is true and src_sgl is false.
  */
 struct data_chunk {
 	size_t size;
 	size_t icg;
+	size_t dst_icg;
+	size_t src_icg;
 };
 
 /**
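
To illustrate how a provider might consume the new fields, here is a
minimal sketch of the fallback logic implied by the kernel-doc above:
the directional @dst_icg takes precedence over the shared @icg, and both
are ignored for a contiguous destination. The helper name chunk_dst_icg()
is illustrative and not part of this patch; source handling is symmetric.

    #include <linux/dmaengine.h>

    /* Effective destination gap after one chunk of an interleaved
     * transfer. A sketch only: @dst_icg wins over the shared @icg, and
     * both are ignored when dst_inc is true and dst_sgl is false
     * (contiguous destination), per the struct data_chunk kernel-doc. */
    static size_t chunk_dst_icg(struct dma_interleaved_template *xt,
                                struct data_chunk *chunk)
    {
            if (xt->dst_inc) {
                    if (chunk->dst_icg)
                            return chunk->dst_icg;
                    if (xt->dst_sgl)
                            return chunk->icg;
            }
            return 0;
    }
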
From b9855f03d560d351e95301b9de0bc3cad3b31fe9 Mon Sep 17 00:00:00 2001
From: Robert Jarzmik
Date: Tue, 26 May 2015 23:06:33 +0200
Subject: [PATCH 2/3] dmaengine: virt-dma: don't always free descriptor upon completion

This patch improves the case of a transfer submitted multiple times,
where the cost of creating the descriptor chain is not negligible. This
happens with big video buffers (several megabytes, i.e. several
thousand linked descriptors in one scatter-gather list). In these
cases, a video driver would want to do:
 - tx = dmaengine_prep_slave_sg()
 - dmaengine_submit(tx);
 - dma_async_issue_pending()
 - wait for video completion
 - read video data (or not, skipping a frame is also possible)
 - dmaengine_submit(tx)
   => here, recalculating the descriptor chain would take time
   => repeated dma coherent allocations might create holes in the dma
      pool, which is counter-productive
 - dma_async_issue_pending()
 - etc ...

In order to cope with this case, virt-dma is modified to prevent
freeing the descriptors upon completion if the DMA_CTRL_ACK flag is set
in the transfer.

Signed-off-by: Robert Jarzmik
Signed-off-by: Vinod Koul
---
 drivers/dma/virt-dma.c | 19 +++++++++++++------
 drivers/dma/virt-dma.h | 13 ++++++++++++-
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
index 6f80432a3f0a..7d2c17d8d30f 100644
--- a/drivers/dma/virt-dma.c
+++ b/drivers/dma/virt-dma.c
@@ -29,7 +29,7 @@ dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *tx)
 	spin_lock_irqsave(&vc->lock, flags);
 	cookie = dma_cookie_assign(tx);
 
-	list_add_tail(&vd->node, &vc->desc_submitted);
+	list_move_tail(&vd->node, &vc->desc_submitted);
 	spin_unlock_irqrestore(&vc->lock, flags);
 
 	dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: submitted\n",
@@ -83,8 +83,10 @@ static void vchan_complete(unsigned long arg)
 		cb_data = vd->tx.callback_param;
 
 		list_del(&vd->node);
-
-		vc->desc_free(vd);
+		if (async_tx_test_ack(&vd->tx))
+			list_add(&vd->node, &vc->desc_allocated);
+		else
+			vc->desc_free(vd);
 
 		if (cb)
 			cb(cb_data);
@@ -96,9 +98,13 @@ void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head)
 	while (!list_empty(head)) {
 		struct virt_dma_desc *vd = list_first_entry(head,
 			struct virt_dma_desc, node);
-		list_del(&vd->node);
-		dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd);
-		vc->desc_free(vd);
+		if (async_tx_test_ack(&vd->tx)) {
+			list_move_tail(&vd->node, &vc->desc_allocated);
+		} else {
+			dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd);
+			list_del(&vd->node);
+			vc->desc_free(vd);
+		}
 	}
 }
 EXPORT_SYMBOL_GPL(vchan_dma_desc_free_list);
@@ -108,6 +114,7 @@ void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev)
 	dma_cookie_init(&vc->chan);
 
 	spin_lock_init(&vc->lock);
+	INIT_LIST_HEAD(&vc->desc_allocated);
 	INIT_LIST_HEAD(&vc->desc_submitted);
 	INIT_LIST_HEAD(&vc->desc_issued);
 	INIT_LIST_HEAD(&vc->desc_completed);
diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h
index 181b95267866..189e75dbcb15 100644
--- a/drivers/dma/virt-dma.h
+++ b/drivers/dma/virt-dma.h
@@ -29,6 +29,7 @@ struct virt_dma_chan {
 	spinlock_t lock;
 
 	/* protected by vc.lock */
+	struct list_head desc_allocated;
 	struct list_head desc_submitted;
 	struct list_head desc_issued;
 	struct list_head desc_completed;
@@ -55,11 +56,16 @@ static inline struct dma_async_tx_descriptor *vchan_tx_prep(struct virt_dma_chan
 	struct virt_dma_desc *vd, unsigned long tx_flags)
 {
 	extern dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *);
+	unsigned long flags;
 
 	dma_async_tx_descriptor_init(&vd->tx, &vc->chan);
 	vd->tx.flags = tx_flags;
 	vd->tx.tx_submit = vchan_tx_submit;
 
+	spin_lock_irqsave(&vc->lock, flags);
+	list_add_tail(&vd->node, &vc->desc_allocated);
+	spin_unlock_irqrestore(&vc->lock, flags);
+
 	return &vd->tx;
 }
 
@@ -122,7 +128,8 @@ static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc)
 }
 
 /**
- * vchan_get_all_descriptors - obtain all submitted and issued descriptors
+ * vchan_get_all_descriptors - obtain all allocated, submitted and issued
+ * descriptors
  * vc: virtual channel to get descriptors from
  * head: list of descriptors found
  *
@@ -134,6 +141,7 @@ static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc)
 static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc,
 	struct list_head *head)
 {
+	list_splice_tail_init(&vc->desc_allocated, head);
 	list_splice_tail_init(&vc->desc_submitted, head);
 	list_splice_tail_init(&vc->desc_issued, head);
 	list_splice_tail_init(&vc->desc_completed, head);
@@ -141,11 +149,14 @@ static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc,
 
 static inline void vchan_free_chan_resources(struct virt_dma_chan *vc)
 {
+	struct virt_dma_desc *vd;
 	unsigned long flags;
 	LIST_HEAD(head);
 
 	spin_lock_irqsave(&vc->lock, flags);
 	vchan_get_all_descriptors(vc, &head);
+	list_for_each_entry(vd, &head, node)
+		async_tx_clear_ack(&vd->tx);
 	spin_unlock_irqrestore(&vc->lock, flags);
 
 	vchan_dma_desc_free_list(vc, &head);
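
The flow listed in the commit message above, written out as a
client-side sketch: the descriptor is prepared once with DMA_CTRL_ACK
and resubmitted for each frame, so the descriptor chain and its
coherent allocations are built only once. Channel setup and error
handling are elided, and wait_for_frame() is a placeholder for the
driver's own completion mechanism.

    #include <linux/dmaengine.h>
    #include <linux/scatterlist.h>

    extern void wait_for_frame(void);   /* placeholder, driver-specific */

    static void capture_frames(struct dma_chan *chan,
                               struct scatterlist *sgl,
                               unsigned int nents, unsigned int frames)
    {
            struct dma_async_tx_descriptor *tx;
            unsigned int i;

            /* DMA_CTRL_ACK marks the transfer as reusable: with this
             * patch, vchan_complete() parks it on desc_allocated
             * instead of freeing it. */
            tx = dmaengine_prep_slave_sg(chan, sgl, nents, DMA_DEV_TO_MEM,
                                         DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
            if (!tx)
                    return;

            for (i = 0; i < frames; i++) {
                    dmaengine_submit(tx);           /* no chain rebuild */
                    dma_async_issue_pending(chan);
                    wait_for_frame();               /* placeholder */
            }
    }
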
From 5f88d9706fa48084eaab2dbbec27779809c5106b Mon Sep 17 00:00:00 2001
From: Robert Jarzmik
Date: Tue, 26 May 2015 23:06:34 +0200
Subject: [PATCH 3/3] Documentation: dmaengine: document DMA_CTRL_ACK

Add documentation about acking transfers and their reusability.

Signed-off-by: Robert Jarzmik
Acked-by: Maxime Ripard
Signed-off-by: Vinod Koul
---
 Documentation/dmaengine/provider.txt | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/Documentation/dmaengine/provider.txt b/Documentation/dmaengine/provider.txt
index 05d2280190f1..ca67b0f04c6e 100644
--- a/Documentation/dmaengine/provider.txt
+++ b/Documentation/dmaengine/provider.txt
@@ -345,11 +345,12 @@ where to put them)
       that abstracts it away.
 
   * DMA_CTRL_ACK
-    - Undocumented feature
-    - No one really has an idea of what it's about, besides being
-      related to reusing the DMA transaction descriptors or having
-      additional transactions added to it in the async-tx API
-    - Useless in the case of the slave API
+    - If set, the transfer can be reused after being completed.
+    - There is a guarantee the transfer won't be freed until it is acked
+      by async_tx_ack().
+    - As a consequence, if a device driver wants to skip the dma_map_sg() and
+      dma_unmap_sg() in between 2 transfers, because the DMA'd data wasn't used,
+      it can resubmit the transfer right after its completion.
 
 General Design Notes
 --------------------
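
The ack state that this documentation relies on is manipulated with the
async_tx_ack(), async_tx_clear_ack() and async_tx_test_ack() helpers
from include/linux/dmaengine.h. As a sketch of the interaction with
patch 2 (the function name below is made up), a driver that is done
reusing a descriptor can clear the flag so that
vchan_dma_desc_free_list() will actually free it:

    #include <linux/dmaengine.h>

    static void release_reusable_desc(struct dma_async_tx_descriptor *tx)
    {
            /* With the ack flag cleared, virt-dma stops parking the
             * descriptor on desc_allocated and frees it on the next
             * cleanup pass. */
            if (async_tx_test_ack(tx))
                    async_tx_clear_ack(tx);
    }
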