From 9fb407179c6fd910005040bebb040094ef959b6c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 21 Feb 2021 18:28:05 -0800 Subject: block: Remove unused blk_pm_*() function definitions Commit a1ce35fa4985 ("block: remove dead elevator code") removed the last callers of blk_pm_requeue_request(), blk_pm_add_request() and blk_pm_put_request(). Hence remove the definitions of these functions. Removing these functions removes all users of the struct request_queue nr_pending member. Hence also remove 'nr_pending'. Note: 'nr_pending' is no longer used since commit 7cedffec8e75 ("block: Make blk_get_request() block for non-PM requests while suspended"). Cc: Alan Stern Cc: Christoph Hellwig Signed-off-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Signed-off-by: Jens Axboe --- block/blk-pm.h | 38 -------------------------------------- 1 file changed, 38 deletions(-) (limited to 'block') diff --git a/block/blk-pm.h b/block/blk-pm.h index a2283cc9f716..8a5a0d4b357f 100644 --- a/block/blk-pm.h +++ b/block/blk-pm.h @@ -21,31 +21,6 @@ static inline void blk_pm_mark_last_busy(struct request *rq) if (rq->q->dev && !(rq->rq_flags & RQF_PM)) pm_runtime_mark_last_busy(rq->q->dev); } - -static inline void blk_pm_requeue_request(struct request *rq) -{ - lockdep_assert_held(&rq->q->queue_lock); - - if (rq->q->dev && !(rq->rq_flags & RQF_PM)) - rq->q->nr_pending--; -} - -static inline void blk_pm_add_request(struct request_queue *q, - struct request *rq) -{ - lockdep_assert_held(&q->queue_lock); - - if (q->dev && !(rq->rq_flags & RQF_PM)) - q->nr_pending++; -} - -static inline void blk_pm_put_request(struct request *rq) -{ - lockdep_assert_held(&rq->q->queue_lock); - - if (rq->q->dev && !(rq->rq_flags & RQF_PM)) - --rq->q->nr_pending; -} #else static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q) { @@ -55,19 +30,6 @@ static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q) static inline void blk_pm_mark_last_busy(struct request *rq) { } - -static inline void blk_pm_requeue_request(struct request *rq) -{ -} - -static inline void blk_pm_add_request(struct request_queue *q, - struct request *rq) -{ -} - -static inline void blk_pm_put_request(struct request *rq) -{ -} #endif #endif /* _BLOCK_BLK_PM_H_ */ -- cgit v1.2.1 From b357e4a694ac4b95096715df253548f7e1f2723f Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Sun, 21 Feb 2021 21:29:59 -0800 Subject: block: get rid of the trace rq insert wrapper Get rid of the wrapper for trace_block_rq_insert() and call the function directly.
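[The direct call works for modular schedulers (bfq, kyber and mq-deadline can all be built as modules) only because the tracepoint symbol is exported from built-in code, which is what the EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_insert) hunk below provides; the old wrapper was itself an exported function doing that indirection. A minimal sketch of the pattern, with illustrative names not taken from the patch:

#include <trace/events/block.h>

/* built-in core code, e.g. block/blk-core.c: make the tracepoint
 * callable from modules. */
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_insert);

/* modular I/O scheduler: invoke the tracepoint directly instead of
 * going through an exported wrapper function. */
static void example_insert_request(struct request *rq)
{
	trace_block_rq_insert(rq);	/* was: blk_mq_sched_request_inserted(rq) */
}
]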
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 4 +++- block/blk-core.c | 1 + block/blk-mq-sched.c | 6 ------ block/blk-mq-sched.h | 1 - block/kyber-iosched.c | 4 +++- block/mq-deadline.c | 4 +++- 6 files changed, 10 insertions(+), 10 deletions(-) (limited to 'block') diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index b398dde53af9..ec482e6641ff 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -125,6 +125,8 @@ #include #include +#include <trace/events/block.h> + #include "blk.h" #include "blk-mq.h" #include "blk-mq-tag.h" @@ -5621,7 +5623,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, spin_unlock_irq(&bfqd->lock); - blk_mq_sched_request_inserted(rq); + trace_block_rq_insert(rq); spin_lock_irq(&bfqd->lock); bfqq = bfq_init_rq(rq); diff --git a/block/blk-core.c b/block/blk-core.c index 5e752840b41a..fc60ff208497 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -59,6 +59,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); EXPORT_TRACEPOINT_SYMBOL_GPL(block_split); EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_insert); DEFINE_IDA(blk_queue_ida); diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index deff4e826e23..ddb65e9e6fd9 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -384,12 +384,6 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq) } EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge); -void blk_mq_sched_request_inserted(struct request *rq) -{ - trace_block_rq_insert(rq); -} -EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted); - static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, bool has_sched, struct request *rq) diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 0476360f05f1..5b18ab915c65 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -7,7 +7,6 @@ void blk_mq_sched_assign_ioc(struct request *rq); -void blk_mq_sched_request_inserted(struct request *rq); bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs, struct request **merged_request); bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index c25c41d0d061..f13da10953bf 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -13,6 +13,8 @@ #include #include +#include <trace/events/block.h> + #include "blk.h" #include "blk-mq.h" #include "blk-mq-debugfs.h" @@ -602,7 +604,7 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx, list_move_tail(&rq->queuelist, head); sbitmap_set_bit(&khd->kcq_map[sched_domain], rq->mq_ctx->index_hw[hctx->type]); - blk_mq_sched_request_inserted(rq); + trace_block_rq_insert(rq); spin_unlock(&kcq->lock); } } diff --git a/block/mq-deadline.c b/block/mq-deadline.c index b57470e154c8..f3631a287466 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -18,6 +18,8 @@ #include #include +#include <trace/events/block.h> + #include "blk.h" #include "blk-mq.h" #include "blk-mq-debugfs.h" @@ -496,7 +498,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, if (blk_mq_sched_try_insert_merge(q, rq)) return; - blk_mq_sched_request_inserted(rq); + trace_block_rq_insert(rq); if (at_head || blk_rq_is_passthrough(rq)) { if (at_head) -- cgit v1.2.1 From 6b09b4d33bd964f49d07d3cabfb4204d58cf9811 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Mon, 22 Feb
2021 14:54:52 +0800 Subject: block: fix potential IO hang when turning off io_poll The QUEUE_FLAG_POLL flag will be cleared when turning off 'io_poll', while at that moment there may be uncompleted IOs stuck in the hw queue. Subsequent polling won't help reap these IOs, since blk_poll() will return immediately because of the cleared QUEUE_FLAG_POLL flag. Thus these IOs will hang until they finally time out. The hang can be observed by running 'fio --engine=io_uring iodepth=1' while turning off 'io_poll' at the same time. To fix this, freeze and flush the request queue first when turning off 'io_poll'. Signed-off-by: Jeffle Xu Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index ae39c7f3d83d..0f4f0c8a7825 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -434,10 +434,13 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page, if (ret < 0) return ret; - if (poll_on) + if (poll_on) { blk_queue_flag_set(QUEUE_FLAG_POLL, q); - else + } else { + blk_mq_freeze_queue(q); blk_queue_flag_clear(QUEUE_FLAG_POLL, q); + blk_mq_unfreeze_queue(q); + } return ret; } -- cgit v1.2.1 From ffa772cfe9356ce94d3061335c2681f60e7c1c5b Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Fri, 5 Feb 2021 01:13:10 -0800 Subject: kyber: introduce kyber_depth_updated() A hang occurs when the user changes the scheduler queue depth by writing to the 'nr_requests' sysfs file of that device. The details of the environment in which we found the problem are as follows: an eMMC block device total driver tags: 16 default queue_depth: 32 kqd->async_depth is initialized in kyber_init_sched() with queue_depth=32 Then we change queue_depth to 256 by writing to the 'nr_requests' sysfs file, but kqd->async_depth is not updated after queue_depth changes. Now the async depth value is too small for queue_depth=256, which may cause a hang. This patch introduces kyber_depth_updated(), so that kyber can update the async depth when the queue depth changes. Signed-off-by: Yang Yang Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/kyber-iosched.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'block') diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index f13da10953bf..33d34d69cade 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -355,19 +355,9 @@ static void kyber_timer_fn(struct timer_list *t) } } -static unsigned int kyber_sched_tags_shift(struct request_queue *q) -{ - /* - * All of the hardware queues have the same depth, so we can just grab - * the shift of the first one.
- */ - return q->queue_hw_ctx[0]->sched_tags->bitmap_tags->sb.shift; -} - static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) { struct kyber_queue_data *kqd; - unsigned int shift; int ret = -ENOMEM; int i; @@ -402,9 +392,6 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) kqd->latency_targets[i] = kyber_latency_targets[i]; } - shift = kyber_sched_tags_shift(q); - kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U; - return kqd; err_buckets: @@ -460,9 +447,19 @@ static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq) INIT_LIST_HEAD(&kcq->rq_list[i]); } -static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) +static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx) { struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data; + struct blk_mq_tags *tags = hctx->sched_tags; + unsigned int shift = tags->bitmap_tags->sb.shift; + + kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U; + + sbitmap_queue_min_shallow_depth(tags->bitmap_tags, kqd->async_depth); +} + +static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) +{ struct kyber_hctx_data *khd; int i; @@ -504,8 +501,7 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) khd->batching = 0; hctx->sched_data = khd; - sbitmap_queue_min_shallow_depth(hctx->sched_tags->bitmap_tags, - kqd->async_depth); + kyber_depth_updated(hctx); return 0; @@ -1024,6 +1020,7 @@ static struct elevator_type kyber_sched = { .completed_request = kyber_completed_request, .dispatch_request = kyber_dispatch_request, .has_work = kyber_has_work, + .depth_updated = kyber_depth_updated, }, #ifdef CONFIG_BLK_DEBUG_FS .queue_debugfs_attrs = kyber_queue_debugfs_attrs, -- cgit v1.2.1 From 75ab6afacda01a6bd2d3ecd4cb8485f7c8fa2fdb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Feb 2021 15:41:25 -0700 Subject: block: don't skip empty devices in disk_uevent Restore the previous behavior by using the correct flag for the whole device ("part0"). Fixes: 99dfc43ecbf6 ("block: use ->bi_bdev for bio based I/O accounting") Reported-by: John Stultz Tested-by: John Stultz Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/genhd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/genhd.c b/block/genhd.c index 36ff45bbaaaf..6379b000576c 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -476,7 +476,7 @@ void disk_uevent(struct gendisk *disk, enum kobject_action action) struct disk_part_iter piter; struct block_device *part; - disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY_PART0); while ((part = disk_part_iter_next(&piter))) kobject_uevent(bdev_kobj(part), action); disk_part_iter_exit(&piter); -- cgit v1.2.1 From 4601b4b130de2329fe06df80ed5d77265f2058e5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Feb 2021 16:18:22 +0100 Subject: block: reopen the device in blkdev_reread_part Historically the BLKRRPART ioctl called into the now defunct ->revalidate method, which caused the sd driver to check if any media is present. When the ->revalidate method was removed this revalidation was lost, leading to lots of I/O errors when using the eject command. Fix this by reopening the device to rescan the partitions, and thus calling the revalidation logic in the sd driver.
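[For reference, the ioctl path being fixed can be exercised from user space with a few lines of C. This is a hedged sketch, not part of the patch: the device path is an example and the caller needs CAP_SYS_ADMIN.

#include <fcntl.h>
#include <linux/fs.h>		/* BLKRRPART */
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/sda", O_RDONLY);	/* example device */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* With this patch, BLKRRPART reopens the device, which re-runs the
	 * sd driver's media revalidation before the partition rescan. */
	if (ioctl(fd, BLKRRPART) < 0)
		perror("ioctl(BLKRRPART)");
	close(fd);
	return 0;
}
]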
Fixes: 471bd0af544b ("sd: use bdev_check_media_change") Reported-by: Tom Seewald Signed-off-by: Christoph Hellwig Tested-by: Tom Seewald Reviewed-by: Ming Lei Reviewed-by: Minwoo Im Signed-off-by: Jens Axboe --- block/ioctl.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block/ioctl.c b/block/ioctl.c index d61d652078f4..ff241e663c01 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -81,20 +81,27 @@ static int compat_blkpg_ioctl(struct block_device *bdev, } #endif -static int blkdev_reread_part(struct block_device *bdev) +static int blkdev_reread_part(struct block_device *bdev, fmode_t mode) { - int ret; + struct block_device *tmp; if (!disk_part_scan_enabled(bdev->bd_disk) || bdev_is_partition(bdev)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; - mutex_lock(&bdev->bd_mutex); - ret = bdev_disk_changed(bdev, false); - mutex_unlock(&bdev->bd_mutex); + /* + * Reopen the device to revalidate the driver state and force a + * partition rescan. + */ + mode &= ~FMODE_EXCL; + set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); - return ret; + tmp = blkdev_get_by_dev(bdev->bd_dev, mode, NULL); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + blkdev_put(tmp, mode); + return 0; } static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, @@ -498,7 +505,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode, bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE; return 0; case BLKRRPART: - return blkdev_reread_part(bdev); + return blkdev_reread_part(bdev, mode); case BLKTRACESTART: case BLKTRACESTOP: case BLKTRACETEARDOWN: -- cgit v1.2.1 From 97f433c3601a24d3513d06f575a389a2ca4e11e4 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 23 Feb 2021 19:25:30 -0700 Subject: blk-settings: align max_sectors on "logical_block_size" boundary We get I/O errors when we run md-raid1 on top of dm-integrity on top of a ramdisk. device-mapper: integrity: Bio not aligned on 8 sectors: 0xff00, 0xff device-mapper: integrity: Bio not aligned on 8 sectors: 0xff00, 0xff device-mapper: integrity: Bio not aligned on 8 sectors: 0xffff, 0x1 device-mapper: integrity: Bio not aligned on 8 sectors: 0xffff, 0x1 device-mapper: integrity: Bio not aligned on 8 sectors: 0x8048, 0xff device-mapper: integrity: Bio not aligned on 8 sectors: 0x8147, 0xff device-mapper: integrity: Bio not aligned on 8 sectors: 0x8246, 0xff device-mapper: integrity: Bio not aligned on 8 sectors: 0x8345, 0xbb The ramdisk device has logical_block_size 512 and max_sectors 255. The dm-integrity device uses logical_block_size 4096 and it doesn't affect the "max_sectors" value - thus, it inherits 255 from the ramdisk. So, we have a device with max_sectors not aligned on logical_block_size. The md-raid device sees that the underlying leg has max_sectors 255 and it will split the bios on a 255-sector boundary, making the bios unaligned on logical_block_size. In order to fix the bug, we round down max_sectors to a multiple of logical_block_size.
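[To make the arithmetic concrete: with logical_block_size 4096, lbs >> SECTOR_SHIFT is 8 sectors, and rounding max_sectors 255 down to a multiple of 8 yields 248, which is aligned. A small user-space replica of the helper added below; the 4K PAGE_SIZE is an assumption here:

#include <stdio.h>

#define SECTOR_SHIFT	9
#define PAGE_SIZE	4096u	/* assumed 4K pages */

static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lbs)
{
	/* lbs >> SECTOR_SHIFT is a power of two, so masking rounds down,
	 * like the kernel's round_down() */
	sectors &= ~((lbs >> SECTOR_SHIFT) - 1);
	if (sectors < PAGE_SIZE >> SECTOR_SHIFT)
		sectors = PAGE_SIZE >> SECTOR_SHIFT;
	return sectors;
}

int main(void)
{
	printf("%u\n", blk_round_down_sectors(255, 4096));	/* prints 248 */
	return 0;
}
]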
Cc: stable@vger.kernel.org Reviewed-by: Ming Lei Signed-off-by: Mikulas Patocka Signed-off-by: Jens Axboe --- block/blk-settings.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'block') diff --git a/block/blk-settings.c b/block/blk-settings.c index 7dd8be314ac6..b4aa2f37fab6 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -504,6 +504,14 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt) } EXPORT_SYMBOL(blk_queue_io_opt); +static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lbs) +{ + sectors = round_down(sectors, lbs >> SECTOR_SHIFT); + if (sectors < PAGE_SIZE >> SECTOR_SHIFT) + sectors = PAGE_SIZE >> SECTOR_SHIFT; + return sectors; +} + /** * blk_stack_limits - adjust queue_limits for stacked devices * @t: the stacking driver limits (top device) @@ -630,6 +638,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, ret = -1; } + t->max_sectors = blk_round_down_sectors(t->max_sectors, t->logical_block_size); + t->max_hw_sectors = blk_round_down_sectors(t->max_hw_sectors, t->logical_block_size); + t->max_dev_sectors = blk_round_down_sectors(t->max_dev_sectors, t->logical_block_size); + /* Discard alignment and granularity */ if (b->discard_granularity) { alignment = queue_limit_discard_alignment(b, start); -- cgit v1.2.1 From 452c0bf8754fbeffdf579465b82a3c2bbe373c95 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 23 Feb 2021 16:50:15 +0800 Subject: block: fix logging on capacity change The local variable 'capacity' stores the previous disk capacity and the variable 'size' records the latest disk capacity, so swap them to fix the logging on capacity change. Cc: Christoph Hellwig Fixes: a782483cc1f8 ("block: remove the nr_sects field in struct hd_struct") Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/genhd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/genhd.c b/block/genhd.c index 6379b000576c..fcc530164b5a 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -74,7 +74,7 @@ bool set_capacity_and_notify(struct gendisk *disk, sector_t size) return false; pr_info("%s: detected capacity change from %lld to %lld\n", - disk->disk_name, size, capacity); + disk->disk_name, capacity, size); /* * Historically we did not send a uevent for changes to/from an empty -- cgit v1.2.1 From 5407334c53e9922c1c3fb28801e489d0b74f2c8d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 Feb 2021 08:24:04 +0100 Subject: block-crypto-fallback: use a bio_set for splitting bios bio_split with a NULL bs argument used to fall back to kmalloc for the bio, which does not guarantee forward progress and could lead to deadlocks. Now that the overloading of the NULL bs argument to bio_alloc_bioset has been removed, it crashes instead. Fix all that by using a specially crafted bioset.
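[The pattern the fix adopts is a driver-owned bio_set: bioset_init() pre-reserves a mempool of bios, so bio_split() can always make forward progress under memory pressure, which a bare kmalloc cannot guarantee. A hedged kernel-style sketch of that lifecycle follows; the example_* names are illustrative, only the bio/bioset calls are real API.

#include <linux/bio.h>
#include <linux/blkdev.h>

static struct bio_set example_split_set;	/* illustrative name */

static int example_init(void)
{
	/* reserve a pool of 64 bios, no front padding, no flags */
	return bioset_init(&example_split_set, 64, 0, 0);
}

static struct bio *example_split(struct bio *bio, unsigned int max_sectors)
{
	struct bio *front;

	if (bio_sectors(bio) <= max_sectors)
		return bio;

	/* front carries the first max_sectors; bio keeps the remainder */
	front = bio_split(bio, max_sectors, GFP_NOIO, &example_split_set);
	bio_chain(front, bio);		/* bio completes only after front does */
	submit_bio_noacct(bio);		/* requeue the remainder */
	return front;
}

static void example_exit(void)
{
	bioset_exit(&example_split_set);
}
]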
Fixes: 3175199ab0ac ("block: split bio_kmalloc from bio_alloc_bioset") Reported-by: John Stultz Signed-off-by: Christoph Hellwig Tested-by: John Stultz Signed-off-by: Jens Axboe --- block/blk-crypto-fallback.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index e8327c50d7c9..c176b7af56a7 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -80,6 +80,7 @@ static struct blk_crypto_keyslot { static struct blk_keyslot_manager blk_crypto_ksm; static struct workqueue_struct *blk_crypto_wq; static mempool_t *blk_crypto_bounce_page_pool; +static struct bio_set crypto_bio_split; /* * This is the key we set when evicting a keyslot. This *should* be the all 0's @@ -224,7 +225,8 @@ static bool blk_crypto_split_bio_if_needed(struct bio **bio_ptr) if (num_sectors < bio_sectors(bio)) { struct bio *split_bio; - split_bio = bio_split(bio, num_sectors, GFP_NOIO, NULL); + split_bio = bio_split(bio, num_sectors, GFP_NOIO, + &crypto_bio_split); if (!split_bio) { bio->bi_status = BLK_STS_RESOURCE; return false; @@ -538,9 +540,13 @@ static int blk_crypto_fallback_init(void) prandom_bytes(blank_key, BLK_CRYPTO_MAX_KEY_SIZE); - err = blk_ksm_init(&blk_crypto_ksm, blk_crypto_num_keyslots); + err = bioset_init(&crypto_bio_split, 64, 0, 0); if (err) goto out; + + err = blk_ksm_init(&blk_crypto_ksm, blk_crypto_num_keyslots); + if (err) + goto fail_free_bioset; err = -ENOMEM; blk_crypto_ksm.ksm_ll_ops = blk_crypto_ksm_ll_ops; @@ -591,6 +597,8 @@ fail_free_wq: destroy_workqueue(blk_crypto_wq); fail_free_ksm: blk_ksm_destroy(&blk_crypto_ksm); +fail_free_bioset: + bioset_exit(&crypto_bio_split); out: return err; } -- cgit v1.2.1 From b90994c6ab623baf9268df9710692f14920ce9d2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 Feb 2021 08:24:05 +0100 Subject: block: fix bounce_clone_bio for passthrough bios Now that bio_alloc_bioset does not fall back to kmalloc for a NULL bio_set, handle that case explicitly and simplify the calling conventions. Based on an earlier patch from Chaitanya Kulkarni. Fixes: 3175199ab0ac ("block: split bio_kmalloc from bio_alloc_bioset") Reported-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bounce.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'block') diff --git a/block/bounce.c b/block/bounce.c index fc55314aa426..79d81221446d 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -214,8 +214,7 @@ static void bounce_end_io_read_isa(struct bio *bio) __bounce_end_io_read(bio, &isa_page_pool); } -static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask, - struct bio_set *bs) +static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask) { struct bvec_iter iter; struct bio_vec bv; @@ -242,8 +241,11 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask, * asking for trouble and would force extra work on * __bio_clone_fast() anyways. 
*/ - - bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs); + if (bio_is_passthrough(bio_src)) + bio = bio_kmalloc(gfp_mask, bio_segments(bio_src)); + else + bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), + &bounce_bio_set); if (!bio) return NULL; bio->bi_bdev = bio_src->bi_bdev; @@ -296,7 +298,6 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, unsigned i = 0; bool bounce = false; int sectors = 0; - bool passthrough = bio_is_passthrough(*bio_orig); bio_for_each_segment(from, *bio_orig, iter) { if (i++ < BIO_MAX_PAGES) @@ -307,14 +308,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, if (!bounce) return; - if (!passthrough && sectors < bio_sectors(*bio_orig)) { + if (!bio_is_passthrough(*bio_orig) && + sectors < bio_sectors(*bio_orig)) { bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split); bio_chain(bio, *bio_orig); submit_bio_noacct(*bio_orig); *bio_orig = bio; } - bio = bounce_clone_bio(*bio_orig, GFP_NOIO, passthrough ? NULL : - &bounce_bio_set); + bio = bounce_clone_bio(*bio_orig, GFP_NOIO); /* * Bvec table can't be updated by bio_for_each_segment_all(), -- cgit v1.2.1 From ebfe4183c77ed18e1d4237ad3b13f32114d9ae1e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 Feb 2021 08:24:06 +0100 Subject: block: remove the gfp_mask argument to bounce_clone_bio The only caller always passes GFP_NOIO. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bounce.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'block') diff --git a/block/bounce.c b/block/bounce.c index 79d81221446d..417faaac36b6 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -214,7 +214,7 @@ static void bounce_end_io_read_isa(struct bio *bio) __bounce_end_io_read(bio, &isa_page_pool); } -static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask) +static struct bio *bounce_clone_bio(struct bio *bio_src) { struct bvec_iter iter; struct bio_vec bv; @@ -242,9 +242,9 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask) * __bio_clone_fast() anyways. */ if (bio_is_passthrough(bio_src)) - bio = bio_kmalloc(gfp_mask, bio_segments(bio_src)); + bio = bio_kmalloc(GFP_NOIO, bio_segments(bio_src)); else - bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), + bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src), &bounce_bio_set); if (!bio) return NULL; @@ -271,11 +271,11 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask) break; } - if (bio_crypt_clone(bio, bio_src, gfp_mask) < 0) + if (bio_crypt_clone(bio, bio_src, GFP_NOIO) < 0) goto err_put; if (bio_integrity(bio_src) && - bio_integrity_clone(bio, bio_src, gfp_mask) < 0) + bio_integrity_clone(bio, bio_src, GFP_NOIO) < 0) goto err_put; bio_clone_blkg_association(bio, bio_src); @@ -315,7 +315,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, submit_bio_noacct(*bio_orig); *bio_orig = bio; } - bio = bounce_clone_bio(*bio_orig, GFP_NOIO); + bio = bounce_clone_bio(*bio_orig); /* * Bvec table can't be updated by bio_for_each_segment_all(), -- cgit v1.2.1 From 47dc096ac183f465ffb03e86a203a38661695d72 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 Feb 2021 08:24:07 +0100 Subject: block: memory allocations in bounce_clone_bio must not fail The caller can't cope with a failure from bounce_clone_bio, so use __GFP_NOFAIL for the passthrough case. bio_alloc_bioset already won't fail due to the use of mempools. 
And yes, we need to get rid of this block layer bouncing code entirely sooner or later. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bounce.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/bounce.c b/block/bounce.c index 417faaac36b6..87983a35079c 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -242,12 +242,11 @@ static struct bio *bounce_clone_bio(struct bio *bio_src) * __bio_clone_fast() anyways. */ if (bio_is_passthrough(bio_src)) - bio = bio_kmalloc(GFP_NOIO, bio_segments(bio_src)); + bio = bio_kmalloc(GFP_NOIO | __GFP_NOFAIL, + bio_segments(bio_src)); else bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src), &bounce_bio_set); - if (!bio) - return NULL; bio->bi_bdev = bio_src->bi_bdev; if (bio_flagged(bio_src, BIO_REMAPPED)) bio_set_flag(bio, BIO_REMAPPED); -- cgit v1.2.1 From 5f7136db82996089cdfb2939c7664b29e9da141d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 29 Jan 2021 04:38:57 +0000 Subject: block: Add bio_max_segs It's often inconvenient to use BIO_MAX_PAGES due to min() requiring the signs of both arguments to be the same. Introduce bio_max_segs() and change BIO_MAX_PAGES to be unsigned to make it easier for the users. Reviewed-by: Chaitanya Kulkarni Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Jens Axboe --- block/blk-map.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-map.c b/block/blk-map.c index 21630dccac62..369e204d14d0 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -150,9 +150,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, bmd->is_our_pages = !map_data; bmd->is_null_mapped = (map_data && map_data->null_mapped); - nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); - if (nr_pages > BIO_MAX_PAGES) - nr_pages = BIO_MAX_PAGES; + nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE)); ret = -ENOMEM; bio = bio_kmalloc(gfp_mask, nr_pages); -- cgit v1.2.1
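[For context, the helper this last commit introduces has roughly the following shape. This is a sketch reconstructed from the commit message and the blk-map.c hunk, not quoted from the header diff itself; the real definition lives in include/linux/bio.h.

/* BIO_MAX_PAGES becomes unsigned so that min() type-checks */
#define BIO_MAX_PAGES	256U

static inline unsigned int bio_max_segs(unsigned int nr_segs)
{
	return min(nr_segs, BIO_MAX_PAGES);
}

Callers such as bio_copy_user_iov() can then clamp a segment count in a single expression, as in the hunk above: nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));]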