summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorKeith Busch <kbusch@kernel.org>2023-03-20 12:49:26 -0700
committerJens Axboe <axboe@kernel.dk>2023-03-20 15:30:03 -0600
commit54bdd67d0f88489ac88f7664b56cb7c93799d84d (patch)
tree7c0c5780eb49ae1ca5c33b23c32b566c1b4a7c46 /block
parent4cf2c3ab2cdfed71a7f27fe52b4c87877ee79606 (diff)
downloadlinux-54bdd67d0f88489ac88f7664b56cb7c93799d84d.tar.gz
blk-mq: remove hybrid polling
io_uring provides the only way user space can poll completions, and that always sets BLK_POLL_NOSLEEP. This effectively makes hybrid polling dead code, so remove it and everything supporting it. Hybrid polling was effectively killed off with 9650b453a3d4b1, "block: ignore RWF_HIPRI hint for sync dio", but still potentially reachable through io_uring until d729cf9acb93119, "io_uring: don't sleep when polling for I/O", but hybrid polling probably should not have been reachable through that async interface from the beginning. Fixes: 9650b453a3d4 ("block: ignore RWF_HIPRI hint for sync dio") Fixes: d729cf9acb93 ("io_uring: don't sleep when polling for I/O") Signed-off-by: Keith Busch <kbusch@kernel.org> Link: https://lore.kernel.org/r/20230320194926.3353144-1-kbusch@meta.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
-rw-r--r--block/blk-core.c6
-rw-r--r--block/blk-mq-debugfs.c26
-rw-r--r--block/blk-mq.c205
-rw-r--r--block/blk-stat.c18
-rw-r--r--block/blk-sysfs.c25
5 files changed, 7 insertions, 273 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 9e5e0277a4d9..269765d16cfd 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -263,13 +263,7 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
static void blk_free_queue(struct request_queue *q)
{
- if (q->poll_stat)
- blk_stat_remove_callback(q, q->poll_cb);
- blk_stat_free_callback(q->poll_cb);
-
blk_free_queue_stats(q->stats);
- kfree(q->poll_stat);
-
if (queue_is_mq(q))
blk_mq_release(q);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index b01818f8e216..212a7f301e73 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -15,33 +15,8 @@
#include "blk-mq-tag.h"
#include "blk-rq-qos.h"
-static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
-{
- if (stat->nr_samples) {
- seq_printf(m, "samples=%d, mean=%llu, min=%llu, max=%llu",
- stat->nr_samples, stat->mean, stat->min, stat->max);
- } else {
- seq_puts(m, "samples=0");
- }
-}
-
static int queue_poll_stat_show(void *data, struct seq_file *m)
{
- struct request_queue *q = data;
- int bucket;
-
- if (!q->poll_stat)
- return 0;
-
- for (bucket = 0; bucket < (BLK_MQ_POLL_STATS_BKTS / 2); bucket++) {
- seq_printf(m, "read (%d Bytes): ", 1 << (9 + bucket));
- print_stat(m, &q->poll_stat[2 * bucket]);
- seq_puts(m, "\n");
-
- seq_printf(m, "write (%d Bytes): ", 1 << (9 + bucket));
- print_stat(m, &q->poll_stat[2 * bucket + 1]);
- seq_puts(m, "\n");
- }
return 0;
}
@@ -282,7 +257,6 @@ static const char *const rqf_name[] = {
RQF_NAME(STATS),
RQF_NAME(SPECIAL_PAYLOAD),
RQF_NAME(ZONE_WRITE_LOCKED),
- RQF_NAME(MQ_POLL_SLEPT),
RQF_NAME(TIMED_OUT),
RQF_NAME(ELV),
RQF_NAME(RESV),
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a875b1cdff9b..4e30459df815 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -46,51 +46,15 @@
static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
-static void blk_mq_poll_stats_start(struct request_queue *q);
-static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
-
-static int blk_mq_poll_stats_bkt(const struct request *rq)
-{
- int ddir, sectors, bucket;
-
- ddir = rq_data_dir(rq);
- sectors = blk_rq_stats_sectors(rq);
-
- bucket = ddir + 2 * ilog2(sectors);
-
- if (bucket < 0)
- return -1;
- else if (bucket >= BLK_MQ_POLL_STATS_BKTS)
- return ddir + BLK_MQ_POLL_STATS_BKTS - 2;
-
- return bucket;
-}
-
-#define BLK_QC_T_SHIFT 16
-#define BLK_QC_T_INTERNAL (1U << 31)
-
static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q,
blk_qc_t qc)
{
- return xa_load(&q->hctx_table,
- (qc & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT);
-}
-
-static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx,
- blk_qc_t qc)
-{
- unsigned int tag = qc & ((1U << BLK_QC_T_SHIFT) - 1);
-
- if (qc & BLK_QC_T_INTERNAL)
- return blk_mq_tag_to_rq(hctx->sched_tags, tag);
- return blk_mq_tag_to_rq(hctx->tags, tag);
+ return xa_load(&q->hctx_table, qc);
}
static inline blk_qc_t blk_rq_to_qc(struct request *rq)
{
- return (rq->mq_hctx->queue_num << BLK_QC_T_SHIFT) |
- (rq->tag != -1 ?
- rq->tag : (rq->internal_tag | BLK_QC_T_INTERNAL));
+ return rq->mq_hctx->queue_num;
}
/*
@@ -1038,10 +1002,8 @@ static inline void blk_account_io_start(struct request *req)
static inline void __blk_mq_end_request_acct(struct request *rq, u64 now)
{
- if (rq->rq_flags & RQF_STATS) {
- blk_mq_poll_stats_start(rq->q);
+ if (rq->rq_flags & RQF_STATS)
blk_stat_add(rq, now);
- }
blk_mq_sched_completed_request(rq, now);
blk_account_io_done(rq, now);
@@ -4222,14 +4184,8 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
/* mark the queue as mq asap */
q->mq_ops = set->ops;
- q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn,
- blk_mq_poll_stats_bkt,
- BLK_MQ_POLL_STATS_BKTS, q);
- if (!q->poll_cb)
- goto err_exit;
-
if (blk_mq_alloc_ctxs(q))
- goto err_poll;
+ goto err_exit;
/* init q->mq_kobj and sw queues' kobjects */
blk_mq_sysfs_init(q);
@@ -4257,11 +4213,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
q->nr_requests = set->queue_depth;
- /*
- * Default to classic polling
- */
- q->poll_nsec = BLK_MQ_POLL_CLASSIC;
-
blk_mq_init_cpu_queues(q, set->nr_hw_queues);
blk_mq_add_queue_tag_set(set, q);
blk_mq_map_swqueue(q);
@@ -4269,9 +4220,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
err_hctxs:
blk_mq_release(q);
-err_poll:
- blk_stat_free_callback(q->poll_cb);
- q->poll_cb = NULL;
err_exit:
q->mq_ops = NULL;
return -ENOMEM;
@@ -4768,138 +4716,8 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
}
EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
-/* Enable polling stats and return whether they were already enabled. */
-static bool blk_poll_stats_enable(struct request_queue *q)
-{
- if (q->poll_stat)
- return true;
-
- return blk_stats_alloc_enable(q);
-}
-
-static void blk_mq_poll_stats_start(struct request_queue *q)
-{
- /*
- * We don't arm the callback if polling stats are not enabled or the
- * callback is already active.
- */
- if (!q->poll_stat || blk_stat_is_active(q->poll_cb))
- return;
-
- blk_stat_activate_msecs(q->poll_cb, 100);
-}
-
-static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
-{
- struct request_queue *q = cb->data;
- int bucket;
-
- for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) {
- if (cb->stat[bucket].nr_samples)
- q->poll_stat[bucket] = cb->stat[bucket];
- }
-}
-
-static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
- struct request *rq)
-{
- unsigned long ret = 0;
- int bucket;
-
- /*
- * If stats collection isn't on, don't sleep but turn it on for
- * future users
- */
- if (!blk_poll_stats_enable(q))
- return 0;
-
- /*
- * As an optimistic guess, use half of the mean service time
- * for this type of request. We can (and should) make this smarter.
- * For instance, if the completion latencies are tight, we can
- * get closer than just half the mean. This is especially
- * important on devices where the completion latencies are longer
- * than ~10 usec. We do use the stats for the relevant IO size
- * if available which does lead to better estimates.
- */
- bucket = blk_mq_poll_stats_bkt(rq);
- if (bucket < 0)
- return ret;
-
- if (q->poll_stat[bucket].nr_samples)
- ret = (q->poll_stat[bucket].mean + 1) / 2;
-
- return ret;
-}
-
-static bool blk_mq_poll_hybrid(struct request_queue *q, blk_qc_t qc)
-{
- struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, qc);
- struct request *rq = blk_qc_to_rq(hctx, qc);
- struct hrtimer_sleeper hs;
- enum hrtimer_mode mode;
- unsigned int nsecs;
- ktime_t kt;
-
- /*
- * If a request has completed on queue that uses an I/O scheduler, we
- * won't get back a request from blk_qc_to_rq.
- */
- if (!rq || (rq->rq_flags & RQF_MQ_POLL_SLEPT))
- return false;
-
- /*
- * If we get here, hybrid polling is enabled. Hence poll_nsec can be:
- *
- * 0: use half of prev avg
- * >0: use this specific value
- */
- if (q->poll_nsec > 0)
- nsecs = q->poll_nsec;
- else
- nsecs = blk_mq_poll_nsecs(q, rq);
-
- if (!nsecs)
- return false;
-
- rq->rq_flags |= RQF_MQ_POLL_SLEPT;
-
- /*
- * This will be replaced with the stats tracking code, using
- * 'avg_completion_time / 2' as the pre-sleep target.
- */
- kt = nsecs;
-
- mode = HRTIMER_MODE_REL;
- hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode);
- hrtimer_set_expires(&hs.timer, kt);
-
- do {
- if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
- break;
- set_current_state(TASK_UNINTERRUPTIBLE);
- hrtimer_sleeper_start_expires(&hs, mode);
- if (hs.task)
- io_schedule();
- hrtimer_cancel(&hs.timer);
- mode = HRTIMER_MODE_ABS;
- } while (hs.task && !signal_pending(current));
-
- __set_current_state(TASK_RUNNING);
- destroy_hrtimer_on_stack(&hs.timer);
-
- /*
- * If we sleep, have the caller restart the poll loop to reset the
- * state. Like for the other success return cases, the caller is
- * responsible for checking if the IO completed. If the IO isn't
- * complete, we'll get called again and will go straight to the busy
- * poll loop.
- */
- return true;
-}
-
-static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
- struct io_comp_batch *iob, unsigned int flags)
+int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
+ unsigned int flags)
{
struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie);
long state = get_current_state();
@@ -4926,17 +4744,6 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
return 0;
}
-int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
- unsigned int flags)
-{
- if (!(flags & BLK_POLL_NOSLEEP) &&
- q->poll_nsec != BLK_MQ_POLL_CLASSIC) {
- if (blk_mq_poll_hybrid(q, cookie))
- return 1;
- }
- return blk_mq_poll_classic(q, cookie, iob, flags);
-}
-
unsigned int blk_mq_rq_cpu(struct request *rq)
{
return rq->mq_ctx->cpu;
diff --git a/block/blk-stat.c b/block/blk-stat.c
index c6ca16abf911..74a1a8c32d86 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -231,21 +231,3 @@ void blk_free_queue_stats(struct blk_queue_stats *stats)
kfree(stats);
}
-
-bool blk_stats_alloc_enable(struct request_queue *q)
-{
- struct blk_rq_stat *poll_stat;
-
- poll_stat = kcalloc(BLK_MQ_POLL_STATS_BKTS, sizeof(*poll_stat),
- GFP_ATOMIC);
- if (!poll_stat)
- return false;
-
- if (cmpxchg(&q->poll_stat, NULL, poll_stat) != NULL) {
- kfree(poll_stat);
- return true;
- }
-
- blk_stat_add_callback(q, q->poll_cb);
- return false;
-}
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index f1fce1c7fa44..1a743b4f2958 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -408,35 +408,12 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
{
- int val;
-
- if (q->poll_nsec == BLK_MQ_POLL_CLASSIC)
- val = BLK_MQ_POLL_CLASSIC;
- else
- val = q->poll_nsec / 1000;
-
- return sprintf(page, "%d\n", val);
+ return sprintf(page, "%d\n", -1);
}
static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
size_t count)
{
- int err, val;
-
- if (!q->mq_ops || !q->mq_ops->poll)
- return -EINVAL;
-
- err = kstrtoint(page, 10, &val);
- if (err < 0)
- return err;
-
- if (val == BLK_MQ_POLL_CLASSIC)
- q->poll_nsec = BLK_MQ_POLL_CLASSIC;
- else if (val >= 0)
- q->poll_nsec = val * 1000;
- else
- return -EINVAL;
-
return count;
}