summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2011-11-29 01:51:07 -0800
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2011-11-29 01:51:07 -0800
commit0d2cd91bf7b1a7cc1d638296111fcc2bcf5c0bb4 (patch)
treed2ca69347816c27f9dc352581f5d0fe76811cd49 /block
parent3d95fd6ad8d3cf582a70ed65660017114b6e4065 (diff)
parentcaca6a03d365883564885f2c1da3e88dcf65d139 (diff)
downloadlinux-rt-0d2cd91bf7b1a7cc1d638296111fcc2bcf5c0bb4.tar.gz
Merge commit 'v3.2-rc3' into next
Diffstat (limited to 'block')
-rw-r--r--block/blk-cgroup.c138
-rw-r--r--block/blk-cgroup.h2
-rw-r--r--block/blk-core.c485
-rw-r--r--block/blk-flush.c3
-rw-r--r--block/blk-integrity.c1
-rw-r--r--block/blk-map.c7
-rw-r--r--block/blk-softirq.c2
-rw-r--r--block/blk-sysfs.c22
-rw-r--r--block/blk-tag.c6
-rw-r--r--block/blk-throttle.c106
-rw-r--r--block/blk.h20
-rw-r--r--block/bsg-lib.c2
-rw-r--r--block/cfq-iosched.c20
-rw-r--r--block/elevator.c39
-rw-r--r--block/genhd.c12
-rw-r--r--block/ioctl.c3
-rw-r--r--block/scsi_ioctl.c3
17 files changed, 496 insertions, 375 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index bcaf16ee6ad1..8f630cec906e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -768,27 +768,16 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
return disk_total;
}
-static int blkio_check_dev_num(dev_t dev)
-{
- int part = 0;
- struct gendisk *disk;
-
- disk = get_gendisk(dev, &part);
- if (!disk || part)
- return -ENODEV;
-
- return 0;
-}
-
static int blkio_policy_parse_and_set(char *buf,
struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid)
{
+ struct gendisk *disk = NULL;
char *s[4], *p, *major_s = NULL, *minor_s = NULL;
- int ret;
- unsigned long major, minor, temp;
- int i = 0;
+ unsigned long major, minor;
+ int i = 0, ret = -EINVAL;
+ int part;
dev_t dev;
- u64 bps, iops;
+ u64 temp;
memset(s, 0, sizeof(s));
@@ -804,43 +793,45 @@ static int blkio_policy_parse_and_set(char *buf,
}
if (i != 2)
- return -EINVAL;
+ goto out;
p = strsep(&s[0], ":");
if (p != NULL)
major_s = p;
else
- return -EINVAL;
+ goto out;
minor_s = s[0];
if (!minor_s)
- return -EINVAL;
+ goto out;
- ret = strict_strtoul(major_s, 10, &major);
- if (ret)
- return -EINVAL;
+ if (strict_strtoul(major_s, 10, &major))
+ goto out;
- ret = strict_strtoul(minor_s, 10, &minor);
- if (ret)
- return -EINVAL;
+ if (strict_strtoul(minor_s, 10, &minor))
+ goto out;
dev = MKDEV(major, minor);
- ret = blkio_check_dev_num(dev);
- if (ret)
- return ret;
+ if (strict_strtoull(s[1], 10, &temp))
+ goto out;
- newpn->dev = dev;
+ /* For rule removal, do not check for device presence. */
+ if (temp) {
+ disk = get_gendisk(dev, &part);
+ if (!disk || part) {
+ ret = -ENODEV;
+ goto out;
+ }
+ }
- if (s[1] == NULL)
- return -EINVAL;
+ newpn->dev = dev;
switch (plid) {
case BLKIO_POLICY_PROP:
- ret = strict_strtoul(s[1], 10, &temp);
- if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
- temp > BLKIO_WEIGHT_MAX)
- return -EINVAL;
+ if ((temp < BLKIO_WEIGHT_MIN && temp > 0) ||
+ temp > BLKIO_WEIGHT_MAX)
+ goto out;
newpn->plid = plid;
newpn->fileid = fileid;
@@ -850,94 +841,114 @@ static int blkio_policy_parse_and_set(char *buf,
switch(fileid) {
case BLKIO_THROTL_read_bps_device:
case BLKIO_THROTL_write_bps_device:
- ret = strict_strtoull(s[1], 10, &bps);
- if (ret)
- return -EINVAL;
-
newpn->plid = plid;
newpn->fileid = fileid;
- newpn->val.bps = bps;
+ newpn->val.bps = temp;
break;
case BLKIO_THROTL_read_iops_device:
case BLKIO_THROTL_write_iops_device:
- ret = strict_strtoull(s[1], 10, &iops);
- if (ret)
- return -EINVAL;
-
- if (iops > THROTL_IOPS_MAX)
- return -EINVAL;
+ if (temp > THROTL_IOPS_MAX)
+ goto out;
newpn->plid = plid;
newpn->fileid = fileid;
- newpn->val.iops = (unsigned int)iops;
+ newpn->val.iops = (unsigned int)temp;
break;
}
break;
default:
BUG();
}
-
- return 0;
+ ret = 0;
+out:
+ put_disk(disk);
+ return ret;
}
unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
dev_t dev)
{
struct blkio_policy_node *pn;
+ unsigned long flags;
+ unsigned int weight;
+
+ spin_lock_irqsave(&blkcg->lock, flags);
pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP,
BLKIO_PROP_weight_device);
if (pn)
- return pn->val.weight;
+ weight = pn->val.weight;
else
- return blkcg->weight;
+ weight = blkcg->weight;
+
+ spin_unlock_irqrestore(&blkcg->lock, flags);
+
+ return weight;
}
EXPORT_SYMBOL_GPL(blkcg_get_weight);
uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev)
{
struct blkio_policy_node *pn;
+ unsigned long flags;
+ uint64_t bps = -1;
+ spin_lock_irqsave(&blkcg->lock, flags);
pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
BLKIO_THROTL_read_bps_device);
if (pn)
- return pn->val.bps;
- else
- return -1;
+ bps = pn->val.bps;
+ spin_unlock_irqrestore(&blkcg->lock, flags);
+
+ return bps;
}
uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev)
{
struct blkio_policy_node *pn;
+ unsigned long flags;
+ uint64_t bps = -1;
+
+ spin_lock_irqsave(&blkcg->lock, flags);
pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
BLKIO_THROTL_write_bps_device);
if (pn)
- return pn->val.bps;
- else
- return -1;
+ bps = pn->val.bps;
+ spin_unlock_irqrestore(&blkcg->lock, flags);
+
+ return bps;
}
unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev)
{
struct blkio_policy_node *pn;
+ unsigned long flags;
+ unsigned int iops = -1;
+ spin_lock_irqsave(&blkcg->lock, flags);
pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
BLKIO_THROTL_read_iops_device);
if (pn)
- return pn->val.iops;
- else
- return -1;
+ iops = pn->val.iops;
+ spin_unlock_irqrestore(&blkcg->lock, flags);
+
+ return iops;
}
unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev)
{
struct blkio_policy_node *pn;
+ unsigned long flags;
+ unsigned int iops = -1;
+
+ spin_lock_irqsave(&blkcg->lock, flags);
pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
BLKIO_THROTL_write_iops_device);
if (pn)
- return pn->val.iops;
- else
- return -1;
+ iops = pn->val.iops;
+ spin_unlock_irqrestore(&blkcg->lock, flags);
+
+ return iops;
}
/* Checks whether user asked for deleting a policy rule */
@@ -1090,6 +1101,7 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
if (blkio_delete_rule_command(newpn)) {
blkio_policy_delete_node(pn);
+ kfree(pn);
spin_unlock_irq(&blkcg->lock);
goto update_io_group;
}
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index a71d2904ffb9..6f3ace7e792f 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -188,7 +188,7 @@ struct blkio_policy_node {
union {
unsigned int weight;
/*
- * Rate read/write in terms of byptes per second
+ * Rate read/write in terms of bytes per second
* Whether this rate represents read or write is determined
* by file type "fileid".
*/
diff --git a/block/blk-core.c b/block/blk-core.c
index 90e1ffdeb415..ea70e6c80cd3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -28,6 +28,7 @@
#include <linux/task_io_accounting_ops.h>
#include <linux/fault-inject.h>
#include <linux/list_sort.h>
+#include <linux/delay.h>
#define CREATE_TRACE_POINTS
#include <trace/events/block.h>
@@ -38,8 +39,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
-static int __make_request(struct request_queue *q, struct bio *bio);
-
/*
* For the allocated request tables
*/
@@ -347,31 +346,80 @@ void blk_put_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_put_queue);
-/*
- * Note: If a driver supplied the queue lock, it should not zap that lock
- * unexpectedly as some queue cleanup components like elevator_exit() and
- * blk_throtl_exit() need queue lock.
+/**
+ * blk_drain_queue - drain requests from request_queue
+ * @q: queue to drain
+ * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV
+ *
+ * Drain requests from @q. If @drain_all is set, all requests are drained.
+ * If not, only ELVPRIV requests are drained. The caller is responsible
+ * for ensuring that no new requests which need to be drained are queued.
+ */
+void blk_drain_queue(struct request_queue *q, bool drain_all)
+{
+ while (true) {
+ int nr_rqs;
+
+ spin_lock_irq(q->queue_lock);
+
+ elv_drain_elevator(q);
+ if (drain_all)
+ blk_throtl_drain(q);
+
+ __blk_run_queue(q);
+
+ if (drain_all)
+ nr_rqs = q->rq.count[0] + q->rq.count[1];
+ else
+ nr_rqs = q->rq.elvpriv;
+
+ spin_unlock_irq(q->queue_lock);
+
+ if (!nr_rqs)
+ break;
+ msleep(10);
+ }
+}
+
+/**
+ * blk_cleanup_queue - shutdown a request queue
+ * @q: request queue to shutdown
+ *
+ * Mark @q DEAD, drain all pending requests, destroy and put it. All
+ * future requests will be failed immediately with -ENODEV.
*/
void blk_cleanup_queue(struct request_queue *q)
{
- /*
- * We know we have process context here, so we can be a little
- * cautious and ensure that pending block actions on this device
- * are done before moving on. Going into this function, we should
- * not have processes doing IO to this device.
- */
- blk_sync_queue(q);
+ spinlock_t *lock = q->queue_lock;
- del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
+ /* mark @q DEAD, no new request or merges will be allowed afterwards */
mutex_lock(&q->sysfs_lock);
queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
+
+ spin_lock_irq(lock);
+ queue_flag_set(QUEUE_FLAG_NOMERGES, q);
+ queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
+ queue_flag_set(QUEUE_FLAG_DEAD, q);
+
+ if (q->queue_lock != &q->__queue_lock)
+ q->queue_lock = &q->__queue_lock;
+
+ spin_unlock_irq(lock);
mutex_unlock(&q->sysfs_lock);
+ /*
+ * Drain all requests queued before DEAD marking. The caller might
+ * be trying to tear down @q before its elevator is initialized, in
+ * which case we don't want to call into draining.
+ */
if (q->elevator)
- elevator_exit(q->elevator);
+ blk_drain_queue(q, true);
- blk_throtl_exit(q);
+ /* @q won't process any more request, flush async actions */
+ del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
+ blk_sync_queue(q);
+ /* @q is and will stay empty, shutdown and put */
blk_put_queue(q);
}
EXPORT_SYMBOL(blk_cleanup_queue);
@@ -542,7 +590,7 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
/*
* This also sets hw/phys segments, boundary and size
*/
- blk_queue_make_request(q, __make_request);
+ blk_queue_make_request(q, blk_queue_bio);
q->sg_reserved_size = INT_MAX;
@@ -577,7 +625,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
}
static struct request *
-blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
{
struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
@@ -588,12 +636,10 @@ blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
rq->cmd_flags = flags | REQ_ALLOCED;
- if (priv) {
- if (unlikely(elv_set_request(q, rq, gfp_mask))) {
- mempool_free(rq, q->rq.rq_pool);
- return NULL;
- }
- rq->cmd_flags |= REQ_ELVPRIV;
+ if ((flags & REQ_ELVPRIV) &&
+ unlikely(elv_set_request(q, rq, gfp_mask))) {
+ mempool_free(rq, q->rq.rq_pool);
+ return NULL;
}
return rq;
@@ -652,12 +698,13 @@ static void __freed_request(struct request_queue *q, int sync)
* A request has just been released. Account for it, update the full and
* congestion status, wake up any waiters. Called under q->queue_lock.
*/
-static void freed_request(struct request_queue *q, int sync, int priv)
+static void freed_request(struct request_queue *q, unsigned int flags)
{
struct request_list *rl = &q->rq;
+ int sync = rw_is_sync(flags);
rl->count[sync]--;
- if (priv)
+ if (flags & REQ_ELVPRIV)
rl->elvpriv--;
__freed_request(q, sync);
@@ -685,10 +732,19 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
return true;
}
-/*
- * Get a free request, queue_lock must be held.
- * Returns NULL on failure, with queue_lock held.
- * Returns !NULL on success, with queue_lock *not held*.
+/**
+ * get_request - get a free request
+ * @q: request_queue to allocate request from
+ * @rw_flags: RW and SYNC flags
+ * @bio: bio to allocate request for (can be %NULL)
+ * @gfp_mask: allocation mask
+ *
+ * Get a free request from @q. This function may fail under memory
+ * pressure or if @q is dead.
+ *
+ * Must be callled with @q->queue_lock held and,
+ * Returns %NULL on failure, with @q->queue_lock held.
+ * Returns !%NULL on success, with @q->queue_lock *not held*.
*/
static struct request *get_request(struct request_queue *q, int rw_flags,
struct bio *bio, gfp_t gfp_mask)
@@ -697,7 +753,10 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
struct request_list *rl = &q->rq;
struct io_context *ioc = NULL;
const bool is_sync = rw_is_sync(rw_flags) != 0;
- int may_queue, priv = 0;
+ int may_queue;
+
+ if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+ return NULL;
may_queue = elv_may_queue(q, rw_flags);
if (may_queue == ELV_MQUEUE_NO)
@@ -741,17 +800,17 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
rl->count[is_sync]++;
rl->starved[is_sync] = 0;
- if (blk_rq_should_init_elevator(bio)) {
- priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
- if (priv)
- rl->elvpriv++;
+ if (blk_rq_should_init_elevator(bio) &&
+ !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
+ rw_flags |= REQ_ELVPRIV;
+ rl->elvpriv++;
}
if (blk_queue_io_stat(q))
rw_flags |= REQ_IO_STAT;
spin_unlock_irq(q->queue_lock);
- rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
+ rq = blk_alloc_request(q, rw_flags, gfp_mask);
if (unlikely(!rq)) {
/*
* Allocation failed presumably due to memory. Undo anything
@@ -761,7 +820,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
* wait queue, but this is pretty rare.
*/
spin_lock_irq(q->queue_lock);
- freed_request(q, is_sync, priv);
+ freed_request(q, rw_flags);
/*
* in the very unlikely event that allocation failed and no
@@ -791,11 +850,18 @@ out:
return rq;
}
-/*
- * No available requests for this queue, wait for some requests to become
- * available.
+/**
+ * get_request_wait - get a free request with retry
+ * @q: request_queue to allocate request from
+ * @rw_flags: RW and SYNC flags
+ * @bio: bio to allocate request for (can be %NULL)
+ *
+ * Get a free request from @q. This function keeps retrying under memory
+ * pressure and fails iff @q is dead.
*
- * Called with q->queue_lock held, and returns with it unlocked.
+ * Must be callled with @q->queue_lock held and,
+ * Returns %NULL on failure, with @q->queue_lock held.
+ * Returns !%NULL on success, with @q->queue_lock *not held*.
*/
static struct request *get_request_wait(struct request_queue *q, int rw_flags,
struct bio *bio)
@@ -809,6 +875,9 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
struct io_context *ioc;
struct request_list *rl = &q->rq;
+ if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+ return NULL;
+
prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
TASK_UNINTERRUPTIBLE);
@@ -839,19 +908,15 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
{
struct request *rq;
- if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
- return NULL;
-
BUG_ON(rw != READ && rw != WRITE);
spin_lock_irq(q->queue_lock);
- if (gfp_mask & __GFP_WAIT) {
+ if (gfp_mask & __GFP_WAIT)
rq = get_request_wait(q, rw, NULL);
- } else {
+ else
rq = get_request(q, rw, NULL, gfp_mask);
- if (!rq)
- spin_unlock_irq(q->queue_lock);
- }
+ if (!rq)
+ spin_unlock_irq(q->queue_lock);
/* q->queue_lock is unlocked at this point */
return rq;
@@ -1053,14 +1118,13 @@ void __blk_put_request(struct request_queue *q, struct request *req)
* it didn't come out of our reserved rq pools
*/
if (req->cmd_flags & REQ_ALLOCED) {
- int is_sync = rq_is_sync(req) != 0;
- int priv = req->cmd_flags & REQ_ELVPRIV;
+ unsigned int flags = req->cmd_flags;
BUG_ON(!list_empty(&req->queuelist));
BUG_ON(!hlist_unhashed(&req->hash));
blk_free_request(q, req);
- freed_request(q, is_sync, priv);
+ freed_request(q, flags);
}
}
EXPORT_SYMBOL_GPL(__blk_put_request);
@@ -1162,24 +1226,41 @@ static bool bio_attempt_front_merge(struct request_queue *q,
return true;
}
-/*
- * Attempts to merge with the plugged list in the current process. Returns
- * true if merge was successful, otherwise false.
+/**
+ * attempt_plug_merge - try to merge with %current's plugged list
+ * @q: request_queue new bio is being queued at
+ * @bio: new bio being queued
+ * @request_count: out parameter for number of traversed plugged requests
+ *
+ * Determine whether @bio being queued on @q can be merged with a request
+ * on %current's plugged list. Returns %true if merge was successful,
+ * otherwise %false.
+ *
+ * This function is called without @q->queue_lock; however, elevator is
+ * accessed iff there already are requests on the plugged list which in
+ * turn guarantees validity of the elevator.
+ *
+ * Note that, on successful merge, elevator operation
+ * elevator_bio_merged_fn() will be called without queue lock. Elevator
+ * must be ready for this.
*/
-static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
- struct bio *bio)
+static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
+ unsigned int *request_count)
{
struct blk_plug *plug;
struct request *rq;
bool ret = false;
- plug = tsk->plug;
+ plug = current->plug;
if (!plug)
goto out;
+ *request_count = 0;
list_for_each_entry_reverse(rq, &plug->list, queuelist) {
int el_ret;
+ (*request_count)++;
+
if (rq->q != q)
continue;
@@ -1200,7 +1281,6 @@ out:
void init_request_from_bio(struct request *req, struct bio *bio)
{
- req->cpu = bio->bi_comp_cpu;
req->cmd_type = REQ_TYPE_FS;
req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;
@@ -1213,12 +1293,13 @@ void init_request_from_bio(struct request *req, struct bio *bio)
blk_rq_bio_prep(req->q, req, bio);
}
-static int __make_request(struct request_queue *q, struct bio *bio)
+void blk_queue_bio(struct request_queue *q, struct bio *bio)
{
const bool sync = !!(bio->bi_rw & REQ_SYNC);
struct blk_plug *plug;
int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
struct request *req;
+ unsigned int request_count = 0;
/*
* low level driver can indicate that it wants pages above a
@@ -1237,8 +1318,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
* Check if we can merge with the plugged list before grabbing
* any locks.
*/
- if (attempt_plug_merge(current, q, bio))
- goto out;
+ if (attempt_plug_merge(q, bio, &request_count))
+ return;
spin_lock_irq(q->queue_lock);
@@ -1272,6 +1353,10 @@ get_rq:
* Returns with the queue unlocked.
*/
req = get_request_wait(q, rw_flags, bio);
+ if (unlikely(!req)) {
+ bio_endio(bio, -ENODEV); /* @q is dead */
+ goto out_unlock;
+ }
/*
* After dropping the lock and possibly sleeping here, our request
@@ -1281,8 +1366,7 @@ get_rq:
*/
init_request_from_bio(req, bio);
- if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
- bio_flagged(bio, BIO_CPU_AFFINE))
+ if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
req->cpu = raw_smp_processor_id();
plug = current->plug;
@@ -1295,18 +1379,21 @@ get_rq:
*/
if (list_empty(&plug->list))
trace_block_plug(q);
- else if (!plug->should_sort) {
- struct request *__rq;
+ else {
+ if (!plug->should_sort) {
+ struct request *__rq;
- __rq = list_entry_rq(plug->list.prev);
- if (__rq->q != q)
- plug->should_sort = 1;
+ __rq = list_entry_rq(plug->list.prev);
+ if (__rq->q != q)
+ plug->should_sort = 1;
+ }
+ if (request_count >= BLK_MAX_REQUEST_COUNT) {
+ blk_flush_plug_list(plug, false);
+ trace_block_plug(q);
+ }
}
list_add_tail(&req->queuelist, &plug->list);
- plug->count++;
drive_stat_acct(req, 1);
- if (plug->count >= BLK_MAX_REQUEST_COUNT)
- blk_flush_plug_list(plug, false);
} else {
spin_lock_irq(q->queue_lock);
add_acct_request(q, req, where);
@@ -1314,9 +1401,8 @@ get_rq:
out_unlock:
spin_unlock_irq(q->queue_lock);
}
-out:
- return 0;
}
+EXPORT_SYMBOL_GPL(blk_queue_bio); /* for device mapper only */
/*
* If bio->bi_dev is a partition, remap the location
@@ -1415,165 +1501,135 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
return 0;
}
-/**
- * generic_make_request - hand a buffer to its device driver for I/O
- * @bio: The bio describing the location in memory and on the device.
- *
- * generic_make_request() is used to make I/O requests of block
- * devices. It is passed a &struct bio, which describes the I/O that needs
- * to be done.
- *
- * generic_make_request() does not return any status. The
- * success/failure status of the request, along with notification of
- * completion, is delivered asynchronously through the bio->bi_end_io
- * function described (one day) else where.
- *
- * The caller of generic_make_request must make sure that bi_io_vec
- * are set to describe the memory buffer, and that bi_dev and bi_sector are
- * set to describe the device address, and the
- * bi_end_io and optionally bi_private are set to describe how
- * completion notification should be signaled.
- *
- * generic_make_request and the drivers it calls may use bi_next if this
- * bio happens to be merged with someone else, and may change bi_dev and
- * bi_sector for remaps as it sees fit. So the values of these fields
- * should NOT be depended on after the call to generic_make_request.
- */
-static inline void __generic_make_request(struct bio *bio)
+static noinline_for_stack bool
+generic_make_request_checks(struct bio *bio)
{
struct request_queue *q;
- sector_t old_sector;
- int ret, nr_sectors = bio_sectors(bio);
- dev_t old_dev;
+ int nr_sectors = bio_sectors(bio);
int err = -EIO;
+ char b[BDEVNAME_SIZE];
+ struct hd_struct *part;
might_sleep();
if (bio_check_eod(bio, nr_sectors))
goto end_io;
- /*
- * Resolve the mapping until finished. (drivers are
- * still free to implement/resolve their own stacking
- * by explicitly returning 0)
- *
- * NOTE: we don't repeat the blk_size check for each new device.
- * Stacking drivers are expected to know what they are doing.
- */
- old_sector = -1;
- old_dev = 0;
- do {
- char b[BDEVNAME_SIZE];
- struct hd_struct *part;
-
- q = bdev_get_queue(bio->bi_bdev);
- if (unlikely(!q)) {
- printk(KERN_ERR
- "generic_make_request: Trying to access "
- "nonexistent block-device %s (%Lu)\n",
- bdevname(bio->bi_bdev, b),
- (long long) bio->bi_sector);
- goto end_io;
- }
-
- if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
- nr_sectors > queue_max_hw_sectors(q))) {
- printk(KERN_ERR "bio too big device %s (%u > %u)\n",
- bdevname(bio->bi_bdev, b),
- bio_sectors(bio),
- queue_max_hw_sectors(q));
- goto end_io;
- }
-
- if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
- goto end_io;
-
- part = bio->bi_bdev->bd_part;
- if (should_fail_request(part, bio->bi_size) ||
- should_fail_request(&part_to_disk(part)->part0,
- bio->bi_size))
- goto end_io;
-
- /*
- * If this device has partitions, remap block n
- * of partition p to block n+start(p) of the disk.
- */
- blk_partition_remap(bio);
+ q = bdev_get_queue(bio->bi_bdev);
+ if (unlikely(!q)) {
+ printk(KERN_ERR
+ "generic_make_request: Trying to access "
+ "nonexistent block-device %s (%Lu)\n",
+ bdevname(bio->bi_bdev, b),
+ (long long) bio->bi_sector);
+ goto end_io;
+ }
- if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
- goto end_io;
+ if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
+ nr_sectors > queue_max_hw_sectors(q))) {
+ printk(KERN_ERR "bio too big device %s (%u > %u)\n",
+ bdevname(bio->bi_bdev, b),
+ bio_sectors(bio),
+ queue_max_hw_sectors(q));
+ goto end_io;
+ }
- if (old_sector != -1)
- trace_block_bio_remap(q, bio, old_dev, old_sector);
+ part = bio->bi_bdev->bd_part;
+ if (should_fail_request(part, bio->bi_size) ||
+ should_fail_request(&part_to_disk(part)->part0,
+ bio->bi_size))
+ goto end_io;
- old_sector = bio->bi_sector;
- old_dev = bio->bi_bdev->bd_dev;
+ /*
+ * If this device has partitions, remap block n
+ * of partition p to block n+start(p) of the disk.
+ */
+ blk_partition_remap(bio);
- if (bio_check_eod(bio, nr_sectors))
- goto end_io;
+ if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
+ goto end_io;
- /*
- * Filter flush bio's early so that make_request based
- * drivers without flush support don't have to worry
- * about them.
- */
- if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
- bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
- if (!nr_sectors) {
- err = 0;
- goto end_io;
- }
- }
+ if (bio_check_eod(bio, nr_sectors))
+ goto end_io;
- if ((bio->bi_rw & REQ_DISCARD) &&
- (!blk_queue_discard(q) ||
- ((bio->bi_rw & REQ_SECURE) &&
- !blk_queue_secdiscard(q)))) {
- err = -EOPNOTSUPP;
+ /*
+ * Filter flush bio's early so that make_request based
+ * drivers without flush support don't have to worry
+ * about them.
+ */
+ if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
+ bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
+ if (!nr_sectors) {
+ err = 0;
goto end_io;
}
+ }
- if (blk_throtl_bio(q, &bio))
- goto end_io;
-
- /*
- * If bio = NULL, bio has been throttled and will be submitted
- * later.
- */
- if (!bio)
- break;
-
- trace_block_bio_queue(q, bio);
+ if ((bio->bi_rw & REQ_DISCARD) &&
+ (!blk_queue_discard(q) ||
+ ((bio->bi_rw & REQ_SECURE) &&
+ !blk_queue_secdiscard(q)))) {
+ err = -EOPNOTSUPP;
+ goto end_io;
+ }
- ret = q->make_request_fn(q, bio);
- } while (ret);
+ if (blk_throtl_bio(q, bio))
+ return false; /* throttled, will be resubmitted later */
- return;
+ trace_block_bio_queue(q, bio);
+ return true;
end_io:
bio_endio(bio, err);
+ return false;
}
-/*
- * We only want one ->make_request_fn to be active at a time,
- * else stack usage with stacked devices could be a problem.
- * So use current->bio_list to keep a list of requests
- * submited by a make_request_fn function.
- * current->bio_list is also used as a flag to say if
- * generic_make_request is currently active in this task or not.
- * If it is NULL, then no make_request is active. If it is non-NULL,
- * then a make_request is active, and new requests should be added
- * at the tail
+/**
+ * generic_make_request - hand a buffer to its device driver for I/O
+ * @bio: The bio describing the location in memory and on the device.
+ *
+ * generic_make_request() is used to make I/O requests of block
+ * devices. It is passed a &struct bio, which describes the I/O that needs
+ * to be done.
+ *
+ * generic_make_request() does not return any status. The
+ * success/failure status of the request, along with notification of
+ * completion, is delivered asynchronously through the bio->bi_end_io
+ * function described (one day) else where.
+ *
+ * The caller of generic_make_request must make sure that bi_io_vec
+ * are set to describe the memory buffer, and that bi_dev and bi_sector are
+ * set to describe the device address, and the
+ * bi_end_io and optionally bi_private are set to describe how
+ * completion notification should be signaled.
+ *
+ * generic_make_request and the drivers it calls may use bi_next if this
+ * bio happens to be merged with someone else, and may resubmit the bio to
+ * a lower device by calling into generic_make_request recursively, which
+ * means the bio should NOT be touched after the call to ->make_request_fn.
*/
void generic_make_request(struct bio *bio)
{
struct bio_list bio_list_on_stack;
+ if (!generic_make_request_checks(bio))
+ return;
+
+ /*
+ * We only want one ->make_request_fn to be active at a time, else
+ * stack usage with stacked devices could be a problem. So use
+ * current->bio_list to keep a list of requests submited by a
+ * make_request_fn function. current->bio_list is also used as a
+ * flag to say if generic_make_request is currently active in this
+ * task or not. If it is NULL, then no make_request is active. If
+ * it is non-NULL, then a make_request is active, and new requests
+ * should be added at the tail
+ */
if (current->bio_list) {
- /* make_request is active */
bio_list_add(current->bio_list, bio);
return;
}
+
/* following loop may be a bit non-obvious, and so deserves some
* explanation.
* Before entering the loop, bio->bi_next is NULL (as all callers
@@ -1581,22 +1637,21 @@ void generic_make_request(struct bio *bio)
* We pretend that we have just taken it off a longer list, so
* we assign bio_list to a pointer to the bio_list_on_stack,
* thus initialising the bio_list of new bios to be
- * added. __generic_make_request may indeed add some more bios
+ * added. ->make_request() may indeed add some more bios
* through a recursive call to generic_make_request. If it
* did, we find a non-NULL value in bio_list and re-enter the loop
* from the top. In this case we really did just take the bio
* of the top of the list (no pretending) and so remove it from
- * bio_list, and call into __generic_make_request again.
- *
- * The loop was structured like this to make only one call to
- * __generic_make_request (which is important as it is large and
- * inlined) and to keep the structure simple.
+ * bio_list, and call into ->make_request() again.
*/
BUG_ON(bio->bi_next);
bio_list_init(&bio_list_on_stack);
current->bio_list = &bio_list_on_stack;
do {
- __generic_make_request(bio);
+ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+
+ q->make_request_fn(q, bio);
+
bio = bio_list_pop(current->bio_list);
} while (bio);
current->bio_list = NULL; /* deactivate */
@@ -1723,6 +1778,8 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
where = ELEVATOR_INSERT_FLUSH;
add_acct_request(q, rq, where);
+ if (where == ELEVATOR_INSERT_FLUSH)
+ __blk_run_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
return 0;
@@ -2626,6 +2683,20 @@ EXPORT_SYMBOL(kblockd_schedule_delayed_work);
#define PLUG_MAGIC 0x91827364
+/**
+ * blk_start_plug - initialize blk_plug and track it inside the task_struct
+ * @plug: The &struct blk_plug that needs to be initialized
+ *
+ * Description:
+ * Tracking blk_plug inside the task_struct will help with auto-flushing the
+ * pending I/O should the task end up blocking between blk_start_plug() and
+ * blk_finish_plug(). This is important from a performance perspective, but
+ * also ensures that we don't deadlock. For instance, if the task is blocking
+ * for a memory allocation, memory reclaim could end up wanting to free a
+ * page belonging to that request that is currently residing in our private
+ * plug. By flushing the pending I/O when the process goes to sleep, we avoid
+ * this kind of deadlock.
+ */
void blk_start_plug(struct blk_plug *plug)
{
struct task_struct *tsk = current;
@@ -2634,7 +2705,6 @@ void blk_start_plug(struct blk_plug *plug)
INIT_LIST_HEAD(&plug->list);
INIT_LIST_HEAD(&plug->cb_list);
plug->should_sort = 0;
- plug->count = 0;
/*
* If this is a nested plug, don't actually assign it. It will be
@@ -2718,7 +2788,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
return;
list_splice_init(&plug->list, &list);
- plug->count = 0;
if (plug->should_sort) {
list_sort(NULL, &list, plug_rq_cmp);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 491eb30a242d..720ad607ff91 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -320,7 +320,7 @@ void blk_insert_flush(struct request *rq)
return;
}
- BUG_ON(!rq->bio || rq->bio != rq->biotail);
+ BUG_ON(rq->bio != rq->biotail); /*assumes zero or single bio rq */
/*
* If there's data but flush is not necessary, the request can be
@@ -330,7 +330,6 @@ void blk_insert_flush(struct request *rq)
if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
list_add_tail(&rq->queuelist, &q->queue_head);
- blk_run_queue_async(q);
return;
}
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 129b9e209a3b..da2a818c3a92 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -24,6 +24,7 @@
#include <linux/mempool.h>
#include <linux/bio.h>
#include <linux/scatterlist.h>
+#include <linux/export.h>
#include <linux/slab.h>
#include "blk.h"
diff --git a/block/blk-map.c b/block/blk-map.c
index e663ac2d8e68..164cd0059706 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -204,10 +204,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
if (!iov[i].iov_len)
return -EINVAL;
- if (uaddr & queue_dma_alignment(q)) {
+ /*
+ * Keep going so we check length of all segments
+ */
+ if (uaddr & queue_dma_alignment(q))
unaligned = 1;
- break;
- }
}
if (unaligned || (q->dma_pad_mask & len) || map_data)
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 58340d0cb23a..1366a89d8e66 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -115,7 +115,7 @@ void __blk_complete_request(struct request *req)
/*
* Select completion CPU
*/
- if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) {
+ if (req->cpu != -1) {
ccpu = req->cpu;
if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
ccpu = blk_cpu_to_group(ccpu);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 0ee17b5e7fb6..e7f9f657f105 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -258,11 +258,13 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
ret = queue_var_store(&val, page, count);
spin_lock_irq(q->queue_lock);
- if (val) {
+ if (val == 2) {
queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
- if (val == 2)
- queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
- } else {
+ queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
+ } else if (val == 1) {
+ queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
+ queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+ } else if (val == 0) {
queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
}
@@ -455,11 +457,11 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
}
/**
- * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed
- * @kobj: the kobj belonging of the request queue to be released
+ * blk_release_queue: - release a &struct request_queue when it is no longer needed
+ * @kobj: the kobj belonging to the request queue to be released
*
* Description:
- * blk_cleanup_queue is the pair to blk_init_queue() or
+ * blk_release_queue is the pair to blk_init_queue() or
* blk_queue_make_request(). It should be called when a request queue is
* being released; typically when a block device is being de-registered.
* Currently, its primary task it to free all the &struct request
@@ -477,12 +479,18 @@ static void blk_release_queue(struct kobject *kobj)
blk_sync_queue(q);
+ if (q->elevator)
+ elevator_exit(q->elevator);
+
+ blk_throtl_exit(q);
+
if (rl->rq_pool)
mempool_destroy(rl->rq_pool);
if (q->queue_tags)
__blk_queue_free_tags(q);
+ blk_throtl_release(q);
blk_trace_shutdown(q);
bdi_destroy(&q->backing_dev_info);
diff --git a/block/blk-tag.c b/block/blk-tag.c
index ece65fc4c79b..e74d6d13838f 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -286,12 +286,14 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
BUG_ON(tag == -1);
- if (unlikely(tag >= bqt->real_max_depth))
+ if (unlikely(tag >= bqt->max_depth)) {
/*
* This can happen after tag depth has been reduced.
- * FIXME: how about a warning or info message here?
+ * But tag shouldn't be larger than real_max_depth.
*/
+ WARN_ON(tag >= bqt->real_max_depth);
return;
+ }
list_del_init(&rq->queuelist);
rq->cmd_flags &= ~REQ_QUEUED;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a19f58c6fc3a..4553245d9317 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -10,6 +10,7 @@
#include <linux/bio.h>
#include <linux/blktrace_api.h>
#include "blk-cgroup.h"
+#include "blk.h"
/* Max dispatch from a group in 1 round */
static int throtl_grp_quantum = 8;
@@ -302,16 +303,16 @@ throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
return tg;
}
-/*
- * This function returns with queue lock unlocked in case of error, like
- * request queue is no more
- */
static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
{
struct throtl_grp *tg = NULL, *__tg = NULL;
struct blkio_cgroup *blkcg;
struct request_queue *q = td->queue;
+ /* no throttling for dead queue */
+ if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+ return NULL;
+
rcu_read_lock();
blkcg = task_blkio_cgroup(current);
tg = throtl_find_tg(td, blkcg);
@@ -323,32 +324,22 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
/*
* Need to allocate a group. Allocation of group also needs allocation
* of per cpu stats which in-turn takes a mutex() and can block. Hence
- * we need to drop rcu lock and queue_lock before we call alloc
- *
- * Take the request queue reference to make sure queue does not
- * go away once we return from allocation.
+ * we need to drop rcu lock and queue_lock before we call alloc.
*/
- blk_get_queue(q);
rcu_read_unlock();
spin_unlock_irq(q->queue_lock);
tg = throtl_alloc_tg(td);
- /*
- * We might have slept in group allocation. Make sure queue is not
- * dead
- */
- if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
- blk_put_queue(q);
- if (tg)
- kfree(tg);
-
- return ERR_PTR(-ENODEV);
- }
- blk_put_queue(q);
/* Group allocated and queue is still alive. take the lock */
spin_lock_irq(q->queue_lock);
+ /* Make sure @q is still alive */
+ if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
+ kfree(tg);
+ return NULL;
+ }
+
/*
* Initialize the new group. After sleeping, read the blkcg again.
*/
@@ -1014,11 +1005,6 @@ static void throtl_release_tgs(struct throtl_data *td)
}
}
-static void throtl_td_free(struct throtl_data *td)
-{
- kfree(td);
-}
-
/*
* Blk cgroup controller notification saying that blkio_group object is being
* delinked as associated cgroup object is going away. That also means that
@@ -1123,17 +1109,17 @@ static struct blkio_policy_type blkio_policy_throtl = {
.plid = BLKIO_POLICY_THROTL,
};
-int blk_throtl_bio(struct request_queue *q, struct bio **biop)
+bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
{
struct throtl_data *td = q->td;
struct throtl_grp *tg;
- struct bio *bio = *biop;
bool rw = bio_data_dir(bio), update_disptime = true;
struct blkio_cgroup *blkcg;
+ bool throttled = false;
if (bio->bi_rw & REQ_THROTTLED) {
bio->bi_rw &= ~REQ_THROTTLED;
- return 0;
+ goto out;
}
/*
@@ -1152,7 +1138,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
rw, rw_is_sync(bio->bi_rw));
rcu_read_unlock();
- return 0;
+ goto out;
}
}
rcu_read_unlock();
@@ -1161,18 +1147,10 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
* Either group has not been allocated yet or it is not an unlimited
* IO group
*/
-
spin_lock_irq(q->queue_lock);
tg = throtl_get_tg(td);
-
- if (IS_ERR(tg)) {
- if (PTR_ERR(tg) == -ENODEV) {
- /*
- * Queue is gone. No queue lock held here.
- */
- return -ENODEV;
- }
- }
+ if (unlikely(!tg))
+ goto out_unlock;
if (tg->nr_queued[rw]) {
/*
@@ -1200,7 +1178,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
* So keep on trimming slice even if bio is not queued.
*/
throtl_trim_slice(td, tg, rw);
- goto out;
+ goto out_unlock;
}
queue_bio:
@@ -1212,16 +1190,52 @@ queue_bio:
tg->nr_queued[READ], tg->nr_queued[WRITE]);
throtl_add_bio_tg(q->td, tg, bio);
- *biop = NULL;
+ throttled = true;
if (update_disptime) {
tg_update_disptime(td, tg);
throtl_schedule_next_dispatch(td);
}
+out_unlock:
+ spin_unlock_irq(q->queue_lock);
out:
+ return throttled;
+}
+
+/**
+ * blk_throtl_drain - drain throttled bios
+ * @q: request_queue to drain throttled bios for
+ *
+ * Dispatch all currently throttled bios on @q through ->make_request_fn().
+ */
+void blk_throtl_drain(struct request_queue *q)
+ __releases(q->queue_lock) __acquires(q->queue_lock)
+{
+ struct throtl_data *td = q->td;
+ struct throtl_rb_root *st = &td->tg_service_tree;
+ struct throtl_grp *tg;
+ struct bio_list bl;
+ struct bio *bio;
+
+ WARN_ON_ONCE(!queue_is_locked(q));
+
+ bio_list_init(&bl);
+
+ while ((tg = throtl_rb_first(st))) {
+ throtl_dequeue_tg(td, tg);
+
+ while ((bio = bio_list_peek(&tg->bio_lists[READ])))
+ tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl);
+ while ((bio = bio_list_peek(&tg->bio_lists[WRITE])))
+ tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl);
+ }
spin_unlock_irq(q->queue_lock);
- return 0;
+
+ while ((bio = bio_list_pop(&bl)))
+ generic_make_request(bio);
+
+ spin_lock_irq(q->queue_lock);
}
int blk_throtl_init(struct request_queue *q)
@@ -1296,7 +1310,11 @@ void blk_throtl_exit(struct request_queue *q)
* it.
*/
throtl_shutdown_wq(q);
- throtl_td_free(td);
+}
+
+void blk_throtl_release(struct request_queue *q)
+{
+ kfree(q->td);
}
static int __init throtl_init(void)
diff --git a/block/blk.h b/block/blk.h
index 20b900a377c9..3f6551b3c92d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -15,6 +15,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
struct bio *bio);
int blk_rq_append_bio(struct request_queue *q, struct request *rq,
struct bio *bio);
+void blk_drain_queue(struct request_queue *q, bool drain_all);
void blk_dequeue_request(struct request *rq);
void __blk_queue_free_tags(struct request_queue *q);
bool __blk_end_bidi_request(struct request *rq, int error,
@@ -188,4 +189,21 @@ static inline int blk_do_io_stat(struct request *rq)
(rq->cmd_flags & REQ_DISCARD));
}
-#endif
+#ifdef CONFIG_BLK_DEV_THROTTLING
+extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio);
+extern void blk_throtl_drain(struct request_queue *q);
+extern int blk_throtl_init(struct request_queue *q);
+extern void blk_throtl_exit(struct request_queue *q);
+extern void blk_throtl_release(struct request_queue *q);
+#else /* CONFIG_BLK_DEV_THROTTLING */
+static inline bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
+{
+ return false;
+}
+static inline void blk_throtl_drain(struct request_queue *q) { }
+static inline int blk_throtl_init(struct request_queue *q) { return 0; }
+static inline void blk_throtl_exit(struct request_queue *q) { }
+static inline void blk_throtl_release(struct request_queue *q) { }
+#endif /* CONFIG_BLK_DEV_THROTTLING */
+
+#endif /* BLK_INTERNAL_H */
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 6690e6e41037..7ad49c88f6b1 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -25,7 +25,7 @@
#include <linux/delay.h>
#include <linux/scatterlist.h>
#include <linux/bsg-lib.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <scsi/scsi_cmnd.h>
/**
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a33bd4377c61..16ace89613bc 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -130,8 +130,8 @@ struct cfq_queue {
unsigned long slice_end;
long slice_resid;
- /* pending metadata requests */
- int meta_pending;
+ /* pending priority requests */
+ int prio_pending;
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
@@ -684,8 +684,8 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
if (rq_is_sync(rq1) != rq_is_sync(rq2))
return rq_is_sync(rq1) ? rq1 : rq2;
- if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
- return rq1->cmd_flags & REQ_META ? rq1 : rq2;
+ if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_PRIO)
+ return rq1->cmd_flags & REQ_PRIO ? rq1 : rq2;
s1 = blk_rq_pos(rq1);
s2 = blk_rq_pos(rq2);
@@ -1612,9 +1612,9 @@ static void cfq_remove_request(struct request *rq)
cfqq->cfqd->rq_queued--;
cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
rq_data_dir(rq), rq_is_sync(rq));
- if (rq->cmd_flags & REQ_META) {
- WARN_ON(!cfqq->meta_pending);
- cfqq->meta_pending--;
+ if (rq->cmd_flags & REQ_PRIO) {
+ WARN_ON(!cfqq->prio_pending);
+ cfqq->prio_pending--;
}
}
@@ -3372,7 +3372,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
* So both queues are sync. Let the new request get disk time if
* it's a metadata request and the current queue is doing regular IO.
*/
- if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
+ if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending)
return true;
/*
@@ -3439,8 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct cfq_io_context *cic = RQ_CIC(rq);
cfqd->rq_queued++;
- if (rq->cmd_flags & REQ_META)
- cfqq->meta_pending++;
+ if (rq->cmd_flags & REQ_PRIO)
+ cfqq->prio_pending++;
cfq_update_io_thinktime(cfqd, cfqq, cic);
cfq_update_io_seektime(cfqd, cfqq, rq);
diff --git a/block/elevator.c b/block/elevator.c
index a3b64bc71d88..66343d6917d0 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -31,7 +31,6 @@
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
-#include <linux/delay.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
@@ -182,7 +181,7 @@ static void elevator_attach(struct request_queue *q, struct elevator_queue *eq,
eq->elevator_data = data;
}
-static char chosen_elevator[16];
+static char chosen_elevator[ELV_NAME_MAX];
static int __init elevator_setup(char *str)
{
@@ -606,43 +605,35 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
void elv_drain_elevator(struct request_queue *q)
{
static int printed;
+
+ lockdep_assert_held(q->queue_lock);
+
while (q->elevator->ops->elevator_dispatch_fn(q, 1))
;
- if (q->nr_sorted == 0)
- return;
- if (printed++ < 10) {
+ if (q->nr_sorted && printed++ < 10) {
printk(KERN_ERR "%s: forced dispatching is broken "
"(nr_sorted=%u), please report this\n",
q->elevator->elevator_type->elevator_name, q->nr_sorted);
}
}
-/*
- * Call with queue lock held, interrupts disabled
- */
void elv_quiesce_start(struct request_queue *q)
{
if (!q->elevator)
return;
+ spin_lock_irq(q->queue_lock);
queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
+ spin_unlock_irq(q->queue_lock);
- /*
- * make sure we don't have any requests in flight
- */
- elv_drain_elevator(q);
- while (q->rq.elvpriv) {
- __blk_run_queue(q);
- spin_unlock_irq(q->queue_lock);
- msleep(10);
- spin_lock_irq(q->queue_lock);
- elv_drain_elevator(q);
- }
+ blk_drain_queue(q, false);
}
void elv_quiesce_end(struct request_queue *q)
{
+ spin_lock_irq(q->queue_lock);
queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
+ spin_unlock_irq(q->queue_lock);
}
void __elv_add_request(struct request_queue *q, struct request *rq, int where)
@@ -972,7 +963,6 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
/*
* Turn on BYPASS and drain all requests w/ elevator private data
*/
- spin_lock_irq(q->queue_lock);
elv_quiesce_start(q);
/*
@@ -983,8 +973,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
/*
* attach and start new elevator
*/
+ spin_lock_irq(q->queue_lock);
elevator_attach(q, e, data);
-
spin_unlock_irq(q->queue_lock);
if (old_elevator->registered) {
@@ -999,9 +989,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
* finally exit old elevator and turn off BYPASS.
*/
elevator_exit(old_elevator);
- spin_lock_irq(q->queue_lock);
elv_quiesce_end(q);
- spin_unlock_irq(q->queue_lock);
blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
@@ -1015,10 +1003,7 @@ fail_register:
elevator_exit(e);
q->elevator = old_elevator;
elv_register_queue(q);
-
- spin_lock_irq(q->queue_lock);
- queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
- spin_unlock_irq(q->queue_lock);
+ elv_quiesce_end(q);
return err;
}
diff --git a/block/genhd.c b/block/genhd.c
index e2f67902dd02..02e9fca80825 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -536,7 +536,7 @@ void register_disk(struct gendisk *disk)
disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
/* No minors to use for partitions */
- if (!disk_partitionable(disk))
+ if (!disk_part_scan_enabled(disk))
goto exit;
/* No such device (e.g., media were just removed) */
@@ -611,6 +611,12 @@ void add_disk(struct gendisk *disk)
register_disk(disk);
blk_register_queue(disk);
+ /*
+ * Take an extra ref on queue which will be put on disk_release()
+ * so that it sticks around as long as @disk is there.
+ */
+ WARN_ON_ONCE(blk_get_queue(disk->queue));
+
retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
"bdi");
WARN_ON(retval);
@@ -841,7 +847,7 @@ static int show_partition(struct seq_file *seqf, void *v)
char buf[BDEVNAME_SIZE];
/* Don't show non-partitionable removeable devices or empty devices */
- if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
+ if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
(sgp->flags & GENHD_FL_REMOVABLE)))
return 0;
if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
@@ -1095,6 +1101,8 @@ static void disk_release(struct device *dev)
disk_replace_part_tbl(disk, NULL);
free_part_stats(&disk->part0);
free_part_info(&disk->part0);
+ if (disk->queue)
+ blk_put_queue(disk->queue);
kfree(disk);
}
struct class block_class = {
diff --git a/block/ioctl.c b/block/ioctl.c
index 1124cd297263..ca939fc1030f 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -1,5 +1,6 @@
#include <linux/capability.h>
#include <linux/blkdev.h>
+#include <linux/export.h>
#include <linux/gfp.h>
#include <linux/blkpg.h>
#include <linux/hdreg.h>
@@ -101,7 +102,7 @@ static int blkdev_reread_part(struct block_device *bdev)
struct gendisk *disk = bdev->bd_disk;
int res;
- if (!disk_partitionable(disk) || bdev != bdev->bd_contains)
+ if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 4f4230b79bb6..fbdf0d802ec4 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -565,7 +565,7 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
{
int err;
- if (!q || blk_get_queue(q))
+ if (!q)
return -ENXIO;
switch (cmd) {
@@ -686,7 +686,6 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
err = -ENOTTY;
}
- blk_put_queue(q);
return err;
}
EXPORT_SYMBOL(scsi_cmd_ioctl);