summaryrefslogtreecommitdiff
path: root/fs/btrfs/raid56.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/raid56.c')
-rw-r--r-- fs/btrfs/raid56.c 169
1 files changed, 137 insertions, 32 deletions
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 5ef4fbb49df2..2d90a6b5eb00 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -21,6 +21,7 @@
#include "raid56.h"
#include "async-thread.h"
#include "file-item.h"
+#include "btrfs_inode.h"
/* set when additional merges to this rbio are not allowed */
#define RBIO_RMW_LOCKED_BIT 1
@@ -1433,14 +1434,56 @@ static void rbio_update_error_bitmap(struct btrfs_raid_bio *rbio, struct bio *bi
bio_size >> rbio->bioc->fs_info->sectorsize_bits);
}
+/*
+ * Compare the sectors carried by @bio against their expected data checksums.
+ *
+ * Walks every segment of @bio one sector at a time, starting from the sector
+ * index returned by get_bio_sector_nr().  A sector whose bit is set in
+ * rbio->csum_bitmap is handed to btrfs_check_sector_csum(); a negative return
+ * marks that sector in rbio->error_bitmap.  Sectors without a csum bit are
+ * skipped.
+ */
+static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio,
+				    struct bio *bio)
+{
+	struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
+	int total_sector_nr = get_bio_sector_nr(rbio, bio);
+	struct bvec_iter_all iter_all;
+	struct bio_vec *bvec;
+
+	/* The whole stripe has no data csum, so there is nothing to check. */
+	if (!rbio->csum_bitmap || !rbio->csum_buf)
+		return;
+
+	/* The bio starts in the P/Q area, which data csums do not cover. */
+	if (total_sector_nr >= rbio->nr_data * rbio->stripe_nsectors)
+		return;
+
+	bio_for_each_segment_all(bvec, bio, iter_all) {
+		int bv_offset = bvec->bv_offset;
+
+		/* Step through this segment one sector at a time. */
+		while (bv_offset < bvec->bv_offset + bvec->bv_len) {
+			u8 csum_buf[BTRFS_CSUM_SIZE];
+
+			/* Only sectors flagged in the bitmap have a csum. */
+			if (test_bit(total_sector_nr, rbio->csum_bitmap)) {
+				u8 *expected_csum = rbio->csum_buf +
+					total_sector_nr * fs_info->csum_size;
+
+				if (btrfs_check_sector_csum(fs_info,
+						bvec->bv_page, bv_offset,
+						csum_buf, expected_csum) < 0)
+					set_bit(total_sector_nr,
+						rbio->error_bitmap);
+			}
+
+			bv_offset += fs_info->sectorsize;
+			total_sector_nr++;
+		}
+	}
+}
+
static void raid_wait_read_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- if (bio->bi_status)
+ if (bio->bi_status) {
rbio_update_error_bitmap(rbio, bio);
- else
+ } else {
set_bio_pages_uptodate(rbio, bio);
+ verify_bio_data_sectors(rbio, bio);
+ }
bio_put(bio);
if (atomic_dec_and_test(&rbio->stripes_pending))
@@ -1469,37 +1512,25 @@ static void submit_read_bios(struct btrfs_raid_bio *rbio,
static int rmw_assemble_read_bios(struct btrfs_raid_bio *rbio,
struct bio_list *bio_list)
{
- const int nr_data_sectors = rbio->stripe_nsectors * rbio->nr_data;
struct bio *bio;
int total_sector_nr;
int ret = 0;
ASSERT(bio_list_size(bio_list) == 0);
- /* Build a list of bios to read all the missing data sectors. */
- for (total_sector_nr = 0; total_sector_nr < nr_data_sectors;
+ /*
+ * Build a list of bios to read all sectors (including data and P/Q).
+ *
+ * This behavior is needed to support the later csum verification and
+ * recovery.
+ */
+ for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
total_sector_nr++) {
struct sector_ptr *sector;
int stripe = total_sector_nr / rbio->stripe_nsectors;
int sectornr = total_sector_nr % rbio->stripe_nsectors;
- /*
- * We want to find all the sectors missing from the rbio and
- * read them from the disk. If sector_in_rbio() finds a page
- * in the bio list we don't need to read it off the stripe.
- */
- sector = sector_in_rbio(rbio, stripe, sectornr, 1);
- if (sector)
- continue;
-
sector = rbio_stripe_sector(rbio, stripe, sectornr);
- /*
- * The bio cache may have handed us an uptodate page. If so,
- * use it.
- */
- if (sector->uptodate)
- continue;
-
ret = rbio_add_io_sector(rbio, bio_list, sector,
stripe, sectornr, REQ_OP_READ);
if (ret)
@@ -1670,6 +1701,42 @@ fail:
bio_endio(bio);
}
+/*
+ * Check the sector at (@stripe_nr, @sector_nr) against its expected data csum.
+ *
+ * Returns 0 without checking anything when the rbio has no csum bitmap or
+ * csum buffer, or when @stripe_nr is at or beyond rbio->nr_data (P/Q).
+ * Otherwise returns the result of btrfs_check_sector_csum().
+ *
+ * NOTE(review): unlike verify_bio_data_sectors(), the per-sector bit in
+ * rbio->csum_bitmap is not tested here; confirm callers only pass sectors
+ * that actually have a csum.
+ */
+static int verify_one_sector(struct btrfs_raid_bio *rbio,
+			     int stripe_nr, int sector_nr)
+{
+	struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
+	u8 csum_buf[BTRFS_CSUM_SIZE];
+	struct sector_ptr *sector;
+	u8 *csum_expected;
+
+	if (!rbio->csum_bitmap || !rbio->csum_buf)
+		return 0;
+
+	/* P/Q stripes are not covered by data csum, nothing to compare. */
+	if (stripe_nr >= rbio->nr_data)
+		return 0;
+
+	/*
+	 * The rebuild operations have to use pages from the bio list if
+	 * possible; every other operation takes the stripe sector directly.
+	 */
+	switch (rbio->operation) {
+	case BTRFS_RBIO_READ_REBUILD:
+	case BTRFS_RBIO_REBUILD_MISSING:
+		sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
+		break;
+	default:
+		sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
+		break;
+	}
+
+	ASSERT(sector->page);
+
+	/* The csum buffer holds one csum_size slot per sector of the stripe. */
+	csum_expected = rbio->csum_buf +
+			(stripe_nr * rbio->stripe_nsectors + sector_nr) *
+			fs_info->csum_size;
+
+	return btrfs_check_sector_csum(fs_info, sector->page, sector->pgoff,
+				       csum_buf, csum_expected);
+}
+
/*
* Recover a vertical stripe specified by @sector_nr.
* @*pointers are the pre-allocated pointers by the caller, so we don't
@@ -1685,6 +1752,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
int faila;
int failb;
int stripe_nr;
+ int ret = 0;
/*
* Now we just use bitmap to mark the horizontal stripes in
@@ -1805,12 +1873,23 @@ pstripe:
* uptodate.
* Especially if we determine to cache the rbio, we need to
* have at least all data sectors uptodate.
+ *
+ * If possible, also check if the repaired sector matches its data
+ * checksum.
*/
if (faila >= 0) {
+ ret = verify_one_sector(rbio, faila, sector_nr);
+ if (ret < 0)
+ goto cleanup;
+
sector = rbio_stripe_sector(rbio, faila, sector_nr);
sector->uptodate = 1;
}
if (failb >= 0) {
+ /* faila was already verified above; this rebuilt sector is failb. */
+ ret = verify_one_sector(rbio, failb, sector_nr);
+ if (ret < 0)
+ goto cleanup;
+
sector = rbio_stripe_sector(rbio, failb, sector_nr);
sector->uptodate = 1;
}
@@ -1818,7 +1897,7 @@ pstripe:
cleanup:
for (stripe_nr = rbio->real_stripes - 1; stripe_nr >= 0; stripe_nr--)
kunmap_local(unmap_array[stripe_nr]);
- return 0;
+ return ret;
}
static int recover_sectors(struct btrfs_raid_bio *rbio)
@@ -2115,7 +2194,7 @@ no_csum:
rbio->csum_bitmap = NULL;
}
-static int rmw_read_and_wait(struct btrfs_raid_bio *rbio)
+static int rmw_read_wait_recover(struct btrfs_raid_bio *rbio)
{
struct bio_list bio_list;
struct bio *bio;
@@ -2136,6 +2215,12 @@ static int rmw_read_and_wait(struct btrfs_raid_bio *rbio)
submit_read_bios(rbio, &bio_list);
wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
+
+ /*
+ * We may or may not have any corrupted sectors (including missing dev
+ * and csum mismatch), just let recover_sectors() handle them all.
+ */
+ ret = recover_sectors(rbio);
return ret;
out:
while ((bio = bio_list_pop(&bio_list)))
@@ -2175,6 +2260,28 @@ static void submit_write_bios(struct btrfs_raid_bio *rbio,
}
}
+/*
+ * Tell whether the RMW path has to read any sector from disk.
+ *
+ * Scans the data sectors (the first nr_data * stripe_nsectors entries of
+ * rbio->stripe_sectors) and returns true as soon as one lacks a page or is
+ * not uptodate.  Returning false means every data sector is already present
+ * and uptodate, so the rbio is a cached one and the read can be skipped.
+ */
+static bool need_read_stripe_sectors(struct btrfs_raid_bio *rbio)
+{
+	const int nr_data_sectors = rbio->nr_data * rbio->stripe_nsectors;
+	int i;
+
+	for (i = 0; i < nr_data_sectors; i++) {
+		const struct sector_ptr *sector = &rbio->stripe_sectors[i];
+
+		/* Present and uptodate: nothing to read for this sector. */
+		if (sector->page && sector->uptodate)
+			continue;
+
+		/*
+		 * A data sector without a page or not uptodate means this
+		 * cannot be a cached rbio, as a cached one must have all its
+		 * data sectors present and uptodate.
+		 */
+		return true;
+	}
+	return false;
+}
+
static int rmw_rbio(struct btrfs_raid_bio *rbio)
{
struct bio_list bio_list;
@@ -2189,9 +2296,13 @@ static int rmw_rbio(struct btrfs_raid_bio *rbio)
if (ret < 0)
return ret;
- /* Full stripe write, can write the full stripe right now. */
- if (rbio_is_full(rbio))
+ /*
+ * Either full stripe write, or we have every data sector already
+ * cached, can go to write path immediately.
+ */
+ if (rbio_is_full(rbio) || !need_read_stripe_sectors(rbio))
goto write;
+
/*
* Now we're doing sub-stripe write, also need all data stripes to do
* the full RMW.
@@ -2202,16 +2313,10 @@ static int rmw_rbio(struct btrfs_raid_bio *rbio)
index_rbio_pages(rbio);
- ret = rmw_read_and_wait(rbio);
+ ret = rmw_read_wait_recover(rbio);
if (ret < 0)
return ret;
- /* We have read errors, try recovery path. */
- if (!bitmap_empty(rbio->error_bitmap, rbio->nr_sectors)) {
- ret = recover_rbio(rbio);
- if (ret < 0)
- return ret;
- }
write:
/*
* At this stage we're not allowed to add any new bios to the