diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-26 12:01:35 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-26 12:01:35 -0700 |
commit | 3f7282139fe1594be464b90141d56738e7a0ea8a (patch) | |
tree | 2a6dfc4449ceeee6b5509c3fcc58c33865c89af6 | |
parent | 752d422e74c41084c3c9c9a159cb8d2795fa0c22 (diff) | |
parent | 1e21270685ae4c14361dd501da62cdc4be867d4e (diff) | |
download | linux-next-3f7282139fe1594be464b90141d56738e7a0ea8a.tar.gz |
Merge tag 'for-5.18/64bit-pi-2022-03-25' of git://git.kernel.dk/linux-block
Pull block layer 64-bit data integrity support from Jens Axboe:
"This adds support for 64-bit data integrity in the block layer and in
NVMe"
* tag 'for-5.18/64bit-pi-2022-03-25' of git://git.kernel.dk/linux-block:
crypto: fix crc64 testmgr digest byte order
nvme: add support for enhanced metadata
block: add pi for extended integrity
crypto: add rocksoft 64b crc guard tag framework
lib: add rocksoft model crc64
linux/kernel: introduce lower_48_bits function
asm-generic: introduce be48 unaligned accessors
nvme: allow integrity on extended metadata formats
block: support pi with extended metadata
-rw-r--r-- | block/Kconfig | 1 | ||||
-rw-r--r-- | block/bio-integrity.c | 1 | ||||
-rw-r--r-- | block/t10-pi.c | 198 | ||||
-rw-r--r-- | crypto/Kconfig | 5 | ||||
-rw-r--r-- | crypto/Makefile | 1 | ||||
-rw-r--r-- | crypto/crc64_rocksoft_generic.c | 89 | ||||
-rw-r--r-- | crypto/testmgr.c | 7 | ||||
-rw-r--r-- | crypto/testmgr.h | 15 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 165 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 4 | ||||
-rw-r--r-- | include/asm-generic/unaligned.h | 26 | ||||
-rw-r--r-- | include/linux/blk-integrity.h | 1 | ||||
-rw-r--r-- | include/linux/crc64.h | 7 | ||||
-rw-r--r-- | include/linux/kernel.h | 9 | ||||
-rw-r--r-- | include/linux/nvme.h | 53 | ||||
-rw-r--r-- | include/linux/t10-pi.h | 20 | ||||
-rw-r--r-- | lib/Kconfig | 9 | ||||
-rw-r--r-- | lib/Makefile | 1 | ||||
-rw-r--r-- | lib/crc64-rocksoft.c | 126 | ||||
-rw-r--r-- | lib/crc64.c | 28 | ||||
-rw-r--r-- | lib/gen_crc64table.c | 51 |
21 files changed, 770 insertions, 47 deletions
diff --git a/block/Kconfig b/block/Kconfig index 7eb5d6d53b3f..50b17e260fa2 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -73,6 +73,7 @@ config BLK_DEV_INTEGRITY_T10 tristate depends on BLK_DEV_INTEGRITY select CRC_T10DIF + select CRC64_ROCKSOFT config BLK_DEV_ZONED bool "Zoned block device support" diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 6996e7bd66e9..32929c89ba8a 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -165,6 +165,7 @@ static blk_status_t bio_integrity_process(struct bio *bio, iter.disk_name = bio->bi_bdev->bd_disk->disk_name; iter.interval = 1 << bi->interval_exp; + iter.tuple_size = bi->tuple_size; iter.seed = proc_iter->bi_sector; iter.prot_buf = bvec_virt(bip->bip_vec); diff --git a/block/t10-pi.c b/block/t10-pi.c index 25a52a2a09a8..914d8cddd43a 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -7,8 +7,10 @@ #include <linux/t10-pi.h> #include <linux/blk-integrity.h> #include <linux/crc-t10dif.h> +#include <linux/crc64.h> #include <linux/module.h> #include <net/checksum.h> +#include <asm/unaligned.h> typedef __be16 (csum_fn) (void *, unsigned int); @@ -44,7 +46,7 @@ static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter, pi->ref_tag = 0; iter->data_buf += iter->interval; - iter->prot_buf += sizeof(struct t10_pi_tuple); + iter->prot_buf += iter->tuple_size; iter->seed++; } @@ -93,7 +95,7 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, next: iter->data_buf += iter->interval; - iter->prot_buf += sizeof(struct t10_pi_tuple); + iter->prot_buf += iter->tuple_size; iter->seed++; } @@ -278,4 +280,196 @@ const struct blk_integrity_profile t10_pi_type3_ip = { }; EXPORT_SYMBOL(t10_pi_type3_ip); +static __be64 ext_pi_crc64(void *data, unsigned int len) +{ + return cpu_to_be64(crc64_rocksoft(data, len)); +} + +static blk_status_t ext_pi_crc64_generate(struct blk_integrity_iter *iter, + enum t10_dif_type type) +{ + unsigned int i; + + for (i = 0 ; i < iter->data_size ; i += iter->interval) { + struct crc64_pi_tuple *pi = iter->prot_buf; + + pi->guard_tag = ext_pi_crc64(iter->data_buf, iter->interval); + pi->app_tag = 0; + + if (type == T10_PI_TYPE1_PROTECTION) + put_unaligned_be48(iter->seed, pi->ref_tag); + else + put_unaligned_be48(0ULL, pi->ref_tag); + + iter->data_buf += iter->interval; + iter->prot_buf += iter->tuple_size; + iter->seed++; + } + + return BLK_STS_OK; +} + +static bool ext_pi_ref_escape(u8 *ref_tag) +{ + static u8 ref_escape[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + + return memcmp(ref_tag, ref_escape, sizeof(ref_escape)) == 0; +} + +static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter, + enum t10_dif_type type) +{ + unsigned int i; + + for (i = 0; i < iter->data_size; i += iter->interval) { + struct crc64_pi_tuple *pi = iter->prot_buf; + u64 ref, seed; + __be64 csum; + + if (type == T10_PI_TYPE1_PROTECTION) { + if (pi->app_tag == T10_PI_APP_ESCAPE) + goto next; + + ref = get_unaligned_be48(pi->ref_tag); + seed = lower_48_bits(iter->seed); + if (ref != seed) { + pr_err("%s: ref tag error at location %llu (rcvd %llu)\n", + iter->disk_name, seed, ref); + return BLK_STS_PROTECTION; + } + } else if (type == T10_PI_TYPE3_PROTECTION) { + if (pi->app_tag == T10_PI_APP_ESCAPE && + ext_pi_ref_escape(pi->ref_tag)) + goto next; + } + + csum = ext_pi_crc64(iter->data_buf, iter->interval); + if (pi->guard_tag != csum) { + pr_err("%s: guard tag error at sector %llu " \ + "(rcvd %016llx, want %016llx)\n", + iter->disk_name, (unsigned long long)iter->seed, + be64_to_cpu(pi->guard_tag), be64_to_cpu(csum)); + return BLK_STS_PROTECTION; + } + +next: + iter->data_buf += iter->interval; + iter->prot_buf += iter->tuple_size; + iter->seed++; + } + + return BLK_STS_OK; +} + +static blk_status_t ext_pi_type1_verify_crc64(struct blk_integrity_iter *iter) +{ + return ext_pi_crc64_verify(iter, T10_PI_TYPE1_PROTECTION); +} + +static blk_status_t ext_pi_type1_generate_crc64(struct blk_integrity_iter *iter) +{ + return ext_pi_crc64_generate(iter, T10_PI_TYPE1_PROTECTION); +} + +static void ext_pi_type1_prepare(struct request *rq) +{ + const int tuple_sz = rq->q->integrity.tuple_size; + u64 ref_tag = ext_pi_ref_tag(rq); + struct bio *bio; + + __rq_for_each_bio(bio, rq) { + struct bio_integrity_payload *bip = bio_integrity(bio); + u64 virt = lower_48_bits(bip_get_seed(bip)); + struct bio_vec iv; + struct bvec_iter iter; + + /* Already remapped? */ + if (bip->bip_flags & BIP_MAPPED_INTEGRITY) + break; + + bip_for_each_vec(iv, bip, iter) { + unsigned int j; + void *p; + + p = bvec_kmap_local(&iv); + for (j = 0; j < iv.bv_len; j += tuple_sz) { + struct crc64_pi_tuple *pi = p; + u64 ref = get_unaligned_be48(pi->ref_tag); + + if (ref == virt) + put_unaligned_be48(ref_tag, pi->ref_tag); + virt++; + ref_tag++; + p += tuple_sz; + } + kunmap_local(p); + } + + bip->bip_flags |= BIP_MAPPED_INTEGRITY; + } +} + +static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes) +{ + unsigned intervals = nr_bytes >> rq->q->integrity.interval_exp; + const int tuple_sz = rq->q->integrity.tuple_size; + u64 ref_tag = ext_pi_ref_tag(rq); + struct bio *bio; + + __rq_for_each_bio(bio, rq) { + struct bio_integrity_payload *bip = bio_integrity(bio); + u64 virt = lower_48_bits(bip_get_seed(bip)); + struct bio_vec iv; + struct bvec_iter iter; + + bip_for_each_vec(iv, bip, iter) { + unsigned int j; + void *p; + + p = bvec_kmap_local(&iv); + for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) { + struct crc64_pi_tuple *pi = p; + u64 ref = get_unaligned_be48(pi->ref_tag); + + if (ref == ref_tag) + put_unaligned_be48(virt, pi->ref_tag); + virt++; + ref_tag++; + intervals--; + p += tuple_sz; + } + kunmap_local(p); + } + } +} + +static blk_status_t ext_pi_type3_verify_crc64(struct blk_integrity_iter *iter) +{ + return ext_pi_crc64_verify(iter, T10_PI_TYPE3_PROTECTION); +} + +static blk_status_t ext_pi_type3_generate_crc64(struct blk_integrity_iter *iter) +{ + return ext_pi_crc64_generate(iter, T10_PI_TYPE3_PROTECTION); +} + +const struct blk_integrity_profile ext_pi_type1_crc64 = { + .name = "EXT-DIF-TYPE1-CRC64", + .generate_fn = ext_pi_type1_generate_crc64, + .verify_fn = ext_pi_type1_verify_crc64, + .prepare_fn = ext_pi_type1_prepare, + .complete_fn = ext_pi_type1_complete, +}; +EXPORT_SYMBOL_GPL(ext_pi_type1_crc64); + +const struct blk_integrity_profile ext_pi_type3_crc64 = { + .name = "EXT-DIF-TYPE3-CRC64", + .generate_fn = ext_pi_type3_generate_crc64, + .verify_fn = ext_pi_type3_verify_crc64, + .prepare_fn = t10_pi_type3_prepare, + .complete_fn = t10_pi_type3_complete, +}; +EXPORT_SYMBOL_GPL(ext_pi_type3_crc64); + +MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL"); diff --git a/crypto/Kconfig b/crypto/Kconfig index d6d7e84bb7f8..41068811fd0e 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -743,6 +743,11 @@ config CRYPTO_CRCT10DIF_VPMSUM multiply-sum (vpmsum) instructions, introduced in POWER8. Enable on POWER8 and newer processors for improved performance. +config CRYPTO_CRC64_ROCKSOFT + tristate "Rocksoft Model CRC64 algorithm" + depends on CRC64 + select CRYPTO_HASH + config CRYPTO_VPMSUM_TESTER tristate "Powerpc64 vpmsum hardware acceleration tester" depends on CRYPTO_CRCT10DIF_VPMSUM && CRYPTO_CRC32C_VPMSUM diff --git a/crypto/Makefile b/crypto/Makefile index d76bff8d0ffd..f754c4d17d6b 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -152,6 +152,7 @@ obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o obj-$(CONFIG_CRYPTO_CRC32) += crc32_generic.o obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o +obj-$(CONFIG_CRYPTO_CRC64_ROCKSOFT) += crc64_rocksoft_generic.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o diff --git a/crypto/crc64_rocksoft_generic.c b/crypto/crc64_rocksoft_generic.c new file mode 100644 index 000000000000..9e812bb26dba --- /dev/null +++ b/crypto/crc64_rocksoft_generic.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/crc64.h> +#include <linux/module.h> +#include <crypto/internal/hash.h> +#include <asm/unaligned.h> + +static int chksum_init(struct shash_desc *desc) +{ + u64 *crc = shash_desc_ctx(desc); + + *crc = 0; + + return 0; +} + +static int chksum_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u64 *crc = shash_desc_ctx(desc); + + *crc = crc64_rocksoft_generic(*crc, data, length); + + return 0; +} + +static int chksum_final(struct shash_desc *desc, u8 *out) +{ + u64 *crc = shash_desc_ctx(desc); + + put_unaligned_le64(*crc, out); + return 0; +} + +static int __chksum_finup(u64 crc, const u8 *data, unsigned int len, u8 *out) +{ + crc = crc64_rocksoft_generic(crc, data, len); + put_unaligned_le64(crc, out); + return 0; +} + +static int chksum_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + u64 *crc = shash_desc_ctx(desc); + + return __chksum_finup(*crc, data, len, out); +} + +static int chksum_digest(struct shash_desc *desc, const u8 *data, + unsigned int length, u8 *out) +{ + return __chksum_finup(0, data, length, out); +} + +static struct shash_alg alg = { + .digestsize = sizeof(u64), + .init = chksum_init, + .update = chksum_update, + .final = chksum_final, + .finup = chksum_finup, + .digest = chksum_digest, + .descsize = sizeof(u64), + .base = { + .cra_name = CRC64_ROCKSOFT_STRING, + .cra_driver_name = "crc64-rocksoft-generic", + .cra_priority = 200, + .cra_blocksize = 1, + .cra_module = THIS_MODULE, + } +}; + +static int __init crc64_rocksoft_init(void) +{ + return crypto_register_shash(&alg); +} + +static void __exit crc64_rocksoft_exit(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crc64_rocksoft_init); +module_exit(crc64_rocksoft_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Rocksoft model CRC64 calculation."); +MODULE_ALIAS_CRYPTO("crc64-rocksoft"); +MODULE_ALIAS_CRYPTO("crc64-rocksoft-generic"); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 2d632a285869..4948201065cc 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4527,6 +4527,13 @@ static const struct alg_test_desc alg_test_descs[] = { .hash = __VECS(crc32c_tv_template) } }, { + .alg = "crc64-rocksoft", + .test = alg_test_hash, + .fips_allowed = 1, + .suite = { + .hash = __VECS(crc64_rocksoft_tv_template) + } + }, { .alg = "crct10dif", .test = alg_test_hash, .fips_allowed = 1, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index d1aa90993bbd..4d7449fc6a65 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -5106,6 +5106,21 @@ static const struct hash_testvec rmd160_tv_template[] = { } }; +static const u8 zeroes[4096] = { [0 ... 4095] = 0 }; +static const u8 ones[4096] = { [0 ... 4095] = 0xff }; + +static const struct hash_testvec crc64_rocksoft_tv_template[] = { + { + .plaintext = zeroes, + .psize = 4096, + .digest = "\x4e\xb6\x22\xeb\x67\xd3\x82\x64", + }, { + .plaintext = ones, + .psize = 4096, + .digest = "\xac\xa3\xec\x02\x73\xba\xdd\xc0", + } +}; + static const struct hash_testvec crct10dif_tv_template[] = { { .plaintext = "abc", diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index aeda4d0975fc..677fa4bf76d3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -785,6 +785,30 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, return BLK_STS_OK; } +static void nvme_set_ref_tag(struct nvme_ns *ns, struct nvme_command *cmnd, + struct request *req) +{ + u32 upper, lower; + u64 ref48; + + /* both rw and write zeroes share the same reftag format */ + switch (ns->guard_type) { + case NVME_NVM_NS_16B_GUARD: + cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req)); + break; + case NVME_NVM_NS_64B_GUARD: + ref48 = ext_pi_ref_tag(req); + lower = lower_32_bits(ref48); + upper = upper_32_bits(ref48); + + cmnd->rw.reftag = cpu_to_le32(lower); + cmnd->rw.cdw3 = cpu_to_le32(upper); + break; + default: + break; + } +} + static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, struct request *req, struct nvme_command *cmnd) { @@ -806,8 +830,7 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, switch (ns->pi_type) { case NVME_NS_DPS_PI_TYPE1: case NVME_NS_DPS_PI_TYPE2: - cmnd->write_zeroes.reftag = - cpu_to_le32(t10_pi_ref_tag(req)); + nvme_set_ref_tag(ns, cmnd, req); break; } } @@ -833,7 +856,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, cmnd->rw.opcode = op; cmnd->rw.flags = 0; cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id); - cmnd->rw.rsvd2 = 0; + cmnd->rw.cdw2 = 0; + cmnd->rw.cdw3 = 0; cmnd->rw.metadata = 0; cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); @@ -864,7 +888,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, NVME_RW_PRINFO_PRCHK_REF; if (op == nvme_cmd_zone_append) control |= NVME_RW_APPEND_PIREMAP; - cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req)); + nvme_set_ref_tag(ns, cmnd, req); break; } } @@ -1520,33 +1544,58 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) } #ifdef CONFIG_BLK_DEV_INTEGRITY -static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type, +static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns, u32 max_integrity_segments) { struct blk_integrity integrity = { }; - switch (pi_type) { + switch (ns->pi_type) { case NVME_NS_DPS_PI_TYPE3: - integrity.profile = &t10_pi_type3_crc; - integrity.tag_size = sizeof(u16) + sizeof(u32); - integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + switch (ns->guard_type) { + case NVME_NVM_NS_16B_GUARD: + integrity.profile = &t10_pi_type3_crc; + integrity.tag_size = sizeof(u16) + sizeof(u32); + integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + break; + case NVME_NVM_NS_64B_GUARD: + integrity.profile = &ext_pi_type3_crc64; + integrity.tag_size = sizeof(u16) + 6; + integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + break; + default: + integrity.profile = NULL; + break; + } break; case NVME_NS_DPS_PI_TYPE1: case NVME_NS_DPS_PI_TYPE2: - integrity.profile = &t10_pi_type1_crc; - integrity.tag_size = sizeof(u16); - integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + switch (ns->guard_type) { + case NVME_NVM_NS_16B_GUARD: + integrity.profile = &t10_pi_type1_crc; + integrity.tag_size = sizeof(u16); + integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + break; + case NVME_NVM_NS_64B_GUARD: + integrity.profile = &ext_pi_type1_crc64; + integrity.tag_size = sizeof(u16); + integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + break; + default: + integrity.profile = NULL; + break; + } break; default: integrity.profile = NULL; break; } - integrity.tuple_size = ms; + + integrity.tuple_size = ns->ms; blk_integrity_register(disk, &integrity); blk_queue_max_integrity_segments(disk->queue, max_integrity_segments); } #else -static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type, +static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns, u32 max_integrity_segments) { } @@ -1588,20 +1637,73 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) a->csi == b->csi; } -static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) +static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id) { + bool first = id->dps & NVME_NS_DPS_PI_FIRST; + unsigned lbaf = nvme_lbaf_index(id->flbas); struct nvme_ctrl *ctrl = ns->ctrl; + struct nvme_command c = { }; + struct nvme_id_ns_nvm *nvm; + int ret = 0; + u32 elbaf; + + ns->pi_size = 0; + ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); + if (!(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) { + ns->pi_size = sizeof(struct t10_pi_tuple); + ns->guard_type = NVME_NVM_NS_16B_GUARD; + goto set_pi; + } - /* - * The PI implementation requires the metadata size to be equal to the - * t10 pi tuple size. - */ - ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); - if (ns->ms == sizeof(struct t10_pi_tuple)) + nvm = kzalloc(sizeof(*nvm), GFP_KERNEL); + if (!nvm) + return -ENOMEM; + + c.identify.opcode = nvme_admin_identify; + c.identify.nsid = cpu_to_le32(ns->head->ns_id); + c.identify.cns = NVME_ID_CNS_CS_NS; + c.identify.csi = NVME_CSI_NVM; + + ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, nvm, sizeof(*nvm)); + if (ret) + goto free_data; + + elbaf = le32_to_cpu(nvm->elbaf[lbaf]); + + /* no support for storage tag formats right now */ + if (nvme_elbaf_sts(elbaf)) + goto free_data; + + ns->guard_type = nvme_elbaf_guard_type(elbaf); + switch (ns->guard_type) { + case NVME_NVM_NS_64B_GUARD: + ns->pi_size = sizeof(struct crc64_pi_tuple); + break; + case NVME_NVM_NS_16B_GUARD: + ns->pi_size = sizeof(struct t10_pi_tuple); + break; + default: + break; + } + +free_data: + kfree(nvm); +set_pi: + if (ns->pi_size && (first || ns->ms == ns->pi_size)) ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; else ns->pi_type = 0; + return ret; +} + +static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) +{ + struct nvme_ctrl *ctrl = ns->ctrl; + + if (nvme_init_ms(ns, id)) + return; + ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) return; @@ -1717,7 +1819,7 @@ static void nvme_update_disk_info(struct gendisk *disk, if (ns->ms) { if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && (ns->features & NVME_NS_METADATA_SUPPORTED)) - nvme_init_integrity(disk, ns->ms, ns->pi_type, + nvme_init_integrity(disk, ns, ns->ctrl->max_integrity_segments); else if (!nvme_ns_has_pi(ns)) capacity = 0; @@ -1772,7 +1874,7 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id) static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) { - unsigned lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; + unsigned lbaf = nvme_lbaf_index(id->flbas); int ret; blk_mq_freeze_queue(ns->disk->queue); @@ -2117,20 +2219,27 @@ static int nvme_configure_timestamp(struct nvme_ctrl *ctrl) return ret; } -static int nvme_configure_acre(struct nvme_ctrl *ctrl) +static int nvme_configure_host_options(struct nvme_ctrl *ctrl) { struct nvme_feat_host_behavior *host; + u8 acre = 0, lbafee = 0; int ret; /* Don't bother enabling the feature if retry delay is not reported */ - if (!ctrl->crdt[0]) + if (ctrl->crdt[0]) + acre = NVME_ENABLE_ACRE; + if (ctrl->ctratt & NVME_CTRL_ATTR_ELBAS) + lbafee = NVME_ENABLE_LBAFEE; + + if (!acre && !lbafee) return 0; host = kzalloc(sizeof(*host), GFP_KERNEL); if (!host) return 0; - host->acre = NVME_ENABLE_ACRE; + host->acre = acre; + host->lbafee = lbafee; ret = nvme_set_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0, host, sizeof(*host), NULL); kfree(host); @@ -2968,7 +3077,7 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) if (ret < 0) return ret; - ret = nvme_configure_acre(ctrl); + ret = nvme_configure_host_options(ctrl); if (ret < 0) return ret; @@ -4704,12 +4813,14 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE); + BUILD_BUG_ON(sizeof(struct nvme_id_ns_nvm) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_nvm) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64); + BUILD_BUG_ON(sizeof(struct nvme_feat_host_behavior) != 512); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 293a93a15fa6..f4b674a8ce20 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -453,9 +453,11 @@ struct nvme_ns { int lba_shift; u16 ms; + u16 pi_size; u16 sgs; u32 sws; u8 pi_type; + u8 guard_type; #ifdef CONFIG_BLK_DEV_ZONED u64 zsze; #endif @@ -478,7 +480,7 @@ struct nvme_ns { /* NVMe ns supports metadata actions by the controller (generate/strip) */ static inline bool nvme_ns_has_pi(struct nvme_ns *ns) { - return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple); + return ns->pi_type && ns->ms == ns->pi_size; } struct nvme_ctrl_ops { diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h index 1c4242416c9f..8fc637379899 100644 --- a/include/asm-generic/unaligned.h +++ b/include/asm-generic/unaligned.h @@ -126,4 +126,30 @@ static inline void put_unaligned_le24(const u32 val, void *p) __put_unaligned_le24(val, p); } +static inline void __put_unaligned_be48(const u64 val, __u8 *p) +{ + *p++ = val >> 40; + *p++ = val >> 32; + *p++ = val >> 24; + *p++ = val >> 16; + *p++ = val >> 8; + *p++ = val; +} + +static inline void put_unaligned_be48(const u64 val, void *p) +{ + __put_unaligned_be48(val, p); +} + +static inline u64 __get_unaligned_be48(const u8 *p) +{ + return (u64)p[0] << 40 | (u64)p[1] << 32 | p[2] << 24 | + p[3] << 16 | p[4] << 8 | p[5]; +} + +static inline u64 get_unaligned_be48(const void *p) +{ + return __get_unaligned_be48(p); +} + #endif /* __ASM_GENERIC_UNALIGNED_H */ diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 8a038ea0717e..378b2459efe2 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -19,6 +19,7 @@ struct blk_integrity_iter { sector_t seed; unsigned int data_size; unsigned short interval; + unsigned char tuple_size; const char *disk_name; }; diff --git a/include/linux/crc64.h b/include/linux/crc64.h index c756e65a1b58..e044c60d1e61 100644 --- a/include/linux/crc64.h +++ b/include/linux/crc64.h @@ -7,5 +7,12 @@ #include <linux/types.h> +#define CRC64_ROCKSOFT_STRING "crc64-rocksoft" + u64 __pure crc64_be(u64 crc, const void *p, size_t len); +u64 __pure crc64_rocksoft_generic(u64 crc, const void *p, size_t len); + +u64 crc64_rocksoft(const unsigned char *buffer, size_t len); +u64 crc64_rocksoft_update(u64 crc, const unsigned char *buffer, size_t len); + #endif /* _LINUX_CRC64_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index a890428bcc1a..08ba5995aa8b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -64,6 +64,15 @@ ) /** + * lower_48_bits() - return bits 0-47 of a number + * @n: the number we're accessing + */ +static inline u64 lower_48_bits(u64 n) +{ + return n & ((1ull << 48) - 1); +} + +/** * upper_32_bits - return bits 32-63 of a number * @n: the number we're accessing * diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9dbc3ef4daf7..4f44f83817a9 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -244,6 +244,7 @@ enum { enum nvme_ctrl_attr { NVME_CTRL_ATTR_HID_128_BIT = (1 << 0), NVME_CTRL_ATTR_TBKAS = (1 << 6), + NVME_CTRL_ATTR_ELBAS = (1 << 15), }; struct nvme_id_ctrl { @@ -399,8 +400,7 @@ struct nvme_id_ns { __le16 endgid; __u8 nguid[16]; __u8 eui64[8]; - struct nvme_lbaf lbaf[16]; - __u8 rsvd192[192]; + struct nvme_lbaf lbaf[64]; __u8 vs[3712]; }; @@ -418,8 +418,7 @@ struct nvme_id_ns_zns { __le32 rrl; __le32 frl; __u8 rsvd20[2796]; - struct nvme_zns_lbafe lbafe[16]; - __u8 rsvd3072[768]; + struct nvme_zns_lbafe lbafe[64]; __u8 vs[256]; }; @@ -428,6 +427,30 @@ struct nvme_id_ctrl_zns { __u8 rsvd1[4095]; }; +struct nvme_id_ns_nvm { + __le64 lbstm; + __u8 pic; + __u8 rsvd9[3]; + __le32 elbaf[64]; + __u8 rsvd268[3828]; +}; + +enum { + NVME_ID_NS_NVM_STS_MASK = 0x3f, + NVME_ID_NS_NVM_GUARD_SHIFT = 7, + NVME_ID_NS_NVM_GUARD_MASK = 0x3, +}; + +static inline __u8 nvme_elbaf_sts(__u32 elbaf) +{ + return elbaf & NVME_ID_NS_NVM_STS_MASK; +} + +static inline __u8 nvme_elbaf_guard_type(__u32 elbaf) +{ + return (elbaf >> NVME_ID_NS_NVM_GUARD_SHIFT) & NVME_ID_NS_NVM_GUARD_MASK; +} + struct nvme_id_ctrl_nvm { __u8 vsl; __u8 wzsl; @@ -478,6 +501,8 @@ enum { NVME_NS_FEAT_IO_OPT = 1 << 4, NVME_NS_ATTR_RO = 1 << 0, NVME_NS_FLBAS_LBA_MASK = 0xf, + NVME_NS_FLBAS_LBA_UMASK = 0x60, + NVME_NS_FLBAS_LBA_SHIFT = 1, NVME_NS_FLBAS_META_EXT = 0x10, NVME_NS_NMIC_SHARED = 1 << 0, NVME_LBAF_RP_BEST = 0, @@ -496,6 +521,18 @@ enum { NVME_NS_DPS_PI_TYPE3 = 3, }; +enum { + NVME_NVM_NS_16B_GUARD = 0, + NVME_NVM_NS_32B_GUARD = 1, + NVME_NVM_NS_64B_GUARD = 2, +}; + +static inline __u8 nvme_lbaf_index(__u8 flbas) +{ + return (flbas & NVME_NS_FLBAS_LBA_MASK) | + ((flbas & NVME_NS_FLBAS_LBA_UMASK) >> NVME_NS_FLBAS_LBA_SHIFT); +} + /* Identify Namespace Metadata Capabilities (MC): */ enum { NVME_MC_EXTENDED_LBA = (1 << 0), @@ -842,7 +879,8 @@ struct nvme_rw_command { __u8 flags; __u16 command_id; __le32 nsid; - __u64 rsvd2; + __le32 cdw2; + __le32 cdw3; __le64 metadata; union nvme_data_ptr dptr; __le64 slba; @@ -996,11 +1034,14 @@ enum { struct nvme_feat_host_behavior { __u8 acre; - __u8 resv1[511]; + __u8 etdas; + __u8 lbafee; + __u8 resv1[509]; }; enum { NVME_ENABLE_ACRE = 1, + NVME_ENABLE_LBAFEE = 1, }; /* Admin commands */ diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index c635c2e014e3..a4b1af581f69 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -53,4 +53,24 @@ extern const struct blk_integrity_profile t10_pi_type1_ip; extern const struct blk_integrity_profile t10_pi_type3_crc; extern const struct blk_integrity_profile t10_pi_type3_ip; +struct crc64_pi_tuple { + __be64 guard_tag; + __be16 app_tag; + __u8 ref_tag[6]; +}; + +static inline u64 ext_pi_ref_tag(struct request *rq) +{ + unsigned int shift = ilog2(queue_logical_block_size(rq->q)); + +#ifdef CONFIG_BLK_DEV_INTEGRITY + if (rq->q->integrity.interval_exp) + shift = rq->q->integrity.interval_exp; +#endif + return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT)); +} + +extern const struct blk_integrity_profile ext_pi_type1_crc64; +extern const struct blk_integrity_profile ext_pi_type3_crc64; + #endif diff --git a/lib/Kconfig b/lib/Kconfig index 9b5a692ce00c..087e06b4cdfd 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -145,6 +145,15 @@ config CRC_T10DIF kernel tree needs to calculate CRC checks for use with the SCSI data integrity subsystem. +config CRC64_ROCKSOFT + tristate "CRC calculation for the Rocksoft model CRC64" + select CRC64 + select CRYPTO + select CRYPTO_CRC64_ROCKSOFT + help + This option provides a CRC64 API to a registered crypto driver. + This is used with the block layer's data integrity subsystem. + config CRC_ITU_T tristate "CRC ITU-T V.41 functions" help diff --git a/lib/Makefile b/lib/Makefile index 2fd40c5bf378..df001b876930 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -174,6 +174,7 @@ obj-$(CONFIG_CRC4) += crc4.o obj-$(CONFIG_CRC7) += crc7.o obj-$(CONFIG_LIBCRC32C) += libcrc32c.o obj-$(CONFIG_CRC8) += crc8.o +obj-$(CONFIG_CRC64_ROCKSOFT) += crc64-rocksoft.o obj-$(CONFIG_XXHASH) += xxhash.o obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o diff --git a/lib/crc64-rocksoft.c b/lib/crc64-rocksoft.c new file mode 100644 index 000000000000..fc9ae0da5df7 --- /dev/null +++ b/lib/crc64-rocksoft.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/crc64.h> +#include <linux/err.h> +#include <linux/init.h> +#include <crypto/hash.h> +#include <crypto/algapi.h> +#include <linux/static_key.h> +#include <linux/notifier.h> + +static struct crypto_shash __rcu *crc64_rocksoft_tfm; +static DEFINE_STATIC_KEY_TRUE(crc64_rocksoft_fallback); +static DEFINE_MUTEX(crc64_rocksoft_mutex); +static struct work_struct crc64_rocksoft_rehash_work; + +static int crc64_rocksoft_notify(struct notifier_block *self, unsigned long val, void *data) +{ + struct crypto_alg *alg = data; + + if (val != CRYPTO_MSG_ALG_LOADED || + strcmp(alg->cra_name, CRC64_ROCKSOFT_STRING)) + return NOTIFY_DONE; + + schedule_work(&crc64_rocksoft_rehash_work); + return NOTIFY_OK; +} + +static void crc64_rocksoft_rehash(struct work_struct *work) +{ + struct crypto_shash *new, *old; + + mutex_lock(&crc64_rocksoft_mutex); + old = rcu_dereference_protected(crc64_rocksoft_tfm, + lockdep_is_held(&crc64_rocksoft_mutex)); + new = crypto_alloc_shash(CRC64_ROCKSOFT_STRING, 0, 0); + if (IS_ERR(new)) { + mutex_unlock(&crc64_rocksoft_mutex); + return; + } + rcu_assign_pointer(crc64_rocksoft_tfm, new); + mutex_unlock(&crc64_rocksoft_mutex); + + if (old) { + synchronize_rcu(); + crypto_free_shash(old); + } else { + static_branch_disable(&crc64_rocksoft_fallback); + } +} + +static struct notifier_block crc64_rocksoft_nb = { + .notifier_call = crc64_rocksoft_notify, +}; + +u64 crc64_rocksoft_update(u64 crc, const unsigned char *buffer, size_t len) +{ + struct { + struct shash_desc shash; + u64 crc; + } desc; + int err; + + if (static_branch_unlikely(&crc64_rocksoft_fallback)) + return crc64_rocksoft_generic(crc, buffer, len); + + rcu_read_lock(); + desc.shash.tfm = rcu_dereference(crc64_rocksoft_tfm); + desc.crc = crc; + err = crypto_shash_update(&desc.shash, buffer, len); + rcu_read_unlock(); + + BUG_ON(err); + + return desc.crc; +} +EXPORT_SYMBOL_GPL(crc64_rocksoft_update); + +u64 crc64_rocksoft(const unsigned char *buffer, size_t len) +{ + return crc64_rocksoft_update(0, buffer, len); +} +EXPORT_SYMBOL_GPL(crc64_rocksoft); + +static int __init crc64_rocksoft_mod_init(void) +{ + INIT_WORK(&crc64_rocksoft_rehash_work, crc64_rocksoft_rehash); + crypto_register_notifier(&crc64_rocksoft_nb); + crc64_rocksoft_rehash(&crc64_rocksoft_rehash_work); + return 0; +} + +static void __exit crc64_rocksoft_mod_fini(void) +{ + crypto_unregister_notifier(&crc64_rocksoft_nb); + cancel_work_sync(&crc64_rocksoft_rehash_work); + crypto_free_shash(rcu_dereference_protected(crc64_rocksoft_tfm, 1)); +} + +module_init(crc64_rocksoft_mod_init); +module_exit(crc64_rocksoft_mod_fini); + +static int crc64_rocksoft_transform_show(char *buffer, const struct kernel_param *kp) +{ + struct crypto_shash *tfm; + int len; + + if (static_branch_unlikely(&crc64_rocksoft_fallback)) + return sprintf(buffer, "fallback\n"); + + rcu_read_lock(); + tfm = rcu_dereference(crc64_rocksoft_tfm); + len = snprintf(buffer, PAGE_SIZE, "%s\n", + crypto_shash_driver_name(tfm)); + rcu_read_unlock(); + + return len; +} + +module_param_call(transform, NULL, crc64_rocksoft_transform_show, NULL, 0444); + +MODULE_AUTHOR("Keith Busch <kbusch@kernel.org>"); +MODULE_DESCRIPTION("Rocksoft model CRC64 calculation (library API)"); +MODULE_LICENSE("GPL"); +MODULE_SOFTDEP("pre: crc64"); diff --git a/lib/crc64.c b/lib/crc64.c index 9f852a89ee2a..61ae8dfb6a1c 100644 --- a/lib/crc64.c +++ b/lib/crc64.c @@ -22,6 +22,13 @@ * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + * x^7 + x^4 + x + 1 * + * crc64rocksoft[256] table is from the Rocksoft specification polynomial + * defined as, + * + * x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 + x^47 + + * x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 + x^26 + x^23 + + * x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 + x^4 + x^3 + 1 + * * Copyright 2018 SUSE Linux. * Author: Coly Li <colyli@suse.de> */ @@ -55,3 +62,24 @@ u64 __pure crc64_be(u64 crc, const void *p, size_t len) return crc; } EXPORT_SYMBOL_GPL(crc64_be); + +/** + * crc64_rocksoft_generic - Calculate bitwise Rocksoft CRC64 + * @crc: seed value for computation. 0 for a new CRC calculation, or the + * previous crc64 value if computing incrementally. + * @p: pointer to buffer over which CRC64 is run + * @len: length of buffer @p + */ +u64 __pure crc64_rocksoft_generic(u64 crc, const void *p, size_t len) +{ + const unsigned char *_p = p; + size_t i; + + crc = ~crc; + + for (i = 0; i < len; i++) + crc = (crc >> 8) ^ crc64rocksofttable[(crc & 0xff) ^ *_p++]; + + return ~crc; +} +EXPORT_SYMBOL_GPL(crc64_rocksoft_generic); diff --git a/lib/gen_crc64table.c b/lib/gen_crc64table.c index 094b43aef8db..55e222acd0b8 100644 --- a/lib/gen_crc64table.c +++ b/lib/gen_crc64table.c @@ -17,10 +17,30 @@ #include <stdio.h> #define CRC64_ECMA182_POLY 0x42F0E1EBA9EA3693ULL +#define CRC64_ROCKSOFT_POLY 0x9A6C9329AC4BC9B5ULL static uint64_t crc64_table[256] = {0}; +static uint64_t crc64_rocksoft_table[256] = {0}; -static void generate_crc64_table(void) +static void generate_reflected_crc64_table(uint64_t table[256], uint64_t poly) +{ + uint64_t i, j, c, crc; + + for (i = 0; i < 256; i++) { + crc = 0ULL; + c = i; + + for (j = 0; j < 8; j++) { + if ((crc ^ (c >> j)) & 1) + crc = (crc >> 1) ^ poly; + else + crc >>= 1; + } + table[i] = crc; + } +} + +static void generate_crc64_table(uint64_t table[256], uint64_t poly) { uint64_t i, j, c, crc; @@ -30,26 +50,22 @@ static void generate_crc64_table(void) for (j = 0; j < 8; j++) { if ((crc ^ c) & 0x8000000000000000ULL) - crc = (crc << 1) ^ CRC64_ECMA182_POLY; + crc = (crc << 1) ^ poly; else crc <<= 1; c <<= 1; } - crc64_table[i] = crc; + table[i] = crc; } } -static void print_crc64_table(void) +static void output_table(uint64_t table[256]) { int i; - printf("/* this file is generated - do not edit */\n\n"); - printf("#include <linux/types.h>\n"); - printf("#include <linux/cache.h>\n\n"); - printf("static const u64 ____cacheline_aligned crc64table[256] = {\n"); for (i = 0; i < 256; i++) { - printf("\t0x%016" PRIx64 "ULL", crc64_table[i]); + printf("\t0x%016" PRIx64 "ULL", table[i]); if (i & 0x1) printf(",\n"); else @@ -58,9 +74,22 @@ static void print_crc64_table(void) printf("};\n"); } +static void print_crc64_tables(void) +{ + printf("/* this file is generated - do not edit */\n\n"); + printf("#include <linux/types.h>\n"); + printf("#include <linux/cache.h>\n\n"); + printf("static const u64 ____cacheline_aligned crc64table[256] = {\n"); + output_table(crc64_table); + + printf("\nstatic const u64 ____cacheline_aligned crc64rocksofttable[256] = {\n"); + output_table(crc64_rocksoft_table); +} + int main(int argc, char *argv[]) { - generate_crc64_table(); - print_crc64_table(); + generate_crc64_table(crc64_table, CRC64_ECMA182_POLY); + generate_reflected_crc64_table(crc64_rocksoft_table, CRC64_ROCKSOFT_POLY); + print_crc64_tables(); return 0; } |