diff options
author | David Teigland <teigland@redhat.com> | 2018-07-03 12:14:19 -0500 |
---|---|---|
committer | David Teigland <teigland@redhat.com> | 2018-07-24 15:46:06 -0500 |
commit | 7ac01dc7fa4108f06d29f818547eda369186e7cc (patch) | |
tree | 665d1a7977eb9b02efcecbad092f898da324b433 | |
parent | 2f43f0393e9d7b85578895aa2e31314a4565e47d (diff) | |
download | lvm2-7ac01dc7fa4108f06d29f818547eda369186e7cc.tar.gz |
Add cache devices
A cache device is a special PV that is added to a VG
to be used for caching, and not for allocating standard LVs.
A cache device (CD) is used to create a cache volume.
A cache volume (CV) is a special LV that can be attached
to a standard LV to do caching.
Terminology:
PV used for caching = cache device = cachedev = CD
LV used for caching = cache volume = cachevol = CV
PV metadata for a cachedev includes the CACHEDEV flag.
LV metadata for a cachevol includes the CACHEVOL flag.
A cachedev is added to a VG with:
vgextend --cachedev VG PV
The --cachedev option tells lvm that the PV should
be added to the VG as a cache device, not a standard PV.
(Cache volumes are added by the following commit.)
-rw-r--r-- | lib/cache/lvmcache.c | 59 | ||||
-rw-r--r-- | lib/cache/lvmcache.h | 4 | ||||
-rw-r--r-- | lib/device/device.h | 8 | ||||
-rw-r--r-- | lib/format_text/export.c | 97 | ||||
-rw-r--r-- | lib/format_text/flags.c | 1 | ||||
-rw-r--r-- | lib/format_text/format-text.c | 21 | ||||
-rw-r--r-- | lib/format_text/import_vsn1.c | 16 | ||||
-rw-r--r-- | lib/label/label.c | 7 | ||||
-rw-r--r-- | lib/metadata/metadata-exported.h | 8 | ||||
-rw-r--r-- | lib/metadata/metadata.c | 132 | ||||
-rw-r--r-- | lib/metadata/metadata.h | 1 | ||||
-rw-r--r-- | lib/metadata/pv.c | 3 | ||||
-rw-r--r-- | lib/metadata/pv.h | 4 | ||||
-rw-r--r-- | lib/metadata/pv_manip.c | 26 | ||||
-rw-r--r-- | lib/metadata/vg.c | 41 | ||||
-rw-r--r-- | lib/metadata/vg.h | 3 | ||||
-rw-r--r-- | tools/args.h | 3 | ||||
-rw-r--r-- | tools/command-lines.in | 4 | ||||
-rw-r--r-- | tools/command.c | 1 | ||||
-rw-r--r-- | tools/commands.h | 4 | ||||
-rw-r--r-- | tools/pvscan.c | 13 | ||||
-rw-r--r-- | tools/toollib.c | 141 | ||||
-rw-r--r-- | tools/tools.h | 2 | ||||
-rw-r--r-- | tools/vgextend.c | 18 |
24 files changed, 545 insertions, 72 deletions
diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 46470a5fd..b960740b4 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -64,6 +64,7 @@ static struct dm_hash_table *_pvid_hash = NULL; static struct dm_hash_table *_vgid_hash = NULL; static struct dm_hash_table *_vgname_hash = NULL; static DM_LIST_INIT(_vginfos); +static DM_LIST_INIT(_cachedevs); static DM_LIST_INIT(_found_duplicate_devs); static DM_LIST_INIT(_unused_duplicate_devs); static int _scanning_in_progress = 0; @@ -1075,11 +1076,15 @@ static struct device *_device_from_pvid(const struct id *pvid, uint64_t *label_s struct device *lvmcache_device_from_pvid(struct cmd_context *cmd, const struct id *pvid, uint64_t *label_sector) { struct device *dev; + struct cachedev *cd; dev = _device_from_pvid(pvid, label_sector); if (dev) return dev; + if ((cd = lvmcache_cachedev_from_pvid(pvid))) + return cd->dev; + log_debug_devs("No device with uuid %s.", (const char *)pvid); return NULL; } @@ -1563,6 +1568,41 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vg return 1; } +/* + * The CDs have no metadata, so the scan put the info structs for + * them into the orphan vg. Using the VG metadata, those CD info + * structs can now be identified and removed, and cachedev entries + * created. 
+ */ +void lvmcache_update_vg_cachedevs(struct volume_group *vg) +{ + struct pv_list *pvl; + struct lvmcache_info *info; + struct cachedev *cd; + + dm_list_iterate_items(pvl, &vg->cds) { + if (!(info = lvmcache_info_from_pvid((const char *)&pvl->pv->id, pvl->pv->dev, 0))) { + log_debug("lvmcache missing info for cachedev pv %s", dev_name(pvl->pv->dev)); + continue; + } + + if (!lvmcache_cachedev_from_pvid(&pvl->pv->id)) { + if (!(cd = zalloc(sizeof(*cd)))) { + stack; + return; + } + + log_debug("lvmcache add cachedev %s %s", dev_name(pvl->pv->dev), pvl->pv->dev->pvid); + cd->dev = pvl->pv->dev; + cd->device_size = info->device_size; + cd->vg_name = strdup(vg->name); + dm_list_add(&_cachedevs, &cd->list); + } + + lvmcache_del(info); + } +} + int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted) { struct pv_list *pvl; @@ -2280,3 +2320,22 @@ int lvmcache_scan_mismatch(struct cmd_context *cmd, const char *vgname, const ch return 1; } +struct cachedev *lvmcache_cachedev_from_pvid(const struct id *pvid) +{ + struct cachedev *cd; + + dm_list_iterate_items(cd, &_cachedevs) { + if (!memcmp(&cd->dev->pvid, pvid, sizeof(struct id))) { + log_debug("lvmcache_cachedev_from_pvid %s %s", dev_name(cd->dev), cd->dev->pvid); + return cd; + } + } + return NULL; +} + +void lvmcache_del_cachedev(struct cachedev *cd) +{ + log_debug("lvmcache del cachedev %s", dev_name(cd->dev)); + dm_list_del(&cd->list); +} + diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index ba6040552..a5d5b4cda 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -209,4 +209,8 @@ void lvmcache_drop_saved_vgid(const char *vgid); int dev_in_device_list(struct device *dev, struct dm_list *head); +struct cachedev *lvmcache_cachedev_from_pvid(const struct id *pvid); +void lvmcache_update_vg_cachedevs(struct volume_group *vg); +void lvmcache_del_cachedev(struct cachedev *cd); + #endif diff --git a/lib/device/device.h b/lib/device/device.h index e879dbb2e..2a682d966 100644 --- 
a/lib/device/device.h +++ b/lib/device/device.h @@ -70,6 +70,7 @@ struct device { int read_ahead; int bcache_fd; uint32_t flags; + uint32_t is_pmem:1; unsigned size_seqno; uint64_t size; uint64_t end; @@ -110,6 +111,13 @@ struct device_area { uint64_t size; /* Bytes */ }; +struct cachedev { + struct dm_list list; + struct device *dev; + uint64_t device_size; /* Bytes */ + const char *vg_name; +}; + /* * Support for external device info. */ diff --git a/lib/format_text/export.c b/lib/format_text/export.c index a9a7e159b..c8ca0fb1a 100644 --- a/lib/format_text/export.c +++ b/lib/format_text/export.c @@ -520,59 +520,69 @@ static const char *_get_pv_name(struct formatter *f, struct physical_volume *pv) return _get_pv_name_from_uuid(f, uuid); } -static int _print_pvs(struct formatter *f, struct volume_group *vg) +static int _print_pv(struct formatter *f, struct volume_group *vg, struct physical_volume *pv) { - struct pv_list *pvl; - struct physical_volume *pv; char buffer[PATH_MAX * 2]; const char *name; - outf(f, "physical_volumes {"); + if (!id_write_format(&pv->id, buffer, sizeof(buffer))) + return_0; + + if (!(name = _get_pv_name_from_uuid(f, buffer))) + return_0; + + outnl(f); + outf(f, "%s {", name); _inc_indent(f); - dm_list_iterate_items(pvl, &vg->pvs) { - pv = pvl->pv; + outf(f, "id = \"%s\"", buffer); - if (!id_write_format(&pv->id, buffer, sizeof(buffer))) - return_0; + if (strlen(pv_dev_name(pv)) >= PATH_MAX) { + log_error("pv device name size is out of bounds."); + return 0; + } - if (!(name = _get_pv_name_from_uuid(f, buffer))) - return_0; + outhint(f, "device = \"%s\"", + dm_escape_double_quotes(buffer, pv_dev_name(pv))); + outnl(f); - outnl(f); - outf(f, "%s {", name); - _inc_indent(f); + if (!_print_flag_config(f, pv->status, PV_FLAGS)) + return_0; - outf(f, "id = \"%s\"", buffer); + if (!_out_list(f, &pv->tags, "tags")) + return_0; - if (strlen(pv_dev_name(pv)) >= PATH_MAX) { - log_error("pv device name size is out of bounds."); - return 0; - } + 
outsize(f, pv->size, "dev_size = " FMTu64, pv->size); - outhint(f, "device = \"%s\"", - dm_escape_double_quotes(buffer, pv_dev_name(pv))); - outnl(f); + outf(f, "pe_start = " FMTu64, pv->pe_start); + outsize(f, vg->extent_size * (uint64_t) pv->pe_count, + "pe_count = %u", pv->pe_count); - if (!_print_flag_config(f, pv->status, PV_FLAGS)) - return_0; + if (pv->ba_start && pv->ba_size) { + outf(f, "ba_start = " FMTu64, pv->ba_start); + outsize(f, pv->ba_size, "ba_size = " FMTu64, pv->ba_size); + } - if (!_out_list(f, &pv->tags, "tags")) - return_0; + _dec_indent(f); + outf(f, "}"); + return 1; +} - outsize(f, pv->size, "dev_size = " FMTu64, pv->size); +static int _print_pvs(struct formatter *f, struct volume_group *vg) +{ + struct pv_list *pvl; - outf(f, "pe_start = " FMTu64, pv->pe_start); - outsize(f, vg->extent_size * (uint64_t) pv->pe_count, - "pe_count = %u", pv->pe_count); + outf(f, "physical_volumes {"); + _inc_indent(f); - if (pv->ba_start && pv->ba_size) { - outf(f, "ba_start = " FMTu64, pv->ba_start); - outsize(f, pv->ba_size, "ba_size = " FMTu64, pv->ba_size); - } + dm_list_iterate_items(pvl, &vg->pvs) { + if (!_print_pv(f, vg, pvl->pv)) + return_0; + } - _dec_indent(f); - outf(f, "}"); + dm_list_iterate_items(pvl, &vg->cds) { + if (!_print_pv(f, vg, pvl->pv)) + return_0; } _dec_indent(f); @@ -971,6 +981,23 @@ static int _build_pv_names(struct formatter *f, struct volume_group *vg) return_0; } + dm_list_iterate_items(pvl, &vg->cds) { + pv = pvl->pv; + + if (dm_snprintf(buffer, sizeof(buffer), "pv%d", count++) < 0) + return_0; + + if (!(name = dm_pool_strdup(f->mem, buffer))) + return_0; + + if (!(uuid = dm_pool_zalloc(f->mem, 64)) || + !id_write_format(&pv->id, uuid, 64)) + return_0; + + if (!dm_hash_insert(f->pv_names, uuid, name)) + return_0; + } + return 1; } diff --git a/lib/format_text/flags.c b/lib/format_text/flags.c index 6f5ff9f7c..218ebb492 100644 --- a/lib/format_text/flags.c +++ b/lib/format_text/flags.c @@ -48,6 +48,7 @@ static const struct 
flag _pv_flags[] = { {EXPORTED_VG, "EXPORTED", STATUS_FLAG}, {MISSING_PV, "MISSING", COMPATIBLE_FLAG}, {MISSING_PV, "MISSING", STATUS_FLAG}, + {CACHEDEV_PV, "CACHEDEV", STATUS_FLAG}, {PV_MOVED_VG, NULL, 0}, {UNLABELLED_PV, NULL, 0}, {0, NULL, 0} diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index c7544aa26..990de03a1 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -597,6 +597,8 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, uint64_t new_wrap = 0, old_wrap = 0, new_end; int found = 0; int noprecommit = 0; + uint32_t meta_size; + char *meta_buf; const char *old_vg_name = NULL; /* Ignore any mda on a PV outside the VG. vgsplit relies on this */ @@ -615,11 +617,20 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda)))) goto_out; - if (!fidtc->raw_metadata_buf && - !(fidtc->raw_metadata_buf_size = - text_vg_export_raw(vg, "", &fidtc->raw_metadata_buf))) { - log_error("VG %s metadata writing failed", vg->name); - goto out; + /* + * The first time through, for the first mda, we create a new export + * buffer, and subsequent mdas use the same. + */ + if (!fidtc->raw_metadata_buf) { + meta_size = text_vg_export_raw(vg, "", &meta_buf); + + if (!meta_size || !meta_buf) { + log_error("VG %s metadata writing failed", vg->name); + goto out; + } + + fidtc->raw_metadata_buf = meta_buf; + fidtc->raw_metadata_buf_size = meta_size; } rlocn = _read_metadata_location_vg(&mdac->area, mdah, mda_is_primary(mda), old_vg_name ? 
: vg->name, &noprecommit); diff --git a/lib/format_text/import_vsn1.c b/lib/format_text/import_vsn1.c index c9b927524..236ab258d 100644 --- a/lib/format_text/import_vsn1.c +++ b/lib/format_text/import_vsn1.c @@ -231,6 +231,12 @@ static int _read_pv(struct format_instance *fid, return 0; } + if (pv->status & CACHEDEV_PV) { + pv->is_cachedev = 1; + if (pv->dev) + set_cachedev_type(pv); + } + if (!pv->dev) pv->status |= MISSING_PV; @@ -310,9 +316,13 @@ static int _read_pv(struct format_instance *fid, if (!alloc_pv_segment_whole_pv(mem, pv)) return_0; - vg->extent_count += pv->pe_count; - vg->free_count += pv->pe_count; - add_pvl_to_vgs(vg, pvl); + if (!pv->is_cachedev) { + vg->extent_count += pv->pe_count; + vg->free_count += pv->pe_count; + add_pvl_to_vgs(vg, pvl); + } else { + dm_list_add(&vg->cds, &pvl->list); + } return 1; } diff --git a/lib/label/label.c b/lib/label/label.c index d2c868564..46786d77b 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -194,6 +194,13 @@ int label_write(struct device *dev, struct label *label) lh->sector_xl = xlate64(label->sector); lh->offset_xl = xlate32(sizeof(*lh)); + /* + * Set pv_header and pv_header_extension fields before + * calculating the crc of the block to set in lh crc. + * The crc covers data from just after the crc field + * to the end of the sector. 
+	 */
+
 	if (!(label->labeller->ops->write)(label, buf))
 		return_0;
 
diff --git a/lib/metadata/metadata-exported.h b/lib/metadata/metadata-exported.h
index 840f6238a..85b36d32d 100644
--- a/lib/metadata/metadata-exported.h
+++ b/lib/metadata/metadata-exported.h
@@ -56,7 +56,7 @@
 #define ALLOCATABLE_PV		UINT64_C(0x0000000000000008)	/* PV */
 #define ARCHIVED_VG		ALLOCATABLE_PV		/* VG, reuse same bit */
 
-//#define SPINDOWN_LV		UINT64_C(0x0000000000000010)	/* LV */
+#define CACHEDEV_PV		UINT64_C(0x0000000000000010)	/* PV */
 //#define BADBLOCK_ON		UINT64_C(0x0000000000000020)	/* LV */
 #define VISIBLE_LV		UINT64_C(0x0000000000000040)	/* LV */
 #define FIXED_MINOR		UINT64_C(0x0000000000000080)	/* LV */
@@ -608,6 +608,7 @@ struct pvcreate_params {
 	unsigned is_remove : 1;	/* is removing PVs, not creating */
 	unsigned preserve_existing : 1;
 	unsigned check_failed : 1;
+	unsigned cachedev : 1;
 };
 
 struct lvresize_params {
@@ -748,6 +749,7 @@ int vg_remove(struct volume_group *vg);
 int vg_rename(struct cmd_context *cmd, struct volume_group *vg,
 	      const char *new_name);
 int vg_extend_each_pv(struct volume_group *vg, struct pvcreate_params *pp);
+int vg_extend_each_cd(struct volume_group *vg, struct pvcreate_params *pp);
 
 int vgreduce_single(struct cmd_context *cmd, struct volume_group *vg,
 		    struct physical_volume *pv, int commit);
@@ -1339,4 +1341,8 @@ int is_system_id_allowed(struct cmd_context *cmd, const char *system_id);
 
 int vg_strip_outdated_historical_lvs(struct volume_group *vg);
 
+int set_cachedev_type(struct physical_volume *pv);
+
+int set_cachevol_dev_type(struct logical_volume *lv);
+
 #endif
diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c
index 3b28bf583..393e2900f 100644
--- a/lib/metadata/metadata.c
+++ b/lib/metadata/metadata.c
@@ -699,6 +699,96 @@ int vg_extend_each_pv(struct volume_group *vg, struct pvcreate_params *pp)
 	return 1;
 }
 
+/*
+ * Add each PV on pp->pvs to vg as a cache device (CD).
+ *
+ * A PV is rejected if it is used by a VG, might be constructed from vg,
+ * or is already in vg by name or UUID.  An accepted PV is flagged as a
+ * cachedev, sized in extents of vg->extent_size and linked onto vg->cds.
+ *
+ * Returns 1 if every PV was added, 0 as soon as one PV cannot be added.
+ */
+int vg_extend_each_cd(struct volume_group *vg, struct pvcreate_params *pp)
+{
+	struct pv_list *pvl_pp;
+	struct pv_list *pvl_vg;
+	struct physical_volume *pv;
+	const char *pv_name;
+	uint64_t pe_count;
+	int used;
+
+	dm_list_iterate_items(pvl_pp, &pp->pvs) {
+		log_debug_metadata("Adding CD %s to VG %s.", pv_dev_name(pvl_pp->pv), vg->name);
+
+		pv = pvl_pp->pv;
+		pv_name = pv_dev_name(pv);
+
+		/*
+		 * A rejected PV fails the whole call: carrying on and returning
+		 * success would let the caller treat the VG as extended although
+		 * this device was never linked onto vg->cds.
+		 */
+		if ((used = is_used_pv(pv)) < 0)
+			return_0;
+		if (used) {
+			log_error("PV %s is used by a VG", pv_name);
+			return 0;
+		}
+		if (pv_uses_vg(pv, vg)) {
+			log_error("PV %s might be constructed from same VG.", pv_name);
+			return 0;
+		}
+		if (find_pv_in_vg(vg, pv_name)) {
+			log_error("PV %s already found in VG.", pv_name);
+			return 0;
+		}
+		if (find_pv_in_vg_by_uuid(vg, &pv->id)) {
+			log_error("PV %s UUID already found in VG.", pv_name);
+			return 0;
+		}
+
+		if (!(pvl_vg = dm_pool_zalloc(vg->vgmem, sizeof(*pvl_vg))))
+			return_0;
+
+		if (!(pv->vg_name = dm_pool_strdup(vg->vgmem, vg->name)))
+			return_0;
+
+		memcpy(&pv->vgid, &vg->id, sizeof(vg->id));
+
+		if (!set_cachedev_type(pv))
+			return_0;
+
+		pv->is_cachedev = 1;
+		pv->status |= CACHEDEV_PV;
+		pv->status |= ALLOCATABLE_PV;
+
+		pv->pe_size = vg->extent_size;
+		pv->pe_alloc_count = 0;
+
+		pv->pe_start = 2048; /* FIXME? */
+
+		/* Reject a device smaller than pe_start before subtracting. */
+		if (pv->size < pv->pe_start) {
+			log_error("PV %s is too small to be a cache device.", pv_name);
+			return 0;
+		}
+
+		pe_count = (pv->size - pv->pe_start) / vg->extent_size;
+		if (pe_count > UINT32_MAX)
+			return_0;
+
+		pv->pe_count = (uint32_t)pe_count;
+
+		if (!alloc_pv_segment_whole_pv(vg->vgmem, pv))
+			return_0;
+
+		pvl_vg->pv = pv;
+		dm_list_add(&vg->cds, &pvl_vg->list);
+	}
+
+	return 1;
+}
+
 int lv_change_tag(struct logical_volume *lv, const char *tag, int add_tag)
 {
 	char *tag_new;
@@ -3775,6 +3865,7 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
 		     !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) ||
 		    (!use_precommitted &&
 		     !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg)) {
+			log_debug("inconsistent vg_read %s from text read", vgname);
 			inconsistent = 1;
 			vg_fmtdata = NULL;
 			continue;
@@ -3814,6 +3905,11 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
 	}
 	fid->ref_count--;
 
+	if (correct_vg && !dm_list_empty(&correct_vg->cds)) {
+		log_debug("vg_read %s updating cds in lvmcache", vgname);
+		lvmcache_update_vg_cachedevs(correct_vg);
+	}
+
 	/* Ensure every PV in the VG was in the cache */
 	if (correct_vg) {
 		/*
@@ -3829,6 +3925,10 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
 		 */
 		if (!inconsistent &&
 		    dm_list_size(&correct_vg->pvs) > dm_list_size(pvids)) {
+
+			log_debug("vg_read %s mismatch pvs %d infos %d",
+				  vgname, dm_list_size(&correct_vg->pvs), dm_list_size(pvids));
+
 			dm_list_iterate_items(pvl, &correct_vg->pvs) {
 				if (!pvl->pv->dev) {
 					inconsistent_pvs = 1;
@@ -3845,6 +3945,8 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
 					 */
 					if (!(info = lvmcache_info_from_pvid(pvl->pv->dev->pvid, pvl->pv->dev, 1)) ||
 					    !lvmcache_is_orphan(info)) {
+						log_debug("inconsistent vg_read %s bad lvmcache info %p for %s",
+							  vgname, info, dev_name(pvl->pv->dev));
 						inconsistent_pvs = 1;
 						break;
 					}
@@ -4055,6 +4157,8 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
 		lvmcache_update_vg(correct_vg, (correct_vg->status &
PRECOMMITTED));
 
 	if (inconsistent) {
+		log_debug("inconsistent vg_read %s", vgname);
+
 		/* FIXME Test should be if we're *using* precommitted metadata not if we were searching for it */
 		if (use_precommitted) {
 			log_error("Inconsistent pre-commit metadata copies "
@@ -4163,8 +4247,10 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
 		}
 	}
 
-	if (inconsistent_pvs)
+	if (inconsistent_pvs) {
+		log_debug("inconsistent pvs in vg_read %s", vgname);
 		*mdas_consistent = 0;
+	}
 
 	return correct_vg;
 }
@@ -5547,3 +5633,63 @@ int vg_strip_outdated_historical_lvs(struct volume_group *vg) {
 
 	return 1;
 }
+
+/*
+ * Record on pv->dev whether the device of a cache device PV is pmem.
+ *
+ * dev is pmem if /sys/dev/block/<major>:<minor>/queue/dax is 1, in
+ * which case dev->is_pmem is set; otherwise it is left unchanged.
+ *
+ * Returns 1 when the sysfs attribute was read and parsed, and 0 when
+ * pv has no device or the attribute path cannot be built, opened,
+ * read or parsed.
+ */
+
+int set_cachedev_type(struct physical_volume *pv)
+{
+	FILE *fp;
+	struct device *dev = pv->dev;
+	char path[PATH_MAX];
+	char buffer[64];
+	int is_pmem = 0;
+
+	/* pv->dev may be unset (e.g. a missing PV); nothing to inspect. */
+	if (!dev) {
+		log_error("Cannot check dax for a PV with no device.");
+		return 0;
+	}
+
+	if (dm_snprintf(path, sizeof(path), "%sdev/block/%d:%d/queue/dax",
+			dm_sysfs_dir(),
+			(int) MAJOR(dev->dev),
+			(int) MINOR(dev->dev)) < 0) {
+		log_warn("Sysfs path for %s dax is too long.", dev_name(dev));
+		return 0;
+	}
+
+	if (!(fp = fopen(path, "r"))) {
+		/* Name the path so that this failure is not silent. */
+		log_debug("Failed to open %s.", path);
+		return 0;
+	}
+
+	if (!fgets(buffer, sizeof(buffer), fp)) {
+		log_warn("Failed to read %s.", path);
+		fclose(fp);
+		return 0;
+	} else if (sscanf(buffer, "%d", &is_pmem) != 1) {
+		log_warn("Failed to parse %s '%s'.", path, buffer);
+		fclose(fp);
+		return 0;
+	}
+
+	fclose(fp);
+
+	if (is_pmem) {
+		log_debug("cachedev %s is pmem", dev_name(dev));
+		dev->is_pmem = 1;
+	}
+
+	return 1;
+}
+
diff --git a/lib/metadata/metadata.h b/lib/metadata/metadata.h
index 164fba27e..478401e1d 100644
--- a/lib/metadata/metadata.h
+++ b/lib/metadata/metadata.h
@@ -55,7 +55,6 @@
 /* May any free extents on this PV be used or must they be left free?
*/ -#define SPINDOWN_LV UINT64_C(0x00000010) /* LV */ #define BADBLOCK_ON UINT64_C(0x00000020) /* LV */ //#define VIRTUAL UINT64_C(0x00010000) /* LV - internal use only */ #define PRECOMMITTED UINT64_C(0x00200000) /* VG - internal use only */ diff --git a/lib/metadata/pv.c b/lib/metadata/pv.c index 7b6f778e6..b1893623d 100644 --- a/lib/metadata/pv.c +++ b/lib/metadata/pv.c @@ -237,6 +237,7 @@ char *pv_attr_dup(struct dm_pool *mem, const struct physical_volume *pv) char *repstr; int used = is_used_pv(pv); int duplicate = lvmcache_dev_is_unchosen_duplicate(pv->dev); + int cachedev = lvmcache_cachedev_from_pvid(&pv->id) ? 1 : 0; if (!(repstr = dm_pool_zalloc(mem, 4))) { log_error("dm_pool_alloc failed"); @@ -248,6 +249,8 @@ char *pv_attr_dup(struct dm_pool *mem, const struct physical_volume *pv) */ if (duplicate) repstr[0] = 'd'; + else if (cachedev) + repstr[0] = 'c'; else if (pv->status & ALLOCATABLE_PV) repstr[0] = 'a'; else if (used > 0) diff --git a/lib/metadata/pv.h b/lib/metadata/pv.h index 61aa54e18..6be8c888a 100644 --- a/lib/metadata/pv.h +++ b/lib/metadata/pv.h @@ -57,8 +57,8 @@ struct physical_volume { unsigned long pe_align; unsigned long pe_align_offset; - /* This is true whenever the represented PV has a label associated. */ - uint64_t is_labelled:1; + uint64_t is_labelled:1; /* This is true whenever the represented PV has a label associated. */ + uint64_t is_cachedev:1; /* NB. 
label_sector is valid whenever is_labelled is true */ uint64_t label_sector; diff --git a/lib/metadata/pv_manip.c b/lib/metadata/pv_manip.c index 5fd80a2ce..3fc03f4dc 100644 --- a/lib/metadata/pv_manip.c +++ b/lib/metadata/pv_manip.c @@ -115,7 +115,8 @@ static struct pv_segment *_pv_split_segment(struct dm_pool *mem, if (peg->lvseg) { peg->pv->pe_alloc_count -= peg_new->len; - peg->lvseg->lv->vg->free_count += peg_new->len; + if (!pv->is_cachedev) + peg->lvseg->lv->vg->free_count += peg_new->len; } return peg_new; @@ -184,7 +185,9 @@ struct pv_segment *assign_peg_to_lvseg(struct physical_volume *pv, peg->lv_area = area_num; peg->pv->pe_alloc_count += area_len; - peg->lvseg->lv->vg->free_count -= area_len; + + if (!pv->is_cachedev) + peg->lvseg->lv->vg->free_count -= area_len; return peg; } @@ -195,6 +198,10 @@ int discard_pv_segment(struct pv_segment *peg, uint32_t discard_area_reduction) uint64_t pe_start = peg->pv->pe_start; char uuid[64] __attribute__((aligned(8))); + /* FIXME: pass in cmd as arg */ + if (peg->pv->is_cachedev) + return 1; + if (!peg->lvseg) { log_error("discard_pv_segment with unallocated segment: " "%s PE %" PRIu32, pv_dev_name(peg->pv), peg->pe); @@ -310,7 +317,8 @@ int release_pv_segment(struct pv_segment *peg, uint32_t area_reduction) if (peg->lvseg->area_len == area_reduction) { peg->pv->pe_alloc_count -= area_reduction; - peg->lvseg->lv->vg->free_count += area_reduction; + if (!peg->pv->is_cachedev) + peg->lvseg->lv->vg->free_count += area_reduction; peg->lvseg = NULL; peg->lv_area = 0; @@ -514,8 +522,10 @@ static int _reduce_pv(struct physical_volume *pv, struct volume_group *vg, pv->pe_count = new_pe_count; - vg->extent_count -= (old_pe_count - new_pe_count); - vg->free_count -= (old_pe_count - new_pe_count); + if (!pv->is_cachedev) { + vg->extent_count -= (old_pe_count - new_pe_count); + vg->free_count -= (old_pe_count - new_pe_count); + } return 1; } @@ -542,8 +552,10 @@ static int _extend_pv(struct physical_volume *pv, struct 
volume_group *vg,
 
 	pv->pe_count = new_pe_count;
 
-	vg->extent_count += (new_pe_count - old_pe_count);
-	vg->free_count += (new_pe_count - old_pe_count);
+	if (!pv->is_cachedev) {
+		vg->extent_count += (new_pe_count - old_pe_count);
+		vg->free_count += (new_pe_count - old_pe_count);
+	}
 
 	return 1;
 }
diff --git a/lib/metadata/vg.c b/lib/metadata/vg.c
index a9cb54174..573cb6b9a 100644
--- a/lib/metadata/vg.c
+++ b/lib/metadata/vg.c
@@ -54,6 +54,7 @@ struct volume_group *alloc_vg(const char *pool_name, struct cmd_context *cmd,
 	}
 
 	dm_list_init(&vg->pvs);
+	dm_list_init(&vg->cds);
 	dm_list_init(&vg->pv_write_list);
 	dm_list_init(&vg->lvs);
 	dm_list_init(&vg->historical_lvs);
@@ -664,6 +665,58 @@ char *vg_attr_dup(struct dm_pool *mem, const struct volume_group *vg)
 	return repstr;
 }
 
+/*
+ * Remove cache device pv from vg.
+ *
+ * Refuses a PV that still has allocated extents.  Otherwise unlinks the PV
+ * from vg->cds, writes and commits the VG metadata, and then removes the
+ * on-disk PV header and the cachedev entry found for the PV id.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+static int _vgreduce_cachedev(struct cmd_context *cmd, struct volume_group *vg, struct physical_volume *pv)
+{
+	const char *name = pv_dev_name(pv);
+	const struct id *pvid = &pv->id;
+	struct pv_list *pvl;
+	struct cachedev *cd;
+
+	log_debug("vgreduce_cachedev VG %s PV %s", vg->name, name);
+
+	if (pv_pe_alloc_count(pv)) {
+		log_error("Physical volume \"%s\" still in use", name);
+		return 0;
+	}
+
+	if ((pvl = find_cd_in_vg(vg, name))) {
+		/* delete pv entry from vg->cds */
+		dm_list_del(&pvl->list);
+		pvid = &pvl->pv->id;
+	}
+
+	if (!vg_write(vg) || !vg_commit(vg)) {
+		log_error("Removal of physical volume \"%s\" from \"%s\" failed", name, vg->name);
+		return 0;
+	}
+
+	/*
+	 * pvl is NULL when the PV was not found in vg->cds, so it must not
+	 * be dereferenced here; pvid falls back to the id of pv itself.
+	 */
+	if ((cd = lvmcache_cachedev_from_pvid(pvid))) {
+		/* remove ondisk PV header */
+		label_remove(cd->dev);
+		/* delete cd entry from cachedevs */
+		lvmcache_del_cachedev(cd);
+	}
+
+	backup(vg);
+
+	log_print_unless_silent("Removed \"%s\" from volume group \"%s\"", name, vg->name);
+
+	return 1;
+}
+
 int vgreduce_single(struct cmd_context *cmd, struct volume_group *vg,
 		    struct physical_volume *pv, int commit)
 {
@@ -677,6 +730,9 @@ int vgreduce_single(struct cmd_context *cmd, struct volume_group *vg,
 		return r;
 	}
 
+	if (pv->is_cachedev)
+		return _vgreduce_cachedev(cmd, vg, pv);
+
log_debug("vgreduce_single VG %s PV %s", vg->name, pv_dev_name(pv)); if (pv_pe_alloc_count(pv)) { diff --git a/lib/metadata/vg.h b/lib/metadata/vg.h index 3fd47569d..780684342 100644 --- a/lib/metadata/vg.h +++ b/lib/metadata/vg.h @@ -75,6 +75,9 @@ struct volume_group { uint32_t pv_count; struct dm_list pvs; + /* cache devices */ + struct dm_list cds; + /* * List of physical volumes that were used in vgextend but do not carry * a PV label yet. They need to be pvcreate'd at vg_write time. diff --git a/tools/args.h b/tools/args.h index adca84b2e..d70953da3 100644 --- a/tools/args.h +++ b/tools/args.h @@ -102,6 +102,9 @@ arg(cache_long_ARG, '\0', "cache", 0, 0, 0, "#lvscan\n" "This option is no longer used.\n") +arg(cachedev_ARG, '\0', "cachedev", 0, 0, 0, + "The specified device is a cache device, not a normal PV.\n") + arg(cachemetadataformat_ARG, '\0', "cachemetadataformat", cachemetadataformat_VAL, 0, 0, "Specifies the cache metadata format used by cache target.\n") diff --git a/tools/command-lines.in b/tools/command-lines.in index 1511098f6..8c65dfbff 100644 --- a/tools/command-lines.in +++ b/tools/command-lines.in @@ -1646,6 +1646,10 @@ OO: --autobackup Bool, --reportformat ReportFmt, --restoremissing ID: vgextend_general +vgextend --cachedev VG PV ... 
+OO: --reportformat ReportFmt +ID: vgextend_cachedev + --- OO_VGIMPORT: --force, --reportformat ReportFmt diff --git a/tools/command.c b/tools/command.c index 42f9aa3c9..c2d1d28eb 100644 --- a/tools/command.c +++ b/tools/command.c @@ -137,6 +137,7 @@ static inline int configtype_arg(struct cmd_context *cmd __attribute__((unused)) #define DISALLOW_TAG_ARGS 0x00000800 #define GET_VGNAME_FROM_OPTIONS 0x00001000 #define CAN_USE_ONE_SCAN 0x00002000 +#define ENABLE_CACHE_DEVS 0x00004000 /* create foo_CMD enums for command def ID's in command-lines.in */ diff --git a/tools/commands.h b/tools/commands.h index ef5763bd5..7b9906841 100644 --- a/tools/commands.h +++ b/tools/commands.h @@ -145,7 +145,7 @@ xx(pvremove, xx(pvs, "Display information about physical volumes", - PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | ENABLE_ALL_DEVS | ENABLE_DUPLICATE_DEVS | LOCKD_VG_SH | CAN_USE_ONE_SCAN) + PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | ENABLE_ALL_DEVS | ENABLE_CACHE_DEVS | ENABLE_DUPLICATE_DEVS | LOCKD_VG_SH | CAN_USE_ONE_SCAN) xx(pvscan, "List all physical volumes", @@ -216,7 +216,7 @@ xx(vgmknodes, xx(vgreduce, "Remove physical volume(s) from a volume group", - 0) + ENABLE_CACHE_DEVS) xx(vgremove, "Remove volume group(s)", diff --git a/tools/pvscan.c b/tools/pvscan.c index a4c0244eb..e5ff7d126 100644 --- a/tools/pvscan.c +++ b/tools/pvscan.c @@ -376,13 +376,24 @@ static int _online_pv_found(struct cmd_context *cmd, dev_args_in_vg = 1; } + dm_list_iterate_items(pvl, &vg->cds) { + if (!_online_pvid_file_exists((const char *)&pvl->pv->id.uuid)) + pvids_not_online++; + + /* Check if one of the devs on the command line is in this VG. */ + if (dev_args && dev_in_device_list(pvl->pv->dev, dev_args)) + dev_args_in_vg = 1; + } + /* * Return if we did not find an online file for one of the PVIDs * in the VG, which means the VG is not yet complete. 
*/ - if (pvids_not_online) + if (pvids_not_online) { + log_debug("missing %d PVs in VG %s.", pvids_not_online, vg->name); return 1; + } /* * When all PVIDs from the VG are online, then add vgname to diff --git a/tools/toollib.c b/tools/toollib.c index 36ae12550..2b3e5aef1 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -15,6 +15,9 @@ #include "tools.h" #include "lib/format_text/format-text.h" +#include "lib/format_text/layout.h" +#include "lib/mm/xlate.h" +#include "lib/misc/crc.h" #include <sys/stat.h> #include <signal.h> @@ -4155,6 +4158,7 @@ static int _process_duplicate_pvs(struct cmd_context *cmd, static int _process_pvs_in_vg(struct cmd_context *cmd, struct volume_group *vg, + struct dm_list *pv_list, struct dm_list *all_devices, struct dm_list *arg_devices, struct dm_list *arg_tags, @@ -4197,7 +4201,7 @@ static int _process_pvs_in_vg(struct cmd_context *cmd, if (!is_orphan_vg(vg->name)) log_set_report_object_group_and_group_id(vg->name, vg_uuid); - dm_list_iterate_items(pvl, &vg->pvs) { + dm_list_iterate_items(pvl, pv_list) { pv = pvl->pv; pv_name = pv_dev_name(pv); pv_uuid[0]='\0'; @@ -4360,11 +4364,20 @@ static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t read_flags, * vg->pvs entries from devices list. 
*/ - ret = _process_pvs_in_vg(cmd, vg, all_devices, arg_devices, arg_tags, + ret = _process_pvs_in_vg(cmd, vg, &vg->pvs, all_devices, arg_devices, arg_tags, process_all_pvs, process_all_devices, skip, handle, process_single_pv); if (ret != ECMD_PROCESSED) stack; + + if (cmd->cname->flags & ENABLE_CACHE_DEVS) { + ret = _process_pvs_in_vg(cmd, vg, &vg->cds, all_devices, arg_devices, arg_tags, + process_all_pvs, process_all_devices, skip, + handle, process_single_pv); + if (ret != ECMD_PROCESSED) + stack; + } + report_log_ret_code(ret); if (ret > ret_max) ret_max = ret; @@ -5035,6 +5048,7 @@ static int _pvcreate_check_single(struct cmd_context *cmd, log_debug("Found pvcreate arg %s: pv is used in %s.", pd->name, vg->name); pd->is_vg_pv = 1; pd->vg_name = dm_pool_strdup(cmd->mem, vg->name); + } else if (vg && is_orphan_vg(vg->name)) { if (is_used_pv(pv)) { /* Device is used in an unknown VG. */ @@ -5047,6 +5061,12 @@ static int _pvcreate_check_single(struct cmd_context *cmd, } pp->orphan_vg_name = FMT_TEXT_ORPHAN_VG_NAME; + + } else if (lvmcache_cachedev_from_pvid((const struct id *)pv->dev->pvid)) { + log_error("Cannot use cache device %s.", pd->name); + dm_list_move(&pp->arg_fail, &pd->list); + return 1; + } else { log_debug("Found pvcreate arg %s: device is not a PV.", pd->name); /* Device is not a PV. */ @@ -5210,6 +5230,7 @@ static int _pvremove_check_single(struct cmd_context *cmd, struct pvcreate_params *pp = (struct pvcreate_params *) handle->custom_handle; struct pvcreate_device *pd; struct pvcreate_prompt *prompt; + struct cachedev *cd; int found = 0; if (!pv->dev) @@ -5235,12 +5256,13 @@ static int _pvremove_check_single(struct cmd_context *cmd, log_debug("Checking device %s for pvremove %.32s.", pv_dev_name(pv), pv->dev->pvid[0] ? pv->dev->pvid : ""); + cd = lvmcache_cachedev_from_pvid((const struct id *)pv->dev->pvid); /* * Is there a pv here already? * If not, this is an error unless you used -f. 
*/ - if (!lvmcache_has_dev_info(pv->dev)) { + if (!lvmcache_has_dev_info(pv->dev) && !cd) { if (pp->force) { dm_list_move(&pp->arg_process, &pd->list); return 1; @@ -5258,6 +5280,16 @@ static int _pvremove_check_single(struct cmd_context *cmd, /* Device is not a PV. */ log_debug("Found pvremove arg %s: device is not a PV.", pd->name); + } else if (cd) { + /* Device is a cachedev PV used in a VG. */ + log_debug("Found pvremove arg %s: pv is cache device.", pd->name); + if (cd->vg_name) { + pd->is_vg_pv = 1; + pd->vg_name = strdup(cd->vg_name); + } else { + pd->is_used_unknown_pv = 1; + } + } else if (vg && !is_orphan_vg(vg->name)) { /* Device is a PV used in a VG. */ log_debug("Found pvremove arg %s: pv is used in %s.", pd->name, vg->name); @@ -5276,6 +5308,7 @@ static int _pvremove_check_single(struct cmd_context *cmd, } pp->orphan_vg_name = FMT_TEXT_ORPHAN_VG_NAME; + } else { /* FIXME: is it possible to reach here? */ log_debug("Found pvremove arg %s: device is not a PV.", pd->name); @@ -5315,7 +5348,7 @@ static int _pvremove_check_single(struct cmd_context *cmd, if (pd->is_used_unknown_pv) prompt->vg_name_unknown = 1; else - prompt->vg_name = dm_pool_strdup(cmd->mem, vg->name); + prompt->vg_name = dm_pool_strdup(cmd->mem, pd->vg_name); prompt->type |= PROMPT_PVREMOVE_PV_IN_VG; dm_list_add(&pp->prompts, &prompt->list); @@ -5325,6 +5358,85 @@ static int _pvremove_check_single(struct cmd_context *cmd, return 1; } +static struct physical_volume *_cachedev_pv_create(struct cmd_context *cmd, struct device *dev) +{ + char buf[LABEL_SIZE]; + struct physical_volume *pv; + struct label_header *lh; + struct pv_header *pvhdr; + struct pv_header_extension *pvext; + uint32_t pvhdr_len; + uint32_t crc; + + if (!(pv = dm_pool_zalloc(cmd->mem, sizeof(*pv)))) + return_NULL; + + pv->dev = dev; + dm_list_init(&pv->tags); + dm_list_init(&pv->segments); + + if (!id_create(&pv->id)) + return_NULL; + + if (!dev_get_size(dev, &pv->size)) { + log_error("%s: Couldn't get size.", 
dev_name(dev)); + return NULL; + } + + memset(buf, 0, LABEL_SIZE); + + /* + * Set label_header fields (except crc). + * Set pv_header fields. + * Set pv_header_extension fields. + * Calculate crc. + * Set label_header crc field. + * + * The label_header crc is calculated on data from just after the lh + * crc field (starting with lh.offset) to the end of the label sector, + * which includes the pv_header and pv_header_extension. So, the + * pv_header and pv_header_extension fields need to be set before the + * lh crc is calculated and set in the lh. + * + * The pv_header needs to be followed by two empty disk_locn structs, + * the first terminates the nonexistent data areas list, and the + * second terminates the nonexistent metadata areas list. + */ + + lh = (struct label_header *)buf; + strncpy((char *)lh->id, LABEL_ID, sizeof(lh->id)); + lh->sector_xl = xlate64(DEFAULT_LABELSECTOR); + lh->offset_xl = xlate32(sizeof(struct label_header)); + strncpy((char *)lh->type, LVM2_LABEL, sizeof(lh->type)); + + pvhdr = (struct pv_header *) ((char *)buf + sizeof(struct label_header)); + pvhdr->device_size_xl = xlate64(pv->size); + memcpy(pvhdr->pv_uuid, &pv->id, sizeof(struct id)); + + /* Two empty disk_locn structs follow the struct pv_header. */ + pvhdr_len = sizeof(struct pv_header) + (2 * sizeof(struct disk_locn)); + + pvext = (struct pv_header_extension *) ((char *)pvhdr + pvhdr_len); + pvext->version = xlate32(PV_HEADER_EXTENSION_VSN); + pvext->flags = xlate32(PV_EXT_USED); + + crc = calc_crc(INITIAL_CRC, + (uint8_t *)&lh->offset_xl, + LABEL_SIZE - ((uint8_t *) &lh->offset_xl - (uint8_t *) lh)); + + lh->crc_xl = xlate32(crc); + + if (!dev_write_bytes(dev, DEFAULT_LABELSECTOR << SECTOR_SHIFT, LABEL_SIZE, buf)) { + log_debug_devs("Failed to write label to %s", dev_name(dev)); + return_NULL; + } + + /* Should we add an entry to the cachedevs list as would happen if + this device was scanned? 
*/ + + return pv; +} + /* * This can be used by pvcreate, vgcreate and vgextend to create PVs. The * callers need to set up the pvcreate_each_params structure based on command @@ -5428,7 +5540,7 @@ int pvcreate_each_device(struct cmd_context *cmd, * If it's added to arg_process but needs a prompt or force option, then * a corresponding prompt entry is added to pp->prompts. */ - process_each_pv(cmd, 0, NULL, NULL, 1, PROCESS_SKIP_SCAN | PROCESS_SKIP_ORPHAN_LOCK, + process_each_pv(cmd, 0, NULL, NULL, 1, PROCESS_SKIP_SCAN | PROCESS_SKIP_ORPHAN_LOCK | ENABLE_CACHE_DEVS, handle, pp->is_remove ? _pvremove_check_single : _pvcreate_check_single); /* @@ -5576,7 +5688,7 @@ int pvcreate_each_device(struct cmd_context *cmd, */ dm_list_splice(&pp->arg_confirm, &pp->arg_process); - process_each_pv(cmd, 0, NULL, NULL, 1, PROCESS_SKIP_SCAN | PROCESS_SKIP_ORPHAN_LOCK, + process_each_pv(cmd, 0, NULL, NULL, 1, PROCESS_SKIP_SCAN | PROCESS_SKIP_ORPHAN_LOCK | ENABLE_CACHE_DEVS, handle, _pv_confirm_single); dm_list_iterate_items(pd, &pp->arg_confirm) @@ -5709,6 +5821,23 @@ do_command: label_scan_open_excl(pd->dev); + if (pp->cachedev) { + /* + * cache devs do not hold VG metadata, here we write + * the cachedev PV, and later vgextend will update the + * VG metadata on the other PVs. + */ + if (!(pv = _cachedev_pv_create(cmd, pd->dev))) { + log_error("Failed to create cache device \"%s\".", pv_name); + dm_list_move(&pp->arg_fail, &pd->list); + continue; + } + pvl->pv = pv; + dm_list_add(&pp->pvs, &pvl->list); + log_print_unless_silent("Cache device \"%s\" successfully created.", pv_name); + continue; + } + log_debug("Creating a new PV on %s.", pv_name); if (!(pv = pv_create(cmd, pd->dev, &pp->pva))) { diff --git a/tools/tools.h b/tools/tools.h index 405910fc7..a3e5da584 100644 --- a/tools/tools.h +++ b/tools/tools.h @@ -135,6 +135,8 @@ struct arg_value_group_list { #define GET_VGNAME_FROM_OPTIONS 0x00001000 /* The data read from disk by label scan can be used for vg_read. 
*/ #define CAN_USE_ONE_SCAN 0x00002000 +/* Command can process cache devices */ +#define ENABLE_CACHE_DEVS 0x00004000 void usage(const char *name); diff --git a/tools/vgextend.c b/tools/vgextend.c index 5287a364e..f53b862fc 100644 --- a/tools/vgextend.c +++ b/tools/vgextend.c @@ -93,8 +93,13 @@ static int _vgextend_single(struct cmd_context *cmd, const char *vg_name, if (!archive(vg)) return_ECMD_FAILED; - if (!vg_extend_each_pv(vg, pp)) - goto_out; + if (pp->cachedev) { + if (!vg_extend_each_cd(vg, pp)) + goto_out; + } else { + if (!vg_extend_each_pv(vg, pp)) + goto_out; + } if (arg_is_set(cmd, metadataignore_ARG)) { mda_copies = vg_mda_copies(vg); @@ -128,14 +133,9 @@ int vgextend(struct cmd_context *cmd, int argc, char **argv) struct pvcreate_params *pp = &vp.pp; unsigned restoremissing = arg_is_set(cmd, restoremissing_ARG); const char *vg_name; + int cachedev = (cmd->command->command_enum == vgextend_cachedev_CMD); int ret; - if (!argc) { - log_error("Please enter volume group name and " - "physical volume(s)"); - return EINVALID_CMD_LINE; - } - vg_name = skip_dev_dir(cmd, argv[0], NULL); argc--; argv++; @@ -154,6 +154,8 @@ int vgextend(struct cmd_context *cmd, int argc, char **argv) /* pvcreate within vgextend cannot be forced. */ pp->force = 0; + pp->cachedev = cachedev; + /* Check for old md signatures at the end of devices. */ cmd->use_full_md_check = 1; |