diff options
Diffstat (limited to 'mkfs.c')
-rw-r--r-- | mkfs.c | 407 |
1 files changed, 268 insertions, 139 deletions
@@ -19,6 +19,8 @@ #define _XOPEN_SOURCE 500 #define _GNU_SOURCE +#include "kerncompat.h" + #ifndef __CHECKER__ #include <sys/ioctl.h> #include <sys/mount.h> @@ -37,7 +39,8 @@ #include <linux/fs.h> #include <ctype.h> #include <attr/xattr.h> -#include "kerncompat.h" +#include <blkid/blkid.h> +#include <ftw.h> #include "ctree.h" #include "disk-io.h" #include "volumes.h" @@ -54,32 +57,6 @@ struct directory_name_entry { struct list_head list; }; -static u64 parse_size(char *s) -{ - int len = strlen(s); - char c; - u64 mult = 1; - - if (!isdigit(s[len - 1])) { - c = tolower(s[len - 1]); - switch (c) { - case 'g': - mult *= 1024; - case 'm': - mult *= 1024; - case 'k': - mult *= 1024; - case 'b': - break; - default: - fprintf(stderr, "Unknown size descriptor %c\n", c); - exit(1); - } - s[len - 1] = '\0'; - } - return atol(s) * mult; -} - static int make_root_dir(struct btrfs_root *root, int mixed) { struct btrfs_trans_handle *trans; @@ -90,7 +67,7 @@ static int make_root_dir(struct btrfs_root *root, int mixed) int ret; trans = btrfs_start_transaction(root, 1); - bytes_used = btrfs_super_bytes_used(&root->fs_info->super_copy); + bytes_used = btrfs_super_bytes_used(root->fs_info->super_copy); root->fs_info->system_allocs = 1; ret = btrfs_make_block_group(trans, root, bytes_used, @@ -152,7 +129,7 @@ static int make_root_dir(struct btrfs_root *root, int mixed) location.offset = (u64)-1; ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, "default", 7, - btrfs_super_root_dir(&root->fs_info->super_copy), + btrfs_super_root_dir(root->fs_info->super_copy), &location, BTRFS_FT_DIR, 0); if (ret) goto err; @@ -228,19 +205,66 @@ static int create_one_raid_group(struct btrfs_trans_handle *trans, static int create_raid_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 data_profile, - u64 metadata_profile, int mixed) + int data_profile_opt, u64 metadata_profile, + int metadata_profile_opt, int mixed, int ssd) { - u64 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy); - u64 allowed; + u64 num_devices = btrfs_super_num_devices(root->fs_info->super_copy); + u64 allowed = 0; + u64 devices_for_raid = num_devices; int ret; - if (num_devices == 1) - allowed = BTRFS_BLOCK_GROUP_DUP; - else if (num_devices >= 4) { - allowed = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10; - } else - allowed = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1; + /* + * Set default profiles according to number of added devices. + * For mixed groups defaults are single/single. + */ + if (!metadata_profile_opt && !mixed) { + if (num_devices == 1 && ssd) + printf("Detected a SSD, turning off metadata " + "duplication. Mkfs with -m dup if you want to " + "force metadata duplication.\n"); + metadata_profile = (num_devices > 1) ? + BTRFS_BLOCK_GROUP_RAID1 : (ssd) ? 0: BTRFS_BLOCK_GROUP_DUP; + } + if (!data_profile_opt && !mixed) { + data_profile = (num_devices > 1) ? + BTRFS_BLOCK_GROUP_RAID0 : 0; /* raid0 or single */ + } + + if (devices_for_raid > 4) + devices_for_raid = 4; + + switch (devices_for_raid) { + default: + case 4: + allowed |= BTRFS_BLOCK_GROUP_RAID10; + case 3: + allowed |= BTRFS_BLOCK_GROUP_RAID6; + case 2: + allowed |= BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID5; + break; + case 1: + allowed |= BTRFS_BLOCK_GROUP_DUP; + } + + if (metadata_profile & ~allowed) { + fprintf(stderr, "unable to create FS with metadata " + "profile %llu (have %llu devices)\n", metadata_profile, + num_devices); + exit(1); + } + if (data_profile & ~allowed) { + fprintf(stderr, "unable to create FS with data " + "profile %llu (have %llu devices)\n", data_profile, + num_devices); + exit(1); + } + + /* allow dup'ed data chunks only in mixed mode */ + if (!mixed && (data_profile & BTRFS_BLOCK_GROUP_DUP)) { + fprintf(stderr, "dup for data is allowed only in mixed mode\n"); + exit(1); + } if (allowed & metadata_profile) { u64 meta_flags = BTRFS_BLOCK_GROUP_METADATA; @@ -302,7 +326,8 @@ static void print_usage(void) fprintf(stderr, "options:\n"); fprintf(stderr, "\t -A --alloc-start the offset to start the FS\n"); fprintf(stderr, "\t -b --byte-count total number of bytes in the FS\n"); - fprintf(stderr, "\t -d --data data profile, raid0, raid1, raid10 or single\n"); + fprintf(stderr, "\t -d --data data profile, raid0, raid1, raid5, raid6, raid10, dup or single\n"); + fprintf(stderr, "\t -f --force force overwrite of existing filesystem\n"); fprintf(stderr, "\t -l --leafsize size of btree leaves\n"); fprintf(stderr, "\t -L --label set a label\n"); fprintf(stderr, "\t -m --metadata metadata profile, values like data profile\n"); @@ -310,6 +335,8 @@ static void print_usage(void) fprintf(stderr, "\t -n --nodesize size of btree nodes\n"); fprintf(stderr, "\t -s --sectorsize min block allocation\n"); fprintf(stderr, "\t -r --rootdir the source directory\n"); + fprintf(stderr, "\t -K --nodiscard do not perform whole device TRIM\n"); + fprintf(stderr, "\t -V --version print the mkfs.btrfs version and exit\n"); fprintf(stderr, "%s\n", BTRFS_BUILD_VERSION); exit(1); } @@ -325,21 +352,27 @@ static u64 parse_profile(char *s) if (strcmp(s, "raid0") == 0) { return BTRFS_BLOCK_GROUP_RAID0; } else if (strcmp(s, "raid1") == 0) { - return BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP; + return BTRFS_BLOCK_GROUP_RAID1; + } else if (strcmp(s, "raid5") == 0) { + return BTRFS_BLOCK_GROUP_RAID5; + } else if (strcmp(s, "raid6") == 0) { + return BTRFS_BLOCK_GROUP_RAID6; } else if (strcmp(s, "raid10") == 0) { - return BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_DUP; + return BTRFS_BLOCK_GROUP_RAID10; + } else if (strcmp(s, "dup") == 0) { + return BTRFS_BLOCK_GROUP_DUP; } else if (strcmp(s, "single") == 0) { return 0; } else { - fprintf(stderr, "Unknown option %s\n", s); + fprintf(stderr, "Unknown profile %s\n", s); print_usage(); } + /* not reached */ return 0; } static char *parse_label(char *input) { - int i; int len = strlen(input); if (len >= BTRFS_LABEL_SIZE) { @@ -347,18 +380,13 @@ static char *parse_label(char *input) BTRFS_LABEL_SIZE - 1); exit(1); } - for (i = 0; i < len; i++) { - if (input[i] == '/' || input[i] == '\\') { - fprintf(stderr, "invalid label %s\n", input); - exit(1); - } - } return strdup(input); } static struct option long_options[] = { { "alloc-start", 1, NULL, 'A'}, { "byte-count", 1, NULL, 'b' }, + { "force", 0, NULL, 'f' }, { "leafsize", 1, NULL, 'l' }, { "label", 1, NULL, 'L'}, { "metadata", 1, NULL, 'm' }, @@ -368,6 +396,7 @@ static struct option long_options[] = { { "data", 1, NULL, 'd' }, { "version", 0, NULL, 'V' }, { "rootdir", 1, NULL, 'r' }, + { "nodiscard", 0, NULL, 'K' }, { 0, 0, 0, 0} }; @@ -768,7 +797,7 @@ static int add_file_items(struct btrfs_trans_handle *trans, fd = open(path_name, O_RDONLY); if (fd == -1) { fprintf(stderr, "%s open failed\n", path_name); - goto end; + return ret; } blocks = st->st_size / sectorsize; @@ -878,8 +907,7 @@ static int traverse_directory(struct btrfs_trans_handle *trans, /* Add list for source directory */ dir_entry = malloc(sizeof(struct directory_name_entry)); dir_entry->dir_name = dir_name; - dir_entry->path = malloc(strlen(dir_name) + 1); - strcpy(dir_entry->path, dir_name); + dir_entry->path = strdup(dir_name); parent_inum = highest_inum + BTRFS_FIRST_FREE_OBJECTID; dir_entry->inum = parent_inum; @@ -1086,16 +1114,30 @@ fail: return -1; } +/* + * This ignores symlinks with unreadable targets and subdirs that can't + * be read. It's a best-effort to give a rough estimate of the size of + * a subdir. It doesn't guarantee that prepopulating btrfs from this + * tree won't still run out of space. + * + * The rounding up to 4096 is questionable. Previous code used du -B 4096. + */ +static u64 global_total_size; +static int ftw_add_entry_size(const char *fpath, const struct stat *st, + int type) +{ + if (type == FTW_F || type == FTW_D) + global_total_size += round_up(st->st_size, 4096); + + return 0; +} + static u64 size_sourcedir(char *dir_name, u64 sectorsize, u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret) { u64 dir_size = 0; u64 total_size = 0; int ret; - char command[1024]; - char path[512]; - char *file_name = "temp_file"; - FILE *file; u64 default_chunk_size = 8 * 1024 * 1024; /* 8MB */ u64 allocated_meta_size = 8 * 1024 * 1024; /* 8MB */ u64 allocated_total_size = 20 * 1024 * 1024; /* 20MB */ @@ -1103,23 +1145,14 @@ static u64 size_sourcedir(char *dir_name, u64 sectorsize, u64 num_of_allocated_meta_chunks = allocated_meta_size / default_chunk_size; - ret = sprintf(command, "du -B 4096 -s "); + global_total_size = 0; + ret = ftw(dir_name, ftw_add_entry_size, 10); + dir_size = global_total_size; if (ret < 0) { - fprintf(stderr, "error executing sprintf for du command\n"); - return -1; + fprintf(stderr, "ftw subdir walk of '%s' failed: %s\n", + dir_name, strerror(errno)); + exit(1); } - strcat(command, dir_name); - strcat(command, " > "); - strcat(command, file_name); - ret = system(command); - - file = fopen(file_name, "r"); - ret = fscanf(file, "%lld %s\n", &dir_size, path); - fclose(file); - remove(file_name); - - dir_size *= sectorsize; - *size_of_data_ret = dir_size; num_of_meta_chunks = (dir_size / 2) / default_chunk_size; if (((dir_size / 2) % default_chunk_size) != 0) @@ -1159,6 +1192,67 @@ static int zero_output_file(int out_fd, u64 size, u32 sectorsize) return ret; } +static int check_leaf_or_node_size(u32 size, u32 sectorsize) +{ + if (size < sectorsize) { + fprintf(stderr, + "Illegal leafsize (or nodesize) %u (smaller than %u)\n", + size, sectorsize); + return -1; + } else if (size > BTRFS_MAX_METADATA_BLOCKSIZE) { + fprintf(stderr, + "Illegal leafsize (or nodesize) %u (larger than %u)\n", + size, BTRFS_MAX_METADATA_BLOCKSIZE); + return -1; + } else if (size & (sectorsize - 1)) { + fprintf(stderr, + "Illegal leafsize (or nodesize) %u (not align to %u)\n", + size, sectorsize); + return -1; + } + return 0; +} + +static int is_ssd(const char *file) +{ + blkid_probe probe; + char wholedisk[32]; + char sysfs_path[PATH_MAX]; + dev_t devno; + int fd; + char rotational; + + probe = blkid_new_probe_from_filename(file); + if (!probe) + return 0; + + /* Device number of this disk (possibly a partition) */ + devno = blkid_probe_get_devno(probe); + if (!devno) + return 0; + + /* Get whole disk name (not full path) for this devno */ + blkid_devno_to_wholedisk(devno, wholedisk, sizeof(wholedisk), NULL); + + snprintf(sysfs_path, PATH_MAX, "/sys/block/%s/queue/rotational", + wholedisk); + + blkid_free_probe(probe); + + fd = open(sysfs_path, O_RDONLY); + if (fd < 0) { + return 0; + } + + if (read(fd, &rotational, sizeof(char)) < sizeof(char)) { + close(fd); + return 0; + } + close(fd); + + return !atoi((const char *)&rotational); +} + int main(int ac, char **av) { char *file; @@ -1170,9 +1264,9 @@ int main(int ac, char **av) u64 dev_block_count = 0; u64 blocks[7]; u64 alloc_start = 0; - u64 metadata_profile = BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP; - u64 data_profile = BTRFS_BLOCK_GROUP_RAID0; - u32 leafsize = getpagesize(); + u64 metadata_profile = 0; + u64 data_profile = 0; + u32 leafsize = sysconf(_SC_PAGESIZE); u32 sectorsize = 4096; u32 nodesize = leafsize; u32 stripesize = 4096; @@ -1184,6 +1278,9 @@ int main(int ac, char **av) int mixed = 0; int data_profile_opt = 0; int metadata_profile_opt = 0; + int nodiscard = 0; + int ssd = 0; + int force_overwrite = 0; char *source_dir = NULL; int source_dir_set = 0; @@ -1191,22 +1288,32 @@ int main(int ac, char **av) u64 size_of_data = 0; u64 source_dir_size = 0; char *pretty_buf; + struct btrfs_super_block *super; + u64 flags; + int dev_cnt = 0; + int saved_optind; + char estr[100]; while(1) { int c; - c = getopt_long(ac, av, "A:b:l:n:s:m:d:L:r:VM", long_options, - &option_index); + c = getopt_long(ac, av, "A:b:fl:n:s:m:d:L:r:VMK", + long_options, &option_index); if (c < 0) break; switch(c) { case 'A': alloc_start = parse_size(optarg); break; + case 'f': + force_overwrite = 1; + break; case 'd': data_profile = parse_profile(optarg); data_profile_opt = 1; break; case 'l': + case 'n': + nodesize = parse_size(optarg); leafsize = parse_size(optarg); break; case 'L': @@ -1219,9 +1326,6 @@ int main(int ac, char **av) case 'M': mixed = 1; break; - case 'n': - nodesize = parse_size(optarg); - break; case 's': sectorsize = parse_size(optarg); break; @@ -1241,50 +1345,67 @@ int main(int ac, char **av) source_dir = optarg; source_dir_set = 1; break; + case 'K': + nodiscard=1; + break; default: print_usage(); } } - sectorsize = max(sectorsize, (u32)getpagesize()); - if (leafsize < sectorsize || (leafsize & (sectorsize - 1))) { - fprintf(stderr, "Illegal leafsize %u\n", leafsize); + sectorsize = max(sectorsize, (u32)sysconf(_SC_PAGESIZE)); + if (check_leaf_or_node_size(leafsize, sectorsize)) exit(1); - } - if (nodesize < sectorsize || (nodesize & (sectorsize - 1))) { - fprintf(stderr, "Illegal nodesize %u\n", nodesize); + if (check_leaf_or_node_size(nodesize, sectorsize)) exit(1); - } - ac = ac - optind; - if (ac == 0) + saved_optind = optind; + dev_cnt = ac - optind; + if (dev_cnt == 0) print_usage(); + if (source_dir_set && dev_cnt > 1) { + fprintf(stderr, + "The -r option is limited to a single device\n"); + exit(1); + } + while (dev_cnt-- > 0) { + file = av[optind++]; + if (is_block_device(file)) + if (test_dev_for_mkfs(file, force_overwrite, estr)) { + fprintf(stderr, "Error: %s", estr); + exit(1); + } + } + + /* if we are here that means all devs are good to btrfsify */ + optind = saved_optind; + dev_cnt = ac - optind; + printf("\nWARNING! - %s IS EXPERIMENTAL\n", BTRFS_BUILD_VERSION); printf("WARNING! - see http://btrfs.wiki.kernel.org before using\n\n"); - if (source_dir == 0) { - file = av[optind++]; - ret = check_mounted(file); - if (ret < 0) { - fprintf(stderr, "error checking %s mount status\n", file); - exit(1); - } - if (ret == 1) { - fprintf(stderr, "%s is mounted\n", file); - exit(1); - } - ac--; + file = av[optind++]; + dev_cnt--; + + if (!source_dir_set) { + /* + * open without O_EXCL so that the problem should not + * occur by the following processing. + * (btrfs_register_one_device() fails if O_EXCL is on) + */ fd = open(file, O_RDWR); if (fd < 0) { - fprintf(stderr, "unable to open %s\n", file); + fprintf(stderr, "unable to open %s: %s\n", file, + strerror(errno)); exit(1); } first_file = file; - ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count, &mixed); - if (block_count == 0) - block_count = dev_block_count; + ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count, + block_count, &mixed, nodiscard); + if (block_count && block_count > dev_block_count) { + fprintf(stderr, "%s is smaller than requested size\n", file); + exit(1); + } } else { - ac = 0; - file = av[optind++]; fd = open_target(file); if (fd < 0) { fprintf(stderr, "unable to open the %s\n", file); @@ -1301,13 +1422,13 @@ int main(int ac, char **av) fprintf(stderr, "unable to zero the output file\n"); exit(1); } + /* our "device" is the new image file */ + dev_block_count = block_count; } - if (mixed) { - if (!metadata_profile_opt) - metadata_profile = 0; - if (!data_profile_opt) - data_profile = 0; + ssd = is_ssd(file); + + if (mixed) { if (metadata_profile != data_profile) { fprintf(stderr, "With mixed block groups data and metadata " "profiles must be the same\n"); @@ -1321,14 +1442,20 @@ int main(int ac, char **av) leafsize * i; } - ret = make_btrfs(fd, file, label, blocks, block_count, + ret = make_btrfs(fd, file, label, blocks, dev_block_count, nodesize, leafsize, sectorsize, stripesize); if (ret) { fprintf(stderr, "error during mkfs %d\n", ret); exit(1); } + root = open_ctree(file, 0, O_RDWR); + if (!root) { + fprintf(stderr, "Open ctree failed\n"); + close(fd); + exit(1); + } root->fs_info->alloc_start = alloc_start; ret = make_root_dir(root, mixed); @@ -1339,33 +1466,26 @@ int main(int ac, char **av) trans = btrfs_start_transaction(root, 1); - if (ac == 0) + if (dev_cnt == 0) goto raid_groups; btrfs_register_one_device(file); - if (!root) { - fprintf(stderr, "ctree init failed\n"); - return -1; - } zero_end = 1; - while(ac-- > 0) { + while (dev_cnt-- > 0) { int old_mixed = mixed; file = av[optind++]; - ret = check_mounted(file); - if (ret < 0) { - fprintf(stderr, "error checking %s mount status\n", - file); - exit(1); - } - if (ret == 1) { - fprintf(stderr, "%s is mounted\n", file); - exit(1); - } + + /* + * open without O_EXCL so that the problem should not + * occur by the following processing. + * (btrfs_register_one_device() fails if O_EXCL is on) + */ fd = open(file, O_RDWR); if (fd < 0) { - fprintf(stderr, "unable to open %s\n", file); + fprintf(stderr, "unable to open %s: %s\n", file, + strerror(errno)); exit(1); } ret = btrfs_device_already_in_root(root, fd, @@ -1376,8 +1496,8 @@ int main(int ac, char **av) close(fd); continue; } - ret = btrfs_prepare_device(fd, file, zero_end, - &dev_block_count, &mixed); + ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count, + block_count, &mixed, nodiscard); mixed = old_mixed; BUG_ON(ret); @@ -1390,25 +1510,36 @@ int main(int ac, char **av) raid_groups: if (!source_dir_set) { ret = create_raid_groups(trans, root, data_profile, - metadata_profile, mixed); + data_profile_opt, metadata_profile, + metadata_profile_opt, mixed, ssd); BUG_ON(ret); } ret = create_data_reloc_tree(trans, root); BUG_ON(ret); - if (mixed) { - struct btrfs_super_block *super = &root->fs_info->super_copy; - u64 flags = btrfs_super_incompat_flags(super); + super = root->fs_info->super_copy; + flags = btrfs_super_incompat_flags(super); + if (mixed) flags |= BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS; + + btrfs_set_super_incompat_flags(super, flags); + + if ((data_profile | metadata_profile) & + (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) { + struct btrfs_super_block *super = root->fs_info->super_copy; + u64 flags = btrfs_super_incompat_flags(super); + + flags |= BTRFS_FEATURE_INCOMPAT_RAID56; btrfs_set_super_incompat_flags(super, flags); + printf("Setting RAID5/6 feature flag\n"); } printf("fs created label %s on %s\n\tnodesize %u leafsize %u " "sectorsize %u size %s\n", label, first_file, nodesize, leafsize, sectorsize, - pretty_buf = pretty_sizes(btrfs_super_total_bytes(&root->fs_info->super_copy))); + pretty_buf = pretty_sizes(btrfs_super_total_bytes(root->fs_info->super_copy))); free(pretty_buf); printf("%s\n", BTRFS_BUILD_VERSION); @@ -1427,8 +1558,6 @@ raid_groups: ret = close_ctree(root); BUG_ON(ret); - free(label); return 0; } - |