summary refs log tree commit diff
path: root/mkfs.c
diff options
context:
space:
mode:
Diffstat (limited to 'mkfs.c')
-rw-r--r-- mkfs.c 407
1 files changed, 268 insertions, 139 deletions
diff --git a/mkfs.c b/mkfs.c
index e3ced19..7ff60e5 100644
--- a/mkfs.c
+++ b/mkfs.c
@@ -19,6 +19,8 @@
#define _XOPEN_SOURCE 500
#define _GNU_SOURCE
+#include "kerncompat.h"
+
#ifndef __CHECKER__
#include <sys/ioctl.h>
#include <sys/mount.h>
@@ -37,7 +39,8 @@
#include <linux/fs.h>
#include <ctype.h>
#include <attr/xattr.h>
-#include "kerncompat.h"
+#include <blkid/blkid.h>
+#include <ftw.h>
#include "ctree.h"
#include "disk-io.h"
#include "volumes.h"
@@ -54,32 +57,6 @@ struct directory_name_entry {
struct list_head list;
};
-static u64 parse_size(char *s)
-{
- int len = strlen(s);
- char c;
- u64 mult = 1;
-
- if (!isdigit(s[len - 1])) {
- c = tolower(s[len - 1]);
- switch (c) {
- case 'g':
- mult *= 1024;
- case 'm':
- mult *= 1024;
- case 'k':
- mult *= 1024;
- case 'b':
- break;
- default:
- fprintf(stderr, "Unknown size descriptor %c\n", c);
- exit(1);
- }
- s[len - 1] = '\0';
- }
- return atol(s) * mult;
-}
-
static int make_root_dir(struct btrfs_root *root, int mixed)
{
struct btrfs_trans_handle *trans;
@@ -90,7 +67,7 @@ static int make_root_dir(struct btrfs_root *root, int mixed)
int ret;
trans = btrfs_start_transaction(root, 1);
- bytes_used = btrfs_super_bytes_used(&root->fs_info->super_copy);
+ bytes_used = btrfs_super_bytes_used(root->fs_info->super_copy);
root->fs_info->system_allocs = 1;
ret = btrfs_make_block_group(trans, root, bytes_used,
@@ -152,7 +129,7 @@ static int make_root_dir(struct btrfs_root *root, int mixed)
location.offset = (u64)-1;
ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
"default", 7,
- btrfs_super_root_dir(&root->fs_info->super_copy),
+ btrfs_super_root_dir(root->fs_info->super_copy),
&location, BTRFS_FT_DIR, 0);
if (ret)
goto err;
@@ -228,19 +205,66 @@ static int create_one_raid_group(struct btrfs_trans_handle *trans,
static int create_raid_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 data_profile,
- u64 metadata_profile, int mixed)
+ int data_profile_opt, u64 metadata_profile,
+ int metadata_profile_opt, int mixed, int ssd)
{
- u64 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy);
- u64 allowed;
+ u64 num_devices = btrfs_super_num_devices(root->fs_info->super_copy);
+ u64 allowed = 0;
+ u64 devices_for_raid = num_devices;
int ret;
- if (num_devices == 1)
- allowed = BTRFS_BLOCK_GROUP_DUP;
- else if (num_devices >= 4) {
- allowed = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10;
- } else
- allowed = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1;
+ /*
+ * Set default profiles according to number of added devices.
+ * For mixed groups defaults are single/single.
+ */
+ if (!metadata_profile_opt && !mixed) {
+ if (num_devices == 1 && ssd)
+ printf("Detected a SSD, turning off metadata "
+ "duplication. Mkfs with -m dup if you want to "
+ "force metadata duplication.\n");
+ metadata_profile = (num_devices > 1) ?
+ BTRFS_BLOCK_GROUP_RAID1 : (ssd) ? 0: BTRFS_BLOCK_GROUP_DUP;
+ }
+ if (!data_profile_opt && !mixed) {
+ data_profile = (num_devices > 1) ?
+ BTRFS_BLOCK_GROUP_RAID0 : 0; /* raid0 or single */
+ }
+
+ if (devices_for_raid > 4)
+ devices_for_raid = 4;
+
+ switch (devices_for_raid) {
+ default:
+ case 4:
+ allowed |= BTRFS_BLOCK_GROUP_RAID10;
+ case 3:
+ allowed |= BTRFS_BLOCK_GROUP_RAID6;
+ case 2:
+ allowed |= BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID5;
+ break;
+ case 1:
+ allowed |= BTRFS_BLOCK_GROUP_DUP;
+ }
+
+ if (metadata_profile & ~allowed) {
+ fprintf(stderr, "unable to create FS with metadata "
+ "profile %llu (have %llu devices)\n", metadata_profile,
+ num_devices);
+ exit(1);
+ }
+ if (data_profile & ~allowed) {
+ fprintf(stderr, "unable to create FS with data "
+ "profile %llu (have %llu devices)\n", data_profile,
+ num_devices);
+ exit(1);
+ }
+
+ /* allow dup'ed data chunks only in mixed mode */
+ if (!mixed && (data_profile & BTRFS_BLOCK_GROUP_DUP)) {
+ fprintf(stderr, "dup for data is allowed only in mixed mode\n");
+ exit(1);
+ }
if (allowed & metadata_profile) {
u64 meta_flags = BTRFS_BLOCK_GROUP_METADATA;
@@ -302,7 +326,8 @@ static void print_usage(void)
fprintf(stderr, "options:\n");
fprintf(stderr, "\t -A --alloc-start the offset to start the FS\n");
fprintf(stderr, "\t -b --byte-count total number of bytes in the FS\n");
- fprintf(stderr, "\t -d --data data profile, raid0, raid1, raid10 or single\n");
+ fprintf(stderr, "\t -d --data data profile, raid0, raid1, raid5, raid6, raid10, dup or single\n");
+ fprintf(stderr, "\t -f --force force overwrite of existing filesystem\n");
fprintf(stderr, "\t -l --leafsize size of btree leaves\n");
fprintf(stderr, "\t -L --label set a label\n");
fprintf(stderr, "\t -m --metadata metadata profile, values like data profile\n");
@@ -310,6 +335,8 @@ static void print_usage(void)
fprintf(stderr, "\t -n --nodesize size of btree nodes\n");
fprintf(stderr, "\t -s --sectorsize min block allocation\n");
fprintf(stderr, "\t -r --rootdir the source directory\n");
+ fprintf(stderr, "\t -K --nodiscard do not perform whole device TRIM\n");
+ fprintf(stderr, "\t -V --version print the mkfs.btrfs version and exit\n");
fprintf(stderr, "%s\n", BTRFS_BUILD_VERSION);
exit(1);
}
@@ -325,21 +352,27 @@ static u64 parse_profile(char *s)
if (strcmp(s, "raid0") == 0) {
return BTRFS_BLOCK_GROUP_RAID0;
} else if (strcmp(s, "raid1") == 0) {
- return BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP;
+ return BTRFS_BLOCK_GROUP_RAID1;
+ } else if (strcmp(s, "raid5") == 0) {
+ return BTRFS_BLOCK_GROUP_RAID5;
+ } else if (strcmp(s, "raid6") == 0) {
+ return BTRFS_BLOCK_GROUP_RAID6;
} else if (strcmp(s, "raid10") == 0) {
- return BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_DUP;
+ return BTRFS_BLOCK_GROUP_RAID10;
+ } else if (strcmp(s, "dup") == 0) {
+ return BTRFS_BLOCK_GROUP_DUP;
} else if (strcmp(s, "single") == 0) {
return 0;
} else {
- fprintf(stderr, "Unknown option %s\n", s);
+ fprintf(stderr, "Unknown profile %s\n", s);
print_usage();
}
+ /* not reached */
return 0;
}
static char *parse_label(char *input)
{
- int i;
int len = strlen(input);
if (len >= BTRFS_LABEL_SIZE) {
@@ -347,18 +380,13 @@ static char *parse_label(char *input)
BTRFS_LABEL_SIZE - 1);
exit(1);
}
- for (i = 0; i < len; i++) {
- if (input[i] == '/' || input[i] == '\\') {
- fprintf(stderr, "invalid label %s\n", input);
- exit(1);
- }
- }
return strdup(input);
}
static struct option long_options[] = {
{ "alloc-start", 1, NULL, 'A'},
{ "byte-count", 1, NULL, 'b' },
+ { "force", 0, NULL, 'f' },
{ "leafsize", 1, NULL, 'l' },
{ "label", 1, NULL, 'L'},
{ "metadata", 1, NULL, 'm' },
@@ -368,6 +396,7 @@ static struct option long_options[] = {
{ "data", 1, NULL, 'd' },
{ "version", 0, NULL, 'V' },
{ "rootdir", 1, NULL, 'r' },
+ { "nodiscard", 0, NULL, 'K' },
{ 0, 0, 0, 0}
};
@@ -768,7 +797,7 @@ static int add_file_items(struct btrfs_trans_handle *trans,
fd = open(path_name, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "%s open failed\n", path_name);
- goto end;
+ return ret;
}
blocks = st->st_size / sectorsize;
@@ -878,8 +907,7 @@ static int traverse_directory(struct btrfs_trans_handle *trans,
/* Add list for source directory */
dir_entry = malloc(sizeof(struct directory_name_entry));
dir_entry->dir_name = dir_name;
- dir_entry->path = malloc(strlen(dir_name) + 1);
- strcpy(dir_entry->path, dir_name);
+ dir_entry->path = strdup(dir_name);
parent_inum = highest_inum + BTRFS_FIRST_FREE_OBJECTID;
dir_entry->inum = parent_inum;
@@ -1086,16 +1114,30 @@ fail:
return -1;
}
+/*
+ * This ignores symlinks with unreadable targets and subdirs that can't
+ * be read. It's a best-effort to give a rough estimate of the size of
+ * a subdir. It doesn't guarantee that prepopulating btrfs from this
+ * tree won't still run out of space.
+ *
+ * The rounding up to 4096 is questionable. Previous code used du -B 4096.
+ */
+static u64 global_total_size;
+/*
+ * ftw() callback: add the size of each regular file (FTW_F) and directory
+ * (FTW_D), rounded up to 4096 bytes, to global_total_size.  Entries of any
+ * other type are not counted.  Always returns 0.
+ */
+static int ftw_add_entry_size(const char *fpath, const struct stat *st,
+			      int type)
+{
+	switch (type) {
+	case FTW_F:
+	case FTW_D:
+		global_total_size += round_up(st->st_size, 4096);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
static u64 size_sourcedir(char *dir_name, u64 sectorsize,
u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret)
{
u64 dir_size = 0;
u64 total_size = 0;
int ret;
- char command[1024];
- char path[512];
- char *file_name = "temp_file";
- FILE *file;
u64 default_chunk_size = 8 * 1024 * 1024; /* 8MB */
u64 allocated_meta_size = 8 * 1024 * 1024; /* 8MB */
u64 allocated_total_size = 20 * 1024 * 1024; /* 20MB */
@@ -1103,23 +1145,14 @@ static u64 size_sourcedir(char *dir_name, u64 sectorsize,
u64 num_of_allocated_meta_chunks =
allocated_meta_size / default_chunk_size;
- ret = sprintf(command, "du -B 4096 -s ");
+ global_total_size = 0;
+ ret = ftw(dir_name, ftw_add_entry_size, 10);
+ dir_size = global_total_size;
if (ret < 0) {
- fprintf(stderr, "error executing sprintf for du command\n");
- return -1;
+ fprintf(stderr, "ftw subdir walk of '%s' failed: %s\n",
+ dir_name, strerror(errno));
+ exit(1);
}
- strcat(command, dir_name);
- strcat(command, " > ");
- strcat(command, file_name);
- ret = system(command);
-
- file = fopen(file_name, "r");
- ret = fscanf(file, "%lld %s\n", &dir_size, path);
- fclose(file);
- remove(file_name);
-
- dir_size *= sectorsize;
- *size_of_data_ret = dir_size;
num_of_meta_chunks = (dir_size / 2) / default_chunk_size;
if (((dir_size / 2) % default_chunk_size) != 0)
@@ -1159,6 +1192,67 @@ static int zero_output_file(int out_fd, u64 size, u32 sectorsize)
return ret;
}
+/*
+ * Validate a leaf or node size against the sector size.
+ *
+ * The size is rejected if it is smaller than sectorsize, larger than
+ * BTRFS_MAX_METADATA_BLOCKSIZE, or has any of the bits of (sectorsize - 1)
+ * set.  A message naming the violated limit is printed to stderr.
+ *
+ * Returns 0 when the size is acceptable, -1 otherwise.
+ */
+static int check_leaf_or_node_size(u32 size, u32 sectorsize)
+{
+	if (size < sectorsize) {
+		fprintf(stderr,
+			"Illegal leafsize (or nodesize) %u (smaller than %u)\n",
+			size, sectorsize);
+		return -1;
+	}
+
+	if (size > BTRFS_MAX_METADATA_BLOCKSIZE) {
+		fprintf(stderr,
+			"Illegal leafsize (or nodesize) %u (larger than %u)\n",
+			size, BTRFS_MAX_METADATA_BLOCKSIZE);
+		return -1;
+	}
+
+	if ((size & (sectorsize - 1)) != 0) {
+		fprintf(stderr,
+			"Illegal leafsize (or nodesize) %u (not align to %u)\n",
+			size, sectorsize);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Report whether the whole disk backing 'file' is non-rotational.
+ *
+ * The device number of 'file' is resolved through libblkid, mapped to its
+ * whole-disk name, and /sys/block/<disk>/queue/rotational is read.
+ *
+ * Returns 1 if the first byte of that sysfs file is '0', and 0 otherwise,
+ * including whenever any lookup step fails.
+ */
+static int is_ssd(const char *file)
+{
+	blkid_probe probe;
+	char wholedisk[32];
+	char sysfs_path[PATH_MAX];
+	dev_t devno;
+	int fd;
+	int ret;
+	char rotational;
+
+	probe = blkid_new_probe_from_filename(file);
+	if (!probe)
+		return 0;
+
+	/* Device number of this disk (possibly a partition) */
+	devno = blkid_probe_get_devno(probe);
+	if (!devno) {
+		/* Release the probe on this early exit as well */
+		blkid_free_probe(probe);
+		return 0;
+	}
+
+	/* Get whole disk name (not full path) for this devno */
+	ret = blkid_devno_to_wholedisk(devno, wholedisk, sizeof(wholedisk),
+				       NULL);
+	blkid_free_probe(probe);
+	if (ret) {
+		/* wholedisk may be unset; do not build a path from it */
+		return 0;
+	}
+
+	snprintf(sysfs_path, PATH_MAX, "/sys/block/%s/queue/rotational",
+		 wholedisk);
+
+	fd = open(sysfs_path, O_RDONLY);
+	if (fd < 0)
+		return 0;
+
+	/*
+	 * Compare against a signed 1: read() returns -1 on error, which a
+	 * comparison against the unsigned sizeof() result would not catch.
+	 */
+	if (read(fd, &rotational, 1) != 1) {
+		close(fd);
+		return 0;
+	}
+	close(fd);
+
+	/*
+	 * 'rotational' is a single byte, not a NUL-terminated string, so it
+	 * must not be passed to atoi(); test the character directly.
+	 */
+	return rotational == '0';
+}
+
int main(int ac, char **av)
{
char *file;
@@ -1170,9 +1264,9 @@ int main(int ac, char **av)
u64 dev_block_count = 0;
u64 blocks[7];
u64 alloc_start = 0;
- u64 metadata_profile = BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP;
- u64 data_profile = BTRFS_BLOCK_GROUP_RAID0;
- u32 leafsize = getpagesize();
+ u64 metadata_profile = 0;
+ u64 data_profile = 0;
+ u32 leafsize = sysconf(_SC_PAGESIZE);
u32 sectorsize = 4096;
u32 nodesize = leafsize;
u32 stripesize = 4096;
@@ -1184,6 +1278,9 @@ int main(int ac, char **av)
int mixed = 0;
int data_profile_opt = 0;
int metadata_profile_opt = 0;
+ int nodiscard = 0;
+ int ssd = 0;
+ int force_overwrite = 0;
char *source_dir = NULL;
int source_dir_set = 0;
@@ -1191,22 +1288,32 @@ int main(int ac, char **av)
u64 size_of_data = 0;
u64 source_dir_size = 0;
char *pretty_buf;
+ struct btrfs_super_block *super;
+ u64 flags;
+ int dev_cnt = 0;
+ int saved_optind;
+ char estr[100];
while(1) {
int c;
- c = getopt_long(ac, av, "A:b:l:n:s:m:d:L:r:VM", long_options,
- &option_index);
+ c = getopt_long(ac, av, "A:b:fl:n:s:m:d:L:r:VMK",
+ long_options, &option_index);
if (c < 0)
break;
switch(c) {
case 'A':
alloc_start = parse_size(optarg);
break;
+ case 'f':
+ force_overwrite = 1;
+ break;
case 'd':
data_profile = parse_profile(optarg);
data_profile_opt = 1;
break;
case 'l':
+ case 'n':
+ nodesize = parse_size(optarg);
leafsize = parse_size(optarg);
break;
case 'L':
@@ -1219,9 +1326,6 @@ int main(int ac, char **av)
case 'M':
mixed = 1;
break;
- case 'n':
- nodesize = parse_size(optarg);
- break;
case 's':
sectorsize = parse_size(optarg);
break;
@@ -1241,50 +1345,67 @@ int main(int ac, char **av)
source_dir = optarg;
source_dir_set = 1;
break;
+ case 'K':
+ nodiscard=1;
+ break;
default:
print_usage();
}
}
- sectorsize = max(sectorsize, (u32)getpagesize());
- if (leafsize < sectorsize || (leafsize & (sectorsize - 1))) {
- fprintf(stderr, "Illegal leafsize %u\n", leafsize);
+ sectorsize = max(sectorsize, (u32)sysconf(_SC_PAGESIZE));
+ if (check_leaf_or_node_size(leafsize, sectorsize))
exit(1);
- }
- if (nodesize < sectorsize || (nodesize & (sectorsize - 1))) {
- fprintf(stderr, "Illegal nodesize %u\n", nodesize);
+ if (check_leaf_or_node_size(nodesize, sectorsize))
exit(1);
- }
- ac = ac - optind;
- if (ac == 0)
+ saved_optind = optind;
+ dev_cnt = ac - optind;
+ if (dev_cnt == 0)
print_usage();
+ if (source_dir_set && dev_cnt > 1) {
+ fprintf(stderr,
+ "The -r option is limited to a single device\n");
+ exit(1);
+ }
+ while (dev_cnt-- > 0) {
+ file = av[optind++];
+ if (is_block_device(file))
+ if (test_dev_for_mkfs(file, force_overwrite, estr)) {
+ fprintf(stderr, "Error: %s", estr);
+ exit(1);
+ }
+ }
+
+ /* if we are here that means all devs are good to btrfsify */
+ optind = saved_optind;
+ dev_cnt = ac - optind;
+
printf("\nWARNING! - %s IS EXPERIMENTAL\n", BTRFS_BUILD_VERSION);
printf("WARNING! - see http://btrfs.wiki.kernel.org before using\n\n");
- if (source_dir == 0) {
- file = av[optind++];
- ret = check_mounted(file);
- if (ret < 0) {
- fprintf(stderr, "error checking %s mount status\n", file);
- exit(1);
- }
- if (ret == 1) {
- fprintf(stderr, "%s is mounted\n", file);
- exit(1);
- }
- ac--;
+ file = av[optind++];
+ dev_cnt--;
+
+ if (!source_dir_set) {
+		/*
+		 * Open without O_EXCL so that the processing that
+		 * follows does not run into problems:
+		 * btrfs_register_one_device() fails if O_EXCL is on.
+		 */
fd = open(file, O_RDWR);
if (fd < 0) {
- fprintf(stderr, "unable to open %s\n", file);
+ fprintf(stderr, "unable to open %s: %s\n", file,
+ strerror(errno));
exit(1);
}
first_file = file;
- ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count, &mixed);
- if (block_count == 0)
- block_count = dev_block_count;
+ ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count,
+ block_count, &mixed, nodiscard);
+ if (block_count && block_count > dev_block_count) {
+ fprintf(stderr, "%s is smaller than requested size\n", file);
+ exit(1);
+ }
} else {
- ac = 0;
- file = av[optind++];
fd = open_target(file);
if (fd < 0) {
fprintf(stderr, "unable to open the %s\n", file);
@@ -1301,13 +1422,13 @@ int main(int ac, char **av)
fprintf(stderr, "unable to zero the output file\n");
exit(1);
}
+ /* our "device" is the new image file */
+ dev_block_count = block_count;
}
- if (mixed) {
- if (!metadata_profile_opt)
- metadata_profile = 0;
- if (!data_profile_opt)
- data_profile = 0;
+ ssd = is_ssd(file);
+
+ if (mixed) {
if (metadata_profile != data_profile) {
fprintf(stderr, "With mixed block groups data and metadata "
"profiles must be the same\n");
@@ -1321,14 +1442,20 @@ int main(int ac, char **av)
leafsize * i;
}
- ret = make_btrfs(fd, file, label, blocks, block_count,
+ ret = make_btrfs(fd, file, label, blocks, dev_block_count,
nodesize, leafsize,
sectorsize, stripesize);
if (ret) {
fprintf(stderr, "error during mkfs %d\n", ret);
exit(1);
}
+
root = open_ctree(file, 0, O_RDWR);
+ if (!root) {
+ fprintf(stderr, "Open ctree failed\n");
+ close(fd);
+ exit(1);
+ }
root->fs_info->alloc_start = alloc_start;
ret = make_root_dir(root, mixed);
@@ -1339,33 +1466,26 @@ int main(int ac, char **av)
trans = btrfs_start_transaction(root, 1);
- if (ac == 0)
+ if (dev_cnt == 0)
goto raid_groups;
btrfs_register_one_device(file);
- if (!root) {
- fprintf(stderr, "ctree init failed\n");
- return -1;
- }
zero_end = 1;
- while(ac-- > 0) {
+ while (dev_cnt-- > 0) {
int old_mixed = mixed;
file = av[optind++];
- ret = check_mounted(file);
- if (ret < 0) {
- fprintf(stderr, "error checking %s mount status\n",
- file);
- exit(1);
- }
- if (ret == 1) {
- fprintf(stderr, "%s is mounted\n", file);
- exit(1);
- }
+
+		/*
+		 * Open without O_EXCL so that the processing that
+		 * follows does not run into problems:
+		 * btrfs_register_one_device() fails if O_EXCL is on.
+		 */
fd = open(file, O_RDWR);
if (fd < 0) {
- fprintf(stderr, "unable to open %s\n", file);
+ fprintf(stderr, "unable to open %s: %s\n", file,
+ strerror(errno));
exit(1);
}
ret = btrfs_device_already_in_root(root, fd,
@@ -1376,8 +1496,8 @@ int main(int ac, char **av)
close(fd);
continue;
}
- ret = btrfs_prepare_device(fd, file, zero_end,
- &dev_block_count, &mixed);
+ ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count,
+ block_count, &mixed, nodiscard);
mixed = old_mixed;
BUG_ON(ret);
@@ -1390,25 +1510,36 @@ int main(int ac, char **av)
raid_groups:
if (!source_dir_set) {
ret = create_raid_groups(trans, root, data_profile,
- metadata_profile, mixed);
+ data_profile_opt, metadata_profile,
+ metadata_profile_opt, mixed, ssd);
BUG_ON(ret);
}
ret = create_data_reloc_tree(trans, root);
BUG_ON(ret);
- if (mixed) {
- struct btrfs_super_block *super = &root->fs_info->super_copy;
- u64 flags = btrfs_super_incompat_flags(super);
+ super = root->fs_info->super_copy;
+ flags = btrfs_super_incompat_flags(super);
+ if (mixed)
flags |= BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS;
+
+ btrfs_set_super_incompat_flags(super, flags);
+
+ if ((data_profile | metadata_profile) &
+ (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) {
+ struct btrfs_super_block *super = root->fs_info->super_copy;
+ u64 flags = btrfs_super_incompat_flags(super);
+
+ flags |= BTRFS_FEATURE_INCOMPAT_RAID56;
btrfs_set_super_incompat_flags(super, flags);
+ printf("Setting RAID5/6 feature flag\n");
}
printf("fs created label %s on %s\n\tnodesize %u leafsize %u "
"sectorsize %u size %s\n",
label, first_file, nodesize, leafsize, sectorsize,
- pretty_buf = pretty_sizes(btrfs_super_total_bytes(&root->fs_info->super_copy)));
+ pretty_buf = pretty_sizes(btrfs_super_total_bytes(root->fs_info->super_copy)));
free(pretty_buf);
printf("%s\n", BTRFS_BUILD_VERSION);
@@ -1427,8 +1558,6 @@ raid_groups:
ret = close_ctree(root);
BUG_ON(ret);
-
free(label);
return 0;
}
-