diff options
-rw-r--r-- | .gitignore | 26 | ||||
-rw-r--r-- | INSTALL | 5 | ||||
-rw-r--r-- | Makefile | 243 | ||||
-rw-r--r-- | bit-radix.c | 211 | ||||
-rw-r--r-- | bitops.h | 220 | ||||
-rw-r--r-- | btrfs-calc-size.c (renamed from calc-size.c) | 0 | ||||
-rw-r--r-- | btrfs-convert.c (renamed from convert.c) | 116 | ||||
-rw-r--r-- | btrfs-corrupt-block.c | 274 | ||||
-rw-r--r-- | btrfs-crc.c | 89 | ||||
-rw-r--r-- | btrfs-debug-tree.c (renamed from debug-tree.c) | 80 | ||||
-rw-r--r-- | btrfs-find-root.c (renamed from find-root.c) | 60 | ||||
-rw-r--r-- | btrfs-fragments.c | 454 | ||||
-rw-r--r-- | btrfs-image.c | 918 | ||||
-rw-r--r-- | btrfs-list.c | 1363 | ||||
-rw-r--r-- | btrfs-list.h | 164 | ||||
-rw-r--r-- | btrfs-map-logical.c | 9 | ||||
-rw-r--r-- | btrfs-select-super.c | 10 | ||||
-rw-r--r-- | btrfs-show-super.c | 281 | ||||
-rw-r--r-- | btrfs-show.c | 156 | ||||
-rw-r--r-- | btrfs-vol.c | 169 | ||||
-rw-r--r-- | btrfs-zero-log.c | 4 | ||||
-rw-r--r-- | btrfs.c | 600 | ||||
-rw-r--r-- | btrfs_cmds.c | 1307 | ||||
-rw-r--r-- | btrfs_cmds.h | 44 | ||||
-rw-r--r-- | btrfsctl.c | 272 | ||||
-rw-r--r-- | btrfslabel.c | 121 | ||||
-rw-r--r-- | btrfslabel.h | 5 | ||||
-rw-r--r-- | btrfstune.c | 61 | ||||
-rw-r--r-- | cmds-balance.c | 728 | ||||
-rw-r--r-- | cmds-check.c (renamed from btrfsck.c) | 1657 | ||||
-rw-r--r-- | cmds-device.c | 411 | ||||
-rw-r--r-- | cmds-filesystem.c | 534 | ||||
-rw-r--r-- | cmds-inspect.c | 315 | ||||
-rw-r--r-- | cmds-qgroup.c | 441 | ||||
-rw-r--r-- | cmds-quota.c | 174 | ||||
-rw-r--r-- | cmds-receive.c | 993 | ||||
-rw-r--r-- | cmds-replace.c | 585 | ||||
-rw-r--r-- | cmds-restore.c (renamed from restore.c) | 391 | ||||
-rw-r--r-- | cmds-scrub.c (renamed from scrub.c) | 348 | ||||
-rw-r--r-- | cmds-send.c | 695 | ||||
-rw-r--r-- | cmds-subvolume.c | 970 | ||||
-rw-r--r-- | commands.h | 121 | ||||
-rw-r--r-- | crc32c.c | 125 | ||||
-rw-r--r-- | crc32c.h | 5 | ||||
-rw-r--r-- | ctree.c | 284 | ||||
-rw-r--r-- | ctree.h | 351 | ||||
-rw-r--r-- | dir-test.c | 11 | ||||
-rw-r--r-- | disk-io.c | 437 | ||||
-rw-r--r-- | disk-io.h | 12 | ||||
-rw-r--r-- | extent-cache.h | 6 | ||||
-rw-r--r-- | extent-tree.c | 495 | ||||
-rw-r--r-- | extent_io.c | 85 | ||||
-rw-r--r-- | extent_io.h | 20 | ||||
-rw-r--r-- | file-item.c | 8 | ||||
-rw-r--r-- | free-space-cache.c | 867 | ||||
-rw-r--r-- | free-space-cache.h | 55 | ||||
-rw-r--r-- | help.c | 214 | ||||
-rw-r--r-- | ioctl.h | 270 | ||||
-rw-r--r-- | kerncompat.h | 41 | ||||
-rw-r--r-- | list.h | 40 | ||||
-rw-r--r-- | man/Makefile | 34 | ||||
-rw-r--r-- | man/btrfs-image.8.in | 11 | ||||
-rw-r--r-- | man/btrfs-show.8.in | 22 | ||||
-rw-r--r-- | man/btrfs.8.in | 258 | ||||
-rw-r--r-- | man/btrfsck.8.in | 1 | ||||
-rw-r--r-- | man/btrfsctl.8.in | 48 | ||||
-rw-r--r-- | man/mkfs.btrfs.8.in | 48 | ||||
-rw-r--r-- | mkfs.c | 407 | ||||
-rw-r--r-- | print-tree.c | 326 | ||||
-rw-r--r-- | qgroup.c | 154 | ||||
-rw-r--r-- | qgroup.h (renamed from bit-radix.h) | 26 | ||||
-rw-r--r-- | quick-test.c | 20 | ||||
-rw-r--r-- | radix-tree.h | 4 | ||||
-rw-r--r-- | raid6.c | 99 | ||||
-rw-r--r-- | random-test.c | 8 | ||||
-rw-r--r-- | rbtree.h | 6 | ||||
-rw-r--r-- | repair.c | 50 | ||||
-rw-r--r-- | repair.h | 32 | ||||
-rw-r--r-- | root-tree.c | 52 | ||||
-rw-r--r-- | send-stream.c | 485 | ||||
-rw-r--r-- | send-stream.h | 67 | ||||
-rw-r--r-- | send-test.c | 458 | ||||
-rw-r--r-- | send-utils.c | 461 | ||||
-rw-r--r-- | send-utils.h | 82 | ||||
-rw-r--r-- | send.h | 142 | ||||
-rw-r--r-- | transaction.h | 1 | ||||
-rw-r--r-- | utils.c | 839 | ||||
-rw-r--r-- | utils.h | 28 | ||||
-rw-r--r-- | version.sh | 2 | ||||
-rw-r--r-- | volumes.c | 254 | ||||
-rw-r--r-- | volumes.h | 56 |
91 files changed, 18922 insertions, 4510 deletions
@@ -1,15 +1,35 @@ *.o +*.static.o .*.o.d version.h +version man/*.gz btrfs +btrfs.static btrfs-debug-tree btrfs-map-logical -btrfs-show -btrfs-vol +btrfs-fragments btrfsck -btrfsctl +calc-size +ioctl-test +dir-test +send-test +quick-test find-root mkfs.btrfs +mkfs.btrfs.static repair restore +btrfs-convert +btrfs-find-root +btrfs-find-root.static +btrfs-image +btrfs-show-super +btrfs-zero-log +btrfs-corrupt-block +btrfs-select-super +btrfstune +libbtrfs.a +libbtrfs.so +libbtrfs.so.0 +libbtrfs.so.0.1 @@ -1,10 +1,5 @@ Install Instructions -Btrfs puts snapshots and subvolumes into the root directory of the FS. This -directory can only be changed by btrfsctl right now, and normal filesystem -operations do not work on it. The default subvolume is called 'default', -and you can create files and directories in mount_point/default - Btrfs uses libcrc32c in the kernel for file and metadata checksums. You need to compile the kernel with: @@ -1,10 +1,20 @@ CC = gcc -AM_CFLAGS = -Wall -D_FILE_OFFSET_BITS=64 -D_FORTIFY_SOURCE=2 -CFLAGS = -g -O0 +LN = ln +AR = ar +AM_CFLAGS = -Wall -D_FILE_OFFSET_BITS=64 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -DBTRFS_FLAT_INCLUDES -fPIC +CFLAGS = -g -O1 objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ - root-tree.o dir-item.o file-item.o inode-item.o \ - inode-map.o crc32c.o rbtree.o extent-cache.o extent_io.o \ - volumes.o utils.o btrfs-list.o btrfslabel.o + root-tree.o dir-item.o file-item.o inode-item.o inode-map.o \ + extent-cache.o extent_io.o volumes.o utils.o repair.o \ + qgroup.o raid6.o free-space-cache.o +cmds_objects = cmds-subvolume.o cmds-filesystem.o cmds-device.o cmds-scrub.o \ + cmds-inspect.o cmds-balance.o cmds-send.o cmds-receive.o \ + cmds-quota.o cmds-qgroup.o cmds-replace.o cmds-check.o \ + cmds-restore.o +libbtrfs_objects = send-stream.o send-utils.o rbtree.o btrfs-list.o crc32c.o +libbtrfs_headers = send-stream.h send-utils.h send.h rbtree.h btrfs-list.h \ + crc32c.h list.h kerncompat.h radix-tree.h 
extent-cache.h \ + extent_io.h ioctl.h ctree.h CHECKFLAGS= -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \ -Wuninitialized -Wshadow -Wundef @@ -13,119 +23,194 @@ DEPFLAGS = -Wp,-MMD,$(@D)/.$(@F).d,-MT,$@ INSTALL = install prefix ?= /usr/local bindir = $(prefix)/bin -LIBS=-luuid -RESTORE_LIBS=-lz +lib_LIBS = -luuid -lblkid -lm -lz -llzo2 -L. +libdir ?= $(prefix)/lib +incdir = $(prefix)/include/btrfs +LIBS = $(lib_LIBS) $(libs_static) -progs = btrfsctl mkfs.btrfs btrfs-debug-tree btrfs-show btrfs-vol btrfsck \ - btrfs btrfs-map-logical restore find-root calc-size btrfs-corrupt-block +ifeq ("$(origin V)", "command line") + BUILD_VERBOSE = $(V) +endif +ifndef BUILD_VERBOSE + BUILD_VERBOSE = 0 +endif -# make C=1 to enable sparse -ifdef C - check = sparse $(CHECKFLAGS) +ifeq ($(BUILD_VERBOSE),1) + Q = else - check = ls + Q = @ endif -.c.o: - $(check) $< - $(CC) $(DEPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c $< - -MAKE:=$(MAKE) --no-print-directory +MAKEOPTS = --no-print-directory Q=$(Q) ifeq ($(SUB),) .DEFAULT_GOAL := all clean: - @$(MAKE) SUB=1 $@ + @$(MAKE) $(MAKEOPTS) SUB=1 $@ %: - @$(MAKE) SUB=1 version - @$(MAKE) SUB=1 $@ + @$(MAKE) $(MAKEOPTS) SUB=1 version.h + @$(MAKE) $(MAKEOPTS) SUB=1 $@ else -all: $(progs) manpages - -version: version.sh - bash version.sh - -btrfs: $(objects) btrfs.o btrfs_cmds.o scrub.o - $(CC) $(CFLAGS) -o btrfs btrfs.o btrfs_cmds.o scrub.o \ - $(objects) $(LDFLAGS) $(LIBS) -lpthread - -calc-size: $(objects) calc-size.o - gcc $(CFLAGS) -o calc-size calc-size.o $(objects) $(LDFLAGS) $(LIBS) +progs = mkfs.btrfs btrfs-debug-tree btrfsck \ + btrfs btrfs-map-logical btrfs-image btrfs-zero-log \ + btrfs-find-root btrfstune btrfs-show-super -find-root: $(objects) find-root.o - gcc $(CFLAGS) -o find-root find-root.o $(objects) $(LDFLAGS) $(LIBS) +# external libs required by various binaries; for btrfs-foo, +# specify btrfs_foo_libs = <list of libs>; see $($(subst...)) rules below +btrfs_convert_libs = -lext2fs -lcom_err +btrfs_image_libs = 
-lpthread +btrfs_fragment_libs = -lgd -lpng -ljpeg -lfreetype -restore: $(objects) restore.o - gcc $(CFLAGS) -o restore restore.o $(objects) $(LDFLAGS) $(LIBS) $(RESTORE_LIBS) +# Create all the static targets +static_objects = $(patsubst %.o, %.static.o, $(objects)) +static_cmds_objects = $(patsubst %.o, %.static.o, $(cmds_objects)) +static_libbtrfs_objects = $(patsubst %.o, %.static.o, $(libbtrfs_objects)) -btrfsctl: $(objects) btrfsctl.o - $(CC) $(CFLAGS) -o btrfsctl btrfsctl.o $(objects) $(LDFLAGS) $(LIBS) +# Define static compilation flags +STATIC_CFLAGS = $(CFLAGS) -ffunction-sections -fdata-sections +STATIC_LDFLAGS = -static -Wl,--gc-sections +STATIC_LIBS = $(lib_LIBS) -lpthread -btrfs-vol: $(objects) btrfs-vol.o - $(CC) $(CFLAGS) -o btrfs-vol btrfs-vol.o $(objects) $(LDFLAGS) $(LIBS) +libs_shared = libbtrfs.so.0.1 +libs_static = libbtrfs.a +libs = $(libs_shared) $(libs_static) +lib_links = libbtrfs.so.0 libbtrfs.so +headers = $(libbtrfs_headers) -btrfs-show: $(objects) btrfs-show.o - $(CC) $(CFLAGS) -o btrfs-show btrfs-show.o $(objects) $(LDFLAGS) $(LIBS) - -btrfsck: $(objects) btrfsck.o - $(CC) $(CFLAGS) -o btrfsck btrfsck.o $(objects) $(LDFLAGS) $(LIBS) - -mkfs.btrfs: $(objects) mkfs.o - $(CC) $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o $(LDFLAGS) $(LIBS) - -btrfs-debug-tree: $(objects) debug-tree.o - $(CC) $(CFLAGS) -o btrfs-debug-tree $(objects) debug-tree.o $(LDFLAGS) $(LIBS) +# make C=1 to enable sparse +ifdef C + check = sparse $(CHECKFLAGS) +else + check = true +endif -btrfs-zero-log: $(objects) btrfs-zero-log.o - $(CC) $(CFLAGS) -o btrfs-zero-log $(objects) btrfs-zero-log.o $(LDFLAGS) $(LIBS) +.c.o: + $(Q)$(check) $< + @echo " [CC] $@" + $(Q)$(CC) $(DEPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c $< + +%.static.o: %.c + @echo " [CC] $@" + $(Q)$(CC) $(DEPFLAGS) $(AM_CFLAGS) $(STATIC_CFLAGS) -c $< -o $@ + +all: version.h $(progs) manpages + +# +# NOTE: For static compiles, you need to have all the required libs +# static equivalent available +# +static: version.h 
btrfs.static mkfs.btrfs.static btrfs-find-root.static + +version.h: version.sh + @echo " [SH] $@" + $(Q)bash version.sh + +$(libs_shared): $(libbtrfs_objects) $(lib_links) send.h + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) $(libbtrfs_objects) $(LDFLAGS) $(lib_LIBS) \ + -shared -Wl,-soname,libbtrfs.so -o libbtrfs.so.0.1 + +$(libs_static): $(libbtrfs_objects) + @echo " [AR] $@" + $(Q)$(AR) cru libbtrfs.a $(libbtrfs_objects) + +$(lib_links): + @echo " [LN] $@" + $(Q)$(LN) -sf libbtrfs.so.0.1 libbtrfs.so.0 + $(Q)$(LN) -sf libbtrfs.so.0.1 libbtrfs.so + +# keep intermediate files from the below implicit rules around +.PRECIOUS: $(addsuffix .o,$(progs)) + +# Make any btrfs-foo out of btrfs-foo.o, with appropriate libs. +# The $($(subst...)) bits below takes the btrfs_*_libs definitions above and +# turns them into a list of libraries to link against if they exist +# +# For static variants, use an extra $(subst) to get rid of the ".static" +# from the target name before translating to list of libs + +btrfs-%.static: version.h $(static_objects) btrfs-%.static.o $(static_libbtrfs_objects) + @echo " [LD] $@" + $(Q)$(CC) $(STATIC_CFLAGS) -o $@ $@.o $(static_objects) \ + $(static_libbtrfs_objects) $(STATIC_LDFLAGS) $(STATIC_LIBS) \ + $($(subst -,_,$(subst .static,,$@)-libs)) + +btrfs-%: version.h $(objects) $(libs) btrfs-%.o + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o $@ $(objects) $@.o $(LDFLAGS) $(LIBS) $($(subst -,_,$@-libs)) + +btrfs: $(objects) btrfs.o help.o $(cmds_objects) $(libs) + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o btrfs btrfs.o help.o $(cmds_objects) \ + $(objects) $(LDFLAGS) $(LIBS) -lpthread -btrfs-select-super: $(objects) btrfs-select-super.o - $(CC) $(CFLAGS) -o btrfs-select-super $(objects) btrfs-select-super.o $(LDFLAGS) $(LIBS) +btrfs.static: $(static_objects) btrfs.static.o help.static.o $(static_cmds_objects) $(static_libbtrfs_objects) + @echo " [LD] $@" + $(Q)$(CC) $(STATIC_CFLAGS) -o btrfs.static btrfs.static.o help.static.o $(static_cmds_objects) \ + 
$(static_objects) $(static_libbtrfs_objects) $(STATIC_LDFLAGS) $(STATIC_LIBS) -btrfstune: $(objects) btrfstune.o - $(CC) $(CFLAGS) -o btrfstune $(objects) btrfstune.o $(LDFLAGS) $(LIBS) +# For backward compatibility, 'btrfs' changes behaviour to fsck if it's named 'btrfsck' +btrfsck: btrfs + @echo " [LN] $@" + $(Q)$(LN) -f btrfs btrfsck -btrfs-map-logical: $(objects) btrfs-map-logical.o - $(CC) $(CFLAGS) -o btrfs-map-logical $(objects) btrfs-map-logical.o $(LDFLAGS) $(LIBS) +mkfs.btrfs: $(objects) $(libs) mkfs.o + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o $(LDFLAGS) $(LIBS) -btrfs-corrupt-block: $(objects) btrfs-corrupt-block.o - $(CC) $(CFLAGS) -o btrfs-corrupt-block $(objects) btrfs-corrupt-block.o $(LDFLAGS) $(LIBS) +mkfs.btrfs.static: $(static_objects) mkfs.static.o $(static_libbtrfs_objects) + @echo " [LD] $@" + $(Q)$(CC) $(STATIC_CFLAGS) -o mkfs.btrfs.static mkfs.static.o $(static_objects) \ + $(static_libbtrfs_objects) $(STATIC_LDFLAGS) $(STATIC_LIBS) -btrfs-image: $(objects) btrfs-image.o - $(CC) $(CFLAGS) -o btrfs-image $(objects) btrfs-image.o -lpthread -lz $(LDFLAGS) $(LIBS) +btrfstune: $(objects) $(libs) btrfstune.o + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o btrfstune $(objects) btrfstune.o $(LDFLAGS) $(LIBS) -dir-test: $(objects) dir-test.o - $(CC) $(CFLAGS) -o dir-test $(objects) dir-test.o $(LDFLAGS) $(LIBS) +dir-test: $(objects) $(libs) dir-test.o + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o dir-test $(objects) dir-test.o $(LDFLAGS) $(LIBS) -quick-test: $(objects) quick-test.o - $(CC) $(CFLAGS) -o quick-test $(objects) quick-test.o $(LDFLAGS) $(LIBS) +quick-test: $(objects) $(libs) quick-test.o + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o quick-test $(objects) quick-test.o $(LDFLAGS) $(LIBS) -convert: $(objects) convert.o - $(CC) $(CFLAGS) -o btrfs-convert $(objects) convert.o -lext2fs -lcom_err $(LDFLAGS) $(LIBS) +ioctl-test: $(objects) $(libs) ioctl-test.o + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o ioctl-test 
$(objects) ioctl-test.o $(LDFLAGS) $(LIBS) -ioctl-test: $(objects) ioctl-test.o - $(CC) $(CFLAGS) -o ioctl-test $(objects) ioctl-test.o $(LDFLAGS) $(LIBS) +send-test: $(objects) $(libs) send-test.o + @echo " [LD] $@" + $(Q)$(CC) $(CFLAGS) -o send-test $(objects) send-test.o $(LDFLAGS) $(LIBS) -lpthread manpages: - cd man; make + $(Q)$(MAKE) $(MAKEOPTS) -C man install-man: - cd man; make install + cd man; $(MAKE) install clean : - rm -f $(progs) cscope.out *.o .*.d btrfs-convert btrfs-image btrfs-select-super \ - btrfs-zero-log btrfstune dir-test ioctl-test quick-test version.h - cd man; make clean - -install: $(progs) install-man + @echo "Cleaning" + $(Q)rm -f $(progs) cscope.out *.o .*.d btrfs-convert btrfs-image btrfs-select-super \ + btrfs-zero-log btrfstune dir-test ioctl-test quick-test send-test btrfsck \ + btrfs.static mkfs.btrfs.static btrfs-calc-size \ + version.h \ + $(libs) $(lib_links) + $(Q)$(MAKE) $(MAKEOPTS) -C man $@ + +install: $(libs) $(progs) install-man $(INSTALL) -m755 -d $(DESTDIR)$(bindir) $(INSTALL) $(progs) $(DESTDIR)$(bindir) - if [ -e btrfs-convert ]; then $(INSTALL) btrfs-convert $(DESTDIR)$(bindir); fi + $(INSTALL) -m755 -d $(DESTDIR)$(libdir) + $(INSTALL) $(libs) $(DESTDIR)$(libdir) + cp -a $(lib_links) $(DESTDIR)$(libdir) + $(INSTALL) -m755 -d $(DESTDIR)$(incdir) + $(INSTALL) -m644 $(headers) $(DESTDIR)$(incdir) -include .*.d diff --git a/bit-radix.c b/bit-radix.c deleted file mode 100644 index 57f6f3c..0000000 --- a/bit-radix.c +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#include "kerncompat.h" -#include "radix-tree.h" - -#define BIT_ARRAY_BYTES 256 -#define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8) - -int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) -{ - unsigned long *bits; - unsigned long slot; - int bit_slot; - int ret; - - slot = bit / BIT_RADIX_BITS_PER_ARRAY; - bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; - - bits = radix_tree_lookup(radix, slot); - if (!bits) { - bits = malloc(BIT_ARRAY_BYTES); - if (!bits) - return -ENOMEM; - memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); - bits[0] = slot; - radix_tree_preload(GFP_NOFS); - ret = radix_tree_insert(radix, slot, bits); - radix_tree_preload_end(); - if (ret) - return ret; - } - __set_bit(bit_slot, bits + 1); - return 0; -} - -int test_radix_bit(struct radix_tree_root *radix, unsigned long bit) -{ - unsigned long *bits; - unsigned long slot; - int bit_slot; - - slot = bit / BIT_RADIX_BITS_PER_ARRAY; - bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; - - bits = radix_tree_lookup(radix, slot); - if (!bits) - return 0; - return test_bit(bit_slot, bits + 1); -} - -int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) -{ - unsigned long *bits; - unsigned long slot; - int bit_slot; - int i; - int empty = 1; - slot = bit / BIT_RADIX_BITS_PER_ARRAY; - bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; - - bits = radix_tree_lookup(radix, slot); - if (!bits) - return 0; - __clear_bit(bit_slot, bits + 1); - for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) { - if (bits[i]) { - empty = 0; - break; - } - } - if (empty) { - bits = radix_tree_delete(radix, slot); - BUG_ON(!bits); - free(bits); - } - return 0; -} - -#define 
BITOP_WORD(nr) ((nr) / BITS_PER_LONG) - -/** - * __ffs - find first bit in word. - * @word: The word to search - * - * Undefined if no bit exists, so code should check against 0 first. - */ -static unsigned long __ffs(unsigned long word) -{ - int num = 0; - - if (sizeof(long) == 8 && (word & 0xffffffff) == 0) { - num += 32; - word >>= sizeof(long) * 4; - } - if ((word & 0xffff) == 0) { - num += 16; - word >>= 16; - } - if ((word & 0xff) == 0) { - num += 8; - word >>= 8; - } - if ((word & 0xf) == 0) { - num += 4; - word >>= 4; - } - if ((word & 0x3) == 0) { - num += 2; - word >>= 2; - } - if ((word & 0x1) == 0) - num += 1; - return num; -} - -/** - * find_next_bit - find the next set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. 
*/ -found_middle: - return result + __ffs(tmp); -} - -int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, - unsigned long start, int nr) -{ - unsigned long *bits; - unsigned long *gang[4]; - int found; - int ret; - int i; - int total_found = 0; - unsigned long slot; - - slot = start / BIT_RADIX_BITS_PER_ARRAY; - ret = radix_tree_gang_lookup(radix, (void *)gang, slot, - ARRAY_SIZE(gang)); - found = start % BIT_RADIX_BITS_PER_ARRAY; - for (i = 0; i < ret && nr > 0; i++) { - bits = gang[i]; - while(nr > 0) { - found = find_next_bit(bits + 1, - BIT_RADIX_BITS_PER_ARRAY, - found); - if (found < BIT_RADIX_BITS_PER_ARRAY) { - *retbits = bits[0] * - BIT_RADIX_BITS_PER_ARRAY + found; - retbits++; - nr--; - total_found++; - found++; - } else - break; - } - found = 0; - } - return total_found; -} diff --git a/bitops.h b/bitops.h new file mode 100644 index 0000000..323c571 --- /dev/null +++ b/bitops.h @@ -0,0 +1,220 @@ +#ifndef _PERF_LINUX_BITOPS_H_ +#define _PERF_LINUX_BITOPS_H_ + +#include <linux/kernel.h> + +#define BITS_PER_BYTE 8 +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) +#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) + +#define for_each_set_bit(bit, addr, size) \ + for ((bit) = find_first_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +/* same as for_each_set_bit() but use bit as value to start with */ +#define for_each_set_bit_from(bit, addr, size) \ + for ((bit) = find_next_bit((addr), (size), (bit)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +static inline void set_bit(int nr, unsigned long *addr) +{ + addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); +} + +static inline void clear_bit(int nr, unsigned long *addr) +{ + addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG)); +} + +/** + * hweightN - returns the hamming 
weight of a N-bit word + * @x: the word to weigh + * + * The Hamming Weight of a number is the total number of bits set in it. + */ + +static inline unsigned int hweight32(unsigned int w) +{ + unsigned int res = w - ((w >> 1) & 0x55555555); + res = (res & 0x33333333) + ((res >> 2) & 0x33333333); + res = (res + (res >> 4)) & 0x0F0F0F0F; + res = res + (res >> 8); + return (res + (res >> 16)) & 0x000000FF; +} + +static inline unsigned long hweight64(__u64 w) +{ +#if BITS_PER_LONG == 32 + return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); +#elif BITS_PER_LONG == 64 + __u64 res = w - ((w >> 1) & 0x5555555555555555ul); + res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul); + res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful; + res = res + (res >> 8); + res = res + (res >> 16); + return (res + (res >> 32)) & 0x00000000000000FFul; +#endif +} + +static inline unsigned long hweight_long(unsigned long w) +{ + return sizeof(w) == 4 ? hweight32(w) : hweight64(w); +} + +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static __always_inline unsigned long __ffs(unsigned long word) +{ + int num = 0; + +#if BITS_PER_LONG == 64 + if ((word & 0xffffffff) == 0) { + num += 32; + word >>= 32; + } +#endif + if ((word & 0xffff) == 0) { + num += 16; + word >>= 16; + } + if ((word & 0xff) == 0) { + num += 8; + word >>= 8; + } + if ((word & 0xf) == 0) { + num += 4; + word >>= 4; + } + if ((word & 0x3) == 0) { + num += 2; + word >>= 2; + } + if ((word & 0x1) == 0) + num += 1; + return num; +} + +#define ffz(x) __ffs(~(x)) + +/* + * Find the first set bit in a memory region. 
+ */ +static inline unsigned long +find_first_bit(const unsigned long *addr, unsigned long size) +{ + const unsigned long *p = addr; + unsigned long result = 0; + unsigned long tmp; + + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + + tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found: + return result + __ffs(tmp); +} + +/* + * Find the next set bit in a memory region. + */ +static inline unsigned long +find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + __ffs(tmp); +} + +/* + * This implementation of find_{first,next}_zero_bit was stolen from + * Linus' asm-alpha/bitops.h. 
+ */ +static inline unsigned long +find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp |= ~0UL >> (BITS_PER_LONG - offset); + if (size < BITS_PER_LONG) + goto found_first; + if (~tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if (~(tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp |= ~0UL << size; + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. */ +found_middle: + return result + ffz(tmp); +} +#endif diff --git a/calc-size.c b/btrfs-calc-size.c index c4adfb0..c4adfb0 100644 --- a/calc-size.c +++ b/btrfs-calc-size.c diff --git a/convert.c b/btrfs-convert.c index 291dc27..399856f 100644 --- a/convert.c +++ b/btrfs-convert.c @@ -18,6 +18,9 @@ #define _XOPEN_SOURCE 600 #define _GNU_SOURCE 1 + +#include "kerncompat.h" + #ifndef __CHECKER__ #include <sys/ioctl.h> #include <sys/mount.h> @@ -31,7 +34,7 @@ #include <unistd.h> #include <uuid/uuid.h> #include <linux/fs.h> -#include "kerncompat.h" + #include "ctree.h" #include "disk-io.h" #include "volumes.h" @@ -249,7 +252,7 @@ static u8 filetype_conversion_table[EXT2_FT_MAX] = { }; static int dir_iterate_proc(ext2_ino_t dir, int entry, - struct ext2_dir_entry *old, + struct ext2_dir_entry *dirent, int offset, int blocksize, char *buf,void *priv_data) { @@ -259,12 +262,14 @@ static int dir_iterate_proc(ext2_ino_t dir, int entry, u64 inode_size; char dotdot[] = ".."; struct btrfs_key location; - struct ext2_dir_entry_2 *dirent = (struct ext2_dir_entry_2 *)old; struct dir_iterate_data *idata = (struct dir_iterate_data *)priv_data; + 
int name_len; + + name_len = dirent->name_len & 0xFF; objectid = dirent->inode + INO_OFFSET; - if (!strncmp(dirent->name, dotdot, dirent->name_len)) { - if (dirent->name_len == 2) { + if (!strncmp(dirent->name, dotdot, name_len)) { + if (name_len == 2) { BUG_ON(idata->parent != 0); idata->parent = objectid; } @@ -277,24 +282,24 @@ static int dir_iterate_proc(ext2_ino_t dir, int entry, location.offset = 0; btrfs_set_key_type(&location, BTRFS_INODE_ITEM_KEY); - file_type = dirent->file_type; + file_type = dirent->name_len >> 8; BUG_ON(file_type > EXT2_FT_SYMLINK); ret = btrfs_insert_dir_item(idata->trans, idata->root, - dirent->name, dirent->name_len, + dirent->name, name_len, idata->objectid, &location, filetype_conversion_table[file_type], idata->index_cnt); if (ret) goto fail; ret = btrfs_insert_inode_ref(idata->trans, idata->root, - dirent->name, dirent->name_len, + dirent->name, name_len, objectid, idata->objectid, idata->index_cnt); if (ret) goto fail; idata->index_cnt++; inode_size = btrfs_stack_inode_size(idata->inode) + - dirent->name_len * 2; + name_len * 2; btrfs_set_stack_inode_size(idata->inode, inode_size); return 0; fail: @@ -1258,7 +1263,7 @@ static int create_ext2_image(struct btrfs_root *root, ext2_filsys ext2_fs, u64 total_bytes; u32 sectorsize = root->sectorsize; - total_bytes = btrfs_super_total_bytes(&fs_info->super_copy); + total_bytes = btrfs_super_total_bytes(fs_info->super_copy); first_free = BTRFS_SUPER_INFO_OFFSET + sectorsize * 2 - 1; first_free &= ~((u64)sectorsize - 1); @@ -1504,66 +1509,6 @@ fail: return new_root; } -/* - * Fixup block accounting. The initial block accounting created by - * make_block_groups isn't accuracy in this case. 
- */ -static int fixup_block_accounting(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - int ret; - int slot; - u64 start = 0; - u64 bytes_used = 0; - struct btrfs_path path; - struct btrfs_key key; - struct extent_buffer *leaf; - struct btrfs_block_group_cache *cache; - struct btrfs_fs_info *fs_info = root->fs_info; - - while(1) { - cache = btrfs_lookup_block_group(fs_info, start); - if (!cache) - break; - start = cache->key.objectid + cache->key.offset; - btrfs_set_block_group_used(&cache->item, 0); - cache->space_info->bytes_used = 0; - } - - btrfs_init_path(&path); - key.offset = 0; - key.objectid = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - ret = btrfs_search_slot(trans, root->fs_info->extent_root, - &key, &path, 0, 0); - if (ret < 0) - return ret; - while(1) { - leaf = path.nodes[0]; - slot = path.slots[0]; - if (slot >= btrfs_header_nritems(leaf)) { - ret = btrfs_next_leaf(root, &path); - if (ret < 0) - return ret; - if (ret > 0) - break; - leaf = path.nodes[0]; - slot = path.slots[0]; - } - btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.type == BTRFS_EXTENT_ITEM_KEY) { - bytes_used += key.offset; - ret = btrfs_update_block_group(trans, root, - key.objectid, key.offset, 1, 0); - BUG_ON(ret); - } - path.slots[0]++; - } - btrfs_set_super_bytes_used(&root->fs_info->super_copy, bytes_used); - btrfs_release_path(root, &path); - return 0; -} - static int create_chunk_mapping(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -1584,7 +1529,7 @@ static int create_chunk_mapping(struct btrfs_trans_handle *trans, btrfs_init_path(&path); - total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); + total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; BUG_ON(list_empty(&info->fs_devices->devices)); @@ -1735,7 +1680,7 @@ static int init_btrfs(struct btrfs_root *root) ret = btrfs_make_block_groups(trans, root); if (ret) goto err; - ret = 
fixup_block_accounting(trans, root); + ret = btrfs_fix_block_accounting(trans, root); if (ret) goto err; ret = create_chunk_mapping(trans, root); @@ -1748,13 +1693,13 @@ static int init_btrfs(struct btrfs_root *root) memcpy(&location, &root->root_key, sizeof(location)); location.offset = (u64)-1; ret = btrfs_insert_dir_item(trans, fs_info->tree_root, "default", 7, - btrfs_super_root_dir(&fs_info->super_copy), + btrfs_super_root_dir(fs_info->super_copy), &location, BTRFS_FT_DIR, 0); if (ret) goto err; ret = btrfs_insert_inode_ref(trans, fs_info->tree_root, "default", 7, location.objectid, - btrfs_super_root_dir(&fs_info->super_copy), 0); + btrfs_super_root_dir(fs_info->super_copy), 0); if (ret) goto err; btrfs_set_root_dirid(&fs_info->fs_root->root_item, @@ -2284,7 +2229,7 @@ static int fixup_chunk_mapping(struct btrfs_root *root) btrfs_release_path(chunk_root, &path); /* fixup the system chunk array in super block */ - btrfs_set_super_sys_array_size(&info->super_copy, 0); + btrfs_set_super_sys_array_size(info->super_copy, 0); key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; key.offset = 0; @@ -2334,7 +2279,8 @@ err: int do_convert(const char *devname, int datacsum, int packing, int noxattr) { - int i, fd, ret; + int i, ret; + int fd = -1; u32 blocksize; u64 blocks[7]; u64 total_bytes; @@ -2464,6 +2410,8 @@ int do_convert(const char *devname, int datacsum, int packing, int noxattr) printf("conversion complete.\n"); return 0; fail: + if (fd != -1) + close(fd); fprintf(stderr, "conversion aborted.\n"); return -1; } @@ -2479,15 +2427,15 @@ static int may_rollback(struct btrfs_root *root) int num_stripes; int ret; - if (btrfs_super_num_devices(&info->super_copy) != 1) + if (btrfs_super_num_devices(info->super_copy) != 1) goto fail; bytenr = BTRFS_SUPER_INFO_OFFSET; - total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); + total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); while (1) { ret = btrfs_map_block(&info->mapping_tree, WRITE, bytenr, - 
&length, &multi, 0); + &length, &multi, 0, NULL); if (ret) goto fail; @@ -2509,7 +2457,7 @@ fail: int do_rollback(const char *devname, int force) { - int fd; + int fd = -1; int ret; int i; struct btrfs_root *root; @@ -2525,7 +2473,7 @@ int do_rollback(const char *devname, int force) struct btrfs_key key; struct btrfs_path path; struct extent_io_tree io_tree; - char *buf; + char *buf = NULL; char *name; u64 bytenr; u64 num_bytes; @@ -2713,7 +2661,7 @@ next_extent: goto fail; } /* create a system chunk that maps the whole device */ - ret = prepare_system_chunk_sb(&root->fs_info->super_copy); + ret = prepare_system_chunk_sb(root->fs_info->super_copy); if (ret) { fprintf(stderr, "unable to update system chunk\n"); goto fail; @@ -2805,7 +2753,11 @@ next_sector: extent_io_tree_cleanup(&io_tree); printf("rollback complete.\n"); return 0; + fail: + if (fd != -1) + close(fd); + free(buf); fprintf(stderr, "rollback aborted.\n"); return -1; } diff --git a/btrfs-corrupt-block.c b/btrfs-corrupt-block.c index ace61c1..8176fad 100644 --- a/btrfs-corrupt-block.c +++ b/btrfs-corrupt-block.c @@ -32,11 +32,6 @@ #include "list.h" #include "version.h" -/* we write the mirror info to stdout unless they are dumping the data - * to stdout - * */ -static FILE *info_file; - struct extent_buffer *debug_corrupt_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, int copy) { @@ -55,20 +50,21 @@ struct extent_buffer *debug_corrupt_block(struct btrfs_root *root, u64 bytenr, length = blocksize; while (1) { ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, - eb->start, &length, &multi, mirror_num); + eb->start, &length, &multi, + mirror_num, NULL); BUG_ON(ret); device = multi->stripes[0].dev; eb->fd = device->fd; device->total_ios++; eb->dev_bytenr = multi->stripes[0].physical; - fprintf(info_file, "mirror %d logical %Lu physical %Lu " + fprintf(stdout, "mirror %d logical %Lu physical %Lu " "device %s\n", mirror_num, (unsigned long long)bytenr, (unsigned long long)eb->dev_bytenr, 
device->name); kfree(multi); if (!copy || mirror_num == copy) { - ret = read_extent_from_disk(eb); + ret = read_extent_from_disk(eb, 0, eb->len); printf("corrupting %llu copy %d\n", eb->start, mirror_num); memset(eb->data, 0, eb->len); @@ -90,40 +86,220 @@ struct extent_buffer *debug_corrupt_block(struct btrfs_root *root, u64 bytenr, static void print_usage(void) { - fprintf(stderr, "usage: btrfs-map-logical [options] mount_point\n"); - fprintf(stderr, "\t-l Logical extent to map\n"); - fprintf(stderr, "\t-c Copy of the extent to read (usually 1 or 2)\n"); - fprintf(stderr, "\t-o Output file to hold the extent\n"); - fprintf(stderr, "\t-b Number of bytes to read\n"); + fprintf(stderr, "usage: btrfs-corrupt-block [options] device\n"); + fprintf(stderr, "\t-l Logical extent to be corrupted\n"); + fprintf(stderr, "\t-c Copy of the extent to be corrupted" + " (usually 1 or 2, default: 0)\n"); + fprintf(stderr, "\t-b Number of bytes to be corrupted\n"); + fprintf(stderr, "\t-e Extent to be corrupted\n"); + fprintf(stderr, "\t-E The whole extent free to be corrupted\n"); exit(1); } +static void corrupt_keys(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *eb) +{ + int slot; + int bad_slot; + int nr; + struct btrfs_disk_key bad_key;; + + nr = btrfs_header_nritems(eb); + if (nr == 0) + return; + + slot = rand() % nr; + bad_slot = rand() % nr; + + if (bad_slot == slot) + return; + + fprintf(stderr, "corrupting keys in block %llu slot %d swapping with %d\n", + (unsigned long long)eb->start, slot, bad_slot); + + if (btrfs_header_level(eb) == 0) { + btrfs_item_key(eb, &bad_key, bad_slot); + btrfs_set_item_key(eb, &bad_key, slot); + } else { + btrfs_node_key(eb, &bad_key, bad_slot); + btrfs_set_node_key(eb, &bad_key, slot); + } + btrfs_mark_buffer_dirty(eb); + if (!trans) { + csum_tree_block(root, eb, 0); + write_extent_to_disk(eb); + } +} + + +static int corrupt_keys_in_block(struct btrfs_root *root, u64 bytenr) +{ + struct extent_buffer 
*eb; + + eb = read_tree_block(root, bytenr, root->leafsize, 0); + if (!eb) + return -EIO;; + + corrupt_keys(NULL, root, eb); + free_extent_buffer(eb); + return 0; +} + +static int corrupt_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, int copy) +{ + struct btrfs_key key; + struct extent_buffer *leaf; + u32 item_size; + unsigned long ptr; + struct btrfs_path *path; + int ret; + int slot; + int should_del = rand() % 3; + + path = btrfs_alloc_path(); + + key.objectid = bytenr; + key.type = (u8)-1; + key.offset = (u64)-1; + + while(1) { + ret = btrfs_search_slot(trans, root->fs_info->extent_root, + &key, path, -1, 1); + if (ret < 0) + break; + + if (ret > 0) { + if (path->slots[0] == 0) + break; + path->slots[0]--; + ret = 0; + } + leaf = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != bytenr) + break; + + if (key.type != BTRFS_EXTENT_ITEM_KEY && + key.type != BTRFS_TREE_BLOCK_REF_KEY && + key.type != BTRFS_EXTENT_DATA_REF_KEY && + key.type != BTRFS_EXTENT_REF_V0_KEY && + key.type != BTRFS_SHARED_BLOCK_REF_KEY && + key.type != BTRFS_SHARED_DATA_REF_KEY) + goto next; + + if (should_del) { + fprintf(stderr, "deleting extent record: key %Lu %u %Lu\n", + key.objectid, key.type, key.offset); + + if (key.type == BTRFS_EXTENT_ITEM_KEY) { + /* make sure this extent doesn't get + * reused for other purposes */ + btrfs_pin_extent(root->fs_info, + key.objectid, key.offset); + } + + btrfs_del_item(trans, root, path); + } else { + fprintf(stderr, "corrupting extent record: key %Lu %u %Lu\n", + key.objectid, key.type, key.offset); + ptr = btrfs_item_ptr_offset(leaf, slot); + item_size = btrfs_item_size_nr(leaf, slot); + memset_extent_buffer(leaf, 0, ptr, item_size); + btrfs_mark_buffer_dirty(leaf); + } +next: + btrfs_release_path(NULL, path); + + if (key.offset > 0) + key.offset--; + if (key.offset == 0) + break; + } + + btrfs_free_path(path); + return 0; +} + +static void 
btrfs_corrupt_extent_leaf(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *eb) +{ + u32 nr = btrfs_header_nritems(eb); + u32 victim = rand() % nr; + u64 objectid; + struct btrfs_key key; + + btrfs_item_key_to_cpu(eb, &key, victim); + objectid = key.objectid; + corrupt_extent(trans, root, objectid, 1); +} + +static void btrfs_corrupt_extent_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *eb) +{ + int i; + u32 nr; + + if (!eb) + return; + + nr = btrfs_header_nritems(eb); + if (btrfs_is_leaf(eb)) { + btrfs_corrupt_extent_leaf(trans, root, eb); + return; + } + + if (btrfs_header_level(eb) == 1 && eb != root->node) { + if (rand() % 5) + return; + } + + for (i = 0; i < nr; i++) { + struct extent_buffer *next; + + next = read_tree_block(root, btrfs_node_blockptr(eb, i), + root->leafsize, btrfs_node_ptr_generation(eb, i)); + if (!next) + continue; + btrfs_corrupt_extent_tree(trans, root, next); + free_extent_buffer(next); + } +} + static struct option long_options[] = { /* { "byte-count", 1, NULL, 'b' }, */ { "logical", 1, NULL, 'l' }, { "copy", 1, NULL, 'c' }, { "bytes", 1, NULL, 'b' }, + { "extent-record", 0, NULL, 'e' }, + { "extent-tree", 0, NULL, 'E' }, + { "keys", 0, NULL, 'k' }, { 0, 0, 0, 0} }; + int main(int ac, char **av) { struct cache_tree root_cache; struct btrfs_root *root; struct extent_buffer *eb; char *dev; - char *output_file = NULL; u64 logical = 0; int ret = 0; int option_index = 0; int copy = 0; u64 bytes = 4096; - int out_fd = 0; - int err; + int extent_rec = 0; + int extent_tree = 0; + int corrupt_block_keys = 0; + + srand(128); while(1) { int c; - c = getopt_long(ac, av, "l:c:", long_options, + c = getopt_long(ac, av, "l:c:b:eEk", long_options, &option_index); if (c < 0) break; @@ -152,6 +328,15 @@ int main(int ac, char **av) print_usage(); } break; + case 'e': + extent_rec = 1; + break; + case 'E': + extent_tree = 1; + break; + case 'k': + corrupt_block_keys = 1; + break; 
default: print_usage(); } @@ -159,7 +344,7 @@ int main(int ac, char **av) ac = ac - optind; if (ac == 0) print_usage(); - if (logical == 0) + if (logical == 0 && !extent_tree) print_usage(); if (copy < 0) print_usage(); @@ -174,23 +359,20 @@ int main(int ac, char **av) fprintf(stderr, "Open ctree failed\n"); exit(1); } - - info_file = stdout; - if (output_file) { - if (strcmp(output_file, "-") == 0) { - out_fd = 1; - info_file = stderr; - } else { - out_fd = open(output_file, O_RDWR | O_CREAT, 0600); - if (out_fd < 0) - goto close; - err = ftruncate(out_fd, 0); - if (err) { - close(out_fd); - goto close; - } - info_file = stdout; - } + if (extent_rec) { + struct btrfs_trans_handle *trans; + trans = btrfs_start_transaction(root, 1); + ret = corrupt_extent (trans, root, logical, 0); + btrfs_commit_transaction(trans, root); + goto out_close; + } + if (extent_tree) { + struct btrfs_trans_handle *trans; + trans = btrfs_start_transaction(root, 1); + btrfs_corrupt_extent_tree(trans, root->fs_info->extent_root, + root->fs_info->extent_root->node); + btrfs_commit_transaction(trans, root); + goto out_close; } if (bytes == 0) @@ -200,22 +382,18 @@ int main(int ac, char **av) bytes *= root->sectorsize; while (bytes > 0) { - eb = debug_corrupt_block(root, logical, root->sectorsize, copy); - if (eb && output_file) { - err = write(out_fd, eb->data, eb->len); - if (err < 0 || err != eb->len) { - fprintf(stderr, "output file write failed\n"); - goto out_close_fd; - } + if (corrupt_block_keys) { + corrupt_keys_in_block(root, logical); + } else { + eb = debug_corrupt_block(root, logical, + root->sectorsize, copy); + free_extent_buffer(eb); } - free_extent_buffer(eb); logical += root->sectorsize; bytes -= root->sectorsize; } - -out_close_fd: - if (output_file && out_fd != 1) - close(out_fd); -close: + return ret; +out_close: + close_ctree(root); return ret; } diff --git a/btrfs-crc.c b/btrfs-crc.c new file mode 100644 index 0000000..e4cda43 --- /dev/null +++ b/btrfs-crc.c @@ -0,0 
+1,89 @@ +/* + * Copyright (C) 2013 STRATO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include "crc32c.h" + +void usage(void) +{ + printf("usage: btrfs-crc filename\n"); + printf(" print out the btrfs crc for \"filename\"\n"); + printf("usage: btrfs-crc filename -c crc [-s seed] [-l length]\n"); + printf(" brute force search for file names with the given crc\n"); + printf(" -s seed the random seed (default: random)\n"); + printf(" -l length the length of the file names (default: 10)\n"); + exit(1); +} + +int main(int argc, char **argv) +{ + char c; + unsigned long checksum = 0; + char *str; + char *buf; + int length = 10; + int seed = getpid() ^ getppid(); + int loop = 0; + int i; + + while ((c = getopt(argc, argv, "l:c:s:h")) != -1) { + switch (c) { + case 'l': + length = atol(optarg); + break; + case 'c': + sscanf(optarg, "%li", &checksum); + loop = 1; + break; + case 's': + seed = atol(optarg); + break; + case 'h': + usage(); + case '?': + return 255; + } + } + + str = argv[optind]; + + if (!loop) { + if (optind >= argc) { + fprintf(stderr, "not enough arguments\n"); + return 255; + } + printf("%12u - %s\n", crc32c(~1, str, strlen(str)), str); + return 0; + } + + buf = malloc(length); + if (!buf) + return -ENOMEM; + srand(seed); + + while (1) { + for (i = 0; i < 
length; i++) + buf[i] = rand() % 94 + 33; + if (crc32c(~1, buf, length) == checksum) + printf("%12lu - %.*s\n", checksum, length, buf); + } + + return 0; +} diff --git a/debug-tree.c b/btrfs-debug-tree.c index 2aeabfd..bae7f94 100644 --- a/debug-tree.c +++ b/btrfs-debug-tree.c @@ -30,7 +30,15 @@ static int print_usage(void) { - fprintf(stderr, "usage: debug-tree [ -e ] device\n"); + fprintf(stderr, "usage: btrfs-debug-tree [ -e ] [ -d ] [ -r ] [ -R ]\n"); + fprintf(stderr, " [-b block_num ] device\n"); + fprintf(stderr, "\t-e : print detailed extents info\n"); + fprintf(stderr, "\t-d : print info of btrfs device and root tree dirs" + " only\n"); + fprintf(stderr, "\t-r : print info of roots only\n"); + fprintf(stderr, "\t-R : print info of roots and root backups\n"); + fprintf(stderr, "\t-b block_num : print info of the specified block" + " only\n"); fprintf(stderr, "%s\n", BTRFS_BUILD_VERSION); exit(1); } @@ -44,6 +52,11 @@ static void print_extents(struct btrfs_root *root, struct extent_buffer *eb) if (!eb) return; + if (btrfs_is_leaf(eb)) { + btrfs_print_leaf(root, eb); + return; + } + size = btrfs_level_size(root, btrfs_header_level(eb) - 1); nr = btrfs_header_nritems(eb); for (i = 0; i < nr; i++) { @@ -104,6 +117,7 @@ static void print_old_roots(struct btrfs_super_block *super) int main(int ac, char **av) { struct btrfs_root *root; + struct btrfs_fs_info *info; struct btrfs_path path; struct btrfs_key key; struct btrfs_root_item ri; @@ -152,12 +166,18 @@ int main(int ac, char **av) if (ac != 1) print_usage(); - root = open_ctree(av[optind], 0, 0); - if (!root) { + info = open_ctree_fs_info(av[optind], 0, 0, 0, 1); + if (!info) { fprintf(stderr, "unable to open %s\n", av[optind]); exit(1); } + root = info->fs_root; + if (block_only) { + if (!root) { + fprintf(stderr, "unable to open %s\n", av[optind]); + exit(1); + } leaf = read_tree_block(root, block_only, root->leafsize, 0); @@ -184,25 +204,32 @@ int main(int ac, char **av) if (!extent_only) { if (roots_only) 
{ printf("root tree: %llu level %d\n", - (unsigned long long)root->fs_info->tree_root->node->start, - btrfs_header_level(root->fs_info->tree_root->node)); + (unsigned long long)info->tree_root->node->start, + btrfs_header_level(info->tree_root->node)); printf("chunk tree: %llu level %d\n", - (unsigned long long)root->fs_info->chunk_root->node->start, - btrfs_header_level(root->fs_info->chunk_root->node)); + (unsigned long long)info->chunk_root->node->start, + btrfs_header_level(info->chunk_root->node)); } else { - printf("root tree\n"); - btrfs_print_tree(root->fs_info->tree_root, - root->fs_info->tree_root->node, 1); + if (info->tree_root->node) { + printf("root tree\n"); + btrfs_print_tree(info->tree_root, + info->tree_root->node, 1); + } - printf("chunk tree\n"); - btrfs_print_tree(root->fs_info->chunk_root, - root->fs_info->chunk_root->node, 1); + if (info->chunk_root->node) { + printf("chunk tree\n"); + btrfs_print_tree(info->chunk_root, + info->chunk_root->node, 1); + } } } - tree_root_scan = root->fs_info->tree_root; + tree_root_scan = info->tree_root; btrfs_init_path(&path); again: + if (!extent_buffer_uptodate(tree_root_scan->node)) + goto no_node; + key.offset = 0; key.objectid = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); @@ -232,6 +259,9 @@ again: btrfs_level_size(tree_root_scan, btrfs_root_level(&ri)), 0); + if (!extent_buffer_uptodate(buf)) + goto next; + switch(found_key.objectid) { case BTRFS_ROOT_TREE_OBJECTID: if (!skip) @@ -295,6 +325,12 @@ again: if (!skip) { printf("extent checksum"); } + break; + case BTRFS_QUOTA_TREE_OBJECTID: + if (!skip) { + printf("quota"); + } + break; case BTRFS_MULTIPLE_OBJECTIDS: if (!skip) { printf("multiple"); @@ -320,13 +356,15 @@ again: } } } +next: path.slots[0]++; } +no_node: btrfs_release_path(root, &path); - if (tree_root_scan == root->fs_info->tree_root && - root->fs_info->log_root_tree) { - tree_root_scan = root->fs_info->log_root_tree; + if (tree_root_scan == info->tree_root && + info->log_root_tree) { 
+ tree_root_scan = info->log_root_tree; goto again; } @@ -334,14 +372,14 @@ again: return 0; if (root_backups) - print_old_roots(&root->fs_info->super_copy); + print_old_roots(info->super_copy); printf("total bytes %llu\n", - (unsigned long long)btrfs_super_total_bytes(&root->fs_info->super_copy)); + (unsigned long long)btrfs_super_total_bytes(info->super_copy)); printf("bytes used %llu\n", - (unsigned long long)btrfs_super_bytes_used(&root->fs_info->super_copy)); + (unsigned long long)btrfs_super_bytes_used(info->super_copy)); uuidbuf[36] = '\0'; - uuid_unparse(root->fs_info->super_copy.fsid, uuidbuf); + uuid_unparse(info->super_copy->fsid, uuidbuf); printf("uuid %s\n", uuidbuf); printf("%s\n", BTRFS_BUILD_VERSION); return 0; diff --git a/find-root.c b/btrfs-find-root.c index c0f38b8..810d835 100644 --- a/find-root.c +++ b/btrfs-find-root.c @@ -35,13 +35,12 @@ #include "utils.h" #include "crc32c.h" -static int verbose = 0; static u16 csum_size = 0; static u64 search_objectid = BTRFS_ROOT_TREE_OBJECTID; static void usage() { - fprintf(stderr, "Usage: find-roots [-v] <device>\n"); + fprintf(stderr, "Usage: find-roots [-o search_objectid] <device>\n"); } int csum_block(void *buf, u32 len) @@ -66,32 +65,6 @@ int csum_block(void *buf, u32 len) return ret; } -static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, - u32 stripesize, struct btrfs_root *root, - struct btrfs_fs_info *fs_info, u64 objectid) -{ - root->node = NULL; - root->commit_root = NULL; - root->sectorsize = sectorsize; - root->nodesize = nodesize; - root->leafsize = leafsize; - root->stripesize = stripesize; - root->ref_cows = 0; - root->track_dirty = 0; - - root->fs_info = fs_info; - root->objectid = objectid; - root->last_trans = 0; - root->highest_inode = 0; - root->last_inode_alloc = 0; - - INIT_LIST_HEAD(&root->dirty_list); - memset(&root->root_key, 0, sizeof(root->root_key)); - memset(&root->root_item, 0, sizeof(root->root_item)); - root->root_key.objectid = objectid; - return 0; -} - 
static int close_all_devices(struct btrfs_fs_info *fs_info) { struct list_head *list; @@ -143,6 +116,7 @@ static struct btrfs_root *open_ctree_broken(int fd, const char *device) } memset(fs_info, 0, sizeof(*fs_info)); + fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE); fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->chunk_root = chunk_root; @@ -174,7 +148,7 @@ static struct btrfs_root *open_ctree_broken(int fd, const char *device) goto out_cleanup; fs_info->super_bytenr = BTRFS_SUPER_INFO_OFFSET; - disk_super = &fs_info->super_copy; + disk_super = fs_info->super_copy; ret = btrfs_read_dev_super(fs_devices->latest_bdev, disk_super, BTRFS_SUPER_INFO_OFFSET); if (ret) { @@ -261,10 +235,10 @@ out: static int search_iobuf(struct btrfs_root *root, void *iobuf, size_t iobuf_size, off_t offset) { - u64 gen = btrfs_super_generation(&root->fs_info->super_copy); + u64 gen = btrfs_super_generation(root->fs_info->super_copy); u64 objectid = search_objectid; - u32 size = btrfs_super_nodesize(&root->fs_info->super_copy); - u8 level = root->fs_info->super_copy.root_level; + u32 size = btrfs_super_nodesize(root->fs_info->super_copy); + u8 level = root->fs_info->super_copy->root_level; size_t block_off = 0; while (block_off < iobuf_size) { @@ -349,8 +323,8 @@ static int find_root(struct btrfs_root *root) int ret = 1; printf("Super think's the tree root is at %Lu, chunk root %Lu\n", - btrfs_super_root(&root->fs_info->super_copy), - btrfs_super_chunk_root(&root->fs_info->super_copy)); + btrfs_super_root(root->fs_info->super_copy), + btrfs_super_chunk_root(root->fs_info->super_copy)); err = btrfs_next_metadata(&root->fs_info->mapping_tree, &metadata_offset, &metadata_size); @@ -363,7 +337,7 @@ static int find_root(struct btrfs_root *root) u64 type; if (offset > - btrfs_super_total_bytes(&root->fs_info->super_copy)) { + btrfs_super_total_bytes(root->fs_info->super_copy)) { printf("Went past the fs size, exiting"); break; } @@ -378,7 +352,8 @@ static 
int find_root(struct btrfs_root *root) offset = metadata_offset; } err = __btrfs_map_block(&root->fs_info->mapping_tree, READ, - offset, &map_length, &type, &multi, 0); + offset, &map_length, &type, + &multi, 0, NULL); if (err) { offset += map_length; continue; @@ -386,6 +361,7 @@ static int find_root(struct btrfs_root *root) if (!(type & BTRFS_BLOCK_GROUP_METADATA)) { offset += map_length; + kfree(multi); continue; } @@ -414,11 +390,8 @@ int main(int argc, char **argv) int opt; int ret; - while ((opt = getopt(argc, argv, "vo:")) != -1) { + while ((opt = getopt(argc, argv, "o:")) != -1) { switch(opt) { - case 'v': - verbose++; - break; case 'o': errno = 0; search_objectid = (u64)strtoll(optarg, NULL, @@ -448,10 +421,13 @@ int main(int argc, char **argv) root = open_ctree_broken(dev_fd, argv[optind]); close(dev_fd); - if (!root) + + if (!root) { + fprintf(stderr, "Open ctree failed\n"); exit(1); + } - csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); + csum_size = btrfs_super_csum_size(root->fs_info->super_copy); ret = find_root(root); close_ctree(root); return ret; diff --git a/btrfs-fragments.c b/btrfs-fragments.c new file mode 100644 index 0000000..a012fe1 --- /dev/null +++ b/btrfs-fragments.c @@ -0,0 +1,454 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <dirent.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#include <libgen.h> +#include <limits.h> +#include <uuid/uuid.h> +#include <ctype.h> + +#include <gd.h> + +#undef ULONG_MAX + +#include "kerncompat.h" +#include "ctree.h" +#include "ioctl.h" +#include "utils.h" + +static int use_color; +static void +push_im(gdImagePtr im, char *name, char *dir) +{ + char fullname[2000]; + FILE *pngout; + + if (!im) + return; + + snprintf(fullname, sizeof(fullname), "%s/%s", dir, name); + pngout = fopen(fullname, "w"); + if (!pngout) { + printf("unable to create file %s\n", fullname); + exit(1); + } + + gdImagePng(im, pngout); + + fclose(pngout); + gdImageDestroy(im); +} + +static char * +chunk_type(u64 flags) +{ + switch (flags & (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_DATA | + BTRFS_BLOCK_GROUP_METADATA)) { + case BTRFS_BLOCK_GROUP_SYSTEM: + return "system"; + case BTRFS_BLOCK_GROUP_DATA: + return "data"; + case BTRFS_BLOCK_GROUP_METADATA: + return "metadata"; + case BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA: + return "mixed"; + default: + return "invalid"; + } +} + +static void +print_bg(FILE *html, char *name, u64 start, u64 len, u64 used, u64 flags, + u64 areas) +{ + double frag = (double)areas / (len / 4096) * 2; + + fprintf(html, "<p>%s chunk starts at %lld, size is %s, %.2f%% used, " + "%.2f%% fragmented</p>\n", chunk_type(flags), start, + pretty_sizes(len), 100.0 * used / len, 100.0 * frag); + fprintf(html, "<img src=\"%s\" border=\"1\" />\n", name); +} + +enum tree_colors { + COLOR_ROOT = 0, + COLOR_EXTENT, + COLOR_CHUNK, + COLOR_DEV, + COLOR_FS, + COLOR_CSUM, + COLOR_RELOC, + COLOR_DATA, + COLOR_UNKNOWN, + COLOR_MAX +}; + +static int +get_color(struct btrfs_extent_item *item, int len) +{ + u64 refs; + u64 flags; + u8 type; + u64 offset; + struct btrfs_extent_inline_ref *ref; + + refs = 
btrfs_stack_extent_refs(item); + flags = btrfs_stack_extent_flags(item); + + if (flags & BTRFS_EXTENT_FLAG_DATA) + return COLOR_DATA; + if (refs > 1) { + /* this must be an fs tree */ + return COLOR_FS; + } + + ref = (void *)item + sizeof(struct btrfs_extent_item) + + sizeof(struct btrfs_tree_block_info); + type = btrfs_stack_extent_inline_ref_type(ref); + offset = btrfs_stack_extent_inline_ref_offset(ref); + + switch (type) { + case BTRFS_EXTENT_DATA_REF_KEY: + return COLOR_DATA; + case BTRFS_SHARED_BLOCK_REF_KEY: + case BTRFS_SHARED_DATA_REF_KEY: + return COLOR_FS; + case BTRFS_TREE_BLOCK_REF_KEY: + break; + default: + return COLOR_UNKNOWN; + } + + switch (offset) { + case BTRFS_ROOT_TREE_OBJECTID: + return COLOR_ROOT; + case BTRFS_EXTENT_TREE_OBJECTID: + return COLOR_EXTENT; + case BTRFS_CHUNK_TREE_OBJECTID: + return COLOR_CHUNK; + case BTRFS_DEV_TREE_OBJECTID: + return COLOR_DEV; + case BTRFS_FS_TREE_OBJECTID: + return COLOR_FS; + case BTRFS_CSUM_TREE_OBJECTID: + return COLOR_CSUM; + case BTRFS_DATA_RELOC_TREE_OBJECTID: + return COLOR_RELOC; + } + + return COLOR_UNKNOWN; +} + +static void +init_colors(gdImagePtr im, int *colors) +{ + colors[COLOR_ROOT] = gdImageColorAllocate(im, 255, 0, 0); + colors[COLOR_EXTENT] = gdImageColorAllocate(im, 0, 255, 0); + colors[COLOR_CHUNK] = gdImageColorAllocate(im, 255, 0, 0); + colors[COLOR_DEV] = gdImageColorAllocate(im, 255, 0, 0); + colors[COLOR_FS] = gdImageColorAllocate(im, 0, 0, 0); + colors[COLOR_CSUM] = gdImageColorAllocate(im, 0, 0, 255); + colors[COLOR_RELOC] = gdImageColorAllocate(im, 128, 128, 128); + colors[COLOR_DATA] = gdImageColorAllocate(im, 100, 0, 0); + colors[COLOR_UNKNOWN] = gdImageColorAllocate(im, 50, 50, 50); +} + +int +list_fragments(int fd, u64 flags, char *dir) +{ + int ret; + struct btrfs_ioctl_search_args args; + struct btrfs_ioctl_search_key *sk = &args.key; + int i; + struct btrfs_ioctl_search_header *sh; + unsigned long off = 0; + int bgnum = 0; + u64 bgstart = 0; + u64 bglen = 0; + u64 bgend = 
0; + u64 bgflags = 0; + u64 bgused = 0; + u64 saved_extent = 0; + u64 saved_len = 0; + u64 saved_flags = 0; + int saved_color = 0; + u64 last_end = 0; + u64 areas = 0; + long px; + char name[1000]; + FILE *html; + int colors[COLOR_MAX]; + + gdImagePtr im = NULL; + int black = 0; + int white = 0; + int width = 800; + + snprintf(name, sizeof(name), "%s/index.html", dir); + html = fopen(name, "w"); + if (!html) { + printf("unable to create %s\n", name); + exit(1); + } + + fprintf(html, "<html><header>\n"); + fprintf(html, "<title>Btrfs Block Group Allocation Map</title>\n"); + fprintf(html, "<style type=\"text/css\">\n"); + fprintf(html, "img {margin-left: 1em; margin-bottom: 2em;}\n"); + fprintf(html, "</style>\n"); + fprintf(html, "</header><body>\n"); + + memset(&args, 0, sizeof(args)); + + sk->tree_id = 2; + sk->max_type = -1; + sk->min_type = 0; + sk->max_objectid = (u64)-1; + sk->max_offset = (u64)-1; + sk->max_transid = (u64)-1; + + /* just a big number, doesn't matter much */ + sk->nr_items = 4096; + + while(1) { + ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args); + if (ret < 0) { + fprintf(stderr, "ERROR: can't perform the search\n"); + return ret; + } + /* the ioctl returns the number of item it found in nr_items */ + if (sk->nr_items == 0) + break; + + off = 0; + for (i = 0; i < sk->nr_items; i++) { + int j; + + sh = (struct btrfs_ioctl_search_header *)(args.buf + + off); + off += sizeof(*sh); + if (sh->type == BTRFS_BLOCK_GROUP_ITEM_KEY) { + struct btrfs_block_group_item *bg; + + if (im) { + push_im(im, name, dir); + im = NULL; + + print_bg(html, name, bgstart, bglen, + bgused, bgflags, areas); + } + + ++bgnum; + + bg = (struct btrfs_block_group_item *) + (args.buf + off); + bgflags = btrfs_block_group_flags(bg); + bgused = btrfs_block_group_used(bg); + + printf("found block group %lld len %lld " + "flags %lld\n", sh->objectid, + sh->offset, bgflags); + if (!(bgflags & flags)) { + /* skip this block group */ + sk->min_objectid = sh->objectid + + sh->offset; + 
sk->min_type = 0; + sk->min_offset = 0; + break; + } + im = gdImageCreate(width, + (sh->offset / 4096 + 799) / width); + + white = gdImageColorAllocate(im, 255, 255, 255); + black = gdImageColorAllocate(im, 0, 0, 0); + + for (j = 0; j < 10; ++j) + colors[j] = black; + + init_colors(im, colors); + bgstart = sh->objectid; + bglen = sh->offset; + bgend = bgstart + bglen; + + snprintf(name, sizeof(name), "bg%d.png", bgnum); + + last_end = bgstart; + if (saved_len) { + px = (saved_extent - bgstart) / 4096; + for (j = 0; j < saved_len / 4096; ++j) { + int x = (px + j) % width; + int y = (px + j) / width; + gdImageSetPixel(im, x, y, + saved_color); + } + last_end += saved_len; + } + areas = 0; + saved_len = 0; + } + if (im && sh->type == BTRFS_EXTENT_ITEM_KEY) { + u64 e_flags; + int c; + struct btrfs_extent_item *item; + + item = (struct btrfs_extent_item *) + (args.buf + off); + e_flags = btrfs_stack_extent_flags(item); + + if (use_color) + c = colors[get_color(item, sh->len)]; + else + c = black; + if (sh->objectid > bgend) { + printf("WARN: extent %lld is without " + "block group\n", sh->objectid); + goto skip; + } + if (sh->objectid == bgend) { + saved_extent = sh->objectid; + saved_len = sh->offset; + saved_flags = e_flags; + saved_color = c; + goto skip; + } + px = (sh->objectid - bgstart) / 4096; + for (j = 0; j < sh->offset / 4096; ++j) { + int x = (px + j) % width; + int y = (px + j) / width; + gdImageSetPixel(im, x, y, c); + } + if (sh->objectid != last_end) + ++areas; + last_end = sh->objectid + sh->offset; +skip:; + } + off += sh->len; + + /* + * record the mins in sk so we can make sure the + * next search doesn't repeat this root + */ + sk->min_objectid = sh->objectid; + sk->min_type = sh->type; + sk->min_offset = sh->offset; + } + sk->nr_items = 4096; + + /* increment by one */ + if (++sk->min_offset == 0) + if (++sk->min_type == 0) + if (++sk->min_objectid == 0) + break; + } + + if (im) { + push_im(im, name, dir); + print_bg(html, name, bgstart, bglen, 
bgused, bgflags, areas); + } + + if (use_color) { + fprintf(html, "<p>"); + fprintf(html, "data - dark red, "); + fprintf(html, "fs tree - black, "); + fprintf(html, "extent tree - green, "); + fprintf(html, "csum tree - blue, "); + fprintf(html, "reloc tree - grey, "); + fprintf(html, "other trees - red, "); + fprintf(html, "unknown tree - dark grey"); + fprintf(html, "</p>"); + } + fprintf(html, "</body></html>\n"); + + return ret; +} + +void +usage(void) +{ + printf("usage: btrfs-fragments [options] <path>\n"); + printf(" -c use color\n"); + printf(" -d print data chunks\n"); + printf(" -m print metadata chunks\n"); + printf(" -s print system chunks\n"); + printf(" (default is data+metadata)\n"); + printf(" -o <dir> output directory, default is html\n"); + exit(1); +} + +int main(int argc, char **argv) +{ + char *path; + int fd; + int ret; + u64 flags = 0; + char *dir = "html"; + + while (1) { + int c = getopt(argc, argv, "cmso:h"); + if (c < 0) + break; + switch (c) { + case 'c': + use_color = 1; + break; + case 'd': + flags |= BTRFS_BLOCK_GROUP_DATA; + break; + case 'm': + flags |= BTRFS_BLOCK_GROUP_METADATA; + break; + case 's': + flags |= BTRFS_BLOCK_GROUP_SYSTEM; + break; + case 'o': + dir = optarg; + break; + case 'h': + default: + usage(); + } + } + + if (optind < argc) { + path = argv[optind++]; + } else { + usage(); + exit(1); + } + + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access '%s'\n", path); + exit(1); + } + + if (flags == 0) + flags = BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA; + + ret = list_fragments(fd, flags, dir); + if (ret) + exit(1); + + exit(0); +} diff --git a/btrfs-image.c b/btrfs-image.c index f2bbcc8..739ae35 100644 --- a/btrfs-image.c +++ b/btrfs-image.c @@ -34,7 +34,7 @@ #include "transaction.h" #include "utils.h" #include "version.h" - +#include "volumes.h" #define HEADER_MAGIC 0xbd5c25e27295668bULL #define MAX_PENDING_SIZE (256 * 1024) @@ -72,6 +72,7 @@ struct async_work { u64 size; u8 
*buffer; size_t bufsize; + int error; }; struct metadump_struct { @@ -95,6 +96,7 @@ struct metadump_struct { int compress_level; int done; + int data; }; struct mdrestore_struct { @@ -108,9 +110,15 @@ struct mdrestore_struct { struct list_head list; size_t num_items; + u64 leafsize; + u64 devid; + u8 uuid[BTRFS_UUID_SIZE]; + u8 fsid[BTRFS_FSID_SIZE]; int compress_method; int done; + int error; + int old_restore; }; static void csum_block(u8 *buf, size_t len) @@ -219,7 +227,9 @@ static void *dump_worker(void *data) ret = compress2(async->buffer, (unsigned long *)&async->bufsize, orig, async->size, md->compress_level); - BUG_ON(ret != Z_OK); + + if (ret != Z_OK) + async->error = 1; free(orig); } @@ -249,7 +259,7 @@ static void meta_cluster_init(struct metadump_struct *md, u64 start) static int metadump_init(struct metadump_struct *md, struct btrfs_root *root, FILE *out, int num_threads, int compress_level) { - int i, ret; + int i, ret = 0; memset(md, 0, sizeof(*md)); pthread_cond_init(&md->cond, NULL); @@ -261,8 +271,11 @@ static int metadump_init(struct metadump_struct *md, struct btrfs_root *root, md->pending_start = (u64)-1; md->compress_level = compress_level; md->cluster = calloc(1, BLOCK_SIZE); - if (!md->cluster) + if (!md->cluster) { + pthread_cond_destroy(&md->cond); + pthread_mutex_destroy(&md->mutex); return -ENOMEM; + } meta_cluster_init(md, 0); if (!num_threads) @@ -270,13 +283,34 @@ static int metadump_init(struct metadump_struct *md, struct btrfs_root *root, md->num_threads = num_threads; md->threads = calloc(num_threads, sizeof(pthread_t)); - if (!md->threads) + if (!md->threads) { + free(md->cluster); + pthread_cond_destroy(&md->cond); + pthread_mutex_destroy(&md->mutex); return -ENOMEM; + } + for (i = 0; i < num_threads; i++) { ret = pthread_create(md->threads + i, NULL, dump_worker, md); if (ret) break; } + + if (ret) { + pthread_mutex_lock(&md->mutex); + md->done = 1; + pthread_cond_broadcast(&md->cond); + pthread_mutex_unlock(&md->mutex); + + for 
(i--; i >= 0; i--) + pthread_join(md->threads[i], NULL); + + pthread_cond_destroy(&md->cond); + pthread_mutex_destroy(&md->mutex); + free(md->cluster); + free(md->threads); + } + return ret; } @@ -311,6 +345,7 @@ static int write_buffers(struct metadump_struct *md, u64 *next) u64 bytenr = 0; u32 nritems = 0; int ret; + int err = 0; if (list_empty(&md->ordered)) goto out; @@ -336,7 +371,10 @@ static int write_buffers(struct metadump_struct *md, u64 *next) header->nritems = cpu_to_le32(nritems); ret = fwrite(md->cluster, BLOCK_SIZE, 1, md->out); - BUG_ON(ret != 1); + if (ret != 1) { + fprintf(stderr, "Error writing out cluster: %d\n", errno); + return -EIO; + } /* write buffers */ bytenr += le64_to_cpu(header->bytenr) + BLOCK_SIZE; @@ -346,23 +384,88 @@ static int write_buffers(struct metadump_struct *md, u64 *next) list_del_init(&async->ordered); bytenr += async->bufsize; - ret = fwrite(async->buffer, async->bufsize, 1, md->out); - BUG_ON(ret != 1); + if (!err) + ret = fwrite(async->buffer, async->bufsize, 1, + md->out); + if (ret != 1) { + err = -EIO; + ret = 0; + fprintf(stderr, "Error writing out cluster: %d\n", + errno); + } free(async->buffer); free(async); } /* zero unused space in the last block */ - if (bytenr & BLOCK_MASK) { + if (!err && bytenr & BLOCK_MASK) { size_t size = BLOCK_SIZE - (bytenr & BLOCK_MASK); bytenr += size; ret = write_zero(md->out, size); - BUG_ON(ret != 1); + if (ret != 1) { + fprintf(stderr, "Error zeroing out buffer: %d\n", + errno); + err = -EIO; + } } out: *next = bytenr; + return err; +} + +static int read_data_extent(struct metadump_struct *md, + struct async_work *async) +{ + struct btrfs_multi_bio *multi = NULL; + struct btrfs_device *device; + u64 bytes_left = async->size; + u64 logical = async->start; + u64 offset = 0; + u64 bytenr; + u64 read_len; + ssize_t done; + int fd; + int ret; + + while (bytes_left) { + read_len = bytes_left; + ret = btrfs_map_block(&md->root->fs_info->mapping_tree, READ, + logical, &read_len, &multi, 
0, NULL); + if (ret) { + fprintf(stderr, "Couldn't map data block %d\n", ret); + return ret; + } + + device = multi->stripes[0].dev; + + if (device->fd == 0) { + fprintf(stderr, + "Device we need to read from is not open\n"); + free(multi); + return -EIO; + } + fd = device->fd; + bytenr = multi->stripes[0].physical; + free(multi); + + read_len = min(read_len, bytes_left); + done = pread64(fd, async->buffer+offset, read_len, bytenr); + if (done < read_len) { + if (done < 0) + fprintf(stderr, "Error reading extent %d\n", + errno); + else + fprintf(stderr, "Short read\n"); + return -EIO; + } + + bytes_left -= done; + offset += done; + logical += done; + } + return 0; } @@ -374,7 +477,7 @@ static int flush_pending(struct metadump_struct *md, int done) u64 start; u64 size; size_t offset; - int ret; + int ret = 0; if (md->pending_size) { async = calloc(1, sizeof(*async)); @@ -385,18 +488,38 @@ static int flush_pending(struct metadump_struct *md, int done) async->size = md->pending_size; async->bufsize = async->size; async->buffer = malloc(async->bufsize); - + if (!async->buffer) { + free(async); + return -ENOMEM; + } offset = 0; start = async->start; size = async->size; - while (size > 0) { - eb = read_tree_block(md->root, start, blocksize, 0); - BUG_ON(!eb); + + if (md->data) { + ret = read_data_extent(md, async); + if (ret) { + free(async->buffer); + free(async); + return ret; + } + } + + while (!md->data && size > 0) { + u64 this_read = min(blocksize, size); + eb = read_tree_block(md->root, start, this_read, 0); + if (!eb) { + free(async->buffer); + free(async); + fprintf(stderr, + "Error reading metadata block\n"); + return -EIO; + } copy_buffer(async->buffer + offset, eb); free_extent_buffer(eb); - start += blocksize; - offset += blocksize; - size -= blocksize; + start += this_read; + offset += this_read; + size -= this_read; } md->pending_start = (u64)-1; @@ -418,17 +541,22 @@ static int flush_pending(struct metadump_struct *md, int done) } if (md->num_items >= 
ITEMS_PER_CLUSTER || done) { ret = write_buffers(md, &start); - BUG_ON(ret); - meta_cluster_init(md, start); + if (ret) + fprintf(stderr, "Error writing buffers %d\n", + errno); + else + meta_cluster_init(md, start); } pthread_mutex_unlock(&md->mutex); - return 0; + return ret; } -static int add_metadata(u64 start, u64 size, struct metadump_struct *md) +static int add_extent(u64 start, u64 size, struct metadump_struct *md, + int data) { int ret; - if (md->pending_size + size > MAX_PENDING_SIZE || + if (md->data != data || + md->pending_size + size > MAX_PENDING_SIZE || md->pending_start + md->pending_size != start) { ret = flush_pending(md, 0); if (ret) @@ -437,6 +565,7 @@ static int add_metadata(u64 start, u64 size, struct metadump_struct *md) } readahead_tree_block(md->root, start, size, 0); md->pending_size += size; + md->data = data; return 0; } @@ -455,7 +584,8 @@ static int is_tree_block(struct btrfs_root *extent_root, path->slots[0]++; if (path->slots[0] >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(extent_root, path); - BUG_ON(ret < 0); + if (ret < 0) + return ret; if (ret > 0) break; leaf = path->nodes[0]; @@ -476,95 +606,333 @@ static int is_tree_block(struct btrfs_root *extent_root, } #endif -static int create_metadump(const char *input, FILE *out, int num_threads, - int compress_level) +static int copy_tree_blocks(struct btrfs_root *root, struct extent_buffer *eb, + struct metadump_struct *metadump, int root_tree) { - struct btrfs_root *root; - struct btrfs_root *extent_root; - struct btrfs_path *path; - struct extent_buffer *leaf; - struct btrfs_extent_item *ei; + struct extent_buffer *tmp; + struct btrfs_root_item *ri; struct btrfs_key key; - struct metadump_struct metadump; u64 bytenr; - u64 num_bytes; + int level; + int nritems = 0; + int i = 0; int ret; - root = open_ctree(input, 0, 0); - BUG_ON(root->nodesize != root->leafsize); + ret = add_extent(btrfs_header_bytenr(eb), root->leafsize, metadump, 0); + if (ret) { + fprintf(stderr, "Error 
adding metadata block\n"); + return ret; + } - ret = metadump_init(&metadump, root, out, num_threads, - compress_level); - BUG_ON(ret); + if (btrfs_header_level(eb) == 0 && !root_tree) + return 0; - ret = add_metadata(BTRFS_SUPER_INFO_OFFSET, 4096, &metadump); - BUG_ON(ret); + level = btrfs_header_level(eb); + nritems = btrfs_header_nritems(eb); + for (i = 0; i < nritems; i++) { + if (level == 0) { + btrfs_item_key_to_cpu(eb, &key, i); + if (key.type != BTRFS_ROOT_ITEM_KEY) + continue; + ri = btrfs_item_ptr(eb, i, struct btrfs_root_item); + bytenr = btrfs_disk_root_bytenr(eb, ri); + tmp = read_tree_block(root, bytenr, root->leafsize, 0); + if (!tmp) { + fprintf(stderr, + "Error reading log root block\n"); + return -EIO; + } + ret = copy_tree_blocks(root, tmp, metadump, 0); + free_extent_buffer(tmp); + if (ret) + return ret; + } else { + bytenr = btrfs_node_blockptr(eb, i); + tmp = read_tree_block(root, bytenr, root->leafsize, 0); + if (!tmp) { + fprintf(stderr, "Error reading log block\n"); + return -EIO; + } + ret = copy_tree_blocks(root, tmp, metadump, root_tree); + free_extent_buffer(tmp); + if (ret) + return ret; + } + } - extent_root = root->fs_info->extent_root; - path = btrfs_alloc_path(); + return 0; +} + +static int copy_log_trees(struct btrfs_root *root, + struct metadump_struct *metadump, + struct btrfs_path *path) +{ + u64 blocknr = btrfs_super_log_root(root->fs_info->super_copy); + + if (blocknr == 0) + return 0; + + if (!root->fs_info->log_root_tree || + !root->fs_info->log_root_tree->node) { + fprintf(stderr, "Error copying tree log, it wasn't setup\n"); + return -EIO; + } + + return copy_tree_blocks(root, root->fs_info->log_root_tree->node, + metadump, 1); +} + +static int copy_space_cache(struct btrfs_root *root, + struct metadump_struct *metadump, + struct btrfs_path *path) +{ + struct extent_buffer *leaf; + struct btrfs_file_extent_item *fi; + struct btrfs_key key; + u64 bytenr, num_bytes; + int ret; + + root = root->fs_info->tree_root; + + 
key.objectid = 0; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + fprintf(stderr, "Error searching for free space inode %d\n", + ret); + return ret; + } + + while (1) { + leaf = path->nodes[0]; + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) { + fprintf(stderr, "Error going to next leaf " + "%d\n", ret); + return ret; + } + if (ret > 0) + break; + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.type != BTRFS_EXTENT_DATA_KEY) { + path->slots[0]++; + continue; + } + + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, fi) != + BTRFS_FILE_EXTENT_REG) { + path->slots[0]++; + continue; + } + + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); + ret = add_extent(bytenr, num_bytes, metadump, 1); + if (ret) { + fprintf(stderr, "Error adding space cache blocks %d\n", + ret); + btrfs_release_path(root, path); + return ret; + } + path->slots[0]++; + } + + return 0; +} + +static int copy_from_extent_tree(struct metadump_struct *metadump, + struct btrfs_path *path) +{ + struct btrfs_root *extent_root; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + struct btrfs_key key; + u64 bytenr; + u64 num_bytes; + int ret; + extent_root = metadump->root->fs_info->extent_root; bytenr = BTRFS_SUPER_INFO_OFFSET + 4096; key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = 0; ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); - BUG_ON(ret < 0); + if (ret < 0) { + fprintf(stderr, "Error searching extent root %d\n", ret); + return ret; + } + ret = 0; while (1) { leaf = path->nodes[0]; if (path->slots[0] >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(extent_root, path); - BUG_ON(ret < 0); - if (ret > 0) + if (ret < 0) { + fprintf(stderr, "Error 
going to next leaf %d" + "\n", ret); + break; + } + if (ret > 0) { + ret = 0; break; + } leaf = path->nodes[0]; } btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); if (key.objectid < bytenr || - key.type != BTRFS_EXTENT_ITEM_KEY) { + (key.type != BTRFS_EXTENT_ITEM_KEY && + key.type != BTRFS_METADATA_ITEM_KEY)) { path->slots[0]++; continue; } bytenr = key.objectid; - num_bytes = key.offset; + if (key.type == BTRFS_METADATA_ITEM_KEY) + num_bytes = key.offset; + else + num_bytes = extent_root->leafsize; if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) { ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); if (btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK) { - ret = add_metadata(bytenr, num_bytes, - &metadump); - BUG_ON(ret); + ret = add_extent(bytenr, num_bytes, metadump, + 0); + if (ret) { + fprintf(stderr, "Error adding block " + "%d\n", ret); + break; + } } } else { #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - if (is_tree_block(extent_root, path, bytenr)) { - ret = add_metadata(bytenr, num_bytes, - &metadump); - BUG_ON(ret); + ret = is_tree_block(extent_root, path, bytenr); + if (ret < 0) { + fprintf(stderr, "Error checking tree block " + "%d\n", ret); + break; } + + if (ret) { + ret = add_extent(bytenr, num_bytes, metadump, + 0); + if (ret) { + fprintf(stderr, "Error adding block " + "%d\n", ret); + break; + } + } + ret = 0; #else - BUG_ON(1); + fprintf(stderr, "Either extent tree corruption or " + "you haven't built with V0 support\n"); + ret = -EIO; + break; #endif } bytenr += num_bytes; } + btrfs_release_path(extent_root, path); + + return ret; +} + +static int create_metadump(const char *input, FILE *out, int num_threads, + int compress_level, int walk_trees) +{ + struct btrfs_root *root; + struct btrfs_path *path = NULL; + struct metadump_struct metadump; + int ret; + int err = 0; + + root = open_ctree(input, 0, 0); + if (!root) { + fprintf(stderr, "Open ctree failed\n"); + return -EIO; + } + + BUG_ON(root->nodesize != 
root->leafsize); + + ret = metadump_init(&metadump, root, out, num_threads, + compress_level); + if (ret) { + fprintf(stderr, "Error initing metadump %d\n", ret); + close_ctree(root); + return ret; + } + + ret = add_extent(BTRFS_SUPER_INFO_OFFSET, 4096, &metadump, 0); + if (ret) { + fprintf(stderr, "Error adding metadata %d\n", ret); + err = ret; + goto out; + } + + path = btrfs_alloc_path(); + if (!path) { + fprintf(stderr, "Out of memory allocing path\n"); + err = -ENOMEM; + goto out; + } + + if (walk_trees) { + ret = copy_tree_blocks(root, root->fs_info->chunk_root->node, + &metadump, 1); + if (ret) { + err = ret; + goto out; + } + + ret = copy_tree_blocks(root, root->fs_info->tree_root->node, + &metadump, 1); + if (ret) { + err = ret; + goto out; + } + } else { + ret = copy_from_extent_tree(&metadump, path); + if (ret) { + err = ret; + goto out; + } + } + + ret = copy_log_trees(root, &metadump, path); + if (ret) { + err = ret; + goto out; + } + + ret = copy_space_cache(root, &metadump, path); +out: ret = flush_pending(&metadump, 1); - BUG_ON(ret); + if (ret) { + if (!err) + err = ret; + fprintf(stderr, "Error flushing pending %d\n", ret); + } metadump_destroy(&metadump); btrfs_free_path(path); ret = close_ctree(root); - return 0; + return err ? 
err : ret; } -static void update_super(u8 *buffer) +static void update_super_old(u8 *buffer) { struct btrfs_super_block *super = (struct btrfs_super_block *)buffer; struct btrfs_chunk *chunk; @@ -599,6 +967,221 @@ static void update_super(u8 *buffer) csum_block(buffer, 4096); } +static int update_super(u8 *buffer) +{ + struct btrfs_super_block *super = (struct btrfs_super_block *)buffer; + struct btrfs_chunk *chunk; + struct btrfs_disk_key *disk_key; + struct btrfs_key key; + u32 new_array_size = 0; + u32 array_size; + u32 cur = 0; + u32 new_cur = 0; + u8 *ptr, *write_ptr; + int old_num_stripes; + + write_ptr = ptr = super->sys_chunk_array; + array_size = btrfs_super_sys_array_size(super); + + while (cur < array_size) { + disk_key = (struct btrfs_disk_key *)ptr; + btrfs_disk_key_to_cpu(&key, disk_key); + + new_array_size += sizeof(*disk_key); + memmove(write_ptr, ptr, sizeof(*disk_key)); + + write_ptr += sizeof(*disk_key); + ptr += sizeof(*disk_key); + cur += sizeof(*disk_key); + new_cur += sizeof(*disk_key); + + if (key.type == BTRFS_CHUNK_ITEM_KEY) { + chunk = (struct btrfs_chunk *)ptr; + old_num_stripes = btrfs_stack_chunk_num_stripes(chunk); + chunk = (struct btrfs_chunk *)write_ptr; + + memmove(write_ptr, ptr, sizeof(*chunk)); + btrfs_set_stack_chunk_num_stripes(chunk, 1); + btrfs_set_stack_chunk_sub_stripes(chunk, 0); + btrfs_set_stack_chunk_type(chunk, + BTRFS_BLOCK_GROUP_SYSTEM); + chunk->stripe.devid = super->dev_item.devid; + chunk->stripe.offset = cpu_to_le64(key.offset); + memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, + BTRFS_UUID_SIZE); + new_array_size += sizeof(*chunk); + new_cur += sizeof(*chunk); + } else { + fprintf(stderr, "Bogus key in the sys chunk array " + "%d\n", key.type); + return -EIO; + } + write_ptr += sizeof(*chunk); + ptr += btrfs_chunk_item_size(old_num_stripes); + cur += btrfs_chunk_item_size(old_num_stripes); + } + + btrfs_set_super_sys_array_size(super, new_array_size); + csum_block(buffer, 4096); + + return 0; +} + +static 
struct extent_buffer *alloc_dummy_eb(u64 bytenr, u32 size) +{ + struct extent_buffer *eb; + + eb = malloc(sizeof(struct extent_buffer) + size); + if (!eb) + return NULL; + memset(eb, 0, sizeof(struct extent_buffer) + size); + + eb->start = bytenr; + eb->len = size; + return eb; +} + +static void truncate_item(struct extent_buffer *eb, int slot, u32 new_size) +{ + struct btrfs_item *item; + u32 nritems; + u32 old_size; + u32 old_data_start; + u32 size_diff; + u32 data_end; + int i; + + old_size = btrfs_item_size_nr(eb, slot); + if (old_size == new_size) + return; + + nritems = btrfs_header_nritems(eb); + data_end = btrfs_item_offset_nr(eb, nritems - 1); + + old_data_start = btrfs_item_offset_nr(eb, slot); + size_diff = old_size - new_size; + + for (i = slot; i < nritems; i++) { + u32 ioff; + item = btrfs_item_nr(eb, i); + ioff = btrfs_item_offset(eb, item); + btrfs_set_item_offset(eb, item, ioff + size_diff); + } + + memmove_extent_buffer(eb, btrfs_leaf_data(eb) + data_end + size_diff, + btrfs_leaf_data(eb) + data_end, + old_data_start + new_size - data_end); + item = btrfs_item_nr(eb, slot); + btrfs_set_item_size(eb, item, new_size); +} + +static int fixup_chunk_tree_block(struct mdrestore_struct *mdres, + struct async_work *async, u8 *buffer, + size_t size) +{ + struct extent_buffer *eb; + size_t size_left = size; + u64 bytenr = async->start; + int i; + + if (size_left % mdres->leafsize) + return 0; + + eb = alloc_dummy_eb(bytenr, mdres->leafsize); + if (!eb) + return -ENOMEM; + + while (size_left) { + eb->start = bytenr; + memcpy(eb->data, buffer, mdres->leafsize); + + if (btrfs_header_bytenr(eb) != bytenr) + break; + if (memcmp(mdres->fsid, + eb->data + offsetof(struct btrfs_header, fsid), + BTRFS_FSID_SIZE)) + break; + + if (btrfs_header_owner(eb) != BTRFS_CHUNK_TREE_OBJECTID) + goto next; + + if (btrfs_header_level(eb) != 0) + goto next; + + for (i = 0; i < btrfs_header_nritems(eb); i++) { + struct btrfs_chunk chunk; + struct btrfs_key key; + u64 type; + + 
btrfs_item_key_to_cpu(eb, &key, i); + if (key.type != BTRFS_CHUNK_ITEM_KEY) + continue; + truncate_item(eb, i, sizeof(chunk)); + read_extent_buffer(eb, &chunk, + btrfs_item_ptr_offset(eb, i), + sizeof(chunk)); + + /* Zero out the RAID profile */ + type = btrfs_stack_chunk_type(&chunk); + type &= (BTRFS_BLOCK_GROUP_DATA | + BTRFS_BLOCK_GROUP_SYSTEM | + BTRFS_BLOCK_GROUP_METADATA); + btrfs_set_stack_chunk_type(&chunk, type); + + btrfs_set_stack_chunk_num_stripes(&chunk, 1); + btrfs_set_stack_chunk_sub_stripes(&chunk, 0); + btrfs_set_stack_stripe_devid(&chunk.stripe, mdres->devid); + btrfs_set_stack_stripe_offset(&chunk.stripe, key.offset); + memcpy(chunk.stripe.dev_uuid, mdres->uuid, + BTRFS_UUID_SIZE); + write_extent_buffer(eb, &chunk, + btrfs_item_ptr_offset(eb, i), + sizeof(chunk)); + } + memcpy(buffer, eb->data, eb->len); + csum_block(buffer, eb->len); +next: + size_left -= mdres->leafsize; + buffer += mdres->leafsize; + bytenr += mdres->leafsize; + } + + return 0; +} + +static void write_backup_supers(int fd, u8 *buf) +{ + struct stat st; + u64 size; + u64 bytenr; + int i; + int ret; + + if (fstat(fd, &st)) { + fprintf(stderr, "Couldn't stat restore point, won't be able " + "to write backup supers: %d\n", errno); + return; + } + + size = btrfs_device_size(fd, &st); + + for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + if (bytenr + 4096 > size) + break; + ret = pwrite64(fd, buf, 4096, bytenr); + if (ret < 4096) { + if (ret < 0) + fprintf(stderr, "Problem writing out backup " + "super block %d, err %d\n", i, errno); + else + fprintf(stderr, "Short write writing out " + "backup super block\n"); + break; + } + } +} + static void *restore_worker(void *data) { struct mdrestore_struct *mdres = (struct mdrestore_struct *)data; @@ -611,11 +1194,20 @@ static void *restore_worker(void *data) outfd = fileno(mdres->out); buffer = malloc(MAX_PENDING_SIZE * 2); - BUG_ON(!buffer); + if (!buffer) { + fprintf(stderr, "Error allocing buffer\n"); + 
pthread_mutex_lock(&mdres->mutex); + if (!mdres->error) + mdres->error = -ENOMEM; + pthread_mutex_unlock(&mdres->mutex); + goto out; + } while (1) { + int err = 0; + pthread_mutex_lock(&mdres->mutex); - while (list_empty(&mdres->list)) { + while (!mdres->leafsize || list_empty(&mdres->list)) { if (mdres->done) { pthread_mutex_unlock(&mdres->mutex); goto out; @@ -630,20 +1222,49 @@ static void *restore_worker(void *data) size = MAX_PENDING_SIZE * 2; ret = uncompress(buffer, (unsigned long *)&size, async->buffer, async->bufsize); - BUG_ON(ret != Z_OK); + if (ret != Z_OK) { + fprintf(stderr, "Error decompressing %d\n", + ret); + err = -EIO; + } outbuf = buffer; } else { outbuf = async->buffer; size = async->bufsize; } - if (async->start == BTRFS_SUPER_INFO_OFFSET) - update_super(outbuf); + if (async->start == BTRFS_SUPER_INFO_OFFSET) { + if (mdres->old_restore) { + update_super_old(outbuf); + } else { + ret = update_super(outbuf); + if (ret) + err = ret; + } + } else if (!mdres->old_restore) { + ret = fixup_chunk_tree_block(mdres, async, outbuf, size); + if (ret) + err = ret; + } ret = pwrite64(outfd, outbuf, size, async->start); - BUG_ON(ret != size); + if (ret < size) { + if (ret < 0) { + fprintf(stderr, "Error writing to device %d\n", + errno); + err = errno; + } else { + fprintf(stderr, "Short write\n"); + err = -EIO; + } + } + + if (async->start == BTRFS_SUPER_INFO_OFFSET) + write_backup_supers(outfd, outbuf); pthread_mutex_lock(&mdres->mutex); + if (err && !mdres->error) + mdres->error = err; mdres->num_items--; pthread_mutex_unlock(&mdres->mutex); @@ -655,8 +1276,25 @@ out: pthread_exit(NULL); } -static int mdresotre_init(struct mdrestore_struct *mdres, - FILE *in, FILE *out, int num_threads) +static void mdrestore_destroy(struct mdrestore_struct *mdres) +{ + int i; + pthread_mutex_lock(&mdres->mutex); + mdres->done = 1; + pthread_cond_broadcast(&mdres->cond); + pthread_mutex_unlock(&mdres->mutex); + + for (i = 0; i < mdres->num_threads; i++) + 
pthread_join(mdres->threads[i], NULL); + + pthread_cond_destroy(&mdres->cond); + pthread_mutex_destroy(&mdres->mutex); + free(mdres->threads); +} + +static int mdrestore_init(struct mdrestore_struct *mdres, + FILE *in, FILE *out, int old_restore, + int num_threads) { int i, ret = 0; @@ -666,6 +1304,7 @@ static int mdresotre_init(struct mdrestore_struct *mdres, INIT_LIST_HEAD(&mdres->list); mdres->in = in; mdres->out = out; + mdres->old_restore = old_restore; if (!num_threads) return 0; @@ -680,23 +1319,45 @@ static int mdresotre_init(struct mdrestore_struct *mdres, if (ret) break; } + if (ret) + mdrestore_destroy(mdres); return ret; } -static void mdresotre_destroy(struct mdrestore_struct *mdres) +static int fill_mdres_info(struct mdrestore_struct *mdres, + struct async_work *async) { - int i; - pthread_mutex_lock(&mdres->mutex); - mdres->done = 1; - pthread_cond_broadcast(&mdres->cond); - pthread_mutex_unlock(&mdres->mutex); + struct btrfs_super_block *super; + u8 *buffer = NULL; + u8 *outbuf; + int ret; - for (i = 0; i < mdres->num_threads; i++) - pthread_join(mdres->threads[i], NULL); + if (mdres->compress_method == COMPRESS_ZLIB) { + size_t size = MAX_PENDING_SIZE * 2; - pthread_cond_destroy(&mdres->cond); - pthread_mutex_destroy(&mdres->mutex); - free(mdres->threads); + buffer = malloc(MAX_PENDING_SIZE * 2); + if (!buffer) + return -ENOMEM; + ret = uncompress(buffer, (unsigned long *)&size, + async->buffer, async->bufsize); + if (ret != Z_OK) { + fprintf(stderr, "Error decompressing %d\n", ret); + free(buffer); + return -EIO; + } + outbuf = buffer; + } else { + outbuf = async->buffer; + } + + super = (struct btrfs_super_block *)outbuf; + mdres->leafsize = btrfs_super_leafsize(super); + memcpy(mdres->fsid, super->fsid, BTRFS_FSID_SIZE); + memcpy(mdres->uuid, super->dev_item.uuid, + BTRFS_UUID_SIZE); + mdres->devid = le64_to_cpu(super->dev_item.devid); + free(buffer); + return 0; } static int add_cluster(struct meta_cluster *cluster, @@ -717,14 +1378,38 @@ 
static int add_cluster(struct meta_cluster *cluster, for (i = 0; i < nritems; i++) { item = &cluster->items[i]; async = calloc(1, sizeof(*async)); + if (!async) { + fprintf(stderr, "Error allocating async\n"); + return -ENOMEM; + } async->start = le64_to_cpu(item->bytenr); async->bufsize = le32_to_cpu(item->size); async->buffer = malloc(async->bufsize); + if (!async->buffer) { + fprintf(stderr, "Error allocing async buffer\n"); + free(async); + return -ENOMEM; + } ret = fread(async->buffer, async->bufsize, 1, mdres->in); - BUG_ON(ret != 1); + if (ret != 1) { + fprintf(stderr, "Error reading buffer %d\n", errno); + free(async->buffer); + free(async); + return -EIO; + } bytenr += async->bufsize; pthread_mutex_lock(&mdres->mutex); + if (async->start == BTRFS_SUPER_INFO_OFFSET) { + ret = fill_mdres_info(mdres, async); + if (ret) { + fprintf(stderr, "Error setting up restore\n"); + pthread_mutex_unlock(&mdres->mutex); + free(async->buffer); + free(async); + return ret; + } + } list_add_tail(&async->list, &mdres->list); mdres->num_items++; pthread_cond_signal(&mdres->cond); @@ -736,7 +1421,10 @@ static int add_cluster(struct meta_cluster *cluster, bytenr += size; ret = fread(buffer, size, 1, mdres->in); - BUG_ON(ret != 1); + if (ret != 1) { + fprintf(stderr, "Error reading in buffer %d\n", errno); + return -EIO; + } } *next = bytenr; return 0; @@ -744,8 +1432,11 @@ static int add_cluster(struct meta_cluster *cluster, static int wait_for_worker(struct mdrestore_struct *mdres) { + int ret = 0; + pthread_mutex_lock(&mdres->mutex); - while (mdres->num_items > 0) { + ret = mdres->error; + while (!ret && mdres->num_items > 0) { struct timespec ts = { .tv_sec = 0, .tv_nsec = 10000000, @@ -753,19 +1444,21 @@ static int wait_for_worker(struct mdrestore_struct *mdres) pthread_mutex_unlock(&mdres->mutex); nanosleep(&ts, NULL); pthread_mutex_lock(&mdres->mutex); + ret = mdres->error; } pthread_mutex_unlock(&mdres->mutex); - return 0; + return ret; } -static int 
restore_metadump(const char *input, FILE *out, int num_threads) +static int restore_metadump(const char *input, FILE *out, int old_restore, + int num_threads) { - struct meta_cluster *cluster; + struct meta_cluster *cluster = NULL; struct meta_cluster_header *header; struct mdrestore_struct mdrestore; u64 bytenr = 0; - FILE *in; - int ret; + FILE *in = NULL; + int ret = 0; if (!strcmp(input, "-")) { in = stdin; @@ -778,10 +1471,21 @@ static int restore_metadump(const char *input, FILE *out, int num_threads) } cluster = malloc(BLOCK_SIZE); - BUG_ON(!cluster); + if (!cluster) { + fprintf(stderr, "Error allocating cluster\n"); + if (in != stdin) + fclose(in); + return -ENOMEM; + } - ret = mdresotre_init(&mdrestore, in, out, num_threads); - BUG_ON(ret); + ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads); + if (ret) { + fprintf(stderr, "Error initing mdrestore %d\n", ret); + if (in != stdin) + fclose(in); + free(cluster); + return ret; + } while (1) { ret = fread(cluster, BLOCK_SIZE, 1, in); @@ -792,15 +1496,24 @@ static int restore_metadump(const char *input, FILE *out, int num_threads) if (le64_to_cpu(header->magic) != HEADER_MAGIC || le64_to_cpu(header->bytenr) != bytenr) { fprintf(stderr, "bad header in metadump image\n"); - return 1; + ret = -EIO; + break; } ret = add_cluster(cluster, &mdrestore, &bytenr); - BUG_ON(ret); + if (ret) { + fprintf(stderr, "Error adding cluster\n"); + break; + } - wait_for_worker(&mdrestore); + ret = wait_for_worker(&mdrestore); + if (ret) { + fprintf(stderr, "One of the threads errored out %d\n", + ret); + break; + } } - mdresotre_destroy(&mdrestore); + mdrestore_destroy(&mdrestore); free(cluster); if (in != stdin) fclose(in); @@ -813,6 +1526,8 @@ static void print_usage(void) fprintf(stderr, "\t-r \trestore metadump image\n"); fprintf(stderr, "\t-c value\tcompression level (0 ~ 9)\n"); fprintf(stderr, "\t-t value\tnumber of threads (1 ~ 32)\n"); + fprintf(stderr, "\t-o \tdon't mess with the chunk tree when 
restoring\n"); + fprintf(stderr, "\t-w \twalk all trees instead of using extent tree, do this if your extent tree is broken\n"); exit(1); } @@ -823,11 +1538,13 @@ int main(int argc, char *argv[]) int num_threads = 0; int compress_level = 0; int create = 1; + int old_restore = 0; + int walk_trees = 0; int ret; FILE *out; while (1) { - int c = getopt(argc, argv, "rc:t:"); + int c = getopt(argc, argv, "rc:t:ow"); if (c < 0) break; switch (c) { @@ -844,11 +1561,20 @@ int main(int argc, char *argv[]) if (compress_level < 0 || compress_level > 9) print_usage(); break; + case 'o': + old_restore = 1; + break; + case 'w': + walk_trees = 1; + break; default: print_usage(); } } + if (old_restore && create) + print_usage(); + argc = argc - optind; if (argc != 2) print_usage(); @@ -873,14 +1599,14 @@ int main(int argc, char *argv[]) if (create) ret = create_metadump(source, out, num_threads, - compress_level); + compress_level, walk_trees); else - ret = restore_metadump(source, out, 1); + ret = restore_metadump(source, out, old_restore, 1); if (out == stdout) fflush(out); else fclose(out); - exit(ret); + return ret; } diff --git a/btrfs-list.c b/btrfs-list.c index 5f4a9be..c3d35de 100644 --- a/btrfs-list.c +++ b/btrfs-list.c @@ -30,10 +30,14 @@ #include <unistd.h> #include <dirent.h> #include <libgen.h> -#include "kerncompat.h" #include "ctree.h" #include "transaction.h" #include "utils.h" +#include <uuid/uuid.h> +#include "btrfs-list.h" + +#define BTRFS_LIST_NFILTERS_INCREASE (2 * BTRFS_LIST_FILTER_MAX) +#define BTRFS_LIST_NCOMPS_INCREASE (2 * BTRFS_LIST_COMP_MAX) /* we store all the roots we find in an rbtree so that we can * search for them later. @@ -42,45 +46,289 @@ struct root_lookup { struct rb_root root; }; -/* - * one of these for each root we find. 
- */ -struct root_info { - struct rb_node rb_node; +struct { + char *name; + char *column_name; + int need_print; +} btrfs_list_columns[] = { + { + .name = "ID", + .column_name = "ID", + .need_print = 0, + }, + { + .name = "gen", + .column_name = "Gen", + .need_print = 0, + }, + { + .name = "cgen", + .column_name = "CGen", + .need_print = 0, + }, + { + .name = "parent", + .column_name = "Parent", + .need_print = 0, + }, + { + .name = "top level", + .column_name = "Top Level", + .need_print = 0, + }, + { + .name = "otime", + .column_name = "OTime", + .need_print = 0, + }, + { + .name = "parent_uuid", + .column_name = "Parent UUID", + .need_print = 0, + }, + { + .name = "uuid", + .column_name = "UUID", + .need_print = 0, + }, + { + .name = "path", + .column_name = "Path", + .need_print = 0, + }, + { + .name = NULL, + .column_name = NULL, + .need_print = 0, + }, +}; - /* this root's id */ - u64 root_id; +static btrfs_list_filter_func all_filter_funcs[]; +static btrfs_list_comp_func all_comp_funcs[]; - /* the id of the root that references this one */ - u64 ref_tree; +void btrfs_list_setup_print_column(enum btrfs_list_column_enum column) +{ + int i; - /* the dir id we're in from ref_tree */ - u64 dir_id; + BUG_ON(column < 0 || column > BTRFS_LIST_ALL); - /* path from the subvol we live in to this root, including the - * root's name. This is null until we do the extra lookup ioctl. 
- */ - char *path; + if (column < BTRFS_LIST_ALL) { + btrfs_list_columns[column].need_print = 1; + return; + } - /* the name of this root in the directory it lives in */ - char name[]; -}; + for (i = 0; i < BTRFS_LIST_ALL; i++) + btrfs_list_columns[i].need_print = 1; +} static void root_lookup_init(struct root_lookup *tree) { tree->root.rb_node = NULL; } -static int comp_entry(struct root_info *entry, u64 root_id, u64 ref_tree) +static int comp_entry_with_rootid(struct root_info *entry1, + struct root_info *entry2, + int is_descending) { - if (entry->root_id > root_id) - return 1; - if (entry->root_id < root_id) - return -1; - if (entry->ref_tree > ref_tree) - return 1; - if (entry->ref_tree < ref_tree) - return -1; + int ret; + + if (entry1->root_id > entry2->root_id) + ret = 1; + else if (entry1->root_id < entry2->root_id) + ret = -1; + else + ret = 0; + + return is_descending ? -ret : ret; +} + +static int comp_entry_with_gen(struct root_info *entry1, + struct root_info *entry2, + int is_descending) +{ + int ret; + + if (entry1->gen > entry2->gen) + ret = 1; + else if (entry1->gen < entry2->gen) + ret = -1; + else + ret = 0; + + return is_descending ? -ret : ret; +} + +static int comp_entry_with_ogen(struct root_info *entry1, + struct root_info *entry2, + int is_descending) +{ + int ret; + + if (entry1->ogen > entry2->ogen) + ret = 1; + else if (entry1->ogen < entry2->ogen) + ret = -1; + else + ret = 0; + + return is_descending ? -ret : ret; +} + +static int comp_entry_with_path(struct root_info *entry1, + struct root_info *entry2, + int is_descending) +{ + int ret; + + if (strcmp(entry1->full_path, entry2->full_path) > 0) + ret = 1; + else if (strcmp(entry1->full_path, entry2->full_path) < 0) + ret = -1; + else + ret = 0; + + return is_descending ? 
-ret : ret; +} + +static btrfs_list_comp_func all_comp_funcs[] = { + [BTRFS_LIST_COMP_ROOTID] = comp_entry_with_rootid, + [BTRFS_LIST_COMP_OGEN] = comp_entry_with_ogen, + [BTRFS_LIST_COMP_GEN] = comp_entry_with_gen, + [BTRFS_LIST_COMP_PATH] = comp_entry_with_path, +}; + +static char *all_sort_items[] = { + [BTRFS_LIST_COMP_ROOTID] = "rootid", + [BTRFS_LIST_COMP_OGEN] = "ogen", + [BTRFS_LIST_COMP_GEN] = "gen", + [BTRFS_LIST_COMP_PATH] = "path", + [BTRFS_LIST_COMP_MAX] = NULL, +}; + +static int btrfs_list_get_sort_item(char *sort_name) +{ + int i; + + for (i = 0; i < BTRFS_LIST_COMP_MAX; i++) { + if (strcmp(sort_name, all_sort_items[i]) == 0) + return i; + } + return -1; +} + +struct btrfs_list_comparer_set *btrfs_list_alloc_comparer_set(void) +{ + struct btrfs_list_comparer_set *set; + int size; + + size = sizeof(struct btrfs_list_comparer_set) + + BTRFS_LIST_NCOMPS_INCREASE * sizeof(struct btrfs_list_comparer); + set = malloc(size); + if (!set) { + fprintf(stderr, "memory allocation failed\n"); + exit(1); + } + + memset(set, 0, size); + set->total = BTRFS_LIST_NCOMPS_INCREASE; + + return set; +} + +void btrfs_list_free_comparer_set(struct btrfs_list_comparer_set *comp_set) +{ + free(comp_set); +} + +int btrfs_list_setup_comparer(struct btrfs_list_comparer_set **comp_set, + enum btrfs_list_comp_enum comparer, + int is_descending) +{ + struct btrfs_list_comparer_set *set = *comp_set; + int size; + + BUG_ON(!set); + BUG_ON(comparer >= BTRFS_LIST_COMP_MAX); + BUG_ON(set->ncomps > set->total); + + if (set->ncomps == set->total) { + size = set->total + BTRFS_LIST_NCOMPS_INCREASE; + size = sizeof(*set) + size * sizeof(struct btrfs_list_comparer); + set = realloc(set, size); + if (!set) { + fprintf(stderr, "memory allocation failed\n"); + exit(1); + } + + memset(&set->comps[set->total], 0, + BTRFS_LIST_NCOMPS_INCREASE * + sizeof(struct btrfs_list_comparer)); + set->total += BTRFS_LIST_NCOMPS_INCREASE; + *comp_set = set; + } + + BUG_ON(set->comps[set->ncomps].comp_func); + 
+ set->comps[set->ncomps].comp_func = all_comp_funcs[comparer]; + set->comps[set->ncomps].is_descending = is_descending; + set->ncomps++; + return 0; +} + +static int sort_comp(struct root_info *entry1, struct root_info *entry2, + struct btrfs_list_comparer_set *set) +{ + int rootid_compared = 0; + int i, ret = 0; + + if (!set || !set->ncomps) + goto comp_rootid; + + for (i = 0; i < set->ncomps; i++) { + if (!set->comps[i].comp_func) + break; + + ret = set->comps[i].comp_func(entry1, entry2, + set->comps[i].is_descending); + if (ret) + return ret; + + if (set->comps[i].comp_func == comp_entry_with_rootid) + rootid_compared = 1; + } + + if (!rootid_compared) { +comp_rootid: + ret = comp_entry_with_rootid(entry1, entry2, 0); + } + + return ret; +} + +static int sort_tree_insert(struct root_lookup *sort_tree, + struct root_info *ins, + struct btrfs_list_comparer_set *comp_set) +{ + struct rb_node **p = &sort_tree->root.rb_node; + struct rb_node *parent = NULL; + struct root_info *curr; + int ret; + + while (*p) { + parent = *p; + curr = rb_entry(parent, struct root_info, sort_node); + + ret = sort_comp(ins, curr, comp_set); + if (ret < 0) + p = &(*p)->rb_left; + else if (ret > 0) + p = &(*p)->rb_right; + else + return -EEXIST; + } + + rb_link_node(&ins->sort_node, parent, p); + rb_insert_color(&ins->sort_node, &sort_tree->root); return 0; } @@ -89,102 +337,175 @@ static int comp_entry(struct root_info *entry, u64 root_id, u64 ref_tree) * if one is already there. 
Both root_id and ref_tree are used * as the key */ -static struct rb_node *tree_insert(struct rb_root *root, u64 root_id, - u64 ref_tree, struct rb_node *node) +static int root_tree_insert(struct root_lookup *root_tree, + struct root_info *ins) { - struct rb_node ** p = &root->rb_node; + struct rb_node **p = &root_tree->root.rb_node; struct rb_node * parent = NULL; - struct root_info *entry; - int comp; + struct root_info *curr; + int ret; while(*p) { parent = *p; - entry = rb_entry(parent, struct root_info, rb_node); - - comp = comp_entry(entry, root_id, ref_tree); + curr = rb_entry(parent, struct root_info, rb_node); - if (comp < 0) + ret = comp_entry_with_rootid(ins, curr, 0); + if (ret < 0) p = &(*p)->rb_left; - else if (comp > 0) + else if (ret > 0) p = &(*p)->rb_right; else - return parent; + return -EEXIST; } - entry = rb_entry(parent, struct root_info, rb_node); - rb_link_node(node, parent, p); - rb_insert_color(node, root); - return NULL; + rb_link_node(&ins->rb_node, parent, p); + rb_insert_color(&ins->rb_node, &root_tree->root); + return 0; } /* * find a given root id in the tree. 
We return the smallest one, * rb_next can be used to move forward looking for more if required */ -static struct root_info *tree_search(struct rb_root *root, u64 root_id) +static struct root_info *root_tree_search(struct root_lookup *root_tree, + u64 root_id) { - struct rb_node * n = root->rb_node; + struct rb_node *n = root_tree->root.rb_node; struct root_info *entry; + struct root_info tmp; + int ret; + + tmp.root_id = root_id; while(n) { entry = rb_entry(n, struct root_info, rb_node); - if (entry->root_id < root_id) + ret = comp_entry_with_rootid(&tmp, entry, 0); + if (ret < 0) n = n->rb_left; - else if (entry->root_id > root_id) + else if (ret > 0) n = n->rb_right; - else { - struct root_info *prev; - struct rb_node *prev_n; - while (1) { - prev_n = rb_prev(n); - if (!prev_n) - break; - prev = rb_entry(prev_n, struct root_info, - rb_node); - if (prev->root_id != root_id) - break; - entry = prev; - n = prev_n; - } + else return entry; - } } return NULL; } +static int update_root(struct root_lookup *root_lookup, + u64 root_id, u64 ref_tree, u64 root_offset, u64 flags, + u64 dir_id, char *name, int name_len, u64 ogen, u64 gen, + time_t ot, void *uuid, void *puuid) +{ + struct root_info *ri; + + ri = root_tree_search(root_lookup, root_id); + if (!ri || ri->root_id != root_id) + return -ENOENT; + if (name && name_len > 0) { + if (ri->name) + free(ri->name); + + ri->name = malloc(name_len + 1); + if (!ri->name) { + fprintf(stderr, "memory allocation failed\n"); + exit(1); + } + strncpy(ri->name, name, name_len); + ri->name[name_len] = 0; + } + if (ref_tree) + ri->ref_tree = ref_tree; + if (root_offset) + ri->root_offset = root_offset; + if (flags) + ri->flags = flags; + if (dir_id) + ri->dir_id = dir_id; + if (gen) + ri->gen = gen; + if (ogen) + ri->ogen = ogen; + if (!ri->ogen && root_offset) + ri->ogen = root_offset; + if (ot) + ri->otime = ot; + if (uuid) + memcpy(&ri->uuid, uuid, BTRFS_UUID_SIZE); + if (puuid) + memcpy(&ri->puuid, puuid, BTRFS_UUID_SIZE); + + 
return 0; +} + /* - * this allocates a new root in the lookup tree. - * - * root_id should be the object id of the root - * - * ref_tree is the objectid of the referring root. - * - * dir_id is the directory in ref_tree where this root_id can be found. - * - * name is the name of root_id in that directory - * - * name_len is the length of name + * add_root - update the existed root, or allocate a new root and insert it + * into the lookup tree. + * root_id: object id of the root + * ref_tree: object id of the referring root. + * root_offset: offset value of the root'key + * dir_id: inode id of the directory in ref_tree where this root can be found. + * name: the name of root_id in that directory + * name_len: the length of name + * ogen: the original generation of the root + * gen: the current generation of the root + * ot: the original time(create time) of the root + * uuid: uuid of the root + * puuid: uuid of the root parent if any */ static int add_root(struct root_lookup *root_lookup, - u64 root_id, u64 ref_tree, u64 dir_id, char *name, - int name_len) + u64 root_id, u64 ref_tree, u64 root_offset, u64 flags, + u64 dir_id, char *name, int name_len, u64 ogen, u64 gen, + time_t ot, void *uuid, void *puuid) { struct root_info *ri; - struct rb_node *ret; - ri = malloc(sizeof(*ri) + name_len + 1); + int ret; + + ret = update_root(root_lookup, root_id, ref_tree, root_offset, flags, + dir_id, name, name_len, ogen, gen, ot, uuid, puuid); + if (!ret) + return 0; + + ri = malloc(sizeof(*ri)); if (!ri) { printf("memory allocation failed\n"); exit(1); } - memset(ri, 0, sizeof(*ri) + name_len + 1); - ri->path = NULL; - ri->dir_id = dir_id; + memset(ri, 0, sizeof(*ri)); ri->root_id = root_id; - ri->ref_tree = ref_tree; - strncpy(ri->name, name, name_len); - ret = tree_insert(&root_lookup->root, root_id, ref_tree, &ri->rb_node); + if (name && name_len > 0) { + ri->name = malloc(name_len + 1); + if (!ri->name) { + fprintf(stderr, "memory allocation failed\n"); + exit(1); + } + 
strncpy(ri->name, name, name_len); + ri->name[name_len] = 0; + } + if (ref_tree) + ri->ref_tree = ref_tree; + if (dir_id) + ri->dir_id = dir_id; + if (root_offset) + ri->root_offset = root_offset; + if (flags) + ri->flags = flags; + if (gen) + ri->gen = gen; + if (ogen) + ri->ogen = ogen; + if (!ri->ogen && root_offset) + ri->ogen = root_offset; + if (ot) + ri->otime = ot; + + if (uuid) + memcpy(&ri->uuid, uuid, BTRFS_UUID_SIZE); + + if (puuid) + memcpy(&ri->puuid, puuid, BTRFS_UUID_SIZE); + + ret = root_tree_insert(root_lookup, ri); if (ret) { printf("failed to insert tree %llu\n", (unsigned long long)root_id); exit(1); @@ -192,6 +513,35 @@ static int add_root(struct root_lookup *root_lookup, return 0; } +void __free_root_info(struct root_info *ri) +{ + if (ri->name) + free(ri->name); + + if (ri->path) + free(ri->path); + + if (ri->full_path) + free(ri->full_path); + + free(ri); +} + +void __free_all_subvolumn(struct root_lookup *root_tree) +{ + struct root_info *entry; + struct rb_node *n; + + n = rb_first(&root_tree->root); + while (n) { + entry = rb_entry(n, struct root_info, rb_node); + rb_erase(n, &root_tree->root); + __free_root_info(entry); + + n = rb_first(&root_tree->root); + } +} + /* * for a given root_info, search through the root_lookup tree to construct * the full path name to it. @@ -200,7 +550,7 @@ static int add_root(struct root_lookup *root_lookup, * in by lookup_ino_path */ static int resolve_root(struct root_lookup *rl, struct root_info *ri, - u64 *root_id, u64 *parent_id, u64 *top_id, char **path) + u64 top_id) { char *full_path = NULL; int len = 0; @@ -210,16 +560,30 @@ static int resolve_root(struct root_lookup *rl, struct root_info *ri, * we go backwards from the root_info object and add pathnames * from parent directories as we go. */ - *parent_id = 0; found = ri; while (1) { char *tmp; u64 next; - int add_len = strlen(found->path); + int add_len; + + /* + * ref_tree = 0 indicates the subvolumes + * has been deleted. 
+ */ + if (!found->ref_tree) { + free(full_path); + return -ENOENT; + } + + add_len = strlen(found->path); - /* room for / and for null */ - tmp = malloc(add_len + 2 + len); if (full_path) { + /* room for / and for null */ + tmp = malloc(add_len + 2 + len); + if (!tmp) { + perror("malloc failed"); + exit(1); + } memcpy(tmp + add_len + 1, full_path, len); tmp[add_len] = '/'; memcpy(tmp, found->path, add_len); @@ -233,29 +597,33 @@ static int resolve_root(struct root_lookup *rl, struct root_info *ri, } next = found->ref_tree; - /* record the first parent */ - if (*parent_id == 0) - *parent_id = next; - /* if the ref_tree refers to ourselves, we're at the top */ - if (next == found->root_id) { - *top_id = next; + if (next == top_id) { + ri->top_id = top_id; break; } /* - * if the ref_tree wasn't in our tree of roots, we're - * at the top - */ - found = tree_search(&rl->root, next); - if (!found) { - *top_id = next; + * if the ref_tree = BTRFS_FS_TREE_OBJECTID, + * we are at the top + */ + if (next == BTRFS_FS_TREE_OBJECTID) { + ri->top_id = next; break; } + + /* + * if the ref_tree wasn't in our tree of roots, the + * subvolume was deleted. 
+ */ + found = root_tree_search(rl, next); + if (!found) { + free(full_path); + return -ENOENT; + } } - *root_id = ri->root_id; - *path = full_path; + ri->full_path = full_path; return 0; } @@ -275,6 +643,9 @@ static int lookup_ino_path(int fd, struct root_info *ri) if (ri->path) return 0; + if (!ri->ref_tree) + return -ENOENT; + memset(&args, 0, sizeof(args)); args.treeid = ri->ref_tree; args.objectid = ri->dir_id; @@ -282,6 +653,10 @@ static int lookup_ino_path(int fd, struct root_info *ri) ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args); e = errno; if (ret) { + if (e == ENOENT) { + ri->ref_tree = 0; + return -ENOENT; + } fprintf(stderr, "ERROR: Failed to lookup path for root %llu - %s\n", (unsigned long long)ri->ref_tree, strerror(e)); @@ -323,7 +698,7 @@ static u64 find_root_gen(int fd) int ret; struct btrfs_ioctl_search_args args; struct btrfs_ioctl_search_key *sk = &args.key; - struct btrfs_ioctl_search_header *sh; + struct btrfs_ioctl_search_header sh; unsigned long off = 0; u64 max_found = 0; int i; @@ -374,22 +749,21 @@ static u64 find_root_gen(int fd) off = 0; for (i = 0; i < sk->nr_items; i++) { struct btrfs_root_item *item; - sh = (struct btrfs_ioctl_search_header *)(args.buf + - off); - off += sizeof(*sh); + memcpy(&sh, args.buf + off, sizeof(sh)); + off += sizeof(sh); item = (struct btrfs_root_item *)(args.buf + off); - off += sh->len; + off += sh.len; - sk->min_objectid = sh->objectid; - sk->min_type = sh->type; - sk->min_offset = sh->offset; + sk->min_objectid = sh.objectid; + sk->min_type = sh.type; + sk->min_offset = sh.offset; - if (sh->objectid > ino_args.treeid) + if (sh.objectid > ino_args.treeid) break; - if (sh->objectid == ino_args.treeid && - sh->type == BTRFS_ROOT_ITEM_KEY) { + if (sh.objectid == ino_args.treeid && + sh.type == BTRFS_ROOT_ITEM_KEY) { max_found = max(max_found, btrfs_root_generation(item)); } @@ -401,7 +775,7 @@ static u64 find_root_gen(int fd) if (sk->min_type != BTRFS_ROOT_ITEM_KEY) break; - if (sk->min_objectid != 
BTRFS_ROOT_ITEM_KEY) + if (sk->min_objectid != ino_args.treeid) break; } return max_found; @@ -553,26 +927,83 @@ build: return full; } +int btrfs_list_get_default_subvolume(int fd, u64 *default_id) +{ + struct btrfs_ioctl_search_args args; + struct btrfs_ioctl_search_key *sk = &args.key; + struct btrfs_ioctl_search_header *sh; + u64 found = 0; + int ret; + + memset(&args, 0, sizeof(args)); + + /* + * search for a dir item with a name 'default' in the tree of + * tree roots, it should point us to a default root + */ + sk->tree_id = 1; + + /* don't worry about ancient format and request only one item */ + sk->nr_items = 1; + + sk->max_objectid = BTRFS_ROOT_TREE_DIR_OBJECTID; + sk->min_objectid = BTRFS_ROOT_TREE_DIR_OBJECTID; + sk->max_type = BTRFS_DIR_ITEM_KEY; + sk->min_type = BTRFS_DIR_ITEM_KEY; + sk->max_offset = (u64)-1; + sk->max_transid = (u64)-1; + + ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args); + if (ret < 0) + return ret; + + /* the ioctl returns the number of items it found in nr_items */ + if (sk->nr_items == 0) + goto out; + + sh = (struct btrfs_ioctl_search_header *)args.buf; + + if (sh->type == BTRFS_DIR_ITEM_KEY) { + struct btrfs_dir_item *di; + int name_len; + char *name; + + di = (struct btrfs_dir_item *)(sh + 1); + name_len = btrfs_stack_dir_name_len(di); + name = (char *)(di + 1); + + if (!strncmp("default", name, name_len)) + found = btrfs_disk_key_objectid(&di->location); + } + +out: + *default_id = found; + return 0; +} + static int __list_subvol_search(int fd, struct root_lookup *root_lookup) { int ret; struct btrfs_ioctl_search_args args; struct btrfs_ioctl_search_key *sk = &args.key; - struct btrfs_ioctl_search_header *sh; + struct btrfs_ioctl_search_header sh; struct btrfs_root_ref *ref; + struct btrfs_root_item *ri; unsigned long off = 0; int name_len; char *name; u64 dir_id; + u64 gen = 0; + u64 ogen; + u64 flags; int i; + time_t t; + u8 uuid[BTRFS_UUID_SIZE]; + u8 puuid[BTRFS_UUID_SIZE]; root_lookup_init(root_lookup); memset(&args, 0, 
sizeof(args)); - root_lookup_init(root_lookup); - - memset(&args, 0, sizeof(args)); - /* search in the tree of tree roots */ sk->tree_id = 1; @@ -581,13 +1012,15 @@ static int __list_subvol_search(int fd, struct root_lookup *root_lookup) * only send back this type of key now. */ sk->max_type = BTRFS_ROOT_BACKREF_KEY; - sk->min_type = BTRFS_ROOT_BACKREF_KEY; + sk->min_type = BTRFS_ROOT_ITEM_KEY; + + sk->min_objectid = BTRFS_FIRST_FREE_OBJECTID; /* * set all the other params to the max, we'll take any objectid * and any trans */ - sk->max_objectid = (u64)-1; + sk->max_objectid = BTRFS_LAST_FREE_OBJECTID; sk->max_offset = (u64)-1; sk->max_transid = (u64)-1; @@ -609,47 +1042,265 @@ static int __list_subvol_search(int fd, struct root_lookup *root_lookup) * read the root_ref item it contains */ for (i = 0; i < sk->nr_items; i++) { - sh = (struct btrfs_ioctl_search_header *)(args.buf + - off); - off += sizeof(*sh); - if (sh->type == BTRFS_ROOT_BACKREF_KEY) { + memcpy(&sh, args.buf + off, sizeof(sh)); + off += sizeof(sh); + if (sh.type == BTRFS_ROOT_BACKREF_KEY) { ref = (struct btrfs_root_ref *)(args.buf + off); name_len = btrfs_stack_root_ref_name_len(ref); name = (char *)(ref + 1); dir_id = btrfs_stack_root_ref_dirid(ref); - add_root(root_lookup, sh->objectid, sh->offset, - dir_id, name, name_len); + add_root(root_lookup, sh.objectid, sh.offset, + 0, 0, dir_id, name, name_len, 0, 0, 0, + NULL, NULL); + } else if (sh.type == BTRFS_ROOT_ITEM_KEY) { + ri = (struct btrfs_root_item *)(args.buf + off); + gen = btrfs_root_generation(ri); + flags = btrfs_root_flags(ri); + if(sh.len > + sizeof(struct btrfs_root_item_v0)) { + t = ri->otime.sec; + ogen = btrfs_root_otransid(ri); + memcpy(uuid, ri->uuid, BTRFS_UUID_SIZE); + memcpy(puuid, ri->parent_uuid, BTRFS_UUID_SIZE); + } else { + t = 0; + ogen = 0; + memset(uuid, 0, BTRFS_UUID_SIZE); + memset(puuid, 0, BTRFS_UUID_SIZE); + } + + add_root(root_lookup, sh.objectid, 0, + sh.offset, flags, 0, NULL, 0, ogen, + gen, t, uuid, puuid); } 
- off += sh->len; + off += sh.len; /* * record the mins in sk so we can make sure the * next search doesn't repeat this root */ - sk->min_objectid = sh->objectid; - sk->min_type = sh->type; - sk->min_offset = sh->offset; + sk->min_objectid = sh.objectid; + sk->min_type = sh.type; + sk->min_offset = sh.offset; } sk->nr_items = 4096; - /* this iteration is done, step forward one root for the next - * ioctl - */ - if (sk->min_type < BTRFS_ROOT_BACKREF_KEY) { - sk->min_type = BTRFS_ROOT_BACKREF_KEY; - sk->min_offset = 0; - } else if (sk->min_objectid < (u64)-1) { + sk->min_offset++; + if (!sk->min_offset) /* overflow */ + sk->min_type++; + else + continue; + + if (sk->min_type > BTRFS_ROOT_BACKREF_KEY) { + sk->min_type = BTRFS_ROOT_ITEM_KEY; sk->min_objectid++; - sk->min_type = BTRFS_ROOT_BACKREF_KEY; - sk->min_offset = 0; } else + continue; + + if (sk->min_objectid > sk->max_objectid) break; } return 0; } +static int filter_by_rootid(struct root_info *ri, u64 data) +{ + return ri->root_id == data; +} + +static int filter_snapshot(struct root_info *ri, u64 data) +{ + return !!ri->root_offset; +} + +static int filter_flags(struct root_info *ri, u64 flags) +{ + return ri->flags & flags; +} + +static int filter_gen_more(struct root_info *ri, u64 data) +{ + return ri->gen >= data; +} + +static int filter_gen_less(struct root_info *ri, u64 data) +{ + return ri->gen <= data; +} + +static int filter_gen_equal(struct root_info *ri, u64 data) +{ + return ri->gen == data; +} + +static int filter_cgen_more(struct root_info *ri, u64 data) +{ + return ri->ogen >= data; +} + +static int filter_cgen_less(struct root_info *ri, u64 data) +{ + return ri->ogen <= data; +} + +static int filter_cgen_equal(struct root_info *ri, u64 data) +{ + return ri->ogen == data; +} + +static int filter_topid_equal(struct root_info *ri, u64 data) +{ + return ri->top_id == data; +} + +static int filter_full_path(struct root_info *ri, u64 data) +{ + if (ri->full_path && ri->top_id != data) { + char *tmp; 
+ char p[] = "<FS_TREE>"; + int add_len = strlen(p); + int len = strlen(ri->full_path); + + tmp = malloc(len + add_len + 2); + if (!tmp) { + fprintf(stderr, "memory allocation failed\n"); + exit(1); + } + memcpy(tmp + add_len + 1, ri->full_path, len); + tmp[len + add_len + 1] = '\0'; + tmp[add_len] = '/'; + memcpy(tmp, p, add_len); + free(ri->full_path); + ri->full_path = tmp; + } + return 1; +} + +static int filter_by_parent(struct root_info *ri, u64 data) +{ + return !uuid_compare(ri->puuid, (u8 *)(unsigned long)data); +} + +static btrfs_list_filter_func all_filter_funcs[] = { + [BTRFS_LIST_FILTER_ROOTID] = filter_by_rootid, + [BTRFS_LIST_FILTER_SNAPSHOT_ONLY] = filter_snapshot, + [BTRFS_LIST_FILTER_FLAGS] = filter_flags, + [BTRFS_LIST_FILTER_GEN_MORE] = filter_gen_more, + [BTRFS_LIST_FILTER_GEN_LESS] = filter_gen_less, + [BTRFS_LIST_FILTER_GEN_EQUAL] = filter_gen_equal, + [BTRFS_LIST_FILTER_CGEN_MORE] = filter_cgen_more, + [BTRFS_LIST_FILTER_CGEN_LESS] = filter_cgen_less, + [BTRFS_LIST_FILTER_CGEN_EQUAL] = filter_cgen_equal, + [BTRFS_LIST_FILTER_TOPID_EQUAL] = filter_topid_equal, + [BTRFS_LIST_FILTER_FULL_PATH] = filter_full_path, + [BTRFS_LIST_FILTER_BY_PARENT] = filter_by_parent, +}; + +struct btrfs_list_filter_set *btrfs_list_alloc_filter_set(void) +{ + struct btrfs_list_filter_set *set; + int size; + + size = sizeof(struct btrfs_list_filter_set) + + BTRFS_LIST_NFILTERS_INCREASE * sizeof(struct btrfs_list_filter); + set = malloc(size); + if (!set) { + fprintf(stderr, "memory allocation failed\n"); + exit(1); + } + + memset(set, 0, size); + set->total = BTRFS_LIST_NFILTERS_INCREASE; + + return set; +} + +void btrfs_list_free_filter_set(struct btrfs_list_filter_set *filter_set) +{ + free(filter_set); +} + +int btrfs_list_setup_filter(struct btrfs_list_filter_set **filter_set, + enum btrfs_list_filter_enum filter, u64 data) +{ + struct btrfs_list_filter_set *set = *filter_set; + int size; + + BUG_ON(!set); + BUG_ON(filter >= BTRFS_LIST_FILTER_MAX); + 
BUG_ON(set->nfilters > set->total); + + if (set->nfilters == set->total) { + size = set->total + BTRFS_LIST_NFILTERS_INCREASE; + size = sizeof(*set) + size * sizeof(struct btrfs_list_filter); + set = realloc(set, size); + if (!set) { + fprintf(stderr, "memory allocation failed\n"); + exit(1); + } + + memset(&set->filters[set->total], 0, + BTRFS_LIST_NFILTERS_INCREASE * + sizeof(struct btrfs_list_filter)); + set->total += BTRFS_LIST_NFILTERS_INCREASE; + *filter_set = set; + } + + BUG_ON(set->filters[set->nfilters].filter_func); + + set->filters[set->nfilters].filter_func = all_filter_funcs[filter]; + set->filters[set->nfilters].data = data; + set->nfilters++; + return 0; +} + +static int filter_root(struct root_info *ri, + struct btrfs_list_filter_set *set) +{ + int i, ret; + + if (!set || !set->nfilters) + return 1; + + for (i = 0; i < set->nfilters; i++) { + if (!set->filters[i].filter_func) + break; + ret = set->filters[i].filter_func(ri, set->filters[i].data); + if (!ret) + return 0; + } + return 1; +} + +static void __filter_and_sort_subvol(struct root_lookup *all_subvols, + struct root_lookup *sort_tree, + struct btrfs_list_filter_set *filter_set, + struct btrfs_list_comparer_set *comp_set, + u64 top_id) +{ + struct rb_node *n; + struct root_info *entry; + int ret; + + root_lookup_init(sort_tree); + + n = rb_last(&all_subvols->root); + while (n) { + entry = rb_entry(n, struct root_info, rb_node); + + ret = resolve_root(all_subvols, entry, top_id); + if (ret == -ENOENT) + goto skip; + ret = filter_root(entry, filter_set); + if (ret) + sort_tree_insert(sort_tree, entry, comp_set); +skip: + n = rb_prev(n); + } +} + static int __list_subvol_fill_paths(int fd, struct root_lookup *root_lookup) { struct rb_node *n; @@ -660,7 +1311,7 @@ static int __list_subvol_fill_paths(int fd, struct root_lookup *root_lookup) int ret; entry = rb_entry(n, struct root_info, rb_node); ret = lookup_ino_path(fd, entry); - if(ret < 0) + if (ret && ret != -ENOENT) return ret; n = 
rb_next(n); } @@ -668,13 +1319,174 @@ static int __list_subvol_fill_paths(int fd, struct root_lookup *root_lookup) return 0; } -int list_subvols(int fd, int print_parent) +static void print_subvolume_column(struct root_info *subv, + enum btrfs_list_column_enum column) +{ + char tstr[256]; + char uuidparse[37]; + + BUG_ON(column >= BTRFS_LIST_ALL || column < 0); + + switch (column) { + case BTRFS_LIST_OBJECTID: + printf("%llu", subv->root_id); + break; + case BTRFS_LIST_GENERATION: + printf("%llu", subv->gen); + break; + case BTRFS_LIST_OGENERATION: + printf("%llu", subv->ogen); + break; + case BTRFS_LIST_PARENT: + printf("%llu", subv->ref_tree); + break; + case BTRFS_LIST_TOP_LEVEL: + printf("%llu", subv->top_id); + break; + case BTRFS_LIST_OTIME: + if (subv->otime) + strftime(tstr, 256, "%Y-%m-%d %X", + localtime(&subv->otime)); + else + strcpy(tstr, "-"); + printf("%s", tstr); + break; + case BTRFS_LIST_UUID: + if (uuid_is_null(subv->uuid)) + strcpy(uuidparse, "-"); + else + uuid_unparse(subv->uuid, uuidparse); + printf("%s", uuidparse); + break; + case BTRFS_LIST_PUUID: + if (uuid_is_null(subv->puuid)) + strcpy(uuidparse, "-"); + else + uuid_unparse(subv->puuid, uuidparse); + printf("%s", uuidparse); + break; + case BTRFS_LIST_PATH: + BUG_ON(!subv->full_path); + printf("%s", subv->full_path); + break; + default: + break; + } +} + +static void print_single_volume_info_raw(struct root_info *subv, char *raw_prefix) +{ + int i; + + for (i = 0; i < BTRFS_LIST_ALL; i++) { + if (!btrfs_list_columns[i].need_print) + continue; + + if (raw_prefix) + printf("%s",raw_prefix); + + print_subvolume_column(subv, i); + } + printf("\n"); +} + +static void print_single_volume_info_table(struct root_info *subv) +{ + int i; + + for (i = 0; i < BTRFS_LIST_ALL; i++) { + if (!btrfs_list_columns[i].need_print) + continue; + + print_subvolume_column(subv, i); + + if (i != BTRFS_LIST_PATH) + printf("\t"); + + if (i == BTRFS_LIST_TOP_LEVEL) + printf("\t"); + } + printf("\n"); +} + +static 
void print_single_volume_info_default(struct root_info *subv) +{ + int i; + + for (i = 0; i < BTRFS_LIST_ALL; i++) { + if (!btrfs_list_columns[i].need_print) + continue; + + printf("%s ", btrfs_list_columns[i].name); + print_subvolume_column(subv, i); + + if (i != BTRFS_LIST_PATH) + printf(" "); + } + printf("\n"); +} + +static void print_all_volume_info_tab_head() +{ + int i; + int len; + char barrier[20]; + + for (i = 0; i < BTRFS_LIST_ALL; i++) { + if (btrfs_list_columns[i].need_print) + printf("%s\t", btrfs_list_columns[i].name); + + if (i == BTRFS_LIST_ALL-1) + printf("\n"); + } + + for (i = 0; i < BTRFS_LIST_ALL; i++) { + memset(barrier, 0, sizeof(barrier)); + + if (btrfs_list_columns[i].need_print) { + len = strlen(btrfs_list_columns[i].name); + while (len--) + strcat(barrier, "-"); + + printf("%s\t", barrier); + } + if (i == BTRFS_LIST_ALL-1) + printf("\n"); + } +} + +static void print_all_volume_info(struct root_lookup *sorted_tree, + int layout, char *raw_prefix) { - struct root_lookup root_lookup; struct rb_node *n; + struct root_info *entry; + + if (layout == BTRFS_LIST_LAYOUT_TABLE) + print_all_volume_info_tab_head(); + + n = rb_first(&sorted_tree->root); + while (n) { + entry = rb_entry(n, struct root_info, sort_node); + switch (layout) { + case BTRFS_LIST_LAYOUT_DEFAULT: + print_single_volume_info_default(entry); + break; + case BTRFS_LIST_LAYOUT_TABLE: + print_single_volume_info_table(entry); + break; + case BTRFS_LIST_LAYOUT_RAW: + print_single_volume_info_raw(entry, raw_prefix); + break; + } + n = rb_next(n); + } +} + +int btrfs_list_subvols(int fd, struct root_lookup *root_lookup) +{ int ret; - ret = __list_subvol_search(fd, &root_lookup); + ret = __list_subvol_search(fd, root_lookup); if (ret) { fprintf(stderr, "ERROR: can't perform the search - %s\n", strerror(errno)); @@ -685,38 +1497,79 @@ int list_subvols(int fd, int print_parent) * now we have an rbtree full of root_info objects, but we need to fill * in their path names within the subvol 
that is referencing each one. */ - ret = __list_subvol_fill_paths(fd, &root_lookup); - if (ret < 0) + ret = __list_subvol_fill_paths(fd, root_lookup); + return ret; +} + +int btrfs_list_subvols_print(int fd, struct btrfs_list_filter_set *filter_set, + struct btrfs_list_comparer_set *comp_set, + int layout, int full_path, char *raw_prefix) +{ + struct root_lookup root_lookup; + struct root_lookup root_sort; + int ret = 0; + u64 top_id = 0; + + if (full_path) + ret = btrfs_list_get_path_rootid(fd, &top_id); + if (ret) return ret; - /* now that we have all the subvol-relative paths filled in, - * we have to string the subvols together so that we can get - * a path all the way back to the FS root - */ - n = rb_last(&root_lookup.root); - while (n) { - struct root_info *entry; - u64 root_id; - u64 level; - u64 parent_id; - char *path; - entry = rb_entry(n, struct root_info, rb_node); - resolve_root(&root_lookup, entry, &root_id, &parent_id, - &level, &path); - if (print_parent) { - printf("ID %llu parent %llu top level %llu path %s\n", - (unsigned long long)root_id, - (unsigned long long)parent_id, - (unsigned long long)level, path); - } else { - printf("ID %llu top level %llu path %s\n", - (unsigned long long)root_id, - (unsigned long long)level, path); + ret = btrfs_list_subvols(fd, &root_lookup); + if (ret) + return ret; + __filter_and_sort_subvol(&root_lookup, &root_sort, filter_set, + comp_set, top_id); + + print_all_volume_info(&root_sort, layout, raw_prefix); + __free_all_subvolumn(&root_lookup); + + return 0; +} + +char *strdup_or_null(const char *s) +{ + if (!s) + return NULL; + return strdup(s); +} + +int btrfs_get_subvol(int fd, struct root_info *the_ri) +{ + int ret, rr; + struct root_lookup rl; + struct rb_node *rbn; + struct root_info *ri; + u64 root_id; + + ret = btrfs_list_get_path_rootid(fd, &root_id); + if (ret) + return ret; + + ret = btrfs_list_subvols(fd, &rl); + if (ret) + return ret; + + rbn = rb_first(&rl.root); + while(rbn) { + ri = rb_entry(rbn, 
struct root_info, rb_node); + rr = resolve_root(&rl, ri, root_id); + if (rr == -ENOENT) { + ret = -ENOENT; + rbn = rb_next(rbn); + continue; } - free(path); - n = rb_prev(n); + if (!comp_entry_with_rootid(the_ri, ri, 0)) { + memcpy(the_ri, ri, offsetof(struct root_info, path)); + the_ri->path = strdup_or_null(ri->path); + the_ri->name = strdup_or_null(ri->name); + the_ri->full_path = strdup_or_null(ri->full_path); + ret = 0; + break; + } + rbn = rb_next(rbn); } - + __free_all_subvolumn(&rl); return ret; } @@ -799,12 +1652,12 @@ static int print_one_extent(int fd, struct btrfs_ioctl_search_header *sh, return 0; } -int find_updated_files(int fd, u64 root_id, u64 oldest_gen) +int btrfs_list_find_updated_files(int fd, u64 root_id, u64 oldest_gen) { int ret; struct btrfs_ioctl_search_args args; struct btrfs_ioctl_search_key *sk = &args.key; - struct btrfs_ioctl_search_header *sh; + struct btrfs_ioctl_search_header sh; struct btrfs_file_extent_item *item; unsigned long off = 0; u64 found_gen; @@ -854,35 +1707,34 @@ int find_updated_files(int fd, u64 root_id, u64 oldest_gen) * read the root_ref item it contains */ for (i = 0; i < sk->nr_items; i++) { - sh = (struct btrfs_ioctl_search_header *)(args.buf + - off); - off += sizeof(*sh); + memcpy(&sh, args.buf + off, sizeof(sh)); + off += sizeof(sh); /* * just in case the item was too big, pass something other * than garbage */ - if (sh->len == 0) + if (sh.len == 0) item = &backup; else item = (struct btrfs_file_extent_item *)(args.buf + off); found_gen = btrfs_stack_file_extent_generation(item); - if (sh->type == BTRFS_EXTENT_DATA_KEY && + if (sh.type == BTRFS_EXTENT_DATA_KEY && found_gen >= oldest_gen) { - print_one_extent(fd, sh, item, found_gen, + print_one_extent(fd, &sh, item, found_gen, &cache_dirid, &cache_dir_name, &cache_ino, &cache_full_name); } - off += sh->len; + off += sh.len; /* * record the mins in sk so we can make sure the * next search doesn't repeat this root */ - sk->min_objectid = sh->objectid; - 
sk->min_offset = sh->offset; - sk->min_type = sh->type; + sk->min_objectid = sh.objectid; + sk->min_offset = sh.offset; + sk->min_type = sh.type; } sk->nr_items = 4096; if (sk->min_offset < (u64)-1) @@ -900,12 +1752,17 @@ int find_updated_files(int fd, u64 root_id, u64 oldest_gen) return ret; } -char *path_for_root(int fd, u64 root) +char *btrfs_list_path_for_root(int fd, u64 root) { struct root_lookup root_lookup; struct rb_node *n; char *ret_path = NULL; int ret; + u64 top_id; + + ret = btrfs_list_get_path_rootid(fd, &top_id); + if (ret) + return ERR_PTR(ret); ret = __list_subvol_search(fd, &root_lookup); if (ret < 0) @@ -918,19 +1775,135 @@ char *path_for_root(int fd, u64 root) n = rb_last(&root_lookup.root); while (n) { struct root_info *entry; - u64 root_id; - u64 parent_id; - u64 level; - char *path; + entry = rb_entry(n, struct root_info, rb_node); - resolve_root(&root_lookup, entry, &root_id, &parent_id, &level, - &path); - if (root_id == root) - ret_path = path; - else - free(path); + ret = resolve_root(&root_lookup, entry, top_id); + if (ret == -ENOENT && entry->root_id == root) { + ret_path = NULL; + break; + } + if (entry->root_id == root) { + ret_path = entry->full_path; + entry->full_path = NULL; + } + n = rb_prev(n); } + __free_all_subvolumn(&root_lookup); return ret_path; } + +int btrfs_list_parse_sort_string(char *optarg, + struct btrfs_list_comparer_set **comps) +{ + int order; + int flag; + char *p; + char **ptr_argv; + int what_to_sort; + + while ((p = strtok(optarg, ",")) != NULL) { + flag = 0; + ptr_argv = all_sort_items; + + while (*ptr_argv) { + if (strcmp(*ptr_argv, p) == 0) { + flag = 1; + break; + } else { + p++; + if (strcmp(*ptr_argv, p) == 0) { + flag = 1; + p--; + break; + } + p--; + } + ptr_argv++; + } + + if (flag == 0) + return -1; + + else { + if (*p == '+') { + order = 0; + p++; + } else if (*p == '-') { + order = 1; + p++; + } else + order = 0; + + what_to_sort = btrfs_list_get_sort_item(p); + btrfs_list_setup_comparer(comps, 
what_to_sort, order); + } + optarg = NULL; + } + + return 0; +} + +/* + * This function is used to parse the argument of filter condition. + * + * type is the filter object. + */ +int btrfs_list_parse_filter_string(char *optarg, + struct btrfs_list_filter_set **filters, + enum btrfs_list_filter_enum type) +{ + + u64 arg; + char *ptr_parse_end = NULL; + char *ptr_optarg_end = optarg + strlen(optarg); + + switch (*(optarg++)) { + case '+': + arg = (u64)strtol(optarg, &ptr_parse_end, 10); + type += 2; + if (ptr_parse_end != ptr_optarg_end) + return -1; + + btrfs_list_setup_filter(filters, type, arg); + break; + case '-': + arg = (u64)strtoll(optarg, &ptr_parse_end, 10); + type += 1; + if (ptr_parse_end != ptr_optarg_end) + return -1; + + btrfs_list_setup_filter(filters, type, arg); + break; + default: + optarg--; + arg = (u64)strtoll(optarg, &ptr_parse_end, 10); + + if (ptr_parse_end != ptr_optarg_end) + return -1; + btrfs_list_setup_filter(filters, type, arg); + break; + } + + return 0; +} + +int btrfs_list_get_path_rootid(int fd, u64 *treeid) +{ + int ret; + struct btrfs_ioctl_ino_lookup_args args; + + memset(&args, 0, sizeof(args)); + args.objectid = BTRFS_FIRST_FREE_OBJECTID; + + ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args); + if (ret < 0) { + fprintf(stderr, + "ERROR: can't perform the search -%s\n", + strerror(errno)); + return ret; + } + *treeid = args.treeid; + return 0; +} diff --git a/btrfs-list.h b/btrfs-list.h new file mode 100644 index 0000000..d3fd9e2 --- /dev/null +++ b/btrfs-list.h @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2012 Fujitsu. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#if BTRFS_FLAT_INCLUDES +#include "kerncompat.h" +#else +#include <btrfs/kerncompat.h> +#endif /* BTRFS_FLAT_INCLUDES */ + +#define BTRFS_LIST_LAYOUT_DEFAULT 0 +#define BTRFS_LIST_LAYOUT_TABLE 1 +#define BTRFS_LIST_LAYOUT_RAW 2 + +/* + * one of these for each root we find. + */ +struct root_info { + struct rb_node rb_node; + struct rb_node sort_node; + + /* this root's id */ + u64 root_id; + + /* equal the offset of the root's key */ + u64 root_offset; + + /* flags of the root */ + u64 flags; + + /* the id of the root that references this one */ + u64 ref_tree; + + /* the dir id we're in from ref_tree */ + u64 dir_id; + + u64 top_id; + + /* generation when the root is created or last updated */ + u64 gen; + + /* creation generation of this root in sec*/ + u64 ogen; + + /* creation time of this root in sec*/ + time_t otime; + + u8 uuid[BTRFS_UUID_SIZE]; + u8 puuid[BTRFS_UUID_SIZE]; + + /* path from the subvol we live in to this root, including the + * root's name. This is null until we do the extra lookup ioctl. 
+ */ + char *path; + + /* the name of this root in the directory it lives in */ + char *name; + + char *full_path; +}; + +typedef int (*btrfs_list_filter_func)(struct root_info *, u64); +typedef int (*btrfs_list_comp_func)(struct root_info *, struct root_info *, + int); + +struct btrfs_list_filter { + btrfs_list_filter_func filter_func; + u64 data; +}; + +struct btrfs_list_comparer { + btrfs_list_comp_func comp_func; + int is_descending; +}; + +struct btrfs_list_filter_set { + int total; + int nfilters; + struct btrfs_list_filter filters[0]; +}; + +struct btrfs_list_comparer_set { + int total; + int ncomps; + struct btrfs_list_comparer comps[0]; +}; + +enum btrfs_list_column_enum { + BTRFS_LIST_OBJECTID, + BTRFS_LIST_GENERATION, + BTRFS_LIST_OGENERATION, + BTRFS_LIST_PARENT, + BTRFS_LIST_TOP_LEVEL, + BTRFS_LIST_OTIME, + BTRFS_LIST_PUUID, + BTRFS_LIST_UUID, + BTRFS_LIST_PATH, + BTRFS_LIST_ALL, +}; + +enum btrfs_list_filter_enum { + BTRFS_LIST_FILTER_ROOTID, + BTRFS_LIST_FILTER_SNAPSHOT_ONLY, + BTRFS_LIST_FILTER_FLAGS, + BTRFS_LIST_FILTER_GEN, + BTRFS_LIST_FILTER_GEN_EQUAL = BTRFS_LIST_FILTER_GEN, + BTRFS_LIST_FILTER_GEN_LESS, + BTRFS_LIST_FILTER_GEN_MORE, + BTRFS_LIST_FILTER_CGEN, + BTRFS_LIST_FILTER_CGEN_EQUAL = BTRFS_LIST_FILTER_CGEN, + BTRFS_LIST_FILTER_CGEN_LESS, + BTRFS_LIST_FILTER_CGEN_MORE, + BTRFS_LIST_FILTER_TOPID_EQUAL, + BTRFS_LIST_FILTER_FULL_PATH, + BTRFS_LIST_FILTER_BY_PARENT, + BTRFS_LIST_FILTER_MAX, +}; + +enum btrfs_list_comp_enum { + BTRFS_LIST_COMP_ROOTID, + BTRFS_LIST_COMP_OGEN, + BTRFS_LIST_COMP_GEN, + BTRFS_LIST_COMP_PATH, + BTRFS_LIST_COMP_MAX, +}; + +int btrfs_list_parse_sort_string(char *optarg, + struct btrfs_list_comparer_set **comps); +int btrfs_list_parse_filter_string(char *optarg, + struct btrfs_list_filter_set **filters, + enum btrfs_list_filter_enum type); +void btrfs_list_setup_print_column(enum btrfs_list_column_enum column); +struct btrfs_list_filter_set *btrfs_list_alloc_filter_set(void); +void btrfs_list_free_filter_set(struct 
btrfs_list_filter_set *filter_set); +int btrfs_list_setup_filter(struct btrfs_list_filter_set **filter_set, + enum btrfs_list_filter_enum filter, u64 data); +struct btrfs_list_comparer_set *btrfs_list_alloc_comparer_set(void); +void btrfs_list_free_comparer_set(struct btrfs_list_comparer_set *comp_set); +int btrfs_list_setup_comparer(struct btrfs_list_comparer_set **comp_set, + enum btrfs_list_comp_enum comparer, + int is_descending); + +int btrfs_list_subvols_print(int fd, struct btrfs_list_filter_set *filter_set, + struct btrfs_list_comparer_set *comp_set, + int is_tab_result, int full_path, char *raw_prefix); +int btrfs_list_find_updated_files(int fd, u64 root_id, u64 oldest_gen); +int btrfs_list_get_default_subvolume(int fd, u64 *default_id); +char *btrfs_list_path_for_root(int fd, u64 root); +int btrfs_list_get_path_rootid(int fd, u64 *treeid); +int btrfs_get_subvol(int fd, struct root_info *the_ri); diff --git a/btrfs-map-logical.c b/btrfs-map-logical.c index d79a73a..b9635f7 100644 --- a/btrfs-map-logical.c +++ b/btrfs-map-logical.c @@ -55,7 +55,8 @@ struct extent_buffer *debug_read_block(struct btrfs_root *root, u64 bytenr, length = blocksize; while (1) { ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, - eb->start, &length, &multi, mirror_num); + eb->start, &length, &multi, + mirror_num, NULL); BUG_ON(ret); device = multi->stripes[0].dev; eb->fd = device->fd; @@ -68,7 +69,7 @@ struct extent_buffer *debug_read_block(struct btrfs_root *root, u64 bytenr, kfree(multi); if (!copy || mirror_num == copy) - ret = read_extent_from_disk(eb); + ret = read_extent_from_disk(eb, 0, eb->len); num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, eb->start, eb->len); @@ -84,7 +85,7 @@ struct extent_buffer *debug_read_block(struct btrfs_root *root, u64 bytenr, static void print_usage(void) { - fprintf(stderr, "usage: btrfs-map-logical [options] mount_point\n"); + fprintf(stderr, "usage: btrfs-map-logical [options] device\n"); fprintf(stderr, "\t-l Logical 
extent to map\n"); fprintf(stderr, "\t-c Copy of the extent to read (usually 1 or 2)\n"); fprintf(stderr, "\t-o Output file to hold the extent\n"); @@ -96,7 +97,7 @@ static struct option long_options[] = { /* { "byte-count", 1, NULL, 'b' }, */ { "logical", 1, NULL, 'l' }, { "copy", 1, NULL, 'c' }, - { "output", 1, NULL, 'c' }, + { "output", 1, NULL, 'o' }, { "bytes", 1, NULL, 'b' }, { 0, 0, 0, 0} }; diff --git a/btrfs-select-super.c b/btrfs-select-super.c index 51eb9c9..6a458b8 100644 --- a/btrfs-select-super.c +++ b/btrfs-select-super.c @@ -43,7 +43,7 @@ int main(int ac, char **av) { struct btrfs_root *root; int ret; - int num; + int num = 0; u64 bytenr = 0; while(1) { @@ -55,8 +55,6 @@ int main(int ac, char **av) case 's': num = atol(optarg); bytenr = btrfs_sb_offset(num); - printf("using SB copy %d, bytenr %llu\n", num, - (unsigned long long)bytenr); break; default: print_usage(); @@ -84,8 +82,10 @@ int main(int ac, char **av) root = open_ctree(av[optind], bytenr, 1); - if (root == NULL) + if (!root) { + fprintf(stderr, "Open ctree failed\n"); return 1; + } /* make the super writing code think we've read the first super */ root->fs_info->super_bytenr = BTRFS_SUPER_INFO_OFFSET; @@ -95,5 +95,7 @@ int main(int ac, char **av) * transaction commit. We just want the super copy we pulled off the * disk to overwrite all the other copies */ + printf("using SB copy %d, bytenr %llu\n", num, + (unsigned long long)bytenr); return ret; } diff --git a/btrfs-show-super.c b/btrfs-show-super.c new file mode 100644 index 0000000..f587f10 --- /dev/null +++ b/btrfs-show-super.c @@ -0,0 +1,281 @@ +/* + * Copyright (C) 2012 STRATO AG. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#define _XOPEN_SOURCE 500 +#define _GNU_SOURCE 1 +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <ctype.h> +#include <uuid/uuid.h> +#include <errno.h> + +#include "kerncompat.h" +#include "ctree.h" +#include "disk-io.h" +#include "print-tree.h" +#include "transaction.h" +#include "list.h" +#include "version.h" +#include "utils.h" +#include "crc32c.h" + +static void print_usage(void); +static void dump_superblock(struct btrfs_super_block *sb); +int main(int argc, char **argv); +static int load_and_dump_sb(char *, int fd, u64 sb_bytenr); + + +static void print_usage(void) +{ + fprintf(stderr, + "usage: btrfs-show-super [-i super_mirror|-a] dev [dev..]\n"); + fprintf(stderr, "\tThe super_mirror number is between 0 and %d.\n", + BTRFS_SUPER_MIRROR_MAX - 1); + fprintf(stderr, "\tIf -a is passed all the superblocks are showed.\n"); + fprintf(stderr, "%s\n", BTRFS_BUILD_VERSION); +} + +int main(int argc, char **argv) +{ + int opt; + int all = 0; + char *filename; + int fd = -1; + int arg, i; + u64 sb_bytenr = btrfs_sb_offset(0); + + while ((opt = getopt(argc, argv, "ai:")) != -1) { + switch (opt) { + case 'i': + arg = atoi(optarg); + + if (arg < 0 || arg >= BTRFS_SUPER_MIRROR_MAX) { + fprintf(stderr, + "Illegal super_mirror %d\n", + arg); + print_usage(); + exit(1); + } + sb_bytenr = btrfs_sb_offset(arg); + break; + + case 'a': + all = 1; + break; + + default: + print_usage(); + exit(1); + } + } + + if (argc < optind + 1) { + 
print_usage(); + exit(1); + } + + for (i = optind; i < argc; i++) { + filename = argv[i]; + fd = open(filename, O_RDONLY, 0666); + if (fd < 0) { + fprintf(stderr, "Could not open %s\n", filename); + exit(1); + } + + if (all) { + int idx; + for (idx = 0; idx < BTRFS_SUPER_MIRROR_MAX; idx++) { + sb_bytenr = btrfs_sb_offset(idx); + if (load_and_dump_sb(filename, fd, sb_bytenr)) { + close(fd); + exit(1); + } + + putchar('\n'); + } + } else { + load_and_dump_sb(filename, fd, sb_bytenr); + putchar('\n'); + } + close(fd); + } + + exit(0); +} + +static int load_and_dump_sb(char *filename, int fd, u64 sb_bytenr) +{ + u8 super_block_data[BTRFS_SUPER_INFO_SIZE]; + struct btrfs_super_block *sb; + u64 ret; + + sb = (struct btrfs_super_block *)super_block_data; + + ret = pread64(fd, super_block_data, BTRFS_SUPER_INFO_SIZE, sb_bytenr); + if (ret != BTRFS_SUPER_INFO_SIZE) { + int e = errno; + + /* check if the disk if too short for further superblock */ + if (ret == 0 && e == 0) + return 0; + + fprintf(stderr, + "ERROR: Failed to read the superblock on %s at %llu\n", + filename, (unsigned long long)sb_bytenr); + fprintf(stderr, + "ERROR: error = '%s', errno = %d\n", strerror(e), e); + return 1; + } + printf("superblock: bytenr=%llu, device=%s\n", sb_bytenr, filename); + printf("---------------------------------------------------------\n"); + dump_superblock(sb); + return 0; +} + +static int check_csum_sblock(void *sb, int csum_size) +{ + char result[csum_size]; + u32 crc = ~(u32)0; + + crc = btrfs_csum_data(NULL, (char *)sb + BTRFS_CSUM_SIZE, + crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, result); + + return !memcmp(sb, &result, csum_size); +} + +static void dump_superblock(struct btrfs_super_block *sb) +{ + int i; + char *s, buf[36+1]; + u8 *p; + + printf("csum\t\t\t0x"); + for (i = 0, p = sb->csum; i < btrfs_super_csum_size(sb); i++) + printf("%02x", p[i]); + if (check_csum_sblock(sb, btrfs_super_csum_size(sb))) + printf(" [match]"); + else + printf(" 
[DON'T MATCH]"); + putchar('\n'); + + printf("bytenr\t\t\t%llu\n", + (unsigned long long)btrfs_super_bytenr(sb)); + printf("flags\t\t\t0x%llx\n", + (unsigned long long)btrfs_super_flags(sb)); + + printf("magic\t\t\t"); + s = (char *) &sb->magic; + for (i = 0; i < 8; i++) + putchar(isprint(s[i]) ? s[i] : '.'); + if (sb->magic == cpu_to_le64(BTRFS_MAGIC)) + printf(" [match]\n"); + else + printf(" [DON'T MATCH]\n"); + + uuid_unparse(sb->fsid, buf); + printf("fsid\t\t\t%s\n", buf); + + printf("label\t\t\t"); + s = sb->label; + for (i = 0; i < BTRFS_LABEL_SIZE && s[i]; i++) + putchar(isprint(s[i]) ? s[i] : '.'); + putchar('\n'); + + printf("generation\t\t%llu\n", + (unsigned long long)btrfs_super_generation(sb)); + printf("root\t\t\t%llu\n", (unsigned long long)btrfs_super_root(sb)); + printf("sys_array_size\t\t%llu\n", + (unsigned long long)btrfs_super_sys_array_size(sb)); + printf("chunk_root_generation\t%llu\n", + (unsigned long long)btrfs_super_chunk_root_generation(sb)); + printf("root_level\t\t%llu\n", + (unsigned long long)btrfs_super_root_level(sb)); + printf("chunk_root\t\t%llu\n", + (unsigned long long)btrfs_super_chunk_root(sb)); + printf("chunk_root_level\t%llu\n", + (unsigned long long)btrfs_super_chunk_root_level(sb)); + printf("log_root\t\t%llu\n", + (unsigned long long)btrfs_super_log_root(sb)); + printf("log_root_transid\t%llu\n", + (unsigned long long)btrfs_super_log_root_transid(sb)); + printf("log_root_level\t\t%llu\n", + (unsigned long long)btrfs_super_log_root_level(sb)); + printf("total_bytes\t\t%llu\n", + (unsigned long long)btrfs_super_total_bytes(sb)); + printf("bytes_used\t\t%llu\n", + (unsigned long long)btrfs_super_bytes_used(sb)); + printf("sectorsize\t\t%llu\n", + (unsigned long long)btrfs_super_sectorsize(sb)); + printf("nodesize\t\t%llu\n", + (unsigned long long)btrfs_super_nodesize(sb)); + printf("leafsize\t\t%llu\n", + (unsigned long long)btrfs_super_leafsize(sb)); + printf("stripesize\t\t%llu\n", + (unsigned long 
long)btrfs_super_stripesize(sb)); + printf("root_dir\t\t%llu\n", + (unsigned long long)btrfs_super_root_dir(sb)); + printf("num_devices\t\t%llu\n", + (unsigned long long)btrfs_super_num_devices(sb)); + printf("compat_flags\t\t0x%llx\n", + (unsigned long long)btrfs_super_compat_flags(sb)); + printf("compat_ro_flags\t\t0x%llx\n", + (unsigned long long)btrfs_super_compat_ro_flags(sb)); + printf("incompat_flags\t\t0x%llx\n", + (unsigned long long)btrfs_super_incompat_flags(sb)); + printf("csum_type\t\t%llu\n", + (unsigned long long)btrfs_super_csum_type(sb)); + printf("csum_size\t\t%llu\n", + (unsigned long long)btrfs_super_csum_size(sb)); + printf("cache_generation\t%llu\n", + (unsigned long long)btrfs_super_cache_generation(sb)); + + uuid_unparse(sb->dev_item.uuid, buf); + printf("dev_item.uuid\t\t%s\n", buf); + + uuid_unparse(sb->dev_item.fsid, buf); + printf("dev_item.fsid\t\t%s %s\n", buf, + !memcmp(sb->dev_item.fsid, sb->fsid, BTRFS_FSID_SIZE) ? + "[match]" : "[DON'T MATCH]"); + + printf("dev_item.type\t\t%llu\n", (unsigned long long) + btrfs_stack_device_type(&sb->dev_item)); + printf("dev_item.total_bytes\t%llu\n", (unsigned long long) + btrfs_stack_device_total_bytes(&sb->dev_item)); + printf("dev_item.bytes_used\t%llu\n", (unsigned long long) + btrfs_stack_device_bytes_used(&sb->dev_item)); + printf("dev_item.io_align\t%u\n", (unsigned int) + btrfs_stack_device_io_align(&sb->dev_item)); + printf("dev_item.io_width\t%u\n", (unsigned int) + btrfs_stack_device_io_width(&sb->dev_item)); + printf("dev_item.sector_size\t%u\n", (unsigned int) + btrfs_stack_device_sector_size(&sb->dev_item)); + printf("dev_item.devid\t\t%llu\n", + btrfs_stack_device_id(&sb->dev_item)); + printf("dev_item.dev_group\t%u\n", (unsigned int) + btrfs_stack_device_group(&sb->dev_item)); + printf("dev_item.seek_speed\t%u\n", (unsigned int) + btrfs_stack_device_seek_speed(&sb->dev_item)); + printf("dev_item.bandwidth\t%u\n", (unsigned int) + btrfs_stack_device_bandwidth(&sb->dev_item)); + 
printf("dev_item.generation\t%llu\n", (unsigned long long) + btrfs_stack_device_generation(&sb->dev_item)); +} diff --git a/btrfs-show.c b/btrfs-show.c deleted file mode 100644 index 8210fd2..0000000 --- a/btrfs-show.c +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#define _GNU_SOURCE -#ifndef __CHECKER__ -#include <sys/ioctl.h> -#include <sys/mount.h> -#include "ioctl.h" -#endif -#include <stdio.h> -#include <stdlib.h> -#include <getopt.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <dirent.h> -#include <uuid/uuid.h> -#include "kerncompat.h" -#include "ctree.h" -#include "transaction.h" -#include "utils.h" -#include "volumes.h" -#include "version.h" - -static int uuid_search(struct btrfs_fs_devices *fs_devices, char *search) -{ - struct list_head *cur; - struct btrfs_device *device; - - list_for_each(cur, &fs_devices->devices) { - device = list_entry(cur, struct btrfs_device, dev_list); - if ((device->label && strcmp(device->label, search) == 0) || - strcmp(device->name, search) == 0) - return 1; - } - return 0; -} - -static void print_one_uuid(struct btrfs_fs_devices *fs_devices) -{ - char uuidbuf[37]; - struct list_head *cur; - struct btrfs_device *device; - char *super_bytes_used; - u64 devs_found = 0; - u64 
total; - - uuid_unparse(fs_devices->fsid, uuidbuf); - device = list_entry(fs_devices->devices.next, struct btrfs_device, - dev_list); - if (device->label && device->label[0]) - printf("Label: %s ", device->label); - else - printf("Label: none "); - - super_bytes_used = pretty_sizes(device->super_bytes_used); - - total = device->total_devs; - printf(" uuid: %s\n\tTotal devices %llu FS bytes used %s\n", uuidbuf, - (unsigned long long)total, super_bytes_used); - - free(super_bytes_used); - - list_for_each(cur, &fs_devices->devices) { - char *total_bytes; - char *bytes_used; - device = list_entry(cur, struct btrfs_device, dev_list); - total_bytes = pretty_sizes(device->total_bytes); - bytes_used = pretty_sizes(device->bytes_used); - printf("\tdevid %4llu size %s used %s path %s\n", - (unsigned long long)device->devid, - total_bytes, bytes_used, device->name); - free(total_bytes); - free(bytes_used); - devs_found++; - } - if (devs_found < total) { - printf("\t*** Some devices missing\n"); - } - printf("\n"); -} - -static void print_usage(void) -{ - fprintf(stderr, "usage: btrfs-show [search label or device]\n"); - fprintf(stderr, "%s\n", BTRFS_BUILD_VERSION); - exit(1); -} - -static struct option long_options[] = { - /* { "byte-count", 1, NULL, 'b' }, */ - { 0, 0, 0, 0} -}; - -int main(int ac, char **av) -{ - struct list_head *all_uuids; - struct btrfs_fs_devices *fs_devices; - struct list_head *cur_uuid; - char *search = NULL; - int ret; - int option_index = 0; - - printf( "**\n" - "** WARNING: this program is considered deprecated\n" - "** Please consider to switch to the btrfs utility\n" - "**\n"); - - while(1) { - int c; - c = getopt_long(ac, av, "", long_options, - &option_index); - if (c < 0) - break; - switch(c) { - default: - print_usage(); - } - } - ac = ac - optind; - if (ac != 0) { - search = av[optind]; - } - - ret = btrfs_scan_one_dir("/dev", 0); - if (ret) - fprintf(stderr, "error %d while scanning\n", ret); - - all_uuids = btrfs_scanned_uuids(); - 
list_for_each(cur_uuid, all_uuids) { - fs_devices = list_entry(cur_uuid, struct btrfs_fs_devices, - list); - if (search && uuid_search(fs_devices, search) == 0) - continue; - print_one_uuid(fs_devices); - } - printf("%s\n", BTRFS_BUILD_VERSION); - return 0; -} - diff --git a/btrfs-vol.c b/btrfs-vol.c deleted file mode 100644 index 0efdbc1..0000000 --- a/btrfs-vol.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- */ - -#define _GNU_SOURCE -#ifndef __CHECKER__ -#include <sys/ioctl.h> -#include <sys/mount.h> -#include "ioctl.h" -#endif -#include <stdio.h> -#include <stdlib.h> -#include <getopt.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <dirent.h> -#include <uuid/uuid.h> -#include "kerncompat.h" -#include "ctree.h" -#include "transaction.h" -#include "utils.h" -#include "volumes.h" - -#ifdef __CHECKER__ -#define BLKGETSIZE64 0 -#define BTRFS_IOC_SNAP_CREATE 0 -#define BTRFS_IOC_ADD_DEV 0 -#define BTRFS_IOC_RM_DEV 0 -#define BTRFS_VOL_NAME_MAX 255 -struct btrfs_ioctl_vol_args { char name[BTRFS_VOL_NAME_MAX]; }; -static inline int ioctl(int fd, int define, void *arg) { return 0; } -#endif - -static void print_usage(void) -{ - fprintf(stderr, "usage: btrfs-vol [options] mount_point\n"); - fprintf(stderr, "\t-a device add one device\n"); - fprintf(stderr, "\t-b balance chunks across all devices\n"); - fprintf(stderr, "\t-r device remove one device\n"); - exit(1); -} - -static struct option long_options[] = { - /* { "byte-count", 1, NULL, 'b' }, */ - { "add", 1, NULL, 'a' }, - { "balance", 0, NULL, 'b' }, - { "remove", 1, NULL, 'r' }, - { 0, 0, 0, 0} -}; - -int main(int ac, char **av) -{ - struct stat st; - char *device = NULL; - char *mnt = NULL; - int ret; - int option_index = 0; - int cmd = 0; - int fd; - int devfd = 0; - DIR *dirstream; - struct btrfs_ioctl_vol_args args; - u64 dev_block_count = 0; - - printf( "**\n" - "** WARNING: this program is considered deprecated\n" - "** Please consider to switch to the btrfs utility\n" - "**\n"); - - while(1) { - int c; - c = getopt_long(ac, av, "a:br:", long_options, - &option_index); - if (c < 0) - break; - switch(c) { - case 'a': - device = strdup(optarg); - cmd = BTRFS_IOC_ADD_DEV; - break; - case 'b': - cmd = BTRFS_IOC_BALANCE; - break; - case 'r': - device = strdup(optarg); - cmd = BTRFS_IOC_RM_DEV; - break; - default: - print_usage(); - } - } - ac = ac - optind; - if (ac 
== 0) - print_usage(); - mnt = av[optind]; - - if (device && strcmp(device, "missing") == 0 && - cmd == BTRFS_IOC_RM_DEV) { - fprintf(stderr, "removing missing devices from %s\n", mnt); - } else if (cmd != BTRFS_IOC_BALANCE) { - if (cmd == BTRFS_IOC_ADD_DEV) { - ret = check_mounted(device); - if (ret < 0) { - fprintf(stderr, - "error checking %s mount status\n", - device); - exit(1); - } - if (ret == 1) { - fprintf(stderr, "%s is mounted\n", device); - exit(1); - } - } - devfd = open(device, O_RDWR); - if (devfd < 0) { - fprintf(stderr, "Unable to open device %s\n", device); - exit(1); - } - ret = fstat(devfd, &st); - if (ret) { - fprintf(stderr, "Unable to stat %s\n", device); - exit(1); - } - if (!S_ISBLK(st.st_mode)) { - fprintf(stderr, "%s is not a block device\n", device); - exit(1); - } - } - dirstream = opendir(mnt); - if (!dirstream) { - fprintf(stderr, "Unable to open directory %s\n", mnt); - exit(1); - } - if (cmd == BTRFS_IOC_ADD_DEV) { - int mixed = 0; - - ret = btrfs_prepare_device(devfd, device, 1, &dev_block_count, &mixed); - if (ret) { - fprintf(stderr, "Unable to init %s\n", device); - exit(1); - } - } - fd = dirfd(dirstream); - if (device) - strcpy(args.name, device); - else - args.name[0] = '\0'; - - ret = ioctl(fd, cmd, &args); - printf("ioctl returns %d\n", ret); - return 0; -} - diff --git a/btrfs-zero-log.c b/btrfs-zero-log.c index 1ea867b..f249aec 100644 --- a/btrfs-zero-log.c +++ b/btrfs-zero-log.c @@ -64,8 +64,8 @@ int main(int ac, char **av) return 1; trans = btrfs_start_transaction(root, 1); - btrfs_set_super_log_root(&root->fs_info->super_copy, 0); - btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0); + btrfs_set_super_log_root(root->fs_info->super_copy, 0); + btrfs_set_super_log_root_level(root->fs_info->super_copy, 0); btrfs_commit_transaction(trans, root); close_ctree(root); return ret; @@ -19,444 +19,278 @@ #include <stdlib.h> #include <string.h> -#include "kerncompat.h" -#include "btrfs_cmds.h" +#include "crc32c.h" 
+#include "commands.h" #include "version.h" -#define BASIC_HELP 0 -#define ADVANCED_HELP 1 - -typedef int (*CommandFunction)(int argc, char **argv); - -struct Command { - CommandFunction func; /* function which implements the command */ - int nargs; /* if == 999, any number of arguments - if >= 0, number of arguments, - if < 0, _minimum_ number of arguments */ - char *verb; /* verb */ - char *help; /* help lines; from the 2nd line onward they - are automatically indented */ - char *adv_help; /* advanced help message; from the 2nd line - onward they are automatically indented */ - - /* the following fields are run-time filled by the program */ - char **cmds; /* array of subcommands */ - int ncmds; /* number of subcommand */ +static const char * const btrfs_cmd_group_usage[] = { + "btrfs [--help] [--version] <group> [<group>...] <command> [<args>]", + NULL }; -static struct Command commands[] = { +static const char btrfs_cmd_group_info[] = + "Use --help as an argument for information on a specific group or command."; - /* - avoid short commands different for the case only - */ - { do_clone, -2, - "subvolume snapshot", "[-r] <source> [<dest>/]<name>\n" - "Create a writable/readonly snapshot of the subvolume <source> with\n" - "the name <name> in the <dest> directory.", - NULL - }, - { do_delete_subvolume, 1, - "subvolume delete", "<subvolume>\n" - "Delete the subvolume <subvolume>.", - NULL - }, - { do_create_subvol, 1, - "subvolume create", "[<dest>/]<name>\n" - "Create a subvolume in <dest> (or the current directory if\n" - "not passed).", - NULL - }, - { do_subvol_list, -1, "subvolume list", "[-p] <path>\n" - "List the snapshot/subvolume of a filesystem.", - "[-p] <path>\n" - "List the snapshot/subvolume of a filesystem.\n" - "-p print parent ID" - }, - { do_set_default_subvol, 2, - "subvolume set-default", "<id> <path>\n" - "Set the subvolume of the filesystem <path> which will be mounted\n" - "as default.", - NULL - }, - { do_find_newer, 2, "subvolume find-new", 
"<path> <last_gen>\n" - "List the recently modified files in a filesystem.", - NULL - }, - { do_defrag, -1, - "filesystem defragment", "[-vf] [-c[zlib,lzo]] [-s start] [-l len] [-t size] <file>|<dir> [<file>|<dir>...]\n" - "Defragment a file or a directory.", - "[-vcf] [-s start] [-l len] [-t size] <file>|<dir> [<file>|<dir>...]\n" - "Defragment file data or directory metadata.\n" - "-v be verbose\n" - "-c compress the file while defragmenting\n" - "-f flush data to disk immediately after defragmenting\n" - "-s start defragment only from byte onward\n" - "-l len defragment only up to len bytes\n" - "-t size minimal size of file to be considered for defragmenting\n" - }, - { do_get_default_subvol, 1, "subvolume get-default", "<path>\n" - "Get the default subvolume of a filesystem." - }, - { do_fssync, 1, - "filesystem sync", "<path>\n" - "Force a sync on the filesystem <path>.", - NULL - }, - { do_resize, 2, - "filesystem resize", "[+/-]<newsize>[gkm]|max <filesystem>\n" - "Resize the file system. If 'max' is passed, the filesystem\n" - "will occupe all available space on the device.", - NULL - }, - { do_show_filesystem, 999, - "filesystem show", "[--all-devices][<uuid>|<label>]\n" - "Show the info of a btrfs filesystem. 
If no argument\n" - "is passed, info of all the btrfs filesystem are shown.", - NULL - }, - { do_df_filesystem, 1, - "filesystem df", "<path>\n" - "Show space usage information for a mount point.", - NULL - }, - { do_balance, 1, - "filesystem balance", "<path>\n" - "Balance the chunks across the device.", - NULL - }, - { do_change_label, -1, - "filesystem label", "<device> [<newlabel>]\n" - "With one argument, get the label of filesystem on <device>.\n" - "If <newlabel> is passed, set the filesystem label to <newlabel>.\n" - "The filesystem must be unmounted.\n" - }, - { do_scrub_start, -1, - "scrub start", "[-Bdqr] <path>|<device>\n" - "Start a new scrub.", - "\n-B do not background\n" - "-d stats per device (-B only)\n" - "-q quiet\n" - "-r read only mode\n" - }, - { do_scrub_cancel, 1, - "scrub cancel", "<path>|<device>\n" - "Cancel a running scrub.", - NULL - }, - { do_scrub_resume, -1, - "scrub resume", "[-Bdqr] <path>|<device>\n" - "Resume previously canceled or interrupted scrub.", - NULL - }, - { do_scrub_status, -1, - "scrub status", "[-d] <path>|<device>\n" - "Show status of running or finished scrub.", - NULL - }, - { do_scan, 999, - "device scan", "[<device>...]\n" - "Scan all device for or the passed device for a btrfs\n" - "filesystem.", - NULL - }, - { do_add_volume, -2, - "device add", "<device> [<device>...] <path>\n" - "Add a device to a filesystem.", - NULL - }, - { do_remove_volume, -2, - "device delete", "<device> [<device>...] 
<path>\n" - "Remove a device from a filesystem.", - NULL - }, - { do_ino_to_path, -2, - "inspect-internal inode-resolve", "[-v] <inode> <path>\n" - "get file system paths for the given inode.", - NULL - }, - { do_logical_to_ino, -2, - "inspect-internal logical-resolve", "[-v] [-P] <logical> <path>\n" - "get file system paths for the given logical address.", - NULL - }, - { 0, 0, 0, 0 } -}; +char argv0_buf[ARGV0_BUF_SIZE] = "btrfs"; -static char *get_prgname(char *programname) +static inline const char *skip_prefix(const char *str, const char *prefix) { - char *np; - np = strrchr(programname,'/'); - if(!np) - np = programname; - else - np++; - - return np; + size_t len = strlen(prefix); + return strncmp(str, prefix, len) ? NULL : str + len; } -static void print_help(char *programname, struct Command *cmd, int helptype) +int prefixcmp(const char *str, const char *prefix) { - char *pc; - - printf("\t%s %s ", programname, cmd->verb ); + for (; ; str++, prefix++) + if (!*prefix) + return 0; + else if (*str != *prefix) + return (unsigned char)*prefix - (unsigned char)*str; +} - if (helptype == ADVANCED_HELP && cmd->adv_help) - for(pc = cmd->adv_help; *pc; pc++){ - putchar(*pc); - if(*pc == '\n') - printf("\t\t"); - } - else - for(pc = cmd->help; *pc; pc++){ - putchar(*pc); - if(*pc == '\n') - printf("\t\t"); +static int parse_one_token(const char *arg, const struct cmd_group *grp, + const struct cmd_struct **cmd_ret) +{ + const struct cmd_struct *cmd = grp->commands; + const struct cmd_struct *abbrev_cmd = NULL, *ambiguous_cmd = NULL; + + for (; cmd->token; cmd++) { + const char *rest; + + rest = skip_prefix(arg, cmd->token); + if (!rest) { + if (!prefixcmp(cmd->token, arg)) { + if (abbrev_cmd) { + /* + * If this is abbreviated, it is + * ambiguous. So when there is no + * exact match later, we need to + * error out. 
+ */ + ambiguous_cmd = abbrev_cmd; + } + abbrev_cmd = cmd; + } + continue; } + if (*rest) + continue; - putchar('\n'); -} + *cmd_ret = cmd; + return 0; + } -static void help(char *np) -{ - struct Command *cp; + if (ambiguous_cmd) + return -2; - printf("Usage:\n"); - for( cp = commands; cp->verb; cp++ ) - print_help(np, cp, BASIC_HELP); + if (abbrev_cmd) { + *cmd_ret = abbrev_cmd; + return 0; + } - printf("\n\t%s help|--help|-h\n\t\tShow the help.\n",np); - printf("\n\t%s <cmd> --help\n\t\tShow detailed help for a command or\n\t\t" - "subset of commands.\n",np); - printf("\n%s\n", BTRFS_BUILD_VERSION); + return -1; } -static int split_command(char *cmd, char ***commands) +static const struct cmd_struct * +parse_command_token(const char *arg, const struct cmd_group *grp) { - int c, l; - char *p, *s; + const struct cmd_struct *cmd = NULL; - for( *commands = 0, l = c = 0, p = s = cmd ; ; p++, l++ ){ - if ( *p && *p != ' ' ) - continue; - - /* c + 2 so that we have room for the null */ - (*commands) = realloc( (*commands), sizeof(char *)*(c + 2)); - (*commands)[c] = strndup(s, l); - c++; - l = 0; - s = p+1; - if( !*p ) break; + switch(parse_one_token(arg, grp, &cmd)) { + case -1: + help_unknown_token(arg, grp); + case -2: + help_ambiguous_token(arg, grp); } - (*commands)[c] = 0; - return c; + return cmd; } -/* - This function checks if the passed command is ambiguous -*/ -static int check_ambiguity(struct Command *cmd, char **argv){ - int i; - struct Command *cp; - /* check for ambiguity */ - for( i = 0 ; i < cmd->ncmds ; i++ ){ - int match; - for( match = 0, cp = commands; cp->verb; cp++ ){ - int j, skip; - char *s1, *s2; - - if( cp->ncmds < i ) - continue; - - for( skip = 0, j = 0 ; j < i ; j++ ) - if( strcmp(cmd->cmds[j], cp->cmds[j])){ - skip=1; - break; - } - if(skip) - continue; - - if( !strcmp(cmd->cmds[i], cp->cmds[i])) - continue; - for(s2 = cp->cmds[i], s1 = argv[i+1]; - *s1 == *s2 && *s1; s1++, s2++ ) ; - if( !*s1 ) - match++; - } - if(match){ - int j; - 
fprintf(stderr, "ERROR: in command '"); - for( j = 0 ; j <= i ; j++ ) - fprintf(stderr, "%s%s",j?" ":"", argv[j+1]); - fprintf(stderr, "', '%s' is ambiguous\n",argv[j]); - return -2; +void handle_help_options_next_level(const struct cmd_struct *cmd, + int argc, char **argv) +{ + if (argc < 2) + return; + + if (!strcmp(argv[1], "--help")) { + if (cmd->next) { + argc--; + argv++; + help_command_group(cmd->next, argc, argv); + } else { + usage_command(cmd, 1, 0); } + + exit(0); } - return 0; } -/* - * This function, compacts the program name and the command in the first - * element of the '*av' array - */ -static int prepare_args(int *ac, char ***av, char *prgname, struct Command *cmd ){ - - char **ret; - int i; - char *newname; - - ret = (char **)malloc(sizeof(char*)*(*ac+1)); - newname = (char*)malloc(strlen(prgname)+strlen(cmd->verb)+2); - if( !ret || !newname ){ - free(ret); - free(newname); - return -1; - } +static void fixup_argv0(char **argv, const char *token) +{ + int len = strlen(argv0_buf); - ret[0] = newname; - for(i=0; i < *ac ; i++ ) - ret[i+1] = (*av)[i]; + snprintf(argv0_buf + len, sizeof(argv0_buf) - len, " %s", token); + argv[0] = argv0_buf; +} - strcpy(newname, prgname); - strcat(newname, " "); - strcat(newname, cmd->verb); +int handle_command_group(const struct cmd_group *grp, int argc, + char **argv) - (*ac)++; - *av = ret; +{ + const struct cmd_struct *cmd; - return 0; + argc--; + argv++; + if (argc < 1) { + usage_command_group(grp, 0, 0); + exit(1); + } + + cmd = parse_command_token(argv[0], grp); + handle_help_options_next_level(cmd, argc, argv); + + fixup_argv0(argv, cmd->token); + return cmd->fn(argc, argv); } +int check_argc_exact(int nargs, int expected) +{ + if (nargs < expected) + fprintf(stderr, "%s: too few arguments\n", argv0_buf); + if (nargs > expected) + fprintf(stderr, "%s: too many arguments\n", argv0_buf); + return nargs != expected; +} -/* - This function performs the following jobs: - - show the help if '--help' or 'help' or 
'-h' are passed - - verify that a command is not ambiguous, otherwise show which - part of the command is ambiguous - - if after a (even partial) command there is '--help' show detailed help - for all the matching commands - - if the command doesn't match show an error - - finally, if a command matches, they return which command matched and - the arguments - - The function return 0 in case of help is requested; <0 in case - of uncorrect command; >0 in case of matching commands - argc, argv are the arg-counter and arg-vector (input) - *nargs_ is the number of the arguments after the command (output) - **cmd_ is the invoked command (output) - ***args_ are the arguments after the command - -*/ -static int parse_args(int argc, char **argv, - CommandFunction *func_, - int *nargs_, char **cmd_, char ***args_ ) +int check_argc_min(int nargs, int expected) { - struct Command *cp; - struct Command *matchcmd=0; - char *prgname = get_prgname(argv[0]); - int i=0, helprequested=0; - - if( argc < 2 || !strcmp(argv[1], "help") || - !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")){ - help(prgname); - return 0; + if (nargs < expected) { + fprintf(stderr, "%s: too few arguments\n", argv0_buf); + return 1; } - for( cp = commands; cp->verb; cp++ ) - if( !cp->ncmds) - cp->ncmds = split_command(cp->verb, &(cp->cmds)); - - for( cp = commands; cp->verb; cp++ ){ - int match; + return 0; +} - if( argc-1 < cp->ncmds ) - continue; - for( match = 1, i = 0 ; i < cp->ncmds ; i++ ){ - char *s1, *s2; - s1 = cp->cmds[i]; - s2 = argv[i+1]; - - for(s2 = cp->cmds[i], s1 = argv[i+1]; - *s1 == *s2 && *s1; - s1++, s2++ ) ; - if( *s1 ){ - match=0; - break; - } - } +int check_argc_max(int nargs, int expected) +{ + if (nargs > expected) { + fprintf(stderr, "%s: too many arguments\n", argv0_buf); + return 1; + } - /* If you understand why this code works ... - you are a genious !! 
*/ - if(argc>i+1 && !strcmp(argv[i+1],"--help")){ - if(!helprequested) - printf("Usage:\n"); - print_help(prgname, cp, ADVANCED_HELP); - helprequested=1; - continue; - } + return 0; +} - if(!match) - continue; +const struct cmd_group btrfs_cmd_group; - matchcmd = cp; - *nargs_ = argc-matchcmd->ncmds-1; - *cmd_ = matchcmd->verb; - *args_ = argv+matchcmd->ncmds+1; - *func_ = cp->func; +static const char * const cmd_help_usage[] = { + "btrfs help [--full]", + "Dislay help information", + "", + "--full display detailed help on every command", + NULL +}; - break; - } +static int cmd_help(int argc, char **argv) +{ + help_command_group(&btrfs_cmd_group, argc, argv); + return 0; +} - if(helprequested){ - printf("\n%s\n", BTRFS_BUILD_VERSION); - return 0; - } +static const char * const cmd_version_usage[] = { + "btrfs version", + "Display btrfs-progs version", + NULL +}; - if(!matchcmd){ - fprintf( stderr, "ERROR: unknown command '%s'\n",argv[1]); - help(prgname); - return -1; - } +static int cmd_version(int argc, char **argv) +{ + printf("%s\n", BTRFS_BUILD_VERSION); + return 0; +} - if(check_ambiguity(matchcmd, argv)) - return -2; +static int handle_options(int *argc, char ***argv) +{ + char **orig_argv = *argv; + + while (*argc > 0) { + const char *arg = (*argv)[0]; + if (arg[0] != '-') + break; + + if (!strcmp(arg, "--help")) { + break; + } else if (!strcmp(arg, "--version")) { + break; + } else { + fprintf(stderr, "Unknown option: %s\n", arg); + fprintf(stderr, "usage: %s\n", + btrfs_cmd_group.usagestr[0]); + exit(129); + } - /* check the number of argument */ - if (matchcmd->nargs < 0 && matchcmd->nargs < -*nargs_ ){ - fprintf(stderr, "ERROR: '%s' requires minimum %d arg(s)\n", - matchcmd->verb, -matchcmd->nargs); - return -2; - } - if(matchcmd->nargs >= 0 && matchcmd->nargs != *nargs_ && matchcmd->nargs != 999){ - fprintf(stderr, "ERROR: '%s' requires %d arg(s)\n", - matchcmd->verb, matchcmd->nargs); - return -2; + (*argv)++; + (*argc)--; } - - if (prepare_args( 
nargs_, args_, prgname, matchcmd )){ - fprintf(stderr, "ERROR: not enough memory\\n"); - return -20; - } - - return 1; + return (*argv) - orig_argv; } -int main(int ac, char **av ) -{ - char *cmd=0, **args=0; - int nargs=0, r; - CommandFunction func=0; +const struct cmd_group btrfs_cmd_group = { + btrfs_cmd_group_usage, btrfs_cmd_group_info, { + { "subvolume", cmd_subvolume, NULL, &subvolume_cmd_group, 0 }, + { "filesystem", cmd_filesystem, NULL, &filesystem_cmd_group, 0 }, + { "balance", cmd_balance, NULL, &balance_cmd_group, 0 }, + { "device", cmd_device, NULL, &device_cmd_group, 0 }, + { "scrub", cmd_scrub, NULL, &scrub_cmd_group, 0 }, + { "check", cmd_check, cmd_check_usage, NULL, 0 }, + { "restore", cmd_restore, cmd_restore_usage, NULL, 0 }, + { "inspect-internal", cmd_inspect, NULL, &inspect_cmd_group, 0 }, + { "send", cmd_send, cmd_send_usage, NULL, 0 }, + { "receive", cmd_receive, cmd_receive_usage, NULL, 0 }, + { "quota", cmd_quota, NULL, "a_cmd_group, 0 }, + { "qgroup", cmd_qgroup, NULL, &qgroup_cmd_group, 0 }, + { "replace", cmd_replace, NULL, &replace_cmd_group, 0 }, + { "help", cmd_help, cmd_help_usage, NULL, 0 }, + { "version", cmd_version, cmd_version_usage, NULL, 0 }, + { 0, 0, 0, 0, 0 } + }, +}; - r = parse_args(ac, av, &func, &nargs, &cmd, &args); - if( r <= 0 ){ - /* error or no command to parse*/ - exit(-r); +int main(int argc, char **argv) +{ + const struct cmd_struct *cmd; + const char *bname; + + if ((bname = strrchr(argv[0], '/')) != NULL) + bname++; + else + bname = argv[0]; + + if (!strcmp(bname, "btrfsck")) { + argv[0] = "check"; + } else { + argc--; + argv++; + handle_options(&argc, &argv); + if (argc > 0) { + if (!prefixcmp(argv[0], "--")) + argv[0] += 2; + } else { + usage_command_group(&btrfs_cmd_group, 0, 0); + exit(1); + } } - exit(func(nargs, args)); + cmd = parse_command_token(argv[0], &btrfs_cmd_group); -} + handle_help_options_next_level(cmd, argc, argv); + + crc32c_optimization_init(); + fixup_argv0(argv, cmd->token); + 
exit(cmd->fn(argc, argv)); +} diff --git a/btrfs_cmds.c b/btrfs_cmds.c deleted file mode 100644 index b59e9cb..0000000 --- a/btrfs_cmds.c +++ /dev/null @@ -1,1307 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> -#include <sys/types.h> -#include <dirent.h> -#include <sys/stat.h> -#include <unistd.h> -#include <fcntl.h> -#include <libgen.h> -#include <limits.h> -#include <uuid/uuid.h> -#include <ctype.h> - -#undef ULONG_MAX - -#include "kerncompat.h" -#include "ctree.h" -#include "transaction.h" -#include "utils.h" -#include "version.h" -#include "ioctl.h" -#include "volumes.h" - -#include "btrfs_cmds.h" -#include "btrfslabel.h" - -#ifdef __CHECKER__ -#define BLKGETSIZE64 0 -#define BTRFS_IOC_SNAP_CREATE_V2 0 -#define BTRFS_VOL_NAME_MAX 255 -struct btrfs_ioctl_vol_args { char name[BTRFS_VOL_NAME_MAX]; }; -static inline int ioctl(int fd, int define, void *arg) { return 0; } -#endif - -/* - * test if path is a subvolume: - * this function return - * 0-> path exists but it is not a subvolume - * 1-> path exists and it is a subvolume - * -1 -> path is unaccessible - */ -static int test_issubvolume(char *path) -{ - - struct stat st; - int res; - - res = stat(path, &st); - if(res < 0 ) - return -1; - - return (st.st_ino == 256) && S_ISDIR(st.st_mode); - -} - -/* - * 
test if path is a directory - * this function return - * 0-> path exists but it is not a directory - * 1-> path exists and it is a directory - * -1 -> path is unaccessible - */ -static int test_isdir(char *path) -{ - struct stat st; - int res; - - res = stat(path, &st); - if(res < 0 ) - return -1; - - return S_ISDIR(st.st_mode); - -} - -int open_file_or_dir(const char *fname) -{ - int ret; - struct stat st; - DIR *dirstream; - int fd; - - ret = stat(fname, &st); - if (ret < 0) { - return -1; - } - if (S_ISDIR(st.st_mode)) { - dirstream = opendir(fname); - if (!dirstream) { - return -2; - } - fd = dirfd(dirstream); - } else { - fd = open(fname, O_RDWR); - } - if (fd < 0) { - return -3; - } - return fd; -} - -static u64 parse_size(char *s) -{ - int len = strlen(s); - char c; - u64 mult = 1; - - if (!isdigit(s[len - 1])) { - c = tolower(s[len - 1]); - switch (c) { - case 'g': - mult *= 1024; - case 'm': - mult *= 1024; - case 'k': - mult *= 1024; - case 'b': - break; - default: - fprintf(stderr, "Unknown size descriptor %c\n", c); - exit(1); - } - s[len - 1] = '\0'; - } - return atoll(s) * mult; -} - -static int parse_compress_type(char *s) -{ - if (strcmp(optarg, "zlib") == 0) - return BTRFS_COMPRESS_ZLIB; - else if (strcmp(optarg, "lzo") == 0) - return BTRFS_COMPRESS_LZO; - else { - fprintf(stderr, "Unknown compress type %s\n", s); - exit(1); - }; -} - -int do_defrag(int ac, char **av) -{ - int fd; - int flush = 0; - u64 start = 0; - u64 len = (u64)-1; - u32 thresh = 0; - int i; - int errors = 0; - int ret = 0; - int verbose = 0; - int fancy_ioctl = 0; - struct btrfs_ioctl_defrag_range_args range; - int e=0; - int compress_type = BTRFS_COMPRESS_NONE; - - optind = 1; - while(1) { - int c = getopt(ac, av, "vc::fs:l:t:"); - if (c < 0) - break; - switch(c) { - case 'c': - compress_type = BTRFS_COMPRESS_ZLIB; - if (optarg) - compress_type = parse_compress_type(optarg); - fancy_ioctl = 1; - break; - case 'f': - flush = 1; - fancy_ioctl = 1; - break; - case 'v': - verbose 
= 1; - break; - case 's': - start = parse_size(optarg); - fancy_ioctl = 1; - break; - case 'l': - len = parse_size(optarg); - fancy_ioctl = 1; - break; - case 't': - thresh = parse_size(optarg); - fancy_ioctl = 1; - break; - default: - fprintf(stderr, "Invalid arguments for defragment\n"); - free(av); - return 1; - } - } - if (ac - optind == 0) { - fprintf(stderr, "Invalid arguments for defragment\n"); - free(av); - return 1; - } - - memset(&range, 0, sizeof(range)); - range.start = start; - range.len = len; - range.extent_thresh = thresh; - if (compress_type) { - range.flags |= BTRFS_DEFRAG_RANGE_COMPRESS; - range.compress_type = compress_type; - } - if (flush) - range.flags |= BTRFS_DEFRAG_RANGE_START_IO; - - for (i = optind; i < ac; i++) { - if (verbose) - printf("%s\n", av[i]); - fd = open_file_or_dir(av[i]); - if (fd < 0) { - fprintf(stderr, "failed to open %s\n", av[i]); - perror("open:"); - errors++; - continue; - } - if (!fancy_ioctl) { - ret = ioctl(fd, BTRFS_IOC_DEFRAG, NULL); - e=errno; - } else { - ret = ioctl(fd, BTRFS_IOC_DEFRAG_RANGE, &range); - if (ret && errno == ENOTTY) { - fprintf(stderr, "ERROR: defrag range ioctl not " - "supported in this kernel, please try " - "without any options.\n"); - errors++; - close(fd); - break; - } - } - if (ret) { - fprintf(stderr, "ERROR: defrag failed on %s - %s\n", - av[i], strerror(e)); - errors++; - } - close(fd); - } - if (verbose) - printf("%s\n", BTRFS_BUILD_VERSION); - if (errors) { - fprintf(stderr, "total %d failures\n", errors); - exit(1); - } - - free(av); - return errors + 20; -} - -int do_find_newer(int argc, char **argv) -{ - int fd; - int ret; - char *subvol; - u64 last_gen; - - subvol = argv[1]; - last_gen = atoll(argv[2]); - - ret = test_issubvolume(subvol); - if (ret < 0) { - fprintf(stderr, "ERROR: error accessing '%s'\n", subvol); - return 12; - } - if (!ret) { - fprintf(stderr, "ERROR: '%s' is not a subvolume\n", subvol); - return 13; - } - - fd = open_file_or_dir(subvol); - if (fd < 0) { - 
fprintf(stderr, "ERROR: can't access '%s'\n", subvol); - return 12; - } - ret = find_updated_files(fd, 0, last_gen); - if (ret) - return 19; - return 0; -} - -int do_subvol_list(int argc, char **argv) -{ - int fd; - int ret; - int print_parent = 0; - char *subvol; - int optind = 1; - - while(1) { - int c = getopt(argc, argv, "p"); - if (c < 0) break; - switch(c) { - case 'p': - print_parent = 1; - optind++; - break; - } - } - - if (argc - optind != 1) { - fprintf(stderr, "ERROR: invalid arguments for subvolume list\n"); - return 1; - } - - subvol = argv[optind]; - - ret = test_issubvolume(subvol); - if (ret < 0) { - fprintf(stderr, "ERROR: error accessing '%s'\n", subvol); - return 12; - } - if (!ret) { - fprintf(stderr, "ERROR: '%s' is not a subvolume\n", subvol); - return 13; - } - - fd = open_file_or_dir(subvol); - if (fd < 0) { - fprintf(stderr, "ERROR: can't access '%s'\n", subvol); - return 12; - } - ret = list_subvols(fd, print_parent, 0); - if (ret) - return 19; - return 0; -} - -int do_clone(int argc, char **argv) -{ - char *subvol, *dst; - int res, fd, fddst, len, e, optind = 0, readonly = 0; - char *newname; - char *dstdir; - struct btrfs_ioctl_vol_args_v2 args; - - memset(&args, 0, sizeof(args)); - - while (1) { - int c = getopt(argc, argv, "r"); - - if (c < 0) - break; - switch (c) { - case 'r': - optind++; - readonly = 1; - break; - default: - fprintf(stderr, - "Invalid arguments for subvolume snapshot\n"); - free(argv); - return 1; - } - } - if (argc - optind != 3) { - fprintf(stderr, "Invalid arguments for subvolume snapshot\n"); - free(argv); - return 1; - } - - subvol = argv[optind+1]; - dst = argv[optind+2]; - - res = test_issubvolume(subvol); - if(res<0){ - fprintf(stderr, "ERROR: error accessing '%s'\n", subvol); - return 12; - } - if(!res){ - fprintf(stderr, "ERROR: '%s' is not a subvolume\n", subvol); - return 13; - } - - res = test_isdir(dst); - if(res == 0 ){ - fprintf(stderr, "ERROR: '%s' exists and it is not a directory\n", dst); - return 
12; - } - - if(res>0){ - newname = strdup(subvol); - newname = basename(newname); - dstdir = dst; - }else{ - newname = strdup(dst); - newname = basename(newname); - dstdir = strdup(dst); - dstdir = dirname(dstdir); - } - - if( !strcmp(newname,".") || !strcmp(newname,"..") || - strchr(newname, '/') ){ - fprintf(stderr, "ERROR: incorrect snapshot name ('%s')\n", - newname); - return 14; - } - - len = strlen(newname); - if (len == 0 || len >= BTRFS_VOL_NAME_MAX) { - fprintf(stderr, "ERROR: snapshot name too long ('%s)\n", - newname); - return 14; - } - - fddst = open_file_or_dir(dstdir); - if (fddst < 0) { - fprintf(stderr, "ERROR: can't access to '%s'\n", dstdir); - return 12; - } - - fd = open_file_or_dir(subvol); - if (fd < 0) { - close(fddst); - fprintf(stderr, "ERROR: can't access to '%s'\n", dstdir); - return 12; - } - - if (readonly) { - args.flags |= BTRFS_SUBVOL_RDONLY; - printf("Create a readonly snapshot of '%s' in '%s/%s'\n", - subvol, dstdir, newname); - } else { - printf("Create a snapshot of '%s' in '%s/%s'\n", - subvol, dstdir, newname); - } - - args.fd = fd; - strncpy(args.name, newname, BTRFS_SUBVOL_NAME_MAX); - res = ioctl(fddst, BTRFS_IOC_SNAP_CREATE_V2, &args); - e = errno; - - close(fd); - close(fddst); - - if(res < 0 ){ - fprintf( stderr, "ERROR: cannot snapshot '%s' - %s\n", - subvol, strerror(e)); - return 11; - } - - return 0; - -} - -int do_delete_subvolume(int argc, char **argv) -{ - int res, fd, len, e; - struct btrfs_ioctl_vol_args args; - char *dname, *vname, *cpath; - char *path = argv[1]; - - res = test_issubvolume(path); - if(res<0){ - fprintf(stderr, "ERROR: error accessing '%s'\n", path); - return 12; - } - if(!res){ - fprintf(stderr, "ERROR: '%s' is not a subvolume\n", path); - return 13; - } - - cpath = realpath(path, 0); - dname = strdup(cpath); - dname = dirname(dname); - vname = strdup(cpath); - vname = basename(vname); - free(cpath); - - if( !strcmp(vname,".") || !strcmp(vname,"..") || - strchr(vname, '/') ){ - fprintf(stderr, 
"ERROR: incorrect subvolume name ('%s')\n", - vname); - return 14; - } - - len = strlen(vname); - if (len == 0 || len >= BTRFS_VOL_NAME_MAX) { - fprintf(stderr, "ERROR: snapshot name too long ('%s)\n", - vname); - return 14; - } - - fd = open_file_or_dir(dname); - if (fd < 0) { - close(fd); - fprintf(stderr, "ERROR: can't access to '%s'\n", dname); - return 12; - } - - printf("Delete subvolume '%s/%s'\n", dname, vname); - strncpy(args.name, vname, BTRFS_PATH_NAME_MAX); - res = ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &args); - e = errno; - - close(fd); - - if(res < 0 ){ - fprintf( stderr, "ERROR: cannot delete '%s/%s' - %s\n", - dname, vname, strerror(e)); - return 11; - } - - return 0; - -} - -int do_create_subvol(int argc, char **argv) -{ - int res, fddst, len, e; - char *newname; - char *dstdir; - struct btrfs_ioctl_vol_args args; - char *dst = argv[1]; - - res = test_isdir(dst); - if(res >= 0 ){ - fprintf(stderr, "ERROR: '%s' exists\n", dst); - return 12; - } - - newname = strdup(dst); - newname = basename(newname); - dstdir = strdup(dst); - dstdir = dirname(dstdir); - - if( !strcmp(newname,".") || !strcmp(newname,"..") || - strchr(newname, '/') ){ - fprintf(stderr, "ERROR: uncorrect subvolume name ('%s')\n", - newname); - return 14; - } - - len = strlen(newname); - if (len == 0 || len >= BTRFS_VOL_NAME_MAX) { - fprintf(stderr, "ERROR: subvolume name too long ('%s)\n", - newname); - return 14; - } - - fddst = open_file_or_dir(dstdir); - if (fddst < 0) { - fprintf(stderr, "ERROR: can't access to '%s'\n", dstdir); - return 12; - } - - printf("Create subvolume '%s/%s'\n", dstdir, newname); - strncpy(args.name, newname, BTRFS_PATH_NAME_MAX); - res = ioctl(fddst, BTRFS_IOC_SUBVOL_CREATE, &args); - e = errno; - - close(fddst); - - if(res < 0 ){ - fprintf( stderr, "ERROR: cannot create subvolume - %s\n", - strerror(e)); - return 11; - } - - return 0; - -} - -int do_fssync(int argc, char **argv) -{ - int fd, res, e; - char *path = argv[1]; - - fd = open_file_or_dir(path); - 
if (fd < 0) { - fprintf(stderr, "ERROR: can't access to '%s'\n", path); - return 12; - } - - printf("FSSync '%s'\n", path); - res = ioctl(fd, BTRFS_IOC_SYNC); - e = errno; - close(fd); - if( res < 0 ){ - fprintf(stderr, "ERROR: unable to fs-syncing '%s' - %s\n", - path, strerror(e)); - return 16; - } - - return 0; -} - -int do_scan(int argc, char **argv) -{ - int i, fd, e; - int checklist = 1; - int devstart = 1; - - if( argc >= 2 && !strcmp(argv[1],"--all-devices")){ - - if( argc >2 ){ - fprintf(stderr, "ERROR: too may arguments\n"); - return 22; - } - - checklist = 0; - devstart += 1; - } - - if(argc<=devstart){ - - int ret; - - printf("Scanning for Btrfs filesystems\n"); - if(checklist) - ret = btrfs_scan_block_devices(1); - else - ret = btrfs_scan_one_dir("/dev", 1); - if (ret){ - fprintf(stderr, "ERROR: error %d while scanning\n", ret); - return 18; - } - return 0; - } - - fd = open("/dev/btrfs-control", O_RDWR); - if (fd < 0) { - perror("failed to open /dev/btrfs-control"); - return 10; - } - - for( i = devstart ; i < argc ; i++ ){ - struct btrfs_ioctl_vol_args args; - int ret; - - printf("Scanning for Btrfs filesystems in '%s'\n", argv[i]); - - strncpy(args.name, argv[i], BTRFS_PATH_NAME_MAX); - /* - * FIXME: which are the error code returned by this ioctl ? - * it seems that is impossible to understand if there no is - * a btrfs filesystem from an I/O error !!! 
- */ - ret = ioctl(fd, BTRFS_IOC_SCAN_DEV, &args); - e = errno; - - if( ret < 0 ){ - close(fd); - fprintf(stderr, "ERROR: unable to scan the device '%s' - %s\n", - argv[i], strerror(e)); - return 11; - } - } - - close(fd); - return 0; - -} - -int do_resize(int argc, char **argv) -{ - - struct btrfs_ioctl_vol_args args; - int fd, res, len, e; - char *amount=argv[1], *path=argv[2]; - - fd = open_file_or_dir(path); - if (fd < 0) { - fprintf(stderr, "ERROR: can't access to '%s'\n", path); - return 12; - } - len = strlen(amount); - if (len == 0 || len >= BTRFS_VOL_NAME_MAX) { - fprintf(stderr, "ERROR: size value too long ('%s)\n", - amount); - return 14; - } - - printf("Resize '%s' of '%s'\n", path, amount); - strncpy(args.name, amount, BTRFS_PATH_NAME_MAX); - res = ioctl(fd, BTRFS_IOC_RESIZE, &args); - e = errno; - close(fd); - if( res < 0 ){ - fprintf(stderr, "ERROR: unable to resize '%s' - %s\n", - path, strerror(e)); - return 30; - } - return 0; -} - -static int uuid_search(struct btrfs_fs_devices *fs_devices, char *search) -{ - struct list_head *cur; - struct btrfs_device *device; - - list_for_each(cur, &fs_devices->devices) { - device = list_entry(cur, struct btrfs_device, dev_list); - if ((device->label && strcmp(device->label, search) == 0) || - strcmp(device->name, search) == 0) - return 1; - } - return 0; -} - -static void print_one_uuid(struct btrfs_fs_devices *fs_devices) -{ - char uuidbuf[37]; - struct list_head *cur; - struct btrfs_device *device; - char *super_bytes_used; - u64 devs_found = 0; - u64 total; - - uuid_unparse(fs_devices->fsid, uuidbuf); - device = list_entry(fs_devices->devices.next, struct btrfs_device, - dev_list); - if (device->label && device->label[0]) - printf("Label: '%s' ", device->label); - else - printf("Label: none "); - - super_bytes_used = pretty_sizes(device->super_bytes_used); - - total = device->total_devs; - printf(" uuid: %s\n\tTotal devices %llu FS bytes used %s\n", uuidbuf, - (unsigned long long)total, super_bytes_used); 
- - free(super_bytes_used); - - list_for_each(cur, &fs_devices->devices) { - char *total_bytes; - char *bytes_used; - device = list_entry(cur, struct btrfs_device, dev_list); - total_bytes = pretty_sizes(device->total_bytes); - bytes_used = pretty_sizes(device->bytes_used); - printf("\tdevid %4llu size %s used %s path %s\n", - (unsigned long long)device->devid, - total_bytes, bytes_used, device->name); - free(total_bytes); - free(bytes_used); - devs_found++; - } - if (devs_found < total) { - printf("\t*** Some devices missing\n"); - } - printf("\n"); -} - -int do_show_filesystem(int argc, char **argv) -{ - struct list_head *all_uuids; - struct btrfs_fs_devices *fs_devices; - struct list_head *cur_uuid; - char *search = 0; - int ret; - int checklist = 1; - int searchstart = 1; - - if( argc >= 2 && !strcmp(argv[1],"--all-devices")){ - checklist = 0; - searchstart += 1; - } - - if( argc > searchstart+1 ){ - fprintf(stderr, "ERROR: too many arguments\n"); - return 22; - } - - if(checklist) - ret = btrfs_scan_block_devices(0); - else - ret = btrfs_scan_one_dir("/dev", 0); - - if (ret){ - fprintf(stderr, "ERROR: error %d while scanning\n", ret); - return 18; - } - - if(searchstart < argc) - search = argv[searchstart]; - - all_uuids = btrfs_scanned_uuids(); - list_for_each(cur_uuid, all_uuids) { - fs_devices = list_entry(cur_uuid, struct btrfs_fs_devices, - list); - if (search && uuid_search(fs_devices, search) == 0) - continue; - print_one_uuid(fs_devices); - } - printf("%s\n", BTRFS_BUILD_VERSION); - return 0; -} - -int do_add_volume(int nargs, char **args) -{ - - char *mntpnt = args[nargs-1]; - int i, fdmnt, ret=0, e; - - - fdmnt = open_file_or_dir(mntpnt); - if (fdmnt < 0) { - fprintf(stderr, "ERROR: can't access to '%s'\n", mntpnt); - return 12; - } - - for (i = 1; i < (nargs-1); i++ ){ - struct btrfs_ioctl_vol_args ioctl_args; - int devfd, res; - u64 dev_block_count = 0; - struct stat st; - int mixed = 0; - - res = check_mounted(args[i]); - if (res < 0) { - 
fprintf(stderr, "error checking %s mount status\n", - args[i]); - ret++; - continue; - } - if (res == 1) { - fprintf(stderr, "%s is mounted\n", args[i]); - ret++; - continue; - } - - devfd = open(args[i], O_RDWR); - if (!devfd) { - fprintf(stderr, "ERROR: Unable to open device '%s'\n", args[i]); - close(devfd); - ret++; - continue; - } - res = fstat(devfd, &st); - if (res) { - fprintf(stderr, "ERROR: Unable to stat '%s'\n", args[i]); - close(devfd); - ret++; - continue; - } - if (!S_ISBLK(st.st_mode)) { - fprintf(stderr, "ERROR: '%s' is not a block device\n", args[i]); - close(devfd); - ret++; - continue; - } - - res = btrfs_prepare_device(devfd, args[i], 1, &dev_block_count, &mixed); - if (res) { - fprintf(stderr, "ERROR: Unable to init '%s'\n", args[i]); - close(devfd); - ret++; - continue; - } - close(devfd); - - strncpy(ioctl_args.name, args[i], BTRFS_PATH_NAME_MAX); - res = ioctl(fdmnt, BTRFS_IOC_ADD_DEV, &ioctl_args); - e = errno; - if(res<0){ - fprintf(stderr, "ERROR: error adding the device '%s' - %s\n", - args[i], strerror(e)); - ret++; - } - - } - - close(fdmnt); - if (ret) - return ret+20; - else - return 0; - -} - -int do_balance(int argc, char **argv) -{ - - int fdmnt, ret=0, e; - struct btrfs_ioctl_vol_args args; - char *path = argv[1]; - - fdmnt = open_file_or_dir(path); - if (fdmnt < 0) { - fprintf(stderr, "ERROR: can't access to '%s'\n", path); - return 12; - } - - memset(&args, 0, sizeof(args)); - ret = ioctl(fdmnt, BTRFS_IOC_BALANCE, &args); - e = errno; - close(fdmnt); - if(ret<0){ - fprintf(stderr, "ERROR: error during balancing '%s' - %s\n", - path, strerror(e)); - - return 19; - } - return 0; -} -int do_remove_volume(int nargs, char **args) -{ - - char *mntpnt = args[nargs-1]; - int i, fdmnt, ret=0, e; - - fdmnt = open_file_or_dir(mntpnt); - if (fdmnt < 0) { - fprintf(stderr, "ERROR: can't access to '%s'\n", mntpnt); - return 12; - } - - for(i=1 ; i < (nargs-1) ; i++ ){ - struct btrfs_ioctl_vol_args arg; - int res; - - strncpy(arg.name, 
args[i], BTRFS_PATH_NAME_MAX); - res = ioctl(fdmnt, BTRFS_IOC_RM_DEV, &arg); - e = errno; - if(res<0){ - fprintf(stderr, "ERROR: error removing the device '%s' - %s\n", - args[i], strerror(e)); - ret++; - } - } - - close(fdmnt); - if( ret) - return ret+20; - else - return 0; -} - -int do_set_default_subvol(int nargs, char **argv) -{ - int ret=0, fd, e; - u64 objectid; - char *path = argv[2]; - char *subvolid = argv[1]; - - fd = open_file_or_dir(path); - if (fd < 0) { - fprintf(stderr, "ERROR: can't access to '%s'\n", path); - return 12; - } - - objectid = (unsigned long long)strtoll(subvolid, NULL, 0); - if (errno == ERANGE) { - fprintf(stderr, "ERROR: invalid tree id (%s)\n",subvolid); - return 30; - } - ret = ioctl(fd, BTRFS_IOC_DEFAULT_SUBVOL, &objectid); - e = errno; - close(fd); - if( ret < 0 ){ - fprintf(stderr, "ERROR: unable to set a new default subvolume - %s\n", - strerror(e)); - return 30; - } - return 0; -} - -int do_change_label(int nargs, char **argv) -{ - /* check the number of argument */ - if ( nargs > 3 ){ - fprintf(stderr, "ERROR: '%s' requires maximum 2 args\n", - argv[0]); - return -2; - }else if (nargs == 2){ - return get_label(argv[1]); - } else { /* nargs == 0 */ - return set_label(argv[1], argv[2]); - } -} - - -int do_get_default_subvol(int nargs, char **argv) -{ - int fd; - int ret; - char *subvol; - - subvol = argv[1]; - - ret = test_issubvolume(subvol); - if (ret < 0) { - fprintf(stderr, "ERROR: error accessing '%s'\n", subvol); - return 12; - } - if (!ret) { - fprintf(stderr, "ERROR: '%s' is not a subvolume\n", subvol); - return 13; - } - - fd = open_file_or_dir(subvol); - if (fd < 0) { - fprintf(stderr, "ERROR: can't access '%s'\n", subvol); - return 12; - } - ret = list_subvols(fd, 0, 1); - if (ret) - return 19; - return 0; -} - -int do_df_filesystem(int nargs, char **argv) -{ - struct btrfs_ioctl_space_args *sargs; - u64 count = 0, i; - int ret; - int fd; - int e; - char *path = argv[1]; - - fd = open_file_or_dir(path); - if (fd < 0) 
{ - fprintf(stderr, "ERROR: can't access to '%s'\n", path); - return 12; - } - - sargs = malloc(sizeof(struct btrfs_ioctl_space_args)); - if (!sargs) - return -ENOMEM; - - sargs->space_slots = 0; - sargs->total_spaces = 0; - - ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, sargs); - e = errno; - if (ret) { - fprintf(stderr, "ERROR: couldn't get space info on '%s' - %s\n", - path, strerror(e)); - free(sargs); - return ret; - } - if (!sargs->total_spaces) - return 0; - - count = sargs->total_spaces; - - sargs = realloc(sargs, sizeof(struct btrfs_ioctl_space_args) + - (count * sizeof(struct btrfs_ioctl_space_info))); - if (!sargs) - return -ENOMEM; - - sargs->space_slots = count; - sargs->total_spaces = 0; - - ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, sargs); - e = errno; - if (ret) { - fprintf(stderr, "ERROR: couldn't get space info on '%s' - %s\n", - path, strerror(e)); - close(fd); - free(sargs); - return ret; - } - - for (i = 0; i < sargs->total_spaces; i++) { - char description[80]; - char *total_bytes; - char *used_bytes; - int written = 0; - u64 flags = sargs->spaces[i].flags; - - memset(description, 0, 80); - - if (flags & BTRFS_BLOCK_GROUP_DATA) { - if (flags & BTRFS_BLOCK_GROUP_METADATA) { - snprintf(description, 14, "%s", - "Data+Metadata"); - written += 13; - } else { - snprintf(description, 5, "%s", "Data"); - written += 4; - } - } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) { - snprintf(description, 7, "%s", "System"); - written += 6; - } else if (flags & BTRFS_BLOCK_GROUP_METADATA) { - snprintf(description, 9, "%s", "Metadata"); - written += 8; - } - - if (flags & BTRFS_BLOCK_GROUP_RAID0) { - snprintf(description+written, 8, "%s", ", RAID0"); - written += 7; - } else if (flags & BTRFS_BLOCK_GROUP_RAID1) { - snprintf(description+written, 8, "%s", ", RAID1"); - written += 7; - } else if (flags & BTRFS_BLOCK_GROUP_DUP) { - snprintf(description+written, 6, "%s", ", DUP"); - written += 5; - } else if (flags & BTRFS_BLOCK_GROUP_RAID10) { - snprintf(description+written, 9, 
"%s", ", RAID10"); - written += 8; - } - - total_bytes = pretty_sizes(sargs->spaces[i].total_bytes); - used_bytes = pretty_sizes(sargs->spaces[i].used_bytes); - printf("%s: total=%s, used=%s\n", description, total_bytes, - used_bytes); - } - free(sargs); - - return 0; -} - -static int __ino_to_path_fd(u64 inum, int fd, int verbose, const char *prepend) -{ - int ret; - int i; - struct btrfs_ioctl_ino_path_args ipa; - struct btrfs_data_container *fspath; - - fspath = malloc(4096); - if (!fspath) - return 1; - - ipa.inum = inum; - ipa.size = 4096; - ipa.fspath = (u64)fspath; - - ret = ioctl(fd, BTRFS_IOC_INO_PATHS, &ipa); - if (ret) { - printf("ioctl ret=%d, error: %s\n", ret, strerror(errno)); - goto out; - } - - if (verbose) - printf("ioctl ret=%d, bytes_left=%lu, bytes_missing=%lu, " - "cnt=%d, missed=%d\n", ret, - (unsigned long)fspath->bytes_left, - (unsigned long)fspath->bytes_missing, - fspath->elem_cnt, fspath->elem_missed); - - for (i = 0; i < fspath->elem_cnt; ++i) { - char **str = (char **)fspath->val; - str[i] += (unsigned long)fspath->val; - if (prepend) - printf("%s/%s\n", prepend, str[i]); - else - printf("%s\n", str[i]); - } - -out: - free(fspath); - return ret; -} - -int do_ino_to_path(int nargs, char **argv) -{ - int fd; - int verbose = 0; - - optind = 1; - while (1) { - int c = getopt(nargs, argv, "v"); - if (c < 0) - break; - switch (c) { - case 'v': - verbose = 1; - break; - default: - fprintf(stderr, "invalid arguments for ipath\n"); - return 1; - } - } - if (nargs - optind != 2) { - fprintf(stderr, "invalid arguments for ipath\n"); - return 1; - } - - fd = open_file_or_dir(argv[optind+1]); - if (fd < 0) { - fprintf(stderr, "ERROR: can't access '%s'\n", argv[optind+1]); - return 12; - } - - return __ino_to_path_fd(atoll(argv[optind]), fd, verbose, - argv[optind+1]); -} - -int do_logical_to_ino(int nargs, char **argv) -{ - int ret; - int fd; - int i; - int verbose = 0; - int getpath = 1; - int bytes_left; - struct btrfs_ioctl_logical_ino_args loi; 
- struct btrfs_data_container *inodes; - char full_path[4096]; - char *path_ptr; - - optind = 1; - while (1) { - int c = getopt(nargs, argv, "Pv"); - if (c < 0) - break; - switch (c) { - case 'P': - getpath = 0; - break; - case 'v': - verbose = 1; - break; - default: - fprintf(stderr, "invalid arguments for ipath\n"); - return 1; - } - } - if (nargs - optind != 2) { - fprintf(stderr, "invalid arguments for ipath\n"); - return 1; - } - - inodes = malloc(4096); - if (!inodes) - return 1; - - loi.logical = atoll(argv[optind]); - loi.size = 4096; - loi.inodes = (u64)inodes; - - fd = open_file_or_dir(argv[optind+1]); - if (fd < 0) { - fprintf(stderr, "ERROR: can't access '%s'\n", argv[optind+1]); - ret = 12; - goto out; - } - - ret = ioctl(fd, BTRFS_IOC_LOGICAL_INO, &loi); - if (ret) { - printf("ioctl ret=%d, error: %s\n", ret, strerror(errno)); - goto out; - } - - if (verbose) - printf("ioctl ret=%d, bytes_left=%lu, bytes_missing=%lu, " - "cnt=%d, missed=%d\n", ret, - (unsigned long)inodes->bytes_left, - (unsigned long)inodes->bytes_missing, - inodes->elem_cnt, inodes->elem_missed); - - bytes_left = sizeof(full_path); - ret = snprintf(full_path, bytes_left, "%s/", argv[optind+1]); - path_ptr = full_path + ret; - bytes_left -= ret + 1; - BUG_ON(bytes_left < 0); - - for (i = 0; i < inodes->elem_cnt; i += 3) { - u64 inum = inodes->val[i]; - u64 offset = inodes->val[i+1]; - u64 root = inodes->val[i+2]; - int path_fd; - char *name; - - if (getpath) { - name = path_for_root(fd, root); - if (IS_ERR(name)) - return PTR_ERR(name); - if (!name) { - path_ptr[-1] = '\0'; - path_fd = fd; - } else { - path_ptr[-1] = '/'; - ret = snprintf(path_ptr, bytes_left, "%s", - name); - BUG_ON(ret >= bytes_left); - free(name); - path_fd = open_file_or_dir(full_path); - if (path_fd < 0) { - fprintf(stderr, "ERROR: can't access " - "'%s'\n", full_path); - goto out; - } - } - __ino_to_path_fd(inum, path_fd, verbose, full_path); - } else { - printf("inode %llu offset %llu root %llu\n", inum, - 
offset, root); - } - } - -out: - free(inodes); - return ret; -} diff --git a/btrfs_cmds.h b/btrfs_cmds.h deleted file mode 100644 index 81182b1..0000000 --- a/btrfs_cmds.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -/* btrfs_cmds.c*/ -int do_clone(int nargs, char **argv); -int do_delete_subvolume(int nargs, char **argv); -int do_create_subvol(int nargs, char **argv); -int do_fssync(int nargs, char **argv); -int do_defrag(int argc, char **argv); -int do_show_filesystem(int nargs, char **argv); -int do_add_volume(int nargs, char **args); -int do_balance(int nargs, char **argv); -int do_scrub_start(int nargs, char **argv); -int do_scrub_status(int argc, char **argv); -int do_scrub_resume(int argc, char **argv); -int do_scrub_cancel(int nargs, char **argv); -int do_remove_volume(int nargs, char **args); -int do_scan(int nargs, char **argv); -int do_resize(int nargs, char **argv); -int do_subvol_list(int nargs, char **argv); -int do_set_default_subvol(int nargs, char **argv); -int do_get_default_subvol(int nargs, char **argv); -int list_subvols(int fd, int print_parent, int get_default); -int do_df_filesystem(int nargs, char **argv); -int find_updated_files(int fd, u64 root_id, u64 oldest_gen); -int do_find_newer(int argc, char **argv); -int do_change_label(int argc, char **argv); -int open_file_or_dir(const char *fname); 
-int do_ino_to_path(int nargs, char **argv); -int do_logical_to_ino(int nargs, char **argv); -char *path_for_root(int fd, u64 root); diff --git a/btrfsctl.c b/btrfsctl.c deleted file mode 100644 index d45e2a7..0000000 --- a/btrfsctl.c +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#ifndef __CHECKER__ -#include <sys/ioctl.h> -#include <sys/mount.h> -#include "ioctl.h" -#endif -#include <stdio.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <dirent.h> -#include <libgen.h> -#include <stdlib.h> -#include "kerncompat.h" -#include "ctree.h" -#include "transaction.h" -#include "utils.h" -#include "version.h" - -#ifdef __CHECKER__ -#define BLKGETSIZE64 0 -#define BTRFS_IOC_SNAP_CREATE 0 -#define BTRFS_VOL_NAME_MAX 255 -struct btrfs_ioctl_vol_args { char name[BTRFS_VOL_NAME_MAX]; }; -static inline int ioctl(int fd, int define, void *arg) { return 0; } -#endif - -static void print_usage(void) -{ - printf("usage: btrfsctl [ -d file|dir] [ -s snap_name subvol|tree ]\n"); - printf(" [-r size] [-A device] [-a] [-c] [-D dir .]\n"); - printf("\t-d filename: defragments one file\n"); - printf("\t-d directory: defragments the entire Btree\n"); - printf("\t-s snap_name dir: creates a new snapshot of dir\n"); - 
printf("\t-S subvol_name dir: creates a new subvolume\n"); - printf("\t-r [+-]size[gkm]: resize the FS by size amount\n"); - printf("\t-A device: scans the device file for a Btrfs filesystem\n"); - printf("\t-a: scans all devices for Btrfs filesystems\n"); - printf("\t-c: forces a single FS sync\n"); - printf("\t-D: delete snapshot\n"); - printf("\t-m [tree id] directory: set the default mounted subvolume" - " to the [tree id] or the directory\n"); - printf("%s\n", BTRFS_BUILD_VERSION); - exit(1); -} - -static int open_file_or_dir(const char *fname) -{ - int ret; - struct stat st; - DIR *dirstream; - int fd; - - ret = stat(fname, &st); - if (ret < 0) { - perror("stat:"); - exit(1); - } - if (S_ISDIR(st.st_mode)) { - dirstream = opendir(fname); - if (!dirstream) { - perror("opendir"); - exit(1); - } - fd = dirfd(dirstream); - } else { - fd = open(fname, O_RDWR); - } - if (fd < 0) { - perror("open"); - exit(1); - } - return fd; -} -int main(int ac, char **av) -{ - char *fname = NULL; - char *snap_location = NULL; - int snap_fd = 0; - int fd; - int ret; - struct btrfs_ioctl_vol_args args; - char *name = NULL; - int i; - unsigned long command = 0; - int len; - char *pos; - char *fullpath; - u64 objectid = 0; - - printf( "**\n" - "** WARNING: this program is considered deprecated\n" - "** Please consider to switch to the btrfs utility\n" - "**\n"); - - if (ac == 2 && strcmp(av[1], "-a") == 0) { - fprintf(stderr, "Scanning for Btrfs filesystems\n"); - btrfs_scan_one_dir("/dev", 1); - exit(0); - } - for (i = 1; i < ac; i++) { - if (strcmp(av[i], "-s") == 0) { - if (i + 1 >= ac - 1) { - fprintf(stderr, "-s requires an arg"); - print_usage(); - } - fullpath = av[i + 1]; - - snap_location = strdup(fullpath); - snap_location = dirname(snap_location); - - snap_fd = open_file_or_dir(snap_location); - - name = strdup(fullpath); - name = basename(name); - len = strlen(name); - - if (len == 0 || len >= BTRFS_VOL_NAME_MAX) { - fprintf(stderr, - "snapshot name zero length or too 
long\n"); - exit(1); - } - if (strchr(name, '/')) { - fprintf(stderr, - "error: / not allowed in names\n"); - exit(1); - } - command = BTRFS_IOC_SNAP_CREATE; - } else if (strcmp(av[i], "-S") == 0) { - if (i + 1 >= ac - 1) { - fprintf(stderr, "-S requires an arg"); - print_usage(); - } - name = av[i + 1]; - len = strlen(name); - if (len == 0 || len >= BTRFS_VOL_NAME_MAX) { - fprintf(stderr, - "snapshot name zero length or too long\n"); - exit(1); - } - if (strchr(name, '/')) { - fprintf(stderr, - "error: / not allowed in names\n"); - exit(1); - } - command = BTRFS_IOC_SUBVOL_CREATE; - } else if (strcmp(av[i], "-d") == 0) { - if (i >= ac - 1) { - fprintf(stderr, "-d requires an arg\n"); - print_usage(); - } - command = BTRFS_IOC_DEFRAG; - } else if (strcmp(av[i], "-D") == 0) { - if (i >= ac - 1) { - fprintf(stderr, "-D requires an arg\n"); - print_usage(); - } - command = BTRFS_IOC_SNAP_DESTROY; - name = av[i + 1]; - len = strlen(name); - pos = strchr(name, '/'); - if (pos) { - if (*(pos + 1) == '\0') - *(pos) = '\0'; - else { - fprintf(stderr, - "error: / not allowed in names\n"); - exit(1); - } - } - if (len == 0 || len >= BTRFS_VOL_NAME_MAX) { - fprintf(stderr, "-D size too long\n"); - exit(1); - } - } else if (strcmp(av[i], "-A") == 0) { - if (i >= ac - 1) { - fprintf(stderr, "-A requires an arg\n"); - print_usage(); - } - command = BTRFS_IOC_SCAN_DEV; - } else if (strcmp(av[i], "-r") == 0) { - if (i >= ac - 1) { - fprintf(stderr, "-r requires an arg\n"); - print_usage(); - } - name = av[i + 1]; - len = strlen(name); - if (len == 0 || len >= BTRFS_VOL_NAME_MAX) { - fprintf(stderr, "-r size too long\n"); - exit(1); - } - command = BTRFS_IOC_RESIZE; - } else if (strcmp(av[i], "-c") == 0) { - command = BTRFS_IOC_SYNC; - } else if (strcmp(av[i], "-m") == 0) { - command = BTRFS_IOC_DEFAULT_SUBVOL; - if (i == ac - 3) { - objectid = (unsigned long long) - strtoll(av[i + 1], NULL, 0); - if (errno == ERANGE) { - fprintf(stderr, "invalid tree id\n"); - exit(1); - } - } - } 
- } - if (command == 0) { - fprintf(stderr, "no valid commands given\n"); - print_usage(); - exit(1); - } - fname = av[ac - 1]; - - if (command == BTRFS_IOC_SCAN_DEV) { - fd = open("/dev/btrfs-control", O_RDWR); - if (fd < 0) { - perror("failed to open /dev/btrfs-control"); - exit(1); - } - name = fname; - } else { - fd = open_file_or_dir(fname); - } - - if (name) - strncpy(args.name, name, BTRFS_PATH_NAME_MAX + 1); - else - args.name[0] = '\0'; - - if (command == BTRFS_IOC_SNAP_CREATE) { - args.fd = fd; - ret = ioctl(snap_fd, command, &args); - } else if (command == BTRFS_IOC_DEFAULT_SUBVOL) { - printf("objectid is %llu\n", (unsigned long long)objectid); - ret = ioctl(fd, command, &objectid); - } else - ret = ioctl(fd, command, &args); - if (ret < 0) { - perror("ioctl:"); - exit(1); - } - if (ret == 0) { - printf("operation complete\n"); - } else { - printf("ioctl failed with error %d\n", ret); - } - printf("%s\n", BTRFS_BUILD_VERSION); - if (ret) - exit(1); - - return 0; -} - diff --git a/btrfslabel.c b/btrfslabel.c deleted file mode 100644 index c9f4684..0000000 --- a/btrfslabel.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (C) 2008 Morey Roof. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- */ - -#define _GNU_SOURCE - -#ifndef __CHECKER__ -#include <sys/ioctl.h> -#include <sys/mount.h> -#include "ioctl.h" -#endif /* __CHECKER__ */ - -#include <stdio.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <dirent.h> -#include <fcntl.h> -#include <unistd.h> -#include <linux/fs.h> -#include <linux/limits.h> -#include <ctype.h> -#include "kerncompat.h" -#include "ctree.h" -#include "utils.h" -#include "version.h" -#include "disk-io.h" -#include "transaction.h" - -#define MOUNTED 1 -#define UNMOUNTED 2 -#define GET_LABEL 3 -#define SET_LABEL 4 - -static void change_label_unmounted(char *dev, char *nLabel) -{ - struct btrfs_root *root; - struct btrfs_trans_handle *trans; - - /* Open the super_block at the default location - * and as read-write. - */ - root = open_ctree(dev, 0, 1); - - trans = btrfs_start_transaction(root, 1); - strncpy(root->fs_info->super_copy.label, nLabel, BTRFS_LABEL_SIZE); - btrfs_commit_transaction(trans, root); - - /* Now we close it since we are done. */ - close_ctree(root); -} - -static void get_label_unmounted(char *dev) -{ - struct btrfs_root *root; - - /* Open the super_block at the default location - * and as read-only. - */ - root = open_ctree(dev, 0, 0); - - fprintf(stdout, "%s\n", root->fs_info->super_copy.label); - - /* Now we close it since we are done. 
*/ - close_ctree(root); -} - -int get_label(char *btrfs_dev) -{ - - int ret; - ret = check_mounted(btrfs_dev); - if (ret < 0) - { - fprintf(stderr, "FATAL: error checking %s mount status\n", btrfs_dev); - return -1; - } - - if(ret != 0) - { - fprintf(stderr, "FATAL: the filesystem has to be unmounted\n"); - return -2; - } - get_label_unmounted(btrfs_dev); - return 0; -} - - -int set_label(char *btrfs_dev, char *nLabel) -{ - - int ret; - ret = check_mounted(btrfs_dev); - if (ret < 0) - { - fprintf(stderr, "FATAL: error checking %s mount status\n", btrfs_dev); - return -1; - } - - if(ret != 0) - { - fprintf(stderr, "FATAL: the filesystem has to be unmounted\n"); - return -2; - } - change_label_unmounted(btrfs_dev, nLabel); - return 0; -} diff --git a/btrfslabel.h b/btrfslabel.h deleted file mode 100644 index abf43ad..0000000 --- a/btrfslabel.h +++ /dev/null @@ -1,5 +0,0 @@ -/* btrflabel.h */ - - -int get_label(char *btrfs_dev); -int set_label(char *btrfs_dev, char *nLabel);
\ No newline at end of file diff --git a/btrfstune.c b/btrfstune.c index 47830c5..4db1767 100644 --- a/btrfstune.c +++ b/btrfstune.c @@ -40,7 +40,7 @@ int update_seeding_flag(struct btrfs_root *root, int set_flag) struct btrfs_super_block *disk_super; u64 super_flags; - disk_super = &root->fs_info->super_copy; + disk_super = root->fs_info->super_copy; super_flags = btrfs_super_flags(disk_super); if (set_flag) { if (super_flags & BTRFS_SUPER_FLAG_SEEDING) { @@ -65,22 +65,58 @@ int update_seeding_flag(struct btrfs_root *root, int set_flag) return 0; } +int enable_extrefs_flag(struct btrfs_root *root) +{ + struct btrfs_trans_handle *trans; + struct btrfs_super_block *disk_super; + u64 super_flags; + + disk_super = root->fs_info->super_copy; + super_flags = btrfs_super_incompat_flags(disk_super); + super_flags |= BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF; + trans = btrfs_start_transaction(root, 1); + btrfs_set_super_incompat_flags(disk_super, super_flags); + btrfs_commit_transaction(trans, root); + + return 0; +} + +int enable_skinny_metadata(struct btrfs_root *root) +{ + struct btrfs_trans_handle *trans; + struct btrfs_super_block *disk_super; + u64 super_flags; + + disk_super = root->fs_info->super_copy; + super_flags = btrfs_super_incompat_flags(disk_super); + super_flags |= BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA; + trans = btrfs_start_transaction(root, 1); + btrfs_set_super_incompat_flags(disk_super, super_flags); + btrfs_commit_transaction(trans, root); + + return 0; +} + static void print_usage(void) { fprintf(stderr, "usage: btrfstune [options] device\n"); fprintf(stderr, "\t-S value\tenable/disable seeding\n"); + fprintf(stderr, "\t-r \t\tenable extended inode refs\n"); + fprintf(stderr, "\t-x enable skinny metadata extent refs\n"); } int main(int argc, char *argv[]) { struct btrfs_root *root; int success = 0; + int extrefs_flag = 0; int seeding_flag = 0; int seeding_value = 0; + int skinny_flag = 0; int ret; while(1) { - int c = getopt(argc, argv, "S:"); + int c = 
getopt(argc, argv, "S:rx"); if (c < 0) break; switch(c) { @@ -88,6 +124,12 @@ int main(int argc, char *argv[]) seeding_flag = 1; seeding_value = atoi(optarg); break; + case 'r': + extrefs_flag = 1; + break; + case 'x': + skinny_flag = 1; + break; default: print_usage(); return 1; @@ -108,12 +150,27 @@ int main(int argc, char *argv[]) root = open_ctree(device, 0, 1); + if (!root) { + fprintf(stderr, "Open ctree failed\n"); + return 1; + } + if (seeding_flag) { ret = update_seeding_flag(root, seeding_value); if (!ret) success++; } + if (extrefs_flag) { + enable_extrefs_flag(root); + success++; + } + + if (skinny_flag) { + enable_skinny_metadata(root); + success++; + } + if (success > 0) { ret = 0; } else { diff --git a/cmds-balance.c b/cmds-balance.c new file mode 100644 index 0000000..cffa807 --- /dev/null +++ b/cmds-balance.c @@ -0,0 +1,728 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <getopt.h> +#include <sys/ioctl.h> +#include <errno.h> + +#include "kerncompat.h" +#include "ctree.h" +#include "ioctl.h" +#include "volumes.h" + +#include "commands.h" +#include "utils.h" + +static const char * const balance_cmd_group_usage[] = { + "btrfs [filesystem] balance <command> [options] <path>", + "btrfs [filesystem] balance <path>", + NULL +}; + +static const char balance_cmd_group_info[] = + "'btrfs filesystem balance' command is deprecated, please use\n" + "'btrfs balance start' command instead."; + +static int parse_one_profile(const char *profile, u64 *flags) +{ + if (!strcmp(profile, "raid0")) { + *flags |= BTRFS_BLOCK_GROUP_RAID0; + } else if (!strcmp(profile, "raid1")) { + *flags |= BTRFS_BLOCK_GROUP_RAID1; + } else if (!strcmp(profile, "raid10")) { + *flags |= BTRFS_BLOCK_GROUP_RAID10; + } else if (!strcmp(profile, "raid5")) { + *flags |= BTRFS_BLOCK_GROUP_RAID5; + } else if (!strcmp(profile, "raid6")) { + *flags |= BTRFS_BLOCK_GROUP_RAID6; + } else if (!strcmp(profile, "dup")) { + *flags |= BTRFS_BLOCK_GROUP_DUP; + } else if (!strcmp(profile, "single")) { + *flags |= BTRFS_AVAIL_ALLOC_BIT_SINGLE; + } else { + fprintf(stderr, "Unknown profile '%s'\n", profile); + return 1; + } + + return 0; +} + +static int parse_profiles(char *profiles, u64 *flags) +{ + char *this_char; + char *save_ptr = NULL; /* Satisfy static checkers */ + + for (this_char = strtok_r(profiles, "|", &save_ptr); + this_char != NULL; + this_char = strtok_r(NULL, "|", &save_ptr)) { + if (parse_one_profile(this_char, flags)) + return 1; + } + + return 0; +} + +static int parse_u64(const char *str, u64 *result) +{ + char *endptr; + u64 val; + + val = strtoull(str, &endptr, 10); + if (*endptr) + return 1; + + *result = val; + return 0; +} + +static int parse_range(const char *range, u64 *start, u64 *end) +{ + char *dots; + + dots = strstr(range, ".."); + if (dots) { + const char *rest 
= dots + 2; + int skipped = 0; + + *dots = 0; + + if (!*rest) { + *end = (u64)-1; + skipped++; + } else { + if (parse_u64(rest, end)) + return 1; + } + if (dots == range) { + *start = 0; + skipped++; + } else { + if (parse_u64(range, start)) + return 1; + } + + if (*start >= *end) { + fprintf(stderr, "Range %llu..%llu doesn't make " + "sense\n", (unsigned long long)*start, + (unsigned long long)*end); + return 1; + } + + if (skipped <= 1) + return 0; + } + + return 1; +} + +static int parse_filters(char *filters, struct btrfs_balance_args *args) +{ + char *this_char; + char *value; + char *save_ptr = NULL; /* Satisfy static checkers */ + + if (!filters) + return 0; + + for (this_char = strtok_r(filters, ",", &save_ptr); + this_char != NULL; + this_char = strtok_r(NULL, ",", &save_ptr)) { + if ((value = strchr(this_char, '=')) != NULL) + *value++ = 0; + if (!strcmp(this_char, "profiles")) { + if (!value || !*value) { + fprintf(stderr, "the profiles filter requires " + "an argument\n"); + return 1; + } + if (parse_profiles(value, &args->profiles)) { + fprintf(stderr, "Invalid profiles argument\n"); + return 1; + } + args->flags |= BTRFS_BALANCE_ARGS_PROFILES; + } else if (!strcmp(this_char, "usage")) { + if (!value || !*value) { + fprintf(stderr, "the usage filter requires " + "an argument\n"); + return 1; + } + if (parse_u64(value, &args->usage) || + args->usage > 100) { + fprintf(stderr, "Invalid usage argument: %s\n", + value); + return 1; + } + args->flags |= BTRFS_BALANCE_ARGS_USAGE; + } else if (!strcmp(this_char, "devid")) { + if (!value || !*value) { + fprintf(stderr, "the devid filter requires " + "an argument\n"); + return 1; + } + if (parse_u64(value, &args->devid) || + args->devid == 0) { + fprintf(stderr, "Invalid devid argument: %s\n", + value); + return 1; + } + args->flags |= BTRFS_BALANCE_ARGS_DEVID; + } else if (!strcmp(this_char, "drange")) { + if (!value || !*value) { + fprintf(stderr, "the drange filter requires " + "an argument\n"); + return 1; 
+ } + if (parse_range(value, &args->pstart, &args->pend)) { + fprintf(stderr, "Invalid drange argument\n"); + return 1; + } + args->flags |= BTRFS_BALANCE_ARGS_DRANGE; + } else if (!strcmp(this_char, "vrange")) { + if (!value || !*value) { + fprintf(stderr, "the vrange filter requires " + "an argument\n"); + return 1; + } + if (parse_range(value, &args->vstart, &args->vend)) { + fprintf(stderr, "Invalid vrange argument\n"); + return 1; + } + args->flags |= BTRFS_BALANCE_ARGS_VRANGE; + } else if (!strcmp(this_char, "convert")) { + if (!value || !*value) { + fprintf(stderr, "the convert option requires " + "an argument\n"); + return 1; + } + if (parse_one_profile(value, &args->target)) { + fprintf(stderr, "Invalid convert argument\n"); + return 1; + } + args->flags |= BTRFS_BALANCE_ARGS_CONVERT; + } else if (!strcmp(this_char, "soft")) { + args->flags |= BTRFS_BALANCE_ARGS_SOFT; + } else { + fprintf(stderr, "Unrecognized balance option '%s'\n", + this_char); + return 1; + } + } + + return 0; +} + +static void dump_balance_args(struct btrfs_balance_args *args) +{ + if (args->flags & BTRFS_BALANCE_ARGS_CONVERT) { + printf("converting, target=%llu, soft is %s", + (unsigned long long)args->target, + (args->flags & BTRFS_BALANCE_ARGS_SOFT) ? 
"on" : "off"); + } else { + printf("balancing"); + } + + if (args->flags & BTRFS_BALANCE_ARGS_PROFILES) + printf(", profiles=%llu", (unsigned long long)args->profiles); + if (args->flags & BTRFS_BALANCE_ARGS_USAGE) + printf(", usage=%llu", (unsigned long long)args->usage); + if (args->flags & BTRFS_BALANCE_ARGS_DEVID) + printf(", devid=%llu", (unsigned long long)args->devid); + if (args->flags & BTRFS_BALANCE_ARGS_DRANGE) + printf(", drange=%llu..%llu", + (unsigned long long)args->pstart, + (unsigned long long)args->pend); + if (args->flags & BTRFS_BALANCE_ARGS_VRANGE) + printf(", vrange=%llu..%llu", + (unsigned long long)args->vstart, + (unsigned long long)args->vend); + + printf("\n"); +} + +static void dump_ioctl_balance_args(struct btrfs_ioctl_balance_args *args) +{ + printf("Dumping filters: flags 0x%llx, state 0x%llx, force is %s\n", + (unsigned long long)args->flags, (unsigned long long)args->state, + (args->flags & BTRFS_BALANCE_FORCE) ? "on" : "off"); + if (args->flags & BTRFS_BALANCE_DATA) { + printf(" DATA (flags 0x%llx): ", + (unsigned long long)args->data.flags); + dump_balance_args(&args->data); + } + if (args->flags & BTRFS_BALANCE_METADATA) { + printf(" METADATA (flags 0x%llx): ", + (unsigned long long)args->meta.flags); + dump_balance_args(&args->meta); + } + if (args->flags & BTRFS_BALANCE_SYSTEM) { + printf(" SYSTEM (flags 0x%llx): ", + (unsigned long long)args->sys.flags); + dump_balance_args(&args->sys); + } +} + +static int do_balance_v1(int fd) +{ + struct btrfs_ioctl_vol_args args; + int ret; + + memset(&args, 0, sizeof(args)); + ret = ioctl(fd, BTRFS_IOC_BALANCE, &args); + return ret; +} + +static int do_balance(const char *path, struct btrfs_ioctl_balance_args *args, + int nofilters) +{ + int fd; + int ret; + int e; + + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access to '%s'\n", path); + return 12; + } + + ret = ioctl(fd, BTRFS_IOC_BALANCE_V2, args); + e = errno; + + if (ret < 0) { + /* + * older kernels 
don't have the new balance ioctl, try the + * old one. But, the old one doesn't know any filters, so + * don't fall back if they tried to use the fancy new things + */ + if (e == ENOTTY && nofilters) { + ret = do_balance_v1(fd); + if (ret == 0) + goto out; + e = errno; + } + + if (e == ECANCELED) { + if (args->state & BTRFS_BALANCE_STATE_PAUSE_REQ) + fprintf(stderr, "balance paused by user\n"); + if (args->state & BTRFS_BALANCE_STATE_CANCEL_REQ) + fprintf(stderr, "balance canceled by user\n"); + ret = 0; + } else { + fprintf(stderr, "ERROR: error during balancing '%s' " + "- %s\n", path, strerror(e)); + if (e != EINPROGRESS) + fprintf(stderr, "There may be more info in " + "syslog - try dmesg | tail\n"); + ret = 19; + } + } else { + printf("Done, had to relocate %llu out of %llu chunks\n", + (unsigned long long)args->stat.completed, + (unsigned long long)args->stat.considered); + ret = 0; + } + +out: + close(fd); + return ret; +} + +static const char * const cmd_balance_start_usage[] = { + "btrfs [filesystem] balance start [options] <path>", + "Balance chunks across the devices", + "Balance and/or convert (change allocation profile of) chunks that", + "passed all filters in a comma-separated list of filters for a", + "particular chunk type. If filter list is not given balance all", + "chunks of that type. 
In case none of the -d, -m or -s options is", + "given balance all chunks in a filesystem.", + "", + "-d[filters] act on data chunks", + "-m[filters] act on metadata chunks", + "-s[filters] act on system chunks (only under -f)", + "-v be verbose", + "-f force reducing of metadata integrity", + NULL +}; + +static int cmd_balance_start(int argc, char **argv) +{ + struct btrfs_ioctl_balance_args args; + struct btrfs_balance_args *ptrs[] = { &args.data, &args.sys, + &args.meta, NULL }; + int force = 0; + int verbose = 0; + int nofilters = 1; + int i; + + memset(&args, 0, sizeof(args)); + + optind = 1; + while (1) { + int longindex; + static struct option longopts[] = { + { "data", optional_argument, NULL, 'd'}, + { "metadata", optional_argument, NULL, 'm' }, + { "system", optional_argument, NULL, 's' }, + { "force", no_argument, NULL, 'f' }, + { "verbose", no_argument, NULL, 'v' }, + { 0, 0, 0, 0 } + }; + + int opt = getopt_long(argc, argv, "d::s::m::fv", longopts, + &longindex); + if (opt < 0) + break; + + switch (opt) { + case 'd': + nofilters = 0; + args.flags |= BTRFS_BALANCE_DATA; + + if (parse_filters(optarg, &args.data)) + return 1; + break; + case 's': + nofilters = 0; + args.flags |= BTRFS_BALANCE_SYSTEM; + + if (parse_filters(optarg, &args.sys)) + return 1; + break; + case 'm': + nofilters = 0; + args.flags |= BTRFS_BALANCE_METADATA; + + if (parse_filters(optarg, &args.meta)) + return 1; + break; + case 'f': + force = 1; + break; + case 'v': + verbose = 1; + break; + default: + usage(cmd_balance_start_usage); + } + } + + if (check_argc_exact(argc - optind, 1)) + usage(cmd_balance_start_usage); + + /* + * allow -s only under --force, otherwise do with system chunks + * the same thing we were ordered to do with meta chunks + */ + if (args.flags & BTRFS_BALANCE_SYSTEM) { + if (!force) { + fprintf(stderr, +"Refusing to explicitly operate on system chunks.\n" +"Pass --force if you really want to do that.\n"); + return 1; + } + } else if (args.flags & 
BTRFS_BALANCE_METADATA) { + args.flags |= BTRFS_BALANCE_SYSTEM; + memcpy(&args.sys, &args.meta, + sizeof(struct btrfs_balance_args)); + } + + if (nofilters) { + /* relocate everything - no filters */ + args.flags |= BTRFS_BALANCE_TYPE_MASK; + } + + /* drange makes sense only when devid is set */ + for (i = 0; ptrs[i]; i++) { + if ((ptrs[i]->flags & BTRFS_BALANCE_ARGS_DRANGE) && + !(ptrs[i]->flags & BTRFS_BALANCE_ARGS_DEVID)) { + fprintf(stderr, "drange filter can be used only if " + "devid filter is used\n"); + return 1; + } + } + + /* soft makes sense only when convert for corresponding type is set */ + for (i = 0; ptrs[i]; i++) { + if ((ptrs[i]->flags & BTRFS_BALANCE_ARGS_SOFT) && + !(ptrs[i]->flags & BTRFS_BALANCE_ARGS_CONVERT)) { + fprintf(stderr, "'soft' option can be used only if " + "changing profiles\n"); + return 1; + } + } + + if (force) + args.flags |= BTRFS_BALANCE_FORCE; + if (verbose) + dump_ioctl_balance_args(&args); + + return do_balance(argv[optind], &args, nofilters); +} + +static const char * const cmd_balance_pause_usage[] = { + "btrfs [filesystem] balance pause <path>", + "Pause running balance", + NULL +}; + +static int cmd_balance_pause(int argc, char **argv) +{ + const char *path; + int fd; + int ret; + int e; + + if (check_argc_exact(argc, 2)) + usage(cmd_balance_pause_usage); + + path = argv[1]; + + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access to '%s'\n", path); + return 12; + } + + ret = ioctl(fd, BTRFS_IOC_BALANCE_CTL, BTRFS_BALANCE_CTL_PAUSE); + e = errno; + close(fd); + + if (ret < 0) { + fprintf(stderr, "ERROR: balance pause on '%s' failed - %s\n", + path, (e == ENOTCONN) ? 
"Not running" : strerror(e)); + return 19; + } + + return 0; +} + +static const char * const cmd_balance_cancel_usage[] = { + "btrfs [filesystem] balance cancel <path>", + "Cancel running or paused balance", + NULL +}; + +static int cmd_balance_cancel(int argc, char **argv) +{ + const char *path; + int fd; + int ret; + int e; + + if (check_argc_exact(argc, 2)) + usage(cmd_balance_cancel_usage); + + path = argv[1]; + + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access to '%s'\n", path); + return 12; + } + + ret = ioctl(fd, BTRFS_IOC_BALANCE_CTL, BTRFS_BALANCE_CTL_CANCEL); + e = errno; + close(fd); + + if (ret < 0) { + fprintf(stderr, "ERROR: balance cancel on '%s' failed - %s\n", + path, (e == ENOTCONN) ? "Not in progress" : strerror(e)); + return 19; + } + + return 0; +} + +static const char * const cmd_balance_resume_usage[] = { + "btrfs [filesystem] balance resume <path>", + "Resume interrupted balance", + NULL +}; + +static int cmd_balance_resume(int argc, char **argv) +{ + struct btrfs_ioctl_balance_args args; + const char *path; + int fd; + int ret; + int e; + + if (check_argc_exact(argc, 2)) + usage(cmd_balance_resume_usage); + + path = argv[1]; + + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access to '%s'\n", path); + return 12; + } + + memset(&args, 0, sizeof(args)); + args.flags |= BTRFS_BALANCE_RESUME; + + ret = ioctl(fd, BTRFS_IOC_BALANCE_V2, &args); + e = errno; + close(fd); + + if (ret < 0) { + if (e == ECANCELED) { + if (args.state & BTRFS_BALANCE_STATE_PAUSE_REQ) + fprintf(stderr, "balance paused by user\n"); + if (args.state & BTRFS_BALANCE_STATE_CANCEL_REQ) + fprintf(stderr, "balance canceled by user\n"); + } else if (e == ENOTCONN || e == EINPROGRESS) { + fprintf(stderr, "ERROR: balance resume on '%s' " + "failed - %s\n", path, + (e == ENOTCONN) ? 
"Not in progress" : + "Already running"); + return 19; + } else { + fprintf(stderr, +"ERROR: error during balancing '%s' - %s\n" +"There may be more info in syslog - try dmesg | tail\n", path, strerror(e)); + return 19; + } + } else { + printf("Done, had to relocate %llu out of %llu chunks\n", + (unsigned long long)args.stat.completed, + (unsigned long long)args.stat.considered); + } + + return 0; +} + +static const char * const cmd_balance_status_usage[] = { + "btrfs [filesystem] balance status [-v] <path>", + "Show status of running or paused balance", + "", + "-v be verbose", + NULL +}; + +/* Checks the status of the balance if any + * return codes: + * 2 : Error failed to know if there is any pending balance + * 1 : Successful to know status of a pending balance + * 0 : When there is no pending balance or completed + */ +static int cmd_balance_status(int argc, char **argv) +{ + struct btrfs_ioctl_balance_args args; + const char *path; + int fd; + int verbose = 0; + int ret; + int e; + + optind = 1; + while (1) { + int longindex; + static struct option longopts[] = { + { "verbose", no_argument, NULL, 'v' }, + { 0, 0, 0, 0} + }; + + int opt = getopt_long(argc, argv, "v", longopts, &longindex); + if (opt < 0) + break; + + switch (opt) { + case 'v': + verbose = 1; + break; + default: + usage(cmd_balance_status_usage); + } + } + + if (check_argc_exact(argc - optind, 1)) + usage(cmd_balance_status_usage); + + path = argv[optind]; + + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access to '%s'\n", path); + return 2; + } + + ret = ioctl(fd, BTRFS_IOC_BALANCE_PROGRESS, &args); + e = errno; + close(fd); + + if (ret < 0) { + if (e == ENOTCONN) { + printf("No balance found on '%s'\n", path); + return 0; + } + fprintf(stderr, "ERROR: balance status on '%s' failed - %s\n", + path, strerror(e)); + return 2; + } + + if (args.state & BTRFS_BALANCE_STATE_RUNNING) { + printf("Balance on '%s' is running", path); + if (args.state & 
BTRFS_BALANCE_STATE_CANCEL_REQ) + printf(", cancel requested\n"); + else if (args.state & BTRFS_BALANCE_STATE_PAUSE_REQ) + printf(", pause requested\n"); + else + printf("\n"); + } else { + printf("Balance on '%s' is paused\n", path); + } + + printf("%llu out of about %llu chunks balanced (%llu considered), " + "%3.f%% left\n", (unsigned long long)args.stat.completed, + (unsigned long long)args.stat.expected, + (unsigned long long)args.stat.considered, + 100 * (1 - (float)args.stat.completed/args.stat.expected)); + + if (verbose) + dump_ioctl_balance_args(&args); + + return 1; +} + +const struct cmd_group balance_cmd_group = { + balance_cmd_group_usage, balance_cmd_group_info, { + { "start", cmd_balance_start, cmd_balance_start_usage, NULL, 0 }, + { "pause", cmd_balance_pause, cmd_balance_pause_usage, NULL, 0 }, + { "cancel", cmd_balance_cancel, cmd_balance_cancel_usage, NULL, 0 }, + { "resume", cmd_balance_resume, cmd_balance_resume_usage, NULL, 0 }, + { "status", cmd_balance_status, cmd_balance_status_usage, NULL, 0 }, + { 0, 0, 0, 0, 0 } + } +}; + +int cmd_balance(int argc, char **argv) +{ + if (argc == 2) { + /* old 'btrfs filesystem balance <path>' syntax */ + struct btrfs_ioctl_balance_args args; + + memset(&args, 0, sizeof(args)); + args.flags |= BTRFS_BALANCE_TYPE_MASK; + + return do_balance(argv[1], &args, 1); + } + + return handle_command_group(&balance_cmd_group, argc, argv); +} @@ -22,20 +22,29 @@ #include <stdlib.h> #include <unistd.h> #include <fcntl.h> +#include <sys/types.h> #include <sys/stat.h> +#include <unistd.h> +#include <getopt.h> +#include <uuid/uuid.h> #include "kerncompat.h" #include "ctree.h" +#include "volumes.h" +#include "repair.h" #include "disk-io.h" #include "print-tree.h" #include "transaction.h" #include "list.h" #include "version.h" #include "utils.h" +#include "commands.h" +#include "free-space-cache.h" static u64 bytes_used = 0; static u64 total_csum_bytes = 0; static u64 total_btree_bytes = 0; static u64 total_fs_tree_bytes = 
0; +static u64 total_extent_tree_bytes = 0; static u64 btree_space_waste = 0; static u64 data_bytes_allocated = 0; static u64 data_bytes_referenced = 0; @@ -57,6 +66,7 @@ struct data_backref { }; u64 owner; u64 offset; + u64 bytes; u32 num_refs; u32 found_ref; }; @@ -74,12 +84,17 @@ struct extent_record { struct cache_extent cache; struct btrfs_disk_key parent_key; u64 start; + u64 max_size; u64 nr; u64 refs; u64 extent_item_refs; + u64 generation; + u64 info_objectid; + u8 info_level; unsigned int content_checked:1; unsigned int owner_ref_checked:1; unsigned int is_root:1; + unsigned int metadata:1; }; struct inode_backref { @@ -89,6 +104,7 @@ struct inode_backref { unsigned int found_inode_ref:1; unsigned int filetype:8; int errors; + unsigned int ref_type; u64 dir; u64 index; u16 namelen; @@ -267,6 +283,9 @@ static struct inode_record *get_inode_rec(struct cache_tree *inode_cache, node->cache.size = 1; node->data = rec; + if (ino == BTRFS_FREE_INO_OBJECTID) + rec->found_link = 1; + ret = insert_existing_cache_extent(inode_cache, &node->cache); BUG_ON(ret); } @@ -459,12 +478,14 @@ static int add_inode_backref(struct cache_tree *inode_cache, backref->filetype = filetype; backref->found_dir_item = 1; - } else if (itemtype == BTRFS_INODE_REF_KEY) { + } else if ((itemtype == BTRFS_INODE_REF_KEY) || + (itemtype == BTRFS_INODE_EXTREF_KEY)) { if (backref->found_inode_ref) backref->errors |= REF_ERR_DUP_INODE_REF; if (backref->found_dir_index && backref->index != index) backref->errors |= REF_ERR_INDEX_UNMATCH; + backref->ref_type = itemtype; backref->index = index; backref->found_inode_ref = 1; } else { @@ -500,7 +521,7 @@ static int merge_inode_recs(struct inode_record *src, struct inode_record *dst, add_inode_backref(dst_cache, dst->ino, backref->dir, backref->index, backref->name, backref->namelen, 0, - BTRFS_INODE_REF_KEY, backref->errors); + backref->ref_type, backref->errors); } } @@ -740,7 +761,66 @@ static int leave_shared_node(struct btrfs_root *root, return 0; 
} -static int process_dir_item(struct extent_buffer *eb, +static int is_child_root(struct btrfs_root *root, u64 parent_root_id, + u64 child_root_id) +{ + struct btrfs_path path; + struct btrfs_key key; + struct extent_buffer *leaf; + int has_parent = 0; + int ret; + + btrfs_init_path(&path); + + key.objectid = parent_root_id; + key.type = BTRFS_ROOT_REF_KEY; + key.offset = child_root_id; + ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path, + 0, 0); + BUG_ON(ret < 0); + btrfs_release_path(root, &path); + if (!ret) + return 1; + + key.objectid = child_root_id; + key.type = BTRFS_ROOT_BACKREF_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path, + 0, 0); + BUG_ON(ret <= 0); + + while (1) { + leaf = path.nodes[0]; + if (path.slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root->fs_info->tree_root, &path); + BUG_ON(ret < 0); + + if (ret > 0) + break; + leaf = path.nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); + if (key.objectid != child_root_id || + key.type != BTRFS_ROOT_BACKREF_KEY) + break; + + has_parent = 1; + + if (key.offset == parent_root_id) { + btrfs_release_path(root, &path); + return 1; + } + + path.slots[0]++; + } + + btrfs_release_path(root, &path); + return has_parent? 
0 : -1; +} + +static int process_dir_item(struct btrfs_root *root, + struct extent_buffer *eb, int slot, struct btrfs_key *key, struct shared_node *active_node) { @@ -789,8 +869,9 @@ static int process_dir_item(struct extent_buffer *eb, len, filetype, key->type, error); } else if (location.type == BTRFS_ROOT_ITEM_KEY) { add_inode_backref(root_cache, location.objectid, - key->objectid, key->offset, namebuf, - len, filetype, key->type, error); + key->objectid, key->offset, + namebuf, len, filetype, + key->type, error); } else { fprintf(stderr, "warning line %d\n", __LINE__); } @@ -844,6 +925,49 @@ static int process_inode_ref(struct extent_buffer *eb, return 0; } +static int process_inode_extref(struct extent_buffer *eb, + int slot, struct btrfs_key *key, + struct shared_node *active_node) +{ + u32 total; + u32 cur = 0; + u32 len; + u32 name_len; + u64 index; + u64 parent; + int error; + struct cache_tree *inode_cache; + struct btrfs_inode_extref *extref; + char namebuf[BTRFS_NAME_LEN]; + + inode_cache = &active_node->inode_cache; + + extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref); + total = btrfs_item_size_nr(eb, slot); + while (cur < total) { + name_len = btrfs_inode_extref_name_len(eb, extref); + index = btrfs_inode_extref_index(eb, extref); + parent = btrfs_inode_extref_parent(eb, extref); + if (name_len <= BTRFS_NAME_LEN) { + len = name_len; + error = 0; + } else { + len = BTRFS_NAME_LEN; + error = REF_ERR_NAME_TOO_LONG; + } + read_extent_buffer(eb, namebuf, + (unsigned long)(extref + 1), len); + add_inode_backref(inode_cache, key->objectid, parent, + index, namebuf, len, 0, key->type, error); + + len = sizeof(*extref) + name_len; + extref = (struct btrfs_inode_extref *)((char *)extref + len); + cur += len; + } + return 0; + +} + static u64 count_csum_range(struct btrfs_root *root, u64 start, u64 len) { struct btrfs_key key; @@ -853,7 +977,7 @@ static u64 count_csum_range(struct btrfs_root *root, u64 start, u64 len) size_t size; u64 found = 0; u64 
csum_end; - u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); btrfs_init_path(&path); @@ -1008,6 +1132,10 @@ static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb, nritems = btrfs_header_nritems(eb); for (i = 0; i < nritems; i++) { btrfs_item_key_to_cpu(eb, &key, i); + + if (key.objectid == BTRFS_FREE_SPACE_OBJECTID) + continue; + if (active_node->current == NULL || active_node->current->ino < key.objectid) { if (active_node->current) { @@ -1021,11 +1149,14 @@ static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb, switch (key.type) { case BTRFS_DIR_ITEM_KEY: case BTRFS_DIR_INDEX_KEY: - ret = process_dir_item(eb, i, &key, active_node); + ret = process_dir_item(root, eb, i, &key, active_node); break; case BTRFS_INODE_REF_KEY: ret = process_inode_ref(eb, i, &key, active_node); break; + case BTRFS_INODE_EXTREF_KEY: + ret = process_inode_extref(eb, i, &key, active_node); + break; case BTRFS_INODE_ITEM_KEY: ret = process_inode_item(eb, i, &key, active_node); break; @@ -1081,8 +1212,10 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = btrfs_lookup_extent_info(NULL, root, path->nodes[*level]->start, - path->nodes[*level]->len, &refs, NULL); - BUG_ON(ret); + *level, 1, &refs, NULL); + if (ret < 0) + goto out; + if (refs > 1) { ret = enter_shared_node(root, path->nodes[*level]->start, refs, wc, *level); @@ -1107,9 +1240,10 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, bytenr = btrfs_node_blockptr(cur, path->slots[*level]); ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); blocksize = btrfs_level_size(root, *level - 1); - ret = btrfs_lookup_extent_info(NULL, root, bytenr, blocksize, - &refs, NULL); - BUG_ON(ret); + ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1, + 1, &refs, NULL); + if (ret < 0) + refs = 0; if (refs 
> 1) { ret = enter_shared_node(root, bytenr, refs, @@ -1409,6 +1543,9 @@ static int merge_root_recs(struct btrfs_root *root, remove_cache_extent(src_cache, &node->cache); free(node); + if (!is_child_root(root, root->objectid, rec->ino)) + goto skip; + list_for_each_entry(backref, &rec->backrefs, list) { BUG_ON(backref->found_inode_ref); if (backref->found_dir_item) @@ -1424,6 +1561,7 @@ static int merge_root_recs(struct btrfs_root *root, backref->namelen, BTRFS_DIR_INDEX_KEY, backref->errors); } +skip: free_inode_rec(rec); } return 0; @@ -1691,6 +1829,10 @@ static int check_fs_roots(struct btrfs_root *root, fs_root_objectid(key.objectid)) { tmp_root = btrfs_read_fs_root_no_cache(root->fs_info, &key); + if (IS_ERR(tmp_root)) { + err = 1; + goto next; + } ret = check_fs_root(tmp_root, root_cache, &wc); if (ret) err = 1; @@ -1700,6 +1842,7 @@ static int check_fs_roots(struct btrfs_root *root, process_root_ref(leaf, path.slots[0], &key, root_cache); } +next: path.slots[0]++; } btrfs_release_path(tree_root, &path); @@ -1710,86 +1853,6 @@ static int check_fs_roots(struct btrfs_root *root, return err; } -static int check_node(struct btrfs_root *root, - struct btrfs_disk_key *parent_key, - struct extent_buffer *buf) -{ - int i; - struct btrfs_key cpukey; - struct btrfs_disk_key key; - u32 nritems = btrfs_header_nritems(buf); - - if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root)) - return 1; - if (parent_key->type) { - btrfs_node_key(buf, &key, 0); - if (memcmp(parent_key, &key, sizeof(key))) - return 1; - } - for (i = 0; nritems > 1 && i < nritems - 2; i++) { - btrfs_node_key(buf, &key, i); - btrfs_node_key_to_cpu(buf, &cpukey, i + 1); - if (btrfs_comp_keys(&key, &cpukey) >= 0) - return 1; - } - return 0; -} - -static int check_leaf(struct btrfs_root *root, - struct btrfs_disk_key *parent_key, - struct extent_buffer *buf) -{ - int i; - struct btrfs_key cpukey; - struct btrfs_disk_key key; - u32 nritems = btrfs_header_nritems(buf); - - if (btrfs_header_level(buf) 
!= 0) { - fprintf(stderr, "leaf is not a leaf %llu\n", - (unsigned long long)btrfs_header_bytenr(buf)); - return 1; - } - if (btrfs_leaf_free_space(root, buf) < 0) { - fprintf(stderr, "leaf free space incorrect %llu %d\n", - (unsigned long long)btrfs_header_bytenr(buf), - btrfs_leaf_free_space(root, buf)); - return 1; - } - - if (nritems == 0) - return 0; - - btrfs_item_key(buf, &key, 0); - if (parent_key->type && memcmp(parent_key, &key, sizeof(key))) { - fprintf(stderr, "leaf parent key incorrect %llu\n", - (unsigned long long)btrfs_header_bytenr(buf)); - return 1; - } - for (i = 0; nritems > 1 && i < nritems - 2; i++) { - btrfs_item_key(buf, &key, i); - btrfs_item_key_to_cpu(buf, &cpukey, i + 1); - if (btrfs_comp_keys(&key, &cpukey) >= 0) { - fprintf(stderr, "bad key ordering %d %d\n", i, i+1); - return 1; - } - if (btrfs_item_offset_nr(buf, i) != - btrfs_item_end_nr(buf, i + 1)) { - fprintf(stderr, "incorrect offsets %u %u\n", - btrfs_item_offset_nr(buf, i), - btrfs_item_end_nr(buf, i + 1)); - return 1; - } - if (i == 0 && btrfs_item_end_nr(buf, i) != - BTRFS_LEAF_DATA_SIZE(root)) { - fprintf(stderr, "bad item end %u wanted %u\n", - btrfs_item_end_nr(buf, i), - (unsigned)BTRFS_LEAF_DATA_SIZE(root)); - return 1; - } - } - return 0; -} - static int all_backpointers_checked(struct extent_record *rec, int print_errs) { struct list_head *cur = rec->backrefs.next; @@ -1834,12 +1897,12 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs) if (!print_errs) goto out; tback = (struct tree_backref *)back; - fprintf(stderr, "Backref %llu %s %llu not referenced\n", + fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n", (unsigned long long)rec->start, back->full_backref ? "parent" : "root", back->full_backref ? 
(unsigned long long)tback->parent : - (unsigned long long)tback->root); + (unsigned long long)tback->root, back); } if (back->is_data) { dback = (struct data_backref *)back; @@ -1849,7 +1912,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs) goto out; fprintf(stderr, "Incorrect local backref count" " on %llu %s %llu owner %llu" - " offset %llu found %u wanted %u\n", + " offset %llu found %u wanted %u back %p\n", (unsigned long long)rec->start, back->full_backref ? "parent" : "root", @@ -1858,7 +1921,18 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs) (unsigned long long)dback->root, (unsigned long long)dback->owner, (unsigned long long)dback->offset, - dback->found_ref, dback->num_refs); + dback->found_ref, dback->num_refs, back); + } + if (dback->bytes != rec->nr) { + err = 1; + if (!print_errs) + goto out; + fprintf(stderr, "Backref bytes do not match " + "extent backref, bytenr=%llu, ref " + "bytes=%llu, backref bytes=%llu\n", + (unsigned long long)rec->start, + (unsigned long long)rec->nr, + (unsigned long long)dback->bytes); } } if (!back->is_data) { @@ -1917,8 +1991,10 @@ static int check_owner_ref(struct btrfs_root *root, struct btrfs_root *ref_root; struct btrfs_key key; struct btrfs_path path; + struct extent_buffer *parent; int level; int found = 0; + int ret; list_for_each_entry(node, &rec->backrefs, list) { if (node->is_data) @@ -1939,43 +2015,105 @@ static int check_owner_ref(struct btrfs_root *root, key.offset = (u64)-1; ref_root = btrfs_read_fs_root(root->fs_info, &key); - BUG_ON(IS_ERR(ref_root)); + if (IS_ERR(ref_root)) + return 1; level = btrfs_header_level(buf); if (level == 0) btrfs_item_key_to_cpu(buf, &key, 0); else btrfs_node_key_to_cpu(buf, &key, 0); - + btrfs_init_path(&path); path.lowest_level = level + 1; - btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0); + ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0); + if (ret < 0) + return 0; - if (buf->start == 
btrfs_node_blockptr(path.nodes[level + 1], - path.slots[level + 1])) - rec->owner_ref_checked = 1; + parent = path.nodes[level + 1]; + if (parent && buf->start == btrfs_node_blockptr(parent, + path.slots[level + 1])) + found = 1; btrfs_release_path(ref_root, &path); return found ? 0 : 1; } +static int is_extent_tree_record(struct extent_record *rec) +{ + struct list_head *cur = rec->backrefs.next; + struct extent_backref *node; + struct tree_backref *back; + int is_extent = 0; + + while(cur != &rec->backrefs) { + node = list_entry(cur, struct extent_backref, list); + cur = cur->next; + if (node->is_data) + return 0; + back = (struct tree_backref *)node; + if (node->full_backref) + return 0; + if (back->root == BTRFS_EXTENT_TREE_OBJECTID) + is_extent = 1; + } + return is_extent; +} + + +static int record_bad_block_io(struct btrfs_fs_info *info, + struct cache_tree *extent_cache, + u64 start, u64 len) +{ + struct extent_record *rec; + struct cache_extent *cache; + struct btrfs_key key; + + cache = find_cache_extent(extent_cache, start, len); + if (!cache) + return 0; + + rec = container_of(cache, struct extent_record, cache); + if (!is_extent_tree_record(rec)) + return 0; + + btrfs_disk_key_to_cpu(&key, &rec->parent_key); + return btrfs_add_corrupt_extent_record(info, &key, start, len, 0); +} + static int check_block(struct btrfs_root *root, struct cache_tree *extent_cache, struct extent_buffer *buf, u64 flags) { struct extent_record *rec; struct cache_extent *cache; + struct btrfs_key key; int ret = 1; + int level; cache = find_cache_extent(extent_cache, buf->start, buf->len); if (!cache) return 1; rec = container_of(cache, struct extent_record, cache); - if (btrfs_is_leaf(buf)) { - ret = check_leaf(root, &rec->parent_key, buf); - } else { - ret = check_node(root, &rec->parent_key, buf); + rec->generation = btrfs_header_generation(buf); + + level = btrfs_header_level(buf); + if (btrfs_header_nritems(buf) > 0) { + + if (level == 0) + btrfs_item_key_to_cpu(buf, &key, 
0); + else + btrfs_node_key_to_cpu(buf, &key, 0); + + rec->info_objectid = key.objectid; } + rec->info_level = level; + + if (btrfs_is_leaf(buf)) + ret = btrfs_check_leaf(root, &rec->parent_key, buf); + else + ret = btrfs_check_node(root, &rec->parent_key, buf); + if (ret) { fprintf(stderr, "bad block %llu\n", (unsigned long long)buf->start); @@ -2035,12 +2173,14 @@ static struct tree_backref *alloc_tree_backref(struct extent_record *rec, ref->node.full_backref = 0; } list_add_tail(&ref->node.list, &rec->backrefs); + return ref; } static struct data_backref *find_data_backref(struct extent_record *rec, u64 parent, u64 root, - u64 owner, u64 offset) + u64 owner, u64 offset, + int found_ref, u64 bytes) { struct list_head *cur = rec->backrefs.next; struct extent_backref *node; @@ -2052,7 +2192,7 @@ static struct data_backref *find_data_backref(struct extent_record *rec, if (!node->is_data) continue; back = (struct data_backref *)node; - if (parent > 0) { + if (parent > 0) { if (!node->full_backref) continue; if (parent == back->parent) @@ -2061,8 +2201,12 @@ static struct data_backref *find_data_backref(struct extent_record *rec, if (node->full_backref) continue; if (back->root == root && back->owner == owner && - back->offset == offset) + back->offset == offset) { + if (found_ref && node->found_ref && + back->bytes != bytes) + continue; return back; + } } } return NULL; @@ -2070,11 +2214,13 @@ static struct data_backref *find_data_backref(struct extent_record *rec, static struct data_backref *alloc_data_backref(struct extent_record *rec, u64 parent, u64 root, - u64 owner, u64 offset) + u64 owner, u64 offset, + u64 max_size) { struct data_backref *ref = malloc(sizeof(*ref)); memset(&ref->node, 0, sizeof(ref->node)); ref->node.is_data = 1; + if (parent > 0) { ref->parent = parent; ref->owner = 0; @@ -2086,16 +2232,20 @@ static struct data_backref *alloc_data_backref(struct extent_record *rec, ref->offset = offset; ref->node.full_backref = 0; } + ref->bytes = max_size; 
ref->found_ref = 0; ref->num_refs = 0; list_add_tail(&ref->node.list, &rec->backrefs); + if (max_size > rec->max_size) + rec->max_size = max_size; return ref; } static int add_extent_rec(struct cache_tree *extent_cache, struct btrfs_key *parent_key, u64 start, u64 nr, u64 extent_item_refs, - int is_root, int inc_ref, int set_checked) + int is_root, int inc_ref, int set_checked, + int metadata, int extent_rec, u64 max_size) { struct extent_record *rec; struct cache_extent *cache; @@ -2107,6 +2257,14 @@ static int add_extent_rec(struct cache_tree *extent_cache, if (inc_ref) rec->refs++; if (rec->nr == 1) + rec->nr = max(nr, max_size); + + /* + * We need to make sure to reset nr to whatever the extent + * record says was the real size, this way we can compare it to + * the backrefs. + */ + if (extent_rec) rec->nr = nr; if (start != rec->start) { @@ -2136,14 +2294,19 @@ static int add_extent_rec(struct cache_tree *extent_cache, if (parent_key) btrfs_cpu_key_to_disk(&rec->parent_key, parent_key); + if (rec->max_size < max_size) + rec->max_size = max_size; + maybe_free_extent_rec(extent_cache, rec); return ret; } rec = malloc(sizeof(*rec)); rec->start = start; - rec->nr = nr; + rec->max_size = max_size; + rec->nr = max(nr, max_size); rec->content_checked = 0; rec->owner_ref_checked = 0; + rec->metadata = metadata; INIT_LIST_HEAD(&rec->backrefs); if (is_root) @@ -2187,7 +2350,8 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, cache = find_cache_extent(extent_cache, bytenr, 1); if (!cache) { - add_extent_rec(extent_cache, NULL, bytenr, 1, 0, 0, 0, 0); + add_extent_rec(extent_cache, NULL, bytenr, + 1, 0, 0, 0, 0, 1, 0, 0); cache = find_cache_extent(extent_cache, bytenr, 1); if (!cache) abort(); @@ -2226,7 +2390,7 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, u64 parent, u64 root, u64 owner, u64 offset, - u32 num_refs, int found_ref) + u32 
num_refs, int found_ref, u64 max_size) { struct extent_record *rec; struct data_backref *back; @@ -2234,7 +2398,8 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, cache = find_cache_extent(extent_cache, bytenr, 1); if (!cache) { - add_extent_rec(extent_cache, NULL, bytenr, 1, 0, 0, 0, 0); + add_extent_rec(extent_cache, NULL, bytenr, 1, 0, 0, 0, 0, + 0, 0, max_size); cache = find_cache_extent(extent_cache, bytenr, 1); if (!cache) abort(); @@ -2244,14 +2409,32 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, if (rec->start != bytenr) { abort(); } - back = find_data_backref(rec, parent, root, owner, offset); + if (rec->max_size < max_size) + rec->max_size = max_size; + + /* + * If found_ref is set then max_size is the real size and must match the + * existing refs. So if we have already found a ref then we need to + * make sure that this ref matches the existing one, otherwise we need + * to add a new backref so we can notice that the backrefs don't match + * and we need to figure out who is telling the truth. This is to + * account for that awful fsync bug I introduced where we'd end up with + * a btrfs_file_extent_item that would have its length include multiple + * prealloc extents or point inside of a prealloc extent. 
+ */ + back = find_data_backref(rec, parent, root, owner, offset, found_ref, + max_size); if (!back) - back = alloc_data_backref(rec, parent, root, owner, offset); + back = alloc_data_backref(rec, parent, root, owner, offset, + max_size); if (found_ref) { BUG_ON(num_refs != 1); + if (back->node.found_ref) + BUG_ON(back->bytes != max_size); back->node.found_ref = 1; back->found_ref += 1; + back->bytes = max_size; } else { if (back->node.found_extent_tree) { fprintf(stderr, "Extent back ref already exists " @@ -2359,17 +2542,17 @@ static int process_extent_ref_v0(struct cache_tree *extent_cache, btrfs_item_key_to_cpu(leaf, &key, slot); ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0); if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) { - add_tree_backref(extent_cache, key.objectid, key.offset, - 0, 0); + add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0); } else { add_data_backref(extent_cache, key.objectid, key.offset, 0, - 0, 0, btrfs_ref_count_v0(leaf, ref0), 0); + 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0); } return 0; } #endif -static int process_extent_item(struct cache_tree *extent_cache, +static int process_extent_item(struct btrfs_root *root, + struct cache_tree *extent_cache, struct extent_buffer *eb, int slot) { struct btrfs_extent_item *ei; @@ -2383,9 +2566,18 @@ static int process_extent_item(struct cache_tree *extent_cache, u32 item_size = btrfs_item_size_nr(eb, slot); u64 refs = 0; u64 offset; + u64 num_bytes; + int metadata = 0; btrfs_item_key_to_cpu(eb, &key, slot); + if (key.type == BTRFS_METADATA_ITEM_KEY) { + metadata = 1; + num_bytes = root->leafsize; + } else { + num_bytes = key.offset; + } + if (item_size < sizeof(*ei)) { #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 struct btrfs_extent_item_v0 *ei0; @@ -2396,17 +2588,19 @@ static int process_extent_item(struct cache_tree *extent_cache, BUG(); #endif return add_extent_rec(extent_cache, NULL, key.objectid, - key.offset, refs, 0, 0, 0); + num_bytes, refs, 0, 0, 0, 
metadata, 1, + num_bytes); } ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); refs = btrfs_extent_refs(eb, ei); - add_extent_rec(extent_cache, NULL, key.objectid, key.offset, - refs, 0, 0, 0); + add_extent_rec(extent_cache, NULL, key.objectid, num_bytes, + refs, 0, 0, 0, metadata, 1, num_bytes); ptr = (unsigned long)(ei + 1); - if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK) + if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK && + key.type == BTRFS_EXTENT_ITEM_KEY) ptr += sizeof(struct btrfs_tree_block_info); end = (unsigned long)ei + item_size; @@ -2431,24 +2625,452 @@ static int process_extent_item(struct cache_tree *extent_cache, dref), btrfs_extent_data_ref_offset(eb, dref), btrfs_extent_data_ref_count(eb, dref), - 0); + 0, num_bytes); break; case BTRFS_SHARED_DATA_REF_KEY: sref = (struct btrfs_shared_data_ref *)(iref + 1); add_data_backref(extent_cache, key.objectid, offset, 0, 0, 0, btrfs_shared_data_ref_count(eb, sref), - 0); + 0, num_bytes); break; default: - BUG(); + fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n", + key.objectid, key.type, num_bytes); + goto out; } ptr += btrfs_extent_inline_ref_size(type); } WARN_ON(ptr > end); +out: + return 0; +} + +static int check_cache_range(struct btrfs_root *root, + struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes) +{ + struct btrfs_free_space *entry; + u64 *logical; + u64 bytenr; + int stripe_len; + int i, nr, ret; + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + ret = btrfs_rmap_block(&root->fs_info->mapping_tree, + cache->key.objectid, bytenr, 0, + &logical, &nr, &stripe_len); + if (ret) + return ret; + + while (nr--) { + if (logical[nr] + stripe_len <= offset) + continue; + if (offset + bytes <= logical[nr]) + continue; + if (logical[nr] == offset) { + if (stripe_len >= bytes) { + kfree(logical); + return 0; + } + bytes -= stripe_len; + offset += stripe_len; + } else if (logical[nr] < offset) { + if (logical[nr] + 
stripe_len >= + offset + bytes) { + kfree(logical); + return 0; + } + bytes = (offset + bytes) - + (logical[nr] + stripe_len); + offset = logical[nr] + stripe_len; + } else { + /* + * Could be tricky, the super may land in the + * middle of the area we're checking. First + * check the easiest case, it's at the end. + */ + if (logical[nr] + stripe_len >= + bytes + offset) { + bytes = logical[nr] - offset; + continue; + } + + /* Check the left side */ + ret = check_cache_range(root, cache, + offset, + logical[nr] - offset); + if (ret) { + kfree(logical); + return ret; + } + + /* Now we continue with the right side */ + bytes = (offset + bytes) - + (logical[nr] + stripe_len); + offset = logical[nr] + stripe_len; + } + } + + kfree(logical); + } + + entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes); + if (!entry) { + fprintf(stderr, "There is no free space entry for %Lu-%Lu\n", + offset, offset+bytes); + return -EINVAL; + } + + if (entry->offset != offset) { + fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset, + entry->offset); + return -EINVAL; + } + + if (entry->bytes != bytes) { + fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n", + bytes, entry->bytes, offset); + return -EINVAL; + } + + unlink_free_space(cache->free_space_ctl, entry); + free(entry); return 0; } +static int verify_space_cache(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) +{ + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_key key; + u64 last; + int ret = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + root = root->fs_info->extent_root; + + last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET); + + key.objectid = last; + key.offset = 0; + key.type = BTRFS_EXTENT_ITEM_KEY; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ret; + ret = 0; + while (1) { + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret < 
0) + return ret; + if (ret > 0) { + ret = 0; + break; + } + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid >= cache->key.offset + cache->key.objectid) + break; + if (key.type != BTRFS_EXTENT_ITEM_KEY && + key.type != BTRFS_METADATA_ITEM_KEY) { + path->slots[0]++; + continue; + } + + if (last == key.objectid) { + last = key.objectid + key.offset; + path->slots[0]++; + continue; + } + + ret = check_cache_range(root, cache, last, + key.objectid - last); + if (ret) + break; + if (key.type == BTRFS_EXTENT_ITEM_KEY) + last = key.objectid + key.offset; + else + last = key.objectid + root->leafsize; + path->slots[0]++; + } + + if (last < cache->key.objectid + cache->key.offset) + ret = check_cache_range(root, cache, last, + cache->key.objectid + + cache->key.offset - last); + btrfs_free_path(path); + + if (!ret && + !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) { + fprintf(stderr, "There are still entries left in the space " + "cache\n"); + ret = -EINVAL; + } + + return ret; +} + +static int check_space_cache(struct btrfs_root *root) +{ + struct btrfs_block_group_cache *cache; + u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE; + int ret; + int error = 0; + + if (btrfs_super_generation(root->fs_info->super_copy) != + btrfs_super_cache_generation(root->fs_info->super_copy)) { + printf("cache and super generation don't match, space cache " + "will be invalidated\n"); + return 0; + } + + while (1) { + cache = btrfs_lookup_first_block_group(root->fs_info, start); + if (!cache) + break; + + start = cache->key.objectid + cache->key.offset; + if (!cache->free_space_ctl) { + int sectorsize; + + if (cache->flags & (BTRFS_BLOCK_GROUP_METADATA | + BTRFS_BLOCK_GROUP_SYSTEM)) + sectorsize = root->leafsize; + else + sectorsize = root->sectorsize; + + if (btrfs_init_free_space_ctl(cache, sectorsize)) { + ret = -ENOMEM; + break; + } + } else { + btrfs_remove_free_space_cache(cache); + } + + ret = 
load_free_space_cache(root->fs_info, cache); + if (!ret) + continue; + + ret = verify_space_cache(root, cache); + if (ret) { + fprintf(stderr, "cache appears valid but isnt %Lu\n", + cache->key.objectid); + error++; + } + } + + return error ? -EINVAL : 0; +} + +static int check_extent_exists(struct btrfs_root *root, u64 bytenr, + u64 num_bytes) +{ + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_key key; + int ret; + + path = btrfs_alloc_path(); + if (!path) { + fprintf(stderr, "Error allocing path\n"); + return -ENOMEM; + } + + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = 0; + + +again: + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, + 0, 0); + if (ret < 0) { + fprintf(stderr, "Error looking up extent record %d\n", ret); + btrfs_free_path(path); + return ret; + } else if (ret) { + if (path->slots[0]) + path->slots[0]--; + else + btrfs_prev_leaf(root, path); + } + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + + /* + * Block group items come before extent items if they have the same + * bytenr, so walk back one more just in case. Dear future traveler, + * first congrats on mastering time travel. Now if it's not too much + * trouble could you go back to 2006 and tell Chris to make the + * BLOCK_GROUP_ITEM_KEY lower than the EXTENT_ITEM_KEY please? 
+ */ + if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { + if (path->slots[0]) + path->slots[0]--; + else + btrfs_prev_leaf(root, path); + } + + while (num_bytes) { + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) { + fprintf(stderr, "Error going to next leaf " + "%d\n", ret); + btrfs_free_path(path); + return ret; + } else if (ret) { + break; + } + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.type != BTRFS_EXTENT_ITEM_KEY) { + path->slots[0]++; + continue; + } + if (key.objectid + key.offset < bytenr) { + path->slots[0]++; + continue; + } + if (key.objectid > bytenr + num_bytes) + break; + + if (key.objectid == bytenr) { + if (key.offset >= num_bytes) { + num_bytes = 0; + break; + } + num_bytes -= key.offset; + bytenr += key.offset; + } else if (key.objectid < bytenr) { + if (key.objectid + key.offset >= bytenr + num_bytes) { + num_bytes = 0; + break; + } + num_bytes = (bytenr + num_bytes) - + (key.objectid + key.offset); + bytenr = key.objectid + key.offset; + } else { + if (key.objectid + key.offset < bytenr + num_bytes) { + u64 new_start = key.objectid + key.offset; + u64 new_bytes = bytenr + num_bytes - new_start; + + /* + * Weird case, the extent is in the middle of + * our range, we'll have to search one side + * and then the other. Not sure if this happens + * in real life, but no harm in coding it up + * anyway just in case. 
+ */ + btrfs_release_path(root, path); + ret = check_extent_exists(root, new_start, + new_bytes); + if (ret) { + fprintf(stderr, "Right section didn't " + "have a record\n"); + break; + } + num_bytes = key.objectid - bytenr; + goto again; + } + num_bytes = key.objectid - bytenr; + } + path->slots[0]++; + } + ret = 0; + + if (num_bytes) { + fprintf(stderr, "There are no extents for csum range " + "%Lu-%Lu\n", bytenr, bytenr+num_bytes); + ret = 1; + } + + btrfs_free_path(path); + return ret; +} + +static int check_csums(struct btrfs_root *root) +{ + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_key key; + u64 offset = 0, num_bytes = 0; + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + int errors = 0; + int ret; + + root = root->fs_info->csum_root; + + key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + key.type = BTRFS_EXTENT_CSUM_KEY; + key.offset = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + fprintf(stderr, "Error searching csum tree %d\n", ret); + btrfs_free_path(path); + return ret; + } + + if (ret > 0 && path->slots[0]) + path->slots[0]--; + ret = 0; + + while (1) { + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) { + fprintf(stderr, "Error going to next leaf " + "%d\n", ret); + break; + } + if (ret) + break; + } + leaf = path->nodes[0]; + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.type != BTRFS_EXTENT_CSUM_KEY) { + path->slots[0]++; + continue; + } + + if (!num_bytes) { + offset = key.offset; + } else if (key.offset != offset + num_bytes) { + ret = check_extent_exists(root, offset, num_bytes); + if (ret) { + fprintf(stderr, "Csum exists for %Lu-%Lu but " + "there is no extent record\n", + offset, offset+num_bytes); + errors++; + } + offset = key.offset; + num_bytes = 0; + } + + num_bytes += (btrfs_item_size_nr(leaf, path->slots[0]) / + 
csum_size) * root->sectorsize; + path->slots[0]++; + } + + btrfs_free_path(path); + return errors; +} + static int run_next_block(struct btrfs_root *root, struct block_info *bits, int bits_nr, @@ -2509,9 +3131,19 @@ static int run_next_block(struct btrfs_root *root, /* fixme, get the real parent transid */ buf = read_tree_block(root, bytenr, size, 0); + if (!extent_buffer_uptodate(buf)) { + record_bad_block_io(root->fs_info, + extent_cache, bytenr, size); + goto out; + } + nritems = btrfs_header_nritems(buf); - ret = btrfs_lookup_extent_info(NULL, root, bytenr, size, NULL, &flags); + ret = btrfs_lookup_extent_info(NULL, root, bytenr, + btrfs_header_level(buf), 1, NULL, + &flags); + if (ret < 0) + flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { parent = bytenr; @@ -2522,6 +3154,8 @@ static int run_next_block(struct btrfs_root *root, } ret = check_block(root, extent_cache, buf, flags); + if (ret) + goto out; if (btrfs_is_leaf(buf)) { btree_space_waste += btrfs_leaf_free_space(root, buf); @@ -2529,7 +3163,13 @@ static int run_next_block(struct btrfs_root *root, struct btrfs_file_extent_item *fi; btrfs_item_key_to_cpu(buf, &key, i); if (key.type == BTRFS_EXTENT_ITEM_KEY) { - process_extent_item(extent_cache, buf, i); + process_extent_item(root, extent_cache, buf, + i); + continue; + } + if (key.type == BTRFS_METADATA_ITEM_KEY) { + process_extent_item(root, extent_cache, buf, + i); continue; } if (key.type == BTRFS_EXTENT_CSUM_KEY) { @@ -2570,7 +3210,7 @@ static int run_next_block(struct btrfs_root *root, ref), btrfs_extent_data_ref_offset(buf, ref), btrfs_extent_data_ref_count(buf, ref), - 0); + 0, root->sectorsize); continue; } if (key.type == BTRFS_SHARED_DATA_REF_KEY) { @@ -2580,7 +3220,7 @@ static int run_next_block(struct btrfs_root *root, add_data_backref(extent_cache, key.objectid, key.offset, 0, 0, 0, btrfs_shared_data_ref_count(buf, ref), - 0); + 0, root->sectorsize); continue; } if (key.type != BTRFS_EXTENT_DATA_KEY) @@ 
-2603,26 +3243,34 @@ static int run_next_block(struct btrfs_root *root, ret = add_extent_rec(extent_cache, NULL, btrfs_file_extent_disk_bytenr(buf, fi), btrfs_file_extent_disk_num_bytes(buf, fi), - 0, 0, 1, 1); + 0, 0, 1, 1, 0, 0, + btrfs_file_extent_disk_num_bytes(buf, fi)); add_data_backref(extent_cache, btrfs_file_extent_disk_bytenr(buf, fi), parent, owner, key.objectid, key.offset - - btrfs_file_extent_offset(buf, fi), 1, 1); + btrfs_file_extent_offset(buf, fi), 1, 1, + btrfs_file_extent_disk_num_bytes(buf, fi)); BUG_ON(ret); } } else { int level; + struct btrfs_key first_key; + + first_key.objectid = 0; + + if (nritems > 0) + btrfs_item_key_to_cpu(buf, &first_key, 0); level = btrfs_header_level(buf); for (i = 0; i < nritems; i++) { u64 ptr = btrfs_node_blockptr(buf, i); u32 size = btrfs_level_size(root, level - 1); btrfs_node_key_to_cpu(buf, &key, i); ret = add_extent_rec(extent_cache, &key, - ptr, size, 0, 0, 1, 0); + ptr, size, 0, 0, 1, 0, 1, 0, + size); BUG_ON(ret); - add_tree_backref(extent_cache, ptr, parent, - owner, 1); + add_tree_backref(extent_cache, ptr, parent, owner, 1); if (level > 1) { add_pending(nodes, seen, ptr, size); @@ -2636,22 +3284,22 @@ static int run_next_block(struct btrfs_root *root, total_btree_bytes += buf->len; if (fs_root_objectid(btrfs_header_owner(buf))) total_fs_tree_bytes += buf->len; + if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) + total_extent_tree_bytes += buf->len; if (!found_old_backref && btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID && btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) found_old_backref = 1; +out: free_extent_buffer(buf); return 0; } static int add_root_to_pending(struct extent_buffer *buf, - struct block_info *bits, - int bits_nr, struct cache_tree *extent_cache, struct cache_tree *pending, struct cache_tree *seen, - struct cache_tree *reada, struct cache_tree *nodes, struct btrfs_key *root_key) { @@ -2660,25 +3308,559 @@ 
static int add_root_to_pending(struct extent_buffer *buf, else add_pending(pending, seen, buf->start, buf->len); add_extent_rec(extent_cache, NULL, buf->start, buf->len, - 0, 1, 1, 0); + 0, 1, 1, 0, 1, 0, buf->len); if (root_key->objectid == BTRFS_TREE_RELOC_OBJECTID || btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) - add_tree_backref(extent_cache, buf->start, buf->start, 0, 1); + add_tree_backref(extent_cache, buf->start, buf->start, + 0, 1); else add_tree_backref(extent_cache, buf->start, 0, root_key->objectid, 1); return 0; } -static int check_extent_refs(struct btrfs_root *root, - struct cache_tree *extent_cache) +/* as we fix the tree, we might be deleting blocks that + * we're tracking for repair. This hook makes sure we + * remove any backrefs for blocks as we are fixing them. + */ +static int free_extent_hook(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner, u64 offset, + int refs_to_drop) +{ + struct extent_record *rec; + struct cache_extent *cache; + int is_data; + struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache; + + is_data = owner >= BTRFS_FIRST_FREE_OBJECTID; + cache = find_cache_extent(extent_cache, bytenr, num_bytes); + if (!cache) + return 0; + + rec = container_of(cache, struct extent_record, cache); + if (is_data) { + struct data_backref *back; + back = find_data_backref(rec, parent, root_objectid, owner, + offset, 1, num_bytes); + if (!back) + goto out; + if (back->node.found_ref) { + back->found_ref -= refs_to_drop; + if (rec->refs) + rec->refs -= refs_to_drop; + } + if (back->node.found_extent_tree) { + back->num_refs -= refs_to_drop; + if (rec->extent_item_refs) + rec->extent_item_refs -= refs_to_drop; + } + if (back->found_ref == 0) + back->node.found_ref = 0; + if (back->num_refs == 0) + back->node.found_extent_tree = 0; + + if (!back->node.found_extent_tree && back->node.found_ref) { + list_del(&back->node.list); + free(back); + 
} + } else { + struct tree_backref *back; + back = find_tree_backref(rec, parent, root_objectid); + if (!back) + goto out; + if (back->node.found_ref) { + if (rec->refs) + rec->refs--; + back->node.found_ref = 0; + } + if (back->node.found_extent_tree) { + if (rec->extent_item_refs) + rec->extent_item_refs--; + back->node.found_extent_tree = 0; + } + if (!back->node.found_extent_tree && back->node.found_ref) { + list_del(&back->node.list); + free(back); + } + } + maybe_free_extent_rec(extent_cache, rec); +out: + return 0; +} + +static int delete_extent_records(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 new_len) +{ + struct btrfs_key key; + struct btrfs_key found_key; + struct extent_buffer *leaf; + int ret; + int slot; + + + key.objectid = bytenr; + key.type = (u8)-1; + key.offset = (u64)-1; + + while(1) { + ret = btrfs_search_slot(trans, root->fs_info->extent_root, + &key, path, 0, 1); + if (ret < 0) + break; + + if (ret > 0) { + ret = 0; + if (path->slots[0] == 0) + break; + path->slots[0]--; + } + ret = 0; + + leaf = path->nodes[0]; + slot = path->slots[0]; + + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (found_key.objectid != bytenr) + break; + + if (found_key.type != BTRFS_EXTENT_ITEM_KEY && + found_key.type != BTRFS_METADATA_ITEM_KEY && + found_key.type != BTRFS_TREE_BLOCK_REF_KEY && + found_key.type != BTRFS_EXTENT_DATA_REF_KEY && + found_key.type != BTRFS_EXTENT_REF_V0_KEY && + found_key.type != BTRFS_SHARED_BLOCK_REF_KEY && + found_key.type != BTRFS_SHARED_DATA_REF_KEY) { + btrfs_release_path(NULL, path); + if (found_key.type == 0) { + if (found_key.offset == 0) + break; + key.offset = found_key.offset - 1; + key.type = found_key.type; + } + key.type = found_key.type - 1; + key.offset = (u64)-1; + continue; + } + + fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n", + found_key.objectid, found_key.type, found_key.offset); + + ret = btrfs_del_item(trans, 
root->fs_info->extent_root, path); + if (ret) + break; + btrfs_release_path(NULL, path); + + if (found_key.type == BTRFS_EXTENT_ITEM_KEY || + found_key.type == BTRFS_METADATA_ITEM_KEY) { + u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ? + found_key.offset : root->leafsize; + + ret = btrfs_update_block_group(trans, root, bytenr, + bytes, 0, 0); + if (ret) + break; + } + } + + btrfs_release_path(NULL, path); + return ret; +} + +/* + * for a single backref, this will allocate a new extent + * and add the backref to it. + */ +static int record_extent(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *info, + struct btrfs_path *path, + struct extent_record *rec, + struct extent_backref *back, + int allocated, u64 flags) +{ + int ret; + struct btrfs_root *extent_root = info->extent_root; + struct extent_buffer *leaf; + struct btrfs_key ins_key; + struct btrfs_extent_item *ei; + struct tree_backref *tback; + struct data_backref *dback; + struct btrfs_tree_block_info *bi; + + if (!back->is_data) + rec->max_size = max_t(u64, rec->max_size, + info->extent_root->leafsize); + + if (!allocated) { + u32 item_size = sizeof(*ei); + + if (!back->is_data) + item_size += sizeof(*bi); + + ins_key.objectid = rec->start; + ins_key.offset = rec->max_size; + ins_key.type = BTRFS_EXTENT_ITEM_KEY; + + ret = btrfs_insert_empty_item(trans, extent_root, path, + &ins_key, item_size); + if (ret) + goto fail; + + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_item); + + btrfs_set_extent_refs(leaf, ei, 0); + btrfs_set_extent_generation(leaf, ei, rec->generation); + + if (back->is_data) { + btrfs_set_extent_flags(leaf, ei, + BTRFS_EXTENT_FLAG_DATA); + } else { + struct btrfs_disk_key copy_key;; + + tback = (struct tree_backref *)back; + bi = (struct btrfs_tree_block_info *)(ei + 1); + memset_extent_buffer(leaf, 0, (unsigned long)bi, + sizeof(*bi)); + memset(©_key, 0, sizeof(copy_key)); + + copy_key.objectid = le64_to_cpu(rec->info_objectid); 
+ btrfs_set_tree_block_level(leaf, bi, rec->info_level); + btrfs_set_tree_block_key(leaf, bi, ©_key); + + btrfs_set_extent_flags(leaf, ei, + BTRFS_EXTENT_FLAG_TREE_BLOCK | flags); + } + + btrfs_mark_buffer_dirty(leaf); + ret = btrfs_update_block_group(trans, extent_root, rec->start, + rec->max_size, 1, 0); + if (ret) + goto fail; + btrfs_release_path(NULL, path); + } + + if (back->is_data) { + u64 parent; + int i; + + dback = (struct data_backref *)back; + if (back->full_backref) + parent = dback->parent; + else + parent = 0; + + for (i = 0; i < dback->found_ref; i++) { + /* if parent != 0, we're doing a full backref + * passing BTRFS_FIRST_FREE_OBJECTID as the owner + * just makes the backref allocator create a data + * backref + */ + ret = btrfs_inc_extent_ref(trans, info->extent_root, + rec->start, rec->max_size, + parent, + dback->root, + parent ? + BTRFS_FIRST_FREE_OBJECTID : + dback->owner, + dback->offset); + if (ret) + break; + } + fprintf(stderr, "adding new data backref" + " on %llu %s %llu owner %llu" + " offset %llu found %d\n", + (unsigned long long)rec->start, + back->full_backref ? + "parent" : "root", + back->full_backref ? + (unsigned long long)parent : + (unsigned long long)dback->root, + (unsigned long long)dback->owner, + (unsigned long long)dback->offset, + dback->found_ref); + } else { + u64 parent; + + tback = (struct tree_backref *)back; + if (back->full_backref) + parent = tback->parent; + else + parent = 0; + + ret = btrfs_inc_extent_ref(trans, info->extent_root, + rec->start, rec->max_size, + parent, tback->root, 0, 0); + fprintf(stderr, "adding new tree backref on " + "start %llu len %llu parent %llu root %llu\n", + rec->start, rec->max_size, tback->parent, tback->root); + } + if (ret) + goto fail; +fail: + btrfs_release_path(NULL, path); + return ret; +} + +/* + * when an incorrect extent item is found, this will delete + * all of the existing entries for it and recreate them + * based on what the tree scan found. 
+ */ +static int fixup_extent_refs(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *info, + struct extent_record *rec) +{ + int ret; + struct btrfs_path *path; + struct list_head *cur = rec->backrefs.next; + struct cache_extent *cache; + struct extent_backref *back; + int allocated = 0; + u64 flags = 0; + + /* remember our flags for recreating the extent */ + ret = btrfs_lookup_extent_info(NULL, info->extent_root, rec->start, + rec->max_size, rec->metadata, NULL, + &flags); + if (ret < 0) + flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; + + path = btrfs_alloc_path(); + + /* step one, delete all the existing records */ + ret = delete_extent_records(trans, info->extent_root, path, + rec->start, rec->max_size); + + if (ret < 0) + goto out; + + /* was this block corrupt? If so, don't add references to it */ + cache = find_cache_extent(info->corrupt_blocks, rec->start, rec->max_size); + if (cache) { + ret = 0; + goto out; + } + + /* step two, recreate all the refs we did find */ + while(cur != &rec->backrefs) { + back = list_entry(cur, struct extent_backref, list); + cur = cur->next; + + /* + * if we didn't find any references, don't create a + * new extent record + */ + if (!back->found_ref) + continue; + + ret = record_extent(trans, info, path, rec, back, allocated, flags); + allocated = 1; + + if (ret) + goto out; + } +out: + btrfs_free_path(path); + return ret; +} + +/* right now we only prune from the extent allocation tree */ +static int prune_one_block(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *info, + struct btrfs_corrupt_block *corrupt) +{ + int ret; + struct btrfs_path path; + struct extent_buffer *eb; + u64 found; + int slot; + int nritems; + int level = corrupt->level + 1; + + btrfs_init_path(&path); +again: + /* we want to stop at the parent to our busted block */ + path.lowest_level = level; + + ret = btrfs_search_slot(trans, info->extent_root, + &corrupt->key, &path, -1, 1); + + if (ret < 0) + goto out; + + eb = path.nodes[level]; + if 
(!eb) { + ret = -ENOENT; + goto out; + } + + /* + * hopefully the search gave us the block we want to prune, + * lets try that first + */ + slot = path.slots[level]; + found = btrfs_node_blockptr(eb, slot); + if (found == corrupt->cache.start) + goto del_ptr; + + nritems = btrfs_header_nritems(eb); + + /* the search failed, lets scan this node and hope we find it */ + for (slot = 0; slot < nritems; slot++) { + found = btrfs_node_blockptr(eb, slot); + if (found == corrupt->cache.start) + goto del_ptr; + } + /* + * we couldn't find the bad block. TODO, search all the nodes for pointers + * to this block + */ + if (eb == info->extent_root->node) { + ret = -ENOENT; + goto out; + } else { + level++; + btrfs_release_path(NULL, &path); + goto again; + } + +del_ptr: + printk("deleting pointer to block %Lu\n", corrupt->cache.start); + ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot); + +out: + btrfs_release_path(NULL, &path); + return ret; +} + +static int prune_corrupt_blocks(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *info) +{ + struct cache_extent *cache; + struct btrfs_corrupt_block *corrupt; + + cache = find_first_cache_extent(info->corrupt_blocks, 0); + while (1) { + if (!cache) + break; + corrupt = container_of(cache, struct btrfs_corrupt_block, cache); + prune_one_block(trans, info, corrupt); + cache = next_cache_extent(cache); + } + return 0; +} + +static void free_corrupt_blocks(struct btrfs_fs_info *info) +{ + struct cache_extent *cache; + struct btrfs_corrupt_block *corrupt; + + while (1) { + cache = find_first_cache_extent(info->corrupt_blocks, 0); + if (!cache) + break; + corrupt = container_of(cache, struct btrfs_corrupt_block, cache); + remove_cache_extent(info->corrupt_blocks, cache); + free(corrupt); + } +} + +static int check_block_group(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *info, + struct map_lookup *map, + int *reinit) +{ + struct btrfs_key key; + struct btrfs_path path; + int ret; + + key.objectid = 
map->ce.start; + key.offset = map->ce.size; + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, info->extent_root, + &key, &path, 0, 0); + btrfs_release_path(NULL, &path); + if (ret <= 0) + goto out; + + ret = btrfs_make_block_group(trans, info->extent_root, 0, map->type, + BTRFS_FIRST_CHUNK_TREE_OBJECTID, + key.objectid, key.offset); + *reinit = 1; +out: + return ret; +} + +static int check_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *info, int *reinit) +{ + struct cache_extent *ce; + struct map_lookup *map; + struct btrfs_mapping_tree *map_tree = &info->mapping_tree; + + /* this isn't quite working */ + return 0; + + ce = find_first_cache_extent(&map_tree->cache_tree, 0); + while (1) { + if (!ce) + break; + map = container_of(ce, struct map_lookup, ce); + check_block_group(trans, info, map, reinit); + ce = next_cache_extent(ce); + } + return 0; +} + +static int check_extent_refs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct cache_tree *extent_cache, int repair) { struct extent_record *rec; struct cache_extent *cache; int err = 0; + int ret = 0; + int fixed = 0; + int reinit = 0; + if (repair) { + /* + * if we're doing a repair, we have to make sure + * we don't allocate from the problem extents. 
+ * In the worst case, this will be all the + * extents in the FS + */ + cache = find_first_cache_extent(extent_cache, 0); + while(cache) { + rec = container_of(cache, struct extent_record, cache); + btrfs_pin_extent(root->fs_info, + rec->start, rec->max_size); + cache = next_cache_extent(cache); + } + + /* pin down all the corrupted blocks too */ + cache = find_first_cache_extent(root->fs_info->corrupt_blocks, 0); + while(cache) { + rec = container_of(cache, struct extent_record, cache); + btrfs_pin_extent(root->fs_info, + rec->start, rec->max_size); + cache = next_cache_extent(cache); + } + prune_corrupt_blocks(trans, root->fs_info); + check_block_groups(trans, root->fs_info, &reinit); + if (reinit) + btrfs_read_block_groups(root->fs_info->extent_root); + } while(1) { + fixed = 0; cache = find_first_cache_extent(extent_cache, 0); if (!cache) break; @@ -2690,19 +3872,39 @@ static int check_extent_refs(struct btrfs_root *root, fprintf(stderr, "extent item %llu, found %llu\n", (unsigned long long)rec->extent_item_refs, (unsigned long long)rec->refs); + if (!fixed && repair) { + ret = fixup_extent_refs(trans, root->fs_info, rec); + if (ret) + goto repair_abort; + fixed = 1; + } err = 1; + } if (all_backpointers_checked(rec, 1)) { fprintf(stderr, "backpointer mismatch on [%llu %llu]\n", (unsigned long long)rec->start, (unsigned long long)rec->nr); + if (!fixed && repair) { + ret = fixup_extent_refs(trans, root->fs_info, rec); + if (ret) + goto repair_abort; + fixed = 1; + } + err = 1; } if (!rec->owner_ref_checked) { fprintf(stderr, "owner ref check failed [%llu %llu]\n", (unsigned long long)rec->start, (unsigned long long)rec->nr); + if (!fixed && repair) { + ret = fixup_extent_refs(trans, root->fs_info, rec); + if (ret) + goto repair_abort; + fixed = 1; + } err = 1; } @@ -2710,16 +3912,30 @@ static int check_extent_refs(struct btrfs_root *root, free_all_extent_backrefs(rec); free(rec); } +repair_abort: + if (repair) { + if (ret) { + fprintf(stderr, "failed to repair 
damaged filesystem, aborting\n"); + exit(1); + } else { + btrfs_fix_block_accounting(trans, root); + } + if (err) + fprintf(stderr, "repaired damaged extent references\n"); + return ret; + } return err; } -static int check_extents(struct btrfs_root *root) +static int check_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int repair) { struct cache_tree extent_cache; struct cache_tree seen; struct cache_tree pending; struct cache_tree reada; struct cache_tree nodes; + struct cache_tree corrupt_blocks; struct btrfs_path path; struct btrfs_key key; struct btrfs_key found_key; @@ -2736,6 +3952,13 @@ static int check_extents(struct btrfs_root *root) cache_tree_init(&pending); cache_tree_init(&nodes); cache_tree_init(&reada); + cache_tree_init(&corrupt_blocks); + + if (repair) { + root->fs_info->fsck_extent_cache = &extent_cache; + root->fs_info->free_extent_hook = free_extent_hook; + root->fs_info->corrupt_blocks = &corrupt_blocks; + } bits_nr = 1024; bits = malloc(bits_nr * sizeof(struct block_info)); @@ -2744,12 +3967,12 @@ static int check_extents(struct btrfs_root *root) exit(1); } - add_root_to_pending(root->fs_info->tree_root->node, bits, bits_nr, - &extent_cache, &pending, &seen, &reada, &nodes, + add_root_to_pending(root->fs_info->tree_root->node, + &extent_cache, &pending, &seen, &nodes, &root->fs_info->tree_root->root_key); - add_root_to_pending(root->fs_info->chunk_root->node, bits, bits_nr, - &extent_cache, &pending, &seen, &reada, &nodes, + add_root_to_pending(root->fs_info->chunk_root->node, + &extent_cache, &pending, &seen, &nodes, &root->fs_info->chunk_root->root_key); btrfs_init_path(&path); @@ -2780,9 +4003,8 @@ static int check_extents(struct btrfs_root *root) btrfs_root_bytenr(&ri), btrfs_level_size(root, btrfs_root_level(&ri)), 0); - add_root_to_pending(buf, bits, bits_nr, &extent_cache, - &pending, &seen, &reada, &nodes, - &found_key); + add_root_to_pending(buf, &extent_cache, &pending, + &seen, &nodes, &found_key); 
free_extent_buffer(buf); } path.slots[0]++; @@ -2794,76 +4016,161 @@ static int check_extents(struct btrfs_root *root) if (ret != 0) break; } - ret = check_extent_refs(root, &extent_cache); + ret = check_extent_refs(trans, root, &extent_cache, repair); + + if (repair) { + free_corrupt_blocks(root->fs_info); + root->fs_info->fsck_extent_cache = NULL; + root->fs_info->free_extent_hook = NULL; + root->fs_info->corrupt_blocks = NULL; + } + + free(bits); return ret; } -static void print_usage(void) -{ - fprintf(stderr, "usage: btrfsck dev\n"); - fprintf(stderr, "%s\n", BTRFS_BUILD_VERSION); - exit(1); -} +static struct option long_options[] = { + { "super", 1, NULL, 's' }, + { "repair", 0, NULL, 0 }, + { "init-csum-tree", 0, NULL, 0 }, + { "init-extent-tree", 0, NULL, 0 }, + { 0, 0, 0, 0} +}; -int main(int ac, char **av) +const char * const cmd_check_usage[] = { + "btrfs check [options] <device>", + "Check an unmounted btrfs filesystem.", + "", + "-s|--super <superblock> use this superblock copy", + "--repair try to repair the filesystem", + "--init-csum-tree create a new CRC tree", + "--init-extent-tree create a new extent tree", + NULL +}; + +int cmd_check(int argc, char **argv) { struct cache_tree root_cache; struct btrfs_root *root; + struct btrfs_fs_info *info; + struct btrfs_trans_handle *trans = NULL; u64 bytenr = 0; + char uuidbuf[37]; int ret; int num; + int repair = 0; + int option_index = 0; + int init_csum_tree = 0; + int rw = 0; while(1) { int c; - c = getopt(ac, av, "s:"); + c = getopt_long(argc, argv, "as:", long_options, + &option_index); if (c < 0) break; switch(c) { + case 'a': /* ignored */ break; case 's': num = atol(optarg); bytenr = btrfs_sb_offset(num); printf("using SB copy %d, bytenr %llu\n", num, (unsigned long long)bytenr); break; - default: - print_usage(); + case '?': + case 'h': + usage(cmd_check_usage); } + if (option_index == 1) { + printf("enabling repair mode\n"); + repair = 1; + rw = 1; + } else if (option_index == 2) { + 
printf("Creating a new CRC tree\n"); + init_csum_tree = 1; + rw = 1; + } + } - ac = ac - optind; + argc = argc - optind; - if (ac != 1) - print_usage(); + if (argc != 1) + usage(cmd_check_usage); radix_tree_init(); cache_tree_init(&root_cache); - if((ret = check_mounted(av[optind])) < 0) { + if((ret = check_mounted(argv[optind])) < 0) { fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret)); return ret; } else if(ret) { - fprintf(stderr, "%s is currently mounted. Aborting.\n", av[optind]); + fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]); return -EBUSY; } - root = open_ctree(av[optind], bytenr, 0); + info = open_ctree_fs_info(argv[optind], bytenr, 0, rw, 1); + if (!info) { + fprintf(stderr, "Couldn't open file system\n"); + return -EIO; + } - if (root == NULL) - return 1; + uuid_unparse(info->super_copy->fsid, uuidbuf); + printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf); + + if (!extent_buffer_uptodate(info->tree_root->node) || + !extent_buffer_uptodate(info->dev_root->node) || + !extent_buffer_uptodate(info->extent_root->node) || + !extent_buffer_uptodate(info->chunk_root->node)) { + fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n"); + return -EIO; + } + + root = info->fs_root; - ret = check_extents(root); + fprintf(stderr, "checking extents\n"); + if (rw) + trans = btrfs_start_transaction(root, 1); + + if (init_csum_tree) { + fprintf(stderr, "Reinit crc root\n"); + ret = btrfs_fsck_reinit_root(trans, info->csum_root); + if (ret) { + fprintf(stderr, "crc root initialization failed\n"); + return -EIO; + } + goto out; + } + ret = check_extents(trans, root, repair); + if (ret) + fprintf(stderr, "Errors found in extent allocation tree\n"); + + fprintf(stderr, "checking free space cache\n"); + ret = check_space_cache(root); if (ret) goto out; + + fprintf(stderr, "checking fs roots\n"); ret = check_fs_roots(root, &root_cache); if (ret) goto out; + fprintf(stderr, "checking csums\n"); + ret = 
check_csums(root); + if (ret) + goto out; + + fprintf(stderr, "checking root refs\n"); ret = check_root_refs(root, &root_cache); out: free_root_recs(&root_cache); + if (rw) { + ret = btrfs_commit_transaction(trans, root); + if (ret) + exit(1); + } close_ctree(root); - if (found_old_backref) { - /* + if (found_old_backref) { /* * there was a disk format change when mixed * backref was in testing tree. The old format * existed about one week. @@ -2881,6 +4188,8 @@ out: (unsigned long long)total_btree_bytes); printf("total fs tree bytes: %llu\n", (unsigned long long)total_fs_tree_bytes); + printf("total extent tree bytes: %llu\n", + (unsigned long long)total_extent_tree_bytes); printf("btree space waste bytes: %llu\n", (unsigned long long)btree_space_waste); printf("file data blocks allocated: %llu\n referenced %llu\n", diff --git a/cmds-device.c b/cmds-device.c new file mode 100644 index 0000000..41e79d3 --- /dev/null +++ b/cmds-device.c @@ -0,0 +1,411 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <errno.h> +#include <sys/stat.h> + +#include "kerncompat.h" +#include "ctree.h" +#include "ioctl.h" +#include "utils.h" + +#include "commands.h" + +/* FIXME - imported cruft, fix sparse errors and warnings */ +#ifdef __CHECKER__ +#define BLKGETSIZE64 0 +#define BTRFS_IOC_SNAP_CREATE_V2 0 +#define BTRFS_VOL_NAME_MAX 255 +struct btrfs_ioctl_vol_args { char name[BTRFS_VOL_NAME_MAX]; }; +static inline int ioctl(int fd, int define, void *arg) { return 0; } +#endif + +static const char * const device_cmd_group_usage[] = { + "btrfs device <command> [<args>]", + NULL +}; + +static const char * const cmd_add_dev_usage[] = { + "btrfs device add <device> [<device>...] <path>", + "Add a device to a filesystem", + NULL +}; + +static int cmd_add_dev(int argc, char **argv) +{ + char *mntpnt; + int i, fdmnt, ret=0, e; + + if (check_argc_min(argc, 3)) + usage(cmd_add_dev_usage); + + mntpnt = argv[argc - 1]; + + fdmnt = open_file_or_dir(mntpnt); + if (fdmnt < 0) { + fprintf(stderr, "ERROR: can't access to '%s'\n", mntpnt); + return 12; + } + + for (i = 1; i < argc - 1; i++ ){ + struct btrfs_ioctl_vol_args ioctl_args; + int devfd, res; + u64 dev_block_count = 0; + struct stat st; + int mixed = 0; + + res = check_mounted(argv[i]); + if (res < 0) { + fprintf(stderr, "error checking %s mount status\n", + argv[i]); + ret++; + continue; + } + if (res == 1) { + fprintf(stderr, "%s is mounted\n", argv[i]); + ret++; + continue; + } + + devfd = open(argv[i], O_RDWR); + if (devfd < 0) { + fprintf(stderr, "ERROR: Unable to open device '%s'\n", argv[i]); + ret++; + continue; + } + res = fstat(devfd, &st); + if (res) { + fprintf(stderr, "ERROR: Unable to stat '%s'\n", argv[i]); + close(devfd); + ret++; + continue; + } + if (!S_ISBLK(st.st_mode)) { + fprintf(stderr, "ERROR: '%s' is not a block device\n", argv[i]); + close(devfd); + ret++; + continue; + } + 
+ res = btrfs_prepare_device(devfd, argv[i], 1, &dev_block_count, + 0, &mixed, 0); + if (res) { + fprintf(stderr, "ERROR: Unable to init '%s'\n", argv[i]); + close(devfd); + ret++; + continue; + } + close(devfd); + + strncpy_null(ioctl_args.name, argv[i]); + res = ioctl(fdmnt, BTRFS_IOC_ADD_DEV, &ioctl_args); + e = errno; + if(res<0){ + fprintf(stderr, "ERROR: error adding the device '%s' - %s\n", + argv[i], strerror(e)); + ret++; + } + + } + + close(fdmnt); + if (ret) + return ret+20; + else + return 0; +} + +static const char * const cmd_rm_dev_usage[] = { + "btrfs device delete <device> [<device>...] <path>", + "Remove a device from a filesystem", + NULL +}; + +static int cmd_rm_dev(int argc, char **argv) +{ + char *mntpnt; + int i, fdmnt, ret=0, e; + + if (check_argc_min(argc, 3)) + usage(cmd_rm_dev_usage); + + mntpnt = argv[argc - 1]; + + fdmnt = open_file_or_dir(mntpnt); + if (fdmnt < 0) { + fprintf(stderr, "ERROR: can't access to '%s'\n", mntpnt); + return 12; + } + + for(i=1 ; i < argc - 1; i++ ){ + struct btrfs_ioctl_vol_args arg; + int res; + + strncpy_null(arg.name, argv[i]); + res = ioctl(fdmnt, BTRFS_IOC_RM_DEV, &arg); + e = errno; + if(res<0){ + fprintf(stderr, "ERROR: error removing the device '%s' - %s\n", + argv[i], strerror(e)); + ret++; + } + } + + close(fdmnt); + if( ret) + return ret+20; + else + return 0; +} + +static const char * const cmd_scan_dev_usage[] = { + "btrfs device scan [<device>...]", + "Scan devices for a btrfs filesystem", + NULL +}; + +static int cmd_scan_dev(int argc, char **argv) +{ + int i, fd, e; + int checklist = 1; + int devstart = 1; + + if( argc > 1 && !strcmp(argv[1],"--all-devices")){ + if (check_argc_max(argc, 2)) + usage(cmd_scan_dev_usage); + + checklist = 0; + devstart += 1; + } + + if(argc<=devstart){ + + int ret; + + printf("Scanning for Btrfs filesystems\n"); + if(checklist) + ret = btrfs_scan_block_devices(1); + else + ret = btrfs_scan_one_dir("/dev", 1); + if (ret){ + fprintf(stderr, "ERROR: error %d while 
scanning\n", ret); + return 18; + } + return 0; + } + + fd = open("/dev/btrfs-control", O_RDWR); + if (fd < 0) { + perror("failed to open /dev/btrfs-control"); + return 10; + } + + for( i = devstart ; i < argc ; i++ ){ + struct btrfs_ioctl_vol_args args; + int ret; + + printf("Scanning for Btrfs filesystems in '%s'\n", argv[i]); + + strncpy_null(args.name, argv[i]); + /* + * FIXME: which are the error code returned by this ioctl ? + * it seems that is impossible to understand if there no is + * a btrfs filesystem from an I/O error !!! + */ + ret = ioctl(fd, BTRFS_IOC_SCAN_DEV, &args); + e = errno; + + if( ret < 0 ){ + close(fd); + fprintf(stderr, "ERROR: unable to scan the device '%s' - %s\n", + argv[i], strerror(e)); + return 11; + } + } + + close(fd); + return 0; +} + +static const char * const cmd_ready_dev_usage[] = { + "btrfs device ready <device>", + "Check device to see if it has all of it's devices in cache for mounting", + NULL +}; + +static int cmd_ready_dev(int argc, char **argv) +{ + struct btrfs_ioctl_vol_args args; + int fd; + int ret; + + if (check_argc_min(argc, 2)) + usage(cmd_ready_dev_usage); + + fd = open("/dev/btrfs-control", O_RDWR); + if (fd < 0) { + perror("failed to open /dev/btrfs-control"); + return 10; + } + + strncpy(args.name, argv[argc - 1], BTRFS_PATH_NAME_MAX); + ret = ioctl(fd, BTRFS_IOC_DEVICES_READY, &args); + if (ret < 0) { + fprintf(stderr, "ERROR: unable to determine if the device '%s'" + " is ready for mounting - %s\n", argv[argc - 1], + strerror(errno)); + ret = 1; + } + + close(fd); + return ret; +} + +static const char * const cmd_dev_stats_usage[] = { + "btrfs device stats [-z] <path>|<device>", + "Show current device IO stats. 
-z to reset stats afterwards.", + NULL +}; + +static int cmd_dev_stats(int argc, char **argv) +{ + char *path; + struct btrfs_ioctl_fs_info_args fi_args; + struct btrfs_ioctl_dev_info_args *di_args = NULL; + int ret; + int fdmnt; + int i; + char c; + int err = 0; + __u64 flags = 0; + + optind = 1; + while ((c = getopt(argc, argv, "z")) != -1) { + switch (c) { + case 'z': + flags = BTRFS_DEV_STATS_RESET; + break; + case '?': + default: + fprintf(stderr, "ERROR: device stat args invalid.\n" + " device stat [-z] <path>|<device>\n" + " -z to reset stats after reading.\n"); + return 1; + } + } + + if (optind + 1 != argc) { + fprintf(stderr, "ERROR: device stat needs path|device as single" + " argument\n"); + return 1; + } + + path = argv[optind]; + + fdmnt = open_path_or_dev_mnt(path); + + if (fdmnt < 0) { + fprintf(stderr, "ERROR: can't access '%s'\n", path); + return 12; + } + + ret = get_fs_info(path, &fi_args, &di_args); + if (ret) { + fprintf(stderr, "ERROR: getting dev info for devstats failed: " + "%s\n", strerror(-ret)); + err = 1; + goto out; + } + if (!fi_args.num_devices) { + fprintf(stderr, "ERROR: no devices found\n"); + err = 1; + goto out; + } + + for (i = 0; i < fi_args.num_devices; i++) { + struct btrfs_ioctl_get_dev_stats args = {0}; + __u8 path[BTRFS_DEVICE_PATH_NAME_MAX + 1]; + + strncpy((char *)path, (char *)di_args[i].path, + BTRFS_DEVICE_PATH_NAME_MAX); + path[BTRFS_DEVICE_PATH_NAME_MAX] = '\0'; + + args.devid = di_args[i].devid; + args.nr_items = BTRFS_DEV_STAT_VALUES_MAX; + args.flags = flags; + + if (ioctl(fdmnt, BTRFS_IOC_GET_DEV_STATS, &args) < 0) { + fprintf(stderr, + "ERROR: ioctl(BTRFS_IOC_GET_DEV_STATS) on %s failed: %s\n", + path, strerror(errno)); + err = 1; + } else { + if (args.nr_items >= BTRFS_DEV_STAT_WRITE_ERRS + 1) + printf("[%s].write_io_errs %llu\n", + path, + (unsigned long long) args.values[ + BTRFS_DEV_STAT_WRITE_ERRS]); + if (args.nr_items >= BTRFS_DEV_STAT_READ_ERRS + 1) + printf("[%s].read_io_errs %llu\n", + path, + 
(unsigned long long) args.values[ + BTRFS_DEV_STAT_READ_ERRS]); + if (args.nr_items >= BTRFS_DEV_STAT_FLUSH_ERRS + 1) + printf("[%s].flush_io_errs %llu\n", + path, + (unsigned long long) args.values[ + BTRFS_DEV_STAT_FLUSH_ERRS]); + if (args.nr_items >= BTRFS_DEV_STAT_CORRUPTION_ERRS + 1) + printf("[%s].corruption_errs %llu\n", + path, + (unsigned long long) args.values[ + BTRFS_DEV_STAT_CORRUPTION_ERRS]); + if (args.nr_items >= BTRFS_DEV_STAT_GENERATION_ERRS + 1) + printf("[%s].generation_errs %llu\n", + path, + (unsigned long long) args.values[ + BTRFS_DEV_STAT_GENERATION_ERRS]); + } + } + +out: + free(di_args); + close(fdmnt); + + return err; +} + +const struct cmd_group device_cmd_group = { + device_cmd_group_usage, NULL, { + { "add", cmd_add_dev, cmd_add_dev_usage, NULL, 0 }, + { "delete", cmd_rm_dev, cmd_rm_dev_usage, NULL, 0 }, + { "scan", cmd_scan_dev, cmd_scan_dev_usage, NULL, 0 }, + { "ready", cmd_ready_dev, cmd_ready_dev_usage, NULL, 0 }, + { "stats", cmd_dev_stats, cmd_dev_stats_usage, NULL, 0 }, + { 0, 0, 0, 0, 0 } + } +}; + +int cmd_device(int argc, char **argv) +{ + return handle_command_group(&device_cmd_group, argc, argv); +} diff --git a/cmds-filesystem.c b/cmds-filesystem.c new file mode 100644 index 0000000..f41a72a --- /dev/null +++ b/cmds-filesystem.c @@ -0,0 +1,534 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <errno.h> +#include <uuid/uuid.h> +#include <ctype.h> + +#include "kerncompat.h" +#include "ctree.h" +#include "ioctl.h" +#include "utils.h" +#include "volumes.h" + +#include "version.h" + +#include "commands.h" + +static const char * const filesystem_cmd_group_usage[] = { + "btrfs filesystem [<group>] <command> [<args>]", + NULL +}; + +static const char * const cmd_df_usage[] = { + "btrfs filesystem df <path>", + "Show space usage information for a mount point", + NULL +}; + +static int cmd_df(int argc, char **argv) +{ + struct btrfs_ioctl_space_args *sargs, *sargs_orig; + u64 count = 0, i; + int ret; + int fd; + int e; + char *path; + + if (check_argc_exact(argc, 2)) + usage(cmd_df_usage); + + path = argv[1]; + + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access to '%s'\n", path); + return 12; + } + + sargs_orig = sargs = malloc(sizeof(struct btrfs_ioctl_space_args)); + if (!sargs) + return -ENOMEM; + + sargs->space_slots = 0; + sargs->total_spaces = 0; + + ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, sargs); + e = errno; + if (ret) { + fprintf(stderr, "ERROR: couldn't get space info on '%s' - %s\n", + path, strerror(e)); + close(fd); + free(sargs); + return ret; + } + if (!sargs->total_spaces) { + close(fd); + free(sargs); + return 0; + } + + count = sargs->total_spaces; + + sargs = realloc(sargs, sizeof(struct btrfs_ioctl_space_args) + + (count * sizeof(struct btrfs_ioctl_space_info))); + if (!sargs) { + close(fd); + free(sargs_orig); + return -ENOMEM; + } + + sargs->space_slots = count; + sargs->total_spaces = 0; + + ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, sargs); + e = errno; + if (ret) { + fprintf(stderr, "ERROR: couldn't get space info on '%s' - %s\n", + path, strerror(e)); + close(fd); + free(sargs); + return ret; + } + + for (i = 0; i < sargs->total_spaces; i++) { + char description[80]; + char *total_bytes; 
+ char *used_bytes; + int written = 0; + u64 flags = sargs->spaces[i].flags; + + memset(description, 0, 80); + + if (flags & BTRFS_BLOCK_GROUP_DATA) { + if (flags & BTRFS_BLOCK_GROUP_METADATA) { + snprintf(description, 14, "%s", + "Data+Metadata"); + written += 13; + } else { + snprintf(description, 5, "%s", "Data"); + written += 4; + } + } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) { + snprintf(description, 7, "%s", "System"); + written += 6; + } else if (flags & BTRFS_BLOCK_GROUP_METADATA) { + snprintf(description, 9, "%s", "Metadata"); + written += 8; + } + + if (flags & BTRFS_BLOCK_GROUP_RAID0) { + snprintf(description+written, 8, "%s", ", RAID0"); + written += 7; + } else if (flags & BTRFS_BLOCK_GROUP_RAID1) { + snprintf(description+written, 8, "%s", ", RAID1"); + written += 7; + } else if (flags & BTRFS_BLOCK_GROUP_DUP) { + snprintf(description+written, 6, "%s", ", DUP"); + written += 5; + } else if (flags & BTRFS_BLOCK_GROUP_RAID10) { + snprintf(description+written, 9, "%s", ", RAID10"); + written += 8; + } else if (flags & BTRFS_BLOCK_GROUP_RAID5) { + snprintf(description+written, 9, "%s", ", RAID5"); + written += 7; + } else if (flags & BTRFS_BLOCK_GROUP_RAID6) { + snprintf(description+written, 9, "%s", ", RAID6"); + written += 7; + } + + total_bytes = pretty_sizes(sargs->spaces[i].total_bytes); + used_bytes = pretty_sizes(sargs->spaces[i].used_bytes); + printf("%s: total=%s, used=%s\n", description, total_bytes, + used_bytes); + } + close(fd); + free(sargs); + + return 0; +} + +static int uuid_search(struct btrfs_fs_devices *fs_devices, char *search) +{ + char uuidbuf[37]; + struct list_head *cur; + struct btrfs_device *device; + int search_len = strlen(search); + + search_len = min(search_len, 37); + uuid_unparse(fs_devices->fsid, uuidbuf); + if (!strncmp(uuidbuf, search, search_len)) + return 1; + + list_for_each(cur, &fs_devices->devices) { + device = list_entry(cur, struct btrfs_device, dev_list); + if ((device->label && strcmp(device->label, search) 
== 0) || + strcmp(device->name, search) == 0) + return 1; + } + return 0; +} + +static void print_one_uuid(struct btrfs_fs_devices *fs_devices) +{ + char uuidbuf[37]; + struct list_head *cur; + struct btrfs_device *device; + char *super_bytes_used; + u64 devs_found = 0; + u64 total; + + uuid_unparse(fs_devices->fsid, uuidbuf); + device = list_entry(fs_devices->devices.next, struct btrfs_device, + dev_list); + if (device->label && device->label[0]) + printf("Label: '%s' ", device->label); + else + printf("Label: none "); + + super_bytes_used = pretty_sizes(device->super_bytes_used); + + total = device->total_devs; + printf(" uuid: %s\n\tTotal devices %llu FS bytes used %s\n", uuidbuf, + (unsigned long long)total, super_bytes_used); + + free(super_bytes_used); + + list_for_each(cur, &fs_devices->devices) { + char *total_bytes; + char *bytes_used; + device = list_entry(cur, struct btrfs_device, dev_list); + total_bytes = pretty_sizes(device->total_bytes); + bytes_used = pretty_sizes(device->bytes_used); + printf("\tdevid %4llu size %s used %s path %s\n", + (unsigned long long)device->devid, + total_bytes, bytes_used, device->name); + free(total_bytes); + free(bytes_used); + devs_found++; + } + if (devs_found < total) { + printf("\t*** Some devices missing\n"); + } + printf("\n"); +} + +static const char * const cmd_show_usage[] = { + "btrfs filesystem show [--all-devices] [<uuid>|<label>]", + "Show the structure of a filesystem", + "If no argument is given, structure of all present filesystems is shown.", + NULL +}; + +static int cmd_show(int argc, char **argv) +{ + struct list_head *all_uuids; + struct btrfs_fs_devices *fs_devices; + struct list_head *cur_uuid; + char *search = 0; + int ret; + int checklist = 1; + int searchstart = 1; + + if( argc > 1 && !strcmp(argv[1],"--all-devices")){ + checklist = 0; + searchstart += 1; + } + + if (check_argc_max(argc, searchstart + 1)) + usage(cmd_show_usage); + + if(checklist) + ret = btrfs_scan_block_devices(0); + else + ret 
= btrfs_scan_one_dir("/dev", 0); + + if (ret){ + fprintf(stderr, "ERROR: error %d while scanning\n", ret); + return 18; + } + + if(searchstart < argc) + search = argv[searchstart]; + + all_uuids = btrfs_scanned_uuids(); + list_for_each(cur_uuid, all_uuids) { + fs_devices = list_entry(cur_uuid, struct btrfs_fs_devices, + list); + if (search && uuid_search(fs_devices, search) == 0) + continue; + print_one_uuid(fs_devices); + } + printf("%s\n", BTRFS_BUILD_VERSION); + return 0; +} + +static const char * const cmd_sync_usage[] = { + "btrfs filesystem sync <path>", + "Force a sync on a filesystem", + NULL +}; + +static int cmd_sync(int argc, char **argv) +{ + int fd, res, e; + char *path; + + if (check_argc_exact(argc, 2)) + usage(cmd_sync_usage); + + path = argv[1]; + + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access to '%s'\n", path); + return 12; + } + + printf("FSSync '%s'\n", path); + res = ioctl(fd, BTRFS_IOC_SYNC); + e = errno; + close(fd); + if( res < 0 ){ + fprintf(stderr, "ERROR: unable to fs-syncing '%s' - %s\n", + path, strerror(e)); + return 16; + } + + return 0; +} + +static int parse_compress_type(char *s) +{ + if (strcmp(optarg, "zlib") == 0) + return BTRFS_COMPRESS_ZLIB; + else if (strcmp(optarg, "lzo") == 0) + return BTRFS_COMPRESS_LZO; + else { + fprintf(stderr, "Unknown compress type %s\n", s); + exit(1); + }; +} + +static const char * const cmd_defrag_usage[] = { + "btrfs filesystem defragment [options] <file>|<dir> [<file>|<dir>...]", + "Defragment a file or a directory", + "", + "-v be verbose", + "-c[zlib,lzo] compress the file while defragmenting", + "-f flush data to disk immediately after defragmenting", + "-s start defragment only from byte onward", + "-l len defragment only up to len bytes", + "-t size minimal size of file to be considered for defragmenting", + NULL +}; + +static int cmd_defrag(int argc, char **argv) +{ + int fd; + int flush = 0; + u64 start = 0; + u64 len = (u64)-1; + u32 thresh = 0; + 
int i; + int errors = 0; + int ret = 0; + int verbose = 0; + int fancy_ioctl = 0; + struct btrfs_ioctl_defrag_range_args range; + int e=0; + int compress_type = BTRFS_COMPRESS_NONE; + + optind = 1; + while(1) { + int c = getopt(argc, argv, "vc::fs:l:t:"); + if (c < 0) + break; + + switch(c) { + case 'c': + compress_type = BTRFS_COMPRESS_ZLIB; + if (optarg) + compress_type = parse_compress_type(optarg); + fancy_ioctl = 1; + break; + case 'f': + flush = 1; + fancy_ioctl = 1; + break; + case 'v': + verbose = 1; + break; + case 's': + start = parse_size(optarg); + fancy_ioctl = 1; + break; + case 'l': + len = parse_size(optarg); + fancy_ioctl = 1; + break; + case 't': + thresh = parse_size(optarg); + fancy_ioctl = 1; + break; + default: + usage(cmd_defrag_usage); + } + } + + if (check_argc_min(argc - optind, 1)) + usage(cmd_defrag_usage); + + memset(&range, 0, sizeof(range)); + range.start = start; + range.len = len; + range.extent_thresh = thresh; + if (compress_type) { + range.flags |= BTRFS_DEFRAG_RANGE_COMPRESS; + range.compress_type = compress_type; + } + if (flush) + range.flags |= BTRFS_DEFRAG_RANGE_START_IO; + + for (i = optind; i < argc; i++) { + if (verbose) + printf("%s\n", argv[i]); + fd = open_file_or_dir(argv[i]); + if (fd < 0) { + fprintf(stderr, "failed to open %s\n", argv[i]); + perror("open:"); + errors++; + continue; + } + if (!fancy_ioctl) { + ret = ioctl(fd, BTRFS_IOC_DEFRAG, NULL); + e=errno; + } else { + ret = ioctl(fd, BTRFS_IOC_DEFRAG_RANGE, &range); + if (ret && errno == ENOTTY) { + fprintf(stderr, "ERROR: defrag range ioctl not " + "supported in this kernel, please try " + "without any options.\n"); + errors++; + close(fd); + break; + } + e = errno; + } + if (ret) { + fprintf(stderr, "ERROR: defrag failed on %s - %s\n", + argv[i], strerror(e)); + errors++; + } + close(fd); + } + if (verbose) + printf("%s\n", BTRFS_BUILD_VERSION); + if (errors) { + fprintf(stderr, "total %d failures\n", errors); + exit(1); + } + + return errors; +} + +static 
const char * const cmd_resize_usage[] = { + "btrfs filesystem resize [devid:][+/-]<newsize>[gkm]|[devid:]max <path>", + "Resize a filesystem", + "If 'max' is passed, the filesystem will occupy all available space", + "on the device 'devid'.", + NULL +}; + +static int cmd_resize(int argc, char **argv) +{ + struct btrfs_ioctl_vol_args args; + int fd, res, len, e; + char *amount, *path; + + if (check_argc_exact(argc, 3)) + usage(cmd_resize_usage); + + amount = argv[1]; + path = argv[2]; + + len = strlen(amount); + if (len == 0 || len >= BTRFS_VOL_NAME_MAX) { + fprintf(stderr, "ERROR: size value too long ('%s)\n", + amount); + return 14; + } + + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access to '%s'\n", path); + return 12; + } + + printf("Resize '%s' of '%s'\n", path, amount); + strncpy_null(args.name, amount); + res = ioctl(fd, BTRFS_IOC_RESIZE, &args); + e = errno; + close(fd); + if( res < 0 ){ + fprintf(stderr, "ERROR: unable to resize '%s' - %s\n", + path, strerror(e)); + return 30; + } + return 0; +} + +static const char * const cmd_label_usage[] = { + "btrfs filesystem label [<device>|<mountpoint>] [<newlabel>]", + "Get or change the label of a filesystem", + "With one argument, get the label of filesystem on <device>.", + "If <newlabel> is passed, set the filesystem label to <newlabel>.", + NULL +}; + +static int cmd_label(int argc, char **argv) +{ + if (check_argc_min(argc, 2) || check_argc_max(argc, 3)) + usage(cmd_label_usage); + + if (argc > 2) + return set_label(argv[1], argv[2]); + else + return get_label(argv[1]); +} + +const struct cmd_group filesystem_cmd_group = { + filesystem_cmd_group_usage, NULL, { + { "df", cmd_df, cmd_df_usage, NULL, 0 }, + { "show", cmd_show, cmd_show_usage, NULL, 0 }, + { "sync", cmd_sync, cmd_sync_usage, NULL, 0 }, + { "defragment", cmd_defrag, cmd_defrag_usage, NULL, 0 }, + { "balance", cmd_balance, NULL, &balance_cmd_group, 1 }, + { "resize", cmd_resize, cmd_resize_usage, NULL, 0 }, + { 
"label", cmd_label, cmd_label_usage, NULL, 0 }, + { 0, 0, 0, 0, 0 }, + } +}; + +int cmd_filesystem(int argc, char **argv) +{ + return handle_command_group(&filesystem_cmd_group, argc, argv); +} diff --git a/cmds-inspect.c b/cmds-inspect.c new file mode 100644 index 0000000..30b41fc --- /dev/null +++ b/cmds-inspect.c @@ -0,0 +1,315 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdint.h> +#include <sys/ioctl.h> +#include <errno.h> + +#include "kerncompat.h" +#include "ioctl.h" +#include "utils.h" +#include "ctree.h" +#include "send-utils.h" + +#include "commands.h" +#include "btrfs-list.h" + +static const char * const inspect_cmd_group_usage[] = { + "btrfs inspect-internal <command> <args>", + NULL +}; + +static int __ino_to_path_fd(u64 inum, int fd, int verbose, const char *prepend) +{ + int ret; + int i; + struct btrfs_ioctl_ino_path_args ipa; + struct btrfs_data_container *fspath; + + fspath = malloc(4096); + if (!fspath) + return 1; + + memset(fspath, 0, sizeof(*fspath)); + ipa.inum = inum; + ipa.size = 4096; + ipa.fspath = (uintptr_t)fspath; + + ret = ioctl(fd, BTRFS_IOC_INO_PATHS, &ipa); + if (ret) { + printf("ioctl ret=%d, error: %s\n", ret, strerror(errno)); + goto out; + } + + if (verbose) + printf("ioctl ret=%d, bytes_left=%lu, bytes_missing=%lu, " + "cnt=%d, 
missed=%d\n", ret, + (unsigned long)fspath->bytes_left, + (unsigned long)fspath->bytes_missing, + fspath->elem_cnt, fspath->elem_missed); + + for (i = 0; i < fspath->elem_cnt; ++i) { + u64 ptr; + char *str; + ptr = (u64)(unsigned long)fspath->val; + ptr += fspath->val[i]; + str = (char *)(unsigned long)ptr; + if (prepend) + printf("%s/%s\n", prepend, str); + else + printf("%s\n", str); + } + +out: + free(fspath); + return ret; +} + +static const char * const cmd_inode_resolve_usage[] = { + "btrfs inspect-internal inode-resolve [-v] <inode> <path>", + "Get file system paths for the given inode", + NULL +}; + +static int cmd_inode_resolve(int argc, char **argv) +{ + int fd; + int verbose = 0; + int ret; + + optind = 1; + while (1) { + int c = getopt(argc, argv, "v"); + if (c < 0) + break; + + switch (c) { + case 'v': + verbose = 1; + break; + default: + usage(cmd_inode_resolve_usage); + } + } + + if (check_argc_exact(argc - optind, 2)) + usage(cmd_inode_resolve_usage); + + fd = open_file_or_dir(argv[optind+1]); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access '%s'\n", argv[optind+1]); + return 12; + } + + ret = __ino_to_path_fd(atoll(argv[optind]), fd, verbose, + argv[optind+1]); + close(fd); + return ret; + +} + +static const char * const cmd_logical_resolve_usage[] = { + "btrfs inspect-internal logical-resolve [-Pv] [-s bufsize] <logical> <path>", + "Get file system paths for the given logical address", + "-P skip the path resolving and print the inodes instead", + "-v verbose mode", + "-s bufsize set inode container's size. This is used to increase inode", + " container's size in case it is not enough to read all the ", + " resolved results. 
The max value one can set is 64k", + NULL +}; + +static int cmd_logical_resolve(int argc, char **argv) +{ + int ret; + int fd; + int i; + int verbose = 0; + int getpath = 1; + int bytes_left; + struct btrfs_ioctl_logical_ino_args loi; + struct btrfs_data_container *inodes; + u64 size = 4096; + char full_path[4096]; + char *path_ptr; + + optind = 1; + while (1) { + int c = getopt(argc, argv, "Pvs:"); + if (c < 0) + break; + + switch (c) { + case 'P': + getpath = 0; + break; + case 'v': + verbose = 1; + break; + case 's': + size = atoll(optarg); + break; + default: + usage(cmd_logical_resolve_usage); + } + } + + if (check_argc_exact(argc - optind, 2)) + usage(cmd_logical_resolve_usage); + + size = min(size, (u64)64 * 1024); + inodes = malloc(size); + if (!inodes) + return 1; + + memset(inodes, 0, sizeof(*inodes)); + loi.logical = atoll(argv[optind]); + loi.size = size; + loi.inodes = (uintptr_t)inodes; + + fd = open_file_or_dir(argv[optind+1]); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access '%s'\n", argv[optind+1]); + ret = 12; + goto out; + } + + ret = ioctl(fd, BTRFS_IOC_LOGICAL_INO, &loi); + if (ret) { + printf("ioctl ret=%d, error: %s\n", ret, strerror(errno)); + goto out; + } + + if (verbose) + printf("ioctl ret=%d, total_size=%llu, bytes_left=%lu, " + "bytes_missing=%lu, cnt=%d, missed=%d\n", + ret, size, + (unsigned long)inodes->bytes_left, + (unsigned long)inodes->bytes_missing, + inodes->elem_cnt, inodes->elem_missed); + + bytes_left = sizeof(full_path); + ret = snprintf(full_path, bytes_left, "%s/", argv[optind+1]); + path_ptr = full_path + ret; + bytes_left -= ret + 1; + BUG_ON(bytes_left < 0); + + for (i = 0; i < inodes->elem_cnt; i += 3) { + u64 inum = inodes->val[i]; + u64 offset = inodes->val[i+1]; + u64 root = inodes->val[i+2]; + int path_fd; + char *name; + + if (getpath) { + name = btrfs_list_path_for_root(fd, root); + if (IS_ERR(name)) { + ret = PTR_ERR(name); + goto out; + } + if (!name) { + path_ptr[-1] = '\0'; + path_fd = fd; + } else { 
+ path_ptr[-1] = '/'; + ret = snprintf(path_ptr, bytes_left, "%s", + name); + BUG_ON(ret >= bytes_left); + free(name); + path_fd = open_file_or_dir(full_path); + if (path_fd < 0) { + fprintf(stderr, "ERROR: can't access " + "'%s'\n", full_path); + goto out; + } + } + __ino_to_path_fd(inum, path_fd, verbose, full_path); + if (path_fd != fd) + close(path_fd); + } else { + printf("inode %llu offset %llu root %llu\n", inum, + offset, root); + } + } + +out: + if (fd >= 0) + close(fd); + free(inodes); + return ret; +} + +static const char * const cmd_subvolid_resolve_usage[] = { + "btrfs inspect-internal subvolid-resolve <subvolid> <path>", + "Get file system paths for the given subvolume ID.", + NULL +}; + +static int cmd_subvolid_resolve(int argc, char **argv) +{ + int ret; + int fd = -1; + u64 subvol_id; + char path[BTRFS_PATH_NAME_MAX + 1]; + + if (check_argc_exact(argc, 3)) + usage(cmd_subvolid_resolve_usage); + + fd = open_file_or_dir(argv[2]); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access '%s'\n", argv[2]); + ret = -ENOENT; + goto out; + } + + subvol_id = atoll(argv[1]); + ret = btrfs_subvolid_resolve(fd, path, sizeof(path), subvol_id); + + if (ret) { + fprintf(stderr, + "%s: btrfs_subvolid_resolve(subvol_id %llu) failed with ret=%d\n", + argv[0], (unsigned long long)subvol_id, ret); + goto out; + } + + path[BTRFS_PATH_NAME_MAX] = '\0'; + printf("%s\n", path); + +out: + if (fd >= 0) + close(fd); + return ret ? 
1 : 0; +} + +const struct cmd_group inspect_cmd_group = { + inspect_cmd_group_usage, NULL, { + { "inode-resolve", cmd_inode_resolve, cmd_inode_resolve_usage, + NULL, 0 }, + { "logical-resolve", cmd_logical_resolve, + cmd_logical_resolve_usage, NULL, 0 }, + { "subvolid-resolve", cmd_subvolid_resolve, + cmd_subvolid_resolve_usage, NULL, 0 }, + { 0, 0, 0, 0, 0 } + } +}; + +int cmd_inspect(int argc, char **argv) +{ + return handle_command_group(&inspect_cmd_group, argc, argv); +} diff --git a/cmds-qgroup.c b/cmds-qgroup.c new file mode 100644 index 0000000..95aca9b --- /dev/null +++ b/cmds-qgroup.c @@ -0,0 +1,441 @@ +/* + * Copyright (C) 2012 STRATO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include <sys/ioctl.h> +#include <unistd.h> +#include <getopt.h> + +#include "ctree.h" +#include "ioctl.h" + +#include "commands.h" +#include "qgroup.h" +#include "utils.h" + +static const char * const qgroup_cmd_group_usage[] = { + "btrfs qgroup <command> [options] <path>", + NULL +}; + +static int qgroup_assign(int assign, int argc, char **argv) +{ + int ret = 0; + int fd; + int e; + char *path = argv[3]; + struct btrfs_ioctl_qgroup_assign_args args; + + if (check_argc_exact(argc, 4)) + return -1; + + memset(&args, 0, sizeof(args)); + args.assign = assign; + args.src = parse_qgroupid(argv[1]); + args.dst = parse_qgroupid(argv[2]); + + /* + * FIXME src should accept subvol path + */ + if ((args.src >> 48) >= (args.dst >> 48)) { + fprintf(stderr, "ERROR: bad relation requested '%s'\n", path); + return 12; + } + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access '%s'\n", path); + return 12; + } + + ret = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &args); + e = errno; + close(fd); + if (ret < 0) { + fprintf(stderr, "ERROR: unable to assign quota group: %s\n", + strerror(e)); + return 30; + } + return 0; +} + +static int qgroup_create(int create, int argc, char **argv) +{ + int ret = 0; + int fd; + int e; + char *path = argv[2]; + struct btrfs_ioctl_qgroup_create_args args; + + if (check_argc_exact(argc, 3)) + return -1; + + memset(&args, 0, sizeof(args)); + args.create = create; + args.qgroupid = parse_qgroupid(argv[1]); + + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access '%s'\n", path); + return 12; + } + + ret = ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args); + e = errno; + close(fd); + if (ret < 0) { + fprintf(stderr, "ERROR: unable to create quota group: %s\n", + strerror(e)); + return 30; + } + return 0; +} + +void print_qgroup_info(u64 objectid, struct btrfs_qgroup_info_item *info) +{ + printf("%llu/%llu %lld %lld\n", objectid >> 48, + objectid & ((1ll << 48) - 1), + 
btrfs_stack_qgroup_info_referenced(info), + btrfs_stack_qgroup_info_exclusive(info)); +} + +int list_qgroups(int fd) +{ + int ret; + struct btrfs_ioctl_search_args args; + struct btrfs_ioctl_search_key *sk = &args.key; + struct btrfs_ioctl_search_header *sh; + unsigned long off = 0; + unsigned int i; + struct btrfs_qgroup_info_item *info; + + memset(&args, 0, sizeof(args)); + + /* search in the quota tree */ + sk->tree_id = BTRFS_QUOTA_TREE_OBJECTID; + + /* + * set the min and max to backref keys. The search will + * only send back this type of key now. + */ + sk->max_type = BTRFS_QGROUP_INFO_KEY; + sk->min_type = BTRFS_QGROUP_INFO_KEY; + sk->max_objectid = 0; + sk->max_offset = (u64)-1; + sk->max_transid = (u64)-1; + + /* just a big number, doesn't matter much */ + sk->nr_items = 4096; + + while (1) { + ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args); + if (ret < 0) + return ret; + + /* the ioctl returns the number of item it found in nr_items */ + if (sk->nr_items == 0) + break; + + off = 0; + + /* + * for each item, pull the key out of the header and then + * read the root_ref item it contains + */ + for (i = 0; i < sk->nr_items; i++) { + sh = (struct btrfs_ioctl_search_header *)(args.buf + + off); + off += sizeof(*sh); + + if (sh->objectid != 0) + goto done; + + if (sh->type != BTRFS_QGROUP_INFO_KEY) + goto done; + + info = (struct btrfs_qgroup_info_item *) + (args.buf + off); + print_qgroup_info(sh->offset, info); + + off += sh->len; + + /* + * record the mins in sk so we can make sure the + * next search doesn't repeat this root + */ + sk->min_offset = sh->offset; + } + sk->nr_items = 4096; + /* + * this iteration is done, step forward one qgroup for the next + * ioctl + */ + if (sk->min_offset < (u64)-1) + sk->min_offset++; + else + break; + } + +done: + return ret; +} + +static int parse_limit(const char *p, unsigned long long *s) +{ + char *endptr; + unsigned long long size; + + if (strcasecmp(p, "none") == 0) { + *s = 0; + return 1; + } + size = strtoull(p, 
&endptr, 10); + switch (*endptr) { + case 'T': + case 't': + size *= 1024; + case 'G': + case 'g': + size *= 1024; + case 'M': + case 'm': + size *= 1024; + case 'K': + case 'k': + size *= 1024; + ++endptr; + break; + case 0: + break; + default: + return 0; + } + + if (*endptr) + return 0; + + *s = size; + + return 1; +} + +static const char * const cmd_qgroup_assign_usage[] = { + "btrfs qgroup assign <src> <dst> <path>", + "Enable subvolume qgroup support for a filesystem.", + NULL +}; + +static int cmd_qgroup_assign(int argc, char **argv) +{ + int ret = qgroup_assign(1, argc, argv); + if (ret < 0) + usage(cmd_qgroup_assign_usage); + return ret; +} + +static const char * const cmd_qgroup_remove_usage[] = { + "btrfs qgroup remove <src> <dst> <path>", + "Remove a subvol from a quota group.", + NULL +}; + +static int cmd_qgroup_remove(int argc, char **argv) +{ + int ret = qgroup_assign(0, argc, argv); + if (ret < 0) + usage(cmd_qgroup_remove_usage); + return ret; +} + +static const char * const cmd_qgroup_create_usage[] = { + "btrfs qgroup create <qgroupid> <path>", + "Create a subvolume quota group.", + NULL +}; + +static int cmd_qgroup_create(int argc, char **argv) +{ + int ret = qgroup_create(1, argc, argv); + if (ret < 0) + usage(cmd_qgroup_create_usage); + return ret; +} + +static const char * const cmd_qgroup_destroy_usage[] = { + "btrfs qgroup destroy <qgroupid> <path>", + "Destroy a subvolume quota group.", + NULL +}; + +static int cmd_qgroup_destroy(int argc, char **argv) +{ + int ret = qgroup_create(0, argc, argv); + if (ret < 0) + usage(cmd_qgroup_destroy_usage); + return ret; +} + +static const char * const cmd_qgroup_show_usage[] = { + "btrfs qgroup show <path>", + "Show all subvolume quota groups.", + NULL +}; + +static int cmd_qgroup_show(int argc, char **argv) +{ + int ret = 0; + int fd; + int e; + char *path = argv[1]; + + if (check_argc_exact(argc, 2)) + usage(cmd_qgroup_show_usage); + + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, 
"ERROR: can't access '%s'\n", path); + return 12; + } + + ret = list_qgroups(fd); + e = errno; + close(fd); + if (ret < 0) { + fprintf(stderr, "ERROR: can't list qgroups: %s\n", + strerror(e)); + return 30; + } + + return ret; +} + +static const char * const cmd_qgroup_limit_usage[] = { + "btrfs qgroup limit [options] <size>|none [<qgroupid>] <path>", + "Limit the size of a subvolume quota group.", + "", + "-c limit amount of data after compression. This is the default,", + " it is currently not possible to turn off this option.", + "-e limit space exclusively assigned to this qgroup", + NULL +}; + +static int cmd_qgroup_limit(int argc, char **argv) +{ + int ret = 0; + int fd; + int e; + char *path = NULL; + struct btrfs_ioctl_qgroup_limit_args args; + unsigned long long size; + int compressed = 0; + int exclusive = 0; + + optind = 1; + while (1) { + int c = getopt(argc, argv, "ce"); + if (c < 0) + break; + switch (c) { + case 'c': + compressed = 1; + break; + case 'e': + exclusive = 1; + break; + default: + usage(cmd_qgroup_limit_usage); + } + } + + if (check_argc_min(argc - optind, 2)) + usage(cmd_qgroup_limit_usage); + + if (!parse_limit(argv[optind], &size)) { + fprintf(stderr, "Invalid size argument given\n"); + return 1; + } + + memset(&args, 0, sizeof(args)); + if (size) { + if (compressed) + args.lim.flags |= BTRFS_QGROUP_LIMIT_RFER_CMPR | + BTRFS_QGROUP_LIMIT_EXCL_CMPR; + if (exclusive) { + args.lim.flags |= BTRFS_QGROUP_LIMIT_MAX_EXCL; + args.lim.max_exclusive = size; + } else { + args.lim.flags |= BTRFS_QGROUP_LIMIT_MAX_RFER; + args.lim.max_referenced = size; + } + } + + if (argc - optind == 2) { + args.qgroupid = 0; + path = argv[optind + 1]; + ret = test_issubvolume(path); + if (ret < 0) { + fprintf(stderr, "ERROR: error accessing '%s'\n", path); + return 12; + } + if (!ret) { + fprintf(stderr, "ERROR: '%s' is not a subvolume\n", + path); + return 13; + } + /* + * keep qgroupid at 0, this indicates that the subvolume the + * fd refers to is to be 
limited + */ + } else if (argc - optind == 3) { + args.qgroupid = parse_qgroupid(argv[optind + 1]); + path = argv[optind + 2]; + } else + usage(cmd_qgroup_limit_usage); + + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access '%s'\n", path); + return 12; + } + + ret = ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args); + e = errno; + close(fd); + if (ret < 0) { + fprintf(stderr, "ERROR: unable to limit requested quota group: " + "%s\n", strerror(e)); + return 30; + } + return 0; +} + +const struct cmd_group qgroup_cmd_group = { + qgroup_cmd_group_usage, NULL, { + { "assign", cmd_qgroup_assign, cmd_qgroup_assign_usage, 0, 0 }, + { "remove", cmd_qgroup_remove, cmd_qgroup_remove_usage, 0, 0 }, + { "create", cmd_qgroup_create, cmd_qgroup_create_usage, 0, 0 }, + { "destroy", cmd_qgroup_destroy, + cmd_qgroup_destroy_usage, 0, 0 }, + { "show", cmd_qgroup_show, cmd_qgroup_show_usage, 0, 0 }, + { "limit", cmd_qgroup_limit, cmd_qgroup_limit_usage, 0, 0 }, + { 0, 0, 0, 0, 0 } + } +}; + +int cmd_qgroup(int argc, char **argv) +{ + return handle_command_group(&qgroup_cmd_group, argc, argv); +} diff --git a/cmds-quota.c b/cmds-quota.c new file mode 100644 index 0000000..2e2971a --- /dev/null +++ b/cmds-quota.c @@ -0,0 +1,174 @@ +/* + * Copyright (C) 2012 STRATO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include <sys/ioctl.h> +#include <unistd.h> + +#include "ctree.h" +#include "ioctl.h" + +#include "commands.h" +#include "utils.h" + +static const char * const quota_cmd_group_usage[] = { + "btrfs quota <command> [options] <path>", + NULL +}; + +int quota_ctl(int cmd, int argc, char **argv) +{ + int ret = 0; + int fd; + int e; + char *path = argv[1]; + struct btrfs_ioctl_quota_ctl_args args; + + if (check_argc_exact(argc, 2)) + return -1; + + memset(&args, 0, sizeof(args)); + args.cmd = cmd; + + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access '%s'\n", path); + return 12; + } + + ret = ioctl(fd, BTRFS_IOC_QUOTA_CTL, &args); + e = errno; + close(fd); + if (ret < 0) { + fprintf(stderr, "ERROR: quota command failed: %s\n", + strerror(e)); + return 30; + } + return 0; +} + +static const char * const cmd_quota_enable_usage[] = { + "btrfs quota enable <path>", + "Enable subvolume quota support for a filesystem.", + "Any data already present on the filesystem will not count towards", + "the space usage numbers. 
It is recommended to enable quota for a", + "filesystem before writing any data to it.", + NULL +}; + +static int cmd_quota_enable(int argc, char **argv) +{ + int ret = quota_ctl(BTRFS_QUOTA_CTL_ENABLE, argc, argv); + if (ret < 0) + usage(cmd_quota_enable_usage); + return ret; +} + +static const char * const cmd_quota_disable_usage[] = { + "btrfs quota disable <path>", + "Disable subvolume quota support for a filesystem.", + NULL +}; + +static int cmd_quota_disable(int argc, char **argv) +{ + int ret = quota_ctl(BTRFS_QUOTA_CTL_DISABLE, argc, argv); + if (ret < 0) + usage(cmd_quota_disable_usage); + return ret; +} + +static const char * const cmd_quota_rescan_usage[] = { + "btrfs quota rescan [-s] <path>", + "Trash all qgroup numbers and scan the metadata again with the current config.", + "", + "-s show status of a running rescan operation", + NULL +}; + +static int cmd_quota_rescan(int argc, char **argv) +{ + int ret = 0; + int fd; + int e; + char *path = NULL; + struct btrfs_ioctl_quota_rescan_args args; + int ioctlnum = BTRFS_IOC_QUOTA_RESCAN; + + optind = 1; + while (1) { + int c = getopt(argc, argv, "s"); + if (c < 0) + break; + switch (c) { + case 's': + ioctlnum = BTRFS_IOC_QUOTA_RESCAN_STATUS; + break; + default: + usage(cmd_quota_rescan_usage); + } + } + + if (check_argc_exact(argc - optind, 1)) + usage(cmd_quota_rescan_usage); + + memset(&args, 0, sizeof(args)); + + path = argv[optind]; + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access '%s'\n", path); + return 12; + } + + ret = ioctl(fd, ioctlnum, &args); + e = errno; + close(fd); + + if (ioctlnum == BTRFS_IOC_QUOTA_RESCAN) { + if (ret < 0) { + fprintf(stderr, "ERROR: quota rescan failed: " + "%s\n", strerror(e)); + return 30; + } else { + printf("quota rescan started\n"); + } + } else { + if (!args.flags) { + printf("no rescan operation in progress\n"); + } else { + printf("rescan operation running (current key %lld)\n", + args.progress); + } + } + + return 0; +} + 
+const struct cmd_group quota_cmd_group = { + quota_cmd_group_usage, NULL, { + { "enable", cmd_quota_enable, cmd_quota_enable_usage, NULL, 0 }, + { "disable", cmd_quota_disable, cmd_quota_disable_usage, 0, 0 }, + { "rescan", cmd_quota_rescan, cmd_quota_rescan_usage, NULL, 0 }, + { 0, 0, 0, 0, 0 } + } +}; + +int cmd_quota(int argc, char **argv) +{ + return handle_command_group(&quota_cmd_group, argc, argv); +} diff --git a/cmds-receive.c b/cmds-receive.c new file mode 100644 index 0000000..c2fa2e1 --- /dev/null +++ b/cmds-receive.c @@ -0,0 +1,993 @@ +/* + * Copyright (C) 2012 Alexander Block. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#define _GNU_SOURCE +#define _POSIX_C_SOURCE 200809 +#define _XOPEN_SOURCE 700 +#define _BSD_SOURCE + +#include "kerncompat.h" + +#include <unistd.h> +#include <stdint.h> +#include <dirent.h> +#include <fcntl.h> +#include <pthread.h> +#include <math.h> +#include <ftw.h> +#include <wait.h> + +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/xattr.h> +#include <uuid/uuid.h> + +#include "ctree.h" +#include "ioctl.h" +#include "commands.h" +#include "utils.h" +#include "list.h" +#include "btrfs-list.h" + +#include "send.h" +#include "send-stream.h" +#include "send-utils.h" + +static int g_verbose = 0; + +struct btrfs_receive +{ + int mnt_fd; + int dest_dir_fd; + + int write_fd; + char *write_path; + + char *root_path; + char *dest_dir_path; /* relative to root_path */ + char *full_subvol_path; + + struct subvol_info *cur_subvol; + + struct subvol_uuid_search sus; + + int honor_end_cmd; +}; + +static int finish_subvol(struct btrfs_receive *r) +{ + int ret; + int subvol_fd = -1; + struct btrfs_ioctl_received_subvol_args rs_args; + char uuid_str[128]; + u64 flags; + + if (r->cur_subvol == NULL) + return 0; + + subvol_fd = openat(r->mnt_fd, r->cur_subvol->path, + O_RDONLY | O_NOATIME); + if (subvol_fd < 0) { + ret = -errno; + fprintf(stderr, "ERROR: open %s failed. %s\n", + r->cur_subvol->path, strerror(-ret)); + goto out; + } + + memset(&rs_args, 0, sizeof(rs_args)); + memcpy(rs_args.uuid, r->cur_subvol->received_uuid, BTRFS_UUID_SIZE); + rs_args.stransid = r->cur_subvol->stransid; + + if (g_verbose >= 1) { + uuid_unparse((u8*)rs_args.uuid, uuid_str); + fprintf(stderr, "BTRFS_IOC_SET_RECEIVED_SUBVOL uuid=%s, " + "stransid=%llu\n", uuid_str, rs_args.stransid); + } + + ret = ioctl(subvol_fd, BTRFS_IOC_SET_RECEIVED_SUBVOL, &rs_args); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: BTRFS_IOC_SET_RECEIVED_SUBVOL failed. 
%s\n", + strerror(-ret)); + goto out; + } + r->cur_subvol->rtransid = rs_args.rtransid; + + ret = ioctl(subvol_fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: BTRFS_IOC_SUBVOL_GETFLAGS failed. %s\n", + strerror(-ret)); + goto out; + } + + flags |= BTRFS_SUBVOL_RDONLY; + + ret = ioctl(subvol_fd, BTRFS_IOC_SUBVOL_SETFLAGS, &flags); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: failed to make subvolume read only. " + "%s\n", strerror(-ret)); + goto out; + } + + ret = btrfs_list_get_path_rootid(subvol_fd, &r->cur_subvol->root_id); + if (ret < 0) + goto out; + subvol_uuid_search_add(&r->sus, r->cur_subvol); + r->cur_subvol = NULL; + ret = 0; + +out: + if (subvol_fd != -1) + close(subvol_fd); + return ret; +} + +static int process_subvol(const char *path, const u8 *uuid, u64 ctransid, + void *user) +{ + int ret; + struct btrfs_receive *r = user; + struct btrfs_ioctl_vol_args args_v1; + char uuid_str[128]; + + ret = finish_subvol(r); + if (ret < 0) + goto out; + + r->cur_subvol = calloc(1, sizeof(*r->cur_subvol)); + + if (strlen(r->dest_dir_path) == 0) + r->cur_subvol->path = strdup(path); + else + r->cur_subvol->path = path_cat(r->dest_dir_path, path); + free(r->full_subvol_path); + r->full_subvol_path = path_cat3(r->root_path, r->dest_dir_path, path); + + fprintf(stderr, "At subvol %s\n", path); + + memcpy(r->cur_subvol->received_uuid, uuid, BTRFS_UUID_SIZE); + r->cur_subvol->stransid = ctransid; + + if (g_verbose) { + uuid_unparse((u8*)r->cur_subvol->received_uuid, uuid_str); + fprintf(stderr, "receiving subvol %s uuid=%s, stransid=%llu\n", + path, uuid_str, + r->cur_subvol->stransid); + } + + memset(&args_v1, 0, sizeof(args_v1)); + strncpy_null(args_v1.name, path); + ret = ioctl(r->dest_dir_fd, BTRFS_IOC_SUBVOL_CREATE, &args_v1); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: creating subvolume %s failed. 
" + "%s\n", path, strerror(-ret)); + goto out; + } + +out: + return ret; +} + +static int process_snapshot(const char *path, const u8 *uuid, u64 ctransid, + const u8 *parent_uuid, u64 parent_ctransid, + void *user) +{ + int ret; + struct btrfs_receive *r = user; + char uuid_str[128]; + struct btrfs_ioctl_vol_args_v2 args_v2; + struct subvol_info *parent_subvol; + + ret = finish_subvol(r); + if (ret < 0) + goto out; + + r->cur_subvol = calloc(1, sizeof(*r->cur_subvol)); + + if (strlen(r->dest_dir_path) == 0) + r->cur_subvol->path = strdup(path); + else + r->cur_subvol->path = path_cat(r->dest_dir_path, path); + free(r->full_subvol_path); + r->full_subvol_path = path_cat3(r->root_path, r->dest_dir_path, path); + + fprintf(stderr, "At snapshot %s\n", path); + + memcpy(r->cur_subvol->received_uuid, uuid, BTRFS_UUID_SIZE); + r->cur_subvol->stransid = ctransid; + + if (g_verbose) { + uuid_unparse((u8*)r->cur_subvol->received_uuid, uuid_str); + fprintf(stderr, "receiving snapshot %s uuid=%s, " + "ctransid=%llu ", path, uuid_str, + r->cur_subvol->stransid); + uuid_unparse(parent_uuid, uuid_str); + fprintf(stderr, "parent_uuid=%s, parent_ctransid=%llu\n", + uuid_str, parent_ctransid); + } + + memset(&args_v2, 0, sizeof(args_v2)); + strncpy_null(args_v2.name, path); + + parent_subvol = subvol_uuid_search(&r->sus, 0, parent_uuid, + parent_ctransid, NULL, subvol_search_by_received_uuid); + if (!parent_subvol) { + ret = -ENOENT; + fprintf(stderr, "ERROR: could not find parent subvolume\n"); + goto out; + } + + /*if (rs_args.ctransid > rs_args.rtransid) { + if (!r->force) { + ret = -EINVAL; + fprintf(stderr, "ERROR: subvolume %s was modified after it was received.\n", r->subvol_parent_name); + goto out; + } else { + fprintf(stderr, "WARNING: subvolume %s was modified after it was received.\n", r->subvol_parent_name); + } + }*/ + + args_v2.fd = openat(r->mnt_fd, parent_subvol->path, + O_RDONLY | O_NOATIME); + if (args_v2.fd < 0) { + ret = -errno; + fprintf(stderr, "ERROR: open %s 
failed. %s\n", + parent_subvol->path, strerror(-ret)); + goto out; + } + + ret = ioctl(r->dest_dir_fd, BTRFS_IOC_SNAP_CREATE_V2, &args_v2); + close(args_v2.fd); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: creating snapshot %s -> %s " + "failed. %s\n", parent_subvol->path, + path, strerror(-ret)); + goto out; + } + +out: + return ret; +} + +static int process_mkfile(const char *path, void *user) +{ + int ret; + struct btrfs_receive *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + if (g_verbose >= 2) + fprintf(stderr, "mkfile %s\n", path); + + ret = creat(full_path, 0600); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: mkfile %s failed. %s\n", path, + strerror(-ret)); + goto out; + } + close(ret); + ret = 0; + +out: + free(full_path); + return ret; +} + +static int process_mkdir(const char *path, void *user) +{ + int ret; + struct btrfs_receive *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + if (g_verbose >= 2) + fprintf(stderr, "mkdir %s\n", path); + + ret = mkdir(full_path, 0700); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: mkdir %s failed. %s\n", path, + strerror(-ret)); + } + + free(full_path); + return ret; +} + +static int process_mknod(const char *path, u64 mode, u64 dev, void *user) +{ + int ret; + struct btrfs_receive *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + if (g_verbose >= 2) + fprintf(stderr, "mknod %s mode=%llu, dev=%llu\n", + path, mode, dev); + + ret = mknod(full_path, mode & S_IFMT, dev); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: mknod %s failed. 
%s\n", path, + strerror(-ret)); + } + + free(full_path); + return ret; +} + +static int process_mkfifo(const char *path, void *user) +{ + int ret; + struct btrfs_receive *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + if (g_verbose >= 2) + fprintf(stderr, "mkfifo %s\n", path); + + ret = mkfifo(full_path, 0600); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: mkfifo %s failed. %s\n", path, + strerror(-ret)); + } + + free(full_path); + return ret; +} + +static int process_mksock(const char *path, void *user) +{ + int ret; + struct btrfs_receive *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + if (g_verbose >= 2) + fprintf(stderr, "mksock %s\n", path); + + ret = mknod(full_path, 0600 | S_IFSOCK, 0); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: mknod %s failed. %s\n", path, + strerror(-ret)); + } + + free(full_path); + return ret; +} + +static int process_symlink(const char *path, const char *lnk, void *user) +{ + int ret; + struct btrfs_receive *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + if (g_verbose >= 2) + fprintf(stderr, "symlink %s -> %s\n", path, lnk); + + ret = symlink(lnk, full_path); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: symlink %s -> %s failed. %s\n", path, + lnk, strerror(-ret)); + } + + free(full_path); + return ret; +} + +static int process_rename(const char *from, const char *to, void *user) +{ + int ret; + struct btrfs_receive *r = user; + char *full_from = path_cat(r->full_subvol_path, from); + char *full_to = path_cat(r->full_subvol_path, to); + + if (g_verbose >= 2) + fprintf(stderr, "rename %s -> %s\n", from, to); + + ret = rename(full_from, full_to); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: rename %s -> %s failed. 
%s\n", from, + to, strerror(-ret)); + } + + free(full_from); + free(full_to); + return ret; +} + +static int process_link(const char *path, const char *lnk, void *user) +{ + int ret; + struct btrfs_receive *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + char *full_link_path = path_cat(r->full_subvol_path, lnk); + + if (g_verbose >= 2) + fprintf(stderr, "link %s -> %s\n", path, lnk); + + ret = link(full_link_path, full_path); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: link %s -> %s failed. %s\n", path, + lnk, strerror(-ret)); + } + + free(full_path); + free(full_link_path); + return ret; +} + + +static int process_unlink(const char *path, void *user) +{ + int ret; + struct btrfs_receive *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + if (g_verbose >= 2) + fprintf(stderr, "unlink %s\n", path); + + ret = unlink(full_path); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: unlink %s failed. %s\n", path, + strerror(-ret)); + } + + free(full_path); + return ret; +} + +static int process_rmdir(const char *path, void *user) +{ + int ret; + struct btrfs_receive *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + if (g_verbose >= 2) + fprintf(stderr, "rmdir %s\n", path); + + ret = rmdir(full_path); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: rmdir %s failed. %s\n", path, + strerror(-ret)); + } + + free(full_path); + return ret; +} + + +static int open_inode_for_write(struct btrfs_receive *r, const char *path) +{ + int ret = 0; + + if (r->write_fd != -1) { + if (strcmp(r->write_path, path) == 0) + goto out; + close(r->write_fd); + r->write_fd = -1; + } + + r->write_fd = open(path, O_RDWR); + if (r->write_fd < 0) { + ret = -errno; + fprintf(stderr, "ERROR: open %s failed. 
%s\n", path, + strerror(-ret)); + goto out; + } + free(r->write_path); + r->write_path = strdup(path); + +out: + return ret; +} + +static int close_inode_for_write(struct btrfs_receive *r) +{ + int ret = 0; + + if(r->write_fd == -1) + goto out; + + close(r->write_fd); + r->write_fd = -1; + r->write_path[0] = 0; + +out: + return ret; +} + +static int process_write(const char *path, const void *data, u64 offset, + u64 len, void *user) +{ + int ret = 0; + struct btrfs_receive *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + u64 pos = 0; + int w; + + ret = open_inode_for_write(r, full_path); + if (ret < 0) + goto out; + + while (pos < len) { + w = pwrite(r->write_fd, (char*)data + pos, len - pos, + offset + pos); + if (w < 0) { + ret = -errno; + fprintf(stderr, "ERROR: writing to %s failed. %s\n", + path, strerror(-ret)); + goto out; + } + pos += w; + } + +out: + free(full_path); + return ret; +} + +static int process_clone(const char *path, u64 offset, u64 len, + const u8 *clone_uuid, u64 clone_ctransid, + const char *clone_path, u64 clone_offset, + void *user) +{ + int ret = 0; + struct btrfs_receive *r = user; + struct btrfs_ioctl_clone_range_args clone_args; + struct subvol_info *si = NULL; + char *full_path = path_cat(r->full_subvol_path, path); + char *subvol_path = NULL; + char *full_clone_path = NULL; + int clone_fd = -1; + + ret = open_inode_for_write(r, full_path); + if (ret < 0) + goto out; + + si = subvol_uuid_search(&r->sus, 0, clone_uuid, clone_ctransid, NULL, + subvol_search_by_received_uuid); + if (!si) { + if (memcmp(clone_uuid, r->cur_subvol->received_uuid, + BTRFS_UUID_SIZE) == 0) { + /* TODO check generation of extent */ + subvol_path = strdup(r->cur_subvol->path); + } else { + ret = -ENOENT; + fprintf(stderr, "ERROR: did not find source subvol.\n"); + goto out; + } + } else { + /*if (rs_args.ctransid > rs_args.rtransid) { + if (!r->force) { + ret = -EINVAL; + fprintf(stderr, "ERROR: subvolume %s was " + "modified after it was " 
+ "received.\n", + r->subvol_parent_name); + goto out; + } else { + fprintf(stderr, "WARNING: subvolume %s was " + "modified after it was " + "received.\n", + r->subvol_parent_name); + } + }*/ + subvol_path = strdup(si->path); + } + + full_clone_path = path_cat3(r->root_path, subvol_path, clone_path); + + clone_fd = open(full_clone_path, O_RDONLY | O_NOATIME); + if (clone_fd < 0) { + ret = -errno; + fprintf(stderr, "ERROR: failed to open %s. %s\n", + full_clone_path, strerror(-ret)); + goto out; + } + + clone_args.src_fd = clone_fd; + clone_args.src_offset = clone_offset; + clone_args.src_length = len; + clone_args.dest_offset = offset; + ret = ioctl(r->write_fd, BTRFS_IOC_CLONE_RANGE, &clone_args); + if (ret) { + ret = -errno; + fprintf(stderr, "ERROR: failed to clone extents to %s\n%s\n", + path, strerror(-ret)); + goto out; + } + +out: + free(full_path); + free(full_clone_path); + free(subvol_path); + if (clone_fd != -1) + close(clone_fd); + return ret; +} + + +static int process_set_xattr(const char *path, const char *name, + const void *data, int len, void *user) +{ + int ret = 0; + struct btrfs_receive *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + if (g_verbose >= 2) { + fprintf(stderr, "set_xattr %s - name=%s data_len=%d " + "data=%.*s\n", path, name, len, + len, (char*)data); + } + + ret = lsetxattr(full_path, name, data, len, 0); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: lsetxattr %s %s=%.*s failed. 
%s\n", + path, name, len, (char*)data, strerror(-ret)); + goto out; + } + +out: + free(full_path); + return ret; +} + +static int process_remove_xattr(const char *path, const char *name, void *user) +{ + int ret = 0; + struct btrfs_receive *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + if (g_verbose >= 2) { + fprintf(stderr, "remove_xattr %s - name=%s\n", + path, name); + } + + ret = lremovexattr(full_path, name); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: lremovexattr %s %s failed. %s\n", + path, name, strerror(-ret)); + goto out; + } + +out: + free(full_path); + return ret; +} + +static int process_truncate(const char *path, u64 size, void *user) +{ + int ret = 0; + struct btrfs_receive *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + if (g_verbose >= 2) + fprintf(stderr, "truncate %s size=%llu\n", path, size); + + ret = truncate(full_path, size); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: truncate %s failed. %s\n", + path, strerror(-ret)); + goto out; + } + +out: + free(full_path); + return ret; +} + +static int process_chmod(const char *path, u64 mode, void *user) +{ + int ret = 0; + struct btrfs_receive *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + if (g_verbose >= 2) + fprintf(stderr, "chmod %s - mode=0%o\n", path, (int)mode); + + ret = chmod(full_path, mode); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: chmod %s failed. %s\n", + path, strerror(-ret)); + goto out; + } + +out: + free(full_path); + return ret; +} + +static int process_chown(const char *path, u64 uid, u64 gid, void *user) +{ + int ret = 0; + struct btrfs_receive *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + if (g_verbose >= 2) + fprintf(stderr, "chown %s - uid=%llu, gid=%llu\n", path, + uid, gid); + + ret = lchown(full_path, uid, gid); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: chown %s failed. 
%s\n", + path, strerror(-ret)); + goto out; + } + +out: + free(full_path); + return ret; +} + +static int process_utimes(const char *path, struct timespec *at, + struct timespec *mt, struct timespec *ct, + void *user) +{ + int ret = 0; + struct btrfs_receive *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + struct timespec tv[2]; + + if (g_verbose >= 2) + fprintf(stderr, "utimes %s\n", path); + + tv[0] = *at; + tv[1] = *mt; + ret = utimensat(AT_FDCWD, full_path, tv, AT_SYMLINK_NOFOLLOW); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: utimes %s failed. %s\n", + path, strerror(-ret)); + goto out; + } + +out: + free(full_path); + return ret; +} + + +struct btrfs_send_ops send_ops = { + .subvol = process_subvol, + .snapshot = process_snapshot, + .mkfile = process_mkfile, + .mkdir = process_mkdir, + .mknod = process_mknod, + .mkfifo = process_mkfifo, + .mksock = process_mksock, + .symlink = process_symlink, + .rename = process_rename, + .link = process_link, + .unlink = process_unlink, + .rmdir = process_rmdir, + .write = process_write, + .clone = process_clone, + .set_xattr = process_set_xattr, + .remove_xattr = process_remove_xattr, + .truncate = process_truncate, + .chmod = process_chmod, + .chown = process_chown, + .utimes = process_utimes, +}; + +int do_receive(struct btrfs_receive *r, const char *tomnt, int r_fd) +{ + int ret; + char *dest_dir_full_path; + int end = 0; + + dest_dir_full_path = realpath(tomnt, NULL); + if (!dest_dir_full_path) { + ret = -errno; + fprintf(stderr, "ERROR: realpath(%s) failed. %s\n", tomnt, + strerror(-ret)); + goto out; + } + r->dest_dir_fd = open(dest_dir_full_path, O_RDONLY | O_NOATIME); + if (r->dest_dir_fd < 0) { + ret = -errno; + fprintf(stderr, + "ERROR: failed to open destination directory %s. 
%s\n", + dest_dir_full_path, strerror(-ret)); + goto out; + } + + ret = find_mount_root(dest_dir_full_path, &r->root_path); + if (ret < 0) { + ret = -EINVAL; + fprintf(stderr, "ERROR: failed to determine mount point " + "for %s\n", dest_dir_full_path); + goto out; + } + r->mnt_fd = open(r->root_path, O_RDONLY | O_NOATIME); + if (r->mnt_fd < 0) { + ret = -errno; + fprintf(stderr, "ERROR: failed to open %s. %s\n", r->root_path, + strerror(-ret)); + goto out; + } + + /* + * find_mount_root returns a root_path that is a subpath of + * dest_dir_full_path. Now get the other part of root_path, + * which is the destination dir relative to root_path. + */ + r->dest_dir_path = dest_dir_full_path + strlen(r->root_path); + while (r->dest_dir_path[0] == '/') + r->dest_dir_path++; + + ret = subvol_uuid_search_init(r->mnt_fd, &r->sus); + if (ret < 0) + goto out; + + while (!end) { + ret = btrfs_read_and_process_send_stream(r_fd, &send_ops, r, + r->honor_end_cmd); + if (ret < 0) + goto out; + if (ret) + end = 1; + + ret = close_inode_for_write(r); + if (ret < 0) + goto out; + ret = finish_subvol(r); + if (ret < 0) + goto out; + } + ret = 0; + +out: + if (r->write_fd != -1) { + close(r->write_fd); + r->write_fd = -1; + } + free(r->root_path); + r->root_path = NULL; + free(r->write_path); + r->write_path = NULL; + free(r->full_subvol_path); + r->full_subvol_path = NULL; + r->dest_dir_path = NULL; + free(dest_dir_full_path); + if (r->cur_subvol) { + free(r->cur_subvol->path); + free(r->cur_subvol); + r->cur_subvol = NULL; + } + subvol_uuid_search_finit(&r->sus); + if (r->mnt_fd != -1) { + close(r->mnt_fd); + r->mnt_fd = -1; + } + if (r->dest_dir_fd != -1) { + close(r->dest_dir_fd); + r->dest_dir_fd = -1; + } + return ret; +} + +static int do_cmd_receive(int argc, char **argv) +{ + int c; + char *tomnt = NULL; + char *fromfile = NULL; + struct btrfs_receive r; + int receive_fd = fileno(stdin); + + int ret; + + memset(&r, 0, sizeof(r)); + r.mnt_fd = -1; + r.write_fd = -1; + 
r.dest_dir_fd = -1; + + while ((c = getopt(argc, argv, "evf:")) != -1) { + switch (c) { + case 'v': + g_verbose++; + break; + case 'f': + fromfile = optarg; + break; + case 'e': + r.honor_end_cmd = 1; + break; + case '?': + default: + fprintf(stderr, "ERROR: receive args invalid.\n"); + return 1; + } + } + + if (optind + 1 != argc) { + fprintf(stderr, "ERROR: receive needs path to subvolume\n"); + return 1; + } + + tomnt = argv[optind]; + + if (fromfile) { + receive_fd = open(fromfile, O_RDONLY | O_NOATIME); + if (receive_fd < 0) { + fprintf(stderr, "ERROR: failed to open %s\n", fromfile); + return -errno; + } + } + + ret = do_receive(&r, tomnt, receive_fd); + + return ret; +} + +static const char * const receive_cmd_group_usage[] = { + "btrfs receive <command> <args>", + NULL +}; + +const char * const cmd_receive_usage[] = { + "btrfs receive [-ve] [-f <infile>] <mount>", + "Receive subvolumes from stdin.", + "Receives one or more subvolumes that were previously ", + "sent with btrfs send. The received subvolumes are stored", + "into <mount>.", + "btrfs receive will fail in case a receiving subvolume", + "already exists. It will also fail in case a previously", + "received subvolume was changed after it was received.", + "After receiving a subvolume, it is immediately set to", + "read only.\n", + "-v Enable verbose debug output. Each", + " occurrence of this option increases the", + " verbose level more.", + "-f <infile> By default, btrfs receive uses stdin", + " to receive the subvolumes. Use this", + " option to specify a file to use instead.", + "-e Terminate after receiving an <end cmd>", + " in the data stream. 
Without this option,", + " the receiver terminates only if an error", + " is recognized or on EOF.", + NULL +}; + +const struct cmd_group receive_cmd_group = { + receive_cmd_group_usage, NULL, { + { "receive", do_cmd_receive, cmd_receive_usage, NULL, 0 }, + { 0, 0, 0, 0, 0 }, + }, +}; + +int cmd_receive(int argc, char **argv) +{ + return do_cmd_receive(argc, argv); +} diff --git a/cmds-replace.c b/cmds-replace.c new file mode 100644 index 0000000..6397bb5 --- /dev/null +++ b/cmds-replace.c @@ -0,0 +1,585 @@ +/* + * Copyright (C) 2012 STRATO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <errno.h> +#include <sys/stat.h> +#include <time.h> +#include <assert.h> +#include <inttypes.h> +#include <sys/wait.h> + +#include "kerncompat.h" +#include "ctree.h" +#include "ioctl.h" +#include "utils.h" +#include "volumes.h" +#include "disk-io.h" + +#include "commands.h" + + +static int print_replace_status(int fd, const char *path, int once); +static char *time2string(char *buf, size_t s, __u64 t); +static char *progress2string(char *buf, size_t s, int progress_1000); + + +static const char *replace_dev_result2string(__u64 result) +{ + switch (result) { + case BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR: + return "no error"; + case BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED: + return "not started"; + case BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED: + return "already started"; + default: + return "<illegal result value>"; + } +} + +static const char * const replace_cmd_group_usage[] = { + "btrfs replace <command> [<args>]", + NULL +}; + +static int is_numerical(const char *str) +{ + if (!(*str >= '0' && *str <= '9')) + return 0; + while (*str >= '0' && *str <= '9') + str++; + if (*str != '\0') + return 0; + return 1; +} + +static int dev_replace_cancel_fd = -1; +static void dev_replace_sigint_handler(int signal) +{ + int ret; + struct btrfs_ioctl_dev_replace_args args = {0}; + + args.cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL; + ret = ioctl(dev_replace_cancel_fd, BTRFS_IOC_DEV_REPLACE, &args); + if (ret < 0) + perror("Device replace cancel failed"); +} + +static int dev_replace_handle_sigint(int fd) +{ + struct sigaction sa = { + .sa_handler = fd == -1 ? 
SIG_DFL : dev_replace_sigint_handler + }; + + dev_replace_cancel_fd = fd; + return sigaction(SIGINT, &sa, NULL); +} + +static const char *const cmd_start_replace_usage[] = { + "btrfs replace start srcdev|devid targetdev [-Bfr] mount_point", + "Replace device of a btrfs filesystem.", + "On a live filesystem, duplicate the data to the target device which", + "is currently stored on the source device. If the source device is not", + "available anymore, or if the -r option is set, the data is built", + "only using the RAID redundancy mechanisms. After completion of the", + "operation, the source device is removed from the filesystem.", + "If the srcdev is a numerical value, it is assumed to be the device id", + "of the filesystem which is mounted at mount_point, otherwise it is", + "the path to the source device. If the source device is disconnected,", + "from the system, you have to use the devid parameter format.", + "The targetdev needs to be same size or larger than the srcdev.", + "", + "-r only read from srcdev if no other zero-defect mirror exists", + " (enable this if your drive has lots of read errors, the access", + " would be very slow)", + "-f force using and overwriting targetdev even if it looks like", + " containing a valid btrfs filesystem. A valid filesystem is", + " assumed if a btrfs superblock is found which contains a", + " correct checksum. 
Devices which are currently mounted are", + " never allowed to be used as the targetdev", + "-B do not background", + NULL +}; + +static int cmd_start_replace(int argc, char **argv) +{ + struct btrfs_ioctl_dev_replace_args start_args = {0}; + struct btrfs_ioctl_dev_replace_args status_args = {0}; + int ret; + int i; + int c; + int fdmnt = -1; + int fdsrcdev = -1; + int fddstdev = -1; + char *path; + char *srcdev; + char *dstdev; + int avoid_reading_from_srcdev = 0; + int force_using_targetdev = 0; + u64 total_devs = 1; + struct btrfs_fs_devices *fs_devices_mnt = NULL; + struct stat st; + u64 dstdev_block_count; + int do_not_background = 0; + int mixed = 0; + + while ((c = getopt(argc, argv, "Brf")) != -1) { + switch (c) { + case 'B': + do_not_background = 1; + break; + case 'r': + avoid_reading_from_srcdev = 1; + break; + case 'f': + force_using_targetdev = 1; + break; + case '?': + default: + usage(cmd_start_replace_usage); + } + } + + start_args.start.cont_reading_from_srcdev_mode = + avoid_reading_from_srcdev ? 
+ BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID : + BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS; + if (check_argc_exact(argc - optind, 3)) + usage(cmd_start_replace_usage); + path = argv[optind + 2]; + + fdmnt = open_path_or_dev_mnt(path); + + if (fdmnt < 0) { + fprintf(stderr, "ERROR: can't access \"%s\": %s\n", + path, strerror(errno)); + goto leave_with_error; + } + + /* check for possible errors before backgrounding */ + status_args.cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS; + ret = ioctl(fdmnt, BTRFS_IOC_DEV_REPLACE, &status_args); + if (ret) { + fprintf(stderr, + "ERROR: ioctl(DEV_REPLACE_STATUS) failed on \"%s\": %s, %s\n", + path, strerror(errno), + replace_dev_result2string(status_args.result)); + goto leave_with_error; + } + + if (status_args.result != BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR) { + fprintf(stderr, + "ERROR: ioctl(DEV_REPLACE_STATUS) on \"%s\" returns error: %s\n", + path, replace_dev_result2string(status_args.result)); + goto leave_with_error; + } + + if (status_args.status.replace_state == + BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) { + fprintf(stderr, + "ERROR: btrfs replace on \"%s\" already started!\n", + path); + goto leave_with_error; + } + + srcdev = argv[optind]; + dstdev = argv[optind + 1]; + + if (is_numerical(srcdev)) { + struct btrfs_ioctl_fs_info_args fi_args; + struct btrfs_ioctl_dev_info_args *di_args = NULL; + + if (atoi(srcdev) == 0) { + fprintf(stderr, "Error: Failed to parse the numerical devid value '%s'\n", + srcdev); + goto leave_with_error; + } + start_args.start.srcdevid = (__u64)atoi(srcdev); + + ret = get_fs_info(path, &fi_args, &di_args); + if (ret) { + fprintf(stderr, "ERROR: getting dev info for devstats failed: " + "%s\n", strerror(-ret)); + free(di_args); + goto leave_with_error; + } + if (!fi_args.num_devices) { + fprintf(stderr, "ERROR: no devices found\n"); + free(di_args); + goto leave_with_error; + } + + for (i = 0; i < fi_args.num_devices; i++) + if (start_args.start.srcdevid 
== di_args[i].devid) + break; + free(di_args); + if (i == fi_args.num_devices) { + fprintf(stderr, "Error: '%s' is not a valid devid for filesystem '%s'\n", + srcdev, path); + goto leave_with_error; + } + } else { + fdsrcdev = open(srcdev, O_RDWR); + if (fdsrcdev < 0) { + fprintf(stderr, "Error: Unable to open device '%s'\n", + srcdev); + goto leave_with_error; + } + ret = fstat(fdsrcdev, &st); + if (ret) { + fprintf(stderr, "Error: Unable to stat '%s'\n", srcdev); + goto leave_with_error; + } + if (!S_ISBLK(st.st_mode)) { + fprintf(stderr, "Error: '%s' is not a block device\n", + srcdev); + goto leave_with_error; + } + strncpy((char *)start_args.start.srcdev_name, srcdev, + BTRFS_DEVICE_PATH_NAME_MAX); + close(fdsrcdev); + fdsrcdev = -1; + start_args.start.srcdevid = 0; + } + + ret = check_mounted(dstdev); + if (ret < 0) { + fprintf(stderr, "Error checking %s mount status\n", dstdev); + goto leave_with_error; + } + if (ret == 1) { + fprintf(stderr, + "Error, target device %s is in use and currently mounted!\n", + dstdev); + goto leave_with_error; + } + fddstdev = open(dstdev, O_RDWR); + if (fddstdev < 0) { + fprintf(stderr, "Unable to open %s\n", dstdev); + goto leave_with_error; + } + ret = btrfs_scan_one_device(fddstdev, dstdev, &fs_devices_mnt, + &total_devs, BTRFS_SUPER_INFO_OFFSET); + if (ret >= 0 && !force_using_targetdev) { + fprintf(stderr, + "Error, target device %s contains filesystem, use '-f' to force overwriting.\n", + dstdev); + goto leave_with_error; + } + ret = fstat(fddstdev, &st); + if (ret) { + fprintf(stderr, "Error: Unable to stat '%s'\n", dstdev); + goto leave_with_error; + } + if (!S_ISBLK(st.st_mode)) { + fprintf(stderr, "Error: '%s' is not a block device\n", dstdev); + goto leave_with_error; + } + strncpy((char *)start_args.start.tgtdev_name, dstdev, + BTRFS_DEVICE_PATH_NAME_MAX); + if (btrfs_prepare_device(fddstdev, dstdev, 1, &dstdev_block_count, 0, + &mixed, 0)) { + fprintf(stderr, "Error: Failed to prepare device '%s'\n", + dstdev); + 
goto leave_with_error; + } + close(fddstdev); + fddstdev = -1; + + dev_replace_handle_sigint(fdmnt); + if (!do_not_background) { + if (daemon(0, 0) < 0) { + fprintf(stderr, "ERROR, backgrounding failed: %s\n", + strerror(errno)); + goto leave_with_error; + } + } + + start_args.cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_START; + ret = ioctl(fdmnt, BTRFS_IOC_DEV_REPLACE, &start_args); + if (do_not_background) { + if (ret) { + fprintf(stderr, + "ERROR: ioctl(DEV_REPLACE_START) failed on \"%s\": %s, %s\n", + path, strerror(errno), + replace_dev_result2string(start_args.result)); + goto leave_with_error; + } + + if (start_args.result != + BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR) { + fprintf(stderr, + "ERROR: ioctl(DEV_REPLACE_START) on \"%s\" returns error: %s\n", + path, + replace_dev_result2string(start_args.result)); + goto leave_with_error; + } + } + close(fdmnt); + return 0; + +leave_with_error: + if (fdmnt != -1) + close(fdmnt); + if (fdsrcdev != -1) + close(fdsrcdev); + if (fddstdev != -1) + close(fddstdev); + return -1; +} + +static const char *const cmd_status_replace_usage[] = { + "btrfs replace status mount_point [-1]", + "Print status and progress information of a running device replace", + "operation", + "", + "-1 print once instead of print continously until the replace", + " operation finishes (or is canceled)", + NULL +}; + +static int cmd_status_replace(int argc, char **argv) +{ + int fd; + int e; + int c; + char *path; + int once = 0; + int ret; + + while ((c = getopt(argc, argv, "1")) != -1) { + switch (c) { + case '1': + once = 1; + break; + case '?': + default: + usage(cmd_status_replace_usage); + } + } + + if (check_argc_exact(argc - optind, 1)) + usage(cmd_status_replace_usage); + + path = argv[optind]; + fd = open_file_or_dir(path); + e = errno; + if (fd < 0) { + fprintf(stderr, "ERROR: can't access \"%s\": %s\n", + path, strerror(e)); + return -1; + } + + ret = print_replace_status(fd, path, once); + close(fd); + return ret; +} + +static int 
print_replace_status(int fd, const char *path, int once) +{ + struct btrfs_ioctl_dev_replace_args args = {0}; + struct btrfs_ioctl_dev_replace_status_params *status; + int ret; + int prevent_loop = 0; + int skip_stats; + int num_chars; + char string1[80]; + char string2[80]; + char string3[80]; + + for (;;) { + args.cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS; + ret = ioctl(fd, BTRFS_IOC_DEV_REPLACE, &args); + if (ret) { + fprintf(stderr, "ERROR: ioctl(DEV_REPLACE_STATUS) failed on \"%s\": %s, %s\n", + path, strerror(errno), + replace_dev_result2string(args.result)); + return ret; + } + + status = &args.status; + if (args.result != BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR) { + fprintf(stderr, "ERROR: ioctl(DEV_REPLACE_STATUS) on \"%s\" returns error: %s\n", + path, + replace_dev_result2string(args.result)); + return -1; + } + + skip_stats = 0; + num_chars = 0; + switch (status->replace_state) { + case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: + num_chars = + printf("%s done", + progress2string(string3, + sizeof(string3), + status->progress_1000)); + break; + case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: + prevent_loop = 1; + printf("Started on %s, finished on %s", + time2string(string1, sizeof(string1), + status->time_started), + time2string(string2, sizeof(string2), + status->time_stopped)); + break; + case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: + prevent_loop = 1; + printf("Started on %s, canceled on %s at %s", + time2string(string1, sizeof(string1), + status->time_started), + time2string(string2, sizeof(string2), + status->time_stopped), + progress2string(string3, sizeof(string3), + status->progress_1000)); + break; + case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: + prevent_loop = 1; + printf("Started on %s, suspended on %s at %s", + time2string(string1, sizeof(string1), + status->time_started), + time2string(string2, sizeof(string2), + status->time_stopped), + progress2string(string3, sizeof(string3), + status->progress_1000)); + break; + case 
BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: + prevent_loop = 1; + skip_stats = 1; + printf("Never started"); + break; + default: + prevent_loop = 1; + assert(0); + break; + } + + if (!skip_stats) + num_chars += printf( + ", %llu write errs, %llu uncorr. read errs", + (unsigned long long)status->num_write_errors, + (unsigned long long) + status->num_uncorrectable_read_errors); + if (once || prevent_loop) { + printf("\n"); + return 0; + } + + fflush(stdout); + sleep(1); + while (num_chars > 0) { + putchar('\b'); + num_chars--; + } + } + + return 0; +} + +static char * +time2string(char *buf, size_t s, __u64 t) +{ + struct tm t_tm; + time_t t_time_t; + + t_time_t = (time_t)t; + assert((__u64)t_time_t == t); + localtime_r(&t_time_t, &t_tm); + strftime(buf, s, "%e.%b %T", &t_tm); + return buf; +} + +static char * +progress2string(char *buf, size_t s, int progress_1000) +{ + snprintf(buf, s, "%d.%01d%%", progress_1000 / 10, progress_1000 % 10); + assert(s > 0); + buf[s - 1] = '\0'; + return buf; +} + +static const char *const cmd_cancel_replace_usage[] = { + "btrfs replace cancel mount_point", + "Cancel a running device replace operation.", + NULL +}; + +static int cmd_cancel_replace(int argc, char **argv) +{ + struct btrfs_ioctl_dev_replace_args args = {0}; + int ret; + int c; + int fd; + int e; + char *path; + + while ((c = getopt(argc, argv, "")) != -1) { + switch (c) { + case '?': + default: + usage(cmd_cancel_replace_usage); + } + } + + if (check_argc_exact(argc - optind, 1)) + usage(cmd_cancel_replace_usage); + + path = argv[optind]; + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access \"%s\": %s\n", + path, strerror(errno)); + return -1; + } + + args.cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL; + ret = ioctl(fd, BTRFS_IOC_DEV_REPLACE, &args); + e = errno; + close(fd); + if (ret) { + fprintf(stderr, "ERROR: ioctl(DEV_REPLACE_CANCEL) failed on \"%s\": %s, %s\n", + path, strerror(e), + replace_dev_result2string(args.result)); + return 
ret; + } + + return 0; +} + +const struct cmd_group replace_cmd_group = { + replace_cmd_group_usage, NULL, { + { "start", cmd_start_replace, cmd_start_replace_usage, NULL, + 0 }, + { "status", cmd_status_replace, cmd_status_replace_usage, NULL, + 0 }, + { "cancel", cmd_cancel_replace, cmd_cancel_replace_usage, NULL, + 0 }, + { 0, 0, 0, 0, 0 } + } +}; + +int cmd_replace(int argc, char **argv) +{ + return handle_command_group(&replace_cmd_group, argc, argv); +} diff --git a/restore.c b/cmds-restore.c index 250c9d3..dcf459f 100644 --- a/restore.c +++ b/cmds-restore.c @@ -18,14 +18,19 @@ #define _XOPEN_SOURCE 500 #define _GNU_SOURCE 1 + +#include "kerncompat.h" + #include <ctype.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <fcntl.h> #include <sys/stat.h> +#include <lzo/lzoconf.h> +#include <lzo/lzo1x.h> #include <zlib.h> -#include "kerncompat.h" + #include "ctree.h" #include "disk-io.h" #include "print-tree.h" @@ -34,15 +39,21 @@ #include "version.h" #include "volumes.h" #include "utils.h" +#include "commands.h" +static char fs_name[4096]; static char path_name[4096]; static int get_snaps = 0; static int verbose = 0; static int ignore_errors = 0; static int overwrite = 0; -static int decompress(char *inbuf, char *outbuf, u64 compress_len, - u64 decompress_len) +#define LZO_LEN 4 +#define PAGE_CACHE_SIZE 4096 +#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3) + +static int decompress_zlib(char *inbuf, char *outbuf, u64 compress_len, + u64 decompress_len) { z_stream strm; int ret; @@ -61,13 +72,81 @@ static int decompress(char *inbuf, char *outbuf, u64 compress_len, ret = inflate(&strm, Z_NO_FLUSH); if (ret != Z_STREAM_END) { (void)inflateEnd(&strm); - fprintf(stderr, "ret is %d\n", ret); + fprintf(stderr, "failed to inflate: %d\n", ret); return -1; } (void)inflateEnd(&strm); return 0; } +static inline size_t read_compress_length(unsigned char *buf) +{ + __le32 dlen; + memcpy(&dlen, buf, LZO_LEN); + return le32_to_cpu(dlen); +} + 
+static int decompress_lzo(unsigned char *inbuf, char *outbuf, u64 compress_len, + u64 *decompress_len) +{ + size_t new_len; + size_t in_len; + size_t out_len = 0; + size_t tot_len; + size_t tot_in; + int ret; + + ret = lzo_init(); + if (ret != LZO_E_OK) { + fprintf(stderr, "lzo init returned %d\n", ret); + return -1; + } + + tot_len = read_compress_length(inbuf); + inbuf += LZO_LEN; + tot_in = LZO_LEN; + + while (tot_in < tot_len) { + in_len = read_compress_length(inbuf); + inbuf += LZO_LEN; + tot_in += LZO_LEN; + + new_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); + ret = lzo1x_decompress_safe((const unsigned char *)inbuf, in_len, + (unsigned char *)outbuf, + (void *)&new_len, NULL); + if (ret != LZO_E_OK) { + fprintf(stderr, "failed to inflate: %d\n", ret); + return -1; + } + out_len += new_len; + outbuf += new_len; + inbuf += in_len; + tot_in += in_len; + } + + *decompress_len = out_len; + + return 0; +} + +static int decompress(char *inbuf, char *outbuf, u64 compress_len, + u64 *decompress_len, int compress) +{ + switch (compress) { + case BTRFS_COMPRESS_ZLIB: + return decompress_zlib(inbuf, outbuf, compress_len, + *decompress_len); + case BTRFS_COMPRESS_LZO: + return decompress_lzo((unsigned char *)inbuf, outbuf, compress_len, + decompress_len); + default: + break; + } + + fprintf(stderr, "invalid compression type: %d\n", compress); + return -1; +} int next_leaf(struct btrfs_root *root, struct btrfs_path *path) { @@ -99,9 +178,6 @@ int next_leaf(struct btrfs_root *root, struct btrfs_path *path) continue; } - if (next) - free_extent_buffer(next); - if (path->reada) reada_for_search(root, path, level, slot, 0); @@ -130,11 +206,11 @@ static int copy_one_inline(int fd, struct btrfs_path *path, u64 pos) struct btrfs_file_extent_item *fi; char buf[4096]; char *outbuf; + u64 ram_size; ssize_t done; unsigned long ptr; int ret; int len; - int ram_size; int compress; fi = btrfs_item_ptr(leaf, path->slots[0], @@ -162,7 +238,7 @@ static int copy_one_inline(int fd, struct 
btrfs_path *path, u64 pos) return -1; } - ret = decompress(buf, outbuf, len, ram_size); + ret = decompress(buf, outbuf, len, &ram_size, compress); if (ret) { free(outbuf); return ret; @@ -170,8 +246,8 @@ static int copy_one_inline(int fd, struct btrfs_path *path, u64 pos) done = pwrite(fd, outbuf, ram_size, pos); free(outbuf); - if (done < len) { - fprintf(stderr, "Short compressed inline write, wanted %d, " + if (done < ram_size) { + fprintf(stderr, "Short compressed inline write, wanted %Lu, " "did %zd: %d\n", ram_size, done, errno); return -1; } @@ -193,17 +269,23 @@ static int copy_one_extent(struct btrfs_root *root, int fd, u64 length; u64 size_left; u64 dev_bytenr; + u64 offset; u64 count = 0; int compress; int ret; int dev_fd; + int mirror_num = 1; + int num_copies; compress = btrfs_file_extent_compression(leaf, fi); bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); disk_size = btrfs_file_extent_disk_num_bytes(leaf, fi); ram_size = btrfs_file_extent_ram_bytes(leaf, fi); + offset = btrfs_file_extent_offset(leaf, fi); size_left = disk_size; + if (offset) + printf("offset is %Lu\n", offset); /* we found a hole */ if (disk_size == 0) return 0; @@ -225,12 +307,10 @@ static int copy_one_extent(struct btrfs_root *root, int fd, again: length = size_left; ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, - bytenr, &length, &multi, 0); + bytenr, &length, &multi, mirror_num, NULL); if (ret) { - free(inbuf); - free(outbuf); fprintf(stderr, "Error mapping block %d\n", ret); - return ret; + goto out; } device = multi->stripes[0].dev; dev_fd = device->fd; @@ -240,56 +320,70 @@ again: if (size_left < length) length = size_left; - size_left -= length; done = pread(dev_fd, inbuf+count, length, dev_bytenr); - if (done < length) { - free(inbuf); - free(outbuf); - fprintf(stderr, "Short read %d\n", errno); - return -1; + /* Need both checks, or we miss negative values due to u64 conversion */ + if (done < 0 || done < length) { + num_copies = 
btrfs_num_copies(&root->fs_info->mapping_tree, + bytenr, length); + mirror_num++; + /* mirror_num is 1-indexed, so num_copies is a valid mirror. */ + if (mirror_num > num_copies) { + ret = -1; + fprintf(stderr, "Exhausted mirrors trying to read\n"); + goto out; + } + fprintf(stderr, "Trying another mirror\n"); + goto again; } + mirror_num = 1; + size_left -= length; count += length; bytenr += length; if (size_left) goto again; - if (compress == BTRFS_COMPRESS_NONE) { while (total < ram_size) { done = pwrite(fd, inbuf+total, ram_size-total, pos+total); if (done < 0) { - free(inbuf); + ret = -1; fprintf(stderr, "Error writing: %d %s\n", errno, strerror(errno)); - return -1; + goto out; } total += done; } - free(inbuf); - return 0; + ret = 0; + goto out; } - ret = decompress(inbuf, outbuf, disk_size, ram_size); - free(inbuf); + ret = decompress(inbuf, outbuf, disk_size, &ram_size, compress); if (ret) { - free(outbuf); - return ret; + num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, + bytenr, length); + mirror_num++; + if (mirror_num >= num_copies) { + ret = -1; + goto out; + } + fprintf(stderr, "Trying another mirror\n"); + goto again; } while (total < ram_size) { done = pwrite(fd, outbuf+total, ram_size-total, pos+total); if (done < 0) { - free(outbuf); - fprintf(stderr, "Error writing: %d %s\n", errno, strerror(errno)); - return -1; + ret = -1; + goto out; } total += done; } +out: + free(inbuf); free(outbuf); - - return 0; + return ret; } static int ask_to_continue(const char *file) @@ -385,7 +479,6 @@ static int copy_file(struct btrfs_root *root, int fd, struct btrfs_key *key, /* No more leaves to search */ btrfs_free_path(path); goto set_size; - return 0; } leaf = path->nodes[0]; } while (!leaf); @@ -431,13 +524,16 @@ next: btrfs_free_path(path); set_size: - if (found_size) - ftruncate(fd, (loff_t)found_size); + if (found_size) { + ret = ftruncate(fd, (loff_t)found_size); + if (ret) + return ret; + } return 0; } static int search_dir(struct btrfs_root 
*root, struct btrfs_key *key, - const char *dir) + const char *output_rootdir, const char *dir) { struct btrfs_path *path; struct extent_buffer *leaf; @@ -541,8 +637,11 @@ static int search_dir(struct btrfs_root *root, struct btrfs_key *key, type = btrfs_dir_type(leaf, dir_item); btrfs_dir_item_key_to_cpu(leaf, dir_item, &location); - snprintf(path_name, 4096, "%s/%s", dir, filename); + /* full path from root of btrfs being restored */ + snprintf(fs_name, 4096, "%s/%s", dir, filename); + /* full path from system root */ + snprintf(path_name, 4096, "%s%s", output_rootdir, fs_name); /* * At this point we're only going to restore directories and @@ -590,7 +689,7 @@ static int search_dir(struct btrfs_root *root, struct btrfs_key *key, } } else if (type == BTRFS_FT_DIR) { struct btrfs_root *search_root = root; - char *dir = strdup(path_name); + char *dir = strdup(fs_name); if (!dir) { fprintf(stderr, "Ran out of memory\n"); @@ -619,6 +718,7 @@ static int search_dir(struct btrfs_root *root, struct btrfs_key *key, PTR_ERR(search_root)); if (ignore_errors) goto next; + btrfs_free_path(path); return PTR_ERR(search_root); } @@ -651,7 +751,8 @@ static int search_dir(struct btrfs_root *root, struct btrfs_key *key, return -1; } loops = 0; - ret = search_dir(search_root, &location, dir); + ret = search_dir(search_root, &location, + output_rootdir, dir); free(dir); if (ret) { if (ignore_errors) @@ -670,27 +771,127 @@ next: return 0; } -static void usage() +static int do_list_roots(struct btrfs_root *root) { - fprintf(stderr, "Usage: restore [-svio] [-t disk offset] <device> " - "<directory>\n"); + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_disk_key disk_key; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_root_item ri; + unsigned long offset; + int slot; + int ret; + + root = root->fs_info->tree_root; + path = btrfs_alloc_path(); + if (!path) { + fprintf(stderr, "Failed to alloc path\n"); + return -1; + } + + key.offset = 0; + 
key.objectid = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + fprintf(stderr, "Failed to do search %d\n", ret); + btrfs_free_path(path); + return -1; + } + + while (1) { + leaf = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret) + break; + leaf = path->nodes[0]; + slot = path->slots[0]; + } + btrfs_item_key(leaf, &disk_key, slot); + btrfs_disk_key_to_cpu(&found_key, &disk_key); + if (btrfs_key_type(&found_key) != BTRFS_ROOT_ITEM_KEY) { + path->slots[0]++; + continue; + } + + offset = btrfs_item_ptr_offset(leaf, slot); + read_extent_buffer(leaf, &ri, offset, sizeof(ri)); + printf(" tree "); + btrfs_print_key(&disk_key); + printf(" %Lu level %d\n", btrfs_root_bytenr(&ri), + btrfs_root_level(&ri)); + path->slots[0]++; + } + btrfs_free_path(path); + + return 0; } -static struct btrfs_root *open_fs(const char *dev, u64 root_location, int super_mirror) +static struct btrfs_root *open_fs(const char *dev, u64 root_location, + int super_mirror, int list_roots) { - struct btrfs_root *root; + struct btrfs_fs_info *fs_info = NULL; + struct btrfs_root *root = NULL; u64 bytenr; int i; for (i = super_mirror; i < BTRFS_SUPER_MIRROR_MAX; i++) { bytenr = btrfs_sb_offset(i); - root = open_ctree_recovery(dev, bytenr, root_location); - if (root) - return root; + fs_info = open_ctree_fs_info(dev, bytenr, root_location, 0, 1); + if (fs_info) + break; fprintf(stderr, "Could not open root, trying backup super\n"); } - return NULL; + if (!fs_info) + return NULL; + + /* + * All we really need to succeed is reading the chunk tree, everything + * else we can do by hand, since we only need to read the tree root and + * the fs_root. 
+ */ + if (!extent_buffer_uptodate(fs_info->tree_root->node)) { + u64 generation; + + root = fs_info->tree_root; + if (!root_location) + root_location = btrfs_super_root(fs_info->super_copy); + generation = btrfs_super_generation(fs_info->super_copy); + root->node = read_tree_block(root, root_location, + root->leafsize, generation); + if (!extent_buffer_uptodate(root->node)) { + fprintf(stderr, "Error opening tree root\n"); + close_ctree(root); + return NULL; + } + } + + if (!list_roots && !fs_info->fs_root) { + struct btrfs_key key; + + key.objectid = BTRFS_FS_TREE_OBJECTID; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + fs_info->fs_root = btrfs_read_fs_root_no_cache(fs_info, &key); + if (IS_ERR(fs_info->fs_root)) { + fprintf(stderr, "Couldn't read fs root: %ld\n", + PTR_ERR(fs_info->fs_root)); + close_ctree(fs_info->tree_root); + return NULL; + } + } + + if (list_roots && do_list_roots(fs_info->tree_root)) { + close_ctree(fs_info->tree_root); + return NULL; + } + + return fs_info->fs_root; } static int find_first_dir(struct btrfs_root *root, u64 *objectid) @@ -708,7 +909,7 @@ static int find_first_dir(struct btrfs_root *root, u64 *objectid) path = btrfs_alloc_path(); if (!path) { fprintf(stderr, "Ran out of memory\n"); - goto out; + return ret; } ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -753,20 +954,37 @@ out: return ret; } -int main(int argc, char **argv) +const char * const cmd_restore_usage[] = { + "btrfs restore [options] <device>", + "Try to restore files from a damaged filesystem (unmounted)", + "", + "-s get snapshots", + "-v verbose", + "-i ignore errors", + "-o overwrite", + "-t tree location", + "-f <offset> filesystem location", + "-u <block> super mirror", + "-d find dir", + NULL +}; + +int cmd_restore(int argc, char **argv) { struct btrfs_root *root; struct btrfs_key key; char dir_name[128]; u64 tree_location = 0; u64 fs_location = 0; + u64 root_objectid = 0; int len; int ret; int opt; int super_mirror = 0; int find_dir 
= 0; + int list_roots = 0; - while ((opt = getopt(argc, argv, "sviot:u:df:")) != -1) { + while ((opt = getopt(argc, argv, "sviot:u:df:r:l")) != -1) { switch (opt) { case 's': get_snaps = 1; @@ -809,30 +1027,43 @@ int main(int argc, char **argv) case 'd': find_dir = 1; break; + case 'r': + errno = 0; + root_objectid = (u64)strtoll(optarg, NULL, 10); + if (errno != 0) { + fprintf(stderr, "Root objectid not valid\n"); + exit(1); + } + break; + case 'l': + list_roots = 1; + break; default: - usage(); - exit(1); + usage(cmd_restore_usage); } } - if (optind + 1 >= argc) { - usage(); - exit(1); - } + if (!list_roots && optind + 1 >= argc) + usage(cmd_restore_usage); + else if (list_roots && optind >= argc) + usage(cmd_restore_usage); if ((ret = check_mounted(argv[optind])) < 0) { fprintf(stderr, "Could not check mount status: %s\n", - strerror(ret)); + strerror(-ret)); return ret; } else if (ret) { - fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind + 1]); - return -EBUSY; + fprintf(stderr, "%s is currently mounted. 
Aborting.\n", argv[optind]); + return 1; } - root = open_fs(argv[optind], tree_location, super_mirror); + root = open_fs(argv[optind], tree_location, super_mirror, list_roots); if (root == NULL) return 1; + if (list_roots) + goto out; + if (fs_location != 0) { free_extent_buffer(root->node); root->node = read_tree_block(root, fs_location, 4096, 0); @@ -842,18 +1073,32 @@ int main(int argc, char **argv) } } - printf("Root objectid is %Lu\n", root->objectid); - memset(path_name, 0, 4096); - strncpy(dir_name, argv[optind + 1], 128); + strncpy(dir_name, argv[optind + 1], sizeof dir_name); + dir_name[sizeof dir_name - 1] = 0; /* Strip the trailing / on the dir name */ - while (1) { - len = strlen(dir_name); - if (dir_name[len - 1] != '/') - break; - dir_name[len - 1] = '\0'; + len = strlen(dir_name); + while (len && dir_name[--len] == '/') { + dir_name[len] = '\0'; + } + + if (root_objectid != 0) { + struct btrfs_root *orig_root = root; + + key.objectid = root_objectid; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + root = btrfs_read_fs_root(orig_root->fs_info, &key); + if (IS_ERR(root)) { + fprintf(stderr, "Error reading root\n"); + root = orig_root; + ret = 1; + goto out; + } + key.type = 0; + key.offset = 0; } if (find_dir) { @@ -864,7 +1109,7 @@ int main(int argc, char **argv) key.objectid = BTRFS_FIRST_FREE_OBJECTID; } - ret = search_dir(root->fs_info->fs_root, &key, dir_name); + ret = search_dir(root, &key, dir_name, ""); out: close_ctree(root); @@ -16,12 +16,15 @@ * Boston, MA 021110-1307, USA. 
*/ +#include "kerncompat.h" + #include <sys/ioctl.h> #include <sys/wait.h> #include <sys/stat.h> #include <sys/types.h> #include <sys/socket.h> #include <sys/un.h> +#include <sys/syscall.h> #include <poll.h> #include <sys/file.h> #include <uuid/uuid.h> @@ -34,11 +37,17 @@ #include "ctree.h" #include "ioctl.h" -#include "btrfs_cmds.h" #include "utils.h" #include "volumes.h" #include "disk-io.h" +#include "commands.h" + +static const char * const scrub_cmd_group_usage[] = { + "btrfs scrub <command> [options] <path>|<device>", + NULL +}; + #define SCRUB_DATA_FILE "/var/lib/btrfs/scrub.status" #define SCRUB_PROGRESS_SOCKET_PATH "/var/lib/btrfs/scrub.progress" #define SCRUB_FILE_VERSION_PREFIX "scrub status" @@ -52,6 +61,15 @@ struct scrub_stats { u64 canceled; }; +/* TBD: replace with #include "linux/ioprio.h" in some years */ +#if !defined (IOPRIO_H) +#define IOPRIO_WHO_PROCESS 1 +#define IOPRIO_CLASS_SHIFT 13 +#define IOPRIO_PRIO_VALUE(class, data) \ + (((class) << IOPRIO_CLASS_SHIFT) | (data)) +#define IOPRIO_CLASS_IDLE 3 +#endif + struct scrub_progress { struct btrfs_ioctl_scrub_args scrub_args; int fd; @@ -61,6 +79,8 @@ struct scrub_progress { struct scrub_file_record *resumed; int ioctl_errno; pthread_mutex_t progress_mutex; + int ioprio_class; + int ioprio_classdata; }; struct scrub_file_record { @@ -279,7 +299,11 @@ static void free_history(struct scrub_file_record **last_scrubs) static int cancel_fd = -1; static void scrub_sigint_record_progress(int signal) { - ioctl(cancel_fd, BTRFS_IOC_SCRUB_CANCEL, NULL); + int ret; + + ret = ioctl(cancel_fd, BTRFS_IOC_SCRUB_CANCEL, NULL); + if (ret < 0) + perror("Scrub cancel failed"); } static int scrub_handle_sigint_parent(void) @@ -746,12 +770,12 @@ static int scrub_write_progress(pthread_mutex_t *m, const char *fsid, { int ret; int err; - int fd = 0; + int fd = -1; int old; ret = pthread_mutex_lock(m); if (ret) { - err = -errno; + err = -ret; goto out; } @@ -774,7 +798,7 @@ static int 
scrub_write_progress(pthread_mutex_t *m, const char *fsid, goto out; out: - if (fd > 0) { + if (fd >= 0) { ret = close(fd); if (ret) err = -errno; @@ -801,6 +825,14 @@ static void *scrub_one_dev(void *ctx) sp->stats.duration = 0; sp->stats.finished = 0; + ret = syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0, + IOPRIO_PRIO_VALUE(sp->ioprio_class, + sp->ioprio_classdata)); + if (ret) + fprintf(stderr, + "WARNING: setting ioprio failed: %s (ignored).\n", + strerror(errno)); + ret = ioctl(sp->fd, BTRFS_IOC_SCRUB, &sp->scrub_args); gettimeofday(&tv, NULL); sp->ret = ret; @@ -828,9 +860,11 @@ static void *progress_one_dev(void *ctx) return NULL; } +/* nb: returns a negative errno via ERR_PTR */ static void *scrub_progress_cycle(void *ctx) { int ret; + int perr = 0; /* positive / pthread error returns */ int old; int i; char fsid[37]; @@ -855,9 +889,9 @@ static void *scrub_progress_cycle(void *ctx) struct sockaddr_un peer; socklen_t peer_size = sizeof(peer); - ret = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); - if (ret) - return ERR_PTR(-ret); + perr = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); + if (perr) + goto out; uuid_unparse(spc->fi->fsid, fsid); @@ -878,8 +912,10 @@ static void *scrub_progress_cycle(void *ctx) while (1) { ret = poll(&accept_poll_fd, 1, 5 * 1000); - if (ret == -1) - return ERR_PTR(-errno); + if (ret == -1) { + ret = -errno; + goto out; + } if (ret) peer_fd = accept(spc->prg_fd, (struct sockaddr *)&peer, &peer_size); @@ -897,42 +933,46 @@ static void *scrub_progress_cycle(void *ctx) if (!sp->ret) continue; if (sp->ioctl_errno != ENOTCONN && - sp->ioctl_errno != ENODEV) - return ERR_PTR(-sp->ioctl_errno); + sp->ioctl_errno != ENODEV) { + ret = -sp->ioctl_errno; + goto out; + } /* * scrub finished or device removed, check the * finished flag. if unset, just use the last * result we got for the current write and go * on. flag should be set on next cycle, then. 
*/ - ret = pthread_mutex_lock(&sp_shared->progress_mutex); - if (ret) - return ERR_PTR(-ret); + perr = pthread_mutex_lock(&sp_shared->progress_mutex); + if (perr) + goto out; if (!sp_shared->stats.finished) { - ret = pthread_mutex_unlock( + perr = pthread_mutex_unlock( &sp_shared->progress_mutex); - if (ret) - return ERR_PTR(-ret); + if (perr) + goto out; memcpy(sp, sp_last, sizeof(*sp)); continue; } - ret = pthread_mutex_unlock(&sp_shared->progress_mutex); - if (ret) - return ERR_PTR(-ret); + perr = pthread_mutex_unlock(&sp_shared->progress_mutex); + if (perr) + goto out; memcpy(sp, sp_shared, sizeof(*sp)); memcpy(sp_last, sp_shared, sizeof(*sp)); } if (peer_fd != -1) { write_poll_fd.fd = peer_fd; ret = poll(&write_poll_fd, 1, 0); - if (ret == -1) - return ERR_PTR(-errno); + if (ret == -1) { + ret = -errno; + goto out; + } if (ret) { ret = scrub_write_file( peer_fd, fsid, &spc->progress[this * ndev], ndev); if (ret) - return ERR_PTR(ret); + goto out; } close(peer_fd); peer_fd = -1; @@ -942,8 +982,14 @@ static void *scrub_progress_cycle(void *ctx) ret = scrub_write_progress(spc->write_mutex, fsid, &spc->progress[this * ndev], ndev); if (ret) - return ERR_PTR(ret); + goto out; } +out: + if (peer_fd != -1) + close(peer_fd); + if (perr) + ret = -perr; + return ERR_PTR(ret); } static struct scrub_file_record *last_dev_scrub( @@ -961,74 +1007,6 @@ static struct scrub_file_record *last_dev_scrub( return NULL; } -static int scrub_device_info(int fd, u64 devid, - struct btrfs_ioctl_dev_info_args *di_args) -{ - int ret; - - di_args->devid = devid; - memset(&di_args->uuid, '\0', sizeof(di_args->uuid)); - - ret = ioctl(fd, BTRFS_IOC_DEV_INFO, di_args); - return ret ? 
-errno : 0; -} - -static int scrub_fs_info(int fd, char *path, - struct btrfs_ioctl_fs_info_args *fi_args, - struct btrfs_ioctl_dev_info_args **di_ret) -{ - int ret = 0; - int ndevs = 0; - int i = 1; - struct btrfs_fs_devices *fs_devices_mnt = NULL; - struct btrfs_ioctl_dev_info_args *di_args; - char mp[BTRFS_PATH_NAME_MAX + 1]; - - memset(fi_args, 0, sizeof(*fi_args)); - - ret = ioctl(fd, BTRFS_IOC_FS_INFO, fi_args); - if (ret && errno == EINVAL) { - /* path is no mounted btrfs. try if it's a device */ - ret = check_mounted_where(fd, path, mp, sizeof(mp), - &fs_devices_mnt); - if (!ret) - return -EINVAL; - if (ret < 0) - return ret; - fi_args->num_devices = 1; - fi_args->max_id = fs_devices_mnt->latest_devid; - i = fs_devices_mnt->latest_devid; - memcpy(fi_args->fsid, fs_devices_mnt->fsid, BTRFS_FSID_SIZE); - close(fd); - fd = open_file_or_dir(mp); - if (fd < 0) - return -errno; - } else if (ret) { - return -errno; - } - - if (!fi_args->num_devices) - return 0; - - di_args = *di_ret = malloc(fi_args->num_devices * sizeof(*di_args)); - if (!di_args) - return -errno; - - for (; i <= fi_args->max_id; ++i) { - BUG_ON(ndevs >= fi_args->num_devices); - ret = scrub_device_info(fd, i, &di_args[ndevs]); - if (ret == -ENODEV) - continue; - if (ret) - return ret; - ++ndevs; - } - - BUG_ON(ndevs == 0); - - return 0; -} - int mkdir_p(char *path) { int i; @@ -1047,6 +1025,9 @@ int mkdir_p(char *path) return 0; } +static const char * const cmd_scrub_start_usage[]; +static const char * const cmd_scrub_resume_usage[]; + static int scrub_start(int argc, char **argv, int resume) { int fdmnt; @@ -1068,6 +1049,8 @@ static int scrub_start(int argc, char **argv, int resume) int do_record = 1; int readonly = 0; int do_stats_per_dev = 0; + int ioprio_class = IOPRIO_CLASS_IDLE; + int ioprio_classdata = 0; int n_start = 0; int n_skip = 0; int n_resume = 0; @@ -1093,7 +1076,7 @@ static int scrub_start(int argc, char **argv, int resume) u64 devid; optind = 1; - while ((c = getopt(argc, argv, 
"BdqrR")) != -1) { + while ((c = getopt(argc, argv, "BdqrRc:n:")) != -1) { switch (c) { case 'B': do_background = 0; @@ -1112,23 +1095,24 @@ static int scrub_start(int argc, char **argv, int resume) case 'R': print_raw = 1; break; + case 'c': + ioprio_class = (int)strtol(optarg, NULL, 10); + break; + case 'n': + ioprio_classdata = (int)strtol(optarg, NULL, 10); + break; case '?': default: - fprintf(stderr, "ERROR: scrub args invalid.\n" - " -B do not background\n" - " -d stats per device (-B only)\n" - " -q quiet\n" - " -r read only mode\n"); - return 1; + usage(resume ? cmd_scrub_resume_usage : + cmd_scrub_start_usage); } } /* try to catch most error cases before forking */ - if (optind + 1 != argc) { - fprintf(stderr, "ERROR: scrub start needs path as last " - "argument\n"); - return 1; + if (check_argc_exact(argc - optind, 1)) { + usage(resume ? cmd_scrub_resume_usage : + cmd_scrub_start_usage); } spc.progress = NULL; @@ -1145,13 +1129,14 @@ static int scrub_start(int argc, char **argv, int resume) path = argv[optind]; - fdmnt = open_file_or_dir(path); + fdmnt = open_path_or_dev_mnt(path); + if (fdmnt < 0) { ERR(!do_quiet, "ERROR: can't access '%s'\n", path); return 12; } - ret = scrub_fs_info(fdmnt, path, &fi_args, &di_args); + ret = get_fs_info(path, &fi_args, &di_args); if (ret) { ERR(!do_quiet, "ERROR: getting dev info for scrub failed: " "%s\n", strerror(-ret)); @@ -1225,6 +1210,8 @@ static int scrub_start(int argc, char **argv, int resume) sp[i].skip = 0; sp[i].scrub_args.end = (u64)-1ll; sp[i].scrub_args.flags = readonly ? BTRFS_SCRUB_READONLY : 0; + sp[i].ioprio_class = ioprio_class; + sp[i].ioprio_classdata = ioprio_classdata; } if (!n_start && !n_resume) { @@ -1254,6 +1241,7 @@ static int scrub_start(int argc, char **argv, int resume) /* ... yes, so scrub must be running. 
error out */ fprintf(stderr, "ERROR: scrub already running\n"); close(prg_fd); + prg_fd = -1; goto out; } /* @@ -1430,11 +1418,14 @@ static int scrub_start(int argc, char **argv, int resume) ret = pthread_cancel(t_prog); if (!ret) ret = pthread_join(t_prog, &terr); + + /* check for errors from the handling of the progress thread */ if (do_print && ret) { - fprintf(stderr, "ERROR: progress thead handling failed: %s\n", + fprintf(stderr, "ERROR: progress thread handling failed: %s\n", strerror(ret)); } + /* check for errors returned from the progress thread itself */ if (do_print && terr && terr != PTHREAD_CANCELED) { fprintf(stderr, "ERROR: recording progress " "failed: %s\n", strerror(-PTR_ERR(terr))); @@ -1473,64 +1464,96 @@ out: return 0; } -int do_scrub_start(int argc, char **argv) +static const char * const cmd_scrub_start_usage[] = { + "btrfs scrub start [-Bdqr] [-c ioprio_class -n ioprio_classdata] <path>|<device>", + "Start a new scrub", + "", + "-B do not background", + "-d stats per device (-B only)", + "-q be quiet", + "-r read only mode", + "-c set ioprio class (see ionice(1) manpage)", + "-n set ioprio classdata (see ionice(1) manpage)", + NULL +}; + +static int cmd_scrub_start(int argc, char **argv) { return scrub_start(argc, argv, 0); } -int do_scrub_resume(int argc, char **argv) -{ - return scrub_start(argc, argv, 1); -} +static const char * const cmd_scrub_cancel_usage[] = { + "btrfs scrub cancel <path>|<device>", + "Cancel a running scrub", + NULL +}; -int do_scrub_cancel(int argc, char **argv) +static int cmd_scrub_cancel(int argc, char **argv) { - char *path = argv[1]; + char *path; int ret; - int fdmnt; - int err; - char mp[BTRFS_PATH_NAME_MAX + 1]; - struct btrfs_fs_devices *fs_devices_mnt = NULL; + int fdmnt = -1; + + if (check_argc_exact(argc, 2)) + usage(cmd_scrub_cancel_usage); - fdmnt = open_file_or_dir(path); + path = argv[1]; + + fdmnt = open_path_or_dev_mnt(path); if (fdmnt < 0) { - fprintf(stderr, "ERROR: scrub cancel failed\n"); - 
return 12; + fprintf(stderr, "ERROR: could not open %s: %s\n", + path, strerror(errno)); + ret = 1; + goto out; } -again: ret = ioctl(fdmnt, BTRFS_IOC_SCRUB_CANCEL, NULL); - err = errno; - close(fdmnt); - - if (ret && err == EINVAL) { - /* path is no mounted btrfs. try if it's a device */ - ret = check_mounted_where(fdmnt, path, mp, sizeof(mp), - &fs_devices_mnt); - close(fdmnt); - if (ret) { - fdmnt = open_file_or_dir(mp); - if (fdmnt >= 0) { - path = mp; - goto again; - } - } - } - if (ret) { + if (ret < 0) { fprintf(stderr, "ERROR: scrub cancel failed on %s: %s\n", path, - err == ENOTCONN ? "not running" : strerror(errno)); - return 1; + errno == ENOTCONN ? "not running" : strerror(errno)); + ret = 1; + goto out; } + ret = 0; printf("scrub cancelled\n"); - return 0; +out: + if (fdmnt != -1) + close(fdmnt); + return ret; } -int do_scrub_status(int argc, char **argv) +static const char * const cmd_scrub_resume_usage[] = { + "btrfs scrub resume [-Bdqr] [-c ioprio_class -n ioprio_classdata] <path>|<device>", + "Resume previously canceled or interrupted scrub", + "", + "-B do not background", + "-d stats per device (-B only)", + "-q be quiet", + "-r read only mode", + "-c set ioprio class (see ionice(1) manpage)", + "-n set ioprio classdata (see ionice(1) manpage)", + NULL +}; + +static int cmd_scrub_resume(int argc, char **argv) { + return scrub_start(argc, argv, 1); +} + +static const char * const cmd_scrub_status_usage[] = { + "btrfs scrub status [-dR] <path>|<device>", + "Show status of running or finished scrub", + "", + "-d stats per device", + "-R print raw stats", + NULL +}; +static int cmd_scrub_status(int argc, char **argv) +{ char *path; struct btrfs_ioctl_fs_info_args fi_args; struct btrfs_ioctl_dev_info_args *di_args = NULL; @@ -1541,16 +1564,16 @@ int do_scrub_status(int argc, char **argv) .sun_family = AF_UNIX, }; int ret; - int fdmnt; int i; - optind = 1; + int fdmnt; int print_raw = 0; int do_stats_per_dev = 0; - char c; + int c; char fsid[37]; int 
fdres = -1; int err = 0; + optind = 1; while ((c = getopt(argc, argv, "dR")) != -1) { switch (c) { case 'd': @@ -1561,27 +1584,23 @@ int do_scrub_status(int argc, char **argv) break; case '?': default: - fprintf(stderr, "ERROR: scrub status args invalid.\n" - " -d stats per device\n"); - return 1; + usage(cmd_scrub_status_usage); } } - if (optind + 1 != argc) { - fprintf(stderr, "ERROR: scrub status needs path as last " - "argument\n"); - return 1; - } + if (check_argc_exact(argc - optind, 1)) + usage(cmd_scrub_status_usage); path = argv[optind]; - fdmnt = open_file_or_dir(path); + fdmnt = open_path_or_dev_mnt(path); + if (fdmnt < 0) { fprintf(stderr, "ERROR: can't access to '%s'\n", path); return 12; } - ret = scrub_fs_info(fdmnt, path, &fi_args, &di_args); + ret = get_fs_info(path, &fi_args, &di_args); if (ret) { fprintf(stderr, "ERROR: getting dev info for scrub failed: " "%s\n", strerror(-ret)); @@ -1610,6 +1629,7 @@ int do_scrub_status(int argc, char **argv) addr.sun_path[sizeof(addr.sun_path) - 1] = '\0'; ret = connect(fdres, (struct sockaddr *)&addr, sizeof(addr)); if (ret == -1) { + close(fdres); fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid); if (fdres < 0 && fdres != -ENOENT) { fprintf(stderr, "WARNING: failed to open status file: " @@ -1658,9 +1678,23 @@ int do_scrub_status(int argc, char **argv) out: free_history(past_scrubs); free(di_args); - close(fdmnt); if (fdres > -1) close(fdres); return err; } + +const struct cmd_group scrub_cmd_group = { + scrub_cmd_group_usage, NULL, { + { "start", cmd_scrub_start, cmd_scrub_start_usage, NULL, 0 }, + { "cancel", cmd_scrub_cancel, cmd_scrub_cancel_usage, NULL, 0 }, + { "resume", cmd_scrub_resume, cmd_scrub_resume_usage, NULL, 0 }, + { "status", cmd_scrub_status, cmd_scrub_status_usage, NULL, 0 }, + { 0, 0, 0, 0, 0 } + } +}; + +int cmd_scrub(int argc, char **argv) +{ + return handle_command_group(&scrub_cmd_group, argc, argv); +} diff --git a/cmds-send.c b/cmds-send.c new file mode 100644 index 0000000..0057e6b 
--- /dev/null +++ b/cmds-send.c @@ -0,0 +1,695 @@ +/* + * Copyright (C) 2012 Alexander Block. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#define _GNU_SOURCE + +#include "kerncompat.h" + +#include <unistd.h> +#include <stdint.h> +#include <dirent.h> +#include <fcntl.h> +#include <pthread.h> +#include <math.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/ioctl.h> +#include <libgen.h> +#include <mntent.h> + +#include <uuid/uuid.h> + +#include "ctree.h" +#include "ioctl.h" +#include "commands.h" +#include "list.h" + +#include "send.h" +#include "send-utils.h" + +static int g_verbose = 0; + +struct btrfs_send { + int send_fd; + int dump_fd; + int mnt_fd; + + u64 *clone_sources; + u64 clone_sources_count; + + char *root_path; + struct subvol_uuid_search sus; +}; + +int find_mount_root(const char *path, char **mount_root) +{ + FILE *mnttab; + int fd; + struct mntent *ent; + int len; + int longest_matchlen = 0; + char *longest_match = NULL; + + fd = open(path, O_RDONLY | O_NOATIME); + if (fd < 0) + return -errno; + close(fd); + + mnttab = fopen("/proc/mounts", "r"); + while ((ent = getmntent(mnttab))) { + len = strlen(ent->mnt_dir); + if (strncmp(ent->mnt_dir, path, len) == 0) { + /* match found */ + if (longest_matchlen < len) { + free(longest_match); + longest_matchlen = len; + longest_match = strdup(ent->mnt_dir); + } 
+ } + } + fclose(mnttab); + + if (!longest_match) { + fprintf(stderr, + "ERROR: Failed to find mount root for path %s.\n", + path); + return -ENOENT; + } + + *mount_root = realpath(longest_match, NULL); + free(longest_match); + + return 0; +} + +static int get_root_id(struct btrfs_send *s, const char *path, u64 *root_id) +{ + struct subvol_info *si; + + si = subvol_uuid_search(&s->sus, 0, NULL, 0, path, + subvol_search_by_path); + if (!si) + return -ENOENT; + *root_id = si->root_id; + return 0; +} + +static struct subvol_info *get_parent(struct btrfs_send *s, u64 root_id) +{ + struct subvol_info *si; + + si = subvol_uuid_search(&s->sus, root_id, NULL, 0, NULL, + subvol_search_by_root_id); + if (!si) + return NULL; + + si = subvol_uuid_search(&s->sus, 0, si->parent_uuid, 0, NULL, + subvol_search_by_uuid); + if (!si) + return NULL; + return si; +} + +static int find_good_parent(struct btrfs_send *s, u64 root_id, u64 *found) +{ + int ret; + struct subvol_info *parent; + struct subvol_info *parent2; + struct subvol_info *best_parent = NULL; + __s64 tmp; + u64 best_diff = (u64)-1; + int i; + + parent = get_parent(s, root_id); + if (!parent) { + ret = -ENOENT; + goto out; + } + + for (i = 0; i < s->clone_sources_count; i++) { + if (s->clone_sources[i] == parent->root_id) { + best_parent = parent; + goto out_found; + } + } + + for (i = 0; i < s->clone_sources_count; i++) { + parent2 = get_parent(s, s->clone_sources[i]); + if (parent2 != parent) + continue; + + parent2 = subvol_uuid_search(&s->sus, s->clone_sources[i], NULL, + 0, NULL, subvol_search_by_root_id); + + tmp = parent2->ctransid - parent->ctransid; + if (tmp < 0) + tmp *= -1; + if (tmp < best_diff) { + best_parent = parent2; + best_diff = tmp; + } + } + + if (!best_parent) { + ret = -ENOENT; + goto out; + } + +out_found: + *found = best_parent->root_id; + ret = 0; + +out: + return ret; +} + +static void add_clone_source(struct btrfs_send *s, u64 root_id) +{ + s->clone_sources = realloc(s->clone_sources, + 
sizeof(*s->clone_sources) * (s->clone_sources_count + 1)); + s->clone_sources[s->clone_sources_count++] = root_id; +} + +static int write_buf(int fd, const void *buf, int size) +{ + int ret; + int pos = 0; + + while (pos < size) { + ret = write(fd, (char*)buf + pos, size - pos); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: failed to dump stream. %s", + strerror(-ret)); + goto out; + } + if (!ret) { + ret = -EIO; + fprintf(stderr, "ERROR: failed to dump stream. %s", + strerror(-ret)); + goto out; + } + pos += ret; + } + ret = 0; + +out: + return ret; +} + +static void *dump_thread(void *arg_) +{ + int ret; + struct btrfs_send *s = (struct btrfs_send*)arg_; + char buf[4096]; + int readed; + + while (1) { + readed = read(s->send_fd, buf, sizeof(buf)); + if (readed < 0) { + ret = -errno; + fprintf(stderr, "ERROR: failed to read stream from " + "kernel. %s\n", strerror(-ret)); + goto out; + } + if (!readed) { + ret = 0; + goto out; + } + ret = write_buf(s->dump_fd, buf, readed); + if (ret < 0) + goto out; + } + +out: + if (ret < 0) { + exit(-ret); + } + + return ERR_PTR(ret); +} + +static int do_send(struct btrfs_send *send, u64 root_id, u64 parent_root_id) +{ + int ret; + pthread_t t_read; + pthread_attr_t t_attr; + struct btrfs_ioctl_send_args io_send; + struct subvol_info *si; + void *t_err = NULL; + int subvol_fd = -1; + int pipefd[2] = {-1, -1}; + + si = subvol_uuid_search(&send->sus, root_id, NULL, 0, NULL, + subvol_search_by_root_id); + if (!si) { + ret = -ENOENT; + fprintf(stderr, "ERROR: could not find subvol info for %llu", + root_id); + goto out; + } + + subvol_fd = openat(send->mnt_fd, si->path, O_RDONLY | O_NOATIME); + if (subvol_fd < 0) { + ret = -errno; + fprintf(stderr, "ERROR: open %s failed. %s\n", si->path, + strerror(-ret)); + goto out; + } + + ret = pthread_attr_init(&t_attr); + + ret = pipe(pipefd); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: pipe failed. 
%s\n", strerror(-ret)); + goto out; + } + + memset(&io_send, 0, sizeof(io_send)); + io_send.send_fd = pipefd[1]; + send->send_fd = pipefd[0]; + + if (!ret) + ret = pthread_create(&t_read, &t_attr, dump_thread, + send); + if (ret) { + ret = -ret; + fprintf(stderr, "ERROR: thread setup failed: %s\n", + strerror(-ret)); + goto out; + } + + io_send.clone_sources = (__u64*)send->clone_sources; + io_send.clone_sources_count = send->clone_sources_count; + io_send.parent_root = parent_root_id; + ret = ioctl(subvol_fd, BTRFS_IOC_SEND, &io_send); + if (ret) { + ret = -errno; + fprintf(stderr, "ERROR: send ioctl failed with %d: %s\n", ret, + strerror(-ret)); + goto out; + } + if (g_verbose > 0) + fprintf(stderr, "BTRFS_IOC_SEND returned %d\n", ret); + + if (g_verbose > 0) + fprintf(stderr, "joining genl thread\n"); + + close(pipefd[1]); + pipefd[1] = 0; + + ret = pthread_join(t_read, &t_err); + if (ret) { + ret = -ret; + fprintf(stderr, "ERROR: pthread_join failed: %s\n", + strerror(-ret)); + goto out; + } + if (t_err) { + ret = (long int)t_err; + fprintf(stderr, "ERROR: failed to process send stream, ret=%ld " + "(%s)\n", (long int)t_err, strerror(-ret)); + goto out; + } + + pthread_attr_destroy(&t_attr); + + ret = 0; + +out: + if (subvol_fd != -1) + close(subvol_fd); + if (pipefd[0] != -1) + close(pipefd[0]); + if (pipefd[1] != -1) + close(pipefd[1]); + return ret; +} + +char *get_subvol_name(char *mnt, char *full_path) +{ + int len = strlen(mnt); + if (!len) + return full_path; + if (mnt[len - 1] != '/') + len += 1; + + return full_path + len; +} + +static int init_root_path(struct btrfs_send *s, const char *subvol) +{ + int ret = 0; + + if (s->root_path) + goto out; + + ret = find_mount_root(subvol, &s->root_path); + if (ret < 0) { + ret = -EINVAL; + fprintf(stderr, "ERROR: failed to determine mount point " + "for %s\n", subvol); + goto out; + } + + s->mnt_fd = open(s->root_path, O_RDONLY | O_NOATIME); + if (s->mnt_fd < 0) { + ret = -errno; + fprintf(stderr, "ERROR: can't 
open '%s': %s\n", s->root_path, + strerror(-ret)); + goto out; + } + + ret = subvol_uuid_search_init(s->mnt_fd, &s->sus); + if (ret < 0) { + fprintf(stderr, "ERROR: failed to initialize subvol search. " + "%s\n", strerror(-ret)); + goto out; + } + +out: + return ret; + +} + +static int is_subvol_ro(struct btrfs_send *s, char *subvol) +{ + int ret; + u64 flags; + int fd = -1; + + fd = openat(s->mnt_fd, subvol, O_RDONLY | O_NOATIME); + if (fd < 0) { + ret = -errno; + fprintf(stderr, "ERROR: failed to open %s. %s\n", + subvol, strerror(-ret)); + goto out; + } + + ret = ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: failed to get flags for subvolume. " + "%s\n", strerror(-ret)); + goto out; + } + + if (flags & BTRFS_SUBVOL_RDONLY) + ret = 1; + else + ret = 0; + +out: + if (fd != -1) + close(fd); + + return ret; +} + +int cmd_send_start(int argc, char **argv) +{ + char *subvol = NULL; + int c; + int ret; + char *outname = NULL; + struct btrfs_send send; + u32 i; + char *mount_root = NULL; + char *snapshot_parent = NULL; + u64 root_id; + u64 parent_root_id = 0; + int full_send = 1; + + memset(&send, 0, sizeof(send)); + send.dump_fd = fileno(stdout); + + while ((c = getopt(argc, argv, "vc:f:i:p:")) != -1) { + switch (c) { + case 'v': + g_verbose++; + break; + case 'c': + subvol = realpath(optarg, NULL); + if (!subvol) { + ret = -errno; + fprintf(stderr, "ERROR: realpath %s failed. 
" + "%s\n", optarg, strerror(-ret)); + goto out; + } + + ret = init_root_path(&send, subvol); + if (ret < 0) + goto out; + + ret = get_root_id(&send, get_subvol_name(send.root_path, subvol), + &root_id); + if (ret < 0) { + fprintf(stderr, "ERROR: could not resolve " + "root_id for %s\n", subvol); + goto out; + } + add_clone_source(&send, root_id); + subvol_uuid_search_finit(&send.sus); + free(subvol); + subvol = NULL; + if (send.mnt_fd >= 0) { + close(send.mnt_fd); + send.mnt_fd = -1; + } + free(send.root_path); + send.root_path = NULL; + full_send = 0; + break; + case 'f': + outname = optarg; + break; + case 'p': + if (snapshot_parent) { + fprintf(stderr, "ERROR: you cannot have more than one parent (-p)\n"); + ret = 1; + goto out; + } + snapshot_parent = realpath(optarg, NULL); + if (!snapshot_parent) { + ret = -errno; + fprintf(stderr, "ERROR: realpath %s failed. " + "%s\n", optarg, strerror(-ret)); + goto out; + } + full_send = 0; + break; + case 'i': + fprintf(stderr, + "ERROR: -i was removed, use -c instead\n"); + ret = 1; + goto out; + case '?': + default: + fprintf(stderr, "ERROR: send args invalid.\n"); + ret = 1; + goto out; + } + } + + if (optind == argc) { + fprintf(stderr, "ERROR: send needs path to snapshot\n"); + ret = 1; + goto out; + } + + if (outname != NULL) { + send.dump_fd = creat(outname, 0600); + if (send.dump_fd == -1) { + ret = -errno; + fprintf(stderr, "ERROR: can't create '%s': %s\n", + outname, strerror(-ret)); + goto out; + } + } + + if (isatty(send.dump_fd)) { + fprintf(stderr, + "ERROR: not dumping send stream into a terminal, " + "redirect it into a file\n"); + ret = 1; + goto out; + } + + /* use first send subvol to determine mount_root */ + subvol = argv[optind]; + + subvol = realpath(argv[optind], NULL); + if (!subvol) { + ret = -errno; + fprintf(stderr, "ERROR: unable to resolve %s\n", argv[optind]); + goto out; + } + + ret = init_root_path(&send, subvol); + if (ret < 0) + goto out; + + if (snapshot_parent != NULL) { + ret = 
get_root_id(&send, + get_subvol_name(send.root_path, snapshot_parent), + &parent_root_id); + if (ret < 0) { + fprintf(stderr, "ERROR: could not resolve root_id " + "for %s\n", snapshot_parent); + goto out; + } + + add_clone_source(&send, parent_root_id); + } + + for (i = optind; i < argc; i++) { + free(subvol); + subvol = realpath(argv[i], NULL); + if (!subvol) { + ret = -errno; + fprintf(stderr, "ERROR: unable to resolve %s\n", argv[i]); + goto out; + } + + ret = find_mount_root(subvol, &mount_root); + if (ret < 0) { + fprintf(stderr, "ERROR: find_mount_root failed on %s: " + "%s\n", subvol, + strerror(-ret)); + goto out; + } + if (strcmp(send.root_path, mount_root) != 0) { + ret = -EINVAL; + fprintf(stderr, "ERROR: all subvols must be from the " + "same fs.\n"); + goto out; + } + free(mount_root); + + ret = is_subvol_ro(&send, subvol); + if (ret < 0) + goto out; + if (!ret) { + ret = -EINVAL; + fprintf(stderr, "ERROR: %s is not read-only.\n", + subvol); + goto out; + } + } + + for (i = optind; i < argc; i++) { + free(subvol); + subvol = argv[i]; + + fprintf(stderr, "At subvol %s\n", subvol); + + subvol = realpath(subvol, NULL); + if (!subvol) { + ret = -errno; + fprintf(stderr, "ERROR: realpath %s failed. 
" + "%s\n", argv[i], strerror(-ret)); + goto out; + } + + ret = get_root_id(&send, get_subvol_name(send.root_path, subvol), + &root_id); + if (ret < 0) { + fprintf(stderr, "ERROR: could not resolve root_id " + "for %s\n", subvol); + goto out; + } + + if (!full_send && !parent_root_id) { + ret = find_good_parent(&send, root_id, &parent_root_id); + if (ret < 0) { + fprintf(stderr, "ERROR: parent determination failed for %lld\n", + root_id); + goto out; + } + } + + ret = is_subvol_ro(&send, subvol); + if (ret < 0) + goto out; + if (!ret) { + ret = -EINVAL; + fprintf(stderr, "ERROR: %s is not read-only.\n", + subvol); + goto out; + } + + ret = do_send(&send, root_id, parent_root_id); + if (ret < 0) + goto out; + + /* done with this subvol, so add it to the clone sources */ + add_clone_source(&send, root_id); + + parent_root_id = 0; + full_send = 0; + } + + ret = 0; + +out: + free(subvol); + free(snapshot_parent); + free(send.clone_sources); + if (send.mnt_fd >= 0) + close(send.mnt_fd); + free(send.root_path); + subvol_uuid_search_finit(&send.sus); + return ret; +} + +static const char * const send_cmd_group_usage[] = { + "btrfs send <command> <args>", + NULL +}; + +const char * const cmd_send_usage[] = { + "btrfs send [-v] [-p <parent>] [-c <clone-src>] <subvol>", + "Send the subvolume to stdout.", + "Sends the subvolume specified by <subvol> to stdout.", + "By default, this will send the whole subvolume. To do an incremental", + "send, use '-p <parent>'. If you want to allow btrfs to clone from", + "any additional local snapshots, use -c <clone-src> (multiple times", + "where applicable). You must not specify clone sources unless you", + "guarantee that these snapshots are exactly in the same state on both", + "sides, the sender and the receiver. 
It is allowed to omit the", + "'-p <parent>' option when '-c <clone-src>' options are given, in", + "which case 'btrfs send' will determine a suitable parent among the", + "clone sources itself.", + "\n", + "-v Enable verbose debug output. Each occurrence of", + " this option increases the verbose level more.", + "-p <parent> Send an incremental stream from <parent> to", + " <subvol>.", + "-c <clone-src> Use this snapshot as a clone source for an ", + " incremental send (multiple allowed)", + "-f <outfile> Output is normally written to stdout. To write to", + " a file, use this option. An alternative would be to", + " use pipes.", + NULL +}; + +int cmd_send(int argc, char **argv) +{ + return cmd_send_start(argc, argv); +} diff --git a/cmds-subvolume.c b/cmds-subvolume.c new file mode 100644 index 0000000..ccb4762 --- /dev/null +++ b/cmds-subvolume.c @@ -0,0 +1,970 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <errno.h> +#include <sys/stat.h> +#include <libgen.h> +#include <limits.h> +#include <getopt.h> +#include <uuid/uuid.h> + +#include "kerncompat.h" +#include "ioctl.h" +#include "qgroup.h" + +#include "ctree.h" +#include "commands.h" +#include "utils.h" +#include "btrfs-list.h" +#include "utils.h" + +static const char * const subvolume_cmd_group_usage[] = { + "btrfs subvolume <command> <args>", + NULL +}; + +/* + * test if path is a directory + * this function return + * 0-> path exists but it is not a directory + * 1-> path exists and it is a directory + * -1 -> path is unaccessible + */ +static int test_isdir(char *path) +{ + struct stat st; + int res; + + res = stat(path, &st); + if(res < 0 ) + return -1; + + return S_ISDIR(st.st_mode); +} + +static const char * const cmd_subvol_create_usage[] = { + "btrfs subvolume create [-i <qgroupid>] [<dest>/]<name>", + "Create a subvolume", + "Create a subvolume <name> in <dest>. If <dest> is not given", + "subvolume <name> will be created in the current directory.", + "", + "-i <qgroupid> add the newly created subvolume to a qgroup. 
This", + " option can be given multiple times.", + NULL +}; + +static int cmd_subvol_create(int argc, char **argv) +{ + int retval, res, len; + int fddst = -1; + char *newname; + char *dstdir; + char *dst; + struct btrfs_qgroup_inherit *inherit = NULL; + + optind = 1; + while (1) { + int c = getopt(argc, argv, "c:i:r"); + if (c < 0) + break; + + switch (c) { + case 'c': + res = qgroup_inherit_add_copy(&inherit, optarg, 0); + if (res) + return res; + break; + case 'i': + res = qgroup_inherit_add_group(&inherit, optarg); + if (res) + return res; + break; + default: + usage(cmd_subvol_create_usage); + } + } + + if (check_argc_exact(argc - optind, 1)) + usage(cmd_subvol_create_usage); + + dst = argv[optind]; + + retval = 1; /* failure */ + res = test_isdir(dst); + if (res >= 0) { + fprintf(stderr, "ERROR: '%s' exists\n", dst); + goto out; + } + + newname = strdup(dst); + newname = basename(newname); + dstdir = strdup(dst); + dstdir = dirname(dstdir); + + if (!strcmp(newname, ".") || !strcmp(newname, "..") || + strchr(newname, '/') ){ + fprintf(stderr, "ERROR: uncorrect subvolume name ('%s')\n", + newname); + goto out; + } + + len = strlen(newname); + if (len == 0 || len >= BTRFS_VOL_NAME_MAX) { + fprintf(stderr, "ERROR: subvolume name too long ('%s)\n", + newname); + goto out; + } + + fddst = open_file_or_dir(dstdir); + if (fddst < 0) { + fprintf(stderr, "ERROR: can't access to '%s'\n", dstdir); + goto out; + } + + printf("Create subvolume '%s/%s'\n", dstdir, newname); + if (inherit) { + struct btrfs_ioctl_vol_args_v2 args; + + memset(&args, 0, sizeof(args)); + strncpy_null(args.name, newname); + args.flags |= BTRFS_SUBVOL_QGROUP_INHERIT; + args.size = qgroup_inherit_size(inherit); + args.qgroup_inherit = inherit; + + res = ioctl(fddst, BTRFS_IOC_SUBVOL_CREATE_V2, &args); + } else { + struct btrfs_ioctl_vol_args args; + + memset(&args, 0, sizeof(args)); + strncpy_null(args.name, newname); + + res = ioctl(fddst, BTRFS_IOC_SUBVOL_CREATE, &args); + } + + if (res < 0) { + 
fprintf(stderr, "ERROR: cannot create subvolume - %s\n", + strerror(errno)); + goto out; + } + + retval = 0; /* success */ +out: + if (fddst != -1) + close(fddst); + free(inherit); + + return retval; +} + +/* + * test if path is a subvolume: + * this function return + * 0-> path exists but it is not a subvolume + * 1-> path exists and it is a subvolume + * -1 -> path is unaccessible + */ +int test_issubvolume(char *path) +{ + struct stat st; + int res; + + res = stat(path, &st); + if(res < 0 ) + return -1; + + return (st.st_ino == 256) && S_ISDIR(st.st_mode); +} + +static const char * const cmd_subvol_delete_usage[] = { + "btrfs subvolume delete <subvolume> [<subvolume>...]", + "Delete subvolume(s)", + NULL +}; + +static int cmd_subvol_delete(int argc, char **argv) +{ + int res, fd, len, e, cnt = 1, ret = 0; + struct btrfs_ioctl_vol_args args; + char *dname, *vname, *cpath; + char *path; + + if (argc < 2) + usage(cmd_subvol_delete_usage); + +again: + path = argv[cnt]; + + res = test_issubvolume(path); + if(res<0){ + fprintf(stderr, "ERROR: error accessing '%s'\n", path); + ret = 12; + goto out; + } + if(!res){ + fprintf(stderr, "ERROR: '%s' is not a subvolume\n", path); + ret = 13; + goto out; + } + + cpath = realpath(path, 0); + dname = strdup(cpath); + dname = dirname(dname); + vname = strdup(cpath); + vname = basename(vname); + free(cpath); + + if( !strcmp(vname,".") || !strcmp(vname,"..") || + strchr(vname, '/') ){ + fprintf(stderr, "ERROR: incorrect subvolume name ('%s')\n", + vname); + ret = 14; + goto out; + } + + len = strlen(vname); + if (len == 0 || len >= BTRFS_VOL_NAME_MAX) { + fprintf(stderr, "ERROR: snapshot name too long ('%s)\n", + vname); + ret = 14; + goto out; + } + + fd = open_file_or_dir(dname); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access to '%s'\n", dname); + ret = 12; + goto out; + } + + printf("Delete subvolume '%s/%s'\n", dname, vname); + strncpy_null(args.name, vname); + res = ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &args); + e = 
errno; + + close(fd); + + if(res < 0 ){ + fprintf( stderr, "ERROR: cannot delete '%s/%s' - %s\n", + dname, vname, strerror(e)); + ret = 11; + goto out; + } + +out: + cnt++; + if (cnt < argc) + goto again; + + return ret; +} + +/* + * Naming of options: + * - uppercase for filters and sort options + * - lowercase for enabling specific items in the output + */ +static const char * const cmd_subvol_list_usage[] = { + "btrfs subvolume list [-agopurts] [-G [+|-]value] [-C [+|-]value] " + "[--sort=gen,ogen,rootid,path] <path>", + "List subvolumes (and snapshots)", + "", + "-p print parent ID", + "-a print all the subvolumes in the filesystem and", + " distinguish absolute and relative path with respect", + " to the given <path>", + "-c print the ogeneration of the subvolume", + "-g print the generation of the subvolume", + "-o print only subvolumes bellow specified path", + "-u print the uuid of subvolumes (and snapshots)", + "-q print the parent uuid of the snapshots", + "-t print the result as a table", + "-s list snapshots only in the filesystem", + "-r list readonly subvolumes (including snapshots)", + "-G [+|-]value", + " filter the subvolumes by generation", + " (+value: >= value; -value: <= value; value: = value)", + "-C [+|-]value", + " filter the subvolumes by ogeneration", + " (+value: >= value; -value: <= value; value: = value)", + "--sort=gen,ogen,rootid,path", + " list the subvolume in order of gen, ogen, rootid or path", + " you also can add '+' or '-' in front of each items.", + " (+:ascending, -:descending, ascending default)", + NULL, +}; + +static int cmd_subvol_list(int argc, char **argv) +{ + struct btrfs_list_filter_set *filter_set; + struct btrfs_list_comparer_set *comparer_set; + u64 flags = 0; + int fd = -1; + u64 top_id; + int ret = -1, uerr = 0; + int c; + char *subvol; + int is_tab_result = 0; + int is_list_all = 0; + int is_only_in_path = 0; + struct option long_options[] = { + {"sort", 1, NULL, 'S'}, + {0, 0, 0, 0} + }; + + filter_set = 
btrfs_list_alloc_filter_set(); + comparer_set = btrfs_list_alloc_comparer_set(); + + optind = 1; + while(1) { + c = getopt_long(argc, argv, + "acgopqsurG:C:t", long_options, NULL); + if (c < 0) + break; + + switch(c) { + case 'p': + btrfs_list_setup_print_column(BTRFS_LIST_PARENT); + break; + case 'a': + is_list_all = 1; + break; + case 'c': + btrfs_list_setup_print_column(BTRFS_LIST_OGENERATION); + break; + case 'g': + btrfs_list_setup_print_column(BTRFS_LIST_GENERATION); + break; + case 'o': + is_only_in_path = 1; + break; + case 't': + is_tab_result = 1; + break; + case 's': + btrfs_list_setup_filter(&filter_set, + BTRFS_LIST_FILTER_SNAPSHOT_ONLY, + 0); + btrfs_list_setup_print_column(BTRFS_LIST_OGENERATION); + btrfs_list_setup_print_column(BTRFS_LIST_OTIME); + break; + case 'u': + btrfs_list_setup_print_column(BTRFS_LIST_UUID); + break; + case 'q': + btrfs_list_setup_print_column(BTRFS_LIST_PUUID); + break; + case 'r': + flags |= BTRFS_ROOT_SUBVOL_RDONLY; + break; + case 'G': + btrfs_list_setup_print_column(BTRFS_LIST_GENERATION); + ret = btrfs_list_parse_filter_string(optarg, + &filter_set, + BTRFS_LIST_FILTER_GEN); + if (ret) { + uerr = 1; + goto out; + } + break; + + case 'C': + btrfs_list_setup_print_column(BTRFS_LIST_OGENERATION); + ret = btrfs_list_parse_filter_string(optarg, + &filter_set, + BTRFS_LIST_FILTER_CGEN); + if (ret) { + uerr = 1; + goto out; + } + break; + case 'S': + ret = btrfs_list_parse_sort_string(optarg, + &comparer_set); + if (ret) { + uerr = 1; + goto out; + } + break; + + default: + uerr = 1; + goto out; + } + } + + if (flags) + btrfs_list_setup_filter(&filter_set, BTRFS_LIST_FILTER_FLAGS, + flags); + + if (check_argc_exact(argc - optind, 1)) { + uerr = 1; + goto out; + } + + subvol = argv[optind]; + + ret = test_issubvolume(subvol); + if (ret < 0) { + fprintf(stderr, "ERROR: error accessing '%s'\n", subvol); + goto out; + } + if (!ret) { + fprintf(stderr, "ERROR: '%s' is not a subvolume\n", subvol); + ret = -1; + goto out; + } + + fd 
= open_file_or_dir(subvol); + if (fd < 0) { + ret = -1; + fprintf(stderr, "ERROR: can't access '%s'\n", subvol); + goto out; + } + + ret = btrfs_list_get_path_rootid(fd, &top_id); + if (ret) { + fprintf(stderr, "ERROR: can't get rootid for '%s'\n", subvol); + goto out; + } + + if (is_list_all) + btrfs_list_setup_filter(&filter_set, + BTRFS_LIST_FILTER_FULL_PATH, + top_id); + else if (is_only_in_path) + btrfs_list_setup_filter(&filter_set, + BTRFS_LIST_FILTER_TOPID_EQUAL, + top_id); + + /* by default we shall print the following columns*/ + btrfs_list_setup_print_column(BTRFS_LIST_OBJECTID); + btrfs_list_setup_print_column(BTRFS_LIST_GENERATION); + btrfs_list_setup_print_column(BTRFS_LIST_TOP_LEVEL); + btrfs_list_setup_print_column(BTRFS_LIST_PATH); + + if (is_tab_result) + ret = btrfs_list_subvols_print(fd, filter_set, comparer_set, + BTRFS_LIST_LAYOUT_TABLE, + !is_list_all && !is_only_in_path, NULL); + else + ret = btrfs_list_subvols_print(fd, filter_set, comparer_set, + BTRFS_LIST_LAYOUT_DEFAULT, + !is_list_all && !is_only_in_path, NULL); + +out: + if (fd != -1) + close(fd); + if (filter_set) + btrfs_list_free_filter_set(filter_set); + if (comparer_set) + btrfs_list_free_comparer_set(comparer_set); + if (uerr) + usage(cmd_subvol_list_usage); + + return ret; +} + +static const char * const cmd_snapshot_usage[] = { + "btrfs subvolume snapshot [-r] <source> [<dest>/]<name>", + "btrfs subvolume snapshot [-r] [-i <qgroupid>] <source> [<dest>/]<name>", + "Create a snapshot of the subvolume", + "Create a writable/readonly snapshot of the subvolume <source> with", + "the name <name> in the <dest> directory", + "", + "-r create a readonly snapshot", + "-i <qgroupid> add the newly created snapshot to a qgroup. 
This", + " option can be given multiple times.", + NULL +}; + +static int cmd_snapshot(int argc, char **argv) +{ + char *subvol, *dst; + int res, retval; + int fd = -1, fddst = -1; + int len, readonly = 0; + char *newname; + char *dstdir; + struct btrfs_ioctl_vol_args_v2 args; + struct btrfs_qgroup_inherit *inherit = NULL; + + optind = 1; + memset(&args, 0, sizeof(args)); + while (1) { + int c = getopt(argc, argv, "c:i:r"); + if (c < 0) + break; + + switch (c) { + case 'c': + res = qgroup_inherit_add_copy(&inherit, optarg, 0); + if (res) + return res; + break; + case 'i': + res = qgroup_inherit_add_group(&inherit, optarg); + if (res) + return res; + break; + case 'r': + readonly = 1; + break; + case 'x': + res = qgroup_inherit_add_copy(&inherit, optarg, 1); + if (res) + return res; + break; + default: + usage(cmd_snapshot_usage); + } + } + + if (check_argc_exact(argc - optind, 2)) + usage(cmd_snapshot_usage); + + subvol = argv[optind]; + dst = argv[optind + 1]; + + retval = 1; /* failure */ + res = test_issubvolume(subvol); + if (res < 0) { + fprintf(stderr, "ERROR: error accessing '%s'\n", subvol); + goto out; + } + if (!res) { + fprintf(stderr, "ERROR: '%s' is not a subvolume\n", subvol); + goto out; + } + + res = test_isdir(dst); + if (res == 0) { + fprintf(stderr, "ERROR: '%s' exists and it is not a directory\n", dst); + goto out; + } + + if (res > 0) { + newname = strdup(subvol); + newname = basename(newname); + dstdir = dst; + } else { + newname = strdup(dst); + newname = basename(newname); + dstdir = strdup(dst); + dstdir = dirname(dstdir); + } + + if (!strcmp(newname, ".") || !strcmp(newname, "..") || + strchr(newname, '/') ){ + fprintf(stderr, "ERROR: incorrect snapshot name ('%s')\n", + newname); + goto out; + } + + len = strlen(newname); + if (len == 0 || len >= BTRFS_VOL_NAME_MAX) { + fprintf(stderr, "ERROR: snapshot name too long ('%s)\n", + newname); + goto out; + } + + fddst = open_file_or_dir(dstdir); + if (fddst < 0) { + fprintf(stderr, "ERROR: 
can't access to '%s'\n", dstdir); + goto out; + } + + fd = open_file_or_dir(subvol); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access to '%s'\n", dstdir); + goto out; + } + + if (readonly) { + args.flags |= BTRFS_SUBVOL_RDONLY; + printf("Create a readonly snapshot of '%s' in '%s/%s'\n", + subvol, dstdir, newname); + } else { + printf("Create a snapshot of '%s' in '%s/%s'\n", + subvol, dstdir, newname); + } + + args.fd = fd; + if (inherit) { + args.flags |= BTRFS_SUBVOL_QGROUP_INHERIT; + args.size = qgroup_inherit_size(inherit); + args.qgroup_inherit = inherit; + } + strncpy_null(args.name, newname); + + res = ioctl(fddst, BTRFS_IOC_SNAP_CREATE_V2, &args); + + if (res < 0) { + fprintf( stderr, "ERROR: cannot snapshot '%s' - %s\n", + subvol, strerror(errno)); + goto out; + } + + retval = 0; /* success */ + +out: + if (fd != -1) + close(fd); + if (fddst != -1) + close(fddst); + free(inherit); + + return retval; +} + +static const char * const cmd_subvol_get_default_usage[] = { + "btrfs subvolume get-default <path>", + "Get the default subvolume of a filesystem", + NULL +}; + +static int cmd_subvol_get_default(int argc, char **argv) +{ + int fd = -1; + int ret; + char *subvol; + struct btrfs_list_filter_set *filter_set; + u64 default_id; + + if (check_argc_exact(argc, 2)) + usage(cmd_subvol_get_default_usage); + + subvol = argv[1]; + + ret = test_issubvolume(subvol); + if (ret < 0) { + fprintf(stderr, "ERROR: error accessing '%s'\n", subvol); + return 1; + } + if (!ret) { + fprintf(stderr, "ERROR: '%s' is not a subvolume\n", subvol); + return 1; + } + + fd = open_file_or_dir(subvol); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access '%s'\n", subvol); + return 1; + } + + ret = btrfs_list_get_default_subvolume(fd, &default_id); + if (ret) { + fprintf(stderr, "ERROR: can't perform the search - %s\n", + strerror(errno)); + goto out; + } + + ret = 1; + if (default_id == 0) { + fprintf(stderr, "ERROR: 'default' dir item not found\n"); + goto out; + } + + /* no 
need to resolve roots if FS_TREE is default */ + if (default_id == BTRFS_FS_TREE_OBJECTID) { + printf("ID 5 (FS_TREE)\n"); + goto out; + } + + filter_set = btrfs_list_alloc_filter_set(); + btrfs_list_setup_filter(&filter_set, BTRFS_LIST_FILTER_ROOTID, + default_id); + + /* by default we shall print the following columns*/ + btrfs_list_setup_print_column(BTRFS_LIST_OBJECTID); + btrfs_list_setup_print_column(BTRFS_LIST_GENERATION); + btrfs_list_setup_print_column(BTRFS_LIST_TOP_LEVEL); + btrfs_list_setup_print_column(BTRFS_LIST_PATH); + + ret = btrfs_list_subvols_print(fd, filter_set, NULL, + BTRFS_LIST_LAYOUT_DEFAULT, 1, NULL); + + if (filter_set) + btrfs_list_free_filter_set(filter_set); +out: + if (fd != -1) + close(fd); + if (ret) + return 1; + return 0; +} + +static const char * const cmd_subvol_set_default_usage[] = { + "btrfs subvolume set-default <subvolid> <path>", + "Set the default subvolume of a filesystem", + NULL +}; + +static int cmd_subvol_set_default(int argc, char **argv) +{ + int ret=0, fd, e; + u64 objectid; + char *path; + char *subvolid; + + if (check_argc_exact(argc, 3)) + usage(cmd_subvol_set_default_usage); + + subvolid = argv[1]; + path = argv[2]; + + objectid = (unsigned long long)strtoll(subvolid, NULL, 0); + if (errno == ERANGE) { + fprintf(stderr, "ERROR: invalid tree id (%s)\n", subvolid); + return 1; + } + + fd = open_file_or_dir(path); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access to '%s'\n", path); + return 1; + } + + ret = ioctl(fd, BTRFS_IOC_DEFAULT_SUBVOL, &objectid); + e = errno; + close(fd); + if (ret < 0) { + fprintf(stderr, "ERROR: unable to set a new default subvolume - %s\n", + strerror(e)); + return 1; + } + return 0; +} + +static const char * const cmd_find_new_usage[] = { + "btrfs subvolume find-new <path> <lastgen>", + "List the recently modified files in a filesystem", + NULL +}; + +static int cmd_find_new(int argc, char **argv) +{ + int fd; + int ret; + char *subvol; + u64 last_gen; + + if 
(check_argc_exact(argc, 3)) + usage(cmd_find_new_usage); + + subvol = argv[1]; + last_gen = atoll(argv[2]); + + ret = test_issubvolume(subvol); + if (ret < 0) { + fprintf(stderr, "ERROR: error accessing '%s'\n", subvol); + return 12; + } + if (!ret) { + fprintf(stderr, "ERROR: '%s' is not a subvolume\n", subvol); + return 13; + } + + fd = open_file_or_dir(subvol); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access '%s'\n", subvol); + return 12; + } + ret = btrfs_list_find_updated_files(fd, 0, last_gen); + close(fd); + if (ret) + return 19; + return 0; +} + +static const char * const cmd_subvol_show_usage[] = { + "btrfs subvolume show <subvol-path>", + "Show more information of the subvolume", + NULL +}; + +static int cmd_subvol_show(int argc, char **argv) +{ + struct root_info get_ri; + struct btrfs_list_filter_set *filter_set; + char tstr[256]; + char uuidparse[37]; + char *fullpath = NULL, *svpath = NULL, *mnt = NULL; + char raw_prefix[] = "\t\t\t\t"; + u64 sv_id, mntid; + int fd = -1, mntfd = -1; + int ret = -1; + + if (check_argc_exact(argc, 2)) + usage(cmd_subvol_show_usage); + + fullpath = realpath(argv[1], 0); + if (!fullpath) { + fprintf(stderr, "ERROR: finding real path for '%s', %s\n", + argv[1], strerror(errno)); + goto out; + } + + ret = test_issubvolume(fullpath); + if (ret < 0) { + fprintf(stderr, "ERROR: error accessing '%s'\n", fullpath); + goto out; + } + if (!ret) { + fprintf(stderr, "ERROR: '%s' is not a subvolume\n", fullpath); + ret = -1; + goto out; + } + + ret = find_mount_root(fullpath, &mnt); + if (ret < 0) { + fprintf(stderr, "ERROR: find_mount_root failed on %s: " + "%s\n", fullpath, strerror(-ret)); + goto out; + } + ret = -1; + svpath = get_subvol_name(mnt, fullpath); + + fd = open_file_or_dir(fullpath); + if (fd < 0) { + fprintf(stderr, "ERROR: can't access '%s'\n", fullpath); + goto out; + } + + ret = btrfs_list_get_path_rootid(fd, &sv_id); + if (ret) { + fprintf(stderr, "ERROR: can't get rootid for '%s'\n", + fullpath); + goto 
out; + } + + mntfd = open_file_or_dir(mnt); + if (mntfd < 0) { + fprintf(stderr, "ERROR: can't access '%s'\n", mnt); + goto out; + } + + ret = btrfs_list_get_path_rootid(mntfd, &mntid); + if (ret) { + fprintf(stderr, "ERROR: can't get rootid for '%s'\n", mnt); + goto out; + } + + if (sv_id == BTRFS_FS_TREE_OBJECTID) { + printf("%s is btrfs root\n", fullpath); + goto out; + } + + memset(&get_ri, 0, sizeof(get_ri)); + get_ri.root_id = sv_id; + + if (btrfs_get_subvol(mntfd, &get_ri)) { + fprintf(stderr, "ERROR: can't find '%s'\n", + svpath); + goto out; + } + + ret = 0; + /* print the info */ + printf("%s\n", fullpath); + printf("\tName: \t\t\t%s\n", get_ri.name); + + if (uuid_is_null(get_ri.uuid)) + strcpy(uuidparse, "-"); + else + uuid_unparse(get_ri.uuid, uuidparse); + printf("\tuuid: \t\t\t%s\n", uuidparse); + + if (uuid_is_null(get_ri.puuid)) + strcpy(uuidparse, "-"); + else + uuid_unparse(get_ri.puuid, uuidparse); + printf("\tParent uuid: \t\t%s\n", uuidparse); + + if (get_ri.otime) + strftime(tstr, 256, "%Y-%m-%d %X", + localtime(&get_ri.otime)); + else + strcpy(tstr, "-"); + printf("\tCreation time: \t\t%s\n", tstr); + + printf("\tObject ID: \t\t%llu\n", get_ri.root_id); + printf("\tGeneration (Gen): \t%llu\n", get_ri.gen); + printf("\tGen at creation: \t%llu\n", get_ri.ogen); + printf("\tParent: \t\t%llu\n", get_ri.ref_tree); + printf("\tTop Level: \t\t%llu\n", get_ri.top_id); + + if (get_ri.flags & BTRFS_ROOT_SUBVOL_RDONLY) + printf("\tFlags: \t\t\treadonly\n"); + else + printf("\tFlags: \t\t\t-\n"); + + /* print the snapshots of the given subvol if any*/ + printf("\tSnapshot(s):\n"); + filter_set = btrfs_list_alloc_filter_set(); + btrfs_list_setup_filter(&filter_set, BTRFS_LIST_FILTER_BY_PARENT, + (u64)(unsigned long)get_ri.uuid); + btrfs_list_setup_print_column(BTRFS_LIST_PATH); + btrfs_list_subvols_print(fd, filter_set, NULL, BTRFS_LIST_LAYOUT_RAW, + 1, raw_prefix); + + /* clean up */ + if (get_ri.path) + free(get_ri.path); + if (get_ri.name) + 
free(get_ri.name); + if (get_ri.full_path) + free(get_ri.full_path); + if (filter_set) + btrfs_list_free_filter_set(filter_set); + +out: + if (mntfd >= 0) + close(mntfd); + if (fd >= 0) + close(fd); + if (mnt) + free(mnt); + if (fullpath) + free(fullpath); + + return ret; +} + +const struct cmd_group subvolume_cmd_group = { + subvolume_cmd_group_usage, NULL, { + { "create", cmd_subvol_create, cmd_subvol_create_usage, NULL, 0 }, + { "delete", cmd_subvol_delete, cmd_subvol_delete_usage, NULL, 0 }, + { "list", cmd_subvol_list, cmd_subvol_list_usage, NULL, 0 }, + { "snapshot", cmd_snapshot, cmd_snapshot_usage, NULL, 0 }, + { "get-default", cmd_subvol_get_default, + cmd_subvol_get_default_usage, NULL, 0 }, + { "set-default", cmd_subvol_set_default, + cmd_subvol_set_default_usage, NULL, 0 }, + { "find-new", cmd_find_new, cmd_find_new_usage, NULL, 0 }, + { "show", cmd_subvol_show, cmd_subvol_show_usage, NULL, 0 }, + { 0, 0, 0, 0, 0 } + } +}; + +int cmd_subvolume(int argc, char **argv) +{ + return handle_command_group(&subvolume_cmd_group, argc, argv); +} diff --git a/commands.h b/commands.h new file mode 100644 index 0000000..15c616d --- /dev/null +++ b/commands.h @@ -0,0 +1,121 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#define ARGV0_BUF_SIZE 64 + +struct cmd_struct { + const char *token; + int (*fn)(int, char **); + + /* + * Usage strings + * + * A NULL-terminated array of the following format: + * + * usagestr[0] - one-line synopsis (required) + * usagestr[1] - one-line short description (required) + * usagestr[2..m] - a long (possibly multi-line) description + * (optional) + * usagestr[m + 1] - an empty line separator (required if at least one + * option string is given, not needed otherwise) + * usagestr[m + 2..n] - option strings, one option per line + * (optional) + * usagestr[n + 1] - NULL terminator + * + * Options (if present) should always (even if there is no long + * description) be prepended with an empty line. Supplied strings are + * indented but otherwise printed as-is, no automatic wrapping is done. + * + * Grep for cmd_*_usage[] for examples. + */ + const char * const *usagestr; + + /* should be NULL if token is not a subgroup */ + const struct cmd_group *next; + + /* if true don't list this token in help listings */ + int hidden; +}; + +struct cmd_group { + const char * const *usagestr; + const char *infostr; + + const struct cmd_struct commands[]; +}; + +/* btrfs.c */ +int prefixcmp(const char *str, const char *prefix); + +int check_argc_exact(int nargs, int expected); +int check_argc_min(int nargs, int expected); +int check_argc_max(int nargs, int expected); + +int handle_command_group(const struct cmd_group *grp, int argc, + char **argv); + +/* help.c */ +extern const char * const generic_cmd_help_usage[]; + +void usage(const char * const *usagestr); +void usage_command(const struct cmd_struct *cmd, int full, int err); +void usage_command_group(const struct cmd_group *grp, int all, int err); + +void help_unknown_token(const char *arg, const struct cmd_group *grp); +void help_ambiguous_token(const char *arg, const struct cmd_group *grp); + +void help_command_group(const struct cmd_group *grp, int argc, char **argv); + +extern const struct cmd_group 
subvolume_cmd_group; +extern const struct cmd_group filesystem_cmd_group; +extern const struct cmd_group balance_cmd_group; +extern const struct cmd_group device_cmd_group; +extern const struct cmd_group scrub_cmd_group; +extern const struct cmd_group inspect_cmd_group; +extern const struct cmd_group send_cmd_group; +extern const struct cmd_group receive_cmd_group; +extern const struct cmd_group quota_cmd_group; +extern const struct cmd_group qgroup_cmd_group; +extern const struct cmd_group replace_cmd_group; + +extern const char * const cmd_send_usage[]; +extern const char * const cmd_receive_usage[]; +extern const char * const cmd_check_usage[]; +extern const char * const cmd_restore_usage[]; + +int cmd_subvolume(int argc, char **argv); +int cmd_filesystem(int argc, char **argv); +int cmd_balance(int argc, char **argv); +int cmd_device(int argc, char **argv); +int cmd_scrub(int argc, char **argv); +int cmd_check(int argc, char **argv); +int cmd_inspect(int argc, char **argv); +int cmd_send(int argc, char **argv); +int cmd_receive(int argc, char **argv); +int cmd_quota(int argc, char **argv); +int cmd_qgroup(int argc, char **argv); +int cmd_replace(int argc, char **argv); +int cmd_restore(int argc, char **argv); +int cmd_select_super(int argc, char **argv); +int cmd_dump_super(int argc, char **argv); +int cmd_debug_tree(int argc, char **argv); + +/* subvolume exported functions */ +int test_issubvolume(char *path); + +/* send.c */ +int find_mount_root(const char *path, char **mount_root); +char *get_subvol_name(char *mnt, char *full_path); @@ -9,6 +9,123 @@ */ #include "kerncompat.h" #include "crc32c.h" +#include <inttypes.h> +#include <string.h> +#include <unistd.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/wait.h> + +u32 __crc32c_le(u32 crc, unsigned char const *data, size_t length); +static u32 (*crc_function)(u32 crc, unsigned char const *data, size_t length) = __crc32c_le; + +#ifdef __x86_64__ + +/* + * Based on a 
posting to lkml by Austin Zhang <austin.zhang@intel.com> + * + * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal. + * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE) + * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at: + * http://www.intel.com/products/processor/manuals/ + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual + * Volume 2A: Instruction Set Reference, A-M + */ +#if __SIZEOF_LONG__ == 8 +#define REX_PRE "0x48, " +#define SCALE_F 8 +#else +#define REX_PRE +#define SCALE_F 4 +#endif + +static int crc32c_probed = 0; +static int crc32c_intel_available = 0; + +static uint32_t crc32c_intel_le_hw_byte(uint32_t crc, unsigned char const *data, + unsigned long length) +{ + while (length--) { + __asm__ __volatile__( + ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1" + :"=S"(crc) + :"0"(crc), "c"(*data) + ); + data++; + } + + return crc; +} + +/* + * Steps through the buffer SCALE_F bytes at a time using the hardware + * CRC32 instruction; any remaining bytes are handled one at a time. 
+ */ +uint32_t crc32c_intel(u32 crc, unsigned char const *data, unsigned long length) +{ + unsigned int iquotient = length / SCALE_F; + unsigned int iremainder = length % SCALE_F; + unsigned long *ptmp = (unsigned long *)data; + + while (iquotient--) { + __asm__ __volatile__( + ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;" + :"=S"(crc) + :"0"(crc), "c"(*ptmp) + ); + ptmp++; + } + + if (iremainder) + crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp, + iremainder); + + return crc; +} + +static void do_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, + unsigned int *edx) +{ + int id = *eax; + + asm("movl %4, %%eax;" + "cpuid;" + "movl %%eax, %0;" + "movl %%ebx, %1;" + "movl %%ecx, %2;" + "movl %%edx, %3;" + : "=r" (*eax), "=r" (*ebx), "=r" (*ecx), "=r" (*edx) + : "r" (id) + : "eax", "ebx", "ecx", "edx"); +} + +void crc32c_intel_probe(void) +{ + if (!crc32c_probed) { + unsigned int eax, ebx, ecx, edx; + + eax = 1; + + do_cpuid(&eax, &ebx, &ecx, &edx); + crc32c_intel_available = (ecx & (1 << 20)) != 0; + crc32c_probed = 1; + } +} + +void crc32c_optimization_init(void) +{ + crc32c_intel_probe(); + if (crc32c_intel_available) + crc_function = crc32c_intel; +} +#else + +void crc32c_optimization_init(void) +{ +} + +#endif /* __x86_64__ */ /* * This is the CRC-32C table @@ -91,11 +208,15 @@ static const u32 crc32c_table[256] = { * crc using table. 
*/ -u32 crc32c_le(u32 crc, unsigned char const *data, size_t length) +u32 __crc32c_le(u32 crc, unsigned char const *data, size_t length) { while (length--) crc = crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8); - return crc; } + +u32 crc32c_le(u32 crc, unsigned char const *data, size_t length) +{ + return crc_function(crc, data, length); +} @@ -19,9 +19,14 @@ #ifndef __CRC32C__ #define __CRC32C__ +#if BTRFS_FLAT_INCLUDES #include "kerncompat.h" +#else +#include <btrfs/kerncompat.h> +#endif /* BTRFS_FLAT_INCLUDES */ u32 crc32c_le(u32 seed, unsigned char const *data, size_t length); +void crc32c_optimization_init(void); #define crc32c(seed, data, length) crc32c_le(seed, (unsigned char const *)data, length) #define btrfs_crc32c crc32c @@ -19,6 +19,7 @@ #include "disk-io.h" #include "transaction.h" #include "print-tree.h" +#include "repair.h" static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); @@ -32,8 +33,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *dst_buf, struct extent_buffer *src_buf); -static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, int level, int slot); inline void btrfs_init_path(struct btrfs_path *p) { @@ -138,6 +137,48 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, return 0; } +int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct extent_buffer *c; + struct extent_buffer *old = root->node; + int level; + struct btrfs_disk_key disk_key = {0,0,0}; + + level = 0; + + c = btrfs_alloc_free_block(trans, root, + btrfs_level_size(root, 0), + root->root_key.objectid, + &disk_key, level, 0, 0); + if (IS_ERR(c)) { + c = old; + extent_buffer_get(c); + } + + memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_level(c, level); + btrfs_set_header_bytenr(c, c->start); + btrfs_set_header_generation(c, 
trans->transid); + btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); + btrfs_set_header_owner(c, root->root_key.objectid); + + write_extent_buffer(c, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(c), + BTRFS_FSID_SIZE); + + write_extent_buffer(c, root->fs_info->chunk_tree_uuid, + (unsigned long)btrfs_header_chunk_tree_uuid(c), + BTRFS_UUID_SIZE); + + btrfs_mark_buffer_dirty(c); + + free_extent_buffer(old); + root->node = c; + add_root_to_dirty_list(root); + return 0; +} + /* * check if the tree block can be shared by multiple trees */ @@ -194,7 +235,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, if (btrfs_block_can_be_shared(root, buf)) { ret = btrfs_lookup_extent_info(trans, root, buf->start, - buf->len, &refs, &flags); + btrfs_header_level(buf), 1, + &refs, &flags); BUG_ON(ret); BUG_ON(refs == 0); } else { @@ -236,7 +278,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, } if (new_flags != 0) { ret = btrfs_set_block_flags(trans, root, buf->start, - buf->len, new_flags); + btrfs_header_level(buf), + new_flags); BUG_ON(ret); } } else { @@ -549,156 +592,131 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root, return btrfs_item_offset_nr(leaf, nr - 1); } -static int check_node(struct btrfs_root *root, struct btrfs_path *path, - int level) +int btrfs_check_node(struct btrfs_root *root, + struct btrfs_disk_key *parent_key, + struct extent_buffer *buf) { - struct extent_buffer *parent = NULL; - struct extent_buffer *node = path->nodes[level]; - struct btrfs_disk_key parent_key; - struct btrfs_disk_key node_key; - int parent_slot; - int slot; + int i; struct btrfs_key cpukey; - u32 nritems = btrfs_header_nritems(node); + struct btrfs_disk_key key; + u32 nritems = btrfs_header_nritems(buf); - if (path->nodes[level + 1]) - parent = path->nodes[level + 1]; + if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root)) + goto fail; - slot = path->slots[level]; - BUG_ON(nritems == 0); 
- if (parent) { - parent_slot = path->slots[level + 1]; - btrfs_node_key(parent, &parent_key, parent_slot); - btrfs_node_key(node, &node_key, 0); - BUG_ON(memcmp(&parent_key, &node_key, - sizeof(struct btrfs_disk_key))); - BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_bytenr(node)); - } - BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); - if (slot != 0) { - btrfs_node_key_to_cpu(node, &cpukey, slot - 1); - btrfs_node_key(node, &node_key, slot); - BUG_ON(btrfs_comp_keys(&node_key, &cpukey) <= 0); - } - if (slot < nritems - 1) { - btrfs_node_key_to_cpu(node, &cpukey, slot + 1); - btrfs_node_key(node, &node_key, slot); - BUG_ON(btrfs_comp_keys(&node_key, &cpukey) >= 0); + if (parent_key && parent_key->type) { + btrfs_node_key(buf, &key, 0); + if (memcmp(parent_key, &key, sizeof(key))) + goto fail; + } + for (i = 0; nritems > 1 && i < nritems - 2; i++) { + btrfs_node_key(buf, &key, i); + btrfs_node_key_to_cpu(buf, &cpukey, i + 1); + if (btrfs_comp_keys(&key, &cpukey) >= 0) + goto fail; } return 0; +fail: + if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) { + if (parent_key) + btrfs_disk_key_to_cpu(&cpukey, parent_key); + else + btrfs_node_key_to_cpu(buf, &cpukey, 0); + btrfs_add_corrupt_extent_record(root->fs_info, &cpukey, + buf->start, buf->len, + btrfs_header_level(buf)); + } + return -EIO; } -static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, - int level) +int btrfs_check_leaf(struct btrfs_root *root, + struct btrfs_disk_key *parent_key, + struct extent_buffer *buf) { - struct extent_buffer *leaf = path->nodes[level]; - struct extent_buffer *parent = NULL; - int parent_slot; + int i; struct btrfs_key cpukey; - struct btrfs_disk_key parent_key; - struct btrfs_disk_key leaf_key; - int slot = path->slots[0]; + struct btrfs_disk_key key; + u32 nritems = btrfs_header_nritems(buf); - u32 nritems = btrfs_header_nritems(leaf); + if (nritems * sizeof(struct btrfs_item) > buf->len) { + fprintf(stderr, "invalid number of 
items %llu\n", + (unsigned long long)buf->start); + goto fail; + } - if (path->nodes[level + 1]) - parent = path->nodes[level + 1]; + if (btrfs_header_level(buf) != 0) { + fprintf(stderr, "leaf is not a leaf %llu\n", + (unsigned long long)btrfs_header_bytenr(buf)); + goto fail; + } + if (btrfs_leaf_free_space(root, buf) < 0) { + fprintf(stderr, "leaf free space incorrect %llu %d\n", + (unsigned long long)btrfs_header_bytenr(buf), + btrfs_leaf_free_space(root, buf)); + goto fail; + } if (nritems == 0) return 0; - if (parent) { - parent_slot = path->slots[level + 1]; - btrfs_node_key(parent, &parent_key, parent_slot); - btrfs_item_key(leaf, &leaf_key, 0); - - BUG_ON(memcmp(&parent_key, &leaf_key, - sizeof(struct btrfs_disk_key))); - BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_bytenr(leaf)); + btrfs_item_key(buf, &key, 0); + if (parent_key && parent_key->type && + memcmp(parent_key, &key, sizeof(key))) { + fprintf(stderr, "leaf parent key incorrect %llu\n", + (unsigned long long)btrfs_header_bytenr(buf)); + goto fail; } -#if 0 for (i = 0; nritems > 1 && i < nritems - 2; i++) { - btrfs_item_key_to_cpu(leaf, &cpukey, i + 1); - btrfs_item_key(leaf, &leaf_key, i); - if (comp_keys(&leaf_key, &cpukey) >= 0) { - btrfs_print_leaf(root, leaf); - printk("slot %d offset bad key\n", i); - BUG_ON(1); + btrfs_item_key(buf, &key, i); + btrfs_item_key_to_cpu(buf, &cpukey, i + 1); + if (btrfs_comp_keys(&key, &cpukey) >= 0) { + fprintf(stderr, "bad key ordering %d %d\n", i, i+1); + goto fail; } - if (btrfs_item_offset_nr(leaf, i) != - btrfs_item_end_nr(leaf, i + 1)) { - btrfs_print_leaf(root, leaf); - printk("slot %d offset bad\n", i); - BUG_ON(1); + if (btrfs_item_offset_nr(buf, i) != + btrfs_item_end_nr(buf, i + 1)) { + fprintf(stderr, "incorrect offsets %u %u\n", + btrfs_item_offset_nr(buf, i), + btrfs_item_end_nr(buf, i + 1)); + goto fail; } - if (i == 0) { - if (btrfs_item_offset_nr(leaf, i) + - btrfs_item_size_nr(leaf, i) != - BTRFS_LEAF_DATA_SIZE(root)) { - 
btrfs_print_leaf(root, leaf); - printk("slot %d first offset bad\n", i); - BUG_ON(1); - } - } - } - if (nritems > 0) { - if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) { - btrfs_print_leaf(root, leaf); - printk("slot %d bad size \n", nritems - 1); - BUG_ON(1); + if (i == 0 && btrfs_item_end_nr(buf, i) != + BTRFS_LEAF_DATA_SIZE(root)) { + fprintf(stderr, "bad item end %u wanted %u\n", + btrfs_item_end_nr(buf, i), + (unsigned)BTRFS_LEAF_DATA_SIZE(root)); + goto fail; } } -#endif - if (slot != 0 && slot < nritems - 1) { - btrfs_item_key(leaf, &leaf_key, slot); - btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1); - if (btrfs_comp_keys(&leaf_key, &cpukey) <= 0) { - btrfs_print_leaf(root, leaf); - printk("slot %d offset bad key\n", slot); - BUG_ON(1); - } - if (btrfs_item_offset_nr(leaf, slot - 1) != - btrfs_item_end_nr(leaf, slot)) { - btrfs_print_leaf(root, leaf); - printk("slot %d offset bad\n", slot); - BUG_ON(1); - } - } - if (slot < nritems - 1) { - btrfs_item_key(leaf, &leaf_key, slot); - btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1); - BUG_ON(btrfs_comp_keys(&leaf_key, &cpukey) >= 0); - if (btrfs_item_offset_nr(leaf, slot) != - btrfs_item_end_nr(leaf, slot + 1)) { - btrfs_print_leaf(root, leaf); - printk("slot %d offset bad\n", slot); - BUG_ON(1); - } - } - BUG_ON(btrfs_item_offset_nr(leaf, 0) + - btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root)); return 0; +fail: + if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) { + if (parent_key) + btrfs_disk_key_to_cpu(&cpukey, parent_key); + else + btrfs_item_key_to_cpu(buf, &cpukey, 0); + + btrfs_add_corrupt_extent_record(root->fs_info, &cpukey, + buf->start, buf->len, 0); + } + return -EIO; } static int noinline check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { - return 0; -#if 0 - struct extent_buffer *buf = path->nodes[level]; + struct btrfs_disk_key key; + struct btrfs_disk_key *key_ptr = NULL; + struct extent_buffer *parent; - if (memcmp_extent_buffer(buf, 
root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(buf), - BTRFS_FSID_SIZE)) { - printk("warning bad block %Lu\n", buf->start); - return 1; + if (path->nodes[level + 1]) { + parent = path->nodes[level + 1]; + btrfs_node_key(parent, &key, path->slots[level + 1]); + key_ptr = &key; } -#endif if (level == 0) - return check_leaf(root, path, level); - return check_node(root, path, level); + return btrfs_check_leaf(root, key_ptr, path->nodes[0]); + return btrfs_check_node(root, key_ptr, path->nodes[level]); } /* @@ -884,8 +902,8 @@ static int balance_level(struct btrfs_trans_handle *trans, wait_on_tree_block_writeback(root, right); free_extent_buffer(right); right = NULL; - wret = del_ptr(trans, root, path, level + 1, pslot + - 1); + wret = btrfs_del_ptr(trans, root, path, + level + 1, pslot + 1); if (wret) ret = wret; wret = btrfs_free_extent(trans, root, bytenr, @@ -932,7 +950,7 @@ static int balance_level(struct btrfs_trans_handle *trans, wait_on_tree_block_writeback(root, mid); free_extent_buffer(mid); mid = NULL; - wret = del_ptr(trans, root, path, level + 1, pslot); + wret = btrfs_del_ptr(trans, root, path, level + 1, pslot); if (wret) ret = wret; wret = btrfs_free_extent(trans, root, bytenr, blocksize, @@ -1254,6 +1272,8 @@ again: key->objectid); b = read_node_slot(root, b, slot); + if (!extent_buffer_uptodate(b)) + return -EIO; } else { p->slots[level] = slot; if (ins_len > 0 && @@ -1948,7 +1968,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_item_offset_nr(right, push_items - 1), push_space); old_left_nritems = btrfs_header_nritems(left); - BUG_ON(old_left_nritems < 0); + BUG_ON(old_left_nritems == 0); old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1); for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { @@ -2659,7 +2679,7 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root * continuing all the way the root if required. 
The root is converted into * a leaf if all the nodes are emptied. */ -static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, +int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { struct extent_buffer *parent = path->nodes[level]; @@ -2711,7 +2731,7 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, int ret; WARN_ON(btrfs_header_generation(leaf) != trans->transid); - ret = del_ptr(trans, root, path, 1, path->slots[1]); + ret = btrfs_del_ptr(trans, root, path, 1, path->slots[1]); if (ret) return ret; @@ -2860,9 +2880,6 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) } slot--; - if (next) - free_extent_buffer(next); - next = read_node_slot(root, c, slot); break; } @@ -2908,9 +2925,6 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) continue; } - if (next) - free_extent_buffer(next); - if (path->reada) reada_for_search(root, path, level, slot, 0); @@ -19,16 +19,26 @@ #ifndef __BTRFS__ #define __BTRFS__ +#if BTRFS_FLAT_INCLUDES #include "list.h" #include "kerncompat.h" #include "radix-tree.h" #include "extent-cache.h" #include "extent_io.h" #include "ioctl.h" +#else +#include <btrfs/list.h> +#include <btrfs/kerncompat.h> +#include <btrfs/radix-tree.h> +#include <btrfs/extent-cache.h> +#include <btrfs/extent_io.h> +#include <btrfs/ioctl.h> +#endif /* BTRFS_FLAT_INCLUDES */ struct btrfs_root; struct btrfs_trans_handle; -#define BTRFS_MAGIC "_BHRfS_M" +struct btrfs_free_space_ctl; +#define BTRFS_MAGIC 0x4D5F53665248425F /* ascii _BHRfS_M, no null */ #define BTRFS_MAX_LEVEL 8 @@ -59,8 +69,12 @@ struct btrfs_trans_handle; #define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL /* holds checksums of all the data extents */ #define BTRFS_CSUM_TREE_OBJECTID 7ULL +#define BTRFS_QUOTA_TREE_OBJECTID 8ULL +/* for storing balance parameters in the root tree */ +#define BTRFS_BALANCE_OBJECTID -4ULL + /* oprhan objectid for tracking 
unlinked/truncated files */ #define BTRFS_ORPHAN_OBJECTID -5ULL @@ -107,21 +121,30 @@ struct btrfs_trans_handle; #define BTRFS_DEV_ITEMS_OBJECTID 1ULL /* + * the max metadata block size. This limit is somewhat artificial, + * but the memmove costs go through the roof for larger blocks. + */ +#define BTRFS_MAX_METADATA_BLOCKSIZE 65536 + +/* * we can actually store much bigger names, but lets not confuse the rest * of linux */ #define BTRFS_NAME_LEN 255 +/* + * Theoretical limit is larger, but we keep this down to a sane + * value. That should limit greatly the possibility of collisions on + * inode ref items. + */ +#define BTRFS_LINK_MAX 65535U + /* 32 bytes in various csum fields */ #define BTRFS_CSUM_SIZE 32 /* csum types */ #define BTRFS_CSUM_TYPE_CRC32 0 - -/* csum types */ -#define BTRFS_CSUM_TYPE_CRC32 0 - static int btrfs_csum_sizes[] = { 4, 0 }; /* four bytes for CRC32 */ @@ -139,6 +162,8 @@ static int btrfs_csum_sizes[] = { 4, 0 }; #define BTRFS_FT_XATTR 8 #define BTRFS_FT_MAX 9 +#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) + /* * the key defines the order in the tree, and so it also defines (optimal) * block layout. objectid corresonds to the inode number. 
The flags @@ -253,6 +278,22 @@ struct btrfs_chunk { /* additional stripes go here */ } __attribute__ ((__packed__)); +#define BTRFS_FREE_SPACE_EXTENT 1 +#define BTRFS_FREE_SPACE_BITMAP 2 + +struct btrfs_free_space_entry { + __le64 offset; + __le64 bytes; + u8 type; +} __attribute__ ((__packed__)); + +struct btrfs_free_space_header { + struct btrfs_disk_key location; + __le64 generation; + __le64 num_entries; + __le64 num_bitmaps; +} __attribute__ ((__packed__)); + static inline unsigned long btrfs_chunk_item_size(int num_stripes) { BUG_ON(num_stripes == 0); @@ -410,13 +451,35 @@ struct btrfs_super_block { #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) +/* + * some patches floated around with a second compression method + * lets save that incompat here for when they do get in + * Note we don't actually support it, we're just reserving the + * number + */ +#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 (1ULL << 4) + +/* + * older kernels tried to do bigger metadata blocks, but the + * code was pretty buggy. Lets not let them try anymore. + */ +#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5) +#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6) +#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7) +#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8) + + #define BTRFS_FEATURE_COMPAT_SUPP 0ULL #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL #define BTRFS_FEATURE_INCOMPAT_SUPP \ (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ - BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) + BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ + BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ + BTRFS_FEATURE_INCOMPAT_RAID56 | \ + BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) /* * A leaf is full of items. 
offset and size tell us where to find @@ -555,6 +618,13 @@ struct btrfs_inode_ref { /* name goes here */ } __attribute__ ((__packed__)); +struct btrfs_inode_extref { + __le64 parent_objectid; + __le64 index; + __le16 name_len; + __u8 name[0]; /* name goes here */ +} __attribute__ ((__packed__)); + struct btrfs_timespec { __le64 sec; __le32 nsec; @@ -615,6 +685,21 @@ struct btrfs_dir_item { u8 type; } __attribute__ ((__packed__)); +struct btrfs_root_item_v0 { + struct btrfs_inode_item inode; + __le64 generation; + __le64 root_dirid; + __le64 bytenr; + __le64 byte_limit; + __le64 bytes_used; + __le64 last_snapshot; + __le64 flags; + __le32 refs; + struct btrfs_disk_key drop_progress; + u8 drop_level; + u8 level; +} __attribute__ ((__packed__)); + struct btrfs_root_item { struct btrfs_inode_item inode; __le64 generation; @@ -628,6 +713,36 @@ struct btrfs_root_item { struct btrfs_disk_key drop_progress; u8 drop_level; u8 level; + + /* + * The following fields appear after subvol_uuids+subvol_times + * were introduced. + */ + + /* + * This generation number is used to test if the new fields are valid + * and up to date while reading the root item. Everytime the root item + * is written out, the "generation" field is copied into this field. If + * anyone ever mounted the fs with an older kernel, we will have + * mismatching generation values here and thus must invalidate the + * new fields. See btrfs_update_root and btrfs_find_last_root for + * details. + * the offset of generation_v2 is also used as the start for the memset + * when invalidating the fields. + */ + __le64 generation_v2; + u8 uuid[BTRFS_UUID_SIZE]; + u8 parent_uuid[BTRFS_UUID_SIZE]; + u8 received_uuid[BTRFS_UUID_SIZE]; + __le64 ctransid; /* updated when an inode changes */ + __le64 otransid; /* trans when created */ + __le64 stransid; /* trans when sent. non-zero for received subvol */ + __le64 rtransid; /* trans when received. 
non-zero for received subvol */ + struct btrfs_timespec ctime; + struct btrfs_timespec otime; + struct btrfs_timespec stime; + struct btrfs_timespec rtime; + __le64 reserved[8]; /* for future */ } __attribute__ ((__packed__)); /* @@ -697,13 +812,32 @@ struct btrfs_csum_item { } __attribute__ ((__packed__)); /* tag for the radix tree of block groups in ram */ -#define BTRFS_BLOCK_GROUP_DATA (1 << 0) -#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) -#define BTRFS_BLOCK_GROUP_METADATA (1 << 2) -#define BTRFS_BLOCK_GROUP_RAID0 (1 << 3) -#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) -#define BTRFS_BLOCK_GROUP_DUP (1 << 5) -#define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) +#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) +#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) +#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2) +#define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3) +#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4) +#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) +#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) +#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7) +#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8) +#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE + +/* used in struct btrfs_balance_args fields */ +#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48) + +#define BTRFS_QGROUP_STATUS_OFF 0 +#define BTRFS_QGROUP_STATUS_ON 1 +#define BTRFS_QGROUP_STATUS_SCANNING 2 + +#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1 << 0) + +struct btrfs_qgroup_status_item { + __le64 version; + __le64 generation; + __le64 flags; + __le64 scan; /* progress during scanning */ +} __attribute__ ((__packed__)); struct btrfs_block_group_item { __le64 used; @@ -711,6 +845,30 @@ struct btrfs_block_group_item { __le64 flags; } __attribute__ ((__packed__)); +struct btrfs_qgroup_info_item { + __le64 generation; + __le64 referenced; + __le64 referenced_compressed; + __le64 exclusive; + __le64 exclusive_compressed; +} __attribute__ ((__packed__)); + +/* flags definition for qgroup limits */ +#define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0) 
+#define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1) +#define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2) +#define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3) +#define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4) +#define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5) + +struct btrfs_qgroup_limit_item { + __le64 flags; + __le64 max_referenced; + __le64 max_exclusive; + __le64 rsv_referenced; + __le64 rsv_exclusive; +} __attribute__ ((__packed__)); + struct btrfs_space_info { u64 flags; u64 total_bytes; @@ -725,6 +883,7 @@ struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; struct btrfs_space_info *space_info; + struct btrfs_free_space_ctl *free_space_ctl; u64 pinned; u64 flags; int cached; @@ -777,7 +936,7 @@ struct btrfs_fs_info { u64 alloc_start; struct btrfs_trans_handle *running_transaction; - struct btrfs_super_block super_copy; + struct btrfs_super_block *super_copy; struct mutex fs_mutex; u64 super_bytenr; @@ -790,6 +949,13 @@ struct btrfs_fs_info { struct list_head space_info; int system_allocs; int readonly; + int (*free_extent_hook)(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner, u64 offset, + int refs_to_drop); + struct cache_tree *fsck_extent_cache; + struct cache_tree *corrupt_blocks; }; /* @@ -837,6 +1003,7 @@ struct btrfs_root { */ #define BTRFS_INODE_ITEM_KEY 1 #define BTRFS_INODE_REF_KEY 12 +#define BTRFS_INODE_EXTREF_KEY 13 #define BTRFS_XATTR_ITEM_KEY 24 #define BTRFS_ORPHAN_ITEM_KEY 48 @@ -889,6 +1056,12 @@ struct btrfs_root { */ #define BTRFS_EXTENT_ITEM_KEY 168 +/* + * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know + * the length, so we save the level in key->offset instead of the length. 
+ */ +#define BTRFS_METADATA_ITEM_KEY 169 + #define BTRFS_TREE_BLOCK_REF_KEY 176 #define BTRFS_EXTENT_DATA_REF_KEY 178 @@ -911,6 +1084,28 @@ struct btrfs_root { #define BTRFS_DEV_ITEM_KEY 216 #define BTRFS_CHUNK_ITEM_KEY 228 +#define BTRFS_BALANCE_ITEM_KEY 248 + +/* + * quota groups + */ +#define BTRFS_QGROUP_STATUS_KEY 240 +#define BTRFS_QGROUP_INFO_KEY 242 +#define BTRFS_QGROUP_LIMIT_KEY 244 +#define BTRFS_QGROUP_RELATION_KEY 246 + +/* + * Persistently stores the io stats in the device tree. + * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid). + */ +#define BTRFS_DEV_STATS_KEY 249 + +/* + * Persistently stores the device replace state in the device tree. + * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0). + */ +#define BTRFS_DEV_REPLACE_KEY 250 + /* * string items are for debugging. They just store a short string of * data in the FS @@ -954,17 +1149,14 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb, \ { \ unsigned long offset = (unsigned long)s; \ type *p = (type *) (eb->data + offset); \ - u##bits tmp; \ - memcpy(&tmp, &(p->member), sizeof(tmp)); \ - return le##bits##_to_cpu(tmp); \ + return get_unaligned_le##bits(&p->member); \ } \ static inline void btrfs_set_##name(struct extent_buffer *eb, \ type *s, u##bits val) \ { \ unsigned long offset = (unsigned long)s; \ type *p = (type *) (eb->data + offset); \ - u##bits tmp = cpu_to_le##bits(val); \ - memcpy(&(p->member), &tmp, sizeof(tmp)); \ + put_unaligned_le##bits(val, &p->member); \ } #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ @@ -1117,6 +1309,13 @@ BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); BTRFS_SETGET_STACK_FUNCS(stack_inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); +/* struct btrfs_inode_extref */ +BTRFS_SETGET_FUNCS(inode_extref_parent, struct btrfs_inode_extref, + parent_objectid, 64); +BTRFS_SETGET_FUNCS(inode_extref_name_len, struct 
btrfs_inode_extref, + name_len, 16); +BTRFS_SETGET_FUNCS(inode_extref_index, struct btrfs_inode_extref, index, 64); + /* struct btrfs_inode_item */ BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64); @@ -1211,9 +1410,11 @@ static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) /* struct btrfs_extent_item */ BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64); +BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, refs, 64); BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item, generation, 64); BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(stack_extent_flags, struct btrfs_extent_item, flags, 64); BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32); @@ -1249,6 +1450,10 @@ BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref, type, 8); BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref, offset, 64); +BTRFS_SETGET_STACK_FUNCS(stack_extent_inline_ref_type, + struct btrfs_extent_inline_ref, type, 8); +BTRFS_SETGET_STACK_FUNCS(stack_extent_inline_ref_offset, + struct btrfs_extent_inline_ref, offset, 64); static inline u32 btrfs_extent_inline_ref_size(int type) { @@ -1403,6 +1608,8 @@ BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item, name_len, 16); + static inline void btrfs_dir_item_key(struct extent_buffer *eb, struct btrfs_dir_item *item, struct btrfs_disk_key *key) @@ -1417,6 +1624,28 @@ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, write_eb_member(eb, item, struct btrfs_dir_item, location, key); } +/* struct btrfs_free_space_header */ 
+BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header, + num_entries, 64); +BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header, + num_bitmaps, 64); +BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header, + generation, 64); + +static inline void btrfs_free_space_key(struct extent_buffer *eb, + struct btrfs_free_space_header *h, + struct btrfs_disk_key *key) +{ + read_eb_member(eb, h, struct btrfs_free_space_header, location, key); +} + +static inline void btrfs_set_free_space_key(struct extent_buffer *eb, + struct btrfs_free_space_header *h, + struct btrfs_disk_key *key) +{ + write_eb_member(eb, h, struct btrfs_free_space_header, location, key); +} + /* struct btrfs_disk_key */ BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, objectid, 64); @@ -1580,7 +1809,16 @@ BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64); BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, last_snapshot, 64); - +BTRFS_SETGET_STACK_FUNCS(root_generation_v2, struct btrfs_root_item, + generation_v2, 64); +BTRFS_SETGET_STACK_FUNCS(root_ctransid, struct btrfs_root_item, + ctransid, 64); +BTRFS_SETGET_STACK_FUNCS(root_otransid, struct btrfs_root_item, + otransid, 64); +BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item, + stransid, 64); +BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item, + rtransid, 64); /* struct btrfs_root_backup */ BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup, @@ -1741,6 +1979,51 @@ BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item, BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item, other_encoding, 16); +/* btrfs_qgroup_status_item */ +BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item, + version, 64); +BTRFS_SETGET_FUNCS(qgroup_status_generation, struct 
btrfs_qgroup_status_item, + generation, 64); +BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item, + flags, 64); +BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item, + scan, 64); + +/* btrfs_qgroup_info_item */ +BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item, + generation, 64); +BTRFS_SETGET_FUNCS(qgroup_info_referenced, struct btrfs_qgroup_info_item, + referenced, 64); +BTRFS_SETGET_FUNCS(qgroup_info_referenced_compressed, + struct btrfs_qgroup_info_item, referenced_compressed, 64); +BTRFS_SETGET_FUNCS(qgroup_info_exclusive, struct btrfs_qgroup_info_item, + exclusive, 64); +BTRFS_SETGET_FUNCS(qgroup_info_exclusive_compressed, + struct btrfs_qgroup_info_item, exclusive_compressed, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation, + struct btrfs_qgroup_info_item, generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_referenced, + struct btrfs_qgroup_info_item, referenced, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_referenced_compressed, + struct btrfs_qgroup_info_item, referenced_compressed, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_exclusive, + struct btrfs_qgroup_info_item, exclusive, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_exclusive_compressed, + struct btrfs_qgroup_info_item, exclusive_compressed, 64); + +/* btrfs_qgroup_limit_item */ +BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item, + flags, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_max_referenced, struct btrfs_qgroup_limit_item, + max_referenced, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_max_exclusive, struct btrfs_qgroup_limit_item, + max_exclusive, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_rsv_referenced, struct btrfs_qgroup_limit_item, + rsv_referenced, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_rsv_exclusive, struct btrfs_qgroup_limit_item, + rsv_exclusive, 64); + /* this returns the number of file bytes represented by the inline item. 
* If an item is compressed, this is the uncompressed size */ @@ -1769,6 +2052,13 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { return root->nodesize; } +static inline int btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag) +{ + struct btrfs_super_block *disk_super; + disk_super = fs_info->super_copy; + return !!(btrfs_super_incompat_flags(disk_super) & flag); +} + /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ @@ -1779,12 +2069,19 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { btrfs_item_offset_nr(leaf, slot))) /* extent-tree.c */ +int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_check_block_accounting(struct btrfs_root *root); +void btrfs_pin_extent(struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr); +struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct + btrfs_fs_info *info, + u64 bytenr); struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, @@ -1802,10 +2099,10 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 search_end, struct btrfs_key *ins, int data); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u64 *refs, u64 *flags); + u64 offset, int metadata, u64 *refs, u64 *flags); int btrfs_set_block_flags(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 flags); + u64 bytenr, int level, u64 flags); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root 
*root, struct extent_buffer *buf, int record_parent); int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -1841,6 +2138,16 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num, int alloc, int mark_free); /* ctree.c */ +int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_path *path, int level, int slot); +int btrfs_check_node(struct btrfs_root *root, + struct btrfs_disk_key *parent_key, + struct extent_buffer *buf); +int btrfs_check_leaf(struct btrfs_root *root, + struct btrfs_disk_key *parent_key, + struct extent_buffer *buf); +int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root); void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int level, int slot, u64 objectid); struct extent_buffer *read_node_slot(struct btrfs_root *root, @@ -436,6 +436,12 @@ int main(int ac, char **av) radix_tree_init(); root = open_ctree(av[ac-1], &super, 0); + + if (!root) { + fprintf(stderr, "Open ctree failed\n"); + return 1; + } + trans = btrfs_start_transaction(root, 1); dir_oid = btrfs_super_root_dir(&super); @@ -479,6 +485,11 @@ int main(int ac, char **av) btrfs_header_nritems(&root->node->node.header)); close_ctree(root, &super); root = open_ctree("dbfile", &super, 0); + + if (!root) { + fprintf(stderr, "Open ctree failed\n"); + return 1; + } } while(count--) { ret = ops[op](trans, root, &radix); @@ -89,9 +89,9 @@ int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, if (verify) { if (memcmp_extent_buffer(buf, result, 0, csum_size)) { - printk("checksum verify failed on %llu wanted %X " - "found %X\n", (unsigned long long)buf->start, - *((int *)result), *((char *)buf->data)); + printk("checksum verify failed on %llu found %08X " + "wanted %08X\n", (unsigned long long)buf->start, + *((u32 *)result), *((u32*)(char *)buf->data)); free(result); return 1; } @@ -106,7 +106,7 @@ int 
csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int verify) { u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + btrfs_super_csum_size(root->fs_info->super_copy); return csum_tree_block_size(buf, csum_size, verify); } @@ -141,7 +141,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, length = blocksize; ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, - bytenr, &length, &multi, 0); + bytenr, &length, &multi, 0, NULL); BUG_ON(ret); device = multi->stripes[0].dev; device->total_ios++; @@ -182,15 +182,56 @@ out: } +static int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror) +{ + unsigned long offset = 0; + struct btrfs_multi_bio *multi = NULL; + struct btrfs_device *device; + int ret = 0; + u64 read_len; + unsigned long bytes_left = eb->len; + + while (bytes_left) { + read_len = bytes_left; + ret = btrfs_map_block(&info->mapping_tree, READ, + eb->start + offset, &read_len, &multi, + mirror, NULL); + if (ret) { + printk("Couldn't map the block %Lu\n", eb->start + offset); + kfree(multi); + return -EIO; + } + device = multi->stripes[0].dev; + + if (device->fd == 0) { + kfree(multi); + return -EIO; + } + + eb->fd = device->fd; + device->total_ios++; + eb->dev_bytenr = multi->stripes[0].physical; + kfree(multi); + multi = NULL; + + if (read_len > bytes_left) + read_len = bytes_left; + + ret = read_extent_from_disk(eb, offset, read_len); + if (ret) + return -EIO; + offset += read_len; + bytes_left -= read_len; + } + return 0; +} + struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 parent_transid) { int ret; struct extent_buffer *eb; - u64 length; u64 best_transid = 0; - struct btrfs_multi_bio *multi = NULL; - struct btrfs_device *device; int mirror_num = 0; int good_mirror = 0; int num_copies; @@ -203,21 +244,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, if (btrfs_buffer_uptodate(eb, 
parent_transid)) return eb; - length = blocksize; while (1) { - ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, - eb->start, &length, &multi, mirror_num); - if (ret) { - printk("Couldn't map the block %Lu\n", bytenr); - break; - } - device = multi->stripes[0].dev; - eb->fd = device->fd; - device->total_ios++; - eb->dev_bytenr = multi->stripes[0].physical; - kfree(multi); - ret = read_extent_from_disk(eb); - + ret = read_whole_eb(root->fs_info, eb, mirror_num); if (ret == 0 && check_tree_block(root, eb) == 0 && csum_tree_block(root, eb, 1) == 0 && verify_parent_transid(eb->tree, eb, parent_transid, ignore) @@ -253,12 +281,156 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, return NULL; } +static int rmw_eb(struct btrfs_fs_info *info, + struct extent_buffer *eb, struct extent_buffer *orig_eb) +{ + int ret; + unsigned long orig_off = 0; + unsigned long dest_off = 0; + unsigned long copy_len = eb->len; + + ret = read_whole_eb(info, eb, 0); + if (ret) + return ret; + + if (eb->start + eb->len <= orig_eb->start || + eb->start >= orig_eb->start + orig_eb->len) + return 0; + /* + * | ----- orig_eb ------- | + * | ----- stripe ------- | + * | ----- orig_eb ------- | + * | ----- orig_eb ------- | + */ + if (eb->start > orig_eb->start) + orig_off = eb->start - orig_eb->start; + if (orig_eb->start > eb->start) + dest_off = orig_eb->start - eb->start; + + if (copy_len > orig_eb->len - orig_off) + copy_len = orig_eb->len - orig_off; + if (copy_len > eb->len - dest_off) + copy_len = eb->len - dest_off; + + memcpy(eb->data + dest_off, orig_eb->data + orig_off, copy_len); + return 0; +} + +static void split_eb_for_raid56(struct btrfs_fs_info *info, + struct extent_buffer *orig_eb, + struct extent_buffer **ebs, + u64 stripe_len, u64 *raid_map, + int num_stripes) +{ + struct extent_buffer *eb; + u64 start = orig_eb->start; + u64 this_eb_start; + int i; + int ret; + + for (i = 0; i < num_stripes; i++) { + if (raid_map[i] >= BTRFS_RAID5_P_STRIPE) 
+ break; + + eb = malloc(sizeof(struct extent_buffer) + stripe_len); + if (!eb) + BUG(); + memset(eb, 0, sizeof(struct extent_buffer) + stripe_len); + + eb->start = raid_map[i]; + eb->len = stripe_len; + eb->refs = 1; + eb->flags = 0; + eb->fd = -1; + eb->dev_bytenr = (u64)-1; + + this_eb_start = raid_map[i]; + + if (start > this_eb_start || + start + orig_eb->len < this_eb_start + stripe_len) { + ret = rmw_eb(info, eb, orig_eb); + BUG_ON(ret); + } else { + memcpy(eb->data, orig_eb->data + eb->start - start, stripe_len); + } + ebs[i] = eb; + } +} + +static int write_raid56_with_parity(struct btrfs_fs_info *info, + struct extent_buffer *eb, + struct btrfs_multi_bio *multi, + u64 stripe_len, u64 *raid_map) +{ + struct extent_buffer *ebs[multi->num_stripes], *p_eb = NULL, *q_eb = NULL; + int i; + int j; + int ret; + int alloc_size = eb->len; + + if (stripe_len > alloc_size) + alloc_size = stripe_len; + + split_eb_for_raid56(info, eb, ebs, stripe_len, raid_map, + multi->num_stripes); + + for (i = 0; i < multi->num_stripes; i++) { + struct extent_buffer *new_eb; + if (raid_map[i] < BTRFS_RAID5_P_STRIPE) { + ebs[i]->dev_bytenr = multi->stripes[i].physical; + ebs[i]->fd = multi->stripes[i].dev->fd; + multi->stripes[i].dev->total_ios++; + BUG_ON(ebs[i]->start != raid_map[i]); + continue; + } + new_eb = kmalloc(sizeof(*eb) + alloc_size, GFP_NOFS); + BUG_ON(!new_eb); + new_eb->dev_bytenr = multi->stripes[i].physical; + new_eb->fd = multi->stripes[i].dev->fd; + multi->stripes[i].dev->total_ios++; + new_eb->len = stripe_len; + + if (raid_map[i] == BTRFS_RAID5_P_STRIPE) + p_eb = new_eb; + else if (raid_map[i] == BTRFS_RAID6_Q_STRIPE) + q_eb = new_eb; + } + if (q_eb) { + void *pointers[multi->num_stripes]; + ebs[multi->num_stripes - 2] = p_eb; + ebs[multi->num_stripes - 1] = q_eb; + + for (i = 0; i < multi->num_stripes; i++) + pointers[i] = ebs[i]->data; + + raid6_gen_syndrome(multi->num_stripes, stripe_len, pointers); + } else { + ebs[multi->num_stripes - 1] = p_eb; + 
memcpy(p_eb->data, ebs[0]->data, stripe_len); + for (j = 1; j < multi->num_stripes - 1; j++) { + for (i = 0; i < stripe_len; i += sizeof(unsigned long)) { + *(unsigned long *)(p_eb->data + i) ^= + *(unsigned long *)(ebs[j]->data + i); + } + } + } + + for (i = 0; i < multi->num_stripes; i++) { + ret = write_extent_to_disk(ebs[i]); + BUG_ON(ret); + if (ebs[i] != eb) + kfree(ebs[i]); + } + return 0; +} + int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb) { int ret; int dev_nr; u64 length; + u64 *raid_map = NULL; struct btrfs_multi_bio *multi = NULL; if (check_tree_block(root, eb)) @@ -272,9 +444,13 @@ int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, dev_nr = 0; length = eb->len; ret = btrfs_map_block(&root->fs_info->mapping_tree, WRITE, - eb->start, &length, &multi, 0); + eb->start, &length, &multi, 0, &raid_map); - while(dev_nr < multi->num_stripes) { + if (raid_map) { + ret = write_raid56_with_parity(root->fs_info, eb, multi, + length, raid_map); + BUG_ON(ret); + } else while (dev_nr < multi->num_stripes) { BUG_ON(ret); eb->fd = multi->stripes[dev_nr].dev->fd; eb->dev_bytenr = multi->stripes[dev_nr].physical; @@ -287,7 +463,7 @@ int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; } -static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, +int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, u32 stripesize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { @@ -345,14 +521,17 @@ static int commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root; struct list_head *next; struct extent_buffer *eb; + int ret; if (fs_info->readonly) return 0; eb = fs_info->tree_root->node; extent_buffer_get(eb); - btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); + ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); free_extent_buffer(eb); + if (ret) + return ret; 
while(!list_empty(&fs_info->dirty_cowonly_roots)) { next = fs_info->dirty_cowonly_roots.next; @@ -438,13 +617,16 @@ static int find_and_setup_root(struct btrfs_root *tree_root, root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); - BUG_ON(ret); + if (ret) + return ret; blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); generation = btrfs_root_generation(&root->root_item); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); - BUG_ON(!root->node); + if (!extent_buffer_uptodate(root->node)) + return -EIO; + return 0; } @@ -456,8 +638,13 @@ static int find_and_setup_log_root(struct btrfs_root *tree_root, u64 blocknr = btrfs_super_log_root(disk_super); struct btrfs_root *log_root = malloc(sizeof(struct btrfs_root)); - if (blocknr == 0) + if (!log_root) + return -ENOMEM; + + if (blocknr == 0) { + free(log_root); return 0; + } blocksize = btrfs_level_size(tree_root, btrfs_super_log_root_level(disk_super)); @@ -471,7 +658,14 @@ static int find_and_setup_log_root(struct btrfs_root *tree_root, btrfs_super_generation(disk_super) + 1); fs_info->log_root_tree = log_root; - BUG_ON(!log_root->node); + + if (!extent_buffer_uptodate(log_root->node)) { + free_extent_buffer(log_root->node); + free(log_root); + fs_info->log_root_tree = NULL; + return -EIO; + } + return 0; } @@ -580,7 +774,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, return fs_info->dev_root; if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) return fs_info->csum_root; - + BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID || location->offset != (u64)-1); @@ -601,8 +795,10 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, return root; } -struct btrfs_root *__open_ctree_fd(int fp, const char *path, u64 sb_bytenr, - u64 root_tree_bytenr, int writes) +static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path, + u64 sb_bytenr, + u64 
root_tree_bytenr, int writes, + int partial) { u32 sectorsize; u32 nodesize; @@ -626,6 +822,10 @@ struct btrfs_root *__open_ctree_fd(int fp, const char *path, u64 sb_bytenr, if (sb_bytenr == 0) sb_bytenr = BTRFS_SUPER_INFO_OFFSET; + /* try to drop all the caches */ + if (posix_fadvise(fp, 0, 0, POSIX_FADV_DONTNEED)) + fprintf(stderr, "Warning, could not drop caches\n"); + ret = btrfs_scan_one_device(fp, path, &fs_devices, &total_devs, sb_bytenr); @@ -641,6 +841,7 @@ struct btrfs_root *__open_ctree_fd(int fp, const char *path, u64 sb_bytenr, } memset(fs_info, 0, sizeof(*fs_info)); + fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE); fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->chunk_root = chunk_root; @@ -676,7 +877,7 @@ struct btrfs_root *__open_ctree_fd(int fp, const char *path, u64 sb_bytenr, goto out_cleanup; fs_info->super_bytenr = sb_bytenr; - disk_super = &fs_info->super_copy; + disk_super = fs_info->super_copy; ret = btrfs_read_dev_super(fs_devices->latest_bdev, disk_super, sb_bytenr); if (ret) { @@ -733,7 +934,7 @@ struct btrfs_root *__open_ctree_fd(int fp, const char *path, u64 sb_bytenr, chunk_root->node = read_tree_block(chunk_root, btrfs_super_chunk_root(disk_super), blocksize, generation); - if (!chunk_root->node) { + if (!extent_buffer_uptodate(chunk_root->node)) { printk("Couldn't read chunk root\n"); goto out_devices; } @@ -744,8 +945,10 @@ struct btrfs_root *__open_ctree_fd(int fp, const char *path, u64 sb_bytenr, if (!(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_METADUMP)) { ret = btrfs_read_chunk_tree(chunk_root); - if (ret) + if (ret) { + printk("Couldn't read chunk tree\n"); goto out_chunk; + } } blocksize = btrfs_level_size(tree_root, @@ -757,15 +960,15 @@ struct btrfs_root *__open_ctree_fd(int fp, const char *path, u64 sb_bytenr, tree_root->node = read_tree_block(tree_root, root_tree_bytenr, blocksize, generation); - if (!tree_root->node) { + if (!extent_buffer_uptodate(tree_root->node)) { 
printk("Couldn't read tree root\n"); - goto out_chunk; + goto out_failed; } ret = find_and_setup_root(tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); if (ret) { printk("Couldn't setup extent tree\n"); - goto out_tree; + goto out_failed; } extent_root->track_dirty = 1; @@ -773,7 +976,7 @@ struct btrfs_root *__open_ctree_fd(int fp, const char *path, u64 sb_bytenr, BTRFS_DEV_TREE_OBJECTID, dev_root); if (ret) { printk("Couldn't setup device tree\n"); - goto out_extent; + goto out_failed; } dev_root->track_dirty = 1; @@ -781,7 +984,8 @@ struct btrfs_root *__open_ctree_fd(int fp, const char *path, u64 sb_bytenr, BTRFS_CSUM_TREE_OBJECTID, csum_root); if (ret) { printk("Couldn't setup csum tree\n"); - goto out_dev; + if (!partial) + goto out_failed; } csum_root->track_dirty = 1; @@ -797,23 +1001,29 @@ struct btrfs_root *__open_ctree_fd(int fp, const char *path, u64 sb_bytenr, fs_info->fs_root = btrfs_read_fs_root(fs_info, &key); if (!fs_info->fs_root) - goto out_csum; + goto out_failed; fs_info->data_alloc_profile = (u64)-1; fs_info->metadata_alloc_profile = (u64)-1; fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; - return fs_info->fs_root; -out_csum: - free_extent_buffer(fs_info->csum_root->node); -out_dev: - free_extent_buffer(fs_info->dev_root->node); -out_extent: - free_extent_buffer(fs_info->extent_root->node); -out_tree: - free_extent_buffer(fs_info->tree_root->node); + return fs_info; + +out_failed: + if (partial) + return fs_info; + + if (fs_info->csum_root) + free_extent_buffer(fs_info->csum_root->node); + if (fs_info->dev_root) + free_extent_buffer(fs_info->dev_root->node); + if (fs_info->extent_root) + free_extent_buffer(fs_info->extent_root->node); + if (fs_info->tree_root) + free_extent_buffer(fs_info->tree_root->node); out_chunk: - free_extent_buffer(fs_info->chunk_root->node); + if (fs_info->chunk_root) + free_extent_buffer(fs_info->chunk_root->node); out_devices: close_all_devices(fs_info); out_cleanup: @@ -833,10 +1043,12 @@ 
out: return NULL; } -struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, int writes) +struct btrfs_fs_info *open_ctree_fs_info(const char *filename, + u64 sb_bytenr, u64 root_tree_bytenr, + int writes, int partial) { int fp; - struct btrfs_root *root; + struct btrfs_fs_info *info; int flags = O_CREAT | O_RDWR; if (!writes) @@ -847,38 +1059,36 @@ struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, int writes) fprintf (stderr, "Could not open %s\n", filename); return NULL; } - root = __open_ctree_fd(fp, filename, sb_bytenr, 0, writes); + info = __open_ctree_fd(fp, filename, sb_bytenr, root_tree_bytenr, + writes, partial); close(fp); - - return root; + return info; } -struct btrfs_root *open_ctree_recovery(const char *filename, u64 sb_bytenr, - u64 root_tree_bytenr) +struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, int writes) { - int fp; - struct btrfs_root *root; + struct btrfs_fs_info *info; - fp = open(filename, O_RDONLY); - if (fp < 0) { - fprintf (stderr, "Could not open %s\n", filename); + info = open_ctree_fs_info(filename, sb_bytenr, 0, writes, 0); + if (!info) return NULL; - } - root = __open_ctree_fd(fp, filename, sb_bytenr, root_tree_bytenr, 0); - close(fp); - - return root; + return info->fs_root; } struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr, int writes) { - return __open_ctree_fd(fp, path, sb_bytenr, 0, writes); + struct btrfs_fs_info *info; + info = __open_ctree_fd(fp, path, sb_bytenr, 0, writes, 0); + if (!info) + return NULL; + return info->fs_root; } int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr) { u8 fsid[BTRFS_FSID_SIZE]; + int fsid_is_initialized = 0; struct btrfs_super_block buf; int i; int ret; @@ -891,8 +1101,7 @@ int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr) return -1; if (btrfs_super_bytenr(&buf) != sb_bytenr || - strncmp((char *)(&buf.magic), BTRFS_MAGIC, - sizeof(buf.magic))) + buf.magic != 
cpu_to_le64(BTRFS_MAGIC)) return -1; memcpy(sb, &buf, sizeof(*sb)); @@ -905,15 +1114,25 @@ int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr) if (ret < sizeof(buf)) break; - if (btrfs_super_bytenr(&buf) != bytenr || - strncmp((char *)(&buf.magic), BTRFS_MAGIC, - sizeof(buf.magic))) + if (btrfs_super_bytenr(&buf) != bytenr ) + continue; + /* if magic is NULL, the device was removed */ + if (buf.magic == 0 && i == 0) + return -1; + if (buf.magic != cpu_to_le64(BTRFS_MAGIC)) continue; - if (i == 0) + if (!fsid_is_initialized) { memcpy(fsid, buf.fsid, sizeof(fsid)); - else if (memcmp(fsid, buf.fsid, sizeof(fsid))) + fsid_is_initialized = 1; + } else if (memcmp(fsid, buf.fsid, sizeof(fsid))) { + /* + * the superblocks (the original one and + * its backups) contain data of different + * filesystems -> the super cannot be trusted + */ continue; + } if (btrfs_super_generation(&buf) > transid) { memcpy(sb, &buf, sizeof(*sb)); @@ -938,15 +1157,20 @@ int write_dev_supers(struct btrfs_root *root, struct btrfs_super_block *sb, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); btrfs_csum_final(crc, (char *)&sb->csum[0]); - ret = pwrite64(device->fd, sb, BTRFS_SUPER_INFO_SIZE, - root->fs_info->super_bytenr); + /* + * super_copy is BTRFS_SUPER_INFO_SIZE bytes and is + * zero filled, we can use it directly + */ + ret = pwrite64(device->fd, root->fs_info->super_copy, + BTRFS_SUPER_INFO_SIZE, + root->fs_info->super_bytenr); BUG_ON(ret != BTRFS_SUPER_INFO_SIZE); return 0; } for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { bytenr = btrfs_sb_offset(i); - if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) + if (bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes) break; btrfs_set_super_bytenr(sb, bytenr); @@ -956,9 +1180,15 @@ int write_dev_supers(struct btrfs_root *root, struct btrfs_super_block *sb, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); btrfs_csum_final(crc, (char *)&sb->csum[0]); - ret = pwrite64(device->fd, sb, BTRFS_SUPER_INFO_SIZE, bytenr); + /* + * 
super_copy is BTRFS_SUPER_INFO_SIZE bytes and is + * zero filled, we can use it directly + */ + ret = pwrite64(device->fd, root->fs_info->super_copy, + BTRFS_SUPER_INFO_SIZE, bytenr); BUG_ON(ret != BTRFS_SUPER_INFO_SIZE); } + return 0; } @@ -972,7 +1202,7 @@ int write_all_supers(struct btrfs_root *root) int ret; u64 flags; - sb = &root->fs_info->super_copy; + sb = root->fs_info->super_copy; dev_item = &sb->dev_item; list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); @@ -1009,17 +1239,17 @@ int write_ctree_super(struct btrfs_trans_handle *trans, if (root->fs_info->readonly) return 0; - btrfs_set_super_generation(&root->fs_info->super_copy, + btrfs_set_super_generation(root->fs_info->super_copy, trans->transid); - btrfs_set_super_root(&root->fs_info->super_copy, + btrfs_set_super_root(root->fs_info->super_copy, tree_root->node->start); - btrfs_set_super_root_level(&root->fs_info->super_copy, + btrfs_set_super_root_level(root->fs_info->super_copy, btrfs_header_level(tree_root->node)); - btrfs_set_super_chunk_root(&root->fs_info->super_copy, + btrfs_set_super_chunk_root(root->fs_info->super_copy, chunk_root->node->start); - btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, + btrfs_set_super_chunk_root_level(root->fs_info->super_copy, btrfs_header_level(chunk_root->node)); - btrfs_set_super_chunk_root_generation(&root->fs_info->super_copy, + btrfs_set_super_chunk_root_generation(root->fs_info->super_copy, btrfs_header_generation(chunk_root->node)); ret = write_all_supers(root); @@ -1031,19 +1261,39 @@ int write_ctree_super(struct btrfs_trans_handle *trans, static int close_all_devices(struct btrfs_fs_info *fs_info) { struct list_head *list; - struct list_head *next; struct btrfs_device *device; - return 0; - list = &fs_info->fs_devices->devices; - list_for_each(next, list) { - device = list_entry(next, struct btrfs_device, dev_list); + while (!list_empty(list)) { + device = list_entry(list->next, struct btrfs_device, dev_list); + 
list_del_init(&device->dev_list); + if (device->fd) { + fsync(device->fd); + if (posix_fadvise(device->fd, 0, 0, POSIX_FADV_DONTNEED)) + fprintf(stderr, "Warning, could not drop caches\n"); + } close(device->fd); + kfree(device->name); + kfree(device->label); + kfree(device); } + kfree(fs_info->fs_devices); return 0; } +static void free_mapping_cache(struct btrfs_fs_info *fs_info) +{ + struct cache_tree *cache_tree = &fs_info->mapping_tree.cache_tree; + struct cache_extent *ce; + struct map_lookup *map; + + while ((ce = find_first_cache_extent(cache_tree, 0))) { + map = container_of(ce, struct map_lookup, ce); + remove_cache_extent(cache_tree, ce); + kfree(map); + } +} + int close_ctree(struct btrfs_root *root) { int ret; @@ -1084,6 +1334,7 @@ int close_ctree(struct btrfs_root *root) } close_all_devices(fs_info); + free_mapping_cache(fs_info); extent_io_tree_cleanup(&fs_info->extent_cache); extent_io_tree_cleanup(&fs_info->free_space_cache); extent_io_tree_cleanup(&fs_info->block_group_cache); @@ -41,13 +41,18 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 parent_transid); struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); + +int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, + u32 stripesize, struct btrfs_root *root, + struct btrfs_fs_info *fs_info, u64 objectid); int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, int writes); struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr, int writes); -struct btrfs_root *open_ctree_recovery(const char *filename, u64 sb_bytenr, - u64 root_tree_bytenr); +struct btrfs_fs_info *open_ctree_fs_info(const char *filename, + u64 sb_bytenr, u64 root_tree_bytenr, + int writes, int partial); int close_ctree(struct btrfs_root *root); int write_all_supers(struct btrfs_root *root); int 
write_ctree_super(struct btrfs_trans_handle *trans, @@ -79,3 +84,6 @@ int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int verify); int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); #endif + +/* raid6.c */ +void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs); diff --git a/extent-cache.h b/extent-cache.h index 7f2f2a6..4cd0f79 100644 --- a/extent-cache.h +++ b/extent-cache.h @@ -18,8 +18,14 @@ #ifndef __PENDING_EXTENT__ #define __PENDING_EXTENT__ + +#if BTRFS_FLAT_INCLUDES #include "kerncompat.h" #include "rbtree.h" +#else +#include <btrfs/kerncompat.h> +#include <btrfs/rbtree.h> +#endif /* BTRFS_FLAT_INCLUDES */ struct cache_tree { struct rb_root root; diff --git a/extent-tree.c b/extent-tree.c index 5bed3c2..381572d 100644 --- a/extent-tree.c +++ b/extent-tree.c @@ -26,6 +26,7 @@ #include "transaction.h" #include "crc32c.h" #include "volumes.h" +#include "free-space-cache.h" #define BLOCK_GROUP_DATA EXTENT_WRITEBACK #define BLOCK_GROUP_METADATA EXTENT_UPTODATE @@ -115,7 +116,8 @@ static int cache_block_group(struct btrfs_root *root, last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); key.objectid = last; key.offset = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + key.type = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; @@ -142,14 +144,18 @@ static int cache_block_group(struct btrfs_root *root, break; } - if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { + if (key.type == BTRFS_EXTENT_ITEM_KEY || + key.type == BTRFS_METADATA_ITEM_KEY) { if (key.objectid > last) { hole_size = key.objectid - last; set_extent_dirty(free_space_cache, last, last + hole_size - 1, GFP_NOFS); } - last = key.objectid + key.offset; + if (key.type == BTRFS_METADATA_ITEM_KEY) + last = key.objectid + root->leafsize; + else + last = key.objectid + key.offset; } next: path->slots[0]++; @@ -1024,6 +1030,9 @@ static int lookup_inline_extent_backref(struct btrfs_trans_handle 
*trans, int want; int ret; int err = 0; + int skinny_metadata = + btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; @@ -1034,15 +1043,46 @@ static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, extra_size = btrfs_extent_inline_ref_size(want); else extra_size = -1; + + if (owner < BTRFS_FIRST_FREE_OBJECTID && skinny_metadata) { + skinny_metadata = 1; + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = owner; + } else if (skinny_metadata) { + skinny_metadata = 0; + } + +again: ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); if (ret < 0) { err = ret; goto out; } + + /* + * We may be a newly converted file system which still has the old fat + * extent entries for metadata, so try and see if we have one of those. + */ + if (ret > 0 && skinny_metadata) { + skinny_metadata = 0; + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == num_bytes) + ret = 0; + } + if (ret) { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + goto again; + } + } + if (ret) { printf("Failed to find [%llu, %u, %llu]\n", key.objectid, key.type, key.offset); - btrfs_print_leaf(root, path->nodes[0]); - btrfs_free_path(path); return -ENOENT; } @@ -1067,8 +1107,9 @@ static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, } #endif if (item_size < sizeof(*ei)) { - printf("Size is %u, needs to be %u, slot %d\n", item_size, - sizeof(*ei), path->slots[0]); + printf("Size is %u, needs to be %u, slot %d\n", + (unsigned)item_size, + (unsigned)sizeof(*ei), path->slots[0]); btrfs_print_leaf(root, leaf); return -EINVAL; } @@ -1080,11 +1121,13 @@ static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, ptr = (unsigned long)(ei + 1); end = (unsigned long)ei + item_size; - if (flags & 
BTRFS_EXTENT_FLAG_TREE_BLOCK) { + if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) { ptr += sizeof(struct btrfs_tree_block_info); BUG_ON(ptr > end); - } else { - BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); + } else if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) { + if (!(flags & BTRFS_EXTENT_FLAG_DATA)) { + return -EIO; + } } err = -ENOENT; @@ -1438,7 +1481,7 @@ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u64 *refs, u64 *flags) + u64 offset, int metadata, u64 *refs, u64 *flags) { struct btrfs_path *path; int ret; @@ -1449,21 +1492,57 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, u64 num_refs; u64 extent_flags; - WARN_ON(num_bytes < root->sectorsize); + if (metadata && + !btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) { + offset = root->leafsize; + metadata = 0; + } + path = btrfs_alloc_path(); path->reada = 1; + key.objectid = bytenr; - key.offset = num_bytes; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + key.offset = offset; + if (metadata) + key.type = BTRFS_METADATA_ITEM_KEY; + else + key.type = BTRFS_EXTENT_ITEM_KEY; + +again: ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); if (ret < 0) goto out; + + /* + * Deal with the fact that we may have mixed SKINNY and normal refs. If + * we didn't find what we wanted check and see if we have a normal ref + * right next to us, or re-search if we are on the edge of the leaf just + * to make sure. 
+ */ + if (ret > 0 && metadata) { + if (path->slots) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_METADATA_ITEM_KEY) + ret = 0; + } + + if (ret) { + btrfs_release_path(root, path); + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = root->leafsize; + metadata = 0; + goto again; + } + } + if (ret != 0) { - btrfs_print_leaf(root, path->nodes[0]); - printk("failed to find block number %Lu\n", - (unsigned long long)bytenr); - BUG(); + ret = -EIO; + goto out; } l = path->nodes[0]; @@ -1484,9 +1563,8 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; #else BUG(); -#endif - } - BUG_ON(num_refs == 0); +#endif + } item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); if (refs) *refs = num_refs; @@ -1499,7 +1577,7 @@ out: int btrfs_set_block_flags(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 flags) + u64 bytenr, int level, u64 flags) { struct btrfs_path *path; int ret; @@ -1507,17 +1585,47 @@ int btrfs_set_block_flags(struct btrfs_trans_handle *trans, struct extent_buffer *l; struct btrfs_extent_item *item; u32 item_size; + int skinny_metadata = + btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); - WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); path->reada = 1; + key.objectid = bytenr; - key.offset = num_bytes; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + if (skinny_metadata) { + key.offset = level; + key.type = BTRFS_METADATA_ITEM_KEY; + } else { + key.offset = root->leafsize; + key.type = BTRFS_EXTENT_ITEM_KEY; + } + +again: ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); if (ret < 0) goto out; + + if (ret > 0 && skinny_metadata) { + skinny_metadata = 0; + if (path->slots[0]--) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if 
(key.objectid == bytenr && + key.offset == root->leafsize && + key.type == BTRFS_EXTENT_ITEM_KEY) + ret = 0; + } + if (ret) { + btrfs_release_path(root, path); + key.offset = root->leafsize; + key.type = BTRFS_EXTENT_ITEM_KEY; + goto again; + } + } + if (ret != 0) { btrfs_print_leaf(root, path->nodes[0]); printk("failed to find block number %Lu\n", @@ -1705,7 +1813,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; ret = write_one_cache_group(trans, root, path, cache); - BUG_ON(ret); } btrfs_free_path(path); return 0; @@ -1736,7 +1843,12 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, if (found) { found->total_bytes += total_bytes; found->bytes_used += bytes_used; - WARN_ON(found->total_bytes < found->bytes_used); + if (found->total_bytes < found->bytes_used) { + fprintf(stderr, "warning, bad space info total_bytes " + "%llu used %llu\n", + (unsigned long long)found->total_bytes, + (unsigned long long)found->bytes_used); + } *space_info = found; return 0; } @@ -1760,6 +1872,8 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10 | + BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_DUP); if (extra_flags) { if (flags & BTRFS_BLOCK_GROUP_DATA) @@ -1826,12 +1940,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 end; /* block accounting for super block */ - old_val = btrfs_super_bytes_used(&info->super_copy); + old_val = btrfs_super_bytes_used(info->super_copy); if (alloc) old_val += num_bytes; else old_val -= num_bytes; - btrfs_set_super_bytes_used(&info->super_copy, old_val); + btrfs_set_super_bytes_used(info->super_copy, old_val); /* block accounting for root item */ old_val = btrfs_root_used(&root->root_item); @@ -1855,6 +1969,7 @@ static int update_block_group(struct btrfs_trans_handle 
*trans, old_val = btrfs_block_group_used(&cache->item); num_bytes = min(total, cache->key.offset - byte_in_group); + if (alloc) { old_val += num_bytes; cache->space_info->bytes_used += num_bytes; @@ -1890,6 +2005,10 @@ static int update_pinned_extents(struct btrfs_root *root, } while (num > 0) { cache = btrfs_lookup_block_group(fs_info, bytenr); + if (!cache) { + len = min((u64)root->sectorsize, num); + goto next; + } WARN_ON(!cache); len = min(num, cache->key.offset - (bytenr - cache->key.objectid)); @@ -1902,6 +2021,7 @@ static int update_pinned_extents(struct btrfs_root *root, cache->space_info->bytes_pinned -= len; fs_info->total_pinned -= len; } +next: bytenr += len; num -= len; } @@ -1949,6 +2069,21 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, return 0; } +static int extent_root_pending_ops(struct btrfs_fs_info *info) +{ + u64 start; + u64 end; + int ret; + + ret = find_first_extent_bit(&info->extent_ins, 0, &start, + &end, EXTENT_LOCKED); + if (!ret) { + ret = find_first_extent_bit(&info->pending_del, 0, &start, &end, + EXTENT_LOCKED); + } + return ret == 0; + +} static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root) { @@ -1960,6 +2095,9 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct pending_extent_op *extent_op; struct btrfs_key key; int ret; + int skinny_metadata = + btrfs_fs_incompat(extent_root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); path = btrfs_alloc_path(); @@ -1975,14 +2113,19 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, if (extent_op->type == PENDING_EXTENT_INSERT) { key.objectid = start; - key.offset = end + 1 - start; - key.type = BTRFS_EXTENT_ITEM_KEY; + if (skinny_metadata) { + key.offset = extent_op->level; + key.type = BTRFS_METADATA_ITEM_KEY; + } else { + key.offset = extent_op->num_bytes; + key.type = BTRFS_EXTENT_ITEM_KEY; + } ret = alloc_reserved_tree_block(trans, extent_root, 
extent_root->root_key.objectid, trans->transid, extent_op->flags, &extent_op->key, - extent_op->level, &key); + extent_op->level, &key); } else { BUG_ON(1); } @@ -2033,6 +2176,12 @@ pinit: return 0; } +void btrfs_pin_extent(struct btrfs_fs_info *fs_info, + u64 bytenr, u64 num_bytes) +{ + update_pinned_extents(fs_info->extent_root, bytenr, num_bytes, 1); +} + /* * remove an extent from the root, returns 0 on success */ @@ -2057,7 +2206,16 @@ static int __free_extent(struct btrfs_trans_handle *trans, int num_to_del = 1; u32 item_size; u64 refs; + int skinny_metadata = + btrfs_fs_incompat(extent_root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); + if (root->fs_info->free_extent_hook) { + root->fs_info->free_extent_hook(trans, root, bytenr, num_bytes, + parent, root_objectid, owner_objectid, + owner_offset, refs_to_drop); + + } path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -2066,6 +2224,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, path->leave_spinning = 1; is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; + if (is_data) + skinny_metadata = 0; BUG_ON(!is_data && refs_to_drop != 1); ret = lookup_extent_backref(trans, extent_root, path, &iref, @@ -2084,6 +2244,11 @@ static int __free_extent(struct btrfs_trans_handle *trans, found_extent = 1; break; } + if (key.type == BTRFS_METADATA_ITEM_KEY && + key.offset == owner_objectid) { + found_extent = 1; + break; + } if (path->slots[0] - extent_slot > 5) break; extent_slot--; @@ -2103,11 +2268,37 @@ static int __free_extent(struct btrfs_trans_handle *trans, path->leave_spinning = 1; key.objectid = bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = num_bytes; + + if (skinny_metadata) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = owner_objectid; + } else { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + } ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); + if (ret > 0 && skinny_metadata && path->slots[0]) { + path->slots[0]--; + 
btrfs_item_key_to_cpu(path->nodes[0], + &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == num_bytes) + ret = 0; + } + + if (ret > 0 && skinny_metadata) { + skinny_metadata = 0; + btrfs_release_path(extent_root, path); + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + ret = btrfs_search_slot(trans, extent_root, + &key, path, -1, 1); + } + if (ret) { printk(KERN_ERR "umm, got %d back from search" ", was looking for %llu\n", ret, @@ -2118,8 +2309,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, extent_slot = path->slots[0]; } } else { - btrfs_print_leaf(extent_root, path->nodes[0]); - WARN_ON(1); printk(KERN_ERR "btrfs unable to find ref byte nr %llu " "parent %llu root %llu owner %llu offset %llu\n", (unsigned long long)bytenr, @@ -2127,6 +2316,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, (unsigned long long)root_objectid, (unsigned long long)owner_objectid, (unsigned long long)owner_offset); + ret = -EIO; + goto fail; } leaf = path->nodes[0]; @@ -2162,7 +2353,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, BUG_ON(item_size < sizeof(*ei)); ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); - if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { + if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID && + key.type == BTRFS_EXTENT_ITEM_KEY) { struct btrfs_tree_block_info *bi; BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); bi = (struct btrfs_tree_block_info *)(ei + 1); @@ -2232,10 +2424,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); } - ret = update_block_group(trans, root, bytenr, num_bytes, 0, - mark_free); - BUG_ON(ret); + update_block_group(trans, root, bytenr, num_bytes, 0, mark_free); } +fail: btrfs_free_path(path); finish_current_insert(trans, extent_root); return ret; @@ -2374,6 +2565,8 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, WARN_ON(num_bytes < root->sectorsize); 
btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + search_start = stripe_align(root, search_start); + if (hint_byte) { block_group = btrfs_lookup_first_block_group(info, hint_byte); if (!block_group) @@ -2389,6 +2582,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, total_needed += empty_size; check_failed: + search_start = stripe_align(root, search_start); if (!block_group) { block_group = btrfs_lookup_first_block_group(info, search_start); @@ -2401,7 +2595,6 @@ check_failed: if (ret) goto error; - search_start = stripe_align(root, search_start); ins->objectid = search_start; ins->offset = num_bytes; @@ -2533,7 +2726,13 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, struct btrfs_extent_inline_ref *iref; struct btrfs_path *path; struct extent_buffer *leaf; - u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); + u32 size = sizeof(*extent_item) + sizeof(*iref); + int skinny_metadata = + btrfs_fs_incompat(fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); + + if (!skinny_metadata) + size += sizeof(*block_info); path = btrfs_alloc_path(); BUG_ON(!path); @@ -2550,27 +2749,25 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, btrfs_set_extent_generation(leaf, extent_item, generation); btrfs_set_extent_flags(leaf, extent_item, flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); - block_info = (struct btrfs_tree_block_info *)(extent_item + 1); - btrfs_set_tree_block_key(leaf, block_info, key); - btrfs_set_tree_block_level(leaf, block_info, level); + if (skinny_metadata) { + iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); + } else { + block_info = (struct btrfs_tree_block_info *)(extent_item + 1); + btrfs_set_tree_block_key(leaf, block_info, key); + btrfs_set_tree_block_level(leaf, block_info, level); + iref = (struct btrfs_extent_inline_ref *)(block_info + 1); + } - iref = (struct btrfs_extent_inline_ref *)(block_info + 1); btrfs_set_extent_inline_ref_type(leaf, iref, 
BTRFS_TREE_BLOCK_REF_KEY); btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); btrfs_mark_buffer_dirty(leaf); btrfs_free_path(path); - ret = update_block_group(trans, root, ins->objectid, ins->offset, + ret = update_block_group(trans, root, ins->objectid, root->leafsize, 1, 0); - if (ret) { - printk(KERN_ERR "btrfs update block group failed for %llu " - "%llu\n", (unsigned long long)ins->objectid, - (unsigned long long)ins->offset); - BUG(); - } - return ret; + return 0; } static int alloc_tree_block(struct btrfs_trans_handle *trans, @@ -2604,6 +2801,11 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, set_state_private(&root->fs_info->extent_ins, ins->objectid, (unsigned long)extent_op); } else { + if (btrfs_fs_incompat(root->fs_info, + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) { + ins->offset = level; + ins->type = BTRFS_METADATA_ITEM_KEY; + } ret = alloc_reserved_tree_block(trans, root, root_objectid, generation, flags, key, level, ins); @@ -2971,18 +3173,27 @@ out: int btrfs_free_block_groups(struct btrfs_fs_info *info) { + struct btrfs_space_info *sinfo; + struct btrfs_block_group_cache *cache; u64 start; u64 end; u64 ptr; int ret; + while(1) { ret = find_first_extent_bit(&info->block_group_cache, 0, &start, &end, (unsigned int)-1); if (ret) break; ret = get_state_private(&info->block_group_cache, start, &ptr); - if (!ret) - kfree((void *)(unsigned long)ptr); + if (!ret) { + cache = (struct btrfs_block_group_cache *)ptr; + if (cache->free_space_ctl) { + btrfs_remove_free_space_cache(cache); + kfree(cache->free_space_ctl); + } + kfree(cache); + } clear_extent_bits(&info->block_group_cache, start, end, (unsigned int)-1, GFP_NOFS); } @@ -2994,6 +3205,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) clear_extent_dirty(&info->free_space_cache, start, end, GFP_NOFS); } + + while (!list_empty(&info->space_info)) { + sinfo = list_entry(info->space_info.next, + struct btrfs_space_info, list); + list_del_init(&sinfo->list); + 
kfree(sinfo); + } return 0; } @@ -3153,7 +3371,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, finish_current_insert(trans, extent_root); ret = del_pending_extents(trans, extent_root); - BUG_ON(ret); set_avail_alloc_bits(extent_root->fs_info, type); return 0; } @@ -3185,7 +3402,7 @@ int btrfs_make_block_groups(struct btrfs_trans_handle *trans, extent_root = root->fs_info->extent_root; block_group_cache = &root->fs_info->block_group_cache; chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; - total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); + total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); group_align = 64 * root->sectorsize; cur_start = 0; @@ -3269,3 +3486,165 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, return update_block_group(trans, root, bytenr, num_bytes, alloc, mark_free); } + +static int btrfs_count_extents_in_block_group(struct btrfs_root *root, + struct btrfs_path *path, u64 start, + u64 len, + u64 *total) +{ + struct btrfs_key key; + struct extent_buffer *leaf; + u64 bytes_used = 0; + int ret; + int slot; + + key.offset = 0; + key.objectid = start; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, + &key, path, 0, 0); + if (ret < 0) + return ret; + while(1) { + leaf = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + if (ret > 0) + break; + leaf = path->nodes[0]; + slot = path->slots[0]; + } + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid > start + len) + break; + if (key.type == BTRFS_EXTENT_ITEM_KEY) + bytes_used += key.offset; + if (key.type == BTRFS_METADATA_ITEM_KEY) + bytes_used += root->leafsize; + path->slots[0]++; + } + *total = bytes_used; + btrfs_release_path(root, path); + return 0; +} + +int btrfs_check_block_accounting(struct btrfs_root *root) +{ + int ret; + u64 start = 0; + u64 bytes_used = 
0; + struct btrfs_path path; + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *fs_info = root->fs_info; + + btrfs_init_path(&path); + + while(1) { + cache = btrfs_lookup_block_group(fs_info, start); + if (!cache) + break; + + ret = btrfs_count_extents_in_block_group(root, &path, + cache->key.objectid, + cache->key.offset, + &bytes_used); + + if (ret == 0) { + u64 on_disk = btrfs_block_group_used(&cache->item); + if (on_disk != bytes_used) { + fprintf(stderr, "bad block group accounting found %llu " + "expected %llu block group %llu\n", + (unsigned long long)bytes_used, + (unsigned long long)on_disk, + (unsigned long long)cache->key.objectid); + } + } + start = cache->key.objectid + cache->key.offset; + + cache->space_info->bytes_used = 0; + } + return 0; +} + +/* + * Fixup block accounting. The initial block accounting created by + * make_block_groups isn't accuracy in this case. + */ +int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + int slot; + u64 start = 0; + u64 bytes_used = 0; + struct btrfs_path path; + struct btrfs_key key; + struct extent_buffer *leaf; + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *fs_info = root->fs_info; + + root = root->fs_info->extent_root; + + while(extent_root_pending_ops(fs_info)) { + ret = finish_current_insert(trans, root); + if (ret) + return ret; + ret = del_pending_extents(trans, root); + if (ret) + return ret; + } + + while(1) { + cache = btrfs_lookup_first_block_group(fs_info, start); + if (!cache) + break; + start = cache->key.objectid + cache->key.offset; + btrfs_set_block_group_used(&cache->item, 0); + cache->space_info->bytes_used = 0; + set_extent_bits(&root->fs_info->block_group_cache, + cache->key.objectid, + cache->key.objectid + cache->key.offset -1, + BLOCK_GROUP_DIRTY, GFP_NOFS); + } + + btrfs_init_path(&path); + key.offset = 0; + key.objectid = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + ret = 
btrfs_search_slot(trans, root->fs_info->extent_root, + &key, &path, 0, 0); + if (ret < 0) + return ret; + while(1) { + leaf = path.nodes[0]; + slot = path.slots[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, &path); + if (ret < 0) + return ret; + if (ret > 0) + break; + leaf = path.nodes[0]; + slot = path.slots[0]; + } + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.type == BTRFS_EXTENT_ITEM_KEY) { + bytes_used += key.offset; + ret = btrfs_update_block_group(trans, root, + key.objectid, key.offset, 1, 0); + BUG_ON(ret); + } else if (key.type == BTRFS_METADATA_ITEM_KEY) { + bytes_used += root->leafsize; + ret = btrfs_update_block_group(trans, root, + key.objectid, root->leafsize, 1, 0); + BUG_ON(ret); + } + path.slots[0]++; + } + btrfs_set_super_bytes_used(root->fs_info->super_copy, bytes_used); + btrfs_release_path(root, &path); + return 0; +} diff --git a/extent_io.c b/extent_io.c index 973e918..5093aeb 100644 --- a/extent_io.c +++ b/extent_io.c @@ -27,8 +27,11 @@ #include "kerncompat.h" #include "extent_io.h" #include "list.h" +#include "ctree.h" +#include "volumes.h" -u64 cache_max = 1024 * 1024 * 32; +u64 cache_soft_max = 1024 * 1024 * 256; +u64 cache_hard_max = 1 * 1024 * 1024 * 1024; void extent_io_tree_init(struct extent_io_tree *tree) { @@ -47,7 +50,7 @@ static struct extent_state *alloc_extent_state(void) return NULL; state->refs = 1; state->state = 0; - state->private = 0; + state->xprivate = 0; return state; } @@ -299,9 +302,11 @@ int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 last_start; u64 last_end; again: - prealloc = alloc_extent_state(); - if (!prealloc) - return -ENOMEM; + if (!prealloc) { + prealloc = alloc_extent_state(); + if (!prealloc) + return -ENOMEM; + } /* * this search will find the extents that end after @@ -508,7 +513,7 @@ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) ret = -ENOENT; goto out; } - state->private = private; + state->xprivate = private; out: 
return ret; } @@ -529,7 +534,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) ret = -ENOENT; goto out; } - *private = state->private; + *private = state->xprivate; out: return ret; } @@ -540,18 +545,19 @@ static int free_some_buffers(struct extent_io_tree *tree) struct extent_buffer *eb; struct list_head *node, *next; - if (tree->cache_size < cache_max) + if (tree->cache_size < cache_soft_max) return 0; + list_for_each_safe(node, next, &tree->lru) { eb = list_entry(node, struct extent_buffer, lru); if (eb->refs == 1) { free_extent_buffer(eb); - if (tree->cache_size < cache_max) + if (tree->cache_size < cache_hard_max) break; } else { list_move_tail(&eb->lru, &tree->lru); } - if (nrscan++ > 64) + if (nrscan++ > 64 && tree->cache_size < cache_hard_max) break; } return 0; @@ -661,13 +667,14 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, return eb; } -int read_extent_from_disk(struct extent_buffer *eb) +int read_extent_from_disk(struct extent_buffer *eb, + unsigned long offset, unsigned long len) { int ret; - ret = pread(eb->fd, eb->data, eb->len, eb->dev_bytenr); + ret = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr); if (ret < 0) goto out; - if (ret != eb->len) { + if (ret != len) { ret = -EIO; goto out; } @@ -691,6 +698,55 @@ out: return ret; } +int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset, + u64 bytes, int mirror) +{ + struct btrfs_multi_bio *multi = NULL; + struct btrfs_device *device; + u64 bytes_left = bytes; + u64 read_len; + u64 total_read = 0; + int ret; + + while (bytes_left) { + read_len = bytes_left; + ret = btrfs_map_block(&info->mapping_tree, READ, offset, + &read_len, &multi, mirror, NULL); + if (ret) { + fprintf(stderr, "Couldn't map the block %Lu\n", + offset); + return -EIO; + } + device = multi->stripes[0].dev; + + read_len = min(bytes_left, read_len); + if (device->fd == 0) { + kfree(multi); + return -EIO; + } + + ret = pread(device->fd, buf + total_read, 
read_len, + multi->stripes[0].physical); + kfree(multi); + if (ret < 0) { + fprintf(stderr, "Error reading %Lu, %d\n", offset, + ret); + return ret; + } + if (ret != read_len) { + fprintf(stderr, "Short read for %Lu, read %d, " + "read_len %Lu\n", offset, ret, read_len); + return -EIO; + } + + bytes_left -= read_len; + offset += read_len; + total_read += read_len; + } + + return 0; +} + int set_extent_buffer_uptodate(struct extent_buffer *eb) { eb->flags |= EXTENT_UPTODATE; @@ -706,6 +762,9 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, int extent_buffer_uptodate(struct extent_buffer *eb) { + if (!eb) + return 0; + if (eb->flags & EXTENT_UPTODATE) return 1; return 0; diff --git a/extent_io.h b/extent_io.h index a5d6bf0..a0308a9 100644 --- a/extent_io.h +++ b/extent_io.h @@ -18,9 +18,16 @@ #ifndef __EXTENTMAP__ #define __EXTENTMAP__ + +#if BTRFS_FLAT_INCLUDES #include "kerncompat.h" #include "extent-cache.h" #include "list.h" +#else +#include <btrfs/kerncompat.h> +#include <btrfs/extent-cache.h> +#include <btrfs/list.h> +#endif /* BTRFS_FLAT_INCLUDES */ #define EXTENT_DIRTY 1 #define EXTENT_WRITEBACK (1 << 1) @@ -34,6 +41,8 @@ #define EXTENT_CSUM (1 << 9) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) +struct btrfs_fs_info; + struct extent_io_tree { struct cache_tree state; struct cache_tree cache; @@ -47,7 +56,7 @@ struct extent_state { u64 end; int refs; unsigned long state; - u64 private; + u64 xprivate; }; struct extent_buffer { @@ -86,8 +95,8 @@ int extent_buffer_uptodate(struct extent_buffer *eb); int set_extent_buffer_uptodate(struct extent_buffer *eb); int clear_extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb); -int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); -int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); +int set_state_private(struct extent_io_tree *tree, u64 start, u64 xprivate); +int get_state_private(struct extent_io_tree *tree, u64 start, 
u64 *xprivate); struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, u64 bytenr, u32 blocksize); struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree, @@ -95,7 +104,8 @@ struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree, struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, u64 bytenr, u32 blocksize); void free_extent_buffer(struct extent_buffer *eb); -int read_extent_from_disk(struct extent_buffer *eb); +int read_extent_from_disk(struct extent_buffer *eb, + unsigned long offset, unsigned long len); int write_extent_to_disk(struct extent_buffer *eb); int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, unsigned long start, unsigned long len); @@ -114,4 +124,6 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, unsigned long start, unsigned long len); int set_extent_buffer_dirty(struct extent_buffer *eb); int clear_extent_buffer_dirty(struct extent_buffer *eb); +int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset, + u64 bytes, int mirror); #endif diff --git a/file-item.c b/file-item.c index c746b44..9c787f0 100644 --- a/file-item.c +++ b/file-item.c @@ -134,7 +134,7 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; u64 csum_offset = 0; u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + btrfs_super_csum_size(root->fs_info->super_copy); int csums_in_item; file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; @@ -206,7 +206,7 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u32 nritems; u32 ins_size; u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + btrfs_super_csum_size(root->fs_info->super_copy); path = btrfs_alloc_path(); BUG_ON(!path); @@ -352,7 +352,7 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans, { struct extent_buffer *leaf; u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + 
btrfs_super_csum_size(root->fs_info->super_copy); u64 csum_end; u64 end_byte = bytenr + len; u32 blocksize = root->sectorsize; @@ -411,7 +411,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; int ret; u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + btrfs_super_csum_size(root->fs_info->super_copy); int blocksize = root->sectorsize; root = root->fs_info->csum_root; diff --git a/free-space-cache.c b/free-space-cache.c new file mode 100644 index 0000000..5fb8ece --- /dev/null +++ b/free-space-cache.c @@ -0,0 +1,867 @@ +/* + * Copyright (C) 2008 Red Hat. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include "kerncompat.h" +#include "ctree.h" +#include "free-space-cache.h" +#include "transaction.h" +#include "disk-io.h" +#include "extent_io.h" +#include "crc32c.h" +#include "bitops.h" + +#define CACHE_SECTORSIZE 4096 +#define BITS_PER_BITMAP (CACHE_SECTORSIZE * 8) +#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) + +static int link_free_space(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info); +static void merge_space_tree(struct btrfs_free_space_ctl *ctl); + +struct io_ctl { + void *cur, *orig; + void *buffer; + struct btrfs_root *root; + unsigned long size; + u64 total_size; + int index; + int num_pages; + unsigned check_crcs:1; +}; + +static int io_ctl_init(struct io_ctl *io_ctl, u64 size, u64 ino, + struct btrfs_root *root) +{ + memset(io_ctl, 0, sizeof(struct io_ctl)); + io_ctl->num_pages = (size + CACHE_SECTORSIZE - 1) / CACHE_SECTORSIZE; + io_ctl->buffer = kzalloc(size, GFP_NOFS); + if (!io_ctl->buffer) + return -ENOMEM; + io_ctl->total_size = size; + io_ctl->root = root; + if (ino != BTRFS_FREE_INO_OBJECTID) + io_ctl->check_crcs = 1; + return 0; +} + +static void io_ctl_free(struct io_ctl *io_ctl) +{ + kfree(io_ctl->buffer); +} + +static void io_ctl_unmap_page(struct io_ctl *io_ctl) +{ + if (io_ctl->cur) { + io_ctl->cur = NULL; + io_ctl->orig = NULL; + } +} + +static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) +{ + BUG_ON(io_ctl->index >= io_ctl->num_pages); + io_ctl->cur = io_ctl->buffer + (io_ctl->index++ * CACHE_SECTORSIZE); + io_ctl->orig = io_ctl->cur; + io_ctl->size = CACHE_SECTORSIZE; + if (clear) + memset(io_ctl->cur, 0, CACHE_SECTORSIZE); +} + +static void io_ctl_drop_pages(struct io_ctl *io_ctl) +{ + io_ctl_unmap_page(io_ctl); +} + +static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct btrfs_root *root, + struct btrfs_path *path, u64 ino) +{ + struct extent_buffer *leaf; + struct btrfs_file_extent_item *fi; + struct btrfs_key key; + u64 bytenr, len; + u64 total_read = 0; + int ret = 0; + + key.objectid = 
ino; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret) { + printf("Couldn't find file extent item for free space inode" + " %Lu\n", ino); + btrfs_release_path(root, path); + return -EINVAL; + } + + while (total_read < io_ctl->total_size) { + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret) { + ret = -EINVAL; + break; + } + } + leaf = path->nodes[0]; + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != ino) { + ret = -EINVAL; + break; + } + + if (key.type != BTRFS_EXTENT_DATA_KEY) { + ret = -EINVAL; + break; + } + + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(path->nodes[0], fi) != + BTRFS_FILE_EXTENT_REG) { + printf("Not the file extent type we wanted\n"); + ret = -EINVAL; + break; + } + + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi) + + btrfs_file_extent_offset(leaf, fi); + len = btrfs_file_extent_num_bytes(leaf, fi); + ret = read_data_from_disk(root->fs_info, + io_ctl->buffer + key.offset, bytenr, + len, 0); + if (ret) + break; + total_read += len; + path->slots[0]++; + } + + btrfs_release_path(root, path); + return ret; +} + +static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) +{ + __le64 *gen; + + /* + * Skip the crc area. If we don't check crcs then we just have a 64bit + * chunk at the front of the first page. 
+ */ + if (io_ctl->check_crcs) { + io_ctl->cur += sizeof(u32) * io_ctl->num_pages; + io_ctl->size -= sizeof(u64) + + (sizeof(u32) * io_ctl->num_pages); + } else { + io_ctl->cur += sizeof(u64); + io_ctl->size -= sizeof(u64) * 2; + } + + gen = io_ctl->cur; + if (le64_to_cpu(*gen) != generation) { + printk("btrfs: space cache generation " + "(%Lu) does not match inode (%Lu)\n", *gen, + generation); + io_ctl_unmap_page(io_ctl); + return -EIO; + } + io_ctl->cur += sizeof(u64); + return 0; +} + +static int io_ctl_check_crc(struct io_ctl *io_ctl, int index) +{ + u32 *tmp, val; + u32 crc = ~(u32)0; + unsigned offset = 0; + + if (!io_ctl->check_crcs) { + io_ctl_map_page(io_ctl, 0); + return 0; + } + + if (index == 0) + offset = sizeof(u32) * io_ctl->num_pages; + + tmp = io_ctl->buffer; + tmp += index; + val = *tmp; + + io_ctl_map_page(io_ctl, 0); + crc = crc32c(crc, io_ctl->orig + offset, CACHE_SECTORSIZE - offset); + btrfs_csum_final(crc, (char *)&crc); + if (val != crc) { + printk("btrfs: csum mismatch on free space cache\n"); + io_ctl_unmap_page(io_ctl); + return -EIO; + } + + return 0; +} + +static int io_ctl_read_entry(struct io_ctl *io_ctl, + struct btrfs_free_space *entry, u8 *type) +{ + struct btrfs_free_space_entry *e; + int ret; + + if (!io_ctl->cur) { + ret = io_ctl_check_crc(io_ctl, io_ctl->index); + if (ret) + return ret; + } + + e = io_ctl->cur; + entry->offset = le64_to_cpu(e->offset); + entry->bytes = le64_to_cpu(e->bytes); + *type = e->type; + io_ctl->cur += sizeof(struct btrfs_free_space_entry); + io_ctl->size -= sizeof(struct btrfs_free_space_entry); + + if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) + return 0; + + io_ctl_unmap_page(io_ctl); + + return 0; +} + +static int io_ctl_read_bitmap(struct io_ctl *io_ctl, + struct btrfs_free_space *entry) +{ + int ret; + + ret = io_ctl_check_crc(io_ctl, io_ctl->index); + if (ret) + return ret; + + memcpy(entry->bitmap, io_ctl->cur, CACHE_SECTORSIZE); + io_ctl_unmap_page(io_ctl); + + return 0; +} + + 
+int __load_free_space_cache(struct btrfs_root *root, + struct btrfs_free_space_ctl *ctl, + struct btrfs_path *path, u64 offset) +{ + struct btrfs_free_space_header *header; + struct btrfs_inode_item *inode_item; + struct extent_buffer *leaf; + struct io_ctl io_ctl; + struct btrfs_key key; + struct btrfs_key inode_location; + struct btrfs_disk_key disk_key; + struct btrfs_free_space *e, *n; + struct list_head bitmaps; + u64 num_entries; + u64 num_bitmaps; + u64 generation; + u64 inode_size; + u8 type; + int ret = 0; + + INIT_LIST_HEAD(&bitmaps); + + key.objectid = BTRFS_FREE_SPACE_OBJECTID; + key.offset = offset; + key.type = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + return 0; + } else if (ret > 0) { + btrfs_release_path(root, path); + return 0; + } + + ret = -1; + + leaf = path->nodes[0]; + header = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_free_space_header); + num_entries = btrfs_free_space_entries(leaf, header); + num_bitmaps = btrfs_free_space_bitmaps(leaf, header); + generation = btrfs_free_space_generation(leaf, header); + btrfs_free_space_key(leaf, header, &disk_key); + btrfs_disk_key_to_cpu(&inode_location, &disk_key); + btrfs_release_path(root, path); + + ret = btrfs_search_slot(NULL, root, &inode_location, path, 0, 0); + if (ret) { + printf("Couldn't find free space inode %d\n", ret); + return 0; + } + + leaf = path->nodes[0]; + inode_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_inode_item); + if (btrfs_inode_generation(leaf, inode_item) != generation) { + printf("free space inode generation (%llu) did not match " + "free space cache generation (%llu)", + (unsigned long long)btrfs_inode_generation(leaf, + inode_item), + (unsigned long long)generation); + btrfs_release_path(root, path); + return 0; + } + + inode_size = btrfs_inode_size(leaf, inode_item); + btrfs_release_path(root, path); + if (inode_size == 0) + return 0; + + if (!num_entries) + return 0; + + ret = io_ctl_init(&io_ctl, 
inode_size, inode_location.objectid, root); + if (ret) + return ret; + + ret = io_ctl_prepare_pages(&io_ctl, root, path, + inode_location.objectid); + if (ret) + goto out; + + ret = io_ctl_check_crc(&io_ctl, 0); + if (ret) + goto free_cache; + + ret = io_ctl_check_generation(&io_ctl, generation); + if (ret) + goto free_cache; + + while (num_entries) { + e = calloc(1, sizeof(*e)); + if (!e) + goto free_cache; + + ret = io_ctl_read_entry(&io_ctl, e, &type); + if (ret) { + free(e); + goto free_cache; + } + + if (!e->bytes) { + free(e); + goto free_cache; + } + + if (type == BTRFS_FREE_SPACE_EXTENT) { + ret = link_free_space(ctl, e); + if (ret) { + printf("Duplicate entries in free space cache, dumping"); + free(e); + goto free_cache; + } + } else { + BUG_ON(!num_bitmaps); + num_bitmaps--; + e->bitmap = kzalloc(CACHE_SECTORSIZE, GFP_NOFS); + if (!e->bitmap) { + free(e); + goto free_cache; + } + ret = link_free_space(ctl, e); + ctl->total_bitmaps++; + if (ret) { + printf("Duplicate entries in free space cache, dumping"); + free(e->bitmap); + free(e); + goto free_cache; + } + list_add_tail(&e->list, &bitmaps); + } + + num_entries--; + } + + io_ctl_unmap_page(&io_ctl); + + /* + * We add the bitmaps at the end of the entries in order that + * the bitmap entries are added to the cache. 
+ */ + list_for_each_entry_safe(e, n, &bitmaps, list) { + list_del_init(&e->list); + ret = io_ctl_read_bitmap(&io_ctl, e); + if (ret) + goto free_cache; + } + + io_ctl_drop_pages(&io_ctl); + merge_space_tree(ctl); + ret = 1; +out: + io_ctl_free(&io_ctl); + return ret; +free_cache: + io_ctl_drop_pages(&io_ctl); + __btrfs_remove_free_space_cache(ctl); + goto out; +} + +int load_free_space_cache(struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group) +{ + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_path *path; + int ret = 0; + + path = btrfs_alloc_path(); + if (!path) + return 0; + + ret = __load_free_space_cache(fs_info->tree_root, ctl, path, + block_group->key.objectid); + btrfs_free_path(path); + + if (ret < 0) { + ret = 0; + + printf("failed to load free space cache for block group %llu", + block_group->key.objectid); + } + + return ret; +} + +static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, + u64 offset) +{ + BUG_ON(offset < bitmap_start); + offset -= bitmap_start; + return (unsigned long)(offset / unit); +} + +static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) +{ + return (unsigned long)(bytes / unit); +} + +static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, + u64 offset) +{ + u64 bitmap_start; + u64 bytes_per_bitmap; + + bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; + bitmap_start = offset - ctl->start; + bitmap_start = bitmap_start / bytes_per_bitmap; + bitmap_start *= bytes_per_bitmap; + bitmap_start += ctl->start; + + return bitmap_start; +} + +static int tree_insert_offset(struct rb_root *root, u64 offset, + struct rb_node *node, int bitmap) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct btrfs_free_space *info; + + while (*p) { + parent = *p; + info = rb_entry(parent, struct btrfs_free_space, offset_index); + + if (offset < info->offset) { + p = &(*p)->rb_left; + } else if (offset > info->offset) { + p = 
&(*p)->rb_right; + } else { + /* + * we could have a bitmap entry and an extent entry + * share the same offset. If this is the case, we want + * the extent entry to always be found first if we do a + * linear search through the tree, since we want to have + * the quickest allocation time, and allocating from an + * extent is faster than allocating from a bitmap. So + * if we're inserting a bitmap and we find an entry at + * this offset, we want to go right, or after this entry + * logically. If we are inserting an extent and we've + * found a bitmap, we want to go left, or before + * logically. + */ + if (bitmap) { + if (info->bitmap) + return -EEXIST; + p = &(*p)->rb_right; + } else { + if (!info->bitmap) + return -EEXIST; + p = &(*p)->rb_left; + } + } + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); + + return 0; +} + +/* + * searches the tree for the given offset. + * + * fuzzy - If this is set, then we are trying to make an allocation, and we just + * want a section that has at least bytes size and comes at or after the given + * offset. + */ +static struct btrfs_free_space * +tree_search_offset(struct btrfs_free_space_ctl *ctl, + u64 offset, int bitmap_only, int fuzzy) +{ + struct rb_node *n = ctl->free_space_offset.rb_node; + struct btrfs_free_space *entry, *prev = NULL; + + /* find entry that is closest to the 'offset' */ + while (1) { + if (!n) { + entry = NULL; + break; + } + + entry = rb_entry(n, struct btrfs_free_space, offset_index); + prev = entry; + + if (offset < entry->offset) + n = n->rb_left; + else if (offset > entry->offset) + n = n->rb_right; + else + break; + } + + if (bitmap_only) { + if (!entry) + return NULL; + if (entry->bitmap) + return entry; + + /* + * bitmap entry and extent entry may share same offset, + * in that case, bitmap entry comes after extent entry. 
+ */ + n = rb_next(n); + if (!n) + return NULL; + entry = rb_entry(n, struct btrfs_free_space, offset_index); + if (entry->offset != offset) + return NULL; + + WARN_ON(!entry->bitmap); + return entry; + } else if (entry) { + if (entry->bitmap) { + /* + * if previous extent entry covers the offset, + * we should return it instead of the bitmap entry + */ + n = rb_prev(&entry->offset_index); + if (n) { + prev = rb_entry(n, struct btrfs_free_space, + offset_index); + if (!prev->bitmap && + prev->offset + prev->bytes > offset) + entry = prev; + } + } + return entry; + } + + if (!prev) + return NULL; + + /* find last entry before the 'offset' */ + entry = prev; + if (entry->offset > offset) { + n = rb_prev(&entry->offset_index); + if (n) { + entry = rb_entry(n, struct btrfs_free_space, + offset_index); + BUG_ON(entry->offset > offset); + } else { + if (fuzzy) + return entry; + else + return NULL; + } + } + + if (entry->bitmap) { + n = rb_prev(&entry->offset_index); + if (n) { + prev = rb_entry(n, struct btrfs_free_space, + offset_index); + if (!prev->bitmap && + prev->offset + prev->bytes > offset) + return prev; + } + if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) + return entry; + } else if (entry->offset + entry->bytes > offset) + return entry; + + if (!fuzzy) + return NULL; + + while (1) { + if (entry->bitmap) { + if (entry->offset + BITS_PER_BITMAP * + ctl->unit > offset) + break; + } else { + if (entry->offset + entry->bytes > offset) + break; + } + + n = rb_next(&entry->offset_index); + if (!n) + return NULL; + entry = rb_entry(n, struct btrfs_free_space, offset_index); + } + return entry; +} + +void unlink_free_space(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info) +{ + rb_erase(&info->offset_index, &ctl->free_space_offset); + ctl->free_extents--; + ctl->free_space -= info->bytes; +} + +static int link_free_space(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info) +{ + int ret = 0; + + BUG_ON(!info->bitmap && 
!info->bytes); + ret = tree_insert_offset(&ctl->free_space_offset, info->offset, + &info->offset_index, (info->bitmap != NULL)); + if (ret) + return ret; + + ctl->free_space += info->bytes; + ctl->free_extents++; + return ret; +} + +static int search_bitmap(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *bitmap_info, u64 *offset, + u64 *bytes) +{ + unsigned long found_bits = 0; + unsigned long bits, i; + unsigned long next_zero; + + i = offset_to_bit(bitmap_info->offset, ctl->unit, + max_t(u64, *offset, bitmap_info->offset)); + bits = bytes_to_bits(*bytes, ctl->unit); + + for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { + next_zero = find_next_zero_bit(bitmap_info->bitmap, + BITS_PER_BITMAP, i); + if ((next_zero - i) >= bits) { + found_bits = next_zero - i; + break; + } + i = next_zero; + } + + if (found_bits) { + *offset = (u64)(i * ctl->unit) + bitmap_info->offset; + *bytes = (u64)(found_bits) * ctl->unit; + return 0; + } + + return -1; +} + +struct btrfs_free_space * +btrfs_find_free_space(struct btrfs_free_space_ctl *ctl, u64 offset, u64 bytes) +{ + return tree_search_offset(ctl, offset, 0, 0); +} + +static void try_merge_free_space(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info) +{ + struct btrfs_free_space *left_info; + struct btrfs_free_space *right_info; + u64 offset = info->offset; + u64 bytes = info->bytes; + + /* + * first we want to see if there is free space adjacent to the range we + * are adding, if there is remove that struct and add a new one to + * cover the entire range + */ + right_info = tree_search_offset(ctl, offset + bytes, 0, 0); + if (right_info && rb_prev(&right_info->offset_index)) + left_info = rb_entry(rb_prev(&right_info->offset_index), + struct btrfs_free_space, offset_index); + else + left_info = tree_search_offset(ctl, offset - 1, 0, 0); + + if (right_info && !right_info->bitmap) { + unlink_free_space(ctl, right_info); + info->bytes += right_info->bytes; + free(right_info); + } + + if 
(left_info && !left_info->bitmap && + left_info->offset + left_info->bytes == offset) { + unlink_free_space(ctl, left_info); + info->offset = left_info->offset; + info->bytes += left_info->bytes; + free(left_info); + } +} + +void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, + u64 bytes) +{ + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_free_space *info; + struct rb_node *n; + int count = 0; + + for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { + info = rb_entry(n, struct btrfs_free_space, offset_index); + if (info->bytes >= bytes && !block_group->ro) + count++; + printk("entry offset %llu, bytes %llu, bitmap %s\n", + (unsigned long long)info->offset, + (unsigned long long)info->bytes, + (info->bitmap) ? "yes" : "no"); + } + printk("%d blocks of free space at or bigger than bytes is \n", count); +} + +int btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group, + int sectorsize) +{ + struct btrfs_free_space_ctl *ctl; + + ctl = calloc(1, sizeof(*ctl)); + if (!ctl) + return -ENOMEM; + + ctl->unit = sectorsize; + ctl->start = block_group->key.objectid; + ctl->private = block_group; + block_group->free_space_ctl = ctl; + + return 0; +} + +void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) +{ + struct btrfs_free_space *info; + struct rb_node *node; + + while ((node = rb_last(&ctl->free_space_offset)) != NULL) { + info = rb_entry(node, struct btrfs_free_space, offset_index); + unlink_free_space(ctl, info); + if (info->bitmap) + free(info->bitmap); + free(info); + } +} + +void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) +{ + __btrfs_remove_free_space_cache(block_group->free_space_ctl); +} + +int btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, u64 offset, + u64 bytes) +{ + struct btrfs_free_space *info; + int ret = 0; + + info = calloc(1, sizeof(*info)); + if (!info) + return -ENOMEM; + + info->offset = offset; + info->bytes = bytes; 
+ + try_merge_free_space(ctl, info); + + ret = link_free_space(ctl, info); + if (ret) { + printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); + BUG_ON(ret == -EEXIST); + } + + return ret; +} + +/* + * Merges all the free space cache and kills the bitmap entries since we just + * want to use the free space cache to verify it's correct, no reason to keep + * the bitmaps around to confuse things. + */ +static void merge_space_tree(struct btrfs_free_space_ctl *ctl) +{ + struct btrfs_free_space *e, *prev = NULL; + struct rb_node *n; + int ret; + +again: + prev = NULL; + for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { + e = rb_entry(n, struct btrfs_free_space, offset_index); + if (e->bitmap) { + u64 offset = e->offset, bytes = ctl->unit; + u64 end; + + end = e->offset + (u64)(BITS_PER_BITMAP * ctl->unit); + + unlink_free_space(ctl, e); + while (!(search_bitmap(ctl, e, &offset, &bytes))) { + ret = btrfs_add_free_space(ctl, offset, + bytes); + BUG_ON(ret); + offset += bytes; + if (offset >= end) + break; + bytes = ctl->unit; + } + free(e->bitmap); + free(e); + goto again; + } + if (!prev) + goto next; + if (prev->offset + prev->bytes == e->offset) { + unlink_free_space(ctl, prev); + unlink_free_space(ctl, e); + prev->bytes += e->bytes; + free(e); + link_free_space(ctl, prev); + goto again; + } +next: + prev = e; + } +} diff --git a/free-space-cache.h b/free-space-cache.h new file mode 100644 index 0000000..d286258 --- /dev/null +++ b/free-space-cache.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2009 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_FREE_SPACE_CACHE +#define __BTRFS_FREE_SPACE_CACHE + +struct btrfs_free_space { + struct rb_node offset_index; + u64 offset; + u64 bytes; + unsigned long *bitmap; + struct list_head list; +}; + +struct btrfs_free_space_ctl { + struct rb_root free_space_offset; + u64 free_space; + int extents_thresh; + int free_extents; + int total_bitmaps; + int unit; + u64 start; + void *private; +}; + +int load_free_space_cache(struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group); + +void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl); +void btrfs_remove_free_space_cache(struct btrfs_block_group_cache + *block_group); +void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, + u64 bytes); +struct btrfs_free_space * +btrfs_find_free_space(struct btrfs_free_space_ctl *ctl, u64 offset, u64 bytes); +int btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group, + int sectorsize); +void unlink_free_space(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info); +#endif @@ -0,0 +1,214 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "commands.h" + +extern char argv0_buf[ARGV0_BUF_SIZE]; + +#define USAGE_SHORT 1U +#define USAGE_LONG 2U +#define USAGE_OPTIONS 4U +#define USAGE_LISTING 8U + +static int do_usage_one_command(const char * const *usagestr, + unsigned int flags, FILE *outf) +{ + int pad = 4; + + if (!usagestr || !*usagestr) + return -1; + + fprintf(outf, "%s%s\n", (flags & USAGE_LISTING) ? " " : "usage: ", + *usagestr++); + + /* a short one-line description (mandatory) */ + if ((flags & USAGE_SHORT) == 0) + return 0; + else if (!*usagestr) + return -2; + + if (flags & USAGE_LISTING) + pad = 8; + else + fputc('\n', outf); + + fprintf(outf, "%*s%s\n", pad, "", *usagestr++); + + /* a long (possibly multi-line) description (optional) */ + if (!*usagestr || ((flags & USAGE_LONG) == 0)) + return 0; + + if (**usagestr) + fputc('\n', outf); + while (*usagestr && **usagestr) + fprintf(outf, "%*s%s\n", pad, "", *usagestr++); + + /* options (optional) */ + if (!*usagestr || ((flags & USAGE_OPTIONS) == 0)) + return 0; + + /* + * options (if present) should always (even if there is no long + * description) be prepended with an empty line, skip it + */ + usagestr++; + + fputc('\n', outf); + while (*usagestr) + fprintf(outf, "%*s%s\n", pad, "", *usagestr++); + + return 0; +} + +static int usage_command_internal(const char * const *usagestr, + const char *token, int full, int lst, + FILE *outf) +{ + unsigned int flags = USAGE_SHORT; + int ret; + + if (full) + flags |= USAGE_LONG | USAGE_OPTIONS; + if (lst) + flags |= USAGE_LISTING; + + ret = do_usage_one_command(usagestr, flags, outf); + switch (ret) { + case -1: + fprintf(outf, "No usage for '%s'\n", token); + break; + case -2: + fprintf(outf, "No 
short description for '%s'\n", token); + break; + } + + return ret; +} + +static void usage_command_usagestr(const char * const *usagestr, + const char *token, int full, int err) +{ + FILE *outf = err ? stderr : stdout; + int ret; + + ret = usage_command_internal(usagestr, token, full, 0, outf); + if (!ret) + fputc('\n', outf); +} + +void usage_command(const struct cmd_struct *cmd, int full, int err) +{ + usage_command_usagestr(cmd->usagestr, cmd->token, full, err); +} + +void usage(const char * const *usagestr) +{ + usage_command_usagestr(usagestr, NULL, 1, 1); + exit(129); +} + +static void usage_command_group_internal(const struct cmd_group *grp, int full, + FILE *outf) +{ + const struct cmd_struct *cmd = grp->commands; + int do_sep = 0; + + for (; cmd->token; cmd++) { + if (cmd->hidden) + continue; + + if (full && cmd != grp->commands) + fputc('\n', outf); + + if (!cmd->next) { + if (do_sep) { + fputc('\n', outf); + do_sep = 0; + } + + usage_command_internal(cmd->usagestr, cmd->token, full, + 1, outf); + continue; + } + + /* this is an entry point to a nested command group */ + + if (!full && cmd != grp->commands) + fputc('\n', outf); + + usage_command_group_internal(cmd->next, full, outf); + + if (!full) + do_sep = 1; + } +} + +void usage_command_group(const struct cmd_group *grp, int full, int err) +{ + const char * const *usagestr = grp->usagestr; + FILE *outf = err ? 
stderr : stdout; + + if (usagestr && *usagestr) { + fprintf(outf, "usage: %s\n", *usagestr++); + while (*usagestr) + fprintf(outf, " or: %s\n", *usagestr++); + } + + fputc('\n', outf); + usage_command_group_internal(grp, full, outf); + fputc('\n', outf); + + if (grp->infostr) + fprintf(outf, "%s\n", grp->infostr); +} + +void help_unknown_token(const char *arg, const struct cmd_group *grp) +{ + fprintf(stderr, "%s: unknown token '%s'\n", argv0_buf, arg); + usage_command_group(grp, 0, 1); + exit(1); +} + +void help_ambiguous_token(const char *arg, const struct cmd_group *grp) +{ + const struct cmd_struct *cmd = grp->commands; + + fprintf(stderr, "%s: ambiguous token '%s'\n", argv0_buf, arg); + fprintf(stderr, "\nDid you mean one of these ?\n"); + + for (; cmd->token; cmd++) { + if (!prefixcmp(cmd->token, arg)) + fprintf(stderr, "\t%s\n", cmd->token); + } + + exit(1); +} + +void help_command_group(const struct cmd_group *grp, int argc, char **argv) +{ + int full = 0; + + if (argc > 1) { + if (!strcmp(argv[1], "--full")) + full = 1; + } + + usage_command_group(grp, full, 0); +} @@ -20,6 +20,11 @@ #define __IOCTL_ #include <asm/types.h> #include <linux/ioctl.h> +#include <time.h> + +#ifdef __cplusplus +extern "C" { +#endif #define BTRFS_IOCTL_MAGIC 0x94 #define BTRFS_VOL_NAME_MAX 255 @@ -31,14 +36,49 @@ struct btrfs_ioctl_vol_args { char name[BTRFS_PATH_NAME_MAX + 1]; }; +#define BTRFS_DEVICE_PATH_NAME_MAX 1024 + +#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) #define BTRFS_SUBVOL_RDONLY (1ULL << 1) +#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) + +#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) + +struct btrfs_qgroup_limit { + __u64 flags; + __u64 max_referenced; + __u64 max_exclusive; + __u64 rsv_referenced; + __u64 rsv_exclusive; +}; + +struct btrfs_qgroup_inherit { + __u64 flags; + __u64 num_qgroups; + __u64 num_ref_copies; + __u64 num_excl_copies; + struct btrfs_qgroup_limit lim; + __u64 qgroups[0]; +}; + +struct btrfs_ioctl_qgroup_limit_args { + __u64 
qgroupid; + struct btrfs_qgroup_limit lim; +}; + #define BTRFS_SUBVOL_NAME_MAX 4039 struct btrfs_ioctl_vol_args_v2 { __s64 fd; __u64 transid; __u64 flags; - __u64 unused[4]; + union { + struct { + __u64 size; + struct btrfs_qgroup_inherit *qgroup_inherit; + }; + __u64 unused[4]; + }; char name[BTRFS_SUBVOL_NAME_MAX + 1]; }; @@ -74,7 +114,48 @@ struct btrfs_ioctl_scrub_args { __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; }; -#define BTRFS_DEVICE_PATH_NAME_MAX 1024 +#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 +#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 +struct btrfs_ioctl_dev_replace_start_params { + __u64 srcdevid; /* in, if 0, use srcdev_name instead */ + __u64 cont_reading_from_srcdev_mode; /* in, see #define + * above */ + __u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ + __u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ +}; + +#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4 +struct btrfs_ioctl_dev_replace_status_params { + __u64 replace_state; /* out, see #define above */ + __u64 progress_1000; /* out, 0 <= x <= 1000 */ + __u64 time_started; /* out, seconds since 1-Jan-1970 */ + __u64 time_stopped; /* out, seconds since 1-Jan-1970 */ + __u64 num_write_errors; /* out */ + __u64 num_uncorrectable_read_errors; /* out */ +}; + +#define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0 +#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1 +#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2 +struct btrfs_ioctl_dev_replace_args { + __u64 cmd; /* in */ + __u64 result; /* out */ + + union { + struct btrfs_ioctl_dev_replace_start_params start; + 
struct btrfs_ioctl_dev_replace_status_params status; + }; /* in/out */ + + __u64 spare[64]; +}; + struct btrfs_ioctl_dev_info_args { __u64 devid; /* in/out */ __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ @@ -91,6 +172,54 @@ struct btrfs_ioctl_fs_info_args { __u64 reserved[124]; /* pad to 1k */ }; +/* balance control ioctl modes */ +#define BTRFS_BALANCE_CTL_PAUSE 1 +#define BTRFS_BALANCE_CTL_CANCEL 2 +#define BTRFS_BALANCE_CTL_RESUME 3 + +/* + * this is packed, because it should be exactly the same as its disk + * byte order counterpart (struct btrfs_disk_balance_args) + */ +struct btrfs_balance_args { + __u64 profiles; + __u64 usage; + __u64 devid; + __u64 pstart; + __u64 pend; + __u64 vstart; + __u64 vend; + + __u64 target; + + __u64 flags; + + __u64 unused[8]; +} __attribute__ ((__packed__)); + +struct btrfs_balance_progress { + __u64 expected; + __u64 considered; + __u64 completed; +}; + +#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0) +#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1) +#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2) + +struct btrfs_ioctl_balance_args { + __u64 flags; /* in/out */ + __u64 state; /* out */ + + struct btrfs_balance_args data; /* in/out */ + struct btrfs_balance_args meta; /* in/out */ + struct btrfs_balance_args sys; /* in/out */ + + struct btrfs_balance_progress stat; /* out */ + + __u64 unused[72]; /* pad to 1k */ +}; + struct btrfs_ioctl_search_key { /* which root are we searching. 
0 is the tree of tree roots */ __u64 tree_id; @@ -224,7 +353,95 @@ struct btrfs_ioctl_logical_ino_args { __u64 inodes; }; +struct btrfs_ioctl_timespec { + __u64 sec; + __u32 nsec; +}; + +struct btrfs_ioctl_received_subvol_args { + char uuid[BTRFS_UUID_SIZE]; /* in */ + __u64 stransid; /* in */ + __u64 rtransid; /* out */ + struct btrfs_ioctl_timespec stime; /* in */ + struct btrfs_ioctl_timespec rtime; /* out */ + __u64 flags; /* in */ + __u64 reserved[16]; /* in */ +}; + +/* + * Caller doesn't want file data in the send stream, even if the + * search of clone sources doesn't find an extent. UPDATE_EXTENT + * commands will be sent instead of WRITE commands. + */ +#define BTRFS_SEND_FLAG_NO_FILE_DATA 0x1 + +struct btrfs_ioctl_send_args { + __s64 send_fd; /* in */ + __u64 clone_sources_count; /* in */ + __u64 *clone_sources; /* in */ + __u64 parent_root; /* in */ + __u64 flags; /* in */ + __u64 reserved[4]; /* in */ +}; + +enum btrfs_dev_stat_values { + /* disk I/O failure stats */ + BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */ + BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */ + BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */ + + /* stats for indirect indications for I/O failures */ + BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or + * contents is illegal: this is an + * indication that the block was damaged + * during read or write, or written to + * wrong location or read from wrong + * location */ + BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not + * been written */ + + BTRFS_DEV_STAT_VALUES_MAX +}; + +/* Reset statistics after reading; needs SYS_ADMIN capability */ +#define BTRFS_DEV_STATS_RESET (1ULL << 0) + +struct btrfs_ioctl_get_dev_stats { + __u64 devid; /* in */ + __u64 nr_items; /* in/out */ + __u64 flags; /* in/out */ + + /* out values: */ + __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; + + __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ 
+}; + /* BTRFS_IOC_SNAP_CREATE is no longer used by the btrfs command */ +#define BTRFS_QUOTA_CTL_ENABLE 1 +#define BTRFS_QUOTA_CTL_DISABLE 2 +/* 3 has formerly been reserved for BTRFS_QUOTA_CTL_RESCAN */ +struct btrfs_ioctl_quota_ctl_args { + __u64 cmd; + __u64 status; +}; + +struct btrfs_ioctl_quota_rescan_args { + __u64 flags; + __u64 progress; + __u64 reserved[6]; +}; + +struct btrfs_ioctl_qgroup_assign_args { + __u64 assign; + __u64 src; + __u64 dst; +}; + +struct btrfs_ioctl_qgroup_create_args { + __u64 create; + __u64 qgroupid; +}; #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ @@ -233,6 +450,13 @@ struct btrfs_ioctl_logical_ino_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ struct btrfs_ioctl_vol_args) + +struct btrfs_ioctl_clone_range_args { + __s64 src_fd; + __u64 src_offset, src_length; + __u64 dest_offset; +}; + /* trans start and trans end are dangerous, and only for * use by applications that know how to avoid the * resulting deadlocks @@ -248,7 +472,8 @@ struct btrfs_ioctl_logical_ino_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ struct btrfs_ioctl_vol_args) -/* 13 is for CLONE_RANGE */ +#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ + struct btrfs_ioctl_clone_range_args) #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ @@ -264,6 +489,10 @@ struct btrfs_ioctl_logical_ino_args { struct btrfs_ioctl_space_args) #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) +#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) #define 
BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ struct btrfs_ioctl_scrub_args) #define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28) @@ -273,9 +502,44 @@ struct btrfs_ioctl_logical_ino_args { struct btrfs_ioctl_dev_info_args) #define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ struct btrfs_ioctl_fs_info_args) +#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \ + struct btrfs_ioctl_balance_args) +#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int) +#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \ + struct btrfs_ioctl_balance_args) #define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ struct btrfs_ioctl_ino_path_args) #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ struct btrfs_ioctl_ino_path_args) +#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \ + struct btrfs_ioctl_received_subvol_args) +#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args) + +#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \ + struct btrfs_ioctl_quota_ctl_args) +#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \ + struct btrfs_ioctl_qgroup_assign_args) +#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \ + struct btrfs_ioctl_qgroup_create_args) +#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ + struct btrfs_ioctl_qgroup_limit_args) +#define BTRFS_IOC_QUOTA_RESCAN _IOW(BTRFS_IOCTL_MAGIC, 44, \ + struct btrfs_ioctl_quota_rescan_args) +#define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \ + struct btrfs_ioctl_quota_rescan_args) +#define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \ + char[BTRFS_LABEL_SIZE]) +#define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \ + char[BTRFS_LABEL_SIZE]) +#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ + struct btrfs_ioctl_get_dev_stats) +#define BTRFS_IOC_DEV_REPLACE 
_IOWR(BTRFS_IOCTL_MAGIC, 53, \ + struct btrfs_ioctl_dev_replace_args) + +#ifdef __cplusplus +} +#endif #endif diff --git a/kerncompat.h b/kerncompat.h index 46236cd..9c116b4 100644 --- a/kerncompat.h +++ b/kerncompat.h @@ -18,6 +18,7 @@ #ifndef __KERNCOMPAT #define __KERNCOMPAT + #include <stdio.h> #include <stdlib.h> #include <errno.h> @@ -35,7 +36,7 @@ #define gfp_t int #define get_cpu_var(p) (p) #define __get_cpu_var(p) (p) -#define BITS_PER_LONG (sizeof(long) * 8) +#define BITS_PER_LONG (__SIZEOF_LONG__ * 8) #define __GFP_BITS_SHIFT 20 #define __GFP_BITS_MASK ((int)((1 << __GFP_BITS_SHIFT) - 1)) #define GFP_KERNEL 0 @@ -57,11 +58,22 @@ #endif #ifndef __CHECKER__ +/* + * Since we're using primitive definitions from kernel-space, we need to + * define __KERNEL__ so that system header files know which definitions + * to use. + */ +#define __KERNEL__ #include <asm/types.h> typedef __u32 u32; typedef __u64 u64; typedef __u16 u16; typedef __u8 u8; +/* + * Continuing to define __KERNEL__ breaks others parts of the code, so + * we can just undefine it now that we have the correct headers... + */ +#undef __KERNEL__ #else typedef unsigned int u32; typedef unsigned int __u32; @@ -114,6 +126,10 @@ static inline int mutex_is_locked(struct mutex *m) #define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) #define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) +#ifndef __attribute_const__ +#define __attribute_const__ __attribute__((__const__)) +#endif + /** * __set_bit - Set a bit in memory * @nr: the bit to set @@ -191,6 +207,16 @@ static inline long IS_ERR(const void *ptr) ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; }) /* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. 
+ */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + +/* * printk */ #define printk(fmt, args...) fprintf(stderr, fmt, ##args) @@ -255,6 +281,19 @@ typedef u64 __bitwise __be64; #define cpu_to_le16(x) ((__force __le16)(u16)(x)) #define le16_to_cpu(x) ((__force u16)(__le16)(x)) #endif + +struct __una_u16 { u16 x; } __attribute__((__packed__)); +struct __una_u32 { u32 x; } __attribute__((__packed__)); +struct __una_u64 { u64 x; } __attribute__((__packed__)); + +#define get_unaligned_le8(p) (*((u8 *)(p))) +#define put_unaligned_le8(val,p) ((*((u8 *)(p))) = (val)) +#define get_unaligned_le16(p) le16_to_cpu(((const struct __una_u16 *)(p))->x) +#define put_unaligned_le16(val,p) (((struct __una_u16 *)(p))->x = cpu_to_le16(val)) +#define get_unaligned_le32(p) le32_to_cpu(((const struct __una_u32 *)(p))->x) +#define put_unaligned_le32(val,p) (((struct __una_u32 *)(p))->x = cpu_to_le32(val)) +#define get_unaligned_le64(p) le64_to_cpu(((const struct __una_u64 *)(p))->x) +#define put_unaligned_le64(val,p) (((struct __una_u64 *)(p))->x = cpu_to_le64(val)) #endif #ifndef noinline @@ -19,8 +19,8 @@ #ifndef _LINUX_LIST_H #define _LINUX_LIST_H -#define LIST_POISON1 ((void *) 0x00100100) -#define LIST_POISON2 ((void *) 0x00200200) +#define LIST_POISON1 ((struct list_head *) 0x00100100) +#define LIST_POISON2 ((struct list_head *) 0x00200200) /* * Simple doubly linked list implementation. @@ -54,17 +54,17 @@ static inline void INIT_LIST_HEAD(struct list_head *list) * the prev/next entries already! 
*/ #ifndef CONFIG_DEBUG_LIST -static inline void __list_add(struct list_head *new, +static inline void __list_add(struct list_head *xnew, struct list_head *prev, struct list_head *next) { - next->prev = new; - new->next = next; - new->prev = prev; - prev->next = new; + next->prev = xnew; + xnew->next = next; + xnew->prev = prev; + prev->next = xnew; } #else -extern void __list_add(struct list_head *new, +extern void __list_add(struct list_head *xnew, struct list_head *prev, struct list_head *next); #endif @@ -78,12 +78,12 @@ extern void __list_add(struct list_head *new, * This is good for implementing stacks. */ #ifndef CONFIG_DEBUG_LIST -static inline void list_add(struct list_head *new, struct list_head *head) +static inline void list_add(struct list_head *xnew, struct list_head *head) { - __list_add(new, head, head->next); + __list_add(xnew, head, head->next); } #else -extern void list_add(struct list_head *new, struct list_head *head); +extern void list_add(struct list_head *xnew, struct list_head *head); #endif @@ -95,9 +95,9 @@ extern void list_add(struct list_head *new, struct list_head *head); * Insert a new entry before the specified head. * This is useful for implementing queues. */ -static inline void list_add_tail(struct list_head *new, struct list_head *head) +static inline void list_add_tail(struct list_head *xnew, struct list_head *head) { - __list_add(new, head->prev, head); + __list_add(xnew, head->prev, head); } /* @@ -137,18 +137,18 @@ extern void list_del(struct list_head *entry); * Note: if 'old' was empty, it will be overwritten. 
*/ static inline void list_replace(struct list_head *old, - struct list_head *new) + struct list_head *xnew) { - new->next = old->next; - new->next->prev = new; - new->prev = old->prev; - new->prev->next = new; + xnew->next = old->next; + xnew->next->prev = xnew; + xnew->prev = old->prev; + xnew->prev->next = xnew; } static inline void list_replace_init(struct list_head *old, - struct list_head *new) + struct list_head *xnew) { - list_replace(old, new); + list_replace(old, xnew); INIT_LIST_HEAD(old); } /** diff --git a/man/Makefile b/man/Makefile index 4a90b75..1ba23b5 100644 --- a/man/Makefile +++ b/man/Makefile @@ -1,4 +1,4 @@ -GZIP=gzip +GZIPCMD=gzip INSTALL= install prefix ?= /usr/local @@ -6,31 +6,23 @@ bindir = $(prefix)/bin mandir = $(prefix)/man man8dir = $(mandir)/man8 -MANPAGES = mkfs.btrfs.8.gz btrfsctl.8.gz btrfsck.8.gz btrfs-image.8.gz \ - btrfs-show.8.gz btrfs.8.gz +# clear out all suffixes +.SUFFIXES: +# list only those we use +.SUFFIXES: .in .gz -all: $(MANPAGES) - -mkfs.btrfs.8.gz: mkfs.btrfs.8.in - $(GZIP) -n -c mkfs.btrfs.8.in > mkfs.btrfs.8.gz - -btrfs.8.gz: btrfs.8.in - $(GZIP) -n -c btrfs.8.in > btrfs.8.gz +MANPAGES = mkfs.btrfs.8.gz btrfsck.8.gz btrfs-image.8.gz btrfs.8.gz +INFILES = ${MANPAGES:.in=.gz} -btrfsctl.8.gz: btrfsctl.8.in - $(GZIP) -n -c btrfsctl.8.in > btrfsctl.8.gz - -btrfsck.8.gz: btrfsck.8.in - $(GZIP) -n -c btrfsck.8.in > btrfsck.8.gz - -btrfs-image.8.gz: btrfs-image.8.in - $(GZIP) -n -c btrfs-image.8.in > btrfs-image.8.gz +all: $(MANPAGES) -btrfs-show.8.gz: btrfs-show.8.in - $(GZIP) -n -c btrfs-show.8.in > btrfs-show.8.gz +.in.gz : + @echo " [MAN] $@" + $(Q)$(GZIPCMD) -n -c $< > $@ clean : - rm -f $(MANPAGES) + @echo "Cleaning manpages" + $(Q)rm -f $(MANPAGES) install: $(MANPAGES) $(INSTALL) -m755 -d $(DESTDIR)$(man8dir) diff --git a/man/btrfs-image.8.in b/man/btrfs-image.8.in index 7a348f8..7e0e3b0 100644 --- a/man/btrfs-image.8.in +++ b/man/btrfs-image.8.in @@ -24,6 +24,15 @@ compression level (0 ~ 9). 
.TP \fB\-t\fR \fIvalue\fP number of threads (1 ~ 32) to be used to process the image dump or restore. +.TP +\fB\-o\fP +use the old restore method, this does not fixup the chunk tree so the restored +file system will not be able to be mounted. +.TP +\fB\-w\fP +Walk all the trees manually and copy any blocks that are referenced. Use this +option if your extent tree is corrupted to make sure that all of the metadata is +captured. .SH AVAILABILITY .B btrfs-image is part of btrfs-progs. Btrfs is currently under heavy development, @@ -31,4 +40,4 @@ and not suitable for any uses other than benchmarking and review. Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for further details. .SH SEE ALSO -.BR btrfsck (8), btrfsctl (8), mkfs.btrfs (8) +.BR btrfsck (8), mkfs.btrfs (8) diff --git a/man/btrfs-show.8.in b/man/btrfs-show.8.in deleted file mode 100644 index cb98b68..0000000 --- a/man/btrfs-show.8.in +++ /dev/null @@ -1,22 +0,0 @@ -.TH BTRFS-SHOW 8 -.SH NAME -btrfs-show \- scan the /dev directory for btrfs partitions and print results. -.SH SYNOPSIS -.B btrfs-show -.SH NOTE -.B btrfs-show -is deprecated. Please consider to switch to the btrfs utility. -.SH DESCRIPTION -.B btrfs-show -is used to scan the /dev directory for btrfs partitions and display brief -information such as lable, uuid, etc of each btrfs partition. -.SH OPTIONS -none -.SH AVAILABILITY -.B btrfs-show -is part of btrfs-progs. Btrfs is currently under heavy development, -and not suitable for any uses other than benchmarking and review. -Please refer to the btrfs wiki -http://btrfs.wiki.kernel.org for further details. 
-.SH SEE ALSO -.BR btrfsck (8), btrfsctl (8), mkfs.btrfs (8), btrfs-image (8) diff --git a/man/btrfs.8.in b/man/btrfs.8.in index be478e0..af7df4d 100644 --- a/man/btrfs.8.in +++ b/man/btrfs.8.in @@ -7,52 +7,74 @@ btrfs \- control a btrfs filesystem .SH SYNOPSIS \fBbtrfs\fP \fBsubvolume snapshot\fP\fI [-r] <source> [<dest>/]<name>\fP .PP -\fBbtrfs\fP \fBsubvolume delete\fP\fI <subvolume>\fP +\fBbtrfs\fP \fBsubvolume delete\fP\fI <subvolume> [<subvolume>...]\fP .PP \fBbtrfs\fP \fBsubvolume create\fP\fI [<dest>/]<name>\fP .PP -\fBbtrfs\fP \fBsubvolume list\fP\fI [-p] <path>\fP +\fBbtrfs\fP \fBsubvolume list\fP\fI [-acgoprts] [-G [+|-]value] [-C [+|-]value] [--sort=rootid,gen,ogen,path] <path>\fP .PP \fBbtrfs\fP \fBsubvolume set-default\fP\fI <id> <path>\fP .PP \fBbtrfs\fP \fBsubvolume get-default\fP\fI <path>\fP .PP -\fBbtrfs\fP \fBfilesystem sync\fP\fI <path> \fP +\fBbtrfs\fP \fBsubvolume find-new\fP\fI <subvolume> <last_gen>\fP .PP -\fBbtrfs\fP \fBfilesystem resize\fP\fI [+/\-]<size>[gkm]|max <filesystem>\fP +\fBbtrfs\fP \fBsubvolume show\fP\fI <path>\fP .PP -\fBbtrfs\fP \fBfilesystem label\fP\fI <dev> [newlabel]\fP +\fBbtrfs\fP \fBfilesystem defragment\fP -c[zlib|lzo] [-l \fIlen\fR] \ +[-s \fIstart\fR] [-t \fIsize\fR] -[vf] <\fIfile\fR>|<\fIdir\fR> \ +[<\fIfile\fR>|<\fIdir\fR>...] 
.PP -\fBbtrfs\fP \fBfilesystem defrag\fP\fI [options] <file>|<dir> [<file>|<dir>...]\fP +\fBbtrfs\fP \fBfilesystem sync\fP\fI <path> \fP .PP -\fBbtrfs\fP \fBsubvolume find-new\fP\fI <subvolume> <last_gen>\fP +\fBbtrfs\fP \fBfilesystem resize\fP\fI [devid:][+/\-]<size>[gkm]|[devid:]max <filesystem>\fP .PP -\fBbtrfs\fP \fBfilesystem balance\fP\fI <path> \fP +\fBbtrfs\fP \fBfilesystem label\fP\fI <dev> [newlabel]\fP .PP -\fBbtrfs\fP \fBfilesystem defragment\fP\fI <file>|<dir> [<file>|<dir>...]\fP +\fBbtrfs\fP \fBfilesystem balance\fP\fI <path> \fP .PP \fBbtrfs\fP \fBdevice scan\fP\fI [--all-devices|<device> [<device>...]]\fP .PP -\fBbtrfs\fP \fBdevice show\fP\fI [--all-devices|<uuid>|<label>]\fP +\fBbtrfs\fP \fBdevice stats\fP [-z] {\fI<path>\fP|\fI<device>\fP} .PP \fBbtrfs\fP \fBdevice add\fP\fI <device> [<device>...] <path> \fP .PP \fBbtrfs\fP \fBdevice delete\fP\fI <device> [<device>...] <path> \fP .PP -\fBbtrfs\fP \fBscrub start\fP [-Bdqru] {\fI<path>\fP|\fI<device>\fP} +\fBbtrfs\fP \fBreplace start\fP \fI[-Bfr] <srcdev>|<devid> <targetdev> <path>\fP +.PP +\fBbtrfs\fP \fBreplace status\fP \fI[-1] <path>\fP +.PP +\fBbtrfs\fP \fBreplace cancel\fP \fI<path>\fP +.PP +\fBbtrfs\fP \fBscrub start\fP [-Bdqru] [-c ioprio_class -n ioprio_classdata] {\fI<path>\fP|\fI<device>\fP} .PP \fBbtrfs\fP \fBscrub cancel\fP {\fI<path>\fP|\fI<device>\fP} .PP -\fBbtrfs\fP \fBscrub resume\fP [-Bdqru] {\fI<path>\fP|\fI<device>\fP} +\fBbtrfs\fP \fBscrub resume\fP [-Bdqru] [-c ioprio_class -n ioprio_classdata] {\fI<path>\fP|\fI<device>\fP} .PP \fBbtrfs\fP \fBscrub status\fP [-d] {\fI<path>\fP|\fI<device>\fP} .PP \fBbtrfs\fP \fBinspect-internal inode-resolve\fP [-v] \fI<inode>\fP \fI<path>\fP .PP \fBbtrfs\fP \fBinspect-internal logical-resolve\fP -[-Pv] \fI<logical>\fP \fI<path>\fP +[-Pv] [-s size] \fI<logical>\fP \fI<path>\fP .PP -\fBbtrfs\fP \fBhelp|\-\-help|\-h \fP\fI\fP +\fBbtrfs\fP \fBinspect-internal subvolid-resolve\fP \fI<subvolid>\fP \fI<path>\fP +.PP +\fBbtrfs\fP \fBqgroup assign\fP 
\fI<src>\fP \fI<dst>\fP \fI<path>\fP +.PP +\fBbtrfs\fP \fBqgroup remove\fP \fI<src>\fP \fI<dst>\fP \fI<path>\fP +.PP +\fBbtrfs\fP \fBqgroup create\fP \fI<qgroupid>\fP \fI<path>\fP +.PP +\fBbtrfs\fP \fBqgroup destroy\fP \fI<qgroupid>\fP \fI<path>\fP +.PP +\fBbtrfs\fP \fBqgroup show\fP \fI<path>\fP +.PP +\fBbtrfs\fP \fBqgroup limit\fP [options] \fI<size>\fP|\fBnone\fP [\fI<qgroupid>\fP] \fI<path>\fP +.PP +\fBbtrfs\fP \fBhelp|\-\-help \fP\fI\fP .PP \fBbtrfs\fP \fB<command> \-\-help \fP\fI\fP .PP @@ -78,7 +100,7 @@ and as .I filesystem sync. In this case .I btrfs -returnsfilesystem sync +returns filesystem sync If a command is terminated by .I --help , the detailed help is showed. If the passed command matches more commands, @@ -98,7 +120,7 @@ subvolume, \fBbtrfs\fR returns an error. If \fI-r\fR is given, the snapshot will be readonly. .TP -\fBsubvolume delete\fR\fI <subvolume>\fR +\fBsubvolume delete\fR\fI <subvolume> [<subvolume>...]\fR Delete the subvolume \fI<subvolume>\fR. If \fI<subvolume>\fR is not a subvolume, \fBbtrfs\fR returns an error. .TP @@ -108,17 +130,54 @@ Create a subvolume in \fI<dest>\fR (or in the current directory if \fI<dest>\fR is omitted). .TP -\fBsubvolume list\fR\fI [-p] <path>\fR +\fBsubvolume list\fR\fI [-acgoprts] [-G [+|-]value] [-C [+|-]value] [--sort=rootid,gen,ogen,path] <path>\fR +.RS List the subvolumes present in the filesystem \fI<path>\fR. For every subvolume the following information is shown by default. ID <ID> top level <ID> path <path> where path is the relative path of the subvolume to the \fItop level\fR subvolume. + The subvolume's ID may be used by the \fBsubvolume set-default\fR command, or -at mount time via the \fIsubvol=\fR option. +at mount time via the \fIsubvolid=\fR option. If \fI-p\fR is given, then \fIparent <ID>\fR is added to the output between ID and top level. The parent's ID may be used at mount time via the \fIsubvolrootid=\fR option. + +\fB-t\fP print the result as a table. 
+ +\fB-a\fP print all the subvolumes in the filesystem and distinguish between +absolute and relative path with respect to the given <path>. + +\fB-c\fP print the ogeneration of the subvolume, aliases: ogen or origin generation + +\fB-g\fP print the generation of the subvolume + +\fB-u\fP print the UUID of the subvolume + +\fB-o\fP print only subvolumes bellow specified <path>. + +\fB-r\fP only readonly subvolumes in the filesystem will be listed. + +\fB-s\fP only snapshot subvolumes in the filesystem will be listed. + +\fB-G [+|-]value\fP +list subvolumes in the filesystem that its generation is +>=, <= or = value. '+' means >= value, '-' means <= value, If there is +neither '+' nor '-', it means = value. + +\fB-C [+|-]value\fP +list subvolumes in the filesystem that its ogeneration is +>=, <= or = value. The usage is the same to '-g' option. + +\fB--sort=rootid,gen,ogen,path\fP +list subvolumes in order by specified items. +you can add '+' or '-' in front of each items, '+' means ascending, '-' +means descending. The default is ascending. + +for \fB--sort\fP you can combine some items together by ',', just like +\f--sort=+ogen,-gen,path,rootid\fR. +.RE .TP \fBsubvolume set-default\fR\fI <id> <path>\fR @@ -132,12 +191,25 @@ Get the default subvolume of the filesystem \fI<path>\fR. The output format is similar to \fBsubvolume list\fR command. .TP -\fBfilesystem defragment\fP -c[zlib|lzo] [-l \fIlen\fR] [-s \fIstart\fR] [-t \fIsize\fR] -[vf] <\fIfile\fR>|<\fIdir\fR> [<\fIfile\fR>|<\fIdir\fR>...] +\fBsubvolume find-new\fR\fI <subvolume> <last_gen>\fR +List the recently modified files in a subvolume, after \fI<last_gen>\fR ID. +.TP + +\fBsubvolume show\fR\fI <path>\fR +Show information of a given subvolume in the \fI<path>\fR. +.TP + +\fBfilesystem defragment\fP -c[zlib|lzo] [-l \fIlen\fR] [-s \fIstart\fR] \ +[-t \fIsize\fR] -[vf] <\fIfile\fR>|<\fIdir\fR> [<\fIfile\fR>|<\fIdir\fR>...] Defragment file data and/or directory metadata. 
To defragment all files in a directory you have to specify each one on its own or use your shell wildcards. -The start position and the number of bytes to deframention can be specified by \fIstart\fR and \fIlen\fR. Any extent bigger than \fIthresh\fR will be considered already defragged. Use 0 to take the kernel default, and use 1 to say eveery single extent must be rewritten. You can also turn on compression in defragment operations. +The start position and the number of bytes to defragment can be specified by +\fIstart\fR and \fIlen\fR. Any extent bigger than threshold will be +considered already defragged. Use 0 to take the kernel default, and use 1 to +say every single extent must be rewritten. You can also turn on compression in +defragment operations. \fB-v\fP be verbose @@ -151,11 +223,12 @@ The start position and the number of bytes to deframention can be specified by \ \fB-t size\fP defragment only files at least \fIsize\fR bytes big -NOTE: defragmenting with kernels up to 2.6.37 will unlink COW-ed copies of data, don't -use it if you use snapshots, have de-duplicated your data or made copies with -\fBcp --reflink\fP. -\fBsubvolume find-new\fR\fI <subvolume> <last_gen>\fR -List the recently modified files in a subvolume, after \fI<last_gen>\fR ID. +For \fBstart\fP, \fBlen\fP, \fBsize\fP it is possible to append a suffix +like \fBk\fP for 1 KBytes, \fBm\fP for 1 MBytes... + +NOTE: defragmenting with kernels up to 2.6.37 will unlink COW-ed copies of data, +don't use it if you use snapshots, have de-duplicated your data or made +copies with \fBcp --reflink\fP. .TP \fBfilesystem sync\fR\fI <path> \fR @@ -166,32 +239,37 @@ Force a sync for the filesystem identified by \fI<path>\fR. .\" Some wording are extracted by the resize2fs man page .\" -\fBfilesystem resize\fR\fI [+/\-]<size>[gkm]|max <path>\fR -Resize a filesystem identified by \fI<path>\fR. 
+\fBfilesystem resize\fR\fI [devid:][+/\-]<size>[gkm]|[devid:]max <path>\fR +Resize a filesystem identified by \fI<path>\fR for the underlying device +\fIdevid\fR. The \fIdevid\fR can be found with \fBbtrfs filesystem show\fR and +defaults to 1 if not specified. The \fI<size>\fR parameter specifies the new size of the filesystem. If the prefix \fI+\fR or \fI\-\fR is present the size is increased or decreased by the quantity \fI<size>\fR. If no units are specified, the unit of the \fI<size>\fR parameter defaults to bytes. Optionally, the size parameter may be suffixed by one of the following -the units designators: 'K', 'M', or 'G', kilobytes, megabytes, or gigabytes, +units designators: 'K', 'M', or 'G', kilobytes, megabytes, or gigabytes, respectively. If 'max' is passed, the filesystem will occupy all available space on the -volume(s). +device \fIdevid\fR. The \fBresize\fR command \fBdoes not\fR manipulate the size of underlying partition. If you wish to enlarge/reduce a filesystem, you must make sure you can expand the partition before enlarging the filesystem and shrink the -partition after reducing the size of the filesystem. +partition after reducing the size of the filesystem. This can done using +\fBfdisk(8)\fR or \fBparted(8)\fR to delete the existing partition and recreate +it with the new desired size. When recreating the partition make sure to use +the same starting disk cylinder as before. .TP -\fBbtrfs\fP \fBfilesystem label\fP\fI <dev> [newlabel]\fP +\fBfilesystem label\fP\fI <dev> [newlabel]\fP Show or update the label of a filesystem. \fI<dev>\fR is used to identify the filesystem. If a \fInewlabel\fR optional argument is passed, the label is changed. The -following costraints exist for a label: +following constraints exist for a label: .IP -- the maximum allowable lenght shall be less or equal than 256 chars +- the maximum allowable length shall be less or equal than 256 chars .IP - the label shall not contain the '/' or '\\' characters. 
@@ -209,11 +287,23 @@ If \fB--all-devices\fP is passed, all the devices under /dev are scanned; otherwise the devices list is extracted from the /proc/partitions file. .TP -\fBdevice balance\fR \fI<path>\fR +\fBfilesystem balance\fR \fI<path>\fR Balance the chunks of the filesystem identified by \fI<path>\fR across the devices. .TP +\fBdevice stats\fP [-z] {\fI<path>\fP|\fI<device>\fP} +Read and print the device IO stats for all devices of the filesystem +identified by \fI<path>\fR or for a single \fI<device>\fR. + +.RS +\fIOptions\fR +.TP +.B -z +Reset stats to zero after reading them. +.RE +.TP + \fBdevice add\fR\fI <dev> [<dev>..] <path>\fR Add device(s) to the filesystem identified by \fI<path>\fR. .TP @@ -230,12 +320,65 @@ Finally, if \fB--all-devices\fP is passed, all the devices under /dev are scanned. .TP +\fBreplace start\fR \fI[-Bfr] <srcdev>|<devid> <targetdev> <path>\fR +Replace device of a btrfs filesystem. +On a live filesystem, duplicate the data to the target device which +is currently stored on the source device. If the source device is not +available anymore, or if the \fB-r\fR option is set, the data is built +only using the RAID redundancy mechanisms. After completion of the +operation, the source device is removed from the filesystem. +If the \fIsrcdev\fR is a numerical value, it is assumed to be the device id +of the filesystem which is mounted at mount_point, otherwise it is +the path to the source device. If the source device is disconnected +from the system, you have to use the \fIdevid\fR parameter format. +The targetdev needs to be same size or larger than the \fIsrcdev\fR. + +.RS +\fIOptions\fR +.TP +.B -r +only read from \fIsrcdev\fR if no other zero-defect mirror exists (enable +this if your drive has lots of read errors, the access would be very slow) +.TP +.B -f +force using and overwriting \fItargetdev\fR even if it looks like +containing a valid btrfs filesystem.
A valid filesystem is +assumed if a btrfs superblock is found which contains a +correct checksum. Devices which are currently mounted are +never allowed to be used as the \fItargetdev\fR +.TP +.B -B +do not background +.RE +.TP + +\fBreplace status\fR \fI[-1] <path>\fR +Print status and progress information of a running device replace operation. + +.RS +\fIOptions\fR +.TP +.B -1 +print once instead of print continuously until the replace +operation finishes (or is canceled) +.RE +.TP + +\fBreplace cancel\fR \fI<path>\fR +Cancel a running device replace operation. +.TP + \fBscrub start\fP [-Bdqru] {\fI<path>\fP|\fI<device>\fP} +\fBscrub start\fP [-Bdqru] [-c ioprio_class -n ioprio_classdata] {\fI<path>\fP|\fI<device>\fP} Start a scrub on all devices of the filesystem identified by \fI<path>\fR or on a single \fI<device>\fR. Without options, scrub is started as a background process. Progress can be obtained with the \fBscrub status\fR command. Scrubbing involves reading all data from all disks and verifying checksums. Errors are corrected along the way if possible. +.IP +The default IO priority of scrub is the idle class. The priority can be configured similar to the +.BR ionice (1) +syntax. .RS \fIOptions\fR @@ -249,6 +392,14 @@ Quiet. Omit error messages and statistics. Read only mode. Do not attempt to correct anything. .IP -u 5 Scrub unused space as well. (NOT IMPLEMENTED) +.IP -c 5 +Set IO priority class (see +.BR ionice (1) +manpage). +.IP -n 5 +Set IO priority classdata (see +.BR ionice (1) +manpage). .RE .TP @@ -260,7 +411,7 @@ If a \fI<device>\fR is given, the corresponding filesystem is found and \fBscrub cancel\fP behaves as if it was called on that filesystem. .TP -\fBscrub resume\fP [-Bdqru] {\fI<path>\fP|\fI<device>\fP} +\fBscrub resume\fP [-Bdqru] [-c ioprio_class -n ioprio_classdata] {\fI<path>\fP|\fI<device>\fP} Resume a canceled or interrupted scrub cycle on the filesystem identified by \fI<path>\fR or on a given \fI<device>\fR.
Does not start a new scrub if the last scrub finished successfully. @@ -295,7 +446,7 @@ verbose mode. print count of returned paths and ioctl() return value .RE .TP -\fBinspect-internal logical-resolve\fP [-Pv] \fI<logical>\fP \fI<path>\fP +\fBinspect-internal logical-resolve\fP [-Pv] [-s bufsize] \fI<logical>\fP \fI<path>\fP Resolves a <logical> address in the filesystem mounted at <path> to all inodes. By default, each inode is then resolved to a file system path (similar to the \fBinode-resolve\fP subcommand). @@ -306,6 +457,38 @@ By default, each inode is then resolved to a file system path (similar to the skip the path resolving and print the inodes instead .IP -v 5 verbose mode. print count of returned paths and all ioctl() return values +.IP -s bufsize 5 +set inode container's size. This is used to increase inode container's size in case it is +not enough to read all the resolved results. The max value one can set is 64k. +.RE +.TP + +\fBinspect-internal subvolid-resolve\fP \fI<subvolid>\fP \fI<path>\fP +Get file system paths for the given subvolume ID. +.TP + +\fBbtrfs qgroup assign\fP \fI<src>\fP \fI<dst>\fP \fI<path>\fP +Enable subvolume qgroup support for a filesystem. +.TP + +\fBbtrfs qgroup remove\fP \fI<src>\fP \fI<dst>\fP \fI<path>\fP +Remove a subvol from a quota group. +.TP + +\fBbtrfs qgroup create\fP \fI<qgroupid>\fP \fI<path>\fP +Create a subvolume quota group. +.TP + +\fBbtrfs qgroup destroy\fP \fI<qgroupid>\fP \fI<path>\fP +Destroy a subvolume quota group. +.TP + +\fBbtrfs qgroup show\fP \fI<path>\fP +Show all subvolume quota groups. +.TP + +\fBbtrfs\fP \fBqgroup limit\fP [options] \fI<size>\fP|\fBnone\fP [\fI<qgroupid>\fP] \fI<path>\fP +Limit the size of a subvolume quota group. .RE .SH EXIT STATUS @@ -319,4 +502,5 @@ and not suitable for any uses other than benchmarking and review. Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for further details. 
.SH SEE ALSO -.BR mkfs.btrfs (8) +.BR mkfs.btrfs (8), +.BR ionice (1) diff --git a/man/btrfsck.8.in b/man/btrfsck.8.in index 4bf1cff..5004ba0 100644 --- a/man/btrfsck.8.in +++ b/man/btrfsck.8.in @@ -14,4 +14,3 @@ Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for further details. .SH SEE ALSO .BR mkfs.btrfs (8) -.BR btrfsctl (8) diff --git a/man/btrfsctl.8.in b/man/btrfsctl.8.in deleted file mode 100644 index 8705fa6..0000000 --- a/man/btrfsctl.8.in +++ /dev/null @@ -1,48 +0,0 @@ -.TH BTRFSCTL 8 -.SH NAME -btrfsctl \- control a btrfs filesystem -.SH SYNOPSIS -.B btrfsctl -[ \fB\-d\fP\fI file|directory \fP ] -[ \fB\-s\fP\fI snapshot-name directory\fP ] -[ \fB \-S\fP\fI subvolume-name directory\fP ] -[ \fB \-r\fP\fI [+-]size\fP ] -[ \fB \-A\fP\fI device\fP ] -[ \fB \-a\fP ] -[ \fB \-c\fP ] -.SH NOTE -B btrfsctl -is deprecated. Please consider to switch to the btrfs utility. -.SH DESCRIPTION -.B btrfsctl -is used to control the filesystem and the files and directories stored. It is the tool to create a new snapshot for the filesystem. -.SH OPTIONS -.TP -\fB\-d\fR \fIfile|directory\fR -Defragment a file or a directory. If the argument is a directory, the entire b-tree under the directory is defragged. -.TP -\fB\-s\fR \fIsnapshot-name directory\fR -Creates a new \fIsnapshot\fP of the \fIdirectory\fP specified. -.TP -\fB\-S\fR \fIsubvolume-name directory\fR -Creates a new subvolume. -.TP -\fB\-r\fR \fI[+|-]size\fR -Resizes the filesystem with the \fIsize\fP specified. If the value is preceded with a signed symbol, the filesystem is resized with respect to the current filesystem size. \fIsize\fP can be suffixed by k,m or g to represent kilobytes, megabytes, or gigabytes respectively. -.TP -\fB\-A\fR \fIdevice\fR -Scans the \fIdevice\fR for btrfs filesystem. -.TP -\fB\-a\fR -Scans all devices present in the system for btrfs filesystem. -.TP -\fB\-c\fR -Forces a filesystem sync. -.SH AVAILABILITY -.B btrfsctl -is part of btrfs-progs. 
Btrfs is currently under heavy development, -and not suitable for any uses other than benchmarking and review. -Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for -further details. -.SH SEE ALSO -.BR mkfs.btrfs (8) diff --git a/man/mkfs.btrfs.8.in b/man/mkfs.btrfs.8.in index 432db1b..a3f1503 100644 --- a/man/mkfs.btrfs.8.in +++ b/man/mkfs.btrfs.8.in @@ -1,22 +1,26 @@ .TH MKFS.BTRFS 8 .SH NAME -mkfs.btrfs \- create an btrfs filesystem +mkfs.btrfs \- create a btrfs filesystem .SH SYNOPSIS .B mkfs.btrfs [ \fB\-A\fP\fI alloc-start\fP ] [ \fB\-b\fP\fI byte-count\fP ] -[ \fB \-d\fP\fI data-profile\fP ] -[ \fB \-l\fP\fI leafsize\fP ] -[ \fB \-L\fP\fI label\fP ] -[ \fB \-m\fP\fI metadata profile\fP ] -[ \fB \-M\fP\fI mixed data+metadata\fP ] -[ \fB \-n\fP\fI nodesize\fP ] -[ \fB \-s\fP\fI sectorsize\fP ] -[ \fB \-h\fP ] -[ \fB \-V\fP ] \fI device\fP [ \fI device ...\fP ] +[ \fB\-d\fP\fI data-profile\fP ] +[ \fB\-f\fP ] +[ \fB\-l\fP\fI leafsize\fP ] +[ \fB\-L\fP\fI label\fP ] +[ \fB\-m\fP\fI metadata profile\fP ] +[ \fB\-M\fP\fI mixed data+metadata\fP ] +[ \fB\-n\fP\fI nodesize\fP ] +[ \fB\-s\fP\fI sectorsize\fP ] +[ \fB\-r\fP\fI rootdir\fP ] +[ \fB\-K\fP ] +[ \fB\-h\fP ] +[ \fB\-V\fP ] +\fI device\fP [ \fIdevice ...\fP ] .SH DESCRIPTION .B mkfs.btrfs -is used to create an btrfs filesystem (usually in a disk partition, or an array +is used to create a btrfs filesystem (usually in a disk partition, or an array of disk partitions). .I device is the special file corresponding to the device (e.g \fI/dev/sdXX\fP ). @@ -33,7 +37,12 @@ mkfs.btrfs uses all the available storage for the filesystem. .TP \fB\-d\fR, \fB\-\-data \fItype\fR Specify how the data must be spanned across the devices specified. Valid -values are raid0, raid1, raid10 or single. +values are raid0, raid1, raid5, raid6, raid10 or single. +.TP +\fB\-f\fR, \fB\-\-force\fR +Force overwrite when an existing filesystem is detected on the device. 
+By default, mkfs.btrfs will not write to the device if it suspects that +there is a filesystem or partition table on the device already. .TP \fB\-l\fR, \fB\-\-leafsize \fIsize\fR Specify the leaf size, the least data item in which btrfs stores data. The @@ -44,7 +53,11 @@ Specify a label for the filesystem. .TP \fB\-m\fR, \fB\-\-metadata \fIprofile\fR Specify how metadata must be spanned across the devices specified. Valid -values are raid0, raid1, raid10 or single. +values are raid0, raid1, raid5, raid6, raid10, single or dup. Single device +will have dup set by default except in the case of SSDs which will default to +single. This is because SSDs can remap blocks internally so duplicate blocks +could end up in the same erase block which negates the benefits of doing +metadata duplication. .TP \fB\-M\fR, \fB\-\-mixed\fR Mix data and metadata chunks together for more efficient space @@ -58,8 +71,17 @@ Specify the nodesize. By default the value is set to the pagesize. \fB\-s\fR, \fB\-\-sectorsize \fIsize\fR Specify the sectorsize, the minimum block allocation. .TP +\fB\-r\fR, \fB\-\-rootdir \fIrootdir\fR +Specify a directory to copy into the newly created fs. +.TP +\fB\-K\fR, \fB\-\-nodiscard \fR +Do not perform whole device TRIM operation by default. +.TP \fB\-V\fR, \fB\-\-version\fR Print the \fBmkfs.btrfs\fP version and exit. +.SH UNIT +As default the unit is the byte, however it is possible to append a suffix +to the arguments like \fBk\fP for KBytes, \fBm\fP for MBytes... .SH AVAILABILITY .B mkfs.btrfs is part of btrfs-progs. 
Btrfs is currently under heavy development, @@ -19,6 +19,8 @@ #define _XOPEN_SOURCE 500 #define _GNU_SOURCE +#include "kerncompat.h" + #ifndef __CHECKER__ #include <sys/ioctl.h> #include <sys/mount.h> @@ -37,7 +39,8 @@ #include <linux/fs.h> #include <ctype.h> #include <attr/xattr.h> -#include "kerncompat.h" +#include <blkid/blkid.h> +#include <ftw.h> #include "ctree.h" #include "disk-io.h" #include "volumes.h" @@ -54,32 +57,6 @@ struct directory_name_entry { struct list_head list; }; -static u64 parse_size(char *s) -{ - int len = strlen(s); - char c; - u64 mult = 1; - - if (!isdigit(s[len - 1])) { - c = tolower(s[len - 1]); - switch (c) { - case 'g': - mult *= 1024; - case 'm': - mult *= 1024; - case 'k': - mult *= 1024; - case 'b': - break; - default: - fprintf(stderr, "Unknown size descriptor %c\n", c); - exit(1); - } - s[len - 1] = '\0'; - } - return atol(s) * mult; -} - static int make_root_dir(struct btrfs_root *root, int mixed) { struct btrfs_trans_handle *trans; @@ -90,7 +67,7 @@ static int make_root_dir(struct btrfs_root *root, int mixed) int ret; trans = btrfs_start_transaction(root, 1); - bytes_used = btrfs_super_bytes_used(&root->fs_info->super_copy); + bytes_used = btrfs_super_bytes_used(root->fs_info->super_copy); root->fs_info->system_allocs = 1; ret = btrfs_make_block_group(trans, root, bytes_used, @@ -152,7 +129,7 @@ static int make_root_dir(struct btrfs_root *root, int mixed) location.offset = (u64)-1; ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, "default", 7, - btrfs_super_root_dir(&root->fs_info->super_copy), + btrfs_super_root_dir(root->fs_info->super_copy), &location, BTRFS_FT_DIR, 0); if (ret) goto err; @@ -228,19 +205,66 @@ static int create_one_raid_group(struct btrfs_trans_handle *trans, static int create_raid_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 data_profile, - u64 metadata_profile, int mixed) + int data_profile_opt, u64 metadata_profile, + int metadata_profile_opt, int mixed, int ssd) { - 
u64 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy); - u64 allowed; + u64 num_devices = btrfs_super_num_devices(root->fs_info->super_copy); + u64 allowed = 0; + u64 devices_for_raid = num_devices; int ret; - if (num_devices == 1) - allowed = BTRFS_BLOCK_GROUP_DUP; - else if (num_devices >= 4) { - allowed = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10; - } else - allowed = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1; + /* + * Set default profiles according to number of added devices. + * For mixed groups defaults are single/single. + */ + if (!metadata_profile_opt && !mixed) { + if (num_devices == 1 && ssd) + printf("Detected a SSD, turning off metadata " + "duplication. Mkfs with -m dup if you want to " + "force metadata duplication.\n"); + metadata_profile = (num_devices > 1) ? + BTRFS_BLOCK_GROUP_RAID1 : (ssd) ? 0: BTRFS_BLOCK_GROUP_DUP; + } + if (!data_profile_opt && !mixed) { + data_profile = (num_devices > 1) ? + BTRFS_BLOCK_GROUP_RAID0 : 0; /* raid0 or single */ + } + + if (devices_for_raid > 4) + devices_for_raid = 4; + + switch (devices_for_raid) { + default: + case 4: + allowed |= BTRFS_BLOCK_GROUP_RAID10; + case 3: + allowed |= BTRFS_BLOCK_GROUP_RAID6; + case 2: + allowed |= BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID5; + break; + case 1: + allowed |= BTRFS_BLOCK_GROUP_DUP; + } + + if (metadata_profile & ~allowed) { + fprintf(stderr, "unable to create FS with metadata " + "profile %llu (have %llu devices)\n", metadata_profile, + num_devices); + exit(1); + } + if (data_profile & ~allowed) { + fprintf(stderr, "unable to create FS with data " + "profile %llu (have %llu devices)\n", data_profile, + num_devices); + exit(1); + } + + /* allow dup'ed data chunks only in mixed mode */ + if (!mixed && (data_profile & BTRFS_BLOCK_GROUP_DUP)) { + fprintf(stderr, "dup for data is allowed only in mixed mode\n"); + exit(1); + } if (allowed & metadata_profile) { u64 meta_flags = 
BTRFS_BLOCK_GROUP_METADATA; @@ -302,7 +326,8 @@ static void print_usage(void) fprintf(stderr, "options:\n"); fprintf(stderr, "\t -A --alloc-start the offset to start the FS\n"); fprintf(stderr, "\t -b --byte-count total number of bytes in the FS\n"); - fprintf(stderr, "\t -d --data data profile, raid0, raid1, raid10 or single\n"); + fprintf(stderr, "\t -d --data data profile, raid0, raid1, raid5, raid6, raid10, dup or single\n"); + fprintf(stderr, "\t -f --force force overwrite of existing filesystem\n"); fprintf(stderr, "\t -l --leafsize size of btree leaves\n"); fprintf(stderr, "\t -L --label set a label\n"); fprintf(stderr, "\t -m --metadata metadata profile, values like data profile\n"); @@ -310,6 +335,8 @@ static void print_usage(void) fprintf(stderr, "\t -n --nodesize size of btree nodes\n"); fprintf(stderr, "\t -s --sectorsize min block allocation\n"); fprintf(stderr, "\t -r --rootdir the source directory\n"); + fprintf(stderr, "\t -K --nodiscard do not perform whole device TRIM\n"); + fprintf(stderr, "\t -V --version print the mkfs.btrfs version and exit\n"); fprintf(stderr, "%s\n", BTRFS_BUILD_VERSION); exit(1); } @@ -325,21 +352,27 @@ static u64 parse_profile(char *s) if (strcmp(s, "raid0") == 0) { return BTRFS_BLOCK_GROUP_RAID0; } else if (strcmp(s, "raid1") == 0) { - return BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP; + return BTRFS_BLOCK_GROUP_RAID1; + } else if (strcmp(s, "raid5") == 0) { + return BTRFS_BLOCK_GROUP_RAID5; + } else if (strcmp(s, "raid6") == 0) { + return BTRFS_BLOCK_GROUP_RAID6; } else if (strcmp(s, "raid10") == 0) { - return BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_DUP; + return BTRFS_BLOCK_GROUP_RAID10; + } else if (strcmp(s, "dup") == 0) { + return BTRFS_BLOCK_GROUP_DUP; } else if (strcmp(s, "single") == 0) { return 0; } else { - fprintf(stderr, "Unknown option %s\n", s); + fprintf(stderr, "Unknown profile %s\n", s); print_usage(); } + /* not reached */ return 0; } static char *parse_label(char *input) { - int i; int len = 
strlen(input); if (len >= BTRFS_LABEL_SIZE) { @@ -347,18 +380,13 @@ static char *parse_label(char *input) BTRFS_LABEL_SIZE - 1); exit(1); } - for (i = 0; i < len; i++) { - if (input[i] == '/' || input[i] == '\\') { - fprintf(stderr, "invalid label %s\n", input); - exit(1); - } - } return strdup(input); } static struct option long_options[] = { { "alloc-start", 1, NULL, 'A'}, { "byte-count", 1, NULL, 'b' }, + { "force", 0, NULL, 'f' }, { "leafsize", 1, NULL, 'l' }, { "label", 1, NULL, 'L'}, { "metadata", 1, NULL, 'm' }, @@ -368,6 +396,7 @@ static struct option long_options[] = { { "data", 1, NULL, 'd' }, { "version", 0, NULL, 'V' }, { "rootdir", 1, NULL, 'r' }, + { "nodiscard", 0, NULL, 'K' }, { 0, 0, 0, 0} }; @@ -768,7 +797,7 @@ static int add_file_items(struct btrfs_trans_handle *trans, fd = open(path_name, O_RDONLY); if (fd == -1) { fprintf(stderr, "%s open failed\n", path_name); - goto end; + return ret; } blocks = st->st_size / sectorsize; @@ -878,8 +907,7 @@ static int traverse_directory(struct btrfs_trans_handle *trans, /* Add list for source directory */ dir_entry = malloc(sizeof(struct directory_name_entry)); dir_entry->dir_name = dir_name; - dir_entry->path = malloc(strlen(dir_name) + 1); - strcpy(dir_entry->path, dir_name); + dir_entry->path = strdup(dir_name); parent_inum = highest_inum + BTRFS_FIRST_FREE_OBJECTID; dir_entry->inum = parent_inum; @@ -1086,16 +1114,30 @@ fail: return -1; } +/* + * This ignores symlinks with unreadable targets and subdirs that can't + * be read. It's a best-effort to give a rough estimate of the size of + * a subdir. It doesn't guarantee that prepopulating btrfs from this + * tree won't still run out of space. + * + * The rounding up to 4096 is questionable. Previous code used du -B 4096. 
+ */ +static u64 global_total_size; +static int ftw_add_entry_size(const char *fpath, const struct stat *st, + int type) +{ + if (type == FTW_F || type == FTW_D) + global_total_size += round_up(st->st_size, 4096); + + return 0; +} + static u64 size_sourcedir(char *dir_name, u64 sectorsize, u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret) { u64 dir_size = 0; u64 total_size = 0; int ret; - char command[1024]; - char path[512]; - char *file_name = "temp_file"; - FILE *file; u64 default_chunk_size = 8 * 1024 * 1024; /* 8MB */ u64 allocated_meta_size = 8 * 1024 * 1024; /* 8MB */ u64 allocated_total_size = 20 * 1024 * 1024; /* 20MB */ @@ -1103,23 +1145,14 @@ static u64 size_sourcedir(char *dir_name, u64 sectorsize, u64 num_of_allocated_meta_chunks = allocated_meta_size / default_chunk_size; - ret = sprintf(command, "du -B 4096 -s "); + global_total_size = 0; + ret = ftw(dir_name, ftw_add_entry_size, 10); + dir_size = global_total_size; if (ret < 0) { - fprintf(stderr, "error executing sprintf for du command\n"); - return -1; + fprintf(stderr, "ftw subdir walk of '%s' failed: %s\n", + dir_name, strerror(errno)); + exit(1); } - strcat(command, dir_name); - strcat(command, " > "); - strcat(command, file_name); - ret = system(command); - - file = fopen(file_name, "r"); - ret = fscanf(file, "%lld %s\n", &dir_size, path); - fclose(file); - remove(file_name); - - dir_size *= sectorsize; - *size_of_data_ret = dir_size; num_of_meta_chunks = (dir_size / 2) / default_chunk_size; if (((dir_size / 2) % default_chunk_size) != 0) @@ -1159,6 +1192,67 @@ static int zero_output_file(int out_fd, u64 size, u32 sectorsize) return ret; } +static int check_leaf_or_node_size(u32 size, u32 sectorsize) +{ + if (size < sectorsize) { + fprintf(stderr, + "Illegal leafsize (or nodesize) %u (smaller than %u)\n", + size, sectorsize); + return -1; + } else if (size > BTRFS_MAX_METADATA_BLOCKSIZE) { + fprintf(stderr, + "Illegal leafsize (or nodesize) %u (larger than %u)\n", + size, 
BTRFS_MAX_METADATA_BLOCKSIZE); + return -1; + } else if (size & (sectorsize - 1)) { + fprintf(stderr, + "Illegal leafsize (or nodesize) %u (not align to %u)\n", + size, sectorsize); + return -1; + } + return 0; +} + +static int is_ssd(const char *file) +{ + blkid_probe probe; + char wholedisk[32]; + char sysfs_path[PATH_MAX]; + dev_t devno; + int fd; + char rotational; + + probe = blkid_new_probe_from_filename(file); + if (!probe) + return 0; + + /* Device number of this disk (possibly a partition) */ + devno = blkid_probe_get_devno(probe); + if (!devno) + return 0; + + /* Get whole disk name (not full path) for this devno */ + blkid_devno_to_wholedisk(devno, wholedisk, sizeof(wholedisk), NULL); + + snprintf(sysfs_path, PATH_MAX, "/sys/block/%s/queue/rotational", + wholedisk); + + blkid_free_probe(probe); + + fd = open(sysfs_path, O_RDONLY); + if (fd < 0) { + return 0; + } + + if (read(fd, &rotational, sizeof(char)) < sizeof(char)) { + close(fd); + return 0; + } + close(fd); + + return !atoi((const char *)&rotational); +} + int main(int ac, char **av) { char *file; @@ -1170,9 +1264,9 @@ int main(int ac, char **av) u64 dev_block_count = 0; u64 blocks[7]; u64 alloc_start = 0; - u64 metadata_profile = BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP; - u64 data_profile = BTRFS_BLOCK_GROUP_RAID0; - u32 leafsize = getpagesize(); + u64 metadata_profile = 0; + u64 data_profile = 0; + u32 leafsize = sysconf(_SC_PAGESIZE); u32 sectorsize = 4096; u32 nodesize = leafsize; u32 stripesize = 4096; @@ -1184,6 +1278,9 @@ int main(int ac, char **av) int mixed = 0; int data_profile_opt = 0; int metadata_profile_opt = 0; + int nodiscard = 0; + int ssd = 0; + int force_overwrite = 0; char *source_dir = NULL; int source_dir_set = 0; @@ -1191,22 +1288,32 @@ int main(int ac, char **av) u64 size_of_data = 0; u64 source_dir_size = 0; char *pretty_buf; + struct btrfs_super_block *super; + u64 flags; + int dev_cnt = 0; + int saved_optind; + char estr[100]; while(1) { int c; - c = 
getopt_long(ac, av, "A:b:l:n:s:m:d:L:r:VM", long_options, - &option_index); + c = getopt_long(ac, av, "A:b:fl:n:s:m:d:L:r:VMK", + long_options, &option_index); if (c < 0) break; switch(c) { case 'A': alloc_start = parse_size(optarg); break; + case 'f': + force_overwrite = 1; + break; case 'd': data_profile = parse_profile(optarg); data_profile_opt = 1; break; case 'l': + case 'n': + nodesize = parse_size(optarg); leafsize = parse_size(optarg); break; case 'L': @@ -1219,9 +1326,6 @@ int main(int ac, char **av) case 'M': mixed = 1; break; - case 'n': - nodesize = parse_size(optarg); - break; case 's': sectorsize = parse_size(optarg); break; @@ -1241,50 +1345,67 @@ int main(int ac, char **av) source_dir = optarg; source_dir_set = 1; break; + case 'K': + nodiscard=1; + break; default: print_usage(); } } - sectorsize = max(sectorsize, (u32)getpagesize()); - if (leafsize < sectorsize || (leafsize & (sectorsize - 1))) { - fprintf(stderr, "Illegal leafsize %u\n", leafsize); + sectorsize = max(sectorsize, (u32)sysconf(_SC_PAGESIZE)); + if (check_leaf_or_node_size(leafsize, sectorsize)) exit(1); - } - if (nodesize < sectorsize || (nodesize & (sectorsize - 1))) { - fprintf(stderr, "Illegal nodesize %u\n", nodesize); + if (check_leaf_or_node_size(nodesize, sectorsize)) exit(1); - } - ac = ac - optind; - if (ac == 0) + saved_optind = optind; + dev_cnt = ac - optind; + if (dev_cnt == 0) print_usage(); + if (source_dir_set && dev_cnt > 1) { + fprintf(stderr, + "The -r option is limited to a single device\n"); + exit(1); + } + while (dev_cnt-- > 0) { + file = av[optind++]; + if (is_block_device(file)) + if (test_dev_for_mkfs(file, force_overwrite, estr)) { + fprintf(stderr, "Error: %s", estr); + exit(1); + } + } + + /* if we are here that means all devs are good to btrfsify */ + optind = saved_optind; + dev_cnt = ac - optind; + printf("\nWARNING! - %s IS EXPERIMENTAL\n", BTRFS_BUILD_VERSION); printf("WARNING! 
- see http://btrfs.wiki.kernel.org before using\n\n"); - if (source_dir == 0) { - file = av[optind++]; - ret = check_mounted(file); - if (ret < 0) { - fprintf(stderr, "error checking %s mount status\n", file); - exit(1); - } - if (ret == 1) { - fprintf(stderr, "%s is mounted\n", file); - exit(1); - } - ac--; + file = av[optind++]; + dev_cnt--; + + if (!source_dir_set) { + /* + * open without O_EXCL so that the problem should not + * occur by the following processing. + * (btrfs_register_one_device() fails if O_EXCL is on) + */ fd = open(file, O_RDWR); if (fd < 0) { - fprintf(stderr, "unable to open %s\n", file); + fprintf(stderr, "unable to open %s: %s\n", file, + strerror(errno)); exit(1); } first_file = file; - ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count, &mixed); - if (block_count == 0) - block_count = dev_block_count; + ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count, + block_count, &mixed, nodiscard); + if (block_count && block_count > dev_block_count) { + fprintf(stderr, "%s is smaller than requested size\n", file); + exit(1); + } } else { - ac = 0; - file = av[optind++]; fd = open_target(file); if (fd < 0) { fprintf(stderr, "unable to open the %s\n", file); @@ -1301,13 +1422,13 @@ int main(int ac, char **av) fprintf(stderr, "unable to zero the output file\n"); exit(1); } + /* our "device" is the new image file */ + dev_block_count = block_count; } - if (mixed) { - if (!metadata_profile_opt) - metadata_profile = 0; - if (!data_profile_opt) - data_profile = 0; + ssd = is_ssd(file); + + if (mixed) { if (metadata_profile != data_profile) { fprintf(stderr, "With mixed block groups data and metadata " "profiles must be the same\n"); @@ -1321,14 +1442,20 @@ int main(int ac, char **av) leafsize * i; } - ret = make_btrfs(fd, file, label, blocks, block_count, + ret = make_btrfs(fd, file, label, blocks, dev_block_count, nodesize, leafsize, sectorsize, stripesize); if (ret) { fprintf(stderr, "error during mkfs %d\n", ret); exit(1); } 
+ root = open_ctree(file, 0, O_RDWR); + if (!root) { + fprintf(stderr, "Open ctree failed\n"); + close(fd); + exit(1); + } root->fs_info->alloc_start = alloc_start; ret = make_root_dir(root, mixed); @@ -1339,33 +1466,26 @@ int main(int ac, char **av) trans = btrfs_start_transaction(root, 1); - if (ac == 0) + if (dev_cnt == 0) goto raid_groups; btrfs_register_one_device(file); - if (!root) { - fprintf(stderr, "ctree init failed\n"); - return -1; - } zero_end = 1; - while(ac-- > 0) { + while (dev_cnt-- > 0) { int old_mixed = mixed; file = av[optind++]; - ret = check_mounted(file); - if (ret < 0) { - fprintf(stderr, "error checking %s mount status\n", - file); - exit(1); - } - if (ret == 1) { - fprintf(stderr, "%s is mounted\n", file); - exit(1); - } + + /* + * open without O_EXCL so that the problem should not + * occur by the following processing. + * (btrfs_register_one_device() fails if O_EXCL is on) + */ fd = open(file, O_RDWR); if (fd < 0) { - fprintf(stderr, "unable to open %s\n", file); + fprintf(stderr, "unable to open %s: %s\n", file, + strerror(errno)); exit(1); } ret = btrfs_device_already_in_root(root, fd, @@ -1376,8 +1496,8 @@ int main(int ac, char **av) close(fd); continue; } - ret = btrfs_prepare_device(fd, file, zero_end, - &dev_block_count, &mixed); + ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count, + block_count, &mixed, nodiscard); mixed = old_mixed; BUG_ON(ret); @@ -1390,25 +1510,36 @@ int main(int ac, char **av) raid_groups: if (!source_dir_set) { ret = create_raid_groups(trans, root, data_profile, - metadata_profile, mixed); + data_profile_opt, metadata_profile, + metadata_profile_opt, mixed, ssd); BUG_ON(ret); } ret = create_data_reloc_tree(trans, root); BUG_ON(ret); - if (mixed) { - struct btrfs_super_block *super = &root->fs_info->super_copy; - u64 flags = btrfs_super_incompat_flags(super); + super = root->fs_info->super_copy; + flags = btrfs_super_incompat_flags(super); + if (mixed) flags |= 
BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS; + + btrfs_set_super_incompat_flags(super, flags); + + if ((data_profile | metadata_profile) & + (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) { + struct btrfs_super_block *super = root->fs_info->super_copy; + u64 flags = btrfs_super_incompat_flags(super); + + flags |= BTRFS_FEATURE_INCOMPAT_RAID56; btrfs_set_super_incompat_flags(super, flags); + printf("Setting RAID5/6 feature flag\n"); } printf("fs created label %s on %s\n\tnodesize %u leafsize %u " "sectorsize %u size %s\n", label, first_file, nodesize, leafsize, sectorsize, - pretty_buf = pretty_sizes(btrfs_super_total_bytes(&root->fs_info->super_copy))); + pretty_buf = pretty_sizes(btrfs_super_total_bytes(root->fs_info->super_copy))); free(pretty_buf); printf("%s\n", BTRFS_BUILD_VERSION); @@ -1427,8 +1558,6 @@ raid_groups: ret = close_ctree(root); BUG_ON(ret); - free(label); return 0; } - diff --git a/print-tree.c b/print-tree.c index 6039699..aae47a9 100644 --- a/print-tree.c +++ b/print-tree.c @@ -48,6 +48,12 @@ static int print_dir_item(struct extent_buffer *eb, struct btrfs_item *item, read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len); printf("\t\tnamelen %u datalen %u name: %.*s\n", name_len, data_len, len, namebuf); + if (data_len) { + len = (data_len <= sizeof(namebuf))? 
data_len: sizeof(namebuf); + read_extent_buffer(eb, namebuf, + (unsigned long)(di + 1) + name_len, len); + printf("\t\tdata %.*s\n", len, namebuf); + } len = sizeof(*di) + name_len + data_len; di = (struct btrfs_dir_item *)((char *)di + len); cur += len; @@ -55,6 +61,42 @@ static int print_dir_item(struct extent_buffer *eb, struct btrfs_item *item, return 0; } +static int print_inode_extref_item(struct extent_buffer *eb, + struct btrfs_item *item, + struct btrfs_inode_extref *extref) +{ + u32 total; + u32 cur = 0; + u32 len; + u32 name_len = 0; + u64 index = 0; + u64 parent_objid; + char namebuf[BTRFS_NAME_LEN]; + + total = btrfs_item_size(eb, item); + + while (cur < total) { + index = btrfs_inode_extref_index(eb, extref); + name_len = btrfs_inode_extref_name_len(eb, extref); + parent_objid = btrfs_inode_extref_parent(eb, extref); + + len = (name_len <= sizeof(namebuf))? name_len: sizeof(namebuf); + + read_extent_buffer(eb, namebuf, (unsigned long)(extref->name), len); + + printf("\t\tinode extref index %llu parent %llu namelen %u " + "name: %.*s\n", + (unsigned long long)index, + (unsigned long long)parent_objid, + name_len, len, namebuf); + + len = sizeof(*extref) + name_len; + extref = (struct btrfs_inode_extref *)((char *)extref + len); + cur += len; + } + return 0; +} + static int print_inode_ref_item(struct extent_buffer *eb, struct btrfs_item *item, struct btrfs_inode_ref *ref) { @@ -94,6 +136,7 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i)); } } + static void print_dev_item(struct extent_buffer *eb, struct btrfs_dev_item *dev_item) { @@ -159,7 +202,7 @@ static void print_file_extent_item(struct extent_buffer *eb, btrfs_file_extent_compression(eb, fi)); } -static void print_extent_item(struct extent_buffer *eb, int slot) +static void print_extent_item(struct extent_buffer *eb, int slot, int metadata) { struct btrfs_extent_item *ei; struct btrfs_extent_inline_ref *iref; 
@@ -194,17 +237,21 @@ static void print_extent_item(struct extent_buffer *eb, int slot) (unsigned long long)btrfs_extent_generation(eb, ei), (unsigned long long)flags); - if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !metadata) { struct btrfs_tree_block_info *info; info = (struct btrfs_tree_block_info *)(ei + 1); btrfs_tree_block_key(eb, info, &key); - printf("\t\ttree block key (%llu %x %llu) level %d\n", - (unsigned long long)btrfs_disk_key_objectid(&key), - key.type, - (unsigned long long)btrfs_disk_key_offset(&key), - btrfs_tree_block_level(eb, info)); + printf("\t\ttree block "); + btrfs_print_key(&key); + printf(" level %d\n", btrfs_tree_block_level(eb, info)); iref = (struct btrfs_extent_inline_ref *)(info + 1); - } else { + } else if (metadata) { + struct btrfs_key tmp; + + btrfs_item_key_to_cpu(eb, &tmp, slot); + printf("\t\ttree block skinny level %d\n", (int)tmp.offset); + iref = (struct btrfs_extent_inline_ref *)(ei + 1); + } else{ iref = (struct btrfs_extent_inline_ref *)(ei + 1); } @@ -239,7 +286,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot) btrfs_shared_data_ref_count(eb, sref)); break; default: - BUG(); + return; } ptr += btrfs_extent_inline_ref_size(type); } @@ -276,8 +323,89 @@ static void print_root_ref(struct extent_buffer *leaf, int slot, char *tag) namelen, namebuf); } -static void print_key_type(u8 type) +static int count_bytes(void *buf, int len, char b) +{ + int cnt = 0; + int i; + for (i = 0; i < len; i++) { + if (((char*)buf)[i] == b) + cnt++; + } + return cnt; +} + +static void print_root(struct extent_buffer *leaf, int slot) +{ + struct btrfs_root_item *ri; + struct btrfs_root_item root_item; + int len; + char uuid_str[128]; + + ri = btrfs_item_ptr(leaf, slot, struct btrfs_root_item); + len = btrfs_item_size_nr(leaf, slot); + + memset(&root_item, 0, sizeof(root_item)); + read_extent_buffer(leaf, &root_item, (unsigned long)ri, len); + + printf("\t\troot data bytenr %llu 
level %d dirid %llu refs %u gen %llu\n", + (unsigned long long)btrfs_root_bytenr(&root_item), + btrfs_root_level(&root_item), + (unsigned long long)btrfs_root_dirid(&root_item), + btrfs_root_refs(&root_item), + (unsigned long long)btrfs_root_generation(&root_item)); + + if (root_item.generation == root_item.generation_v2) { + uuid_unparse(root_item.uuid, uuid_str); + printf("\t\tuuid %s\n", uuid_str); + if (count_bytes(root_item.parent_uuid, BTRFS_UUID_SIZE, 0) != BTRFS_UUID_SIZE) { + uuid_unparse(root_item.parent_uuid, uuid_str); + printf("\t\tparent_uuid %s\n", uuid_str); + } + if (count_bytes(root_item.received_uuid, BTRFS_UUID_SIZE, 0) != BTRFS_UUID_SIZE) { + uuid_unparse(root_item.received_uuid, uuid_str); + printf("\t\treceived_uuid %s\n", uuid_str); + } + if (root_item.ctransid) { + printf("\t\tctransid %llu otransid %llu stransid %llu rtransid %llu\n", + btrfs_root_ctransid(&root_item), + btrfs_root_otransid(&root_item), + btrfs_root_stransid(&root_item), + btrfs_root_rtransid(&root_item)); + } + } + if (btrfs_root_refs(&root_item) == 0) { + struct btrfs_key drop_key; + btrfs_disk_key_to_cpu(&drop_key, + &root_item.drop_progress); + printf("\t\tdrop "); + btrfs_print_key(&root_item.drop_progress); + printf(" level %d\n", root_item.drop_level); + } +} + +static void print_free_space_header(struct extent_buffer *leaf, int slot) +{ + struct btrfs_free_space_header *header; + struct btrfs_disk_key location; + + header = btrfs_item_ptr(leaf, slot, struct btrfs_free_space_header); + btrfs_free_space_key(leaf, header, &location); + printf("\t\tlocation "); + btrfs_print_key(&location); + printf("\n"); + printf("\t\tcache generation %llu entries %llu bitmaps %llu\n", + (unsigned long long)btrfs_free_space_generation(leaf, header), + (unsigned long long)btrfs_free_space_entries(leaf, header), + (unsigned long long)btrfs_free_space_bitmaps(leaf, header)); +} + +static void print_key_type(u64 objectid, u8 type) { + if (type == 0 && objectid == 
BTRFS_FREE_SPACE_OBJECTID) { + printf("UNTYPED"); + return; + } + switch (type) { case BTRFS_INODE_ITEM_KEY: printf("INODE_ITEM"); @@ -285,6 +413,9 @@ static void print_key_type(u8 type) case BTRFS_INODE_REF_KEY: printf("INODE_REF"); break; + case BTRFS_INODE_EXTREF_KEY: + printf("INODE_EXTREF"); + break; case BTRFS_DIR_ITEM_KEY: printf("DIR_ITEM"); break; @@ -315,6 +446,9 @@ static void print_key_type(u8 type) case BTRFS_EXTENT_ITEM_KEY: printf("EXTENT_ITEM"); break; + case BTRFS_METADATA_ITEM_KEY: + printf("METADATA_ITEM"); + break; case BTRFS_TREE_BLOCK_REF_KEY: printf("TREE_BLOCK_REF"); break; @@ -351,18 +485,45 @@ static void print_key_type(u8 type) case BTRFS_DEV_EXTENT_KEY: printf("DEV_EXTENT"); break; + case BTRFS_BALANCE_ITEM_KEY: + printf("BALANCE_ITEM"); + break; + case BTRFS_DEV_REPLACE_KEY: + printf("DEV_REPLACE_ITEM"); + break; case BTRFS_STRING_ITEM_KEY: printf("STRING_ITEM"); break; + case BTRFS_QGROUP_STATUS_KEY: + printf("BTRFS_STATUS_KEY"); + break; + case BTRFS_QGROUP_RELATION_KEY: + printf("BTRFS_QGROUP_RELATION_KEY"); + break; + case BTRFS_QGROUP_INFO_KEY: + printf("BTRFS_QGROUP_INFO_KEY"); + break; + case BTRFS_QGROUP_LIMIT_KEY: + printf("BTRFS_QGROUP_LIMIT_KEY"); + break; + case BTRFS_DEV_STATS_KEY: + printf("DEV_STATS_ITEM"); + break; default: - printf("UNKNOWN"); + printf("UNKNOWN.%d", type); }; } -static void print_objectid(unsigned long long objectid, u8 type) +static void print_objectid(u64 objectid, u8 type) { if (type == BTRFS_DEV_EXTENT_KEY) { - printf("%llu", objectid); /* device id */ + printf("%llu", (unsigned long long)objectid); /* device id */ + return; + } + switch (type) { + case BTRFS_QGROUP_RELATION_KEY: + printf("%llu/%llu", objectid >> 48, + objectid & ((1ll << 48) - 1)); return; } @@ -391,6 +552,9 @@ static void print_objectid(unsigned long long objectid, u8 type) case BTRFS_CSUM_TREE_OBJECTID: printf("CSUM_TREE"); break; + case BTRFS_BALANCE_OBJECTID: + printf("BALANCE"); + break; case BTRFS_ORPHAN_OBJECTID: 
printf("ORPHAN"); break; @@ -409,9 +573,21 @@ static void print_objectid(unsigned long long objectid, u8 type) case BTRFS_EXTENT_CSUM_OBJECTID: printf("EXTENT_CSUM"); break; + case BTRFS_FREE_SPACE_OBJECTID: + printf("FREE_SPACE"); + break; + case BTRFS_FREE_INO_OBJECTID: + printf("FREE_INO"); + break; + case BTRFS_QUOTA_TREE_OBJECTID: + printf("QUOTA_TREE"); + break; case BTRFS_MULTIPLE_OBJECTIDS: printf("MULTIPLE"); break; + case (u64)-1: + printf("-1"); + break; case BTRFS_FIRST_CHUNK_TREE_OBJECTID: if (type == BTRFS_CHUNK_ITEM_KEY) { printf("FIRST_CHUNK_TREE"); @@ -419,20 +595,34 @@ static void print_objectid(unsigned long long objectid, u8 type) } /* fall-thru */ default: - printf("%llu", objectid); + printf("%llu", (unsigned long long)objectid); } } void btrfs_print_key(struct btrfs_disk_key *disk_key) { - u8 type; + u64 objectid = btrfs_disk_key_objectid(disk_key); + u8 type = btrfs_disk_key_type(disk_key); + u64 offset = btrfs_disk_key_offset(disk_key); + printf("key ("); - type = btrfs_disk_key_type(disk_key); - print_objectid((unsigned long long)btrfs_disk_key_objectid(disk_key), - type); + print_objectid(objectid, type); printf(" "); - print_key_type(type); - printf(" %llu)", (unsigned long long)btrfs_disk_key_offset(disk_key)); + print_key_type(objectid, type); + switch (type) { + case BTRFS_QGROUP_RELATION_KEY: + case BTRFS_QGROUP_INFO_KEY: + case BTRFS_QGROUP_LIMIT_KEY: + printf(" %llu/%llu)", (unsigned long long)(offset >> 48), + (unsigned long long)(offset & ((1ll << 48) - 1))); + break; + default: + if (offset == (u64)-1) + printf(" -1)"); + else + printf(" %llu)", (unsigned long long)offset); + break; + } } void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) @@ -440,7 +630,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) int i; char *str; struct btrfs_item *item; - struct btrfs_root_item *ri; struct btrfs_dir_item *di; struct btrfs_inode_item *ii; struct btrfs_file_extent_item *fi; @@ -448,12 +637,16 
@@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) struct btrfs_extent_data_ref *dref; struct btrfs_shared_data_ref *sref; struct btrfs_inode_ref *iref; + struct btrfs_inode_extref *iref2; struct btrfs_dev_extent *dev_extent; struct btrfs_disk_key disk_key; - struct btrfs_root_item root_item; struct btrfs_block_group_item bg_item; struct btrfs_dir_log_item *dlog; + struct btrfs_qgroup_info_item *qg_info; + struct btrfs_qgroup_limit_item *qg_limit; + struct btrfs_qgroup_status_item *qg_status; u32 nr = btrfs_header_nritems(l); + u64 objectid; u32 type; printf("leaf %llu items %d free space %d generation %llu owner %llu\n", @@ -466,17 +659,23 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) for (i = 0 ; i < nr ; i++) { item = btrfs_item_nr(l, i); btrfs_item_key(l, &disk_key, i); + objectid = btrfs_disk_key_objectid(&disk_key); type = btrfs_disk_key_type(&disk_key); printf("\titem %d ", i); btrfs_print_key(&disk_key); printf(" itemoff %d itemsize %d\n", btrfs_item_offset(l, item), btrfs_item_size(l, item)); + + if (type == 0 && objectid == BTRFS_FREE_SPACE_OBJECTID) + print_free_space_header(l, i); + switch (type) { case BTRFS_INODE_ITEM_KEY: ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); - printf("\t\tinode generation %llu size %llu block group %llu mode %o links %u\n", + printf("\t\tinode generation %llu transid %llu size %llu block group %llu mode %o links %u\n", (unsigned long long)btrfs_inode_generation(l, ii), + (unsigned long long)btrfs_inode_transid(l, ii), (unsigned long long)btrfs_inode_size(l, ii), (unsigned long long)btrfs_inode_block_group(l,ii), btrfs_inode_mode(l, ii), @@ -486,6 +685,10 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) iref = btrfs_item_ptr(l, i, struct btrfs_inode_ref); print_inode_ref_item(l, item, iref); break; + case BTRFS_INODE_EXTREF_KEY: + iref2 = btrfs_item_ptr(l, i, struct btrfs_inode_extref); + print_inode_extref_item(l, item, iref2); + break; case 
BTRFS_DIR_ITEM_KEY: case BTRFS_DIR_INDEX_KEY: case BTRFS_XATTR_ITEM_KEY: @@ -502,22 +705,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) printf("\t\torphan item\n"); break; case BTRFS_ROOT_ITEM_KEY: - ri = btrfs_item_ptr(l, i, struct btrfs_root_item); - read_extent_buffer(l, &root_item, (unsigned long)ri, sizeof(root_item)); - printf("\t\troot data bytenr %llu level %d dirid %llu refs %u gen %llu\n", - (unsigned long long)btrfs_root_bytenr(&root_item), - btrfs_root_level(&root_item), - (unsigned long long)btrfs_root_dirid(&root_item), - btrfs_root_refs(&root_item), - (unsigned long long)btrfs_root_generation(&root_item)); - if (btrfs_root_refs(&root_item) == 0) { - struct btrfs_key drop_key; - btrfs_disk_key_to_cpu(&drop_key, - &root_item.drop_progress); - printf("\t\tdrop "); - btrfs_print_key(&root_item.drop_progress); - printf(" level %d\n", root_item.drop_level); - } + print_root(l, i); break; case BTRFS_ROOT_REF_KEY: print_root_ref(l, i, "ref"); @@ -526,7 +714,10 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) print_root_ref(l, i, "backref"); break; case BTRFS_EXTENT_ITEM_KEY: - print_extent_item(l, i); + print_extent_item(l, i, 0); + break; + case BTRFS_METADATA_ITEM_KEY: + print_extent_item(l, i, 1); break; case BTRFS_TREE_BLOCK_REF_KEY: printf("\t\ttree block backref\n"); @@ -598,11 +789,66 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) (unsigned long long) btrfs_dev_extent_length(l, dev_extent)); break; + case BTRFS_QGROUP_STATUS_KEY: + qg_status = btrfs_item_ptr(l, i, + struct btrfs_qgroup_status_item); + printf("\t\tversion %llu generation %llu flags %#llx " + "scan %lld\n", + (unsigned long long) + btrfs_qgroup_status_version(l, qg_status), + (unsigned long long) + btrfs_qgroup_status_generation(l, qg_status), + (unsigned long long) + btrfs_qgroup_status_flags(l, qg_status), + (unsigned long long) + btrfs_qgroup_status_scan(l, qg_status)); + break; + case 
BTRFS_QGROUP_RELATION_KEY: + break; + case BTRFS_QGROUP_INFO_KEY: + qg_info = btrfs_item_ptr(l, i, + struct btrfs_qgroup_info_item); + printf("\t\tgeneration %llu\n" + "\t\treferenced %lld referenced compressed %lld\n" + "\t\texclusive %lld exclusive compressed %lld\n", + (unsigned long long) + btrfs_qgroup_info_generation(l, qg_info), + (long long) + btrfs_qgroup_info_referenced(l, qg_info), + (long long) + btrfs_qgroup_info_referenced_compressed(l, + qg_info), + (long long) + btrfs_qgroup_info_exclusive(l, qg_info), + (long long) + btrfs_qgroup_info_exclusive_compressed(l, + qg_info)); + break; + case BTRFS_QGROUP_LIMIT_KEY: + qg_limit = btrfs_item_ptr(l, i, + struct btrfs_qgroup_limit_item); + printf("\t\tflags %llx\n" + "\t\tmax referenced %lld max exclusive %lld\n" + "\t\trsv referenced %lld rsv exclusive %lld\n", + (unsigned long long) + btrfs_qgroup_limit_flags(l, qg_limit), + (long long) + btrfs_qgroup_limit_max_referenced(l, qg_limit), + (long long) + btrfs_qgroup_limit_max_exclusive(l, qg_limit), + (long long) + btrfs_qgroup_limit_rsv_referenced(l, qg_limit), + (long long) + btrfs_qgroup_limit_rsv_exclusive(l, qg_limit)); + break; case BTRFS_STRING_ITEM_KEY: /* dirty, but it's simple */ str = l->data + btrfs_item_ptr_offset(l, i); printf("\t\titem data %.*s\n", btrfs_item_size(l, item), str); break; + case BTRFS_DEV_STATS_KEY: + printf("\t\tdevice stats\n"); + break; }; fflush(stdout); } diff --git a/qgroup.c b/qgroup.c new file mode 100644 index 0000000..dafde12 --- /dev/null +++ b/qgroup.c @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2012 STRATO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include "qgroup.h" +#include "ctree.h" + +u64 parse_qgroupid(char *p) +{ + char *s = strchr(p, '/'); + char *ptr_src_end = p + strlen(p); + char *ptr_parse_end = NULL; + u64 level; + u64 id; + + if (!s) { + id = strtoull(p, &ptr_parse_end, 10); + if (ptr_parse_end != ptr_src_end) + goto err; + return id; + } + level = strtoull(p, &ptr_parse_end, 10); + if (ptr_parse_end != s) + goto err; + + id = strtoull(s+1, &ptr_parse_end, 10); + if (ptr_parse_end != ptr_src_end) + goto err; + + return (level << 48) | id; +err: + fprintf(stderr, "ERROR:invalid qgroupid\n"); + exit(-1); +} + +int qgroup_inherit_size(struct btrfs_qgroup_inherit *p) +{ + return sizeof(*p) + sizeof(p->qgroups[0]) * + (p->num_qgroups + 2 * p->num_ref_copies + + 2 * p->num_excl_copies); +} + +int qgroup_inherit_realloc(struct btrfs_qgroup_inherit **inherit, int n, + int pos) +{ + struct btrfs_qgroup_inherit *out; + int nitems = 0; + + if (*inherit) { + nitems = (*inherit)->num_qgroups + + (*inherit)->num_ref_copies + + (*inherit)->num_excl_copies; + } + + out = calloc(sizeof(*out) + sizeof(out->qgroups[0]) * (nitems + n), 1); + if (out == NULL) { + fprintf(stderr, "ERROR: Not enough memory\n"); + return 13; + } + + if (*inherit) { + struct btrfs_qgroup_inherit *i = *inherit; + int s = sizeof(out->qgroups); + + out->num_qgroups = i->num_qgroups; + out->num_ref_copies = i->num_ref_copies; + out->num_excl_copies = i->num_excl_copies; + memcpy(out->qgroups, i->qgroups, pos * s); + memcpy(out->qgroups + pos + n, i->qgroups + pos, + (nitems - pos) * s); + } + free(*inherit); + *inherit = out; + + return 0; +} + +int qgroup_inherit_add_group(struct btrfs_qgroup_inherit **inherit, char *arg) +{ + int ret; + u64 qgroupid = 
parse_qgroupid(arg); + int pos = 0; + + if (qgroupid == 0) { + fprintf(stderr, "ERROR: bad qgroup specification\n"); + return 12; + } + + if (*inherit) + pos = (*inherit)->num_qgroups; + ret = qgroup_inherit_realloc(inherit, 1, pos); + if (ret) + return ret; + + (*inherit)->qgroups[(*inherit)->num_qgroups++] = qgroupid; + + return 0; +} + +int qgroup_inherit_add_copy(struct btrfs_qgroup_inherit **inherit, char *arg, + int type) +{ + int ret; + u64 qgroup_src; + u64 qgroup_dst; + char *p; + int pos = 0; + + p = strchr(arg, ':'); + if (!p) { +bad: + fprintf(stderr, "ERROR: bad copy specification\n"); + return 12; + } + *p = 0; + qgroup_src = parse_qgroupid(arg); + qgroup_dst = parse_qgroupid(p + 1); + *p = ':'; + + if (!qgroup_src || !qgroup_dst) + goto bad; + + if (*inherit) + pos = (*inherit)->num_qgroups + + (*inherit)->num_ref_copies * 2 * type; + + ret = qgroup_inherit_realloc(inherit, 2, pos); + if (ret) + return ret; + + (*inherit)->qgroups[pos++] = qgroup_src; + (*inherit)->qgroups[pos++] = qgroup_dst; + + if (!type) + ++(*inherit)->num_ref_copies; + else + ++(*inherit)->num_excl_copies; + + return 0; +} @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007 Oracle. All rights reserved. + * Copyright (C) 2012 STRATO. All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public @@ -16,18 +16,18 @@ * Boston, MA 021110-1307, USA. 
*/ -#ifndef __BIT_RADIX__ -#define __BIT_RADIX__ -#include "radix-tree.h" +#ifndef _BTRFS_QGROUP_H +#define _BTRFS_QGROUP_H -int set_radix_bit(struct radix_tree_root *radix, unsigned long bit); -int test_radix_bit(struct radix_tree_root *radix, unsigned long bit); -int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit); -int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, - unsigned long start, int nr); +#include "ioctl.h" +#include "kerncompat.h" + +u64 parse_qgroupid(char *p); +int qgroup_inherit_size(struct btrfs_qgroup_inherit *p); +int qgroup_inherit_realloc(struct btrfs_qgroup_inherit **inherit, + int incgroups, int inccopies); +int qgroup_inherit_add_group(struct btrfs_qgroup_inherit **inherit, char *arg); +int qgroup_inherit_add_copy(struct btrfs_qgroup_inherit **inherit, char *arg, + int type); -static inline void init_bit_radix(struct radix_tree_root *radix) -{ - INIT_RADIX_TREE(radix, GFP_NOFS); -} #endif diff --git a/quick-test.c b/quick-test.c index fa6fd83..05d73fd 100644 --- a/quick-test.c +++ b/quick-test.c @@ -52,6 +52,10 @@ int main(int ac, char **av) { radix_tree_init(); root = open_ctree(av[1], BTRFS_SUPER_INFO_OFFSET, O_RDWR); + if (!root) { + fprintf(stderr, "Open ctree failed\n"); + exit(1); + } trans = btrfs_start_transaction(root, 1); srand(55); btrfs_set_key_type(&ins, BTRFS_STRING_ITEM_KEY); @@ -75,6 +79,10 @@ int main(int ac, char **av) { close_ctree(root); exit(1); root = open_ctree(av[1], BTRFS_SUPER_INFO_OFFSET, O_RDWR); + if (!root) { + fprintf(stderr, "Open ctree failed\n"); + exit(1); + } printf("starting search\n"); srand(55); for (i = 0; i < run_size; i++) { @@ -94,6 +102,10 @@ int main(int ac, char **av) { close_ctree(root); root = open_ctree(av[1], BTRFS_SUPER_INFO_OFFSET, O_RDWR); + if (!root) { + fprintf(stderr, "Open ctree failed\n"); + exit(1); + } printf("node %p level %d total ptrs %d free spc %lu\n", root->node, btrfs_header_level(root->node), btrfs_header_nritems(root->node), 
@@ -122,6 +134,10 @@ int main(int ac, char **av) { close_ctree(root); root = open_ctree(av[1], BTRFS_SUPER_INFO_OFFSET, O_RDWR); + if (!root) { + fprintf(stderr, "Open ctree failed\n"); + exit(1); + } trans = btrfs_start_transaction(root, 1); srand(128); for (i = 0; i < run_size; i++) { @@ -138,6 +154,10 @@ int main(int ac, char **av) { close_ctree(root); root = open_ctree(av[1], BTRFS_SUPER_INFO_OFFSET, O_RDWR); + if (!root) { + fprintf(stderr, "Open ctree failed\n"); + exit(1); + } srand(128); printf("starting search2\n"); for (i = 0; i < run_size; i++) { diff --git a/radix-tree.h b/radix-tree.h index d99ea7e..bf96d83 100644 --- a/radix-tree.h +++ b/radix-tree.h @@ -37,7 +37,11 @@ #ifndef _LINUX_RADIX_TREE_H #define _LINUX_RADIX_TREE_H +#if BTRFS_FLAT_INCLUDES #include "kerncompat.h" +#else +#include <btrfs/kerncompat.h> +#endif /* BTRFS_FLAT_INCLUDES */ #define RADIX_TREE_MAX_TAGS 2 @@ -0,0 +1,99 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6int1.c + * + * 1-way unrolled portable integer math RAID-6 instruction set + * + * This file was postprocessed using unroll.pl and then ported to userspace + */ +#include <stdint.h> +#include <unistd.h> +#include "kerncompat.h" + +/* + * This is the C data type to use + */ + +/* Change this from BITS_PER_LONG if there is something better... 
*/ +#if BITS_PER_LONG == 64 +# define NBYTES(x) ((x) * 0x0101010101010101UL) +# define NSIZE 8 +# define NSHIFT 3 +typedef uint64_t unative_t; +#else +# define NBYTES(x) ((x) * 0x01010101U) +# define NSIZE 4 +# define NSHIFT 2 +typedef uint32_t unative_t; +#endif + +/* + * These sub-operations are separate inlines since they can sometimes be + * specially optimized using architecture-specific hacks. + */ + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte + */ +static inline __attribute_const__ unative_t SHLBYTE(unative_t v) +{ + unative_t vv; + + vv = (v << 1) & NBYTES(0xfe); + return vv; +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. + */ +static inline __attribute_const__ unative_t MASK(unative_t v) +{ + unative_t vv; + + vv = v & NBYTES(0x80); + vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */ + return vv; +} + + +void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + uint8_t **dptr = (uint8_t **)ptrs; + uint8_t *p, *q; + int d, z, z0; + + unative_t wd0, wq0, wp0, w10, w20; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + for ( d = 0 ; d < bytes ; d += NSIZE*1 ) { + wq0 = wp0 = *(unative_t *)&dptr[z0][d+0*NSIZE]; + for ( z = z0-1 ; z >= 0 ; z-- ) { + wd0 = *(unative_t *)&dptr[z][d+0*NSIZE]; + wp0 ^= wd0; + w20 = MASK(wq0); + w10 = SHLBYTE(wq0); + w20 &= NBYTES(0x1d); + w10 ^= w20; + wq0 = w10 ^ wd0; + } + *(unative_t *)&p[d+NSIZE*0] = wp0; + *(unative_t *)&q[d+NSIZE*0] = wq0; + } +} + diff --git a/random-test.c b/random-test.c index 0003236..3a07e6d 100644 --- a/random-test.c +++ b/random-test.c @@ -356,6 +356,10 @@ int main(int ac, char **av) struct btrfs_trans_handle *trans; radix_tree_init(); root = open_ctree("dbfile", &super); + if (!root) { + fprintf(stderr, "Open ctree failed\n"); + exit(1); + } fill_radix(root, 
&radix); signal(SIGTERM, sigstopper); @@ -398,6 +402,10 @@ int main(int ac, char **av) btrfs_header_nritems(&root->node->node.header)); close_ctree(root, &super); root = open_ctree("dbfile", &super); + if (!root) { + fprintf(stderr, "Open ctree failed\n"); + goto out; + } } while(count--) { ret = ops[op](trans, root, &radix); @@ -93,7 +93,11 @@ static inline struct page * rb_insert_page_cache(struct inode * inode, #ifndef _LINUX_RBTREE_H #define _LINUX_RBTREE_H +#if BTRFS_FLAT_INCLUDES #include "kerncompat.h" +#else +#include <btrfs/kerncompat.h> +#endif /* BTRFS_FLAT_INCLUDES */ struct rb_node { unsigned long rb_parent_color; @@ -145,7 +149,7 @@ extern struct rb_node *rb_first(struct rb_root *); extern struct rb_node *rb_last(struct rb_root *); /* Fast replacement of a single node without remove/rebalance/add/rebalance */ -extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, +extern void rb_replace_node(struct rb_node *victim, struct rb_node *xnew, struct rb_root *root); static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, diff --git a/repair.c b/repair.c new file mode 100644 index 0000000..e640465 --- /dev/null +++ b/repair.c @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2012 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include "ctree.h" +#include "extent-cache.h" +#include "utils.h" +#include "repair.h" + +int btrfs_add_corrupt_extent_record(struct btrfs_fs_info *info, + struct btrfs_key *first_key, + u64 start, u64 len, int level) + +{ + int ret = 0; + struct btrfs_corrupt_block *corrupt; + + if (!info->corrupt_blocks) + return 0; + + corrupt = malloc(sizeof(*corrupt)); + if (!corrupt) + return -ENOMEM; + + memcpy(&corrupt->key, first_key, sizeof(*first_key)); + corrupt->cache.start = start; + corrupt->cache.size = len; + corrupt->level = level; + + ret = insert_existing_cache_extent(info->corrupt_blocks, &corrupt->cache); + if (ret) + free(corrupt); + BUG_ON(ret && ret != -EEXIST); + return ret; +} + diff --git a/repair.h b/repair.h new file mode 100644 index 0000000..3d0dcb9 --- /dev/null +++ b/repair.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2012 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#ifndef __BTRFS_REPAIR__ +#define __BTRFS_REPAIR__ + +struct btrfs_corrupt_block { + struct cache_extent cache; + struct btrfs_key key; + int level; +}; + +int btrfs_add_corrupt_extent_record(struct btrfs_fs_info *info, + struct btrfs_key *first_key, + u64 start, u64 len, int level); + +#endif diff --git a/root-tree.c b/root-tree.c index 782472c..ba380bd 100644 --- a/root-tree.c +++ b/root-tree.c @@ -47,7 +47,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, slot = path->slots[0] - 1; btrfs_item_key_to_cpu(l, &found_key, slot); if (found_key.objectid != objectid) { - ret = 1; + ret = -ENOENT; goto out; } read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), @@ -69,6 +69,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root int ret; int slot; unsigned long ptr; + u32 old_len; path = btrfs_alloc_path(); BUG_ON(!path); @@ -79,6 +80,42 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root l = path->nodes[0]; slot = path->slots[0]; ptr = btrfs_item_ptr_offset(l, slot); + old_len = btrfs_item_size_nr(l, slot); + + /* + * If this is the first time we update the root item which originated + * from an older kernel, we need to enlarge the item size to make room + * for the added fields. + */ + if (old_len < sizeof(*item)) { + btrfs_release_path(root, path); + ret = btrfs_search_slot(trans, root, key, path, + -1, 1); + if (ret < 0) { + goto out; + } + + ret = btrfs_del_item(trans, root, path); + if (ret < 0) { + goto out; + } + btrfs_release_path(root, path); + ret = btrfs_insert_empty_item(trans, root, path, + key, sizeof(*item)); + if (ret < 0) { + goto out; + } + l = path->nodes[0]; + slot = path->slots[0]; + ptr = btrfs_item_ptr_offset(l, slot); + } + + /* + * Update generation_v2 so at the next mount we know the new root + * fields are valid. 
+ */ + btrfs_set_root_generation_v2(item, btrfs_root_generation(item)); + write_extent_buffer(l, item, ptr, sizeof(*item)); btrfs_mark_buffer_dirty(path->nodes[0]); out: @@ -92,6 +129,11 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *item) { int ret; + + /* + * Make sure generation v1 and v2 match. See update_root for details. + */ + btrfs_set_root_generation_v2(item, btrfs_root_generation(item)); ret = btrfs_insert_item(trans, root, key, item, sizeof(*item)); return ret; } @@ -181,14 +223,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = btrfs_search_slot(trans, root, key, path, -1, 1); if (ret < 0) goto out; - if (ret) { -btrfs_print_leaf(root, path->nodes[0]); -printk("failed to del %llu %u %llu\n", - (unsigned long long)key->objectid, - key->type, - (unsigned long long)key->offset); - - } BUG_ON(ret != 0); leaf = path->nodes[0]; ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); diff --git a/send-stream.c b/send-stream.c new file mode 100644 index 0000000..88e18e2 --- /dev/null +++ b/send-stream.c @@ -0,0 +1,485 @@ +/* + * Copyright (C) 2012 Alexander Block. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include <uuid/uuid.h> +#include <unistd.h> + +#include "send.h" +#include "send-stream.h" +#include "crc32c.h" + +struct btrfs_send_stream { + int fd; + char read_buf[BTRFS_SEND_BUF_SIZE]; + + int cmd; + struct btrfs_cmd_header *cmd_hdr; + struct btrfs_tlv_header *cmd_attrs[BTRFS_SEND_A_MAX + 1]; + u32 version; + + struct btrfs_send_ops *ops; + void *user; +}; + +static int read_buf(struct btrfs_send_stream *s, void *buf, int len) +{ + int ret; + int pos = 0; + + while (pos < len) { + ret = read(s->fd, (char*)buf + pos, len - pos); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "ERROR: read from stream failed. %s\n", + strerror(-ret)); + goto out; + } + if (ret == 0) { + ret = 1; + goto out; + } + pos += ret; + } + + ret = 0; + +out: + return ret; +} + +/* + * Reads a single command from kernel space and decodes the TLV's into + * s->cmd_attrs + */ +static int read_cmd(struct btrfs_send_stream *s) +{ + int ret; + int cmd; + int cmd_len; + int tlv_type; + int tlv_len; + char *data; + int pos; + struct btrfs_tlv_header *tlv_hdr; + u32 crc; + u32 crc2; + + memset(s->cmd_attrs, 0, sizeof(s->cmd_attrs)); + + ret = read_buf(s, s->read_buf, sizeof(*s->cmd_hdr)); + if (ret < 0) + goto out; + if (ret) { + ret = -EINVAL; + fprintf(stderr, "ERROR: unexpected EOF in stream.\n"); + goto out; + } + + s->cmd_hdr = (struct btrfs_cmd_header *)s->read_buf; + cmd = le16_to_cpu(s->cmd_hdr->cmd); + cmd_len = le32_to_cpu(s->cmd_hdr->len); + + data = s->read_buf + sizeof(*s->cmd_hdr); + ret = read_buf(s, data, cmd_len); + if (ret < 0) + goto out; + if (ret) { + ret = -EINVAL; + fprintf(stderr, "ERROR: unexpected EOF in stream.\n"); + goto out; + } + + crc = le32_to_cpu(s->cmd_hdr->crc); + s->cmd_hdr->crc = 0; + + crc2 = crc32c(0, (unsigned char*)s->read_buf, + sizeof(*s->cmd_hdr) + cmd_len); + + if (crc != crc2) { + ret = -EINVAL; + fprintf(stderr, "ERROR: crc32 mismatch in command.\n"); + goto out; + } + + pos = 0; + while (pos < cmd_len) { + tlv_hdr = (struct 
btrfs_tlv_header *)data; + tlv_type = le16_to_cpu(tlv_hdr->tlv_type); + tlv_len = le16_to_cpu(tlv_hdr->tlv_len); + + if (tlv_type <= 0 || tlv_type > BTRFS_SEND_A_MAX || + tlv_len < 0 || tlv_len > BTRFS_SEND_BUF_SIZE) { + fprintf(stderr, "ERROR: invalid tlv in cmd. " + "tlv_type = %d, tlv_len = %d\n", + tlv_type, tlv_len); + ret = -EINVAL; + goto out; + } + + s->cmd_attrs[tlv_type] = tlv_hdr; + + data += sizeof(*tlv_hdr) + tlv_len; + pos += sizeof(*tlv_hdr) + tlv_len; + } + + s->cmd = cmd; + ret = 0; + +out: + return ret; +} + +static int tlv_get(struct btrfs_send_stream *s, int attr, void **data, int *len) +{ + int ret; + struct btrfs_tlv_header *h; + + if (attr <= 0 || attr > BTRFS_SEND_A_MAX) { + fprintf(stderr, "ERROR: invalid attribute requested. " + "attr = %d\n", + attr); + ret = -EINVAL; + goto out; + } + + h = s->cmd_attrs[attr]; + if (!h) { + fprintf(stderr, "ERROR: attribute %d requested " + "but not present.\n", attr); + ret = -ENOENT; + goto out; + } + + *len = le16_to_cpu(h->tlv_len); + *data = h + 1; + + ret = 0; + +out: + return ret; +} + +#define __TLV_GOTO_FAIL(expr) \ + if ((ret = expr) < 0) \ + goto tlv_get_failed; + +#define __TLV_DO_WHILE_GOTO_FAIL(expr) \ + do { \ + __TLV_GOTO_FAIL(expr) \ + } while (0) + + +#define TLV_GET(s, attr, data, len) \ + __TLV_DO_WHILE_GOTO_FAIL(tlv_get(s, attr, data, len)) + +#define TLV_CHECK_LEN(expected, got) \ + do { \ + if (expected != got) { \ + fprintf(stderr, "ERROR: invalid size for attribute. 
" \ + "expected = %d, got = %d\n", \ + (int)expected, (int)got); \ + ret = -EINVAL; \ + goto tlv_get_failed; \ + } \ + } while (0) + +#define TLV_GET_INT(s, attr, bits, v) \ + do { \ + __le##bits *__tmp; \ + int __len; \ + TLV_GET(s, attr, (void**)&__tmp, &__len); \ + TLV_CHECK_LEN(sizeof(*__tmp), __len); \ + *v = le##bits##_to_cpu(*__tmp); \ + } while (0) + +#define TLV_GET_U8(s, attr, v) TLV_GET_INT(s, attr, 8, v) +#define TLV_GET_U16(s, attr, v) TLV_GET_INT(s, attr, 16, v) +#define TLV_GET_U32(s, attr, v) TLV_GET_INT(s, attr, 32, v) +#define TLV_GET_U64(s, attr, v) TLV_GET_INT(s, attr, 64, v) + +static int tlv_get_string(struct btrfs_send_stream *s, int attr, char **str) +{ + int ret; + void *data; + int len; + + TLV_GET(s, attr, &data, &len); + + *str = malloc(len + 1); + if (!*str) + return -ENOMEM; + + memcpy(*str, data, len); + (*str)[len] = 0; + ret = 0; + +tlv_get_failed: + return ret; +} +#define TLV_GET_STRING(s, attr, str) \ + __TLV_DO_WHILE_GOTO_FAIL(tlv_get_string(s, attr, str)) + +static int tlv_get_timespec(struct btrfs_send_stream *s, + int attr, struct timespec *ts) +{ + int ret; + int len; + struct btrfs_timespec *bts; + + TLV_GET(s, attr, (void**)&bts, &len); + TLV_CHECK_LEN(sizeof(*bts), len); + + ts->tv_sec = le64_to_cpu(bts->sec); + ts->tv_nsec = le32_to_cpu(bts->nsec); + ret = 0; + +tlv_get_failed: + return ret; +} +#define TLV_GET_TIMESPEC(s, attr, ts) \ + __TLV_DO_WHILE_GOTO_FAIL(tlv_get_timespec(s, attr, ts)) + +static int tlv_get_uuid(struct btrfs_send_stream *s, int attr, u8 *uuid) +{ + int ret; + int len; + void *data; + + TLV_GET(s, attr, &data, &len); + TLV_CHECK_LEN(BTRFS_UUID_SIZE, len); + memcpy(uuid, data, BTRFS_UUID_SIZE); + + ret = 0; + +tlv_get_failed: + return ret; +} +#define TLV_GET_UUID(s, attr, uuid) \ + __TLV_DO_WHILE_GOTO_FAIL(tlv_get_uuid(s, attr, uuid)) + +static int read_and_process_cmd(struct btrfs_send_stream *s) +{ + int ret; + char *path = NULL; + char *path_to = NULL; + char *clone_path = NULL; + char 
*xattr_name = NULL; + void *xattr_data = NULL; + void *data = NULL; + struct timespec at; + struct timespec ct; + struct timespec mt; + u8 uuid[BTRFS_UUID_SIZE]; + u8 clone_uuid[BTRFS_UUID_SIZE]; + u64 tmp; + u64 tmp2; + u64 ctransid; + u64 clone_ctransid; + u64 mode; + u64 dev; + u64 clone_offset; + u64 offset; + int len; + int xattr_len; + + ret = read_cmd(s); + if (ret) + goto out; + + switch (s->cmd) { + case BTRFS_SEND_C_SUBVOL: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + TLV_GET_UUID(s, BTRFS_SEND_A_UUID, uuid); + TLV_GET_U64(s, BTRFS_SEND_A_CTRANSID, &ctransid); + ret = s->ops->subvol(path, uuid, ctransid, s->user); + break; + case BTRFS_SEND_C_SNAPSHOT: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + TLV_GET_UUID(s, BTRFS_SEND_A_UUID, uuid); + TLV_GET_U64(s, BTRFS_SEND_A_CTRANSID, &ctransid); + TLV_GET_UUID(s, BTRFS_SEND_A_CLONE_UUID, clone_uuid); + TLV_GET_U64(s, BTRFS_SEND_A_CLONE_CTRANSID, &clone_ctransid); + ret = s->ops->snapshot(path, uuid, ctransid, clone_uuid, + clone_ctransid, s->user); + break; + case BTRFS_SEND_C_MKFILE: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + ret = s->ops->mkfile(path, s->user); + break; + case BTRFS_SEND_C_MKDIR: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + ret = s->ops->mkdir(path, s->user); + break; + case BTRFS_SEND_C_MKNOD: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + TLV_GET_U64(s, BTRFS_SEND_A_MODE, &mode); + TLV_GET_U64(s, BTRFS_SEND_A_RDEV, &dev); + ret = s->ops->mknod(path, mode, dev, s->user); + break; + case BTRFS_SEND_C_MKFIFO: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + ret = s->ops->mkfifo(path, s->user); + break; + case BTRFS_SEND_C_MKSOCK: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + ret = s->ops->mksock(path, s->user); + break; + case BTRFS_SEND_C_SYMLINK: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + TLV_GET_STRING(s, BTRFS_SEND_A_PATH_LINK, &path_to); + ret = s->ops->symlink(path, path_to, s->user); + break; + case BTRFS_SEND_C_RENAME: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, 
&path); + TLV_GET_STRING(s, BTRFS_SEND_A_PATH_TO, &path_to); + ret = s->ops->rename(path, path_to, s->user); + break; + case BTRFS_SEND_C_LINK: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + TLV_GET_STRING(s, BTRFS_SEND_A_PATH_LINK, &path_to); + ret = s->ops->link(path, path_to, s->user); + break; + case BTRFS_SEND_C_UNLINK: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + ret = s->ops->unlink(path, s->user); + break; + case BTRFS_SEND_C_RMDIR: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + ret = s->ops->rmdir(path, s->user); + break; + case BTRFS_SEND_C_WRITE: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + TLV_GET_U64(s, BTRFS_SEND_A_FILE_OFFSET, &offset); + TLV_GET(s, BTRFS_SEND_A_DATA, &data, &len); + ret = s->ops->write(path, data, offset, len, s->user); + break; + case BTRFS_SEND_C_CLONE: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + TLV_GET_U64(s, BTRFS_SEND_A_FILE_OFFSET, &offset); + TLV_GET_U64(s, BTRFS_SEND_A_CLONE_LEN, &len); + TLV_GET_UUID(s, BTRFS_SEND_A_CLONE_UUID, clone_uuid); + TLV_GET_U64(s, BTRFS_SEND_A_CLONE_CTRANSID, &clone_ctransid); + TLV_GET_STRING(s, BTRFS_SEND_A_CLONE_PATH, &clone_path); + TLV_GET_U64(s, BTRFS_SEND_A_CLONE_OFFSET, &clone_offset); + ret = s->ops->clone(path, offset, len, clone_uuid, + clone_ctransid, clone_path, clone_offset, + s->user); + break; + case BTRFS_SEND_C_SET_XATTR: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + TLV_GET_STRING(s, BTRFS_SEND_A_XATTR_NAME, &xattr_name); + TLV_GET(s, BTRFS_SEND_A_XATTR_DATA, &xattr_data, &xattr_len); + ret = s->ops->set_xattr(path, xattr_name, xattr_data, + xattr_len, s->user); + break; + case BTRFS_SEND_C_REMOVE_XATTR: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + TLV_GET_STRING(s, BTRFS_SEND_A_XATTR_NAME, &xattr_name); + ret = s->ops->remove_xattr(path, xattr_name, s->user); + break; + case BTRFS_SEND_C_TRUNCATE: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + TLV_GET_U64(s, BTRFS_SEND_A_SIZE, &tmp); + ret = s->ops->truncate(path, tmp, s->user); + break; + case 
BTRFS_SEND_C_CHMOD: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + TLV_GET_U64(s, BTRFS_SEND_A_MODE, &tmp); + ret = s->ops->chmod(path, tmp, s->user); + break; + case BTRFS_SEND_C_CHOWN: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + TLV_GET_U64(s, BTRFS_SEND_A_UID, &tmp); + TLV_GET_U64(s, BTRFS_SEND_A_GID, &tmp2); + ret = s->ops->chown(path, tmp, tmp2, s->user); + break; + case BTRFS_SEND_C_UTIMES: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + TLV_GET_TIMESPEC(s, BTRFS_SEND_A_ATIME, &at); + TLV_GET_TIMESPEC(s, BTRFS_SEND_A_MTIME, &mt); + TLV_GET_TIMESPEC(s, BTRFS_SEND_A_CTIME, &ct); + ret = s->ops->utimes(path, &at, &mt, &ct, s->user); + break; + case BTRFS_SEND_C_UPDATE_EXTENT: + TLV_GET_STRING(s, BTRFS_SEND_A_PATH, &path); + TLV_GET_U64(s, BTRFS_SEND_A_FILE_OFFSET, &offset); + TLV_GET_U64(s, BTRFS_SEND_A_SIZE, &tmp); + ret = s->ops->update_extent(path, offset, tmp, s->user); + break; + case BTRFS_SEND_C_END: + ret = 1; + break; + } + +tlv_get_failed: +out: + free(path); + free(path_to); + free(clone_path); + free(xattr_name); + return ret; +} + +int btrfs_read_and_process_send_stream(int fd, + struct btrfs_send_ops *ops, void *user, + int honor_end_cmd) +{ + int ret; + struct btrfs_send_stream s; + struct btrfs_stream_header hdr; + + s.fd = fd; + s.ops = ops; + s.user = user; + + ret = read_buf(&s, &hdr, sizeof(hdr)); + if (ret < 0) + goto out; + if (ret) { + ret = 1; + goto out; + } + + if (strcmp(hdr.magic, BTRFS_SEND_STREAM_MAGIC)) { + ret = -EINVAL; + fprintf(stderr, "ERROR: Unexpected header\n"); + goto out; + } + + s.version = le32_to_cpu(hdr.version); + if (s.version > BTRFS_SEND_STREAM_VERSION) { + ret = -EINVAL; + fprintf(stderr, "ERROR: Stream version %d not supported. 
" + "Please upgrade btrfs-progs\n", s.version); + goto out; + } + + while (1) { + ret = read_and_process_cmd(&s); + if (ret < 0) + goto out; + if (ret) { + if (!honor_end_cmd) + ret = 0; + goto out; + } + } + +out: + return ret; +} diff --git a/send-stream.h b/send-stream.h new file mode 100644 index 0000000..17bc669 --- /dev/null +++ b/send-stream.h @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2012 Alexander Block. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ +#ifndef SEND_STREAM_H_ +#define SEND_STREAM_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +struct btrfs_send_ops { + int (*subvol)(const char *path, const u8 *uuid, u64 ctransid, + void *user); + int (*snapshot)(const char *path, const u8 *uuid, u64 ctransid, + const u8 *parent_uuid, u64 parent_ctransid, + void *user); + int (*mkfile)(const char *path, void *user); + int (*mkdir)(const char *path, void *user); + int (*mknod)(const char *path, u64 mode, u64 dev, void *user); + int (*mkfifo)(const char *path, void *user); + int (*mksock)(const char *path, void *user); + int (*symlink)(const char *path, const char *lnk, void *user); + int (*rename)(const char *from, const char *to, void *user); + int (*link)(const char *path, const char *lnk, void *user); + int (*unlink)(const char *path, void *user); + int (*rmdir)(const char *path, void *user); + int (*write)(const char *path, const void *data, u64 offset, u64 len, + void *user); + int (*clone)(const char *path, u64 offset, u64 len, + const u8 *clone_uuid, u64 clone_ctransid, + const char *clone_path, u64 clone_offset, + void *user); + int (*set_xattr)(const char *path, const char *name, const void *data, + int len, void *user); + int (*remove_xattr)(const char *path, const char *name, void *user); + int (*truncate)(const char *path, u64 size, void *user); + int (*chmod)(const char *path, u64 mode, void *user); + int (*chown)(const char *path, u64 uid, u64 gid, void *user); + int (*utimes)(const char *path, struct timespec *at, + struct timespec *mt, struct timespec *ct, + void *user); + int (*update_extent)(const char *path, u64 offset, u64 len, void *user); +}; + +int btrfs_read_and_process_send_stream(int fd, + struct btrfs_send_ops *ops, void *user, + int honor_end_cmd); + +#ifdef __cplusplus +} +#endif + +#endif /* SEND_STREAM_H_ */ diff --git a/send-test.c b/send-test.c new file mode 100644 index 0000000..4a53ae2 --- /dev/null +++ b/send-test.c @@ -0,0 +1,458 @@ +/* + * Copyright (C) 2013 SUSE. 
All rights reserved. + * + * This code is adapted from cmds-send.c and cmds-receive.c, + * Both of which are: + * + * Copyright (C) 2012 Alexander Block. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#define _GNU_SOURCE + +#include <unistd.h> +#include <stdint.h> +#include <dirent.h> +#include <pthread.h> +#include <math.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <libgen.h> +#include <mntent.h> +#include <limits.h> +#include <stdlib.h> +#include <asm/types.h> +#include <uuid/uuid.h> + +/* + * This should be compilable without the rest of the btrfs-progs + * source distribution. 
+ */ +#if BTRFS_FLAT_INCLUDES +#include "send-utils.h" +#include "send-stream.h" +#else +#include <btrfs/send-utils.h> +#include <btrfs/send-stream.h> +#endif /* BTRFS_FLAT_INCLUDES */ + +static int pipefd[2]; +struct btrfs_ioctl_send_args io_send = {0, }; +static char *subvol_path; +static char *root_path; + +struct recv_args { + char *full_subvol_path; + char *root_path; +}; + +void usage(int error) +{ + printf("send-test <btrfs root> <subvol>\n"); + if (error) + exit(error); +} + +static int print_subvol(const char *path, const u8 *uuid, u64 ctransid, + void *user) +{ + struct recv_args *r = user; + char uuid_str[128]; + + r->full_subvol_path = path_cat(r->root_path, path); + uuid_unparse(uuid, uuid_str); + + printf("subvol\t%s\t%llu\t%s\n", uuid_str, + (unsigned long long)ctransid, r->full_subvol_path); + + return 0; +} + +static int print_snapshot(const char *path, const u8 *uuid, u64 ctransid, + const u8 *parent_uuid, u64 parent_ctransid, + void *user) +{ + struct recv_args *r = user; + char uuid_str[128]; + char parent_uuid_str[128]; + + r->full_subvol_path = path_cat(r->root_path, path); + uuid_unparse(uuid, uuid_str); + uuid_unparse(parent_uuid, parent_uuid_str); + + printf("snapshot\t%s\t%llu\t%s\t%llu\t%s\n", uuid_str, + (unsigned long long)ctransid, parent_uuid_str, + (unsigned long long)parent_ctransid, r->full_subvol_path); + + return 0; +} + +static int print_mkfile(const char *path, void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + printf("mkfile\t%s\n", full_path); + + free(full_path); + return 0; +} + +static int print_mkdir(const char *path, void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + printf("mkdir\t%s\n", full_path); + + free(full_path); + return 0; +} + +static int print_mknod(const char *path, u64 mode, u64 dev, void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + 
printf("mknod\t%llo\t0x%llx\t%s\n", (unsigned long long)mode, + (unsigned long long)dev, full_path); + + free(full_path); + return 0; +} + +static int print_mkfifo(const char *path, void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + printf("mkfifo\t%s\n", full_path); + + free(full_path); + return 0; +} + +static int print_mksock(const char *path, void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + printf("mksock\t%s\n", full_path); + + free(full_path); + return 0; +} + +static int print_symlink(const char *path, const char *lnk, void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + printf("symlink\t%s\t%s\n", lnk, full_path); + + free(full_path); + return 0; +} + +static int print_rename(const char *from, const char *to, void *user) +{ + struct recv_args *r = user; + char *full_from = path_cat(r->full_subvol_path, from); + char *full_to = path_cat(r->full_subvol_path, to); + + printf("rename\t%s\t%s\n", from, to); + + free(full_from); + free(full_to); + return 0; +} + +static int print_link(const char *path, const char *lnk, void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + printf("link\t%s\t%s\n", lnk, full_path); + + free(full_path); + return 0; +} + +static int print_unlink(const char *path, void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + printf("unlink\t%s\n", full_path); + + free(full_path); + return 0; +} + +static int print_rmdir(const char *path, void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + printf("rmdir\t%s\n", full_path); + + free(full_path); + return 0; +} + +static int print_write(const char *path, const void *data, u64 offset, + u64 len, void *user) +{ + struct recv_args *r = user; + char *full_path = 
path_cat(r->full_subvol_path, path); + + printf("write\t%llu\t%llu\t%s\n", (unsigned long long)offset, + (unsigned long long)len, full_path); + + free(full_path); + return 0; +} + +static int print_clone(const char *path, u64 offset, u64 len, + const u8 *clone_uuid, u64 clone_ctransid, + const char *clone_path, u64 clone_offset, + void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + printf("clone\t%s\t%s\n", full_path, clone_path); + + free(full_path); + return 0; +} + +static int print_set_xattr(const char *path, const char *name, + const void *data, int len, void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + printf("set_xattr\t%s\t%s\t%d\n", full_path, + name, len); + + free(full_path); + return 0; +} + +static int print_remove_xattr(const char *path, const char *name, void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + printf("remove_xattr\t%s\t%s\n", full_path, name); + + free(full_path); + return 0; +} + +static int print_truncate(const char *path, u64 size, void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + printf("truncate\t%llu\t%s\n", (unsigned long long)size, full_path); + + free(full_path); + return 0; +} + +static int print_chmod(const char *path, u64 mode, void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + printf("chmod\t%llo\t%s\n", (unsigned long long)mode, full_path); + + free(full_path); + return 0; +} + +static int print_chown(const char *path, u64 uid, u64 gid, void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + printf("chown\t%llu\t%llu\t%s\n", (unsigned long long)uid, + (unsigned long long)gid, full_path); + + free(full_path); + return 0; +} + +static int print_utimes(const char *path, struct timespec *at, + struct timespec 
*mt, struct timespec *ct, + void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + printf("utimes\t%s\n", full_path); + + free(full_path); + return 0; +} + +static int print_update_extent(const char *path, u64 offset, u64 len, + void *user) +{ + struct recv_args *r = user; + char *full_path = path_cat(r->full_subvol_path, path); + + printf("update_extent\t%s\t%llu\t%llu\n", full_path, offset, len); + + free(full_path); + return 0; +} + +struct btrfs_send_ops send_ops_print = { + .subvol = print_subvol, + .snapshot = print_snapshot, + .mkfile = print_mkfile, + .mkdir = print_mkdir, + .mknod = print_mknod, + .mkfifo = print_mkfifo, + .mksock = print_mksock, + .symlink = print_symlink, + .rename = print_rename, + .link = print_link, + .unlink = print_unlink, + .rmdir = print_rmdir, + .write = print_write, + .clone = print_clone, + .set_xattr = print_set_xattr, + .remove_xattr = print_remove_xattr, + .truncate = print_truncate, + .chmod = print_chmod, + .chown = print_chown, + .utimes = print_utimes, + .update_extent = print_update_extent, +}; + +static void *process_thread(void *arg_) +{ + int ret; + + while (1) { + ret = btrfs_read_and_process_send_stream(pipefd[-1], + &send_ops_print, arg_, 0); + if (ret) + break; + } + + if (ret > 0) + ret = 0; + + return ERR_PTR(ret); +} + +int main(int argc, char **argv) +{ + int ret = 0; + int subvol_fd; + pthread_t t_read; + pthread_attr_t t_attr; + void *t_err = NULL; + struct recv_args r; + + if (argc != 3) + usage(EINVAL); + + root_path = realpath(argv[1], NULL); + if (!root_path) { + ret = errno; + usage(ret); + } + + subvol_path = realpath(argv[2], NULL); + if (!subvol_path) { + ret = errno; + usage(ret); + } + + r.full_subvol_path = subvol_path; + r.root_path = root_path; + + subvol_fd = open(subvol_path, O_RDONLY|O_NOATIME); + if (subvol_fd < 0) { + ret = errno; + fprintf(stderr, "ERROR: Subvolume open failed. 
%s\n", + strerror(ret)); + goto out; + } + + ret = pthread_attr_init(&t_attr); + if (ret < 0) { + fprintf(stderr, "ERROR: pthread init failed. %s\n", + strerror(ret)); + goto out; + } + + ret = pipe(pipefd); + if (ret < 0) { + ret = errno; + fprintf(stderr, "ERROR: pipe failed. %s\n", strerror(ret)); + goto out; + } + + ret = pthread_create(&t_read, &t_attr, process_thread, &r); + if (ret < 0) { + ret = errno; + fprintf(stderr, "ERROR: pthread create failed. %s\n", + strerror(ret)); + goto out; + } + + io_send.send_fd = pipefd[1]; + io_send.clone_sources_count = 0; + io_send.clone_sources = NULL; + io_send.parent_root = 0; + io_send.flags = BTRFS_SEND_FLAG_NO_FILE_DATA; + + ret = ioctl(subvol_fd, BTRFS_IOC_SEND, &io_send); + if (ret) { + ret = errno; + fprintf(stderr, "ERROR: send ioctl failed with %d: %s\n", ret, + strerror(ret)); + goto out; + } + + close(pipefd[1]); + + ret = pthread_join(t_read, &t_err); + if (ret) { + fprintf(stderr, "ERROR: pthread_join failed: %s\n", + strerror(ret)); + goto out; + } + if (t_err) { + ret = (long int)t_err; + fprintf(stderr, "ERROR: failed to process send stream, ret=%ld " + "(%s)\n", (long int)t_err, strerror(ret)); + goto out; + } + + pthread_attr_destroy(&t_attr); +out: + return ret; +} diff --git a/send-utils.c b/send-utils.c new file mode 100644 index 0000000..bacd47e --- /dev/null +++ b/send-utils.c @@ -0,0 +1,461 @@ +/* + * Copyright (C) 2012 Alexander Block. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <sys/ioctl.h> + +#include "ctree.h" +#include "send-utils.h" +#include "ioctl.h" +#include "btrfs-list.h" + +static int btrfs_subvolid_resolve_sub(int fd, char *path, size_t *path_len, + u64 subvol_id); + +int btrfs_subvolid_resolve(int fd, char *path, size_t path_len, u64 subvol_id) +{ + if (path_len < 1) + return -EOVERFLOW; + path[0] = '\0'; + path_len--; + path[path_len] = '\0'; + return btrfs_subvolid_resolve_sub(fd, path, &path_len, subvol_id); +} + +static int btrfs_subvolid_resolve_sub(int fd, char *path, size_t *path_len, + u64 subvol_id) +{ + int ret; + struct btrfs_ioctl_search_args search_arg; + struct btrfs_ioctl_ino_lookup_args ino_lookup_arg; + struct btrfs_ioctl_search_header *search_header; + struct btrfs_root_ref *backref_item; + + if (subvol_id == BTRFS_FS_TREE_OBJECTID) { + if (*path_len < 1) + return -EOVERFLOW; + *path = '\0'; + (*path_len)--; + return 0; + } + + memset(&search_arg, 0, sizeof(search_arg)); + search_arg.key.tree_id = BTRFS_ROOT_TREE_OBJECTID; + search_arg.key.min_objectid = subvol_id; + search_arg.key.max_objectid = subvol_id; + search_arg.key.min_type = BTRFS_ROOT_BACKREF_KEY; + search_arg.key.max_type = BTRFS_ROOT_BACKREF_KEY; + search_arg.key.max_offset = (u64)-1; + search_arg.key.max_transid = (u64)-1; + search_arg.key.nr_items = 1; + ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &search_arg); + if (ret) { + fprintf(stderr, + "ioctl(BTRFS_IOC_TREE_SEARCH, subvol_id %llu) ret=%d, error: %s\n", + (unsigned long long)subvol_id, ret, strerror(errno)); + return ret; + } + + if (search_arg.key.nr_items < 1) { + fprintf(stderr, + "failed to lookup subvol_id %llu!\n", + (unsigned long long)subvol_id); + return -ENOENT; + } + search_header = (struct btrfs_ioctl_search_header *)search_arg.buf; + 
backref_item = (struct btrfs_root_ref *)(search_header + 1); + if (search_header->offset != BTRFS_FS_TREE_OBJECTID) { + int sub_ret; + + sub_ret = btrfs_subvolid_resolve_sub(fd, path, path_len, + search_header->offset); + if (sub_ret) + return sub_ret; + if (*path_len < 1) + return -EOVERFLOW; + strcat(path, "/"); + (*path_len)--; + } + + if (btrfs_stack_root_ref_dirid(backref_item) != + BTRFS_FIRST_FREE_OBJECTID) { + int len; + + memset(&ino_lookup_arg, 0, sizeof(ino_lookup_arg)); + ino_lookup_arg.treeid = search_header->offset; + ino_lookup_arg.objectid = + btrfs_stack_root_ref_dirid(backref_item); + ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_lookup_arg); + if (ret) { + fprintf(stderr, + "ioctl(BTRFS_IOC_INO_LOOKUP) ret=%d, error: %s\n", + ret, strerror(errno)); + return ret; + } + + len = strlen(ino_lookup_arg.name); + if (*path_len < len) + return -EOVERFLOW; + strcat(path, ino_lookup_arg.name); + (*path_len) -= len; + } + + if (*path_len < btrfs_stack_root_ref_name_len(backref_item)) + return -EOVERFLOW; + strncat(path, (char *)(backref_item + 1), + btrfs_stack_root_ref_name_len(backref_item)); + (*path_len) -= btrfs_stack_root_ref_name_len(backref_item); + return 0; +} + +static struct rb_node *tree_insert(struct rb_root *root, + struct subvol_info *si, + enum subvol_search_type type) +{ + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct subvol_info *entry; + __s64 comp; + + while(*p) { + parent = *p; + if (type == subvol_search_by_received_uuid) { + entry = rb_entry(parent, struct subvol_info, + rb_received_node); + + comp = memcmp(entry->received_uuid, si->received_uuid, + BTRFS_UUID_SIZE); + if (!comp) { + if (entry->stransid < si->stransid) + comp = -1; + else if (entry->stransid > si->stransid) + comp = 1; + else + comp = 0; + } + } else if (type == subvol_search_by_uuid) { + entry = rb_entry(parent, struct subvol_info, + rb_local_node); + comp = memcmp(entry->uuid, si->uuid, BTRFS_UUID_SIZE); + } else if (type == 
subvol_search_by_root_id) { + entry = rb_entry(parent, struct subvol_info, + rb_root_id_node); + comp = entry->root_id - si->root_id; + } else if (type == subvol_search_by_path) { + entry = rb_entry(parent, struct subvol_info, + rb_path_node); + comp = strcmp(entry->path, si->path); + } else { + BUG(); + } + + if (comp < 0) + p = &(*p)->rb_left; + else if (comp > 0) + p = &(*p)->rb_right; + else + return parent; + } + + if (type == subvol_search_by_received_uuid) { + rb_link_node(&si->rb_received_node, parent, p); + rb_insert_color(&si->rb_received_node, root); + } else if (type == subvol_search_by_uuid) { + rb_link_node(&si->rb_local_node, parent, p); + rb_insert_color(&si->rb_local_node, root); + } else if (type == subvol_search_by_root_id) { + rb_link_node(&si->rb_root_id_node, parent, p); + rb_insert_color(&si->rb_root_id_node, root); + } else if (type == subvol_search_by_path) { + rb_link_node(&si->rb_path_node, parent, p); + rb_insert_color(&si->rb_path_node, root); + } + return NULL; +} + +static struct subvol_info *tree_search(struct rb_root *root, + u64 root_id, const u8 *uuid, + u64 stransid, const char *path, + enum subvol_search_type type) +{ + struct rb_node * n = root->rb_node; + struct subvol_info *entry; + __s64 comp; + + while(n) { + if (type == subvol_search_by_received_uuid) { + entry = rb_entry(n, struct subvol_info, + rb_received_node); + comp = memcmp(entry->received_uuid, uuid, + BTRFS_UUID_SIZE); + if (!comp) { + if (entry->stransid < stransid) + comp = -1; + else if (entry->stransid > stransid) + comp = 1; + else + comp = 0; + } + } else if (type == subvol_search_by_uuid) { + entry = rb_entry(n, struct subvol_info, rb_local_node); + comp = memcmp(entry->uuid, uuid, BTRFS_UUID_SIZE); + } else if (type == subvol_search_by_root_id) { + entry = rb_entry(n, struct subvol_info, rb_root_id_node); + comp = entry->root_id - root_id; + } else if (type == subvol_search_by_path) { + entry = rb_entry(n, struct subvol_info, rb_path_node); + comp = 
strcmp(entry->path, path); + } else { + BUG(); + } + if (comp < 0) + n = n->rb_left; + else if (comp > 0) + n = n->rb_right; + else + return entry; + } + return NULL; +} + +static int count_bytes(void *buf, int len, char b) +{ + int cnt = 0; + int i; + for (i = 0; i < len; i++) { + if (((char*)buf)[i] == b) + cnt++; + } + return cnt; +} + +void subvol_uuid_search_add(struct subvol_uuid_search *s, + struct subvol_info *si) +{ + int cnt; + + tree_insert(&s->root_id_subvols, si, subvol_search_by_root_id); + tree_insert(&s->path_subvols, si, subvol_search_by_path); + + cnt = count_bytes(si->uuid, BTRFS_UUID_SIZE, 0); + if (cnt != BTRFS_UUID_SIZE) + tree_insert(&s->local_subvols, si, subvol_search_by_uuid); + cnt = count_bytes(si->received_uuid, BTRFS_UUID_SIZE, 0); + if (cnt != BTRFS_UUID_SIZE) + tree_insert(&s->received_subvols, si, + subvol_search_by_received_uuid); +} + +struct subvol_info *subvol_uuid_search(struct subvol_uuid_search *s, + u64 root_id, const u8 *uuid, u64 transid, + const char *path, + enum subvol_search_type type) +{ + struct rb_root *root; + if (type == subvol_search_by_received_uuid) + root = &s->received_subvols; + else if (type == subvol_search_by_uuid) + root = &s->local_subvols; + else if (type == subvol_search_by_root_id) + root = &s->root_id_subvols; + else if (type == subvol_search_by_path) + root = &s->path_subvols; + else + return NULL; + return tree_search(root, root_id, uuid, transid, path, type); +} + +int subvol_uuid_search_init(int mnt_fd, struct subvol_uuid_search *s) +{ + int ret; + struct btrfs_ioctl_search_args args; + struct btrfs_ioctl_search_key *sk = &args.key; + struct btrfs_ioctl_search_header *sh; + struct btrfs_root_item *root_item_ptr; + struct btrfs_root_item root_item; + struct subvol_info *si = NULL; + int root_item_valid = 0; + unsigned long off = 0; + int i; + int e; + char *path; + + memset(&args, 0, sizeof(args)); + + sk->tree_id = BTRFS_ROOT_TREE_OBJECTID; + + sk->max_objectid = (u64)-1; + sk->max_offset = 
(u64)-1; + sk->max_transid = (u64)-1; + sk->min_type = BTRFS_ROOT_ITEM_KEY; + sk->max_type = BTRFS_ROOT_BACKREF_KEY; + sk->nr_items = 4096; + + while(1) { + ret = ioctl(mnt_fd, BTRFS_IOC_TREE_SEARCH, &args); + e = errno; + if (ret < 0) { + fprintf(stderr, "ERROR: can't perform the search- %s\n", + strerror(e)); + return ret; + } + if (sk->nr_items == 0) + break; + + off = 0; + + for (i = 0; i < sk->nr_items; i++) { + sh = (struct btrfs_ioctl_search_header *)(args.buf + + off); + off += sizeof(*sh); + + if ((sh->objectid != 5 && + sh->objectid < BTRFS_FIRST_FREE_OBJECTID) || + sh->objectid > BTRFS_LAST_FREE_OBJECTID) + goto skip; + + if (sh->type == BTRFS_ROOT_ITEM_KEY) { + /* older kernels don't have uuids+times */ + if (sh->len < sizeof(root_item)) { + root_item_valid = 0; + goto skip; + } + root_item_ptr = (struct btrfs_root_item *) + (args.buf + off); + memcpy(&root_item, root_item_ptr, + sizeof(root_item)); + root_item_valid = 1; + } else if (sh->type == BTRFS_ROOT_BACKREF_KEY || + root_item_valid) { + if (!root_item_valid) + goto skip; + + path = btrfs_list_path_for_root(mnt_fd, + sh->objectid); + if (!path) + path = strdup(""); + if (IS_ERR(path)) { + ret = PTR_ERR(path); + fprintf(stderr, "ERROR: unable to " + "resolve path " + "for root %llu\n", + sh->objectid); + goto out; + } + + si = calloc(1, sizeof(*si)); + si->root_id = sh->objectid; + memcpy(si->uuid, root_item.uuid, + BTRFS_UUID_SIZE); + memcpy(si->parent_uuid, root_item.parent_uuid, + BTRFS_UUID_SIZE); + memcpy(si->received_uuid, + root_item.received_uuid, + BTRFS_UUID_SIZE); + si->ctransid = btrfs_root_ctransid(&root_item); + si->otransid = btrfs_root_otransid(&root_item); + si->stransid = btrfs_root_stransid(&root_item); + si->rtransid = btrfs_root_rtransid(&root_item); + si->path = path; + subvol_uuid_search_add(s, si); + root_item_valid = 0; + } else { + goto skip; + } + +skip: + off += sh->len; + + /* + * record the mins in sk so we can make sure the + * next search doesn't repeat this root + 
*/ + sk->min_objectid = sh->objectid; + sk->min_offset = sh->offset; + sk->min_type = sh->type; + } + sk->nr_items = 4096; + if (sk->min_offset < (u64)-1) + sk->min_offset++; + else if (sk->min_objectid < (u64)-1) { + sk->min_objectid++; + sk->min_offset = 0; + sk->min_type = 0; + } else + break; + } + +out: + return ret; +} + +/* + * It's safe to call this function even without the subvol_uuid_search_init() + * call before as long as the subvol_uuid_search structure is all-zero. + */ +void subvol_uuid_search_finit(struct subvol_uuid_search *s) +{ + struct rb_root *root = &s->root_id_subvols; + struct rb_node *node; + + while ((node = rb_first(root))) { + struct subvol_info *entry = + rb_entry(node, struct subvol_info, rb_root_id_node); + + free(entry->path); + rb_erase(node, root); + free(entry); + } + + s->root_id_subvols = RB_ROOT; + s->local_subvols = RB_ROOT; + s->received_subvols = RB_ROOT; + s->path_subvols = RB_ROOT; +} + +char *path_cat(const char *p1, const char *p2) +{ + int p1_len = strlen(p1); + int p2_len = strlen(p2); + char *new = malloc(p1_len + p2_len + 2); + + if (p1_len && p1[p1_len - 1] == '/') + p1_len--; + if (p2_len && p2[p2_len - 1] == '/') + p2_len--; + sprintf(new, "%.*s/%.*s", p1_len, p1, p2_len, p2); + return new; +} + + +char *path_cat3(const char *p1, const char *p2, const char *p3) +{ + int p1_len = strlen(p1); + int p2_len = strlen(p2); + int p3_len = strlen(p3); + char *new = malloc(p1_len + p2_len + p3_len + 3); + + if (p1_len && p1[p1_len - 1] == '/') + p1_len--; + if (p2_len && p2[p2_len - 1] == '/') + p2_len--; + if (p3_len && p3[p3_len - 1] == '/') + p3_len--; + sprintf(new, "%.*s/%.*s/%.*s", p1_len, p1, p2_len, p2, p3_len, p3); + return new; +} + diff --git a/send-utils.h b/send-utils.h new file mode 100644 index 0000000..06af75f --- /dev/null +++ b/send-utils.h @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2012 Alexander Block. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#ifndef SEND_UTILS_H_ +#define SEND_UTILS_H_ + +#if BTRFS_FLAT_INCLUDES +#include "ctree.h" +#include "rbtree.h" +#else +#include <btrfs/ctree.h> +#include <btrfs/rbtree.h> +#endif /* BTRFS_FLAT_INCLUDES */ + +#ifdef __cplusplus +extern "C" { +#endif + +enum subvol_search_type { + subvol_search_by_root_id, + subvol_search_by_uuid, + subvol_search_by_received_uuid, + subvol_search_by_path, +}; + +struct subvol_info { + struct rb_node rb_root_id_node; + struct rb_node rb_local_node; + struct rb_node rb_received_node; + struct rb_node rb_path_node; + u64 root_id; + u8 uuid[BTRFS_UUID_SIZE]; + u8 parent_uuid[BTRFS_UUID_SIZE]; + u8 received_uuid[BTRFS_UUID_SIZE]; + u64 ctransid; + u64 otransid; + u64 stransid; + u64 rtransid; + + char *path; +}; + +struct subvol_uuid_search { + struct rb_root root_id_subvols; + struct rb_root local_subvols; + struct rb_root received_subvols; + struct rb_root path_subvols; +}; + +int subvol_uuid_search_init(int mnt_fd, struct subvol_uuid_search *s); +void subvol_uuid_search_finit(struct subvol_uuid_search *s); +struct subvol_info *subvol_uuid_search(struct subvol_uuid_search *s, + u64 root_id, const u8 *uuid, u64 transid, + const char *path, + enum subvol_search_type type); +void subvol_uuid_search_add(struct subvol_uuid_search *s, + struct subvol_info *si); + +int 
btrfs_subvolid_resolve(int fd, char *path, size_t path_len, u64 subvol_id); + +char *path_cat(const char *p1, const char *p2); +char *path_cat3(const char *p1, const char *p2, const char *p3); + +#ifdef __cplusplus +} +#endif + +#endif /* SEND_UTILS_H_ */ @@ -0,0 +1,142 @@ +/* + * Copyright (C) 2012 Alexander Block. All rights reserved. + * Copyright (C) 2012 STRATO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include "ctree.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define BTRFS_SEND_STREAM_MAGIC "btrfs-stream" +#define BTRFS_SEND_STREAM_VERSION 1 + +#define BTRFS_SEND_BUF_SIZE (1024 * 64) +#define BTRFS_SEND_READ_SIZE (1024 * 48) + +enum btrfs_tlv_type { + BTRFS_TLV_U8, + BTRFS_TLV_U16, + BTRFS_TLV_U32, + BTRFS_TLV_U64, + BTRFS_TLV_BINARY, + BTRFS_TLV_STRING, + BTRFS_TLV_UUID, + BTRFS_TLV_TIMESPEC, +}; + +struct btrfs_stream_header { + char magic[sizeof(BTRFS_SEND_STREAM_MAGIC)]; + __le32 version; +} __attribute__ ((__packed__)); + +struct btrfs_cmd_header { + /* len excluding the header */ + __le32 len; + __le16 cmd; + /* crc including the header with zero crc field */ + __le32 crc; +} __attribute__ ((__packed__)); + +struct btrfs_tlv_header { + __le16 tlv_type; + /* len excluding the header */ + __le16 tlv_len; +} __attribute__ ((__packed__)); + +/* commands */ +enum btrfs_send_cmd { + BTRFS_SEND_C_UNSPEC, + + BTRFS_SEND_C_SUBVOL, + BTRFS_SEND_C_SNAPSHOT, + + BTRFS_SEND_C_MKFILE, + BTRFS_SEND_C_MKDIR, + BTRFS_SEND_C_MKNOD, + BTRFS_SEND_C_MKFIFO, + BTRFS_SEND_C_MKSOCK, + BTRFS_SEND_C_SYMLINK, + + BTRFS_SEND_C_RENAME, + BTRFS_SEND_C_LINK, + BTRFS_SEND_C_UNLINK, + BTRFS_SEND_C_RMDIR, + + BTRFS_SEND_C_SET_XATTR, + BTRFS_SEND_C_REMOVE_XATTR, + + BTRFS_SEND_C_WRITE, + BTRFS_SEND_C_CLONE, + + BTRFS_SEND_C_TRUNCATE, + BTRFS_SEND_C_CHMOD, + BTRFS_SEND_C_CHOWN, + BTRFS_SEND_C_UTIMES, + + BTRFS_SEND_C_END, + BTRFS_SEND_C_UPDATE_EXTENT, + __BTRFS_SEND_C_MAX, +}; +#define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1) + +/* attributes in send stream */ +enum { + BTRFS_SEND_A_UNSPEC, + + BTRFS_SEND_A_UUID, + BTRFS_SEND_A_CTRANSID, + + BTRFS_SEND_A_INO, + BTRFS_SEND_A_SIZE, + BTRFS_SEND_A_MODE, + BTRFS_SEND_A_UID, + BTRFS_SEND_A_GID, + BTRFS_SEND_A_RDEV, + BTRFS_SEND_A_CTIME, + BTRFS_SEND_A_MTIME, + BTRFS_SEND_A_ATIME, + BTRFS_SEND_A_OTIME, + + BTRFS_SEND_A_XATTR_NAME, + BTRFS_SEND_A_XATTR_DATA, + + BTRFS_SEND_A_PATH, + BTRFS_SEND_A_PATH_TO, + BTRFS_SEND_A_PATH_LINK, + 
+ BTRFS_SEND_A_FILE_OFFSET, + BTRFS_SEND_A_DATA, + + BTRFS_SEND_A_CLONE_UUID, + BTRFS_SEND_A_CLONE_CTRANSID, + BTRFS_SEND_A_CLONE_PATH, + BTRFS_SEND_A_CLONE_OFFSET, + BTRFS_SEND_A_CLONE_LEN, + + __BTRFS_SEND_A_MAX, +}; +#define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1) + +#ifdef __KERNEL__ +long btrfs_ioctl_send(struct file *mnt_file, void __user *arg); +#endif + +#ifdef __cplusplus +} +#endif diff --git a/transaction.h b/transaction.h index a1070e0..e8610b1 100644 --- a/transaction.h +++ b/transaction.h @@ -34,6 +34,7 @@ btrfs_start_transaction(struct btrfs_root *root, int num_blocks) struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_trans_handle *h = malloc(sizeof(*h)); + BUG_ON(!h); BUG_ON(root->commit_root); BUG_ON(fs_info->running_transaction); fs_info->running_transaction = h; @@ -1,5 +1,6 @@ /* * Copyright (C) 2007 Oracle. All rights reserved. + * Copyright (C) 2008 Morey Roof. All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public @@ -16,10 +17,13 @@ * Boston, MA 021110-1307, USA. 
*/ -#define _XOPEN_SOURCE 600 -#define __USE_XOPEN2K +#define _XOPEN_SOURCE 700 +#define __USE_XOPEN2K8 +#define __XOPEN2K8 /* due to an error in dirent.h, to get dirfd() */ +#define _GNU_SOURCE /* O_NOATIME */ #include <stdio.h> #include <stdlib.h> +#include <string.h> #ifndef __CHECKER__ #include <sys/ioctl.h> #include <sys/mount.h> @@ -31,10 +35,12 @@ #include <fcntl.h> #include <unistd.h> #include <mntent.h> +#include <ctype.h> #include <linux/loop.h> #include <linux/major.h> #include <linux/kdev_t.h> #include <limits.h> +#include <blkid/blkid.h> #include "kerncompat.h" #include "radix-tree.h" #include "ctree.h" @@ -109,7 +115,7 @@ int make_btrfs(int fd, const char *device, const char *label, btrfs_set_super_bytenr(&super, blocks[0]); btrfs_set_super_num_devices(&super, 1); - strncpy((char *)&super.magic, BTRFS_MAGIC, sizeof(super.magic)); + super.magic = cpu_to_le64(BTRFS_MAGIC); btrfs_set_super_generation(&super, 1); btrfs_set_super_root(&super, blocks[1]); btrfs_set_super_chunk_root(&super, blocks[3]); @@ -410,7 +416,7 @@ int make_btrfs(int fd, const char *device, const char *label, return 0; } -static u64 device_size(int fd, struct stat *st) +u64 btrfs_device_size(int fd, struct stat *st) { u64 size; if (S_ISREG(st->st_mode)) { @@ -468,7 +474,7 @@ int btrfs_add_to_fsid(struct btrfs_trans_handle *trans, u32 sectorsize) { struct btrfs_super_block *disk_super; - struct btrfs_super_block *super = &root->fs_info->super_copy; + struct btrfs_super_block *super = root->fs_info->super_copy; struct btrfs_device *device; struct btrfs_dev_item *dev_item; char *buf; @@ -476,7 +482,7 @@ int btrfs_add_to_fsid(struct btrfs_trans_handle *trans, u64 num_devs; int ret; - device = kmalloc(sizeof(*device), GFP_NOFS); + device = kzalloc(sizeof(*device), GFP_NOFS); if (!device) return -ENOMEM; buf = kmalloc(sectorsize, GFP_NOFS); @@ -537,7 +543,7 @@ int btrfs_add_to_fsid(struct btrfs_trans_handle *trans, } int btrfs_prepare_device(int fd, char *file, int zero_end, u64 
*block_count_ret, - int *mixed) + u64 max_block_count, int *mixed, int nodiscard) { u64 block_count; u64 bytenr; @@ -550,11 +556,13 @@ int btrfs_prepare_device(int fd, char *file, int zero_end, u64 *block_count_ret, exit(1); } - block_count = device_size(fd, &st); + block_count = btrfs_device_size(fd, &st); if (block_count == 0) { fprintf(stderr, "unable to find %s size\n", file); exit(1); } + if (max_block_count) + block_count = min(block_count, max_block_count); zero_end = 1; if (block_count < 1024 * 1024 * 1024 && !(*mixed)) { @@ -562,11 +570,13 @@ int btrfs_prepare_device(int fd, char *file, int zero_end, u64 *block_count_ret, *mixed = 1; } - /* - * We intentionally ignore errors from the discard ioctl. It is - * not necessary for the mkfs functionality but just an optimization. - */ - discard_blocks(fd, 0, block_count); + if (!nodiscard) { + /* + * We intentionally ignore errors from the discard ioctl. It is + * not necessary for the mkfs functionality but just an optimization. + */ + discard_blocks(fd, 0, block_count); + } ret = zero_dev_start(fd); if (ret) { @@ -604,7 +614,7 @@ int btrfs_make_root_dir(struct btrfs_trans_handle *trans, btrfs_set_stack_inode_size(&inode_item, 0); btrfs_set_stack_inode_nlink(&inode_item, 1); btrfs_set_stack_inode_nbytes(&inode_item, root->leafsize); - btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0555); + btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755); btrfs_set_stack_timespec_sec(&inode_item.atime, now); btrfs_set_stack_timespec_nsec(&inode_item.atime, 0); btrfs_set_stack_timespec_sec(&inode_item.ctime, now); @@ -615,7 +625,7 @@ int btrfs_make_root_dir(struct btrfs_trans_handle *trans, btrfs_set_stack_timespec_nsec(&inode_item.otime, 0); if (root->fs_info->tree_root == root) - btrfs_set_super_root_dir(&root->fs_info->super_copy, objectid); + btrfs_set_super_root_dir(root->fs_info->super_copy, objectid); ret = btrfs_insert_inode(trans, root, objectid, &inode_item); if (ret) @@ -631,6 +641,93 @@ error: return ret; } 
+/* + * checks if a path is a block device node + * Returns negative errno on failure, otherwise + * returns 1 for blockdev, 0 for not-blockdev + */ +int is_block_device(const char *path) { + struct stat statbuf; + + if (stat(path, &statbuf) < 0) + return -errno; + + return S_ISBLK(statbuf.st_mode); +} + +/* + * Find the mount point for a mounted device. + * On success, returns 0 with mountpoint in *mp. + * On failure, returns -errno (not mounted yields -EINVAL) + * Is noisy on failures, expects to be given a mounted device. + */ +int get_btrfs_mount(const char *dev, char *mp, size_t mp_size) { + int ret; + int fd = -1; + + ret = is_block_device(dev); + if (ret <= 0) { + if (!ret) { + fprintf(stderr, "%s is not a block device\n", dev); + ret = -EINVAL; + } else { + fprintf(stderr, "Could not check %s: %s\n", + dev, strerror(-ret)); + } + goto out; + } + + fd = open(dev, O_RDONLY); + if (fd < 0) { + ret = -errno; + fprintf(stderr, "Could not open %s: %s\n", dev, strerror(errno)); + goto out; + } + + ret = check_mounted_where(fd, dev, mp, mp_size, NULL); + if (!ret) { + fprintf(stderr, "%s is not a mounted btrfs device\n", dev); + ret = -EINVAL; + } else { /* mounted, all good */ + ret = 0; + } +out: + if (fd != -1) + close(fd); + if (ret) + fprintf(stderr, "Could not get mountpoint for %s\n", dev); + return ret; +} + +/* + * Given a pathname, return a filehandle to: + * the original pathname or, + * if the pathname is a mounted btrfs device, to its mountpoint. + * + * On error, return -1, errno should be set. 
+ */ +int open_path_or_dev_mnt(const char *path) +{ + char mp[BTRFS_PATH_NAME_MAX + 1]; + int fdmnt; + + if (is_block_device(path)) { + int ret; + + ret = get_btrfs_mount(path, mp, sizeof(mp)); + if (ret < 0) { + /* not a mounted btrfs dev */ + errno = EINVAL; + return -1; + } + fdmnt = open_file_or_dir(mp); + } else { + fdmnt = open_file_or_dir(path); + } + + return fdmnt; +} + /* checks if a device is a loop device */ int is_loop_device (const char* device) { struct stat statbuf; @@ -647,19 +744,22 @@ int is_loop_device (const char* device) { * the associated file (e.g. /images/my_btrfs.img) */ int resolve_loop_device(const char* loop_dev, char* loop_file, int max_len) { - int loop_fd; - int ret_ioctl; - struct loop_info loopinfo; + int ret; + FILE *f; + char fmt[20]; + char p[PATH_MAX]; + char real_loop_dev[PATH_MAX]; - if ((loop_fd = open(loop_dev, O_RDONLY)) < 0) + if (!realpath(loop_dev, real_loop_dev)) + return -errno; + snprintf(p, PATH_MAX, "/sys/block/%s/loop/backing_file", strrchr(real_loop_dev, '/')); + if (!(f = fopen(p, "r"))) return -errno; - ret_ioctl = ioctl(loop_fd, LOOP_GET_STATUS, &loopinfo); - close(loop_fd); - - if (ret_ioctl == 0) - strncpy(loop_file, loopinfo.lo_name, max_len); - else + snprintf(fmt, 20, "%%%i[^\n]", max_len-1); + ret = fscanf(f, fmt, loop_file); + fclose(f); + if (ret == EOF) return -errno; return 0; @@ -860,8 +960,10 @@ int check_mounted_where(int fd, const char *file, char *where, int size, } /* Did we find an entry in mnt table? */ - if (mnt && size && where) + if (mnt && size && where) { strncpy(where, mnt->mnt_dir, size); + where[size-1] = 0; + } if (fs_dev_ret) *fs_dev_ret = fs_devices_mnt; @@ -873,42 +975,9 @@ out_mntloop_err: return ret; } -/* Gets the mount point of btrfs filesystem that is using the specified device. - * Returns 0 is everything is good, <0 if we have an error. - * TODO: Fix this fucntion and check_mounted to work with multiple drive BTRFS - * setups. 
- */ -int get_mountpt(char *dev, char *mntpt, size_t size) -{ - struct mntent *mnt; - FILE *f; - int ret = 0; - - f = setmntent("/proc/mounts", "r"); - if (f == NULL) - return -errno; - - while ((mnt = getmntent(f)) != NULL ) - { - if (strcmp(dev, mnt->mnt_fsname) == 0) - { - strncpy(mntpt, mnt->mnt_dir, size); - break; - } - } - - if (mnt == NULL) - { - /* We didn't find an entry so lets report an error */ - ret = -1; - } - - return ret; -} - struct pending_dir { struct list_head list; - char name[256]; + char name[PATH_MAX]; }; void btrfs_register_one_device(char *fname) @@ -921,14 +990,16 @@ void btrfs_register_one_device(char *fname) fd = open("/dev/btrfs-control", O_RDONLY); if (fd < 0) { fprintf(stderr, "failed to open /dev/btrfs-control " - "skipping device registration\n"); + "skipping device registration: %s\n", + strerror(errno)); return; } strncpy(args.name, fname, BTRFS_PATH_NAME_MAX); + args.name[BTRFS_PATH_NAME_MAX-1] = 0; ret = ioctl(fd, BTRFS_IOC_SCAN_DEV, &args); e = errno; if(ret<0){ - fprintf(stderr, "ERROR: unable to scan the device '%s' - %s\n", + fprintf(stderr, "ERROR: device scan failed '%s' - %s\n", fname, strerror(e)); } close(fd); @@ -943,7 +1014,6 @@ int btrfs_scan_one_dir(char *dirname, int run_ioctl) int ret; int fd; int dirname_len; - int pathlen; char *fullpath; struct list_head pending_list; struct btrfs_fs_devices *tmp_devices; @@ -958,8 +1028,7 @@ int btrfs_scan_one_dir(char *dirname, int run_ioctl) again: dirname_len = strlen(pending->name); - pathlen = 1024; - fullpath = malloc(pathlen); + fullpath = malloc(PATH_MAX); dirname = pending->name; if (!fullpath) { @@ -969,6 +1038,7 @@ again: dirp = opendir(dirname); if (!dirp) { fprintf(stderr, "Unable to open %s for scanning\n", dirname); + free(fullpath); return -ENOENT; } while(1) { @@ -977,11 +1047,11 @@ again: break; if (dirent->d_name[0] == '.') continue; - if (dirname_len + strlen(dirent->d_name) + 2 > pathlen) { + if (dirname_len + strlen(dirent->d_name) + 2 > PATH_MAX) { ret 
= -EFAULT; goto fail; } - snprintf(fullpath, pathlen, "%s/%s", dirname, dirent->d_name); + snprintf(fullpath, PATH_MAX, "%s/%s", dirname, dirent->d_name); ret = lstat(fullpath, &st); if (ret < 0) { fprintf(stderr, "failed to stat %s\n", fullpath); @@ -1003,8 +1073,14 @@ again: } fd = open(fullpath, O_RDONLY); if (fd < 0) { - fprintf(stderr, "failed to read %s: %s\n", fullpath, - strerror(errno)); + /* ignore the following errors: + ENXIO (device don't exists) + ENOMEDIUM (No medium found -> + like a cd tray empty) + */ + if(errno != ENXIO && errno != ENOMEDIUM) + fprintf(stderr, "failed to read %s: %s\n", + fullpath, strerror(errno)); continue; } ret = btrfs_scan_one_device(fd, fullpath, &tmp_devices, @@ -1019,13 +1095,16 @@ again: free(pending); pending = list_entry(pending_list.next, struct pending_dir, list); + free(fullpath); list_del(&pending->list); closedir(dirp); + dirp = NULL; goto again; } ret = 0; fail: free(pending); + free(fullpath); if (dirp) closedir(dirp); return ret; @@ -1034,7 +1113,12 @@ fail: int btrfs_scan_for_fsid(struct btrfs_fs_devices *fs_devices, u64 total_devs, int run_ioctls) { - return btrfs_scan_one_dir("/dev", run_ioctls); + int ret; + + ret = btrfs_scan_block_devices(run_ioctls); + if (ret) + ret = btrfs_scan_one_dir("/dev", run_ioctls); + return ret; } int btrfs_device_already_in_root(struct btrfs_root *root, int fd, @@ -1055,11 +1139,10 @@ int btrfs_device_already_in_root(struct btrfs_root *root, int fd, ret = 0; disk_super = (struct btrfs_super_block *)buf; - if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, - sizeof(disk_super->magic))) + if (disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) goto brelse; - if (!memcmp(disk_super->fsid, root->fs_info->super_copy.fsid, + if (!memcmp(disk_super->fsid, root->fs_info->super_copy->fsid, BTRFS_FSID_SIZE)) ret = 1; brelse: @@ -1074,53 +1157,199 @@ char *pretty_sizes(u64 size) { int num_divs = 0; int pretty_len = 16; - u64 last_size = size; - u64 fract_size = size; float fraction; char 
*pretty; - while(size > 0) { - fract_size = last_size; - last_size = size; - size /= 1024; - num_divs++; - } - if (num_divs == 0) - num_divs = 1; - if (num_divs > ARRAY_SIZE(size_strs)) - return NULL; + if( size < 1024 ){ + fraction = size; + num_divs = 0; + } else { + u64 last_size = size; + num_divs = 0; + while(size >= 1024){ + last_size = size; + size /= 1024; + num_divs ++; + } - fraction = (float)fract_size / 1024; + if (num_divs >= ARRAY_SIZE(size_strs)) + return NULL; + fraction = (float)last_size / 1024; + } pretty = malloc(pretty_len); - snprintf(pretty, pretty_len, "%.2f%s", fraction, size_strs[num_divs-1]); + snprintf(pretty, pretty_len, "%.2f%s", fraction, size_strs[num_divs]); return pretty; } /* + * __strncpy__null - strncpy with null termination + * @dest: the target array + * @src: the source string + * @n: maximum bytes to copy (size of *dest) + * + * Like strncpy, but ensures destination is null-terminated. + * + * Copies the string pointed to by src, including the terminating null + * byte ('\0'), to the buffer pointed to by dest, up to a maximum + * of n bytes. Then ensure that dest is null-terminated. + */ +char *__strncpy__null(char *dest, const char *src, size_t n) +{ + strncpy(dest, src, n); + if (n > 0) + dest[n - 1] = '\0'; + return dest; +} + +/* * Checks to make sure that the label matches our requirements. 
* Returns: 0 if everything is safe and usable -1 if the label is too long - -2 if the label contains an invalid character */ -int check_label(char *input) +static int check_label(const char *input) { - int i; int len = strlen(input); - if (len > BTRFS_LABEL_SIZE) { + if (len > BTRFS_LABEL_SIZE - 1) { + fprintf(stderr, "ERROR: Label %s is too long (max %d)\n", + input, BTRFS_LABEL_SIZE - 1); return -1; } - for (i = 0; i < len; i++) { - if (input[i] == '/' || input[i] == '\\') { - return -2; - } - } - return 0; } +static int set_label_unmounted(const char *dev, const char *label) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root; + int ret; + + ret = check_mounted(dev); + if (ret < 0) { + fprintf(stderr, "FATAL: error checking %s mount status\n", dev); + return -1; + } + if (ret > 0) { + fprintf(stderr, "ERROR: dev %s is mounted, use mount point\n", + dev); + return -1; + } + + /* Open the super_block at the default location + * and as read-write. + */ + root = open_ctree(dev, 0, 1); + if (!root) /* errors are printed by open_ctree() */ + return -1; + + trans = btrfs_start_transaction(root, 1); + snprintf(root->fs_info->super_copy->label, BTRFS_LABEL_SIZE, "%s", + label); + btrfs_commit_transaction(trans, root); + + /* Now we close it since we are done. 
*/ + close_ctree(root); + return 0; +} + +static int set_label_mounted(const char *mount_path, const char *label) +{ + int fd; + + fd = open(mount_path, O_RDONLY | O_NOATIME); + if (fd < 0) { + fprintf(stderr, "ERROR: unable access to '%s'\n", mount_path); + return -1; + } + + if (ioctl(fd, BTRFS_IOC_SET_FSLABEL, label) < 0) { + fprintf(stderr, "ERROR: unable to set label %s\n", + strerror(errno)); + close(fd); + return -1; + } + + close(fd); + return 0; +} + +static int get_label_unmounted(const char *dev) +{ + struct btrfs_root *root; + int ret; + + ret = check_mounted(dev); + if (ret < 0) { + fprintf(stderr, "FATAL: error checking %s mount status\n", dev); + return -1; + } + if (ret > 0) { + fprintf(stderr, "ERROR: dev %s is mounted, use mount point\n", + dev); + return -1; + } + + /* Open the super_block at the default location + * and as read-only. + */ + root = open_ctree(dev, 0, 0); + if(!root) + return -1; + + fprintf(stdout, "%s\n", root->fs_info->super_copy->label); + + /* Now we close it since we are done. */ + close_ctree(root); + return 0; +} + +/* + * If a partition is mounted, try to get the filesystem label via its + * mounted path rather than device. Return the corresponding error + * the user specified the device path. + */ +static int get_label_mounted(const char *mount_path) +{ + char label[BTRFS_LABEL_SIZE]; + int fd; + + fd = open(mount_path, O_RDONLY | O_NOATIME); + if (fd < 0) { + fprintf(stderr, "ERROR: unable access to '%s'\n", mount_path); + return -1; + } + + memset(label, '\0', sizeof(label)); + if (ioctl(fd, BTRFS_IOC_GET_FSLABEL, label) < 0) { + fprintf(stderr, "ERROR: unable get label %s\n", strerror(errno)); + close(fd); + return -1; + } + + fprintf(stdout, "%s\n", label); + close(fd); + return 0; +} + +int get_label(const char *btrfs_dev) +{ + return is_existing_blk_or_reg_file(btrfs_dev) ? 
+ get_label_unmounted(btrfs_dev) : + get_label_mounted(btrfs_dev); +} + +int set_label(const char *btrfs_dev, const char *label) +{ + if (check_label(label)) + return -1; + + return is_existing_blk_or_reg_file(btrfs_dev) ? + set_label_unmounted(btrfs_dev, label) : + set_label_mounted(btrfs_dev, label); +} + int btrfs_scan_block_devices(int run_ioctl) { @@ -1133,24 +1362,43 @@ int btrfs_scan_block_devices(int run_ioctl) int i; char buf[1024]; char fullpath[110]; + int scans = 0; + int special; +scan_again: proc_partitions = fopen("/proc/partitions","r"); if (!proc_partitions) { fprintf(stderr, "Unable to open '/proc/partitions' for scanning\n"); return -ENOENT; } /* skip the header */ - for(i=0; i < 2 ; i++) - if(!fgets(buf, 1023, proc_partitions)){ - fprintf(stderr, "Unable to read '/proc/partitions' for scanning\n"); - fclose(proc_partitions); - return -ENOENT; - } + for (i = 0; i < 2; i++) + if (!fgets(buf, 1023, proc_partitions)) { + fprintf(stderr, + "Unable to read '/proc/partitions' for scanning\n"); + fclose(proc_partitions); + return -ENOENT; + } strcpy(fullpath,"/dev/"); while(fgets(buf, 1023, proc_partitions)) { - i = sscanf(buf," %*d %*d %*d %99s", fullpath+5); + + /* + * multipath and MD devices may register as a btrfs filesystem + * both through the original block device and through + * the special (/dev/mapper or /dev/mdX) entry. 
+ * This scans the special entries last + */ + special = strncmp(fullpath, "/dev/dm-", strlen("/dev/dm-")) == 0; + if (!special) + special = strncmp(fullpath, "/dev/md", strlen("/dev/md")) == 0; + + if (scans == 0 && special) + continue; + if (scans > 0 && !special) + continue; + ret = lstat(fullpath, &st); if (ret < 0) { fprintf(stderr, "failed to stat %s\n", fullpath); @@ -1162,7 +1410,8 @@ int btrfs_scan_block_devices(int run_ioctl) fd = open(fullpath, O_RDONLY); if (fd < 0) { - fprintf(stderr, "failed to read %s\n", fullpath); + fprintf(stderr, "failed to open %s: %s\n", + fullpath, strerror(errno)); continue; } ret = btrfs_scan_one_device(fd, fullpath, &tmp_devices, @@ -1175,6 +1424,386 @@ int btrfs_scan_block_devices(int run_ioctl) } fclose(proc_partitions); + + if (scans == 0) { + scans++; + goto scan_again; + } return 0; } +u64 parse_size(char *s) +{ + int i; + char c; + u64 mult = 1; + + for (i = 0; s && s[i] && isdigit(s[i]); i++) ; + if (!i) { + fprintf(stderr, "ERROR: size value is empty\n"); + exit(50); + } + + if (s[i]) { + c = tolower(s[i]); + switch (c) { + case 'e': + mult *= 1024; + case 'p': + mult *= 1024; + case 't': + mult *= 1024; + case 'g': + mult *= 1024; + case 'm': + mult *= 1024; + case 'k': + mult *= 1024; + case 'b': + break; + default: + fprintf(stderr, "ERROR: Unknown size descriptor " + "'%c'\n", c); + exit(1); + } + } + if (s[i] && s[i+1]) { + fprintf(stderr, "ERROR: Illegal suffix contains " + "character '%c' in wrong position\n", + s[i+1]); + exit(51); + } + return strtoull(s, NULL, 10) * mult; +} + +int open_file_or_dir(const char *fname) +{ + int ret; + struct stat st; + DIR *dirstream; + int fd; + + ret = stat(fname, &st); + if (ret < 0) { + return -1; + } + if (S_ISDIR(st.st_mode)) { + dirstream = opendir(fname); + if (!dirstream) { + return -2; + } + fd = dirfd(dirstream); + } else { + fd = open(fname, O_RDWR); + } + if (fd < 0) { + return -3; + } + return fd; +} + +int get_device_info(int fd, u64 devid, + struct 
btrfs_ioctl_dev_info_args *di_args) +{ + int ret; + + di_args->devid = devid; + memset(&di_args->uuid, '\0', sizeof(di_args->uuid)); + + ret = ioctl(fd, BTRFS_IOC_DEV_INFO, di_args); + return ret ? -errno : 0; +} + +/* + * For a given path, fill in the ioctl fs_ and info_ args. + * If the path is a btrfs mountpoint, fill info for all devices. + * If the path is a btrfs device, fill in only that device. + * + * The path provided must be either on a mounted btrfs fs, + * or be a mounted btrfs device. + * + * Returns 0 on success, or a negative errno. + */ +int get_fs_info(char *path, struct btrfs_ioctl_fs_info_args *fi_args, + struct btrfs_ioctl_dev_info_args **di_ret) +{ + int fd = -1; + int ret = 0; + int ndevs = 0; + int i = 1; + struct btrfs_fs_devices *fs_devices_mnt = NULL; + struct btrfs_ioctl_dev_info_args *di_args; + char mp[BTRFS_PATH_NAME_MAX + 1]; + + memset(fi_args, 0, sizeof(*fi_args)); + + if (is_block_device(path)) { + /* Ensure it's mounted, then set path to the mountpoint */ + fd = open(path, O_RDONLY); + if (fd < 0) { + ret = -errno; + fprintf(stderr, "Couldn't open %s: %s\n", + path, strerror(errno)); + goto out; + } + ret = check_mounted_where(fd, path, mp, sizeof(mp), + &fs_devices_mnt); + if (!ret) { + ret = -EINVAL; + goto out; + } + if (ret < 0) + goto out; + path = mp; + /* Only fill in this one device */ + fi_args->num_devices = 1; + fi_args->max_id = fs_devices_mnt->latest_devid; + i = fs_devices_mnt->latest_devid; + memcpy(fi_args->fsid, fs_devices_mnt->fsid, BTRFS_FSID_SIZE); + close(fd); + } + + /* at this point path must not be for a block device */ + fd = open_file_or_dir(path); + if (fd < 0) { + ret = -errno; + goto out; + } + + /* fill in fi_args if not just a single device */ + if (fi_args->num_devices != 1) { + ret = ioctl(fd, BTRFS_IOC_FS_INFO, fi_args); + if (ret < 0) { + ret = -errno; + goto out; + } + } + + if (!fi_args->num_devices) + goto out; + + di_args = *di_ret = malloc(fi_args->num_devices * sizeof(*di_args)); + if 
(!di_args) { + ret = -errno; + goto out; + } + + for (; i <= fi_args->max_id; ++i) { + BUG_ON(ndevs >= fi_args->num_devices); + ret = get_device_info(fd, i, &di_args[ndevs]); + if (ret == -ENODEV) + continue; + if (ret) + goto out; + ndevs++; + } + + BUG_ON(ndevs == 0); + ret = 0; +out: + if (fd != -1) + close(fd); + return ret; +} + +#define isoctal(c) (((c) & ~7) == '0') + +static inline void translate(char *f, char *t) +{ + while (*f != '\0') { + if (*f == '\\' && + isoctal(f[1]) && isoctal(f[2]) && isoctal(f[3])) { + *t++ = 64*(f[1] & 7) + 8*(f[2] & 7) + (f[3] & 7); + f += 4; + } else + *t++ = *f++; + } + *t = '\0'; + return; +} + +/* + * Checks if the swap device. + * Returns 1 if swap device, < 0 on error or 0 if not swap device. + */ +int is_swap_device(const char *file) +{ + FILE *f; + struct stat st_buf; + dev_t dev; + ino_t ino = 0; + char tmp[PATH_MAX]; + char buf[PATH_MAX]; + char *cp; + int ret = 0; + + if (stat(file, &st_buf) < 0) + return -errno; + if (S_ISBLK(st_buf.st_mode)) + dev = st_buf.st_rdev; + else if (S_ISREG(st_buf.st_mode)) { + dev = st_buf.st_dev; + ino = st_buf.st_ino; + } else + return 0; + + if ((f = fopen("/proc/swaps", "r")) == NULL) + return 0; + + /* skip the first line */ + if (fgets(tmp, sizeof(tmp), f) == NULL) + goto out; + + while (fgets(tmp, sizeof(tmp), f) != NULL) { + if ((cp = strchr(tmp, ' ')) != NULL) + *cp = '\0'; + if ((cp = strchr(tmp, '\t')) != NULL) + *cp = '\0'; + translate(tmp, buf); + if (stat(buf, &st_buf) != 0) + continue; + if (S_ISBLK(st_buf.st_mode)) { + if (dev == st_buf.st_rdev) { + ret = 1; + break; + } + } else if (S_ISREG(st_buf.st_mode)) { + if (dev == st_buf.st_dev && ino == st_buf.st_ino) { + ret = 1; + break; + } + } + } + +out: + fclose(f); + + return ret; +} + +/* + * Check for existing filesystem or partition table on device. 
+ * Returns: + * 1 for existing fs or partition + * 0 for nothing found + * -1 for internal error + */ +static int +check_overwrite( + char *device) +{ + const char *type; + blkid_probe pr = NULL; + int ret; + blkid_loff_t size; + + if (!device || !*device) + return 0; + + ret = -1; /* will reset on success of all setup calls */ + + pr = blkid_new_probe_from_filename(device); + if (!pr) + goto out; + + size = blkid_probe_get_size(pr); + if (size < 0) + goto out; + + /* nothing to overwrite on a 0-length device */ + if (size == 0) { + ret = 0; + goto out; + } + + ret = blkid_probe_enable_partitions(pr, 1); + if (ret < 0) + goto out; + + ret = blkid_do_fullprobe(pr); + if (ret < 0) + goto out; + + /* + * Blkid returns 1 for nothing found and 0 when it finds a signature, + * but we want the exact opposite, so reverse the return value here. + * + * In addition print some useful diagnostics about what actually is + * on the device. + */ + if (ret) { + ret = 0; + goto out; + } + + if (!blkid_probe_lookup_value(pr, "TYPE", &type, NULL)) { + fprintf(stderr, + "%s appears to contain an existing " + "filesystem (%s).\n", device, type); + } else if (!blkid_probe_lookup_value(pr, "PTTYPE", &type, NULL)) { + fprintf(stderr, + "%s appears to contain a partition " + "table (%s).\n", device, type); + } else { + fprintf(stderr, + "%s appears to contain something weird " + "according to blkid\n", device); + } + ret = 1; + +out: + if (pr) + blkid_free_probe(pr); + if (ret == -1) + fprintf(stderr, + "probe of %s failed, cannot detect " + "existing filesystem.\n", device); + return ret; +} + +/* Check if disk is suitable for btrfs + * returns: + * 1: something is wrong, estr provides the error + * 0: all is fine + */ +int test_dev_for_mkfs(char *file, int force_overwrite, char *estr) +{ + int ret, fd; + size_t sz = 100; + + ret = is_swap_device(file); + if (ret < 0) { + snprintf(estr, sz, "error checking %s status: %s\n", file, + strerror(-ret)); + return 1; + } + if (ret == 1) { + 
snprintf(estr, sz, "%s is a swap device\n", file); + return 1; + } + if (!force_overwrite) { + if (check_overwrite(file)) { + snprintf(estr, sz, "Use the -f option to force overwrite.\n"); + return 1; + } + } + ret = check_mounted(file); + if (ret < 0) { + snprintf(estr, sz, "error checking %s mount status\n", + file); + return 1; + } + if (ret == 1) { + snprintf(estr, sz, "%s is mounted\n", file); + return 1; + } + /* check if the device is busy */ + fd = open(file, O_RDWR|O_EXCL); + if (fd < 0) { + snprintf(estr, sz, "unable to open %s: %s\n", file, + strerror(errno)); + return 1; + } + close(fd); + return 0; +} @@ -19,6 +19,9 @@ #ifndef __UTILS__ #define __UTILS__ +#include <sys/stat.h> +#include "ctree.h" + #define BTRFS_MKFS_SYSTEM_GROUP_SIZE (4 * 1024 * 1024) int make_btrfs(int fd, const char *device, const char *label, @@ -26,8 +29,8 @@ int make_btrfs(int fd, const char *device, const char *label, u32 leafsize, u32 sectorsize, u32 stripesize); int btrfs_make_root_dir(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid); -int btrfs_prepare_device(int fd, char *file, int zero_end, - u64 *block_count_ret, int *mixed); +int btrfs_prepare_device(int fd, char *file, int zero_end, u64 *block_count_ret, + u64 max_block_count, int *mixed, int nodiscard); int btrfs_add_to_fsid(struct btrfs_trans_handle *trans, struct btrfs_root *root, int fd, char *path, u64 block_count, u32 io_width, u32 io_align, @@ -42,8 +45,25 @@ int check_mounted_where(int fd, const char *file, char *where, int size, int btrfs_device_already_in_root(struct btrfs_root *root, int fd, int super_offset); char *pretty_sizes(u64 size); -int check_label(char *input); int get_mountpt(char *dev, char *mntpt, size_t size); - int btrfs_scan_block_devices(int run_ioctl); +u64 parse_size(char *s); +int open_file_or_dir(const char *fname); +int get_device_info(int fd, u64 devid, + struct btrfs_ioctl_dev_info_args *di_args); +int get_fs_info(char *path, struct btrfs_ioctl_fs_info_args 
*fi_args, + struct btrfs_ioctl_dev_info_args **di_ret); +int get_label(const char *btrfs_dev); +int set_label(const char *btrfs_dev, const char *label); + +char *__strncpy__null(char *dest, const char *src, size_t n); +int is_block_device(const char *file); +int get_btrfs_mount(const char *path, char *mp, size_t mp_size); +int open_path_or_dev_mnt(const char *path); +int is_swap_device(const char *file); +u64 btrfs_device_size(int fd, struct stat *st); +/* Helper to always get proper size of the destination string */ +#define strncpy_null(dest, src) __strncpy__null(dest, src, sizeof(dest)) +int test_dev_for_mkfs(char *file, int force_overwrite, char *estr); + #endif @@ -6,7 +6,7 @@ # Copyright 2008, Oracle # Released under the GNU GPLv2 -v="Btrfs v0.19" +v="v0.20-rc1" which git &> /dev/null if [ $? == 0 -a -d .git ]; then @@ -35,17 +35,22 @@ struct stripe { u64 physical; }; -struct map_lookup { - struct cache_extent ce; - u64 type; - int io_align; - int io_width; - int stripe_len; - int sector_size; - int num_stripes; - int sub_stripes; - struct btrfs_bio_stripe stripes[]; -}; +static inline int nr_parity_stripes(struct map_lookup *map) +{ + if (map->type & BTRFS_BLOCK_GROUP_RAID5) + return 1; + else if (map->type & BTRFS_BLOCK_GROUP_RAID6) + return 2; + else + return 0; +} + +static inline int nr_data_stripes(struct map_lookup *map) +{ + return map->num_stripes - nr_parity_stripes(map); +} + +#define is_parity_stripe(x) ( ((x) == BTRFS_RAID5_P_STRIPE) || ((x) == BTRFS_RAID6_Q_STRIPE) ) #define map_lookup_size(n) (sizeof(struct map_lookup) + \ (sizeof(struct btrfs_bio_stripe) * (n))) @@ -128,7 +133,14 @@ static int device_list_add(const char *path, btrfs_stack_device_bytes_used(&disk_super->dev_item); list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; - } + } else if (!device->name || strcmp(device->name, path)) { + char *name = strdup(path); + if (!name) + return -ENOMEM; + kfree(device->name); + device->name = name; + } + if 
(found_transid > fs_devices->latest_trans) { fs_devices->latest_devid = devid; @@ -181,6 +193,9 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int flags) goto fail; } + if (posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED)) + fprintf(stderr, "Warning, could not drop caches\n"); + if (device->devid == fs_devices->latest_devid) fs_devices->latest_bdev = fd; if (device->devid == fs_devices->lowest_devid) @@ -589,7 +604,7 @@ int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, struct btrfs_key *key, struct btrfs_chunk *chunk, int item_size) { - struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct btrfs_super_block *super_copy = root->fs_info->super_copy; struct btrfs_disk_key disk_key; u32 array_size; u8 *ptr; @@ -623,11 +638,21 @@ static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes, return calc_size; else if (type & BTRFS_BLOCK_GROUP_RAID10) return calc_size * (num_stripes / sub_stripes); + else if (type & BTRFS_BLOCK_GROUP_RAID5) + return calc_size * (num_stripes - 1); + else if (type & BTRFS_BLOCK_GROUP_RAID6) + return calc_size * (num_stripes - 2); else return calc_size * num_stripes; } +static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target) +{ + /* TODO, add a way to store the preferred stripe size */ + return 64 * 1024; +} + int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 *start, u64 *num_bytes, u64 type) @@ -657,12 +682,14 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, int index; int stripe_len = 64 * 1024; struct btrfs_key key; + u64 offset; if (list_empty(dev_list)) { return -ENOSPC; } if (type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_DUP)) { if (type & BTRFS_BLOCK_GROUP_SYSTEM) { @@ -681,7 +708,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, } if (type & BTRFS_BLOCK_GROUP_RAID1) { num_stripes = 
min_t(u64, 2, - btrfs_super_num_devices(&info->super_copy)); + btrfs_super_num_devices(info->super_copy)); if (num_stripes < 2) return -ENOSPC; min_stripes = 2; @@ -691,20 +718,36 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID0)) { - num_stripes = btrfs_super_num_devices(&info->super_copy); + num_stripes = btrfs_super_num_devices(info->super_copy); min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID10)) { - num_stripes = btrfs_super_num_devices(&info->super_copy); + num_stripes = btrfs_super_num_devices(info->super_copy); if (num_stripes < 4) return -ENOSPC; num_stripes &= ~(u32)1; sub_stripes = 2; min_stripes = 4; } + if (type & (BTRFS_BLOCK_GROUP_RAID5)) { + num_stripes = btrfs_super_num_devices(info->super_copy); + if (num_stripes < 2) + return -ENOSPC; + min_stripes = 2; + stripe_len = find_raid56_stripe_len(num_stripes - 1, + btrfs_super_stripesize(info->super_copy)); + } + if (type & (BTRFS_BLOCK_GROUP_RAID6)) { + num_stripes = btrfs_super_num_devices(info->super_copy); + if (num_stripes < 3) + return -ENOSPC; + min_stripes = 3; + stripe_len = find_raid56_stripe_len(num_stripes - 2, + btrfs_super_stripesize(info->super_copy)); + } /* we don't want a chunk larger than 10% of the FS */ - percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1); + percent_max = div_factor(btrfs_super_total_bytes(info->super_copy), 1); max_chunk_size = min(percent_max, max_chunk_size); again: @@ -762,12 +805,13 @@ again: } return -ENOSPC; } - key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; - key.type = BTRFS_CHUNK_ITEM_KEY; ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, - &key.offset); + &offset); if (ret) return ret; + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.type = BTRFS_CHUNK_ITEM_KEY; + key.offset = offset; chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS); if (!chunk) @@ -977,6 +1021,10 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, 
u64 logical, u64 len) ret = map->num_stripes; else if (map->type & BTRFS_BLOCK_GROUP_RAID10) ret = map->sub_stripes; + else if (map->type & BTRFS_BLOCK_GROUP_RAID5) + ret = 2; + else if (map->type & BTRFS_BLOCK_GROUP_RAID6) + ret = 3; else ret = 1; return ret; @@ -1016,6 +1064,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, u64 bytenr; u64 length; u64 stripe_nr; + u64 rmap_len; int i, j, nr = 0; ce = find_first_cache_extent(&map_tree->cache_tree, chunk_start); @@ -1023,10 +1072,16 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, map = container_of(ce, struct map_lookup, ce); length = ce->size; + rmap_len = map->stripe_len; if (map->type & BTRFS_BLOCK_GROUP_RAID10) length = ce->size / (map->num_stripes / map->sub_stripes); else if (map->type & BTRFS_BLOCK_GROUP_RAID0) length = ce->size / map->num_stripes; + else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6)) { + length = ce->size / nr_data_stripes(map); + rmap_len = map->stripe_len * nr_data_stripes(map); + } buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS); @@ -1045,8 +1100,11 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, map->sub_stripes; } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) { stripe_nr = stripe_nr * map->num_stripes + i; - } - bytenr = ce->start + stripe_nr * map->stripe_len; + } /* else if RAID[56], multiply by nr_data_stripes(). 
+ * Alternatively, just use rmap_len below instead of + * map->stripe_len */ + + bytenr = ce->start + stripe_nr * rmap_len; for (j = 0; j < nr; j++) { if (buf[j] == bytenr) break; @@ -1057,28 +1115,60 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, *logical = buf; *naddrs = nr; - *stripe_len = map->stripe_len; + *stripe_len = rmap_len; return 0; } +static inline int parity_smaller(u64 a, u64 b) +{ + return a > b; +} + +/* Bubble-sort the stripe set to put the parity/syndrome stripes last */ +static void sort_parity_stripes(struct btrfs_multi_bio *bbio, u64 *raid_map) +{ + struct btrfs_bio_stripe s; + int i; + u64 l; + int again = 1; + + while (again) { + again = 0; + for (i = 0; i < bbio->num_stripes - 1; i++) { + if (parity_smaller(raid_map[i], raid_map[i+1])) { + s = bbio->stripes[i]; + l = raid_map[i]; + bbio->stripes[i] = bbio->stripes[i+1]; + raid_map[i] = raid_map[i+1]; + bbio->stripes[i+1] = s; + raid_map[i+1] = l; + again = 1; + } + } + } +} + int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, - struct btrfs_multi_bio **multi_ret, int mirror_num) + struct btrfs_multi_bio **multi_ret, int mirror_num, + u64 **raid_map_ret) { return __btrfs_map_block(map_tree, rw, logical, length, NULL, - multi_ret, mirror_num); + multi_ret, mirror_num, raid_map_ret); } int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, u64 *type, - struct btrfs_multi_bio **multi_ret, int mirror_num) + struct btrfs_multi_bio **multi_ret, int mirror_num, + u64 **raid_map_ret) { struct cache_extent *ce; struct map_lookup *map; u64 offset; u64 stripe_offset; u64 stripe_nr; + u64 *raid_map = NULL; int stripes_allocated = 8; int stripes_required = 1; int stripe_index; @@ -1118,11 +1208,26 @@ again: stripes_required = map->sub_stripes; } } + if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6) + && multi_ret && ((rw & WRITE) || mirror_num > 1) && raid_map_ret) { + /* RAID[56] write or 
recovery. Return all stripes */ + stripes_required = map->num_stripes; + + /* Only allocate the map if we've already got a large enough multi_ret */ + if (stripes_allocated >= stripes_required) { + raid_map = kmalloc(sizeof(u64) * map->num_stripes, GFP_NOFS); + if (!raid_map) { + kfree(multi); + return -ENOMEM; + } + } + } + /* if our multi bio struct is too small, back off and try again */ - if (multi_ret && rw == WRITE && - stripes_allocated < stripes_required) { - stripes_allocated = map->num_stripes; + if (multi_ret && stripes_allocated < stripes_required) { + stripes_allocated = stripes_required; kfree(multi); + multi = NULL; goto again; } stripe_nr = offset; @@ -1139,6 +1244,7 @@ again: stripe_offset = offset - stripe_offset; if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_DUP)) { /* we limit the length of each bio to what fits in a stripe */ @@ -1177,6 +1283,59 @@ again: multi->num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; + } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID6)) { + + if (raid_map) { + int i, rot; + u64 tmp; + u64 raid56_full_stripe_start; + u64 full_stripe_len = nr_data_stripes(map) * map->stripe_len; + + /* + * align the start of our data stripe in the logical + * address space + */ + raid56_full_stripe_start = offset / full_stripe_len; + raid56_full_stripe_start *= full_stripe_len; + + /* get the data stripe number */ + stripe_nr = raid56_full_stripe_start / map->stripe_len; + stripe_nr = stripe_nr / nr_data_stripes(map); + + /* Work out the disk rotation on this stripe-set */ + rot = stripe_nr % map->num_stripes; + + /* Fill in the logical address of each stripe */ + tmp = stripe_nr * nr_data_stripes(map); + + for (i = 0; i < nr_data_stripes(map); i++) + raid_map[(i+rot) % map->num_stripes] = + ce->start + (tmp + i) * map->stripe_len; + + raid_map[(i+rot) % 
map->num_stripes] = BTRFS_RAID5_P_STRIPE; + if (map->type & BTRFS_BLOCK_GROUP_RAID6) + raid_map[(i+rot+1) % map->num_stripes] = BTRFS_RAID6_Q_STRIPE; + + *length = map->stripe_len; + stripe_index = 0; + stripe_offset = 0; + multi->num_stripes = map->num_stripes; + } else { + stripe_index = stripe_nr % nr_data_stripes(map); + stripe_nr = stripe_nr / nr_data_stripes(map); + + /* + * Mirror #0 or #1 means the original data block. + * Mirror #2 is RAID5 parity block. + * Mirror #3 is RAID6 Q block. + */ + if (mirror_num > 1) + stripe_index = nr_data_stripes(map) + mirror_num - 2; + + /* We distribute the parity blocks across stripes */ + stripe_index = (stripe_nr + stripe_index) % map->num_stripes; + } } else { /* * after this do_div call, stripe_nr is the number of stripes @@ -1196,8 +1355,14 @@ again: stripe_index++; } *multi_ret = multi; + if (type) *type = map->type; + + if (raid_map) { + sort_parity_stripes(multi, raid_map); + *raid_map_ret = raid_map; + } out: return 0; } @@ -1222,6 +1387,22 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, return NULL; } +struct btrfs_device *btrfs_find_device_by_devid(struct btrfs_root *root, + u64 devid, int instance) +{ + struct list_head *head = &root->fs_info->fs_devices->devices; + struct btrfs_device *dev; + struct list_head *cur; + int num_found = 0; + + list_for_each(cur, head) { + dev = list_entry(cur, struct btrfs_device, dev_list); + if (dev->devid == devid && num_found++ == instance) + return dev; + } + return NULL; +} + int btrfs_bootstrap_super_map(struct btrfs_mapping_tree *map_tree, struct btrfs_fs_devices *fs_devices) { @@ -1460,18 +1641,9 @@ static int read_one_dev(struct btrfs_root *root, return ret; } -int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf) -{ - struct btrfs_dev_item *dev_item; - - dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block, - dev_item); - return read_one_dev(root, buf, dev_item); -} - int 
btrfs_read_sys_array(struct btrfs_root *root) { - struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct btrfs_super_block *super_copy = root->fs_info->super_copy; struct extent_buffer *sb; struct btrfs_disk_key *disk_key; struct btrfs_chunk *chunk; @@ -1489,7 +1661,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) if (!sb) return -ENOMEM; btrfs_set_buffer_uptodate(sb); - write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); + write_extent_buffer(sb, super_copy, 0, sizeof(*super_copy)); array_size = btrfs_super_sys_array_size(super_copy); /* @@ -1588,9 +1760,9 @@ again: goto again; } - btrfs_free_path(path); ret = 0; error: + btrfs_free_path(path); return ret; } @@ -18,6 +18,7 @@ #ifndef __BTRFS_VOLUMES_ #define __BTRFS_VOLUMES_ + struct btrfs_device { struct list_head dev_list; struct btrfs_root *dev_root; @@ -88,9 +89,56 @@ struct btrfs_multi_bio { struct btrfs_bio_stripe stripes[]; }; +struct map_lookup { + struct cache_extent ce; + u64 type; + int io_align; + int io_width; + int stripe_len; + int sector_size; + int num_stripes; + int sub_stripes; + struct btrfs_bio_stripe stripes[]; +}; + #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ (sizeof(struct btrfs_bio_stripe) * (n))) +/* + * Restriper's general type filter + */ +#define BTRFS_BALANCE_DATA (1ULL << 0) +#define BTRFS_BALANCE_SYSTEM (1ULL << 1) +#define BTRFS_BALANCE_METADATA (1ULL << 2) + +#define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \ + BTRFS_BALANCE_SYSTEM | \ + BTRFS_BALANCE_METADATA) + +#define BTRFS_BALANCE_FORCE (1ULL << 3) +#define BTRFS_BALANCE_RESUME (1ULL << 4) + +/* + * Balance filters + */ +#define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0) +#define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1) +#define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2) +#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3) +#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4) + +/* + * Profile changing flags. 
When SOFT is set we won't relocate chunk if + * it already has the target profile (even though it may be + * half-filled). + */ +#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8) +#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9) + +#define BTRFS_RAID5_P_STRIPE ((u64)-2) +#define BTRFS_RAID6_Q_STRIPE ((u64)-1) + + int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 chunk_tree, u64 chunk_objectid, @@ -98,10 +146,12 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, u64 num_bytes, u64 *start); int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, u64 *type, - struct btrfs_multi_bio **multi_ret, int mirror_num); + struct btrfs_multi_bio **multi_ret, int mirror_num, + u64 **raid_map); int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, - struct btrfs_multi_bio **multi_ret, int mirror_num); + struct btrfs_multi_bio **multi_ret, int mirror_num, + u64 **raid_map_ret); int btrfs_next_metadata(struct btrfs_mapping_tree *map_tree, u64 *logical, u64 *size); int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, @@ -138,4 +188,6 @@ int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_chunk *chunk, int item_size); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); +struct btrfs_device *btrfs_find_device_by_devid(struct btrfs_root *root, + u64 devid, int instance); #endif |