diff options
205 files changed, 9721 insertions, 2063 deletions
diff --git a/Documentation/filesystems/mount_api.txt b/Documentation/filesystems/mount_api.txt new file mode 100644 index 000000000000..04f388567f92 --- /dev/null +++ b/Documentation/filesystems/mount_api.txt @@ -0,0 +1,741 @@ + ==================== + FILESYSTEM MOUNT API + ==================== + +CONTENTS + + (1) Overview. + + (2) The filesystem context. + + (3) The filesystem context operations. + + (4) Filesystem context security. + + (5) VFS filesystem context operations. + + (6) Parameter description. + + (7) Parameter helper functions. + + +======== +OVERVIEW +======== + +The creation of new mounts is now to be done in a multistep process: + + (1) Create a filesystem context. + + (2) Parse the parameters and attach them to the context. Parameters are + expected to be passed individually from userspace, though legacy binary + parameters can also be handled. + + (3) Validate and pre-process the context. + + (4) Get or create a superblock and mountable root. + + (5) Perform the mount. + + (6) Return an error message attached to the context. + + (7) Destroy the context. + +To support this, the file_system_type struct gains a new field: + + int (*init_fs_context)(struct fs_context *fc, struct dentry *reference); + +which is invoked to set up the filesystem-specific parts of a filesystem +context, including the additional space. The reference parameter is used to +convey a superblock and an automount point or a point to reconfigure from which +the filesystem may draw extra information (such as namespaces) for submount +(FS_CONTEXT_FOR_SUBMOUNT) or reconfiguration (FS_CONTEXT_FOR_RECONFIGURE) +purposes - otherwise it will be NULL. + +Note that security initialisation is done *after* the filesystem is called so +that the namespaces may be adjusted first. + +If fc->purpose is FS_CONTEXT_FOR_UMOUNT or FS_CONTEXT_FOR_EMERGENCY_RO, then +the function can return -EOPNOTSUPP to indicate that the filesystem isn't +interested in handling that. The error will be ignored. 
+ + +====================== +THE FILESYSTEM CONTEXT +====================== + +The creation and reconfiguration of a superblock is governed by a filesystem +context. This is represented by the fs_context structure: + + struct fs_context { + const struct fs_context_operations *ops; + struct file_system_type *fs_type; + void *fs_private; + struct dentry *root; + struct user_namespace *user_ns; + struct net *net_ns; + const struct cred *cred; + char *source; + char *subtype; + void *security; + void *s_fs_info; + unsigned int sb_flags; + unsigned int sb_flags_mask; + enum fs_context_purpose purpose:8; + bool sloppy:1; + bool silent:1; + ... + }; + +The fs_context fields are as follows: + + (*) const struct fs_context_operations *ops + + These are operations that can be done on a filesystem context (see + below). This must be set by the ->init_fs_context() file_system_type + operation. + + (*) struct file_system_type *fs_type + + A pointer to the file_system_type of the filesystem that is being + constructed or reconfigured. This retains a reference on the type owner. + + (*) void *fs_private + + A pointer to the file system's private data. This is where the filesystem + will need to store any options it parses. + + (*) struct dentry *root + + A pointer to the root of the mountable tree (and indirectly, the + superblock thereof). This is filled in by the ->get_tree() op. If this + is set, an active reference on root->d_sb must also be held. + + (*) struct user_namespace *user_ns + (*) struct net *net_ns + + These are a subset of the namespaces in use by the invoking process. They + retain references on each namespace. The subscribed namespaces may be + replaced by the filesystem to reflect other sources, such as the parent + mount superblock on an automount. + + (*) const struct cred *cred + + The mounter's credentials. This retains a reference on the credentials. + + (*) char *source + + This specifies the source. It may be a block device (e.g. 
/dev/sda1) or + something more exotic, such as the "host:/path" that NFS desires. + + (*) char *subtype + + This is a string to be added to the type displayed in /proc/mounts to + qualify it (used by FUSE). This is available for the filesystem to set if + desired. + + (*) void *security + + A place for the LSMs to hang their security data for the superblock. The + relevant security operations are described below. + + (*) void *s_fs_info + + The proposed s_fs_info for a new superblock, set in the superblock by + sget_fc(). This can be used to distinguish superblocks. + + (*) unsigned int sb_flags + (*) unsigned int sb_flags_mask + + Which bits SB_* flags are to be set/cleared in super_block::s_flags. + + (*) enum fs_context_purpose + + This indicates the purpose for which the context is intended. The + available values are: + + FS_CONTEXT_FOR_USER_MOUNT, -- New superblock for user-specified mount + FS_CONTEXT_FOR_KERNEL_MOUNT, -- New superblock for kernel-internal mount + FS_CONTEXT_FOR_SUBMOUNT -- New automatic submount of extant mount + FS_CONTEXT_FOR_ROOT_MOUNT -- Behind-the-scenes root mount (nfs/btrfs) + FS_CONTEXT_FOR_RECONFIGURE -- Change an existing mount + FS_CONTEXT_FOR_UMOUNT -- Reconfigure to R/O for umount() + FS_CONTEXT_FOR_EMERGENCY_RO -- Emergency reconfigure to R/O + + In the last two cases, ->init_fs_context() will not have been called. + + (*) bool sloppy + (*) bool silent + + These are set if the sloppy or silent mount options are given. + + [NOTE] sloppy is probably unnecessary when userspace passes over one + option at a time since the error can just be ignored if userspace deems it + to be unimportant. + + [NOTE] silent is probably redundant with sb_flags & SB_SILENT. + +The mount context is created by calling vfs_new_fs_context() or +vfs_dup_fs_context() and is destroyed with put_fs_context(). Note that the +structure is not refcounted. + +VFS, security and filesystem mount options are set individually with +vfs_parse_mount_option(). 
Options provided by the old mount(2) system call as +a page of data can be parsed with generic_parse_monolithic(). + +When mounting, the filesystem is allowed to take data from any of the pointers +and attach it to the superblock (or whatever), provided it clears the pointer +in the mount context. + +The filesystem is also allowed to allocate resources and pin them with the +mount context. For instance, NFS might pin the appropriate protocol version +module. + + +================================= +THE FILESYSTEM CONTEXT OPERATIONS +================================= + +The filesystem context points to a table of operations: + + struct fs_context_operations { + void (*free)(struct fs_context *fc); + int (*dup)(struct fs_context *fc, struct fs_context *src_fc); + int (*parse_param)(struct fs_context *fc, + struct fs_parameter *param); + int (*parse_monolithic)(struct fs_context *fc, void *data, + size_t data_size); + int (*validate)(struct fs_context *fc); + int (*get_tree)(struct fs_context *fc); + int (*reconfigure)(struct fs_context *fc); + }; + +These operations are invoked by the various stages of the mount procedure to +manage the filesystem context. They are as follows: + + (*) void (*free)(struct fs_context *fc); + + Called to clean up the filesystem-specific part of the filesystem context + when the context is destroyed. It should be aware that parts of the + context may have been removed and NULL'd out by ->get_tree(). + + (*) int (*dup)(struct fs_context *fc, struct fs_context *src_fc); + + Called when a filesystem context has been duplicated to duplicate the + filesystem-private data. An error may be returned to indicate failure to + do this. + + [!] Note that even if this fails, put_fs_context() will be called + immediately thereafter, so ->dup() *must* make the + filesystem-private data safe for ->free(). 
+ + (*) int (*parse_param)(struct fs_context *fc, + struct fs_parameter *param); + + Called when a parameter is being added to the filesystem context. param + points to the key name and maybe a value object. VFS-specific options + will have been weeded out and fc->sb_flags updated in the context. + Security options will also have been weeded out and fc->security updated. + + The parameter can be parsed with fs_parse() and fs_lookup_param(). Note + that the source(s) are presented as parameters named "source". + + If successful, 0 should be returned or a negative error code otherwise. + + (*) int (*parse_monolithic)(struct fs_context *fc, + void *data, size_t data_size); + + Called when the mount(2) system call is invoked to pass the entire data + page in one go. If this is expected to be just a list of "key[=val]" + items separated by commas, then this may be set to NULL. + + The return value is as for ->parse_param(). + + If the filesystem (e.g. NFS) needs to examine the data first and then + finds it's the standard key-val list then it may pass it off to + generic_parse_monolithic(). + + (*) int (*validate)(struct fs_context *fc); + + Called when all the options have been applied and the mount is about to + take place. It should check for inconsistencies from mount options and + it is also allowed to do preliminary resource acquisition. For instance, + the core NFS module could load the NFS protocol module here. + + Note that if fc->purpose == FS_CONTEXT_FOR_RECONFIGURE, some of the + options necessary for a new mount may not be set. + + The return value is as for ->parse_param(). + + (*) int (*get_tree)(struct fs_context *fc); + + Called to get or create the mountable root and superblock, using the + information stored in the filesystem context (reconfiguration goes via a + different vector). It may detach any resources it desires from the + filesystem context and transfer them to the superblock it creates. 
+ + On success it should set fc->root to the mountable root and return 0. In + the case of an error, it should return a negative error code. + + The phase on a userspace-driven context will be set to only allow this to + be called once on any particular context. + + (*) int (*reconfigure)(struct fs_context *fc); + + Called to effect reconfiguration of a superblock using information stored + in the filesystem context. It may detach any resources it desires from + the filesystem context and transfer them to the superblock. The + superblock can be found from fc->root->d_sb. + + On success it should return 0. In the case of an error, it should return + a negative error code. + + [NOTE] reconfigure is intended as a replacement for remount_fs. + + +=========================== +FILESYSTEM CONTEXT SECURITY +=========================== + +The filesystem context contains a security pointer that the LSMs can use for +building up a security context for the superblock to be mounted. There are a +number of operations used by the new mount code for this purpose: + + (*) int security_fs_context_alloc(struct fs_context *fc, + struct dentry *reference); + + Called to initialise fc->security (which is preset to NULL) and allocate + any resources needed. It should return 0 on success or a negative error + code on failure. + + reference will be non-NULL if the context is being created for superblock + reconfiguration (FS_CONTEXT_FOR_RECONFIGURE) in which case it indicates + the root dentry of the superblock to be reconfigured. It will also be + non-NULL in the case of a submount (FS_CONTEXT_FOR_SUBMOUNT) in which case + it indicates the automount point. + + (*) int security_fs_context_dup(struct fs_context *fc, + struct fs_context *src_fc); + + Called to initialise fc->security (which is preset to NULL) and allocate + any resources needed. The original filesystem context is pointed to by + src_fc and may be used for reference. 
It should return 0 on success or a + negative error code on failure. + + (*) void security_fs_context_free(struct fs_context *fc); + + Called to clean up anything attached to fc->security. Note that the + contents may have been transferred to a superblock and the pointer cleared + during get_tree. + + (*) int security_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param); + + Called for each mount parameter, including the source. The arguments are + as for the ->parse_param() method. It should return 0 to indicate that + the parameter should be passed on to the filesystem, 1 to indicate that + the parameter should be discarded or an error to indicate that the + parameter should be rejected. + + The value pointed to by param may be modified (if a string) or stolen + (provided the value pointer is NULL'd out). If it is stolen, 1 must be + returned to prevent it being passed to the filesystem. + + (*) int security_fs_context_validate(struct fs_context *fc); + + Called after all the options have been parsed to validate the collection + as a whole and to do any necessary allocation so that + security_sb_get_tree() and security_sb_reconfigure() are less likely to + fail. It should return 0 or a negative error code. + + In the case of reconfiguration, the target superblock will be accessible + via fc->root. + + (*) int security_sb_get_tree(struct fs_context *fc); + + Called during the mount procedure to verify that the specified superblock + is allowed to be mounted and to transfer the security data there. It + should return 0 or a negative error code. + + (*) void security_sb_reconfigure(struct fs_context *fc); + + Called to apply any reconfiguration to an LSM's context. It must not + fail. Error checking and resource allocation must be done in advance by + the parameter parsing and validation hooks. 
+ + (*) int security_sb_mountpoint(struct fs_context *fc, struct path *mountpoint, + unsigned int mnt_flags); + + Called during the mount procedure to verify that the root dentry attached + to the context is permitted to be attached to the specified mountpoint. + It should return 0 on success or a negative error code on failure. + + +================================= +VFS FILESYSTEM CONTEXT OPERATIONS +================================= + +There are four operations for creating a filesystem context and +one for destroying a context: + + (*) struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type, + struct dentry *reference, + unsigned int sb_flags, + unsigned int sb_flags_mask, + enum fs_context_purpose purpose); + + Create a filesystem context for a given filesystem type and purpose. This + allocates the filesystem context, sets the superblock flags, initialises + the security and calls fs_type->init_fs_context() to initialise the + filesystem private data. + + reference can be NULL or it may indicate the root dentry of a superblock + that is going to be reconfigured (FS_CONTEXT_FOR_RECONFIGURE, + FS_CONTEXT_FOR_UMOUNT or FS_CONTEXT_FOR_EMERGENCY_RO) or the automount + point that triggered a submount (FS_CONTEXT_FOR_SUBMOUNT). This is + provided as a source of namespace information. + + (*) struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc, + enum fs_context_purpose purpose); + + Duplicate a filesystem context, copying any options noted and duplicating + or additionally referencing any resources held therein. This is available + for use where a filesystem has to get a mount within a mount, such as NFS4 + does by internally mounting the root of the target server and then doing a + private pathwalk to the target directory. + + The purpose in the new context is set from the purpose parameter. + + (*) void put_fs_context(struct fs_context *fc); + + Destroy a filesystem context, releasing any resources it holds. 
This + calls the ->free() operation. This is intended to be called by anyone who + created a filesystem context. + + [!] filesystem contexts are not refcounted, so this causes unconditional + destruction. + +In all the above operations, apart from the put op, the return is a mount +context pointer or a negative error code. + +For the remaining operations, if an error occurs, a negative error code will be +returned. + + (*) int vfs_get_tree(struct fs_context *fc); + + Get or create the mountable root and superblock, using the parameters in + the filesystem context to select/configure the superblock. This invokes + the ->validate() op and then the ->get_tree() op. + + [NOTE] ->validate() could perhaps be rolled into ->get_tree() and + ->reconfigure(). + + (*) struct vfsmount *vfs_create_mount(struct fs_context *fc); + + Create a mount given the parameters in the specified filesystem context. + Note that this does not attach the mount to anything. + + (*) int vfs_parse_fs_param(struct fs_context *fc, + struct fs_parameter *param); + + Supply a single mount parameter to the filesystem context. This includes + the specification of the source/device which is specified as the "source" + parameter (which may be specified multiple times if the filesystem + supports that). + + param specifies the parameter key name and the value. The parameter is + first checked to see if it corresponds to a standard mount flag (in which + case it is used to set an SB_xxx flag and consumed) or a security option + (in which case the LSM consumes it) before it is passed on to the + filesystem. + + The parameter value is typed and can be one of: + + fs_value_is_flag, Parameter not given a value. 
+ fs_value_is_string, Value is a string + fs_value_is_blob, Value is a binary blob + fs_value_is_filename, Value is a filename* + dirfd + fs_value_is_filename_empty, Value is a filename* + dirfd + AT_EMPTY_PATH + fs_value_is_file, Value is an open file (file*) + + If there is a value, that value is stored in a union in the struct in one + of param->{string,blob,name,file}. Note that the function may steal and + clear the pointer, but then becomes responsible for disposing of the + object. + + (*) int vfs_parse_fs_string(struct fs_context *fc, char *key, + const char *value, size_t v_size); + + A wrapper around vfs_parse_fs_param() that just passes a constant string. + + (*) int generic_parse_monolithic(struct fs_context *fc, + void *data, size_t data_len); + + Parse a sys_mount() data page, assuming the form to be a text list + consisting of key[=val] options separated by commas. Each item in the + list is passed to vfs_mount_option(). This is the default when the + ->parse_monolithic() operation is NULL. + + +===================== +PARAMETER DESCRIPTION +===================== + +Parameters are described using structures defined in linux/fs_parser.h. 
+There's a core description struct that links everything together: + + struct fs_parameter_description { + const char name[16]; + u8 nr_params; + u8 nr_alt_keys; + u8 nr_enums; + bool ignore_unknown; + bool no_source; + const char *const *keys; + const struct constant_table *alt_keys; + const struct fs_parameter_spec *specs; + const struct fs_parameter_enum *enums; + }; + +For example: + + enum afs_param { + Opt_autocell, + Opt_bar, + Opt_dyn, + Opt_foo, + Opt_source, + nr__afs_params + }; + + static const struct fs_parameter_description afs_fs_parameters = { + .name = "kAFS", + .nr_params = nr__afs_params, + .nr_alt_keys = ARRAY_SIZE(afs_param_alt_keys), + .nr_enums = ARRAY_SIZE(afs_param_enums), + .keys = afs_param_keys, + .alt_keys = afs_param_alt_keys, + .specs = afs_param_specs, + .enums = afs_param_enums, + }; + +The members are as follows: + + (1) const char name[16]; + + The name to be used in error messages generated by the parse helper + functions. + + (2) u8 nr_params; + + The number of discrete parameter identifiers. This indicates the number + of elements in the ->specs[] array and also limits the values that may be + used in the values that the ->keys[] array maps to. + + It is expected that, for example, two parameters that are related, say + "acl" and "noacl" will have the same ID, but will be flagged to indicate + that one is the inverse of the other. The value can then be picked out + from the parse result. + + (3) const struct fs_parameter_spec *specs; + + Table of parameter specifications, where the entries are of type: + + struct fs_parameter_spec { + enum fs_parameter_type type:8; + u8 flags; + }; + + and the parameter identifier is the index to the array. 
'type' indicates + the desired value type and must be one of: + + TYPE NAME EXPECTED VALUE RESULT IN + ======================= ======================= ===================== + fs_param_is_flag No value n/a + fs_param_is_bool Boolean value result->boolean + fs_param_is_u32 32-bit unsigned int result->uint_32 + fs_param_is_u32_octal 32-bit octal int result->uint_32 + fs_param_is_u32_hex 32-bit hex int result->uint_32 + fs_param_is_s32 32-bit signed int result->int_32 + fs_param_is_enum Enum value name result->uint_32 + fs_param_is_string Arbitrary string param->string + fs_param_is_blob Binary blob param->blob + fs_param_is_blockdev Blockdev path * Needs lookup + fs_param_is_path Path * Needs lookup + fs_param_is_fd File descriptor param->file + + And each parameter can be qualified with 'flags': + + fs_param_v_optional The value is optional + fs_param_neg_with_no If key name is prefixed with "no", it is false + fs_param_neg_with_empty If value is "", it is false + fs_param_deprecated The parameter is deprecated. + + For example: + + static const struct fs_parameter_spec afs_param_specs[nr__afs_params] = { + [Opt_autocell] = { fs_param_is_flag }, + [Opt_bar] = { fs_param_is_enum }, + [Opt_dyn] = { fs_param_is_flag }, + [Opt_foo] = { fs_param_is_bool, fs_param_neg_with_no }, + [Opt_source] = { fs_param_is_string }, + }; + + Note that if the value is of fs_param_is_bool type, fs_parse() will try + to match any string value against "0", "1", "no", "yes", "false", "true". + + [!] NOTE that the table must be sorted according to primary key name so + that ->keys[] is also sorted. + + (4) const char *const *keys; + + Table of primary key names for the parameters. There must be one entry + per defined parameter. The table is optional if ->nr_params is 0. 
The + table is just an array of names e.g.: + + static const char *const afs_param_keys[nr__afs_params] = { + [Opt_autocell] = "autocell", + [Opt_bar] = "bar", + [Opt_dyn] = "dyn", + [Opt_foo] = "foo", + [Opt_source] = "source", + }; + + [!] NOTE that the table must be sorted such that the table can be searched + with bsearch() using strcmp(). This means that the Opt_* values must + correspond to the entries in this table. + + (5) const struct constant_table *alt_keys; + u8 nr_alt_keys; + + Table of additional key names and their mappings to parameter ID plus the + number of elements in the table. This is optional. The table is just an + array of { name, integer } pairs, e.g.: + + static const struct constant_table afs_param_alt_keys[] = { + { "baz", Opt_bar }, + { "dynamic", Opt_dyn }, + }; + + [!] NOTE that the table must be sorted such that strcmp() can be used with + bsearch() to search the entries. + + The parameter ID can also be fs_param_key_removed to indicate that a + deprecated parameter has been removed and that an error will be given. + This differs from fs_param_deprecated where the parameter may still have + an effect. + + Further, the behaviour of the parameter may differ when an alternate name + is used (for instance with NFS, "v3", "v4.2", etc. are alternate names). + + (6) const struct fs_parameter_enum *enums; + u8 nr_enums; + + Table of enum value names to integer mappings and the number of elements + stored therein. This is of type: + + struct fs_parameter_enum { + u8 param_id; + char name[14]; + u8 value; + }; + + Where the array is an unsorted list of { parameter ID, name }-keyed + elements that indicate the value to map to, e.g.: + + static const struct fs_parameter_enum afs_param_enums[] = { + { Opt_bar, "x", 1}, + { Opt_bar, "y", 23}, + { Opt_bar, "z", 42}, + }; + + If a parameter of type fs_param_is_enum is encountered, fs_parse() will + try to look the value up in the enum table and the result will be stored + in the parse result. 
+ + (7) bool no_source; + + If this is set, fs_parse() will ignore any "source" parameter and not + pass it to the filesystem. + +The parser should be pointed to by the parser pointer in the file_system_type +struct as this will provide validation on registration (if +CONFIG_VALIDATE_FS_PARSER=y) and will allow the description to be queried from +userspace using the fsinfo() syscall. + + +========================== +PARAMETER HELPER FUNCTIONS +========================== + +A number of helper functions are provided to help a filesystem or an LSM +process the parameters it is given. + + (*) int lookup_constant(const struct constant_table tbl[], + const char *name, int not_found); + + Look up a constant by name in a table of name -> integer mappings. The + table is an array of elements of the following type: + + struct constant_table { + const char *name; + int value; + }; + + and it must be sorted such that it can be searched using bsearch() using + strcmp(). If a match is found, the corresponding value is returned. If a + match isn't found, the not_found value is returned instead. + + (*) bool validate_constant_table(const struct constant_table *tbl, + size_t tbl_size, + int low, int high, int special); + + Validate a constant table. Checks that all the elements are appropriately + ordered, that there are no duplicates and that the values are between low + and high inclusive, though provision is made for one allowable special + value outside of that range. If no special value is required, special + should just be set to lie inside the low-to-high range. + + If all is good, true is returned. If the table is invalid, errors are + logged to dmesg, the stack is dumped and false is returned. + + (*) int fs_parse(struct fs_context *fc, + const struct fs_param_parser *parser, + struct fs_parameter *param, + struct fs_param_parse_result *result); + + This is the main interpreter of parameters. 
It uses the parameter + description (parser) to look up the name of the parameter to use and to + convert that to a parameter ID (stored in result->key). + + If successful, and if the parameter type indicates the result is a + boolean, integer or enum type, the value is converted by this function and + the result stored in result->{boolean,int_32,uint_32}. + + If a match isn't initially made, the key is prefixed with "no" and no + value is present then an attempt will be made to look up the key with the + prefix removed. If this matches a parameter for which the type has flag + fs_param_neg_with_no set, then a match will be made and the value will be + set to false/0/NULL. + + If the parameter is successfully matched and, optionally, parsed + correctly, 1 is returned. If the parameter isn't matched and + parser->ignore_unknown is set, then 0 is returned. Otherwise -EINVAL is + returned. + + (*) bool fs_validate_description(const struct fs_parameter_description *desc); + + This validates the parameter description. It returns true if the + description is good and false if it is not. + + (*) int fs_lookup_param(struct fs_context *fc, + struct fs_parameter *value, + bool want_bdev, + struct path *_path); + + This takes a parameter that carries a string or filename type and attempts + to do a path lookup on it. If the parameter expects a blockdev, a check + is made that the inode actually represents one. + + Returns 0 if successful and *_path will be set; returns a negative error + code if not. 
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index eea8c5ce6335..2e018b8c2e19 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -19,6 +19,7 @@ #include <linux/of_fdt.h> #include <linux/of.h> #include <linux/cache.h> +#include <uapi/linux/mount.h> #include <asm/sections.h> #include <asm/arcregs.h> #include <asm/tlb.h> diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c index c10a3e8ee998..a8a4333929f5 100644 --- a/arch/arm/kernel/atags_parse.c +++ b/arch/arm/kernel/atags_parse.c @@ -24,6 +24,7 @@ #include <linux/root_dev.h> #include <linux/screen_info.h> #include <linux/memblock.h> +#include <uapi/linux/mount.h> #include <asm/setup.h> #include <asm/system_info.h> diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index e2bd35b6780c..c09fcc092a54 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -22,7 +22,6 @@ config KVM bool "Kernel-based Virtual Machine (KVM) support" depends on MMU && OF select PREEMPT_NOTIFIERS - select ANON_INODES select ARM_GIC select ARM_GIC_V3 select ARM_GIC_V3_ITS diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 47b23bf617c7..86fe9b3e3ff8 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -23,7 +23,6 @@ config KVM depends on OF select MMU_NOTIFIER select PREEMPT_NOTIFIERS - select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 46bff1661836..a9d4dc6c0427 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -611,7 +611,8 @@ pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f) static const struct dentry_operations pfmfs_dentry_operations; static struct dentry * -pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) +pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, + void *data, size_t data_size) { return 
mount_pseudo(fs_type, "pfm:", NULL, &pfmfs_dentry_operations, PFMFS_MAGIC); diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index 760aec70dce5..b58dae95189d 100644 --- a/arch/mips/kvm/Kconfig +++ b/arch/mips/kvm/Kconfig @@ -21,7 +21,6 @@ config KVM depends on MIPS_FP_SUPPORT select EXPORT_UASM select PREEMPT_NOTIFIERS - select ANON_INODES select KVM_GENERIC_DIRTYLOG_READ_PROTECT select HAVE_KVM_VCPU_ASYNC_IOCTL select KVM_MMIO diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 68a0e9d5b440..e058d02ee819 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -20,7 +20,6 @@ if VIRTUALIZATION config KVM bool select PREEMPT_NOTIFIERS - select ANON_INODES select HAVE_KVM_EVENTFD select HAVE_KVM_VCPU_ASYNC_IOCTL select SRCU diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index db329d4bf1c3..90d55b47c471 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -734,7 +734,7 @@ out: } static int -spufs_fill_super(struct super_block *sb, void *data, int silent) +spufs_fill_super(struct super_block *sb, void *data, size_t data_size, int silent) { struct spufs_sb_info *info; static const struct super_operations s_ops = { @@ -761,9 +761,9 @@ spufs_fill_super(struct super_block *sb, void *data, int silent) static struct dentry * spufs_mount(struct file_system_type *fstype, int flags, - const char *name, void *data) + const char *name, void *data, size_t data_size) { - return mount_single(fstype, flags, data, spufs_fill_super); + return mount_single(fstype, flags, data, data_size, spufs_fill_super); } static struct file_system_type spufs_type = { diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index c681329fdeec..f75b38fd6cfa 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -266,7 +266,8 @@ static int hypfs_show_options(struct seq_file *s, struct dentry *root) return 0; } -static int 
hypfs_fill_super(struct super_block *sb, void *data, int silent) +static int hypfs_fill_super(struct super_block *sb, + void *data, size_t data_size, int silent) { struct inode *root_inode; struct dentry *root_dentry; @@ -309,9 +310,9 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent) } static struct dentry *hypfs_mount(struct file_system_type *fst, int flags, - const char *devname, void *data) + const char *devname, void *data, size_t data_size) { - return mount_single(fst, flags, data, hypfs_fill_super); + return mount_single(fst, flags, data, data_size, hypfs_fill_super); } static void hypfs_kill_super(struct super_block *sb) diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index a3dbd459cce9..600e4fd11a67 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -21,7 +21,6 @@ config KVM prompt "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM select PREEMPT_NOTIFIERS - select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_VCPU_ASYNC_IOCTL select HAVE_KVM_EVENTFD diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index c286cf5da6e7..2c0e0f37a318 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -32,6 +32,7 @@ #include <linux/of.h> #include <linux/of_fdt.h> #include <linux/uaccess.h> +#include <uapi/linux/mount.h> #include <asm/io.h> #include <asm/page.h> #include <asm/elf.h> diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index 13664c377196..7df3d704284c 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -34,6 +34,7 @@ #include <linux/kdebug.h> #include <linux/export.h> #include <linux/start_kernel.h> +#include <uapi/linux/mount.h> #include <asm/io.h> #include <asm/processor.h> diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index cd2825cb8420..014390950333 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -33,6 +33,7 @@ #include <linux/module.h> 
#include <linux/start_kernel.h> #include <linux/memblock.h> +#include <uapi/linux/mount.h> #include <asm/io.h> #include <asm/processor.h> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 305dcb6498cc..92a16f46ea8d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -46,7 +46,6 @@ config X86 # select ACPI_LEGACY_TABLES_LOOKUP if ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI - select ANON_INODES select ARCH_CLOCKSOURCE_DATA select ARCH_CLOCKSOURCE_INIT select ARCH_DISCARD_MEMBLOCK diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 3cf7b533b3d1..806760188a31 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -398,3 +398,10 @@ 384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl 385 i386 io_pgetevents sys_io_pgetevents __ia32_compat_sys_io_pgetevents 386 i386 rseq sys_rseq __ia32_sys_rseq +387 i386 open_tree sys_open_tree __ia32_sys_open_tree +388 i386 move_mount sys_move_mount __ia32_sys_move_mount +389 i386 fsopen sys_fsopen __ia32_sys_fsopen +390 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig +391 i386 fsmount sys_fsmount __ia32_sys_fsmount +392 i386 fspick sys_fspick __ia32_sys_fspick +393 i386 fsinfo sys_fsinfo __ia32_sys_fsinfo diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index f0b1709a5ffb..0823eed2b02e 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -343,6 +343,13 @@ 332 common statx __x64_sys_statx 333 common io_pgetevents __x64_sys_io_pgetevents 334 common rseq __x64_sys_rseq +335 common open_tree __x64_sys_open_tree +336 common move_mount __x64_sys_move_mount +337 common fsopen __x64_sys_fsopen +338 common fsconfig __x64_sys_fsconfig +339 common fsmount __x64_sys_fsmount +340 common fspick __x64_sys_fspick +341 common fsinfo __x64_sys_fsinfo # # x32-specific system call numbers start at 512 to avoid cache impact diff --git 
a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 3736f6dc9545..c6bf7f700141 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -33,6 +33,21 @@ #define RMID_VAL_ERROR BIT_ULL(63) #define RMID_VAL_UNAVAIL BIT_ULL(62) + +struct rdt_fs_context { + struct kernfs_fs_context kfc; + bool enable_cdpl2; + bool enable_cdpl3; + bool enable_mba_mbps; +}; + +static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) +{ + struct kernfs_fs_context *kfc = fc->fs_private; + + return container_of(kfc, struct rdt_fs_context, kfc); +} + DECLARE_STATIC_KEY_FALSE(rdt_enable_key); /** diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index f27b8115ffa2..37c0ccb50823 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -24,6 +24,7 @@ #include <linux/cpu.h> #include <linux/debugfs.h> #include <linux/fs.h> +#include <linux/fs_parser.h> #include <linux/sysfs.h> #include <linux/kernfs.h> #include <linux/seq_buf.h> @@ -1858,43 +1859,6 @@ static void cdp_disable_all(void) cdpl2_disable(); } -static int parse_rdtgroupfs_options(char *data) -{ - char *token, *o = data; - int ret = 0; - - while ((token = strsep(&o, ",")) != NULL) { - if (!*token) { - ret = -EINVAL; - goto out; - } - - if (!strcmp(token, "cdp")) { - ret = cdpl3_enable(); - if (ret) - goto out; - } else if (!strcmp(token, "cdpl2")) { - ret = cdpl2_enable(); - if (ret) - goto out; - } else if (!strcmp(token, "mba_MBps")) { - ret = set_mba_sc(true); - if (ret) - goto out; - } else { - ret = -EINVAL; - goto out; - } - } - - return 0; - -out: - pr_err("Invalid mount option \"%s\"\n", token); - - return ret; -} - /* * We don't allow rdtgroup directories to be created anywhere * except the root directory. 
Thus when looking for the rdtgroup @@ -1966,13 +1930,27 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, struct kernfs_node **mon_data_kn); -static struct dentry *rdt_mount(struct file_system_type *fs_type, - int flags, const char *unused_dev_name, - void *data) +static int rdt_enable_ctx(struct rdt_fs_context *ctx) +{ + int ret = 0; + + if (ctx->enable_cdpl2) + ret = cdpl2_enable(); + + if (!ret && ctx->enable_cdpl3) + ret = cdpl3_enable(); + + if (!ret && ctx->enable_mba_mbps) + ret = set_mba_sc(true); + + return ret; +} + +static int rdt_get_tree(struct fs_context *fc) { + struct rdt_fs_context *ctx = rdt_fc2context(fc); struct rdt_domain *dom; struct rdt_resource *r; - struct dentry *dentry; int ret; cpus_read_lock(); @@ -1981,53 +1959,42 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, * resctrl file system can only be mounted once. */ if (static_branch_unlikely(&rdt_enable_key)) { - dentry = ERR_PTR(-EBUSY); + ret = -EBUSY; goto out; } - ret = parse_rdtgroupfs_options(data); - if (ret) { - dentry = ERR_PTR(ret); + ret = rdt_enable_ctx(ctx); + if (ret < 0) goto out_cdp; - } closid_init(); ret = rdtgroup_create_info_dir(rdtgroup_default.kn); - if (ret) { - dentry = ERR_PTR(ret); - goto out_cdp; - } + if (ret < 0) + goto out_mba; if (rdt_mon_capable) { ret = mongroup_create_dir(rdtgroup_default.kn, NULL, "mon_groups", &kn_mongrp); - if (ret) { - dentry = ERR_PTR(ret); + if (ret < 0) goto out_info; - } kernfs_get(kn_mongrp); ret = mkdir_mondata_all(rdtgroup_default.kn, &rdtgroup_default, &kn_mondata); - if (ret) { - dentry = ERR_PTR(ret); + if (ret < 0) goto out_mongrp; - } kernfs_get(kn_mondata); rdtgroup_default.mon.mon_data_kn = kn_mondata; } ret = rdt_pseudo_lock_init(); - if (ret) { - dentry = ERR_PTR(ret); + if (ret) goto out_mondata; - } - dentry = kernfs_mount(fs_type, flags, rdt_root, - RDTGROUP_SUPER_MAGIC, NULL); - if (IS_ERR(dentry)) + ret = kernfs_get_tree(fc); + if (ret < 0) goto out_psl; if 
(rdt_alloc_capable) @@ -2056,14 +2023,97 @@ out_mongrp: kernfs_remove(kn_mongrp); out_info: kernfs_remove(kn_info); +out_mba: + if (ctx->enable_mba_mbps) + set_mba_sc(false); out_cdp: cdp_disable_all(); out: rdt_last_cmd_clear(); mutex_unlock(&rdtgroup_mutex); cpus_read_unlock(); + return ret; +} + +enum rdt_param { + Opt_cdp, + Opt_cdpl2, + Opt_mba_mpbs, + nr__rdt_params +}; + +static const struct fs_parameter_spec rdt_param_specs[nr__rdt_params] = { + [Opt_cdp] = { fs_param_is_flag }, + [Opt_cdpl2] = { fs_param_is_flag }, + [Opt_mba_mpbs] = { fs_param_is_flag }, +}; + +static const char *const rdt_param_keys[nr__rdt_params] = { + [Opt_cdp] = "cdp", + [Opt_cdpl2] = "cdpl2", + [Opt_mba_mpbs] = "mba_MBps", /* must keep the "mba_MBps" spelling the old strcmp() option parser accepted */ +}; + +static const struct fs_parameter_description rdt_fs_parameters = { + .name = "rdt", + .nr_params = nr__rdt_params, + .keys = rdt_param_keys, + .specs = rdt_param_specs, + .no_source = true, +}; + +static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct rdt_fs_context *ctx = rdt_fc2context(fc); + struct fs_parse_result result; + int opt; - return dentry; + opt = fs_parse(fc, &rdt_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_cdp: + ctx->enable_cdpl3 = true; + return 0; + case Opt_cdpl2: + ctx->enable_cdpl2 = true; + return 0; + case Opt_mba_mpbs: + ctx->enable_mba_mbps = true; + return 0; + } + + return -EINVAL; +} + +static void rdt_fs_context_free(struct fs_context *fc) +{ + struct rdt_fs_context *ctx = rdt_fc2context(fc); + + kernfs_free_fs_context(fc); + kfree(ctx); +} + +static const struct fs_context_operations rdt_fs_context_ops = { + .free = rdt_fs_context_free, + .parse_param = rdt_parse_param, + .get_tree = rdt_get_tree, +}; + +static int rdt_init_fs_context(struct fs_context *fc, struct dentry *reference) +{ + struct rdt_fs_context *ctx; + + ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->kfc.root = rdt_root; + 
ctx->kfc.magic = RDTGROUP_SUPER_MAGIC; + fc->fs_private = &ctx->kfc; + fc->ops = &rdt_fs_context_ops; + return 0; } static int reset_all_ctrls(struct rdt_resource *r) @@ -2236,9 +2286,10 @@ static void rdt_kill_sb(struct super_block *sb) } static struct file_system_type rdt_fs_type = { - .name = "resctrl", - .mount = rdt_mount, - .kill_sb = rdt_kill_sb, + .name = "resctrl", + .init_fs_context = rdt_init_fs_context, + .parameters = &rdt_fs_parameters, + .kill_sb = rdt_kill_sb, }; static int mon_addfile(struct kernfs_node *parent_kn, const char *name, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b74e7bfed6ab..25a9802fffec 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -50,6 +50,7 @@ #include <linux/kvm_para.h> #include <linux/dma-contiguous.h> #include <xen/xen.h> +#include <uapi/linux/mount.h> #include <linux/errno.h> #include <linux/kernel.h> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 1bbec387d289..f3f2e547484b 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -27,7 +27,6 @@ config KVM depends on X86_LOCAL_APIC select PREEMPT_NOTIFIERS select MMU_NOTIFIER - select ANON_INODES select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select IRQ_BYPASS_MANAGER diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 3e63a900b330..ae213ed2a7c8 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -174,7 +174,6 @@ source "drivers/base/regmap/Kconfig" config DMA_SHARED_BUFFER bool default n - select ANON_INODES select IRQ_WORK help This option enables the framework for buffer-sharing between diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index b93fc862d365..6b7010e84d1e 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -25,6 +25,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/kthread.h> +#include <uapi/linux/mount.h> #include "base.h" static struct task_struct *thread; @@ -56,12 +57,12 @@ static int __init mount_param(char *str) 
__setup("devtmpfs.mount=", mount_param); static struct dentry *dev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) + const char *dev_name, void *data, size_t data_size) { #ifdef CONFIG_TMPFS - return mount_single(fs_type, flags, data, shmem_fill_super); + return mount_single(fs_type, flags, data, data_size, shmem_fill_super); #else - return mount_single(fs_type, flags, data, ramfs_fill_super); + return mount_single(fs_type, flags, data, data_size, ramfs_fill_super); #endif } diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig index 536e55d3919f..f3e4bc490cf0 100644 --- a/drivers/char/tpm/Kconfig +++ b/drivers/char/tpm/Kconfig @@ -157,7 +157,6 @@ config TCG_CRB config TCG_VTPM_PROXY tristate "VTPM Proxy Interface" depends on TCG_TPM - select ANON_INODES ---help--- This driver proxies for an emulated TPM (vTPM) running in userspace. A device /dev/vtpmx is provided that creates a device pair diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 6e928f37d084..bc65ca34ec4b 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -418,7 +418,7 @@ static const struct super_operations dax_sops = { }; static struct dentry *dax_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC); } diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig index 2e5a0faa2cb1..3fc9c2efc583 100644 --- a/drivers/dma-buf/Kconfig +++ b/drivers/dma-buf/Kconfig @@ -3,7 +3,6 @@ menu "DMABUF options" config SYNC_FILE bool "Explicit Synchronization Framework" default n - select ANON_INODES select DMA_SHARED_BUFFER ---help--- The Sync File Framework adds explicit syncronization via diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 833a1b51c948..587e5f005b61 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -12,7 +12,6 @@ config 
ARCH_HAVE_CUSTOM_GPIO_H menuconfig GPIOLIB bool "GPIO Support" - select ANON_INODES help This enables GPIO support through the generic GPIO library. You only need to enable this, if you also want to enable diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 36e8e9cbec52..4126bb6e1a4a 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -419,7 +419,8 @@ static const struct super_operations drm_fs_sops = { }; static struct dentry *drm_fs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) + const char *dev_name, + void *data, size_t data_size) { return mount_pseudo(fs_type, "drm:", diff --git a/drivers/gpu/drm/i915/i915_gemfs.c b/drivers/gpu/drm/i915/i915_gemfs.c index 888b7d3f04c3..bf0a355e8f46 100644 --- a/drivers/gpu/drm/i915/i915_gemfs.c +++ b/drivers/gpu/drm/i915/i915_gemfs.c @@ -57,7 +57,7 @@ int i915_gemfs_init(struct drm_i915_private *i915) int flags = 0; int err; - err = sb->s_op->remount_fs(sb, &flags, options); + err = sb->s_op->remount_fs(sb, &flags, options, sizeof(options)); if (err) { kern_unmount(gemfs); return err; diff --git a/drivers/iio/Kconfig b/drivers/iio/Kconfig index d08aeb41cd07..1dec0fecb6ef 100644 --- a/drivers/iio/Kconfig +++ b/drivers/iio/Kconfig @@ -4,7 +4,6 @@ menuconfig IIO tristate "Industrial I/O support" - select ANON_INODES help The industrial I/O subsystem provides a unified framework for drivers for many different types of embedded sensors using a diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 0a3ec7c726ec..2793ea38649b 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -25,7 +25,6 @@ config INFINIBAND_USER_MAD config INFINIBAND_USER_ACCESS tristate "InfiniBand userspace access (verbs and CM)" - select ANON_INODES depends on MMU ---help--- Userspace InfiniBand access support. 
This enables the diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 1d940a2885c9..28648ef1f4cc 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -506,7 +506,8 @@ bail: * after device init. The direct add_cntr_files() call handles adding * them from the init code, when the fs is already mounted. */ -static int qibfs_fill_super(struct super_block *sb, void *data, int silent) +static int qibfs_fill_super(struct super_block *sb, + void *data, size_t data_size, int silent) { struct qib_devdata *dd, *tmp; unsigned long flags; @@ -541,11 +542,11 @@ bail: } static struct dentry *qibfs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) + const char *dev_name, void *data, size_t data_size) { struct dentry *ret; - ret = mount_single(fs_type, flags, data, qibfs_fill_super); + ret = mount_single(fs_type, flags, data, data_size, qibfs_fill_super); if (!IS_ERR(ret)) qib_super = ret->d_sb; return ret; diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 750470ef2049..84c05fd46d04 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -42,7 +42,8 @@ static const struct dentry_operations cxl_fs_dops = { }; static struct dentry *cxl_fs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) + const char *dev_name, + void *data, size_t data_len) { return mount_pseudo(fs_type, "cxl:", NULL, &cxl_fs_dops, CXL_PSEUDO_FS_MAGIC); diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index fa840666bdd1..381cca3bc87a 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -88,13 +88,15 @@ static LIST_HEAD(service_processors); static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode); static void ibmasmfs_create_files (struct super_block *sb); -static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent); +static int 
ibmasmfs_fill_super (struct super_block *sb, void *data, size_t data_size, + int silent); static struct dentry *ibmasmfs_mount(struct file_system_type *fst, - int flags, const char *name, void *data) + int flags, const char *name, + void *data, size_t data_size) { - return mount_single(fst, flags, data, ibmasmfs_fill_super); + return mount_single(fst, flags, data, data_size, ibmasmfs_fill_super); } static const struct super_operations ibmasmfs_s_ops = { @@ -112,7 +114,8 @@ static struct file_system_type ibmasmfs_type = { }; MODULE_ALIAS_FS("ibmasmfs"); -static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent) +static int ibmasmfs_fill_super (struct super_block *sb, + void *data, size_t data_size, int silent) { struct inode *root; diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c index d58a61c09304..13706ea5cf50 100644 --- a/drivers/mtd/mtdsuper.c +++ b/drivers/mtd/mtdsuper.c @@ -61,9 +61,9 @@ static int get_sb_mtd_set(struct super_block *sb, void *_mtd) * get a superblock on an MTD-backed filesystem */ static struct dentry *mount_mtd_aux(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, + const char *dev_name, void *data, size_t data_size, struct mtd_info *mtd, - int (*fill_super)(struct super_block *, void *, int)) + int (*fill_super)(struct super_block *, void *, size_t, int)) { struct super_block *sb; int ret; @@ -79,7 +79,7 @@ static struct dentry *mount_mtd_aux(struct file_system_type *fs_type, int flags, pr_debug("MTDSB: New superblock for device %d (\"%s\")\n", mtd->index, mtd->name); - ret = fill_super(sb, data, flags & SB_SILENT ? 1 : 0); + ret = fill_super(sb, data, data_size, flags & SB_SILENT ? 
1 : 0); if (ret < 0) { deactivate_locked_super(sb); return ERR_PTR(ret); @@ -105,8 +105,10 @@ out_error: * get a superblock on an MTD-backed filesystem by MTD device number */ static struct dentry *mount_mtd_nr(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, int mtdnr, - int (*fill_super)(struct super_block *, void *, int)) + const char *dev_name, + void *data, size_t data_size, int mtdnr, + int (*fill_super)(struct super_block *, void *, + size_t, int)) { struct mtd_info *mtd; @@ -116,15 +118,16 @@ static struct dentry *mount_mtd_nr(struct file_system_type *fs_type, int flags, return ERR_CAST(mtd); } - return mount_mtd_aux(fs_type, flags, dev_name, data, mtd, fill_super); + return mount_mtd_aux(fs_type, flags, dev_name, data, data_size, mtd, + fill_super); } /* * set up an MTD-based superblock */ struct dentry *mount_mtd(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int)) + const char *dev_name, void *data, size_t data_size, + int (*fill_super)(struct super_block *, void *, size_t, int)) { #ifdef CONFIG_BLOCK struct block_device *bdev; @@ -153,7 +156,7 @@ struct dentry *mount_mtd(struct file_system_type *fs_type, int flags, if (!IS_ERR(mtd)) return mount_mtd_aux( fs_type, flags, - dev_name, data, mtd, + dev_name, data, data_size, mtd, fill_super); printk(KERN_NOTICE "MTD:" @@ -170,7 +173,7 @@ struct dentry *mount_mtd(struct file_system_type *fs_type, int flags, pr_debug("MTDSB: mtd%%d, mtdnr %d\n", mtdnr); return mount_mtd_nr(fs_type, flags, - dev_name, data, + dev_name, data, data_size, mtdnr, fill_super); } } @@ -197,7 +200,8 @@ struct dentry *mount_mtd(struct file_system_type *fs_type, int flags, if (major != MTD_BLOCK_MAJOR) goto not_an_MTD_device; - return mount_mtd_nr(fs_type, flags, dev_name, data, mtdnr, fill_super); + return mount_mtd_nr(fs_type, flags, dev_name, data, data_size, mtdnr, + fill_super); not_an_MTD_device: #endif /* 
CONFIG_BLOCK */ diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index 4ea08979312c..c721d7fd7c7e 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -238,7 +238,8 @@ struct dentry *oprofilefs_mkdir(struct dentry *parent, char const *name) } -static int oprofilefs_fill_super(struct super_block *sb, void *data, int silent) +static int oprofilefs_fill_super(struct super_block *sb, + void *data, size_t data_size, int silent) { struct inode *root_inode; @@ -265,9 +266,10 @@ static int oprofilefs_fill_super(struct super_block *sb, void *data, int silent) static struct dentry *oprofilefs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_single(fs_type, flags, data, oprofilefs_fill_super); + return mount_single(fs_type, flags, data, data_size, + oprofilefs_fill_super); } diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c index 37b8dc60f5f6..1ca13e5e4ec2 100644 --- a/drivers/scsi/cxlflash/ocxl_hw.c +++ b/drivers/scsi/cxlflash/ocxl_hw.c @@ -50,7 +50,7 @@ static const struct dentry_operations ocxlflash_fs_dops = { */ static struct dentry *ocxlflash_fs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, size_t data_size) { return mount_pseudo(fs_type, "ocxlflash:", NULL, &ocxlflash_fs_dops, OCXLFLASH_FS_MAGIC); diff --git a/drivers/staging/erofs/super.c b/drivers/staging/erofs/super.c index f69e619807a1..d2a69ea6baad 100644 --- a/drivers/staging/erofs/super.c +++ b/drivers/staging/erofs/super.c @@ -518,7 +518,7 @@ struct erofs_mount_private { /* support mount_bdev() with options */ static int erofs_fill_super(struct super_block *sb, - void *_priv, int silent) + void *_priv, size_t data_size, int silent) { struct erofs_mount_private *priv = _priv; @@ -526,9 +526,9 @@ static int erofs_fill_super(struct super_block *sb, 
priv->options, silent); } -static struct dentry *erofs_mount( - struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) +static struct dentry *erofs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, size_t data_size) { struct erofs_mount_private priv = { .dev_name = dev_name, @@ -536,7 +536,7 @@ static struct dentry *erofs_mount( }; return mount_bdev(fs_type, flags, dev_name, - &priv, erofs_fill_super); + &priv, sizeof(priv), erofs_fill_super); } static void erofs_kill_sb(struct super_block *sb) @@ -648,7 +648,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root) return 0; } -static int erofs_remount(struct super_block *sb, int *flags, char *data) +static int erofs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { struct erofs_sb_info *sbi = EROFS_SB(sb); unsigned int org_mnt_opt = sbi->mount_opt; diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 31e8bf3578c8..0ded4424f839 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1366,7 +1366,8 @@ struct ffs_sb_fill_data { struct ffs_data *ffs_data; }; -static int ffs_sb_fill(struct super_block *sb, void *_data, int silent) +static int ffs_sb_fill(struct super_block *sb, void *_data, size_t data_size, + int silent) { struct ffs_sb_fill_data *data = _data; struct inode *inode; @@ -1494,7 +1495,7 @@ invalid: static struct dentry * ffs_fs_mount(struct file_system_type *t, int flags, - const char *dev_name, void *opts) + const char *dev_name, void *opts, size_t data_size) { struct ffs_sb_fill_data data = { .perms = { @@ -1536,7 +1537,7 @@ ffs_fs_mount(struct file_system_type *t, int flags, ffs->private_data = ffs_dev; data.ffs_data = ffs; - rv = mount_nodev(t, flags, &data, ffs_sb_fill); + rv = mount_nodev(t, flags, &data, sizeof(data), ffs_sb_fill); if (IS_ERR(rv) && data.ffs_data) { ffs_release_dev(data.ffs_data); 
ffs_data_put(data.ffs_data); diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 37ca0e669bd8..286a982b43a3 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1990,7 +1990,8 @@ static const struct super_operations gadget_fs_operations = { }; static int -gadgetfs_fill_super (struct super_block *sb, void *opts, int silent) +gadgetfs_fill_super (struct super_block *sb, void *opts, size_t data_size, + int silent) { struct inode *inode; struct dev_data *dev; @@ -2046,9 +2047,9 @@ Enomem: /* "mount -t gadgetfs path /dev/gadget" ends up here */ static struct dentry * gadgetfs_mount (struct file_system_type *t, int flags, - const char *path, void *opts) + const char *path, void *opts, size_t data_size) { - return mount_single (t, flags, opts, gadgetfs_fill_super); + return mount_single (t, flags, opts, data_size, gadgetfs_fill_super); } static void diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 9de5ed38da83..3798d77d131c 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -22,7 +22,6 @@ menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM || ARM64) - select ANON_INODES help VFIO provides a framework for secure userspace device drivers. See Documentation/vfio.txt for more details. 
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 728ecd1eea30..d36ccb72e5e4 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -724,7 +724,7 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, } static struct dentry *balloon_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { static const struct dentry_operations ops = { .d_dname = simple_dname, diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 71ddfb4cf61c..fc4e6e43b66f 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -42,7 +42,8 @@ static const struct file_operations capabilities_file_ops = { .llseek = default_llseek, }; -static int xenfs_fill_super(struct super_block *sb, void *data, int silent) +static int xenfs_fill_super(struct super_block *sb, + void *data, size_t data_size, int silent) { static const struct tree_descr xenfs_files[] = { [2] = { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR }, @@ -69,9 +70,9 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent) static struct dentry *xenfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, size_t data_size) { - return mount_single(fs_type, flags, data, xenfs_fill_super); + return mount_single(fs_type, flags, data, data_size, xenfs_fill_super); } static struct file_system_type xenfs_type = { diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 48ce50484e80..7def28abd3a5 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -116,7 +116,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, */ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) + const char *dev_name, void *data, size_t data_size) { struct super_block *sb = NULL; struct inode *inode = NULL; 
diff --git a/fs/Kconfig b/fs/Kconfig index ac474a61be37..25700b152c75 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -8,6 +8,13 @@ menu "File systems" config DCACHE_WORD_ACCESS bool +config VALIDATE_FS_PARSER + bool "Validate filesystem parameter description" + default y + help + Enable this to perform validation of the parameter description for a + filesystem when it is registered. + if BLOCK config FS_IOMAP diff --git a/fs/Makefile b/fs/Makefile index 23fcd8c164a3..2612e9930332 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -13,6 +13,7 @@ obj-y := open.o read_write.o file_table.o super.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o splice.o sync.o utimes.o d_path.o \ stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \ + fs_context.o fs_parser.o fsopen.o \ fs_types.o ifeq ($(CONFIG_BLOCK),y) @@ -25,7 +26,7 @@ obj-$(CONFIG_PROC_FS) += proc_namespace.o obj-y += notify/ obj-$(CONFIG_EPOLL) += eventpoll.o -obj-$(CONFIG_ANON_INODES) += anon_inodes.o +obj-y += anon_inodes.o obj-$(CONFIG_SIGNALFD) += signalfd.o obj-$(CONFIG_TIMERFD) += timerfd.o obj-$(CONFIG_EVENTFD) += eventfd.o diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 7e099a7a4eb1..d5e90b6d61aa 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -210,7 +210,7 @@ static int parse_options(struct super_block *sb, char *options) return 0; } -static int adfs_remount(struct super_block *sb, int *flags, char *data) +static int adfs_remount(struct super_block *sb, int *flags, char *data, size_t data_size) { sync_filesystem(sb); *flags |= SB_NODIRATIME; @@ -363,7 +363,8 @@ static inline unsigned long adfs_discsize(struct adfs_discrecord *dr, int block_ return discsize; } -static int adfs_fill_super(struct super_block *sb, void *data, int silent) +static int adfs_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct adfs_discrecord *dr; struct buffer_head *bh; @@ -523,9 +524,9 @@ error: } static struct dentry *adfs_mount(struct file_system_type *fs_type, - int flags, const char 
*dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, adfs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, adfs_fill_super); } static struct file_system_type adfs_fs_type = { diff --git a/fs/affs/super.c b/fs/affs/super.c index d1ad11a8a4a5..69dd5da6d88b 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -26,7 +26,8 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf); static int affs_show_options(struct seq_file *m, struct dentry *root); -static int affs_remount (struct super_block *sb, int *flags, char *data); +static int affs_remount (struct super_block *sb, int *flags, + char *data, size_t data_size); static void affs_commit_super(struct super_block *sb, int wait) @@ -335,7 +336,8 @@ static int affs_show_options(struct seq_file *m, struct dentry *root) * hopefully have the guts to do so. Until then: sorry for the mess. */ -static int affs_fill_super(struct super_block *sb, void *data, int silent) +static int affs_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct affs_sb_info *sbi; struct buffer_head *root_bh = NULL; @@ -550,7 +552,7 @@ got_root: } static int -affs_remount(struct super_block *sb, int *flags, char *data) +affs_remount(struct super_block *sb, int *flags, char *data, size_t data_size) { struct affs_sb_info *sbi = AFFS_SB(sb); int blocksize; @@ -633,9 +635,10 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf) } static struct dentry *affs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, affs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + affs_fill_super); } static void affs_kill_sb(struct super_block *sb) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 5da3b09b7518..a2805e063358 
100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -36,15 +36,14 @@ struct pagevec; struct afs_call; -struct afs_mount_params { - bool rwpath; /* T if the parent should be considered R/W */ +struct afs_fs_context { bool force; /* T to force cell type */ bool autocell; /* T if set auto mount operation */ bool dyn_root; /* T if dynamic root */ + bool no_cell; /* T if the source is "none" (for dynroot) */ afs_voltype_t type; /* type of volume requested */ - int volnamesz; /* size of volume name */ + unsigned int volnamesz; /* size of volume name */ const char *volname; /* name of volume to mount */ - struct net *net_ns; /* Network namespace in effect */ struct afs_net *net; /* the AFS net namespace stuff */ struct afs_cell *cell; /* cell in which to find volume */ struct afs_volume *volume; /* volume record */ @@ -223,6 +222,7 @@ struct afs_super_info { struct afs_cell *cell; /* The cell in which the volume resides */ struct afs_volume *volume; /* volume record */ bool dyn_root; /* True if dynamic root */ + bool autocell; /* True if autocell */ }; static inline struct afs_super_info *AFS_FS_S(struct super_block *sb) @@ -1265,7 +1265,7 @@ static inline struct afs_volume *__afs_get_volume(struct afs_volume *volume) return volume; } -extern struct afs_volume *afs_create_volume(struct afs_mount_params *); +extern struct afs_volume *afs_create_volume(struct afs_fs_context *); extern void afs_activate_volume(struct afs_volume *); extern void afs_deactivate_volume(struct afs_volume *); extern void afs_put_volume(struct afs_cell *, struct afs_volume *); diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 2e51c6994148..fd9a086d8804 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -47,6 +47,8 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out); static unsigned long afs_mntpt_expiry_timeout = 10 * 60; +static const char afs_root_volume[] = "root.cell"; + /* * no valid lookup procedure on this sort of dir */ @@ -68,107 +70,108 @@ static int 
afs_mntpt_open(struct inode *inode, struct file *file) } /* - * create a vfsmount to be automounted + * Set the parameters for the proposed superblock. */ -static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) +static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) { - struct afs_super_info *as; - struct vfsmount *mnt; - struct afs_vnode *vnode; - struct page *page; - char *devname, *options; - bool rwpath = false; + struct afs_fs_context *ctx = fc->fs_private; + struct afs_vnode *vnode = AFS_FS_I(d_inode(mntpt)); + struct afs_cell *cell; + const char *p; int ret; - _enter("{%pd}", mntpt); - - BUG_ON(!d_inode(mntpt)); - - ret = -ENOMEM; - devname = (char *) get_zeroed_page(GFP_KERNEL); - if (!devname) - goto error_no_devname; - - options = (char *) get_zeroed_page(GFP_KERNEL); - if (!options) - goto error_no_options; - - vnode = AFS_FS_I(d_inode(mntpt)); if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) { /* if the directory is a pseudo directory, use the d_name */ - static const char afs_root_cell[] = ":root.cell."; unsigned size = mntpt->d_name.len; - ret = -ENOENT; - if (size < 2 || size > AFS_MAXCELLNAME) - goto error_no_page; + if (size < 2) + return -ENOENT; + p = mntpt->d_name.name; if (mntpt->d_name.name[0] == '.') { - devname[0] = '%'; - memcpy(devname + 1, mntpt->d_name.name + 1, size - 1); - memcpy(devname + size, afs_root_cell, - sizeof(afs_root_cell)); - rwpath = true; - } else { - devname[0] = '#'; - memcpy(devname + 1, mntpt->d_name.name, size); - memcpy(devname + size + 1, afs_root_cell, - sizeof(afs_root_cell)); + size--; + p++; + ctx->type = AFSVL_RWVOL; + ctx->force = true; + } + if (size > AFS_MAXCELLNAME) + return -ENAMETOOLONG; + + cell = afs_lookup_cell(ctx->net, p, size, NULL, false); + if (IS_ERR(cell)) { + pr_err("kAFS: unable to lookup cell '%pd'\n", mntpt); + return PTR_ERR(cell); } + afs_put_cell(ctx->net, ctx->cell); + ctx->cell = cell; + + ctx->volname = afs_root_volume; + ctx->volnamesz = 
sizeof(afs_root_volume) - 1; } else { /* read the contents of the AFS special symlink */ + struct page *page; loff_t size = i_size_read(d_inode(mntpt)); char *buf; - ret = -EINVAL; if (size > PAGE_SIZE - 1) - goto error_no_page; + return -EINVAL; page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL); - if (IS_ERR(page)) { - ret = PTR_ERR(page); - goto error_no_page; - } + if (IS_ERR(page)) + return PTR_ERR(page); if (PageError(page)) { ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt); - goto error; + put_page(page); + return ret; } - buf = kmap_atomic(page); - memcpy(devname, buf, size); - kunmap_atomic(buf); + buf = kmap(page); + ret = vfs_parse_fs_string(fc, "source", buf, size); + kunmap(page); put_page(page); - page = NULL; + if (ret < 0) + return ret; } - /* work out what options we want */ - as = AFS_FS_S(mntpt->d_sb); - if (as->cell) { - memcpy(options, "cell=", 5); - strcpy(options + 5, as->cell->name); - if ((as->volume && as->volume->type == AFSVL_RWVOL) || rwpath) - strcat(options, ",rwpath"); - } + return 0; +} - /* try and do the mount */ - _debug("--- attempting mount %s -o %s ---", devname, options); - mnt = vfs_submount(mntpt, &afs_fs_type, devname, options); - _debug("--- mount result %p ---", mnt); +/* + * create a vfsmount to be automounted + */ +static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) +{ + struct fs_context *fc; + struct vfsmount *mnt; + int ret; + + BUG_ON(!d_inode(mntpt)); + + fc = vfs_new_fs_context(&afs_fs_type, mntpt, 0, 0, + FS_CONTEXT_FOR_SUBMOUNT); + if (IS_ERR(fc)) + return ERR_CAST(fc); + + ret = afs_mntpt_set_params(fc, mntpt); + if (ret < 0) + goto error_fc; + + ret = vfs_get_tree(fc); + if (ret < 0) + goto error_fc; + + mnt = vfs_create_mount(fc, 0); + if (IS_ERR(mnt)) { + ret = PTR_ERR(mnt); + goto error_fc; + } - free_page((unsigned long) devname); - free_page((unsigned long) options); - _leave(" = %p", mnt); + put_fs_context(fc); return mnt; -error: - put_page(page); 
-error_no_page: - free_page((unsigned long) options); -error_no_options: - free_page((unsigned long) devname); -error_no_devname: - _leave(" = %d", ret); +error_fc: + put_fs_context(fc); return ERR_PTR(ret); } diff --git a/fs/afs/super.c b/fs/afs/super.c index dcd07fe99871..ad87054e5c67 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -1,6 +1,6 @@ /* AFS superblock handling * - * Copyright (c) 2002, 2007 Red Hat, Inc. All rights reserved. + * Copyright (c) 2002, 2007, 2018 Red Hat, Inc. All rights reserved. * * This software may be freely redistributed under the terms of the * GNU General Public License. @@ -21,30 +21,33 @@ #include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> -#include <linux/parser.h> +#include <linux/fs_parser.h> #include <linux/statfs.h> #include <linux/sched.h> #include <linux/nsproxy.h> #include <linux/magic.h> +#include <linux/fsinfo.h> #include <net/net_namespace.h> #include "internal.h" static void afs_i_init_once(void *foo); -static struct dentry *afs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data); static void afs_kill_super(struct super_block *sb); static struct inode *afs_alloc_inode(struct super_block *sb); static void afs_destroy_inode(struct inode *inode); static int afs_statfs(struct dentry *dentry, struct kstatfs *buf); +static int afs_fsinfo(struct path *path, struct fsinfo_kparams *params); static int afs_show_devname(struct seq_file *m, struct dentry *root); static int afs_show_options(struct seq_file *m, struct dentry *root); +static int afs_init_fs_context(struct fs_context *fc, struct dentry *reference); +static const struct fs_parameter_description afs_fs_parameters; struct file_system_type afs_fs_type = { - .owner = THIS_MODULE, - .name = "afs", - .mount = afs_mount, - .kill_sb = afs_kill_super, - .fs_flags = 0, + .owner = THIS_MODULE, + .name = "afs", + .init_fs_context = afs_init_fs_context, + .parameters = &afs_fs_parameters, + .kill_sb = afs_kill_super, + 
.fs_flags = 0, }; MODULE_ALIAS_FS("afs"); @@ -52,6 +55,7 @@ int afs_net_id; static const struct super_operations afs_super_ops = { .statfs = afs_statfs, + .fsinfo = afs_fsinfo, .alloc_inode = afs_alloc_inode, .drop_inode = afs_drop_inode, .destroy_inode = afs_destroy_inode, @@ -63,22 +67,31 @@ static const struct super_operations afs_super_ops = { static struct kmem_cache *afs_inode_cachep; static atomic_t afs_count_active_inodes; -enum { - afs_no_opt, - afs_opt_cell, - afs_opt_dyn, - afs_opt_rwpath, - afs_opt_vol, - afs_opt_autocell, +enum afs_param { + Opt_autocell, + Opt_dyn, + Opt_source, + nr__afs_params }; -static const match_table_t afs_options_list = { - { afs_opt_cell, "cell=%s" }, - { afs_opt_dyn, "dyn" }, - { afs_opt_rwpath, "rwpath" }, - { afs_opt_vol, "vol=%s" }, - { afs_opt_autocell, "autocell" }, - { afs_no_opt, NULL }, +static const struct fs_parameter_spec afs_param_specs[nr__afs_params] = { + [Opt_autocell] = { fs_param_is_flag }, + [Opt_dyn] = { fs_param_is_flag }, + [Opt_source] = { fs_param_is_string }, +}; + +static const char *const afs_param_keys[nr__afs_params] = { + [Opt_autocell] = "autocell", + [Opt_dyn] = "dyn", + [Opt_source] = "source", +}; + +static const struct fs_parameter_description afs_fs_parameters = { + .name = "kAFS", + .nr_params = nr__afs_params, + .source_param = Opt_source, + .keys = afs_param_keys, + .specs = afs_param_specs, }; /* @@ -184,90 +197,29 @@ static int afs_show_options(struct seq_file *m, struct dentry *root) if (as->dyn_root) seq_puts(m, ",dyn"); - if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags)) + if (as->autocell) seq_puts(m, ",autocell"); return 0; } /* - * parse the mount options - * - this function has been shamelessly adapted from the ext3 fs which - * shamelessly adapted it from the msdos fs - */ -static int afs_parse_options(struct afs_mount_params *params, - char *options, const char **devname) -{ - struct afs_cell *cell; - substring_t args[MAX_OPT_ARGS]; - char *p; - int token; - 
- _enter("%s", options); - - options[PAGE_SIZE - 1] = 0; - - while ((p = strsep(&options, ","))) { - if (!*p) - continue; - - token = match_token(p, afs_options_list, args); - switch (token) { - case afs_opt_cell: - rcu_read_lock(); - cell = afs_lookup_cell_rcu(params->net, - args[0].from, - args[0].to - args[0].from); - rcu_read_unlock(); - if (IS_ERR(cell)) - return PTR_ERR(cell); - afs_put_cell(params->net, params->cell); - params->cell = cell; - break; - - case afs_opt_rwpath: - params->rwpath = true; - break; - - case afs_opt_vol: - *devname = args[0].from; - break; - - case afs_opt_autocell: - params->autocell = true; - break; - - case afs_opt_dyn: - params->dyn_root = true; - break; - - default: - printk(KERN_ERR "kAFS:" - " Unknown or invalid mount option: '%s'\n", p); - return -EINVAL; - } - } - - _leave(" = 0"); - return 0; -} - -/* - * parse a device name to get cell name, volume name, volume type and R/W - * selector - * - this can be one of the following: + * Parse the source name to get cell name, volume name, volume type and R/W + * selector. 
+ * + * This can be one of the following: * "%[cell:]volume[.]" R/W volume - * "#[cell:]volume[.]" R/O or R/W volume (rwpath=0), - * or R/W (rwpath=1) volume + * "#[cell:]volume[.]" R/O or R/W volume (R/O parent), + * or R/W (R/W parent) volume * "%[cell:]volume.readonly" R/O volume * "#[cell:]volume.readonly" R/O volume * "%[cell:]volume.backup" Backup volume * "#[cell:]volume.backup" Backup volume */ -static int afs_parse_device_name(struct afs_mount_params *params, - const char *name) +static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param) { + struct afs_fs_context *ctx = fc->fs_private; struct afs_cell *cell; - const char *cellname, *suffix; + const char *cellname, *suffix, *name = param->string; int cellnamesz; _enter(",%s", name); @@ -278,69 +230,149 @@ static int afs_parse_device_name(struct afs_mount_params *params, } if ((name[0] != '%' && name[0] != '#') || !name[1]) { + /* To use dynroot, we don't want to have to provide a source */ + if (strcmp(name, "none") == 0) { + ctx->no_cell = true; + return 0; + } printk(KERN_ERR "kAFS: unparsable volume name\n"); return -EINVAL; } /* determine the type of volume we're looking for */ - params->type = AFSVL_ROVOL; - params->force = false; - if (params->rwpath || name[0] == '%') { - params->type = AFSVL_RWVOL; - params->force = true; + if (name[0] == '%') { + ctx->type = AFSVL_RWVOL; + ctx->force = true; } name++; /* split the cell name out if there is one */ - params->volname = strchr(name, ':'); - if (params->volname) { + ctx->volname = strchr(name, ':'); + if (ctx->volname) { cellname = name; - cellnamesz = params->volname - name; - params->volname++; + cellnamesz = ctx->volname - name; + ctx->volname++; } else { - params->volname = name; + ctx->volname = name; cellname = NULL; cellnamesz = 0; } /* the volume type is further affected by a possible suffix */ - suffix = strrchr(params->volname, '.'); + suffix = strrchr(ctx->volname, '.'); if (suffix) { if (strcmp(suffix, ".readonly") == 0) 
{ - params->type = AFSVL_ROVOL; - params->force = true; + ctx->type = AFSVL_ROVOL; + ctx->force = true; } else if (strcmp(suffix, ".backup") == 0) { - params->type = AFSVL_BACKVOL; - params->force = true; + ctx->type = AFSVL_BACKVOL; + ctx->force = true; } else if (suffix[1] == 0) { } else { suffix = NULL; } } - params->volnamesz = suffix ? - suffix - params->volname : strlen(params->volname); + ctx->volnamesz = suffix ? + suffix - ctx->volname : strlen(ctx->volname); _debug("cell %*.*s [%p]", - cellnamesz, cellnamesz, cellname ?: "", params->cell); + cellnamesz, cellnamesz, cellname ?: "", ctx->cell); /* lookup the cell record */ - if (cellname || !params->cell) { - cell = afs_lookup_cell(params->net, cellname, cellnamesz, + if (cellname) { + cell = afs_lookup_cell(ctx->net, cellname, cellnamesz, NULL, false); if (IS_ERR(cell)) { - printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n", + pr_err("kAFS: unable to lookup cell '%*.*s'\n", cellnamesz, cellnamesz, cellname ?: ""); return PTR_ERR(cell); } - afs_put_cell(params->net, params->cell); - params->cell = cell; + afs_put_cell(ctx->net, ctx->cell); + ctx->cell = cell; } _debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s", - params->cell->name, params->cell, - params->volnamesz, params->volnamesz, params->volname, - suffix ?: "-", params->type, params->force ? " FORCE" : ""); + ctx->cell->name, ctx->cell, + ctx->volnamesz, ctx->volnamesz, ctx->volname, + suffix ?: "-", ctx->type, ctx->force ? " FORCE" : ""); + + fc->source = param->string; + param->string = NULL; + return 0; +} + +/* + * Parse a single mount parameter. 
+ */ +static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct fs_parse_result result; + struct afs_fs_context *ctx = fc->fs_private; + int opt; + + opt = fs_parse(fc, &afs_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_source: + return afs_parse_source(fc, param); + + case Opt_autocell: + ctx->autocell = true; + break; + + case Opt_dyn: + ctx->dyn_root = true; + break; + + default: + return -EINVAL; + } + + _leave(" = 0"); + return 0; +} + +/* + * Validate the options, get the cell key and look up the volume. + */ +static int afs_validate_fc(struct fs_context *fc) +{ + struct afs_fs_context *ctx = fc->fs_private; + struct afs_volume *volume; + struct key *key; + + if (!ctx->dyn_root) { + if (ctx->no_cell) { + pr_warn("kAFS: Can only specify source 'none' with -o dyn\n"); + return -EINVAL; + } + + if (!ctx->cell) { + pr_warn("kAFS: No cell specified\n"); + return -EDESTADDRREQ; + } + + /* We try to do the mount securely. 
*/ + key = afs_request_key(ctx->cell); + if (IS_ERR(key)) + return PTR_ERR(key); + + ctx->key = key; + + if (ctx->volume) { + afs_put_volume(ctx->cell, ctx->volume); + ctx->volume = NULL; + } + + volume = afs_create_volume(ctx); + if (IS_ERR(volume)) + return PTR_ERR(volume); + + ctx->volume = volume; + } return 0; } @@ -348,39 +380,34 @@ static int afs_parse_device_name(struct afs_mount_params *params, /* * check a superblock to see if it's the one we're looking for */ -static int afs_test_super(struct super_block *sb, void *data) +static int afs_test_super(struct super_block *sb, struct fs_context *fc) { - struct afs_super_info *as1 = data; + struct afs_fs_context *ctx = fc->fs_private; struct afs_super_info *as = AFS_FS_S(sb); - return (as->net_ns == as1->net_ns && + return (as->net_ns == fc->net_ns && as->volume && - as->volume->vid == as1->volume->vid && + as->volume->vid == ctx->volume->vid && !as->dyn_root); } -static int afs_dynroot_test_super(struct super_block *sb, void *data) +static int afs_dynroot_test_super(struct super_block *sb, struct fs_context *fc) { - struct afs_super_info *as1 = data; struct afs_super_info *as = AFS_FS_S(sb); - return (as->net_ns == as1->net_ns && + return (as->net_ns == fc->net_ns && as->dyn_root); } -static int afs_set_super(struct super_block *sb, void *data) +static int afs_set_super(struct super_block *sb, struct fs_context *fc) { - struct afs_super_info *as = data; - - sb->s_fs_info = as; return set_anon_super(sb, NULL); } /* * fill in the superblock */ -static int afs_fill_super(struct super_block *sb, - struct afs_mount_params *params) +static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) { struct afs_super_info *as = AFS_FS_S(sb); struct afs_fid fid; @@ -412,13 +439,13 @@ static int afs_fill_super(struct super_block *sb, fid.vnode = 1; fid.vnode_hi = 0; fid.unique = 1; - inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL); + inode = afs_iget(sb, ctx->key, &fid, NULL, NULL, NULL); } if 
(IS_ERR(inode)) return PTR_ERR(inode); - if (params->autocell || params->dyn_root) + if (as->autocell || as->dyn_root) set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); ret = -ENOMEM; @@ -443,17 +470,22 @@ error: return ret; } -static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params) +static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc) { + struct afs_fs_context *ctx = fc->fs_private; struct afs_super_info *as; as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); if (as) { - as->net_ns = get_net(params->net_ns); - if (params->dyn_root) + as->net_ns = get_net(fc->net_ns); + if (ctx->dyn_root) { as->dyn_root = true; - else - as->cell = afs_get_cell(params->cell); + } else { + as->cell = afs_get_cell(ctx->cell); + as->volume = __afs_get_volume(ctx->volume); + } + if (ctx->autocell) + as->autocell = true; } return as; } @@ -475,7 +507,7 @@ static void afs_kill_super(struct super_block *sb) if (as->dyn_root) afs_dynroot_depopulate(sb); - + /* Clear the callback interests (which will do ilookup5) before * deactivating the superblock. */ @@ -488,111 +520,128 @@ static void afs_kill_super(struct super_block *sb) } /* - * get an AFS superblock + * Get an AFS superblock and root directory. 
*/ -static struct dentry *afs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *options) +static int afs_get_tree(struct fs_context *fc) { - struct afs_mount_params params; + struct afs_fs_context *ctx = fc->fs_private; struct super_block *sb; - struct afs_volume *candidate; - struct key *key; struct afs_super_info *as; int ret; - _enter(",,%s,%p", dev_name, options); - - memset(&params, 0, sizeof(params)); - - ret = -EINVAL; - if (current->nsproxy->net_ns != &init_net) - goto error; - params.net_ns = current->nsproxy->net_ns; - params.net = afs_net(params.net_ns); - - /* parse the options and device name */ - if (options) { - ret = afs_parse_options(&params, options, &dev_name); - if (ret < 0) - goto error; - } - - if (!params.dyn_root) { - ret = afs_parse_device_name(&params, dev_name); - if (ret < 0) - goto error; - - /* try and do the mount securely */ - key = afs_request_key(params.cell); - if (IS_ERR(key)) { - _leave(" = %ld [key]", PTR_ERR(key)); - ret = PTR_ERR(key); - goto error; - } - params.key = key; - } + _enter(""); /* allocate a superblock info record */ ret = -ENOMEM; - as = afs_alloc_sbi(&params); + as = afs_alloc_sbi(fc); if (!as) - goto error_key; - - if (!params.dyn_root) { - /* Assume we're going to need a volume record; at the very - * least we can use it to update the volume record if we have - * one already. This checks that the volume exists within the - * cell. - */ - candidate = afs_create_volume(&params); - if (IS_ERR(candidate)) { - ret = PTR_ERR(candidate); - goto error_as; - } - - as->volume = candidate; - } + goto error; + fc->s_fs_info = as; /* allocate a deviceless superblock */ - sb = sget(fs_type, - as->dyn_root ? afs_dynroot_test_super : afs_test_super, - afs_set_super, flags, as); + sb = sget_fc(fc, + as->dyn_root ? 
afs_dynroot_test_super : afs_test_super, + afs_set_super); if (IS_ERR(sb)) { ret = PTR_ERR(sb); - goto error_as; + goto error; } if (!sb->s_root) { /* initial superblock/root creation */ _debug("create"); - ret = afs_fill_super(sb, &params); + ret = afs_fill_super(sb, ctx); if (ret < 0) goto error_sb; - as = NULL; sb->s_flags |= SB_ACTIVE; } else { _debug("reuse"); ASSERTCMP(sb->s_flags, &, SB_ACTIVE); - afs_destroy_sbi(as); - as = NULL; } - afs_put_cell(params.net, params.cell); - key_put(params.key); + fc->root = dget(sb->s_root); _leave(" = 0 [%p]", sb); - return dget(sb->s_root); + return 0; error_sb: deactivate_locked_super(sb); - goto error_key; -error_as: - afs_destroy_sbi(as); -error_key: - key_put(params.key); error: - afs_put_cell(params.net, params.cell); _leave(" = %d", ret); - return ERR_PTR(ret); + return ret; +} + +static void afs_free_fc(struct fs_context *fc) +{ + struct afs_fs_context *ctx = fc->fs_private; + + afs_destroy_sbi(fc->s_fs_info); + afs_put_volume(ctx->cell, ctx->volume); + afs_put_cell(ctx->net, ctx->cell); + key_put(ctx->key); + kfree(ctx); +} + +static const struct fs_context_operations afs_context_ops = { + .free = afs_free_fc, + .parse_param = afs_parse_param, + .validate = afs_validate_fc, + .get_tree = afs_get_tree, +}; + +/* + * Set up the filesystem mount context. + */ +static int afs_init_fs_context(struct fs_context *fc, struct dentry *reference) +{ + struct afs_fs_context *ctx; + struct afs_super_info *src_as; + struct afs_cell *cell; + + ctx = kzalloc(sizeof(struct afs_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->type = AFSVL_ROVOL; + + switch (fc->purpose) { + case FS_CONTEXT_FOR_USER_MOUNT: + case FS_CONTEXT_FOR_KERNEL_MOUNT: + ctx->net = afs_net(fc->net_ns); + + /* Default to the workstation cell. 
*/ + rcu_read_lock(); + cell = afs_lookup_cell_rcu(ctx->net, NULL, 0); + rcu_read_unlock(); + if (IS_ERR(cell)) + cell = NULL; + ctx->cell = cell; + break; + + case FS_CONTEXT_FOR_SUBMOUNT: + if (!reference) { + kfree(ctx); + return -EINVAL; + } + + src_as = AFS_FS_S(reference->d_sb); + ASSERT(src_as); + + ctx->net = afs_net(fc->net_ns); + if (src_as->cell) + ctx->cell = afs_get_cell(src_as->cell); + if (src_as->volume && src_as->volume->type == AFSVL_RWVOL) { + ctx->type = AFSVL_RWVOL; + ctx->force = true; + } + break; + + default: + break; + } + + fc->fs_private = ctx; + fc->ops = &afs_context_ops; + return 0; } /* @@ -726,3 +775,162 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) return ret; } + +/* + * Get filesystem information. + */ +static int afs_fsinfo(struct path *path, struct fsinfo_kparams *params) +{ + struct fsinfo_timestamp_info *tsinfo; + struct fsinfo_server_address *addr; + struct fsinfo_capabilities *caps; + struct fsinfo_supports *sup; + struct dentry *dentry = path->dentry; + struct afs_server_list *slist; + struct afs_super_info *as = AFS_FS_S(dentry->d_sb); + struct afs_addr_list *alist; + struct afs_server *server; + struct afs_volume *volume = as->volume; + struct afs_cell *cell = as->cell; + struct afs_net *net = afs_d2net(dentry); + const char *str = NULL; + bool dyn_root = as->dyn_root; + int ret; + + switch (params->request) { + case FSINFO_ATTR_TIMESTAMP_INFO: + tsinfo = params->buffer; + tsinfo->minimum_timestamp = 0; + tsinfo->maximum_timestamp = UINT_MAX; + tsinfo->mtime_gran_mantissa = 1; + tsinfo->mtime_gran_exponent = 0; + return sizeof(*tsinfo); + + case FSINFO_ATTR_SUPPORTS: + sup = params->buffer; + sup->stx_mask = (STATX_TYPE | STATX_MODE | + STATX_NLINK | + STATX_UID | STATX_GID | + STATX_MTIME | STATX_INO | + STATX_SIZE); + sup->stx_attributes = STATX_ATTR_AUTOMOUNT; + return sizeof(*sup); + + case FSINFO_ATTR_CAPABILITIES: + caps = params->buffer; + if (dyn_root) { + fsinfo_set_cap(caps, 
FSINFO_CAP_IS_AUTOMOUNTER_FS); + fsinfo_set_cap(caps, FSINFO_CAP_AUTOMOUNTS); + } else { + fsinfo_set_cap(caps, FSINFO_CAP_IS_NETWORK_FS); + fsinfo_set_cap(caps, FSINFO_CAP_AUTOMOUNTS); + fsinfo_set_cap(caps, FSINFO_CAP_ADV_LOCKS); + fsinfo_set_cap(caps, FSINFO_CAP_UIDS); + fsinfo_set_cap(caps, FSINFO_CAP_GIDS); + fsinfo_set_cap(caps, FSINFO_CAP_VOLUME_ID); + fsinfo_set_cap(caps, FSINFO_CAP_VOLUME_NAME); + fsinfo_set_cap(caps, FSINFO_CAP_CELL_NAME); + fsinfo_set_cap(caps, FSINFO_CAP_IVER_MONO_INCR); + fsinfo_set_cap(caps, FSINFO_CAP_SYMLINKS); + fsinfo_set_cap(caps, FSINFO_CAP_HARD_LINKS_1DIR); + fsinfo_set_cap(caps, FSINFO_CAP_HAS_MTIME); + } + return sizeof(*caps); + + case FSINFO_ATTR_VOLUME_NAME: + if (dyn_root) + return -EOPNOTSUPP; + memcpy(params->buffer, volume->name, volume->name_len); + return volume->name_len; + + case FSINFO_ATTR_CELL_NAME: + if (dyn_root) + return -EOPNOTSUPP; + memcpy(params->buffer, cell->name, cell->name_len); + return cell->name_len; + + case FSINFO_ATTR_SERVER_NAME: + if (dyn_root) + return -EOPNOTSUPP; + read_lock(&volume->servers_lock); + slist = afs_get_serverlist(volume->servers); + read_unlock(&volume->servers_lock); + + if (params->Nth < slist->nr_servers) { + server = slist->servers[params->Nth].server; + ret = sprintf(params->buffer, "%pU", &server->uuid); + } else { + ret = -ENODATA; + } + + afs_put_serverlist(net, slist); + return ret; + + case FSINFO_ATTR_SERVER_ADDRESS: + addr = params->buffer; + if (dyn_root) + return -EOPNOTSUPP; + read_lock(&volume->servers_lock); + slist = afs_get_serverlist(volume->servers); + read_unlock(&volume->servers_lock); + + ret = -ENODATA; + if (params->Nth >= slist->nr_servers) + goto put_slist; + server = slist->servers[params->Nth].server; + + read_lock(&server->fs_lock); + alist = afs_get_addrlist(rcu_access_pointer(server->addresses)); + read_unlock(&server->fs_lock); + if (!alist) + goto put_slist; + + if (params->Mth >= alist->nr_addrs) + goto put_alist; + + memcpy(addr, 
&alist->addrs[params->Mth], + sizeof(struct sockaddr_rxrpc)); + ret = sizeof(*addr); + + put_alist: + afs_put_addrlist(alist); + put_slist: + afs_put_serverlist(net, slist); + return ret; + + case FSINFO_ATTR_PARAMETER: + if (params->Mth) + return -ENODATA; + switch (params->Nth) { + case Opt_source: + if (dyn_root) + return 0; + return sprintf(params->buffer, "source=%c%s:%s%s", + volume->type == AFSVL_RWVOL ? '%' : '#', + cell->name, + volume->name, + volume->type == AFSVL_RWVOL ? "" : + volume->type == AFSVL_ROVOL ? ".readonly" : + ".backup"); + case Opt_autocell: + if (as->autocell) + str = "autocell"; + goto string; + case Opt_dyn: + if (dyn_root) + str = "dyn"; + goto string; + default: + return -ENODATA; + } + + default: + return generic_fsinfo(path, params); + } + +string: + if (!str) + return 0; + strcpy(params->buffer, str); + return strlen(params->buffer); +} diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 00975ed3640f..f6eba2def0a1 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -21,7 +21,7 @@ static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" }; /* * Allocate a volume record and load it up from a vldb record. 
*/ -static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params, +static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, struct afs_vldb_entry *vldb, unsigned long type_mask) { @@ -113,7 +113,7 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, * - Rule 3: If parent volume is R/W, then only mount R/W volume unless * explicitly told otherwise */ -struct afs_volume *afs_create_volume(struct afs_mount_params *params) +struct afs_volume *afs_create_volume(struct afs_fs_context *params) { struct afs_vldb_entry *vldb; struct afs_volume *volume; @@ -233,7 +233,8 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) } static struct dentry *aio_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, NULL, AIO_RING_MAGIC); diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 91262c34b797..bbeca44f8ddf 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -39,7 +39,8 @@ static const struct dentry_operations anon_inodefs_dentry_operations = { }; static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { return mount_pseudo(fs_type, "anon_inode:", NULL, &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC); diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index 9f9cadbfbd7a..821494ab5554 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -198,7 +198,7 @@ static inline void managed_dentry_clear_managed(struct dentry *dentry) /* Initializing function */ -int autofs_fill_super(struct super_block *, void *, int); +int autofs_fill_super(struct super_block *, void *, size_t, int); struct autofs_info *autofs_new_ino(struct autofs_sb_info *); void autofs_clean_ino(struct autofs_info *); diff --git 
a/fs/autofs/init.c b/fs/autofs/init.c index 79ae07d9592f..d0b80ff8afcb 100644 --- a/fs/autofs/init.c +++ b/fs/autofs/init.c @@ -11,9 +11,9 @@ #include "autofs_i.h" static struct dentry *autofs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_nodev(fs_type, flags, data, autofs_fill_super); + return mount_nodev(fs_type, flags, data, data_size, autofs_fill_super); } static struct file_system_type autofs_fs_type = { diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 846c052569dd..93e014d7f813 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -201,7 +201,8 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, return (*pipefd < 0); } -int autofs_fill_super(struct super_block *s, void *data, int silent) +int autofs_fill_super(struct super_block *s, void *data, size_t data_size, + int silent) { struct inode *root_inode; struct dentry *root; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 4700b4534439..31f760ea2494 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -52,7 +52,7 @@ static int befs_utf2nls(struct super_block *sb, const char *in, int in_len, static int befs_nls2utf(struct super_block *sb, const char *in, int in_len, char **out, int *out_len); static void befs_put_super(struct super_block *); -static int befs_remount(struct super_block *, int *, char *); +static int befs_remount(struct super_block *, int *, char *, size_t); static int befs_statfs(struct dentry *, struct kstatfs *); static int befs_show_options(struct seq_file *, struct dentry *); static int parse_options(char *, struct befs_mount_options *); @@ -810,7 +810,7 @@ befs_put_super(struct super_block *sb) * Load a set of NLS translations if needed. 
*/ static int -befs_fill_super(struct super_block *sb, void *data, int silent) +befs_fill_super(struct super_block *sb, void *data, size_t data_size, int silent) { struct buffer_head *bh; struct befs_sb_info *befs_sb; @@ -942,7 +942,7 @@ unacquire_none: } static int -befs_remount(struct super_block *sb, int *flags, char *data) +befs_remount(struct super_block *sb, int *flags, char *data, size_t data_size) { sync_filesystem(sb); if (!(*flags & SB_RDONLY)) @@ -976,9 +976,10 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf) static struct dentry * befs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, befs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + befs_fill_super); } static struct file_system_type befs_fs_type = { diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index d81c148682e7..f7ef2913bd9d 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -317,7 +317,8 @@ void bfs_dump_imap(const char *prefix, struct super_block *s) #endif } -static int bfs_fill_super(struct super_block *s, void *data, int silent) +static int bfs_fill_super(struct super_block *s, void *data, size_t data_size, + int silent) { struct buffer_head *bh, *sbh; struct bfs_super_block *bfs_sb; @@ -463,9 +464,10 @@ out: } static struct dentry *bfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, bfs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + bfs_fill_super); } static struct file_system_type bfs_fs_type = { diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index aa4a7a23ff99..757128ace6f9 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -820,7 +820,8 @@ static const struct super_operations s_ops = { .evict_inode = bm_evict_inode, }; -static int 
bm_fill_super(struct super_block *sb, void *data, int silent) +static int bm_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { int err; static const struct tree_descr bm_files[] = { @@ -836,9 +837,9 @@ static int bm_fill_super(struct super_block *sb, void *data, int silent) } static struct dentry *bm_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_single(fs_type, flags, data, bm_fill_super); + return mount_single(fs_type, flags, data, data_size, bm_fill_super); } static struct linux_binfmt misc_format = { diff --git a/fs/block_dev.c b/fs/block_dev.c index a80b4f0ee7c4..cdda48fefc55 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -789,7 +789,7 @@ static const struct super_operations bdev_sops = { }; static struct dentry *bd_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { struct dentry *dent; dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d3c6bbc0aa3a..f517e1351b7b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -61,7 +61,8 @@ static const struct super_operations btrfs_super_ops; static struct file_system_type btrfs_fs_type; static struct file_system_type btrfs_root_fs_type; -static int btrfs_remount(struct super_block *sb, int *flags, char *data); +static int btrfs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size); const char *btrfs_decode_error(int errno) { @@ -1458,7 +1459,7 @@ out: return root; } -static int parse_security_options(char *orig_opts, +static int parse_security_options(char *orig_opts, size_t data_size, struct security_mnt_opts *sec_opts) { char *secdata = NULL; @@ -1467,7 +1468,7 @@ static int parse_security_options(char *orig_opts, secdata = alloc_secdata(); if (!secdata) return 
-ENOMEM; - ret = security_sb_copy_data(orig_opts, secdata); + ret = security_sb_copy_data(orig_opts, data_size, secdata); if (ret) { free_secdata(secdata); return ret; @@ -1515,7 +1516,8 @@ static int setup_security_options(struct btrfs_fs_info *fs_info, * for multiple device setup. Make sure to keep it in sync. */ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, - int flags, const char *device_name, void *data) + int flags, const char *device_name, + void *data, size_t data_size) { struct block_device *bdev = NULL; struct super_block *s; @@ -1531,7 +1533,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, security_init_mnt_opts(&new_sec_opts); if (data) { - error = parse_security_options(data, &new_sec_opts); + error = parse_security_options(data, data_size, &new_sec_opts); if (error) return ERR_PTR(error); } @@ -1647,7 +1649,7 @@ error_sec_opts: * "btrfs subvolume set-default", mount_subvol() is called always. */ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, - const char *device_name, void *data) + const char *device_name, void *data, size_t data_size) { struct vfsmount *mnt_root; struct dentry *root; @@ -1667,21 +1669,24 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } /* mount device's root (/) */ - mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data); + mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, + data, data_size); if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) { if (flags & SB_RDONLY) { mnt_root = vfs_kern_mount(&btrfs_root_fs_type, - flags & ~SB_RDONLY, device_name, data); + flags & ~SB_RDONLY, device_name, + data, data_size); } else { mnt_root = vfs_kern_mount(&btrfs_root_fs_type, - flags | SB_RDONLY, device_name, data); + flags | SB_RDONLY, device_name, + data, data_size); if (IS_ERR(mnt_root)) { root = ERR_CAST(mnt_root); goto out; } down_write(&mnt_root->mnt_sb->s_umount); - error = 
btrfs_remount(mnt_root->mnt_sb, &flags, NULL); + error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL, 0); up_write(&mnt_root->mnt_sb->s_umount); if (error < 0) { root = ERR_PTR(error); @@ -1763,7 +1768,8 @@ static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info, clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); } -static int btrfs_remount(struct super_block *sb, int *flags, char *data) +static int btrfs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_root *root = fs_info->tree_root; @@ -1782,7 +1788,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) struct security_mnt_opts new_sec_opts; security_init_mnt_opts(&new_sec_opts); - ret = parse_security_options(data, &new_sec_opts); + ret = parse_security_options(data, data_size, &new_sec_opts); if (ret) goto restore; ret = setup_security_options(fs_info, sb, diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 8a59597f1883..835ccb186d67 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -24,7 +24,7 @@ static const struct super_operations btrfs_test_super_ops = { static struct dentry *btrfs_test_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, size_t data_size) { return mount_pseudo(fs_type, "btrfs_test:", &btrfs_test_super_ops, NULL, BTRFS_TEST_MAGIC); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index b5ecd6f50360..62371335a481 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1038,7 +1038,8 @@ static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) } static struct dentry *ceph_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { struct super_block *sb; struct ceph_fs_client *fsc; diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c 
index b97c74efd04a..cf0637445094 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -260,7 +260,8 @@ static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt, if (IS_ERR(mountdata)) return (struct vfsmount *)mountdata; - mnt = vfs_submount(mntpt, &cifs_fs_type, devname, mountdata); + mnt = vfs_submount(mntpt, &cifs_fs_type, devname, + mountdata, strlen(mountdata) + 1); kfree(mountdata); kfree(devname); return mnt; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 865706edb307..79e0648e90ec 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -605,7 +605,8 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root) } #endif -static int cifs_remount(struct super_block *sb, int *flags, char *data) +static int cifs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { sync_filesystem(sb); *flags |= SB_NODIRATIME; @@ -708,7 +709,8 @@ static int cifs_set_super(struct super_block *sb, void *data) static struct dentry * cifs_smb3_do_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, bool is_smb3) + int flags, const char *dev_name, void *data, size_t data_size, + bool is_smb3) { int rc; struct super_block *sb; @@ -736,7 +738,7 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, goto out_nls; } - cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL); + cifs_sb->mountdata = kstrndup(data, data_size, GFP_KERNEL); if (cifs_sb->mountdata == NULL) { root = ERR_PTR(-ENOMEM); goto out_free; @@ -808,16 +810,18 @@ out_nls: static struct dentry * smb3_do_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return cifs_smb3_do_mount(fs_type, flags, dev_name, data, true); + return cifs_smb3_do_mount(fs_type, flags, dev_name, data, data_size, + true); } static struct dentry * cifs_do_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int 
flags, const char *dev_name, void *data, size_t data_size) { - return cifs_smb3_do_mount(fs_type, flags, dev_name, data, false); + return cifs_smb3_do_mount(fs_type, flags, dev_name, data, data_size, + false); } static ssize_t diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 97424cf206c0..dd819c150f70 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -93,7 +93,8 @@ void coda_destroy_inodecache(void) kmem_cache_destroy(coda_inode_cachep); } -static int coda_remount(struct super_block *sb, int *flags, char *data) +static int coda_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { sync_filesystem(sb); *flags |= SB_NOATIME; @@ -150,7 +151,8 @@ Ebadf: return -1; } -static int coda_fill_super(struct super_block *sb, void *data, int silent) +static int coda_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct inode *root = NULL; struct venus_comm *vc; @@ -316,9 +318,10 @@ static int coda_statfs(struct dentry *dentry, struct kstatfs *buf) /* init_coda: used by filesystems.c to register coda */ static struct dentry *coda_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { - return mount_nodev(fs_type, flags, data, coda_fill_super); + return mount_nodev(fs_type, flags, data, data_size, coda_fill_super); } struct file_system_type coda_fs_type = { diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 6e30949d9f77..3eb4dd19c49b 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -429,13 +429,6 @@ static int mt_ioctl_trans(struct file *file, #endif /* CONFIG_BLOCK */ -/* Bluetooth ioctls */ -#define HCIUARTSETPROTO _IOW('U', 200, int) -#define HCIUARTGETPROTO _IOR('U', 201, int) -#define HCIUARTGETDEVICE _IOR('U', 202, int) -#define HCIUARTSETFLAGS _IOW('U', 203, int) -#define HCIUARTGETFLAGS _IOR('U', 204, int) - #define RTC_IRQP_READ32 _IOR('p', 0x0b, compat_ulong_t) #define RTC_IRQP_SET32 
_IOW('p', 0x0c, compat_ulong_t) #define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t) @@ -819,32 +812,6 @@ COMPATIBLE_IOCTL(RNDADDENTROPY) COMPATIBLE_IOCTL(RNDZAPENTCNT) COMPATIBLE_IOCTL(RNDCLEARPOOL) /* Bluetooth */ -COMPATIBLE_IOCTL(HCIDEVUP) -COMPATIBLE_IOCTL(HCIDEVDOWN) -COMPATIBLE_IOCTL(HCIDEVRESET) -COMPATIBLE_IOCTL(HCIDEVRESTAT) -COMPATIBLE_IOCTL(HCIGETDEVLIST) -COMPATIBLE_IOCTL(HCIGETDEVINFO) -COMPATIBLE_IOCTL(HCIGETCONNLIST) -COMPATIBLE_IOCTL(HCIGETCONNINFO) -COMPATIBLE_IOCTL(HCIGETAUTHINFO) -COMPATIBLE_IOCTL(HCISETRAW) -COMPATIBLE_IOCTL(HCISETSCAN) -COMPATIBLE_IOCTL(HCISETAUTH) -COMPATIBLE_IOCTL(HCISETENCRYPT) -COMPATIBLE_IOCTL(HCISETPTYPE) -COMPATIBLE_IOCTL(HCISETLINKPOL) -COMPATIBLE_IOCTL(HCISETLINKMODE) -COMPATIBLE_IOCTL(HCISETACLMTU) -COMPATIBLE_IOCTL(HCISETSCOMTU) -COMPATIBLE_IOCTL(HCIBLOCKADDR) -COMPATIBLE_IOCTL(HCIUNBLOCKADDR) -COMPATIBLE_IOCTL(HCIINQUIRY) -COMPATIBLE_IOCTL(HCIUARTSETPROTO) -COMPATIBLE_IOCTL(HCIUARTGETPROTO) -COMPATIBLE_IOCTL(HCIUARTGETDEVICE) -COMPATIBLE_IOCTL(HCIUARTSETFLAGS) -COMPATIBLE_IOCTL(HCIUARTGETFLAGS) COMPATIBLE_IOCTL(RFCOMMCREATEDEV) COMPATIBLE_IOCTL(RFCOMMRELEASEDEV) COMPATIBLE_IOCTL(RFCOMMGETDEVLIST) diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index cfd91320e869..c9c7c14eb9db 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -66,7 +66,8 @@ static struct configfs_dirent configfs_root = { .s_iattr = NULL, }; -static int configfs_fill_super(struct super_block *sb, void *data, int silent) +static int configfs_fill_super(struct super_block *sb, + void *data, size_t data_size, int silent) { struct inode *inode; struct dentry *root; @@ -103,9 +104,9 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent) } static struct dentry *configfs_do_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_single(fs_type, flags, data, configfs_fill_super); + return 
mount_single(fs_type, flags, data, data_size, configfs_fill_super); } static struct file_system_type configfs_fs_type = { diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 9352487bd0fc..f5806bdcd5da 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -506,7 +506,8 @@ static void cramfs_kill_sb(struct super_block *sb) kfree(sbi); } -static int cramfs_remount(struct super_block *sb, int *flags, char *data) +static int cramfs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { sync_filesystem(sb); *flags |= SB_RDONLY; @@ -607,7 +608,8 @@ static int cramfs_finalize_super(struct super_block *sb, return 0; } -static int cramfs_blkdev_fill_super(struct super_block *sb, void *data, +static int cramfs_blkdev_fill_super(struct super_block *sb, + void *data, size_t data_size, int silent) { struct cramfs_sb_info *sbi; @@ -629,8 +631,8 @@ static int cramfs_blkdev_fill_super(struct super_block *sb, void *data, return cramfs_finalize_super(sb, &super.root); } -static int cramfs_mtd_fill_super(struct super_block *sb, void *data, - int silent) +static int cramfs_mtd_fill_super(struct super_block *sb, + void *data, size_t data_size, int silent) { struct cramfs_sb_info *sbi; struct cramfs_super super; @@ -952,18 +954,19 @@ static const struct super_operations cramfs_ops = { }; static struct dentry *cramfs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) + const char *dev_name, + void *data, size_t data_size) { struct dentry *ret = ERR_PTR(-ENOPROTOOPT); if (IS_ENABLED(CONFIG_CRAMFS_MTD)) { - ret = mount_mtd(fs_type, flags, dev_name, data, + ret = mount_mtd(fs_type, flags, dev_name, data, data_size, cramfs_mtd_fill_super); if (!IS_ERR(ret)) return ret; } if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV)) { - ret = mount_bdev(fs_type, flags, dev_name, data, + ret = mount_bdev(fs_type, flags, dev_name, data, data_size, cramfs_blkdev_fill_super); } return ret; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 
13b01351dd1c..57ba6d891c85 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -130,7 +130,8 @@ static int debugfs_apply_options(struct super_block *sb) return 0; } -static int debugfs_remount(struct super_block *sb, int *flags, char *data) +static int debugfs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { int err; struct debugfs_fs_info *fsi = sb->s_fs_info; @@ -190,7 +191,7 @@ static struct vfsmount *debugfs_automount(struct path *path) { debugfs_automount_t f; f = (debugfs_automount_t)path->dentry->d_fsdata; - return f(path->dentry, d_inode(path->dentry)->i_private); + return f(path->dentry, d_inode(path->dentry)->i_private, 0); } static const struct dentry_operations debugfs_dops = { @@ -199,7 +200,8 @@ static const struct dentry_operations debugfs_dops = { .d_automount = debugfs_automount, }; -static int debug_fill_super(struct super_block *sb, void *data, int silent) +static int debug_fill_super(struct super_block *sb, + void *data, size_t data_size, int silent) { static const struct tree_descr debug_files[] = {{""}}; struct debugfs_fs_info *fsi; @@ -235,9 +237,9 @@ fail: static struct dentry *debug_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, size_t data_size) { - return mount_single(fs_type, flags, data, debug_fill_super); + return mount_single(fs_type, flags, data, data_size, debug_fill_super); } static struct file_system_type debug_fs_type = { @@ -539,7 +541,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir); struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, debugfs_automount_t f, - void *data) + void *data, size_t data_size) { struct dentry *dentry = start_creating(name, parent); struct inode *inode; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index c53814539070..42165efb4d28 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -384,7 +384,8 @@ static void update_ptmx_mode(struct pts_fs_info *fsi) } } -static int 
devpts_remount(struct super_block *sb, int *flags, char *data) +static int devpts_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { int err; struct pts_fs_info *fsi = DEVPTS_SB(sb); @@ -445,7 +446,8 @@ static void *new_pts_fs_info(struct super_block *sb) } static int -devpts_fill_super(struct super_block *s, void *data, int silent) +devpts_fill_super(struct super_block *s, void *data, size_t data_size, + int silent) { struct inode *inode; int error; @@ -502,9 +504,9 @@ fail: * instance are independent of the PTYs in other devpts instances. */ static struct dentry *devpts_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_nodev(fs_type, flags, data, devpts_fill_super); + return mount_nodev(fs_type, flags, data, data_size, devpts_fill_super); } static void devpts_kill_sb(struct super_block *sb) diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 025d66a705db..5d029b7e069a 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -488,7 +488,7 @@ static struct file_system_type ecryptfs_fs_type; * @raw_data: The options passed into the kernel */ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) + const char *dev_name, void *raw_data, size_t data_size) { struct super_block *s; struct ecryptfs_sb_info *sbi; diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 5b68e4294faa..db0e417f1c7e 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -191,7 +191,8 @@ static int efivarfs_destroy(struct efivar_entry *entry, void *data) return 0; } -static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) +static int efivarfs_fill_super(struct super_block *sb, + void *data, size_t data_size, int silent) { struct inode *inode = NULL; struct dentry *root; @@ -227,9 +228,11 @@ static int efivarfs_fill_super(struct super_block 
*sb, void *data, int silent) } static struct dentry *efivarfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { - return mount_single(fs_type, flags, data, efivarfs_fill_super); + return mount_single(fs_type, flags, data, data_size, + efivarfs_fill_super); } static void efivarfs_kill_sb(struct super_block *sb) diff --git a/fs/efs/super.c b/fs/efs/super.c index 6ffb7ba1547a..ce85f22651f3 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -19,12 +19,14 @@ #include <linux/efs_fs_sb.h> static int efs_statfs(struct dentry *dentry, struct kstatfs *buf); -static int efs_fill_super(struct super_block *s, void *d, int silent); +static int efs_fill_super(struct super_block *s, void *d, size_t data_size, + int silent); static struct dentry *efs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, efs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + efs_fill_super); } static void efs_kill_sb(struct super_block *s) @@ -113,7 +115,8 @@ static void destroy_inodecache(void) kmem_cache_destroy(efs_inode_cachep); } -static int efs_remount(struct super_block *sb, int *flags, char *data) +static int efs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { sync_filesystem(sb); *flags |= SB_RDONLY; @@ -253,7 +256,8 @@ static int efs_validate_super(struct efs_sb_info *sb, struct efs_super *super) { return 0; } -static int efs_fill_super(struct super_block *s, void *d, int silent) +static int efs_fill_super(struct super_block *s, void *d, size_t data_size, + int silent) { struct efs_sb_info *sb; struct buffer_head *bh; diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 906839a4da8f..7d9c40e5838e 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -705,7 +705,8 @@ out: /* * 
Read the superblock from the OSD and fill in the fields */ -static int exofs_fill_super(struct super_block *sb, void *data, int silent) +static int exofs_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct inode *root; struct exofs_mountopt *opts = data; @@ -861,7 +862,7 @@ free_sbi: */ static struct dentry *exofs_mount(struct file_system_type *type, int flags, const char *dev_name, - void *data) + void *data, size_t data_size) { struct exofs_mountopt opts; int ret; @@ -874,7 +875,7 @@ static struct dentry *exofs_mount(struct file_system_type *type, if (!opts.dev_name) opts.dev_name = dev_name; - return mount_nodev(type, flags, &opts, exofs_fill_super); + return mount_nodev(type, flags, &opts, 0, exofs_fill_super); } /* diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 73b2d528237f..341188f16b56 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -39,7 +39,8 @@ #include "acl.h" static void ext2_write_super(struct super_block *sb); -static int ext2_remount (struct super_block * sb, int * flags, char * data); +static int ext2_remount (struct super_block * sb, int * flags, + char * data, size_t data_size); static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); static int ext2_sync_fs(struct super_block *sb, int wait); static int ext2_freeze(struct super_block *sb); @@ -815,7 +816,8 @@ static unsigned long descriptor_loc(struct super_block *sb, return ext2_group_first_block_no(sb, bg) + has_super; } -static int ext2_fill_super(struct super_block *sb, void *data, int silent) +static int ext2_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); struct buffer_head * bh; @@ -1320,7 +1322,8 @@ static void ext2_write_super(struct super_block *sb) ext2_sync_fs(sb, 1); } -static int ext2_remount (struct super_block * sb, int * flags, char * data) +static int ext2_remount (struct super_block * sb, int * flags, + char *data, size_t 
data_size) { struct ext2_sb_info * sbi = EXT2_SB(sb); struct ext2_super_block * es; @@ -1471,9 +1474,10 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) } static struct dentry *ext2_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, ext2_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + ext2_fill_super); } #ifdef CONFIG_QUOTA diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 53ff6c2a26ed..5603a4a1a864 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -70,12 +70,13 @@ static void ext4_mark_recovery_complete(struct super_block *sb, static void ext4_clear_journal_err(struct super_block *sb, struct ext4_super_block *es); static int ext4_sync_fs(struct super_block *sb, int wait); -static int ext4_remount(struct super_block *sb, int *flags, char *data); +static int ext4_remount(struct super_block *sb, int *flags, + char *data, size_t data_size); static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); static int ext4_unfreeze(struct super_block *sb); static int ext4_freeze(struct super_block *sb); static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data); + const char *dev_name, void *data, size_t data_size); static inline int ext2_feature_set_ok(struct super_block *sb); static inline int ext3_feature_set_ok(struct super_block *sb); static int ext4_feature_set_ok(struct super_block *sb, int readonly); @@ -3510,7 +3511,8 @@ static void ext4_set_resv_clusters(struct super_block *sb) atomic64_set(&sbi->s_resv_clusters, resv_clusters); } -static int ext4_fill_super(struct super_block *sb, void *data, int silent) +static int ext4_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); char *orig_data = kstrdup(data, 
GFP_KERNEL); @@ -5111,7 +5113,8 @@ struct ext4_mount_options { #endif }; -static int ext4_remount(struct super_block *sb, int *flags, char *data) +static int ext4_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { struct ext4_super_block *es; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -5888,9 +5891,10 @@ static int ext4_get_next_id(struct super_block *sb, struct kqid *qid) #endif static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) + const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + ext4_fill_super); } #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e184ad4e4e90..983685f5adac 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1500,7 +1500,8 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) f2fs_sync_fs(sbi->sb, 1); } -static int f2fs_remount(struct super_block *sb, int *flags, char *data) +static int f2fs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; @@ -3016,7 +3017,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) sbi->readdir_ra = 1; } -static int f2fs_fill_super(struct super_block *sb, void *data, int silent) +static int f2fs_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct f2fs_sb_info *sbi; struct f2fs_super_block *raw_super; @@ -3452,9 +3454,10 @@ free_sbi: } static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) + const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, 
data_size, + f2fs_fill_super); } static void kill_f2fs_super(struct super_block *sb) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index c0b5b5c3373b..e981e9de928f 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -792,7 +792,8 @@ static void __exit fat_destroy_inodecache(void) kmem_cache_destroy(fat_inode_cachep); } -static int fat_remount(struct super_block *sb, int *flags, char *data) +static int fat_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { bool new_rdonly; struct msdos_sb_info *sbi = MSDOS_SB(sb); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index f2cd365a4e86..ddaf85496c5a 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -647,16 +647,18 @@ static void setup(struct super_block *sb) sb->s_flags |= SB_NOATIME; } -static int msdos_fill_super(struct super_block *sb, void *data, int silent) +static int msdos_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { return fat_fill_super(sb, data, silent, 0, setup); } static struct dentry *msdos_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, msdos_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + msdos_fill_super); } static struct file_system_type msdos_fs_type = { diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 996c8c25e9c6..a1603e8aa8c5 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -1044,16 +1044,18 @@ static void setup(struct super_block *sb) sb->s_d_op = &vfat_dentry_ops; } -static int vfat_fill_super(struct super_block *sb, void *data, int silent) +static int vfat_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { return fat_fill_super(sb, data, silent, 1, setup); } static struct dentry *vfat_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, size_t data_size) { - 
return mount_bdev(fs_type, flags, dev_name, data, vfat_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + vfat_fill_super); } static struct file_system_type vfat_fs_type = { diff --git a/fs/file_table.c b/fs/file_table.c index e49af4caf15d..e03c8d121c6c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -255,6 +255,7 @@ static void __fput(struct file *file) struct dentry *dentry = file->f_path.dentry; struct vfsmount *mnt = file->f_path.mnt; struct inode *inode = file->f_inode; + fmode_t mode = file->f_mode; if (unlikely(!(file->f_mode & FMODE_OPENED))) goto out; @@ -277,18 +278,20 @@ static void __fput(struct file *file) if (file->f_op->release) file->f_op->release(inode, file); if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && - !(file->f_mode & FMODE_PATH))) { + !(mode & FMODE_PATH))) { cdev_put(inode->i_cdev); } fops_put(file->f_op); put_pid(file->f_owner.pid); - if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) + if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_dec(inode); - if (file->f_mode & FMODE_WRITER) { + if (mode & FMODE_WRITER) { put_write_access(inode); __mnt_drop_write(mnt); } dput(dentry); + if (unlikely(mode & FMODE_NEED_UNMOUNT)) + dissolve_on_fput(mnt); mntput(mnt); out: file_free(file); diff --git a/fs/filesystems.c b/fs/filesystems.c index b03f57b1105b..9135646e41ac 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -16,6 +16,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/uaccess.h> +#include <linux/fs_parser.h> /* * Handling of filesystem drivers list. 
@@ -73,6 +74,9 @@ int register_filesystem(struct file_system_type * fs) int res = 0; struct file_system_type ** p; + if (fs->parameters && !fs_validate_description(fs->parameters)) + return -EINVAL; + BUG_ON(strchr(fs->name, '.')); if (fs->next) return -EBUSY; diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 48b24bb50d02..1c6cf91f6de9 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -113,7 +113,8 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp) return 0; } -static int vxfs_remount(struct super_block *sb, int *flags, char *data) +static int vxfs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { sync_filesystem(sb); *flags |= SB_RDONLY; @@ -199,6 +200,7 @@ static int vxfs_try_sb_magic(struct super_block *sbp, int silent, * vxfs_read_super - read superblock into memory and initialize filesystem * @sbp: VFS superblock (to fill) * @dp: fs private mount data + * @data_size: size of mount data * @silent: do not complain loudly when sth is wrong * * Description: @@ -211,7 +213,8 @@ static int vxfs_try_sb_magic(struct super_block *sbp, int silent, * Locking: * We are under @sbp->s_lock. */ -static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) +static int vxfs_fill_super(struct super_block *sbp, void *dp, size_t data_size, + int silent) { struct vxfs_sb_info *infp; struct vxfs_sb *rsbp; @@ -312,9 +315,10 @@ out: * The usual module blurb. 
*/ static struct dentry *vxfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, vxfs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + vxfs_fill_super); } static struct file_system_type vxfs_fs_type = { diff --git a/fs/fs_context.c b/fs/fs_context.c new file mode 100644 index 000000000000..d6e9f5e7a2df --- /dev/null +++ b/fs/fs_context.c @@ -0,0 +1,776 @@ +/* Provide a way to create a superblock configuration context within the kernel + * that allows a superblock to be set up prior to mounting. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/nsproxy.h> +#include <linux/slab.h> +#include <linux/magic.h> +#include <linux/security.h> +#include <linux/mnt_namespace.h> +#include <linux/pid_namespace.h> +#include <linux/user_namespace.h> +#include <linux/bsearch.h> +#include <net/net_namespace.h> +#include <asm/sections.h> +#include "mount.h" +#include "internal.h" + +enum legacy_fs_param { + LEGACY_FS_UNSET_PARAMS, + LEGACY_FS_NO_PARAMS, + LEGACY_FS_MONOLITHIC_PARAMS, + LEGACY_FS_INDIVIDUAL_PARAMS, + LEGACY_FS_MAGIC_PARAMS, +}; + +struct legacy_fs_context { + char *legacy_data; /* Data page for legacy filesystems */ + char *secdata; + size_t data_size; + enum legacy_fs_param param_type; +}; + +static const struct constant_table common_set_sb_flag[] = { + { "dirsync", SB_DIRSYNC }, + { "lazytime", SB_LAZYTIME }, + { "mand", SB_MANDLOCK }, + { "posixacl", SB_POSIXACL }, + { "ro", SB_RDONLY }, + { "sync", SB_SYNCHRONOUS }, +}; + +static const struct constant_table common_clear_sb_flag[] = { + { "async", SB_SYNCHRONOUS }, + { "nolazytime", SB_LAZYTIME }, + { "nomand", SB_MANDLOCK }, + { "rw", SB_RDONLY }, + { "silent", SB_SILENT }, +}; + +static const char *const forbidden_sb_flag[] = { + "bind", + "dev", + "exec", + "move", + "noatime", + "nodev", + "nodiratime", + "noexec", + "norelatime", + "nostrictatime", + "nosuid", + "private", + "rec", + "relatime", + "remount", + "shared", + "slave", + "strictatime", + "suid", + "unbindable", +}; + +static int cmp_flag_name(const void *name, const void *entry) +{ + const char **e = (const char **)entry; + return strcmp(name, *e); +} + +/* + * Check for a common mount option that manipulates s_flags. 
+ */ +static int vfs_parse_sb_flag(struct fs_context *fc, const char *key) +{ + unsigned int token; + + if (bsearch(key, forbidden_sb_flag, ARRAY_SIZE(forbidden_sb_flag), + sizeof(forbidden_sb_flag[0]), cmp_flag_name)) + return -EINVAL; + + token = lookup_constant(common_set_sb_flag, key, 0); + if (token) { + fc->sb_flags |= token; + fc->sb_flags_mask |= token; + return 0; + } + + token = lookup_constant(common_clear_sb_flag, key, 0); + if (token) { + fc->sb_flags &= ~token; + fc->sb_flags_mask |= token; + return 0; + } + + return -ENOPARAM; +} + +/** + * vfs_parse_fs_param - Add a single parameter to a superblock config + * @fc: The filesystem context to modify + * @param: The parameter + * + * A single mount option in string form is applied to the filesystem context + * being set up. Certain standard options (for example "ro") are translated + * into flag bits without going to the filesystem. The active security module + * is allowed to observe and poach options. Any other options are passed over + * to the filesystem to parse. + * + * This may be called multiple times for a context. + * + * Returns 0 on success and a negative error code on failure. In the event of + * failure, supplementary error information may have been set. + */ +int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param) +{ + int ret; + + if (!param->key) + return invalf(fc, "Unnamed parameter\n"); + + ret = vfs_parse_sb_flag(fc, param->key); + if (ret != -ENOPARAM) + return ret; + + ret = security_fs_context_parse_param(fc, param); + if (ret != -ENOPARAM) + /* Param belongs to the LSM or is disallowed by the LSM; so + * don't pass to the FS. + */ + return ret; + + if (fc->ops->parse_param) { + ret = fc->ops->parse_param(fc, param); + if (ret != -ENOPARAM) + return ret; + } + + /* If the filesystem doesn't take any arguments, give it the + * default handling of source. 
+ */ + if (strcmp(param->key, "source") == 0) { + if (param->type != fs_value_is_string) + return invalf(fc, "VFS: Non-string source"); + if (fc->source) + return invalf(fc, "VFS: Multiple sources"); + fc->source = param->string; + param->string = NULL; + return 0; + } + + return invalf(fc, "%s: Unknown parameter '%s'", + fc->fs_type->name, param->key); +} +EXPORT_SYMBOL(vfs_parse_fs_param); + +/** + * vfs_parse_fs_string - Convenience function to just parse a string. + */ +int vfs_parse_fs_string(struct fs_context *fc, const char *key, + const char *value, size_t v_size) +{ + int ret; + + struct fs_parameter param = { + .key = key, + .type = fs_value_is_string, + .size = v_size, + }; + + if (v_size > 0) { + param.string = kmemdup_nul(value, v_size, GFP_KERNEL); + if (!param.string) + return -ENOMEM; + } + + ret = vfs_parse_fs_param(fc, ¶m); + kfree(param.string); + return ret; +} +EXPORT_SYMBOL(vfs_parse_fs_string); + +/** + * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data + * @ctx: The superblock configuration to fill in. + * @data: The data to parse + * @data_size: The amount of data + * + * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be + * called from the ->monolithic_mount_data() fs_context operation. + * + * Returns 0 on success or the error returned by the ->parse_option() fs_context + * operation on failure. + */ +int generic_parse_monolithic(struct fs_context *fc, void *data, size_t data_size) +{ + char *options = data, *key; + int ret = 0; + + if (!options) + return 0; + + while ((key = strsep(&options, ",")) != NULL) { + if (*key) { + size_t v_len = 0; + char *value = strchr(key, '='); + + if (value) { + if (value == key) + continue; + *value++ = 0; + v_len = strlen(value); + } + ret = vfs_parse_fs_string(fc, key, value, v_len); + if (ret < 0) + break; + } + } + + return ret; +} +EXPORT_SYMBOL(generic_parse_monolithic); + +/** + * vfs_new_fs_context - Create a filesystem context. 
+ * @fs_type: The filesystem type. + * @reference: The dentry from which this one derives (or NULL) + * @sb_flags: Filesystem/superblock flags (SB_*) + * @sb_flags_mask: Applicable members of @sb_flags + * @purpose: The purpose that this configuration shall be used for. + * + * Open a filesystem and create a mount context. The mount context is + * initialised with the supplied flags and, if a submount/automount from + * another superblock (referred to by @reference) is supplied, may have + * parameters such as namespaces copied across from that superblock. + */ +struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type, + struct dentry *reference, + unsigned int sb_flags, + unsigned int sb_flags_mask, + enum fs_context_purpose purpose) +{ + int (*init_fs_context)(struct fs_context *, struct dentry *); + struct fs_context *fc; + int ret = -ENOMEM; + + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); + if (!fc) + return ERR_PTR(-ENOMEM); + + fc->purpose = purpose; + fc->sb_flags = sb_flags; + fc->sb_flags_mask = sb_flags_mask; + fc->fs_type = get_filesystem(fs_type); + fc->cred = get_current_cred(); + + mutex_init(&fc->uapi_mutex); + + switch (purpose) { + case FS_CONTEXT_FOR_KERNEL_MOUNT: + fc->sb_flags |= SB_KERNMOUNT; + /* Fallthrough */ + case FS_CONTEXT_FOR_USER_MOUNT: + fc->user_ns = get_user_ns(fc->cred->user_ns); + fc->net_ns = get_net(current->nsproxy->net_ns); + break; + case FS_CONTEXT_FOR_SUBMOUNT: + case FS_CONTEXT_FOR_ROOT_MOUNT: + fc->user_ns = get_user_ns(reference->d_sb->s_user_ns); + fc->net_ns = get_net(current->nsproxy->net_ns); + break; + case FS_CONTEXT_FOR_RECONFIGURE: + case FS_CONTEXT_FOR_UMOUNT: + case FS_CONTEXT_FOR_EMERGENCY_RO: + /* We don't pin any namespaces as the superblock's + * subscriptions cannot be changed at this point. 
+ */ + atomic_inc(&reference->d_sb->s_active); + fc->root = dget(reference); + break; + } + + /* TODO: Make all filesystems support this unconditionally */ + init_fs_context = fc->fs_type->init_fs_context; + if (!init_fs_context) + init_fs_context = legacy_init_fs_context; + + ret = init_fs_context(fc, reference); + if (ret < 0) + goto err_fc; + fc->need_free = true; + + /* Do the security check last because ->init_fs_context may change the + * namespace subscriptions. + */ + ret = security_fs_context_alloc(fc, reference); + if (ret < 0) + goto err_fc; + + return fc; + +err_fc: + put_fs_context(fc); + return ERR_PTR(ret); +} +EXPORT_SYMBOL(vfs_new_fs_context); + +/** + * vfs_dup_fc_config: Duplicate a filesystem context. + * @src_fc: The context to copy. + * @purpose: The purpose to set in the new mount + */ +struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc, + enum fs_context_purpose purpose) +{ + struct fs_context *fc; + int ret; + + if (!src_fc->ops->dup) + return ERR_PTR(-EOPNOTSUPP); + + fc = kmemdup(src_fc, sizeof(struct fs_context), GFP_KERNEL); + if (!fc) + return ERR_PTR(-ENOMEM); + + mutex_init(&fc->uapi_mutex); + + fc->fs_private = NULL; + fc->s_fs_info = NULL; + fc->source = NULL; + fc->security = NULL; + get_filesystem(fc->fs_type); + get_net(fc->net_ns); + get_user_ns(fc->user_ns); + get_cred(fc->cred); + if (fc->log) + refcount_inc(&fc->log->usage); + + /* Can't call put until we've called ->dup */ + ret = fc->ops->dup(fc, src_fc); + if (ret < 0) + goto err_fc; + + ret = security_fs_context_dup(fc, src_fc); + if (ret < 0) + goto err_fc; + return fc; + +err_fc: + put_fs_context(fc); + return ERR_PTR(ret); +} +EXPORT_SYMBOL(vfs_dup_fs_context); + +/** + * logfc - Log a message to a filesystem context + * @fc: The filesystem context to log to. + * @fmt: The format of the buffer. + */ +void logfc(struct fs_context *fc, const char *fmt, ...) +{ + static const char store_failure[] = "OOM: Can't store error string"; + struct fc_log *log = fc ? 
fc->log : NULL; + const char *p; + va_list va; + char *q; + u8 freeable; + + va_start(va, fmt); + if (!strchr(fmt, '%')) { + p = fmt; + goto unformatted_string; + } + if (strcmp(fmt, "%s") == 0) { + p = va_arg(va, const char *); + goto unformatted_string; + } + + q = kvasprintf(GFP_KERNEL, fmt, va); +copied_string: + if (!q) + goto store_failure; + freeable = 1; + goto store_string; + +unformatted_string: + if ((unsigned long)p >= (unsigned long)__start_rodata && + (unsigned long)p < (unsigned long)__end_rodata) + goto const_string; + if (log && within_module_core((unsigned long)p, log->owner)) + goto const_string; + q = kstrdup(p, GFP_KERNEL); + goto copied_string; + +store_failure: + p = store_failure; +const_string: + q = (char *)p; + freeable = 0; +store_string: + if (!log) { + switch (fmt[0]) { + case 'w': + printk(KERN_WARNING "%s\n", q + 2); + break; + case 'e': + printk(KERN_ERR "%s\n", q + 2); + break; + default: + printk(KERN_NOTICE "%s\n", q + 2); + break; + } + if (freeable) + kfree(q); + } else { + unsigned int logsize = ARRAY_SIZE(log->buffer); + u8 index; + + index = log->head & (logsize - 1); + BUILD_BUG_ON(sizeof(log->head) != sizeof(u8) || + sizeof(log->tail) != sizeof(u8)); + if ((u8)(log->head - log->tail) == logsize) { + /* The buffer is full, discard the oldest message */ + if (log->need_free & (1 << index)) + kfree(log->buffer[index]); + log->tail++; + } + + log->buffer[index] = q; + log->need_free &= ~(1 << index); + log->need_free |= freeable << index; + log->head++; + } + va_end(va); +} +EXPORT_SYMBOL(logfc); + +/* + * Free a logging structure. + */ +static void put_fc_log(struct fs_context *fc) +{ + struct fc_log *log = fc->log; + int i; + + if (log) { + if (refcount_dec_and_test(&log->usage)) { + fc->log = NULL; + for (i = 0; i <= 7; i++) + if (log->need_free & (1 << i)) + kfree(log->buffer[i]); + kfree(log); + } + } +} + +/** + * put_fs_context - Dispose of a superblock configuration context. + * @fc: The context to dispose of. 
+ */ +void put_fs_context(struct fs_context *fc) +{ + struct super_block *sb; + + if (fc->root) { + sb = fc->root->d_sb; + dput(fc->root); + fc->root = NULL; + deactivate_super(sb); + } + + if (fc->need_free && fc->ops && fc->ops->free) + fc->ops->free(fc); + + security_fs_context_free(fc); + if (fc->net_ns) + put_net(fc->net_ns); + put_user_ns(fc->user_ns); + if (fc->cred) + put_cred(fc->cred); + kfree(fc->subtype); + put_fc_log(fc); + put_filesystem(fc->fs_type); + kfree(fc->source); + kfree(fc); +} +EXPORT_SYMBOL(put_fs_context); + +/* + * Free the config for a filesystem that doesn't support fs_context. + */ +static void legacy_fs_context_free(struct fs_context *fc) +{ + struct legacy_fs_context *ctx = fc->fs_private; + + if (ctx) { + free_secdata(ctx->secdata); + switch (ctx->param_type) { + case LEGACY_FS_UNSET_PARAMS: + case LEGACY_FS_NO_PARAMS: + break; + case LEGACY_FS_MAGIC_PARAMS: + break; /* ctx->data is a weird pointer */ + default: + kfree(ctx->legacy_data); + break; + } + + kfree(ctx); + } +} + +/* + * Duplicate a legacy config. + */ +static int legacy_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) +{ + struct legacy_fs_context *ctx; + struct legacy_fs_context *src_ctx = src_fc->fs_private; + + ctx = kmemdup(src_ctx, sizeof(*src_ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + switch (ctx->param_type) { + case LEGACY_FS_MONOLITHIC_PARAMS: + case LEGACY_FS_INDIVIDUAL_PARAMS: + ctx->legacy_data = kmemdup(src_ctx->legacy_data, + src_ctx->data_size, GFP_KERNEL); + if (!ctx->legacy_data) { + kfree(ctx); + return -ENOMEM; + } + /* Fall through */ + default: + break; + } + + fc->fs_private = ctx; + return 0; +} + +/* + * Add a parameter to a legacy config. We build up a comma-separated list of + * options. 
+ */ +static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct legacy_fs_context *ctx = fc->fs_private; + unsigned int size = ctx->data_size; + size_t len = 0; + + if (strcmp(param->key, "source") == 0) { + if (param->type != fs_value_is_string) + return invalf(fc, "VFS: Legacy: Non-string source"); + if (fc->source) + return invalf(fc, "VFS: Legacy: Multiple sources"); + fc->source = param->string; + param->string = NULL; + return 0; + } + + if ((fc->fs_type->fs_flags & FS_HAS_SUBTYPE) && + strcmp(param->key, "subtype") == 0) { + if (param->type != fs_value_is_string) + return invalf(fc, "VFS: Legacy: Non-string subtype"); + if (fc->subtype) + return invalf(fc, "VFS: Legacy: Multiple subtype"); + fc->subtype = param->string; + param->string = NULL; + return 0; + } + + if (ctx->param_type != LEGACY_FS_UNSET_PARAMS && + ctx->param_type != LEGACY_FS_INDIVIDUAL_PARAMS) + return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options"); + + switch (param->type) { + case fs_value_is_string: + len = 1 + param->size; + /* Fall through */ + case fs_value_is_flag: + len += strlen(param->key); + break; + default: + return invalf(fc, "VFS: Legacy: Parameter type for '%s' not supported", + param->key); + } + + if (len > PAGE_SIZE - 2 - size) + return invalf(fc, "VFS: Legacy: Cumulative options too large"); + if (strchr(param->key, ',') || + (param->type == fs_value_is_string && + memchr(param->string, ',', param->size))) + return invalf(fc, "VFS: Legacy: Option '%s' contained comma", + param->key); + if (!ctx->legacy_data) { + ctx->legacy_data = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!ctx->legacy_data) + return -ENOMEM; + } + + ctx->legacy_data[size++] = ','; + len = strlen(param->key); + memcpy(ctx->legacy_data + size, param->key, len); + size += len; + if (param->type == fs_value_is_string) { + ctx->legacy_data[size++] = '='; + memcpy(ctx->legacy_data + size, param->string, param->size); + size += param->size; + } + 
ctx->legacy_data[size] = '\0'; + ctx->data_size = size; + ctx->param_type = LEGACY_FS_INDIVIDUAL_PARAMS; + return 0; +} + +/* + * Add monolithic mount data. + */ +static int legacy_parse_monolithic(struct fs_context *fc, void *data, size_t data_size) +{ + struct legacy_fs_context *ctx = fc->fs_private; + + if (ctx->param_type != LEGACY_FS_UNSET_PARAMS) { + pr_warn("VFS: Can't mix monolithic and individual options\n"); + return -EINVAL; + } + + if (!data) { + ctx->param_type = LEGACY_FS_NO_PARAMS; + return 0; + } + + ctx->data_size = data_size; + if (data_size > 0) { + ctx->legacy_data = kmemdup(data, data_size, GFP_KERNEL); + if (!ctx->legacy_data) + return -ENOMEM; + ctx->param_type = LEGACY_FS_MONOLITHIC_PARAMS; + } else { + /* Some filesystems pass weird pointers through that we don't + * want to copy. They can indicate this by setting data_size + * to 0. + */ + ctx->legacy_data = data; + ctx->param_type = LEGACY_FS_MAGIC_PARAMS; + } + + return 0; +} + +/* + * Use the legacy mount validation step to strip out and process security + * config options. + */ +static int legacy_validate(struct fs_context *fc) +{ + struct legacy_fs_context *ctx = fc->fs_private; + + switch (ctx->param_type) { + case LEGACY_FS_UNSET_PARAMS: + ctx->param_type = LEGACY_FS_NO_PARAMS; + /* Fall through */ + case LEGACY_FS_NO_PARAMS: + case LEGACY_FS_MAGIC_PARAMS: + return 0; + default: + break; + } + + if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA) + return 0; + + ctx->secdata = alloc_secdata(); + if (!ctx->secdata) + return -ENOMEM; + + return security_sb_copy_data(ctx->legacy_data, ctx->data_size, + ctx->secdata); +} + +/* + * Get a mountable root with the legacy mount command. 
+ */ +static int legacy_get_tree(struct fs_context *fc) +{ + struct legacy_fs_context *ctx = fc->fs_private; + struct super_block *sb; + struct dentry *root; + + root = fc->fs_type->mount(fc->fs_type, fc->sb_flags, + fc->source, ctx->legacy_data, + ctx->data_size); + if (IS_ERR(root)) + return PTR_ERR(root); + + sb = root->d_sb; + BUG_ON(!sb); + + fc->root = root; + return 0; +} + +/* + * Handle remount. + */ +static int legacy_reconfigure(struct fs_context *fc) +{ + struct legacy_fs_context *ctx = fc->fs_private; + struct super_block *sb = fc->root->d_sb; + + if (!sb->s_op->remount_fs) + return 0; + + return sb->s_op->remount_fs(sb, &fc->sb_flags, + ctx ? ctx->legacy_data : NULL, + ctx ? ctx->data_size : 0); +} + +const struct fs_context_operations legacy_fs_context_ops = { + .free = legacy_fs_context_free, + .dup = legacy_fs_context_dup, + .parse_param = legacy_parse_param, + .parse_monolithic = legacy_parse_monolithic, + .validate = legacy_validate, + .get_tree = legacy_get_tree, + .reconfigure = legacy_reconfigure, +}; + +/* + * Initialise a legacy context for a filesystem that doesn't support + * fs_context. + */ +int legacy_init_fs_context(struct fs_context *fc, struct dentry *dentry) +{ + switch (fc->purpose) { + default: + fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), + GFP_KERNEL); + if (!fc->fs_private) + return -ENOMEM; + break; + + case FS_CONTEXT_FOR_UMOUNT: + case FS_CONTEXT_FOR_EMERGENCY_RO: + if (!fc->root->d_sb->s_op->remount_fs) + return -EOPNOTSUPP; + break; + } + + fc->ops = &legacy_fs_context_ops; + return 0; +} diff --git a/fs/fs_parser.c b/fs/fs_parser.c new file mode 100644 index 000000000000..cee210eddd10 --- /dev/null +++ b/fs/fs_parser.c @@ -0,0 +1,555 @@ +/* Filesystem parameter parser. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/export.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> +#include <linux/slab.h> +#include <linux/security.h> +#include <linux/namei.h> +#include <linux/bsearch.h> +#include "internal.h" + +static const struct constant_table bool_names[] = { + { "0", false }, + { "1", true }, + { "false", false }, + { "no", false }, + { "true", true }, + { "yes", true }, +}; + +static int cmp_constant(const void *name, const void *entry) +{ + const struct constant_table *e = entry; + return strcmp(name, e->name); +} + +/** + * lookup_constant - Look up a constant by name in an ordered table + * @tbl: The table of constants to search. + * @tbl_size: The size of the table. + * @name: The name to look up. + * @not_found: The value to return if the name is not found. 
+ */ +int __lookup_constant(const struct constant_table *tbl, size_t tbl_size, + const char *name, int not_found) +{ + const struct constant_table *e; + + e = bsearch(name, tbl, tbl_size, sizeof(tbl[0]), cmp_constant); + if (!e) + return not_found; + return e->value; +} +EXPORT_SYMBOL(__lookup_constant); + +static int cmp_key(const void *name, const void *entry) +{ + const char *const *e = entry; + return strcmp(name, *e); +} + +static int fs_lookup_key(const struct fs_parameter_description *desc, + struct fs_parameter *param) +{ + const char *const *e; + + e = bsearch(param->key, desc->keys, desc->nr_params, + sizeof(const char *), cmp_key); + if (e) + return e - desc->keys; + + return __lookup_constant(desc->alt_keys, desc->nr_alt_keys, param->key, + -ENOPARAM); +} + +/* + * fs_parse - Parse a filesystem configuration parameter + * @fc: The filesystem context to log errors through. + * @desc: The parameter description to use. + * @param: The parameter. + * @result: Where to place the result of the parse + * + * Parse a filesystem configuration parameter and attempt a conversion for a + * simple parameter for which this is requested. If successful, the determined + * parameter ID is placed into @result->key, the desired type is indicated in + * @result->t and any converted value is placed into an appropriate member of + * the union in @result. + * + * The function returns the parameter number if the parameter was matched, + * -ENOPARAM if it wasn't matched and @desc->ignore_unknown indicated that + * unknown parameters are okay and -EINVAL if there was a conversion issue or + * the parameter wasn't recognised and unknowns aren't okay. 
+ */ +int fs_parse(struct fs_context *fc, + const struct fs_parameter_description *desc, + struct fs_parameter *param, + struct fs_parse_result *result) +{ + int ret, k, i, b; + + result->has_value = !!param->string; + + k = fs_lookup_key(desc, param); + if (k == -ENOPARAM) { + /* If we didn't find something that looks like "noxxx", see if + * "xxx" takes the "no"-form negative - but only if there + * wasn't an value. + */ + if (result->has_value) + goto unknown_parameter; + if (param->key[0] != 'n' || param->key[1] != 'o' || !param->key[2]) + goto unknown_parameter; + + k = fs_lookup_key(desc, param); + if (k == -ENOPARAM) + goto unknown_parameter; + if (!(desc->specs[k].flags & fs_param_neg_with_no)) + goto unknown_parameter; + result->key = k; + result->uint_32 = 0; + result->negated = true; + goto okay; + } + + result->key = k; + result->negated = false; + if (result->key == fsconfig_key_removed) + return invalf(fc, "%s: Unsupported parameter name '%s'", + desc->name, param->key); + + result->t = desc->specs[result->key]; + if (result->t.flags & fs_param_deprecated) + warnf(fc, "%s: Deprecated parameter '%s'", + desc->name, param->key); + + /* Certain parameter types only take a string and convert it. */ + switch (result->t.type) { + case __fs_param_wasnt_defined: + return -EINVAL; + case fs_param_is_u32: + case fs_param_is_u32_octal: + case fs_param_is_u32_hex: + case fs_param_is_s32: + case fs_param_is_u64: + case fs_param_is_enum: + case fs_param_is_string: + if (param->type != fs_value_is_string) + goto bad_value; + if (!result->has_value) { + if (desc->specs[k].flags & fs_param_v_optional) + goto okay; + goto bad_value; + } + /* Fall through */ + default: + break; + } + + /* Try to turn the type we were given into the type desired by the + * parameter and give an error if we can't. 
+ */ + switch (result->t.type) { + case fs_param_is_flag: + if (param->type != fs_value_is_flag && + (param->type != fs_value_is_string || result->has_value)) + return invalf(fc, "%s: Unexpected value for '%s'", + desc->name, param->key); + result->boolean = true; + goto okay; + + case fs_param_is_bool: + switch (param->type) { + case fs_value_is_flag: + result->boolean = true; + goto okay; + case fs_value_is_string: + if (param->size == 0) { + result->boolean = true; + goto okay; + } + b = lookup_constant(bool_names, param->string, -1); + if (b == -1) + goto bad_value; + result->boolean = b; + goto okay; + default: + goto bad_value; + } + + case fs_param_is_u32: + ret = kstrtouint(param->string, 0, &result->uint_32); + goto maybe_okay; + case fs_param_is_u32_octal: + ret = kstrtouint(param->string, 8, &result->uint_32); + goto maybe_okay; + case fs_param_is_u32_hex: + ret = kstrtouint(param->string, 16, &result->uint_32); + goto maybe_okay; + case fs_param_is_s32: + ret = kstrtoint(param->string, 0, &result->int_32); + goto maybe_okay; + case fs_param_is_u64: + ret = kstrtoull(param->string, 0, &result->uint_64); + goto maybe_okay; + + case fs_param_is_enum: + for (i = 0; i < desc->nr_enums; i++) { + if (desc->enums[i].param_id == result->key && + strcmp(desc->enums[i].name, param->string) == 0) { + result->uint_32 = desc->enums[i].value; + goto okay; + } + } + goto bad_value; + + case fs_param_is_string: + goto okay; + case fs_param_is_blob: + if (param->type != fs_value_is_blob) + goto bad_value; + goto okay; + + case fs_param_is_fd: { + if (param->type != fs_value_is_file) + goto bad_value; + goto okay; + } + + case fs_param_is_blockdev: + case fs_param_is_path: + goto okay; + default: + BUG(); + } + +maybe_okay: + if (ret < 0) + goto bad_value; +okay: + return result->key; + +bad_value: + return invalf(fc, "%s: Bad value for '%s'", desc->name, param->key); +unknown_parameter: + return -ENOPARAM; +} +EXPORT_SYMBOL(fs_parse); + +/** + * fs_lookup_param - Look up 
a path referred to by a parameter + * @fc: The filesystem context to log errors through. + * @param: The parameter. + * @want_bdev: T if want a blockdev + * @_path: The result of the lookup + */ +int fs_lookup_param(struct fs_context *fc, + struct fs_parameter *param, + bool want_bdev, + struct path *_path) +{ + struct filename *f; + unsigned int flags = 0; + bool put_f; + int ret; + + switch (param->type) { + case fs_value_is_string: + f = getname_kernel(param->string); + if (IS_ERR(f)) + return PTR_ERR(f); + put_f = true; + break; + case fs_value_is_filename_empty: + flags = LOOKUP_EMPTY; + /* Fall through */ + case fs_value_is_filename: + f = param->name; + put_f = false; + break; + default: + return invalf(fc, "%s: not usable as path", param->key); + } + + ret = filename_lookup(param->dirfd, f, flags, _path, NULL); + if (ret < 0) { + errorf(fc, "%s: Lookup failure for '%s'", param->key, f->name); + goto out; + } + + if (want_bdev && + !S_ISBLK(d_backing_inode(_path->dentry)->i_mode)) { + path_put(_path); + _path->dentry = NULL; + _path->mnt = NULL; + errorf(fc, "%s: Non-blockdev passed as '%s'", + param->key, f->name); + ret = -ENOTBLK; + } + +out: + if (put_f) + putname(f); + return ret; +} +EXPORT_SYMBOL(fs_lookup_param); + +#ifdef CONFIG_VALIDATE_FS_PARSER +/** + * validate_constant_table - Validate a constant table + * @name: Name to use in reporting + * @tbl: The constant table to validate. + * @tbl_size: The size of the table. + * @low: The lowest permissible value. + * @high: The highest permissible value. + * @special: One special permissible value outside of the range. 
+ */ +bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, + int low, int high, int special) +{ + size_t i; + bool good = true; + + if (tbl_size == 0) { + pr_warn("VALIDATE C-TBL: Empty\n"); + return true; + } + + for (i = 0; i < tbl_size; i++) { + if (!tbl[i].name) { + pr_err("VALIDATE C-TBL[%zu]: Null\n", i); + good = false; + } else if (i > 0 && tbl[i - 1].name) { + int c = strcmp(tbl[i-1].name, tbl[i].name); + + if (c == 0) { + pr_err("VALIDATE C-TBL[%zu]: Duplicate %s\n", + i, tbl[i].name); + good = false; + } + if (c > 0) { + pr_err("VALIDATE C-TBL[%zu]: Missorted %s>=%s\n", + i, tbl[i-1].name, tbl[i].name); + good = false; + } + } + + if (tbl[i].value != special && + (tbl[i].value < low || tbl[i].value > high)) { + pr_err("VALIDATE C-TBL[%zu]: %s->%d const out of range (%d-%d)\n", + i, tbl[i].name, tbl[i].value, low, high); + good = false; + } + } + + return good; +} + +static bool validate_list(const char *const *tbl, size_t tbl_size) +{ + size_t i; + bool good = true; + + for (i = 0; i < tbl_size; i++) { + if (!tbl[i]) { + pr_err("VALIDATE LIST[%zu]: Null\n", i); + good = false; + } else if (i > 0 && tbl[i - 1]) { + int c = strcmp(tbl[i-1], tbl[i]); + + if (c == 0) { + pr_err("VALIDATE LIST[%zu]: Duplicate %s\n", + i, tbl[i]); + good = false; + } + if (c > 0) { + pr_err("VALIDATE LIST[%zu]: Missorted %s>=%s\n", + i, tbl[i-1], tbl[i]); + good = false; + } + } + } + + return good; +} + +/** + * fs_validate_description - Validate a parameter description + * @desc: The parameter description to validate. 
+ */ +bool fs_validate_description(const struct fs_parameter_description *desc) +{ + const char *name = desc->name; + bool good = true, enums = false; + int i, j; + + pr_notice("*** VALIDATE %s ***\n", name); + + if (!name[0]) { + pr_err("VALIDATE Parser: No name\n"); + name = "Unknown"; + good = false; + } + + if (desc->nr_params) { + if (!desc->specs) { + pr_err("VALIDATE %s: Missing types table\n", name); + good = false; + goto no_specs; + } + + for (i = 0; i < desc->nr_params; i++) { + enum fs_parameter_type t = desc->specs[i].type; + if (t == __fs_param_wasnt_defined) { + pr_err("VALIDATE %s: [%u] Undefined type\n", + name, i); + good = false; + } else if (t >= nr__fs_parameter_type) { + pr_err("VALIDATE %s: [%u] Bad type %u\n", + name, i, t); + good = false; + } else if (t == fs_param_is_enum) { + enums = true; + } + } + } + +no_specs: + if (desc->nr_params) { + if (!desc->keys) { + pr_err("VALIDATE %s: Missing keys list\n", name); + good = false; + goto no_keys; + } + + if (!validate_list(desc->keys, desc->nr_params)) { + pr_err("VALIDATE %s: Bad keys table\n", name); + good = false; + } + + /* The "source" parameter is used to convey the device/source + * information. 
+ */ + if (desc->no_source) { + if (bsearch("source", desc->keys, desc->nr_params, + sizeof(const char *), cmp_key)) { + pr_err("VALIDATE %s: Source key, but marked no_source\n", + name); + good = false; + } + + if (desc->source_param != 0) { + pr_err("VALIDATE %s: source_param not zero\n", + name); + good = false; + } + } else { + if (desc->source_param >= desc->nr_params) { + pr_err("VALIDATE %s: source_param is out of range\n", + name); + good = false; + goto no_keys; + } + + if (strcmp(desc->keys[desc->source_param], "source") != 0) { + pr_err("VALIDATE %s: No source key, but not marked no_source\n", + name); + good = false; + } + } + } else { + if (desc->source_param) { + pr_err("VALIDATE %s: source_param not zero\n", name); + good = false; + } + } + +no_keys: + if (desc->nr_alt_keys) { + if (!desc->nr_params) { + pr_err("VALIDATE %s: %u alt_keys but no params\n", + name, desc->nr_alt_keys); + good = false; + goto no_alt_keys; + } + if (!desc->alt_keys) { + pr_err("VALIDATE %s: Missing alt_keys table\n", name); + good = false; + goto no_alt_keys; + } + + if (!validate_constant_table(desc->alt_keys, desc->nr_alt_keys, + 0, desc->nr_params - 1, + fsconfig_key_removed)) { + pr_err("VALIDATE %s: Bad alt_keys table\n", name); + good = false; + } + } + +no_alt_keys: + if (desc->nr_enums) { + if (!enums) { + pr_err("VALIDATE %s: Enum table but no enum-type values\n", + name); + good = false; + goto no_enums; + } + if (!desc->enums) { + pr_err("VALIDATE %s: Missing enums table\n", name); + good = false; + goto no_enums; + } + + for (j = 0; j < desc->nr_enums; j++) { + const struct fs_parameter_enum *e = &desc->enums[j]; + + if (!e->name[0]) { + pr_err("VALIDATE %s: e[%u] no name\n", name, j); + good = false; + } + if (e->param_id >= desc->nr_params) { + pr_err("VALIDATE %s: e[%u] bad param %u\n", + name, j, e->param_id); + good = false; + } + if (desc->specs[e->param_id].type != fs_param_is_enum) { + pr_err("VALIDATE %s: e[%u] enum val for non-enum type %u\n", + name, 
j, e->param_id); + good = false; + } + } + + for (i = 0; i < desc->nr_params; i++) { + if (desc->specs[i].type != fs_param_is_enum) + continue; + for (j = 0; j < desc->nr_enums; j++) + if (desc->enums[j].param_id == i) + break; + if (j == desc->nr_enums) { + pr_err("VALIDATE %s: t[%u] enum with no vals\n", + name, i); + good = false; + } + } + } else { + if (enums) { + pr_err("VALIDATE %s: enum-type values, but no enum table\n", + name); + good = false; + goto no_enums; + } + } + +no_enums: + return good; +} +#endif /* CONFIG_VALIDATE_FS_PARSER */ diff --git a/fs/fsopen.c b/fs/fsopen.c new file mode 100644 index 000000000000..aaaaa17a233c --- /dev/null +++ b/fs/fsopen.c @@ -0,0 +1,568 @@ +/* Filesystem access-by-fd. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/fs_context.h> +#include <linux/fs_parser.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/syscalls.h> +#include <linux/security.h> +#include <linux/anon_inodes.h> +#include <linux/namei.h> +#include <linux/file.h> +#include <uapi/linux/mount.h> +#include "internal.h" +#include "mount.h" + +/* + * Allow the user to read back any error, warning or informational messages. 
+ */ +static ssize_t fscontext_read(struct file *file, + char __user *_buf, size_t len, loff_t *pos) +{ + struct fs_context *fc = file->private_data; + struct fc_log *log = fc->log; + unsigned int logsize = ARRAY_SIZE(log->buffer); + ssize_t ret; + char *p; + bool need_free; + int index, n; + + ret = mutex_lock_interruptible(&fc->uapi_mutex); + if (ret < 0) + return ret; + + if (log->head == log->tail) { + mutex_unlock(&fc->uapi_mutex); + return -ENODATA; + } + + index = log->tail & (logsize - 1); + p = log->buffer[index]; + need_free = log->need_free & (1 << index); + log->buffer[index] = NULL; + log->need_free &= ~(1 << index); + log->tail++; + mutex_unlock(&fc->uapi_mutex); + + ret = -EMSGSIZE; + n = strlen(p); + if (n > len) + goto err_free; + ret = -EFAULT; + if (copy_to_user(_buf, p, n) != 0) + goto err_free; + ret = n; + +err_free: + if (need_free) + kfree(p); + return ret; +} + +static int fscontext_release(struct inode *inode, struct file *file) +{ + struct fs_context *fc = file->private_data; + + if (fc) { + file->private_data = NULL; + put_fs_context(fc); + } + return 0; +} + +const struct file_operations fscontext_fops = { + .read = fscontext_read, + .release = fscontext_release, + .llseek = no_llseek, +}; + +/* + * Attach a filesystem context to a file and an fd. + */ +static int fscontext_create_fd(struct fs_context *fc, unsigned int o_flags) +{ + int fd; + + fd = anon_inode_getfd("fscontext", &fscontext_fops, fc, + O_RDWR | o_flags); + if (fd < 0) + put_fs_context(fc); + return fd; +} + +static int fscontext_alloc_log(struct fs_context *fc) +{ + fc->log = kzalloc(sizeof(*fc->log), GFP_KERNEL); + if (!fc->log) + return -ENOMEM; + refcount_set(&fc->log->usage, 1); + fc->log->owner = fc->fs_type->owner; + return 0; +} + +/* + * Open a filesystem by name so that it can be configured for mounting. 
+ *
+ * We are allowed to specify a container in which the filesystem will be
+ * opened, thereby indicating which namespaces will be used (notably, which
+ * network namespace will be used for network filesystems).
+ *
+ * The caller needs CAP_SYS_ADMIN over the user namespace that owns its mount
+ * namespace.  On success an fd referring to the new context is returned.
+ */
+SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags)
+{
+	struct file_system_type *type;
+	struct fs_context *fc;
+	const char *name;
+	unsigned int o_flags;
+	int ret;
+
+	if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (flags & ~FSOPEN_CLOEXEC)
+		return -EINVAL;
+
+	name = strndup_user(_fs_name, PAGE_SIZE);
+	if (IS_ERR(name))
+		return PTR_ERR(name);
+
+	type = get_fs_type(name);
+	kfree(name);
+	if (!type)
+		return -ENODEV;
+
+	/* The context takes its own reference on the filesystem type */
+	fc = vfs_new_fs_context(type, NULL, 0, 0, FS_CONTEXT_FOR_USER_MOUNT);
+	put_filesystem(type);
+	if (IS_ERR(fc))
+		return PTR_ERR(fc);
+
+	fc->phase = FS_CONTEXT_CREATE_PARAMS;
+
+	ret = fscontext_alloc_log(fc);
+	if (ret < 0) {
+		put_fs_context(fc);
+		return ret;
+	}
+
+	o_flags = flags & FSOPEN_CLOEXEC ? O_CLOEXEC : 0;
+	return fscontext_create_fd(fc, o_flags);
+}
+
+/*
+ * Pick a superblock into a context for reconfiguration.
+ */ +SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags) +{ + struct fs_context *fc; + struct path target; + unsigned int lookup_flags; + int ret; + + if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + if ((flags & ~(FSPICK_CLOEXEC | + FSPICK_SYMLINK_NOFOLLOW | + FSPICK_NO_AUTOMOUNT | + FSPICK_EMPTY_PATH)) != 0) + return -EINVAL; + + lookup_flags = LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT; + if (flags & FSPICK_SYMLINK_NOFOLLOW) + lookup_flags &= ~LOOKUP_FOLLOW; + if (flags & FSPICK_NO_AUTOMOUNT) + lookup_flags &= ~LOOKUP_AUTOMOUNT; + if (flags & FSPICK_EMPTY_PATH) + lookup_flags |= LOOKUP_EMPTY; + ret = user_path_at(dfd, path, lookup_flags, &target); + if (ret < 0) + goto err; + + ret = -EINVAL; + if (target.mnt->mnt_root != target.dentry) + goto err_path; + + fc = vfs_new_fs_context(target.dentry->d_sb->s_type, target.dentry, + 0, 0, FS_CONTEXT_FOR_RECONFIGURE); + if (IS_ERR(fc)) { + ret = PTR_ERR(fc); + goto err_path; + } + + fc->phase = FS_CONTEXT_RECONF_PARAMS; + + ret = fscontext_alloc_log(fc); + if (ret < 0) + goto err_fc; + + path_put(&target); + return fscontext_create_fd(fc, flags & FSPICK_CLOEXEC ? O_CLOEXEC : 0); + +err_fc: + put_fs_context(fc); +err_path: + path_put(&target); +err: + return ret; +} + +/* + * Check the state and apply the configuration. Note that this function is + * allowed to 'steal' the value by setting param->xxx to NULL before returning. + */ +static int vfs_fsconfig(struct fs_context *fc, struct fs_parameter *param) +{ + int ret; + + /* We need to reinitialise the context if we have reconfiguration + * pending after creation or a previous reconfiguration. 
+ */ + if (fc->phase == FS_CONTEXT_AWAITING_RECONF) { + if (fc->fs_type->init_fs_context) { + ret = fc->fs_type->init_fs_context(fc, fc->root); + if (ret < 0) { + fc->phase = FS_CONTEXT_FAILED; + return ret; + } + fc->need_free = true; + } else { + /* Leave legacy context ops in place */ + } + + /* Do the security check last because ->init_fs_context may + * change the namespace subscriptions. + */ + ret = security_fs_context_alloc(fc, fc->root); + if (ret < 0) { + fc->phase = FS_CONTEXT_FAILED; + return ret; + } + + fc->phase = FS_CONTEXT_RECONF_PARAMS; + } + + if (fc->phase != FS_CONTEXT_CREATE_PARAMS && + fc->phase != FS_CONTEXT_RECONF_PARAMS) + return -EBUSY; + + return vfs_parse_fs_param(fc, param); +} + +/* + * Reconfigure a superblock. + */ +int vfs_reconfigure_sb(struct fs_context *fc) +{ + struct super_block *sb = fc->root->d_sb; + int ret; + + if (fc->ops->validate) { + ret = fc->ops->validate(fc); + if (ret < 0) + return ret; + } + + ret = security_fs_context_validate(fc); + if (ret) + return ret; + + if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + down_write(&sb->s_umount); + ret = reconfigure_super(fc); + up_write(&sb->s_umount); + return ret; +} + +/* + * Clean up a context after performing an action on it and put it into a state + * from where it can be used to reconfigure a superblock. + */ +void vfs_clean_context(struct fs_context *fc) +{ + if (fc->ops && fc->ops->free) + fc->ops->free(fc); + fc->need_free = false; + fc->fs_private = NULL; + fc->s_fs_info = NULL; + fc->sb_flags = 0; + fc->sloppy = false; + fc->silent = false; + security_fs_context_free(fc); + fc->security = NULL; + kfree(fc->subtype); + fc->subtype = NULL; + kfree(fc->source); + fc->source = NULL; + + fc->purpose = FS_CONTEXT_FOR_RECONFIGURE; + fc->phase = FS_CONTEXT_AWAITING_RECONF; +} + +/* + * Perform an action on a context. 
+ */ +static int vfs_fsconfig_action(struct fs_context *fc, enum fsconfig_command cmd) +{ + int ret = -EINVAL; + + switch (cmd) { + case FSCONFIG_CMD_CREATE: + if (fc->phase != FS_CONTEXT_CREATE_PARAMS) + return -EBUSY; + fc->phase = FS_CONTEXT_CREATING; + ret = vfs_get_tree(fc); + if (ret == 0) + fc->phase = FS_CONTEXT_AWAITING_MOUNT; + else + fc->phase = FS_CONTEXT_FAILED; + return ret; + + case FSCONFIG_CMD_RECONFIGURE: + if (fc->phase == FS_CONTEXT_AWAITING_RECONF) { + /* This is probably pointless, since no changes have + * been proposed. + */ + if (fc->fs_type->init_fs_context) { + ret = fc->fs_type->init_fs_context(fc, fc->root); + if (ret < 0) { + fc->phase = FS_CONTEXT_FAILED; + return ret; + } + fc->need_free = true; + } + fc->phase = FS_CONTEXT_RECONF_PARAMS; + } + + fc->phase = FS_CONTEXT_RECONFIGURING; + ret = vfs_reconfigure_sb(fc); + if (ret == 0) + vfs_clean_context(fc); + else + fc->phase = FS_CONTEXT_FAILED; + return ret; + + default: + return -EOPNOTSUPP; + } +} + +/** + * sys_fsconfig - Set parameters and trigger actions on a context + * @fd: The filesystem context to act upon + * @cmd: The action to take + * @_key: Where appropriate, the parameter key to set + * @_value: Where appropriate, the parameter value to set + * @aux: Additional information for the value + * + * This system call is used to set parameters on a context, including + * superblock settings, data source and security labelling. + * + * Actions include triggering the creation of a superblock and the + * reconfiguration of the superblock attached to the specified context. + * + * When setting a parameter, @cmd indicates the type of value being proposed + * and @_key indicates the parameter to be altered. + * + * @_value and @aux are used to specify the value, should a value be required: + * + * (*) fsconfig_set_flag: No value is specified. The parameter must be boolean + * in nature. The key may be prefixed with "no" to invert the + * setting. 
@_value must be NULL and @aux must be 0. + * + * (*) fsconfig_set_string: A string value is specified. The parameter can be + * expecting boolean, integer, string or take a path. A conversion to an + * appropriate type will be attempted (which may include looking up as a + * path). @_value points to a NUL-terminated string and @aux must be 0. + * + * (*) fsconfig_set_binary: A binary blob is specified. @_value points to the + * blob and @aux indicates its size. The parameter must be expecting a + * blob. + * + * (*) fsconfig_set_path: A non-empty path is specified. The parameter must be + * expecting a path object. @_value points to a NUL-terminated string that + * is the path and @aux is a file descriptor at which to start a relative + * lookup or AT_FDCWD. + * + * (*) fsconfig_set_path_empty: As fsconfig_set_path, but with AT_EMPTY_PATH + * implied. + * + * (*) fsconfig_set_fd: An open file descriptor is specified. @_value must be + * NULL and @aux indicates the file descriptor. + */ +SYSCALL_DEFINE5(fsconfig, + int, fd, + unsigned int, cmd, + const char __user *, _key, + const void __user *, _value, + int, aux) +{ + struct fs_context *fc; + struct fd f; + int ret; + + struct fs_parameter param = { + .type = fs_value_is_undefined, + }; + + if (fd < 0) + return -EINVAL; + + switch (cmd) { + case FSCONFIG_SET_FLAG: + if (!_key || _value || aux) + return -EINVAL; + break; + case FSCONFIG_SET_STRING: + if (!_key || !_value || aux) + return -EINVAL; + break; + case FSCONFIG_SET_BINARY: + if (!_key || !_value || aux <= 0 || aux > 1024 * 1024) + return -EINVAL; + break; + case FSCONFIG_SET_PATH: + case FSCONFIG_SET_PATH_EMPTY: + if (!_key || !_value || (aux != AT_FDCWD && aux < 0)) + return -EINVAL; + break; + case FSCONFIG_SET_FD: + if (!_key || _value || aux < 0) + return -EINVAL; + break; + case FSCONFIG_CMD_CREATE: + case FSCONFIG_CMD_RECONFIGURE: + if (_key || _value || aux) + return -EINVAL; + break; + default: + return -EOPNOTSUPP; + } + + f = fdget(fd); + if 
(!f.file) + return -EBADF; + ret = -EINVAL; + if (f.file->f_op != &fscontext_fops) + goto out_f; + + fc = f.file->private_data; + if (fc->ops == &legacy_fs_context_ops) { + switch (cmd) { + case FSCONFIG_SET_BINARY: + case FSCONFIG_SET_PATH: + case FSCONFIG_SET_PATH_EMPTY: + case FSCONFIG_SET_FD: + ret = -EOPNOTSUPP; + goto out_f; + } + } + + if (_key) { + param.key = strndup_user(_key, 256); + if (IS_ERR(param.key)) { + ret = PTR_ERR(param.key); + goto out_f; + } + } + + switch (cmd) { + case FSCONFIG_SET_STRING: + param.type = fs_value_is_string; + param.string = strndup_user(_value, 256); + if (IS_ERR(param.string)) { + ret = PTR_ERR(param.string); + goto out_key; + } + param.size = strlen(param.string); + break; + case FSCONFIG_SET_BINARY: + param.type = fs_value_is_blob; + param.size = aux; + param.blob = memdup_user_nul(_value, aux); + if (IS_ERR(param.blob)) { + ret = PTR_ERR(param.blob); + goto out_key; + } + break; + case FSCONFIG_SET_PATH: + param.type = fs_value_is_filename; + param.name = getname_flags(_value, 0, NULL); + if (IS_ERR(param.name)) { + ret = PTR_ERR(param.name); + goto out_key; + } + param.dirfd = aux; + param.size = strlen(param.name->name); + break; + case FSCONFIG_SET_PATH_EMPTY: + param.type = fs_value_is_filename_empty; + param.name = getname_flags(_value, LOOKUP_EMPTY, NULL); + if (IS_ERR(param.name)) { + ret = PTR_ERR(param.name); + goto out_key; + } + param.dirfd = aux; + param.size = strlen(param.name->name); + break; + case FSCONFIG_SET_FD: + param.type = fs_value_is_file; + ret = -EBADF; + param.file = fget(aux); + if (!param.file) + goto out_key; + break; + default: + break; + } + + ret = mutex_lock_interruptible(&fc->uapi_mutex); + if (ret == 0) { + switch (cmd) { + case FSCONFIG_CMD_CREATE: + case FSCONFIG_CMD_RECONFIGURE: + ret = vfs_fsconfig_action(fc, cmd); + break; + default: + ret = vfs_fsconfig(fc, ¶m); + break; + } + mutex_unlock(&fc->uapi_mutex); + } + + /* Clean up the our record of any value that we obtained from + 
* userspace. Note that the value may have been stolen by the LSM or + * filesystem, in which case the value pointer will have been cleared. + */ + switch (cmd) { + case FSCONFIG_SET_STRING: + case FSCONFIG_SET_BINARY: + kfree(param.string); + break; + case FSCONFIG_SET_PATH: + case FSCONFIG_SET_PATH_EMPTY: + if (param.name) + putname(param.name); + break; + case FSCONFIG_SET_FD: + if (param.file) + fput(param.file); + break; + default: + break; + } +out_key: + kfree(param.key); +out_f: + fdput(f); + return ret; +} diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 989df5accaee..74be4d2a7557 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -315,7 +315,8 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc) drop_nlink(d_inode(fuse_control_sb->s_root)); } -static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) +static int fuse_ctl_fill_super(struct super_block *sb, + void *data, size_t data_size, int silent) { static const struct tree_descr empty_descr = {""}; struct fuse_conn *fc; @@ -342,9 +343,11 @@ static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) } static struct dentry *fuse_ctl_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) + int flags, const char *dev_name, + void *raw_data, size_t data_size) { - return mount_single(fs_type, flags, raw_data, fuse_ctl_fill_super); + return mount_single(fs_type, flags, raw_data, data_size, + fuse_ctl_fill_super); } static void fuse_ctl_kill_sb(struct super_block *sb) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 073865371f9b..f6a8d72e6cbb 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -136,7 +136,8 @@ static void fuse_evict_inode(struct inode *inode) } } -static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) +static int fuse_remount_fs(struct super_block *sb, int *flags, + char *data, size_t data_size) { sync_filesystem(sb); if (*flags & SB_MANDLOCK) @@ -1072,7 +1073,8 @@ void 
fuse_dev_free(struct fuse_dev *fud) } EXPORT_SYMBOL_GPL(fuse_dev_free); -static int fuse_fill_super(struct super_block *sb, void *data, int silent) +static int fuse_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct fuse_dev *fud; struct fuse_conn *fc; @@ -1229,9 +1231,10 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) static struct dentry *fuse_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *raw_data) + void *raw_data, size_t data_size) { - return mount_nodev(fs_type, flags, raw_data, fuse_fill_super); + return mount_nodev(fs_type, flags, raw_data, data_size, + fuse_fill_super); } static void fuse_sb_destroy(struct super_block *sb) @@ -1268,9 +1271,10 @@ MODULE_ALIAS_FS("fuse"); #ifdef CONFIG_BLOCK static struct dentry *fuse_mount_blk(struct file_system_type *fs_type, int flags, const char *dev_name, - void *raw_data) + void *raw_data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super); + return mount_bdev(fs_type, flags, dev_name, raw_data, data_size, + fuse_fill_super); } static void fuse_kill_sb_blk(struct super_block *sb) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b041cb8ae383..b756f1b24962 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1220,6 +1220,7 @@ static int test_gfs2_super(struct super_block *s, void *ptr) * @flags: Mount flags * @dev_name: The name of the device * @data: The mount arguments + * @data_size: The size of the mount arguments * * Q. Why not use get_sb_bdev() ? * A. 
We need to select one of two root directories to mount, independent @@ -1229,7 +1230,7 @@ static int test_gfs2_super(struct super_block *s, void *ptr) */ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) + const char *dev_name, void *data, size_t data_size) { struct block_device *bdev; struct super_block *s; @@ -1326,7 +1327,8 @@ static int set_meta_super(struct super_block *s, void *ptr) } static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { struct super_block *s; struct gfs2_sbd *sdp; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ca71163ff7cf..7e4410a57d17 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1228,11 +1228,13 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) * @sb: the filesystem * @flags: the remount flags * @data: extra data passed in (not used right now) + * @data_size: size of the extra data * * Returns: errno */ -static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) +static int gfs2_remount_fs(struct super_block *sb, int *flags, + char *data, size_t data_size) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_args args = sdp->sd_args; /* Default to current settings */ diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 173876782f73..e739b381b041 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -111,7 +111,8 @@ static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static int hfs_remount(struct super_block *sb, int *flags, char *data) +static int hfs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { sync_filesystem(sb); *flags |= SB_NODIRATIME; @@ -382,7 +383,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) * hfs_btree_init() to get the necessary data about the extents and * catalog B-trees and, finally, reading the root 
inode into memory. */ -static int hfs_fill_super(struct super_block *sb, void *data, int silent) +static int hfs_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct hfs_sb_info *sbi; struct hfs_find_data fd; @@ -458,9 +460,11 @@ bail: } static struct dentry *hfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, hfs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + hfs_fill_super); } static struct file_system_type hfs_fs_type = { diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index eb4535eba95d..e12e051d46b4 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -326,7 +326,8 @@ static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static int hfsplus_remount(struct super_block *sb, int *flags, char *data) +static int hfsplus_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { sync_filesystem(sb); if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) @@ -371,7 +372,8 @@ static const struct super_operations hfsplus_sops = { .show_options = hfsplus_show_options, }; -static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) +static int hfsplus_fill_super(struct super_block *sb, + void *data, size_t data_size, int silent) { struct hfsplus_vh *vhdr; struct hfsplus_sb_info *sbi; @@ -643,9 +645,11 @@ static void hfsplus_destroy_inode(struct inode *inode) #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) static struct dentry *hfsplus_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + hfsplus_fill_super); } static struct 
file_system_type hfsplus_fs_type = { diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 444c7b170359..7af63cb655a3 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -911,7 +911,8 @@ static const struct inode_operations hostfs_link_iops = { .get_link = hostfs_get_link, }; -static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) +static int hostfs_fill_sb_common(struct super_block *sb, + void *d, size_t data_size, int silent) { struct inode *root_inode; char *host_root_path, *req_root = d; @@ -971,9 +972,9 @@ out: static struct dentry *hostfs_read_sb(struct file_system_type *type, int flags, const char *dev_name, - void *data) + void *data, size_t data_size) { - return mount_nodev(type, flags, data, hostfs_fill_sb_common); + return mount_nodev(type, flags, data, data_size, hostfs_fill_sb_common); } static void hostfs_kill_sb(struct super_block *s) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index f2c3ebcd309c..53e585b27c05 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -445,7 +445,8 @@ HPFS filesystem options:\n\ \n"); } -static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) +static int hpfs_remount_fs(struct super_block *s, int *flags, + char *data, size_t data_size) { kuid_t uid; kgid_t gid; @@ -540,7 +541,8 @@ static const struct super_operations hpfs_sops = .show_options = hpfs_show_options, }; -static int hpfs_fill_super(struct super_block *s, void *options, int silent) +static int hpfs_fill_super(struct super_block *s, + void *options, size_t data_size, int silent) { struct buffer_head *bh0, *bh1, *bh2; struct hpfs_boot_block *bootblock; @@ -757,9 +759,10 @@ bail0: } static struct dentry *hpfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, hpfs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, 
data_size, + hpfs_fill_super); } static struct file_system_type hpfs_fs_type = { diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 32920a10100e..762028994f47 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -27,7 +27,8 @@ #include <linux/backing-dev.h> #include <linux/hugetlb.h> #include <linux/pagevec.h> -#include <linux/parser.h> +#include <linux/fs_parser.h> +#include <linux/fsinfo.h> #include <linux/mman.h> #include <linux/slab.h> #include <linux/dnotify.h> @@ -45,11 +46,17 @@ const struct file_operations hugetlbfs_file_operations; static const struct inode_operations hugetlbfs_dir_inode_operations; static const struct inode_operations hugetlbfs_inode_operations; -struct hugetlbfs_config { +enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT }; + +struct hugetlbfs_fs_context { struct hstate *hstate; + unsigned long long max_size_opt; + unsigned long long min_size_opt; long max_hpages; long nr_inodes; long min_hpages; + enum hugetlbfs_size_type max_val_type; + enum hugetlbfs_size_type min_val_type; kuid_t uid; kgid_t gid; umode_t mode; @@ -57,22 +64,43 @@ struct hugetlbfs_config { int sysctl_hugetlb_shm_group; -enum { - Opt_size, Opt_nr_inodes, - Opt_mode, Opt_uid, Opt_gid, - Opt_pagesize, Opt_min_size, - Opt_err, +enum hugetlb_param { + Opt_gid, + Opt_min_size, + Opt_mode, + Opt_nr_inodes, + Opt_pagesize, + Opt_size, + Opt_uid, + nr__hugetlb_params +}; + +static const struct fs_parameter_spec hugetlb_param_specs[nr__hugetlb_params] = { + [Opt_gid] = { fs_param_is_u32 }, + [Opt_min_size] = { fs_param_is_string }, + [Opt_mode] = { fs_param_is_u32 }, + [Opt_nr_inodes] = { fs_param_is_string }, + [Opt_pagesize] = { fs_param_is_string }, + [Opt_size] = { fs_param_is_string }, + [Opt_uid] = { fs_param_is_u32 }, }; -static const match_table_t tokens = { - {Opt_size, "size=%s"}, - {Opt_nr_inodes, "nr_inodes=%s"}, - {Opt_mode, "mode=%o"}, - {Opt_uid, "uid=%u"}, - {Opt_gid, "gid=%u"}, - {Opt_pagesize, "pagesize=%s"}, - {Opt_min_size, 
"min_size=%s"}, - {Opt_err, NULL}, +static const char *const hugetlb_param_keys[nr__hugetlb_params] = { + [Opt_gid] = "gid", + [Opt_min_size] = "min_size", + [Opt_mode] = "mode", + [Opt_nr_inodes] = "nr_inodes", + [Opt_pagesize] = "pagesize", + [Opt_size] = "size", + [Opt_uid] = "uid", +}; + +static const struct fs_parameter_description hugetlb_fs_parameters = { + .name = "hugetlbfs", + .nr_params = nr__hugetlb_params, + .keys = hugetlb_param_keys, + .specs = hugetlb_param_specs, + .no_source = true, }; #ifdef CONFIG_NUMA @@ -708,16 +736,16 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) } static struct inode *hugetlbfs_get_root(struct super_block *sb, - struct hugetlbfs_config *config) + struct hugetlbfs_fs_context *ctx) { struct inode *inode; inode = new_inode(sb); if (inode) { inode->i_ino = get_next_ino(); - inode->i_mode = S_IFDIR | config->mode; - inode->i_uid = config->uid; - inode->i_gid = config->gid; + inode->i_mode = S_IFDIR | ctx->mode; + inode->i_uid = ctx->uid; + inode->i_gid = ctx->gid; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); inode->i_op = &hugetlbfs_dir_inode_operations; inode->i_fop = &simple_dir_operations; @@ -920,6 +948,70 @@ static int hugetlbfs_show_options(struct seq_file *m, struct dentry *root) return 0; } +static int hugetlbfs_fsinfo(struct path *path, struct fsinfo_kparams *params) +{ + struct dentry *dentry = path->dentry; + struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); + struct hugepage_subpool *spool = sbinfo->spool; + unsigned long hpage_size = huge_page_size(sbinfo->hstate); + unsigned hpage_shift = huge_page_shift(sbinfo->hstate); + char mod; + + switch (params->request) { + case FSINFO_ATTR_PARAMETER: + if (params->Mth) + return -ENODATA; + switch (params->Nth) { + case Opt_uid: + if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID)) + return sprintf(params->buffer, "uid=%u", + from_kuid_munged(&init_user_ns, + sbinfo->uid)); + return 0; + case Opt_gid: + if 
(!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID)) + return sprintf(params->buffer, "gid=%u", + from_kgid_munged(&init_user_ns, + sbinfo->gid)); + return 0; + + case Opt_size: + if (!spool || spool->max_hpages == -1) + return 0; + return sprintf(params->buffer, "size=%llu", + (unsigned long long)spool->max_hpages << hpage_shift); + case Opt_min_size: + if (!spool || spool->min_hpages == -1) + return 0; + return sprintf(params->buffer, "min_size=%llu", + (unsigned long long)spool->min_hpages << hpage_shift); + case Opt_pagesize: + hpage_size /= 1024; + mod = 'K'; + if (hpage_size >= 1024) { + hpage_size /= 1024; + mod = 'M'; + } + return sprintf(params->buffer, "pagesize=%lu%c", + hpage_size, mod); + + case Opt_mode: + if (sbinfo->mode == 0755) + return 0; + return sprintf(params->buffer, "mode=%o", sbinfo->mode); + case Opt_nr_inodes: + if (sbinfo->max_inodes == -1) + return 0; + return sprintf(params->buffer, "nr_inodes=%lu", + sbinfo->max_inodes); + default: + return -ENODATA; + } + default: + return generic_fsinfo(path, params); + } +} + static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); @@ -1079,10 +1171,9 @@ static const struct super_operations hugetlbfs_ops = { .statfs = hugetlbfs_statfs, .put_super = hugetlbfs_put_super, .show_options = hugetlbfs_show_options, + .fsinfo = hugetlbfs_fsinfo, }; -enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT }; - /* * Convert size option passed from command line to number of huge pages * in the pool specified by hstate. Size option could be in bytes @@ -1105,170 +1196,151 @@ hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt, return size_opt; } -static int -hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) +/* + * Parse one mount parameter. 
+ */ +static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *param) { - char *p, *rest; - substring_t args[MAX_OPT_ARGS]; - int option; - unsigned long long max_size_opt = 0, min_size_opt = 0; - enum hugetlbfs_size_type max_val_type = NO_SIZE, min_val_type = NO_SIZE; - - if (!options) + struct hugetlbfs_fs_context *ctx = fc->fs_private; + struct fs_parse_result result; + char *rest; + unsigned long ps; + int opt; + + opt = fs_parse(fc, &hugetlb_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_uid: + ctx->uid = make_kuid(current_user_ns(), result.uint_32); + if (!uid_valid(ctx->uid)) + goto bad_val; return 0; - while ((p = strsep(&options, ",")) != NULL) { - int token; - if (!*p) - continue; + case Opt_gid: + ctx->gid = make_kgid(current_user_ns(), result.uint_32); + if (!gid_valid(ctx->gid)) + goto bad_val; + return 0; - token = match_token(p, tokens, args); - switch (token) { - case Opt_uid: - if (match_int(&args[0], &option)) - goto bad_val; - pconfig->uid = make_kuid(current_user_ns(), option); - if (!uid_valid(pconfig->uid)) - goto bad_val; - break; + case Opt_mode: + ctx->mode = result.uint_32 & 01777U; + return 0; - case Opt_gid: - if (match_int(&args[0], &option)) - goto bad_val; - pconfig->gid = make_kgid(current_user_ns(), option); - if (!gid_valid(pconfig->gid)) - goto bad_val; - break; + case Opt_size: + /* memparse() will accept a K/M/G without a digit */ + if (!isdigit(param->string[0])) + goto bad_val; + ctx->max_size_opt = memparse(param->string, &rest); + ctx->max_val_type = SIZE_STD; + if (*rest == '%') + ctx->max_val_type = SIZE_PERCENT; + return 0; - case Opt_mode: - if (match_octal(&args[0], &option)) - goto bad_val; - pconfig->mode = option & 01777U; - break; + case Opt_nr_inodes: + /* memparse() will accept a K/M/G without a digit */ + if (!isdigit(param->string[0])) + goto bad_val; + ctx->nr_inodes = memparse(param->string, &rest); + return 0; - case Opt_size: { - /* memparse() 
will accept a K/M/G without a digit */ - if (!isdigit(*args[0].from)) - goto bad_val; - max_size_opt = memparse(args[0].from, &rest); - max_val_type = SIZE_STD; - if (*rest == '%') - max_val_type = SIZE_PERCENT; - break; + case Opt_pagesize: + ps = memparse(param->string, &rest); + ctx->hstate = size_to_hstate(ps); + if (!ctx->hstate) { + pr_err("Unsupported page size %lu MB\n", ps >> 20); + return -EINVAL; } + return 0; - case Opt_nr_inodes: - /* memparse() will accept a K/M/G without a digit */ - if (!isdigit(*args[0].from)) - goto bad_val; - pconfig->nr_inodes = memparse(args[0].from, &rest); - break; + case Opt_min_size: + /* memparse() will accept a K/M/G without a digit */ + if (!isdigit(param->string[0])) + goto bad_val; + ctx->min_size_opt = memparse(param->string, &rest); + ctx->min_val_type = SIZE_STD; + if (*rest == '%') + ctx->min_val_type = SIZE_PERCENT; + return 0; - case Opt_pagesize: { - unsigned long ps; - ps = memparse(args[0].from, &rest); - pconfig->hstate = size_to_hstate(ps); - if (!pconfig->hstate) { - pr_err("Unsupported page size %lu MB\n", - ps >> 20); - return -EINVAL; - } - break; - } + default: + return -EINVAL; + } - case Opt_min_size: { - /* memparse() will accept a K/M/G without a digit */ - if (!isdigit(*args[0].from)) - goto bad_val; - min_size_opt = memparse(args[0].from, &rest); - min_val_type = SIZE_STD; - if (*rest == '%') - min_val_type = SIZE_PERCENT; - break; - } +bad_val: + return invalf(fc, "hugetlbfs: Bad value '%s' for mount option '%s'\n", + param->string, param->key); +} - default: - pr_err("Bad mount option: \"%s\"\n", p); - return -EINVAL; - break; - } - } +/* + * Validate the parsed options. + */ +static int hugetlbfs_validate(struct fs_context *fc) +{ + struct hugetlbfs_fs_context *ctx = fc->fs_private; /* * Use huge page pool size (in hstate) to convert the size * options to number of huge pages. If NO_SIZE, -1 is returned. 
*/ - pconfig->max_hpages = hugetlbfs_size_to_hpages(pconfig->hstate, - max_size_opt, max_val_type); - pconfig->min_hpages = hugetlbfs_size_to_hpages(pconfig->hstate, - min_size_opt, min_val_type); + ctx->max_hpages = hugetlbfs_size_to_hpages(ctx->hstate, + ctx->max_size_opt, + ctx->max_val_type); + ctx->min_hpages = hugetlbfs_size_to_hpages(ctx->hstate, + ctx->min_size_opt, + ctx->min_val_type); /* * If max_size was specified, then min_size must be smaller */ - if (max_val_type > NO_SIZE && - pconfig->min_hpages > pconfig->max_hpages) { - pr_err("minimum size can not be greater than maximum size\n"); + if (ctx->max_val_type > NO_SIZE && + ctx->min_hpages > ctx->max_hpages) { + pr_err("Minimum size can not be greater than maximum size\n"); return -EINVAL; } return 0; - -bad_val: - pr_err("Bad value '%s' for mount option '%s'\n", args[0].from, p); - return -EINVAL; } static int -hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) +hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc) { - int ret; - struct hugetlbfs_config config; + struct hugetlbfs_fs_context *ctx = fc->fs_private; struct hugetlbfs_sb_info *sbinfo; - config.max_hpages = -1; /* No limit on size by default */ - config.nr_inodes = -1; /* No limit on number of inodes by default */ - config.uid = current_fsuid(); - config.gid = current_fsgid(); - config.mode = 0755; - config.hstate = &default_hstate; - config.min_hpages = -1; /* No default minimum size */ - ret = hugetlbfs_parse_options(data, &config); - if (ret) - return ret; - sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL); if (!sbinfo) return -ENOMEM; sb->s_fs_info = sbinfo; - sbinfo->hstate = config.hstate; spin_lock_init(&sbinfo->stat_lock); - sbinfo->max_inodes = config.nr_inodes; - sbinfo->free_inodes = config.nr_inodes; - sbinfo->spool = NULL; - sbinfo->uid = config.uid; - sbinfo->gid = config.gid; - sbinfo->mode = config.mode; + sbinfo->hstate = ctx->hstate; + sbinfo->max_inodes = ctx->nr_inodes; + 
sbinfo->free_inodes = ctx->nr_inodes; + sbinfo->spool = NULL; + sbinfo->uid = ctx->uid; + sbinfo->gid = ctx->gid; + sbinfo->mode = ctx->mode; /* * Allocate and initialize subpool if maximum or minimum size is * specified. Any needed reservations (for minimim size) are taken * taken when the subpool is created. */ - if (config.max_hpages != -1 || config.min_hpages != -1) { - sbinfo->spool = hugepage_new_subpool(config.hstate, - config.max_hpages, - config.min_hpages); + if (ctx->max_hpages != -1 || ctx->min_hpages != -1) { + sbinfo->spool = hugepage_new_subpool(ctx->hstate, + ctx->max_hpages, + ctx->min_hpages); if (!sbinfo->spool) goto out_free; } sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_blocksize = huge_page_size(config.hstate); - sb->s_blocksize_bits = huge_page_shift(config.hstate); + sb->s_blocksize = huge_page_size(ctx->hstate); + sb->s_blocksize_bits = huge_page_shift(ctx->hstate); sb->s_magic = HUGETLBFS_MAGIC; sb->s_op = &hugetlbfs_ops; sb->s_time_gran = 1; - sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config)); + sb->s_root = d_make_root(hugetlbfs_get_root(sb, ctx)); if (!sb->s_root) goto out_free; return 0; @@ -1278,16 +1350,51 @@ out_free: return -ENOMEM; } -static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int hugetlbfs_get_tree(struct fs_context *fc) { - return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super); + return vfs_get_super(fc, vfs_get_independent_super, hugetlbfs_fill_super); +} + +static void hugetlbfs_fs_context_free(struct fs_context *fc) +{ + kfree(fc->fs_private); +} + +static const struct fs_context_operations hugetlbfs_fs_context_ops = { + .free = hugetlbfs_fs_context_free, + .parse_param = hugetlbfs_parse_param, + .validate = hugetlbfs_validate, + .get_tree = hugetlbfs_get_tree, +}; + +static int hugetlbfs_init_fs_context(struct fs_context *fc, + struct dentry *reference) +{ + struct hugetlbfs_fs_context *ctx; + + ctx = kzalloc(sizeof(struct 
hugetlbfs_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->max_hpages = -1; /* No limit on size by default */ + ctx->nr_inodes = -1; /* No limit on number of inodes by default */ + ctx->uid = current_fsuid(); + ctx->gid = current_fsgid(); + ctx->mode = 0755; + ctx->hstate = &default_hstate; + ctx->min_hpages = -1; /* No default minimum size */ + ctx->max_val_type = NO_SIZE; + ctx->min_val_type = NO_SIZE; + fc->fs_private = ctx; + fc->ops = &hugetlbfs_fs_context_ops; + return 0; } static struct file_system_type hugetlbfs_fs_type = { - .name = "hugetlbfs", - .mount = hugetlbfs_mount, - .kill_sb = kill_litter_super, + .name = "hugetlbfs", + .init_fs_context = hugetlbfs_init_fs_context, + .parameters = &hugetlb_fs_parameters, + .kill_sb = kill_litter_super, }; static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; @@ -1372,8 +1479,47 @@ out: return file; } +static struct vfsmount *__init mount_one_hugetlbfs(struct hstate *h) +{ + struct hugetlbfs_fs_context *ctx; + struct fs_context *fc; + struct vfsmount *mnt; + int ret; + + fc = vfs_new_fs_context(&hugetlbfs_fs_type, NULL, 0, 0, + FS_CONTEXT_FOR_KERNEL_MOUNT); + if (IS_ERR(fc)) { + ret = PTR_ERR(fc); + goto err; + } + + ctx = fc->fs_private; + ctx->hstate = h; + + ret = vfs_get_tree(fc); + if (ret < 0) + goto err_fc; + + mnt = vfs_create_mount(fc, 0); + if (IS_ERR(mnt)) { + ret = PTR_ERR(mnt); + goto err_fc; + } + + put_fs_context(fc); + return mnt; + +err_fc: + put_fs_context(fc); +err: + pr_err("Cannot mount internal hugetlbfs for page size %uK", + 1U << (h->order + PAGE_SHIFT - 10)); + return ERR_PTR(ret); +} + static int __init init_hugetlbfs_fs(void) { + struct vfsmount *mnt; struct hstate *h; int error; int i; @@ -1396,24 +1542,16 @@ static int __init init_hugetlbfs_fs(void) i = 0; for_each_hstate(h) { - char buf[50]; - unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10); - - snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb); - hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type, - 
buf); - - if (IS_ERR(hugetlbfs_vfsmount[i])) { - pr_err("Cannot mount internal hugetlbfs for " - "page size %uK", ps_kb); - error = PTR_ERR(hugetlbfs_vfsmount[i]); - hugetlbfs_vfsmount[i] = NULL; + mnt = mount_one_hugetlbfs(h); + if (IS_ERR(mnt) && i == 0) { + error = PTR_ERR(mnt); + goto out; } + hugetlbfs_vfsmount[i] = mnt; i++; } - /* Non default hstates are optional */ - if (!IS_ERR_OR_NULL(hugetlbfs_vfsmount[default_hstate_idx])) - return 0; + + return 0; out: kmem_cache_destroy(hugetlbfs_inode_cachep); diff --git a/fs/internal.h b/fs/internal.h index d410186bc369..73942ff5aa09 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -52,8 +52,21 @@ int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied, extern void __init chrdev_init(void); /* + * fs_context.c + */ +extern const struct fs_context_operations legacy_fs_context_ops; +extern int legacy_init_fs_context(struct fs_context *fc, struct dentry *dentry); + +/* + * fsopen.c + */ +extern void vfs_clean_context(struct fs_context *fc); + +/* * namei.c */ +extern int filename_lookup(int dfd, struct filename *name, unsigned flags, + struct path *path, struct path *root); extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); @@ -72,6 +85,7 @@ int do_linkat(int olddfd, const char __user *oldname, int newdfd, */ extern void *copy_mount_options(const void __user *); extern char *copy_mount_string(const void __user *); +extern int parse_monolithic_mount_data(struct fs_context *, void *, size_t); extern struct vfsmount *lookup_mnt(const struct path *); extern int finish_automount(struct vfsmount *, struct path *); @@ -85,6 +99,7 @@ extern int __mnt_want_write_file(struct file *); extern void __mnt_drop_write(struct vfsmount *); extern void __mnt_drop_write_file(struct file *); +extern void dissolve_on_fput(struct vfsmount *); /* * fs_struct.c */ @@ -99,10 +114,8 
@@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *); /* * super.c */ -extern int do_remount_sb(struct super_block *, int, void *, int); +extern int reconfigure_super(struct fs_context *); extern bool trylock_super(struct super_block *sb); -extern struct dentry *mount_fs(struct file_system_type *, - int, const char *, void *); extern struct super_block *user_get_super(dev_t); /* diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 488a9e7f8f66..729aac13299c 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -112,7 +112,8 @@ static void destroy_inodecache(void) kmem_cache_destroy(isofs_inode_cachep); } -static int isofs_remount(struct super_block *sb, int *flags, char *data) +static int isofs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { sync_filesystem(sb); if (!(*flags & SB_RDONLY)) @@ -620,7 +621,8 @@ static bool rootdir_empty(struct super_block *sb, unsigned long block) * Note: a check_disk_change() has been done immediately prior * to this call, so we don't need to check again. 
*/ -static int isofs_fill_super(struct super_block *s, void *data, int silent) +static int isofs_fill_super(struct super_block *s, void *data, size_t data_size, + int silent) { struct buffer_head *bh = NULL, *pri_bh = NULL; struct hs_primary_descriptor *h_pri = NULL; @@ -1565,9 +1567,10 @@ struct inode *__isofs_iget(struct super_block *sb, } static struct dentry *isofs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + isofs_fill_super); } static struct file_system_type iso9660_fs_type = { diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 902a7dd10e5c..793ad30970ff 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -238,7 +238,8 @@ static int jffs2_parse_options(struct jffs2_sb_info *c, char *data) return 0; } -static int jffs2_remount_fs(struct super_block *sb, int *flags, char *data) +static int jffs2_remount_fs(struct super_block *sb, int *flags, + char *data, size_t data_size) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); int err; @@ -267,7 +268,8 @@ static const struct super_operations jffs2_super_operations = /* * fill in the superblock */ -static int jffs2_fill_super(struct super_block *sb, void *data, int silent) +static int jffs2_fill_super(struct super_block *sb, + void *data, size_t data_size, int silent) { struct jffs2_sb_info *c; int ret; @@ -310,9 +312,9 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent) static struct dentry *jffs2_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, size_t data_size) { - return mount_mtd(fs_type, flags, dev_name, data, jffs2_fill_super); + return mount_mtd(fs_type, flags, dev_name, data, data_size, jffs2_fill_super); } static void jffs2_put_super (struct super_block *sb) diff --git 
a/fs/jfs/super.c b/fs/jfs/super.c index 65d8fc87ab11..427f6be9ea3d 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -456,7 +456,8 @@ cleanup: return 0; } -static int jfs_remount(struct super_block *sb, int *flags, char *data) +static int jfs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { s64 newLVSize = 0; int rc = 0; @@ -516,7 +517,8 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) return 0; } -static int jfs_fill_super(struct super_block *sb, void *data, int silent) +static int jfs_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct jfs_sb_info *sbi; struct inode *inode; @@ -698,9 +700,10 @@ out: } static struct dentry *jfs_do_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, jfs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + jfs_fill_super); } static int jfs_sync_fs(struct super_block *sb, int wait) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index fdf527b6d79c..7d56b624e0dc 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -17,18 +17,19 @@ #include <linux/namei.h> #include <linux/seq_file.h> #include <linux/exportfs.h> +#include <linux/fsinfo.h> #include "kernfs-internal.h" struct kmem_cache *kernfs_node_cache; -static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data) +int kernfs_reconfigure(struct fs_context *fc) { - struct kernfs_root *root = kernfs_info(sb)->root; + struct kernfs_root *root = kernfs_info(fc->root->d_sb)->root; struct kernfs_syscall_ops *scops = root->syscall_ops; - if (scops && scops->remount_fs) - return scops->remount_fs(root, flags, data); + if (scops && scops->reconfigure) + return scops->reconfigure(root, fc); return 0; } @@ -55,14 +56,28 @@ static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry) 
return 0; } +static int kernfs_sop_fsinfo(struct path *path, struct fsinfo_kparams *params) +{ + struct kernfs_root *root = kernfs_root(kernfs_dentry_node(path->dentry)); + struct kernfs_syscall_ops *scops = root->syscall_ops; + int ret; + + if (scops && scops->fsinfo) { + ret = scops->fsinfo(root, params); + if (ret != -EAGAIN) + return ret; + } + return generic_fsinfo(path, params); +} + const struct super_operations kernfs_sops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .evict_inode = kernfs_evict_inode, - .remount_fs = kernfs_sop_remount_fs, .show_options = kernfs_sop_show_options, .show_path = kernfs_sop_show_path, + .fsinfo = kernfs_sop_fsinfo, }; /* @@ -218,7 +233,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, } while (true); } -static int kernfs_fill_super(struct super_block *sb, unsigned long magic) +static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc) { struct kernfs_super_info *info = kernfs_info(sb); struct inode *inode; @@ -229,7 +244,7 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic) sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; - sb->s_magic = magic; + sb->s_magic = kfc->magic; sb->s_op = &kernfs_sops; sb->s_xattr = kernfs_xattr_handlers; if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP) @@ -259,21 +274,20 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic) return 0; } -static int kernfs_test_super(struct super_block *sb, void *data) +static int kernfs_test_super(struct super_block *sb, struct fs_context *fc) { struct kernfs_super_info *sb_info = kernfs_info(sb); - struct kernfs_super_info *info = data; + struct kernfs_super_info *info = fc->s_fs_info; return sb_info->root == info->root && sb_info->ns == info->ns; } -static int kernfs_set_super(struct super_block *sb, void *data) +static int kernfs_set_super(struct super_block *sb, struct fs_context *fc) { - int 
error; - error = set_anon_super(sb, data); - if (!error) - sb->s_fs_info = data; - return error; + struct kernfs_fs_context *kfc = fc->fs_private; + + kfc->ns_tag = NULL; + return set_anon_super_fc(sb, fc); } /** @@ -290,63 +304,60 @@ const void *kernfs_super_ns(struct super_block *sb) } /** - * kernfs_mount_ns - kernfs mount helper - * @fs_type: file_system_type of the fs being mounted - * @flags: mount flags specified for the mount - * @root: kernfs_root of the hierarchy being mounted - * @magic: file system specific magic number - * @new_sb_created: tell the caller if we allocated a new superblock - * @ns: optional namespace tag of the mount + * kernfs_get_tree - kernfs filesystem access/retrieval helper + * @fc: The filesystem context. * - * This is to be called from each kernfs user's file_system_type->mount() - * implementation, which should pass through the specified @fs_type and - * @flags, and specify the hierarchy and namespace tag to mount via @root - * and @ns, respectively. - * - * The return value can be passed to the vfs layer verbatim. + * This is to be called from each kernfs user's fs_context->ops->get_tree() + * implementation, which should set the specified ->@fs_type and ->@flags, and + * specify the hierarchy and namespace tag to mount via ->@root and ->@ns, + * respectively. 
*/ -struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, unsigned long magic, - bool *new_sb_created, const void *ns) +int kernfs_get_tree(struct fs_context *fc) { + struct kernfs_fs_context *kfc = fc->fs_private; struct super_block *sb; struct kernfs_super_info *info; int error; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) - return ERR_PTR(-ENOMEM); + return -ENOMEM; - info->root = root; - info->ns = ns; + info->root = kfc->root; + info->ns = kfc->ns_tag; INIT_LIST_HEAD(&info->node); - sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags, - &init_user_ns, info); - if (IS_ERR(sb) || sb->s_fs_info != info) - kfree(info); + fc->s_fs_info = info; + sb = sget_fc(fc, kernfs_test_super, kernfs_set_super); if (IS_ERR(sb)) - return ERR_CAST(sb); - - if (new_sb_created) - *new_sb_created = !sb->s_root; + return PTR_ERR(sb); if (!sb->s_root) { struct kernfs_super_info *info = kernfs_info(sb); - error = kernfs_fill_super(sb, magic); + kfc->new_sb_created = true; + + error = kernfs_fill_super(sb, kfc); if (error) { deactivate_locked_super(sb); - return ERR_PTR(error); + return error; } sb->s_flags |= SB_ACTIVE; mutex_lock(&kernfs_mutex); - list_add(&info->node, &root->supers); + list_add(&info->node, &info->root->supers); mutex_unlock(&kernfs_mutex); } - return dget(sb->s_root); + fc->root = dget(sb->s_root); + return 0; +} + +void kernfs_free_fs_context(struct fs_context *fc) +{ + /* Note that we don't deal with kfc->ns_tag here. 
*/ + kfree(fc->s_fs_info); + fc->s_fs_info = NULL; } /** diff --git a/fs/libfs.c b/fs/libfs.c index 0fb590d79f30..b1744c071ab0 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -9,6 +9,7 @@ #include <linux/slab.h> #include <linux/cred.h> #include <linux/mount.h> +#include <linux/fs_context.h> #include <linux/vfs.h> #include <linux/quotaops.h> #include <linux/mutex.h> @@ -574,13 +575,30 @@ static DEFINE_SPINLOCK(pin_fs_lock); int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count) { + struct fs_context *fc; struct vfsmount *mnt = NULL; + int ret; + spin_lock(&pin_fs_lock); if (unlikely(!*mount)) { spin_unlock(&pin_fs_lock); - mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL); + + fc = vfs_new_fs_context(type, NULL, 0, 0, + FS_CONTEXT_FOR_KERNEL_MOUNT); + if (IS_ERR(fc)) + return PTR_ERR(fc); + + ret = vfs_get_tree(fc); + if (ret < 0) { + put_fs_context(fc); + return ret; + } + + mnt = vfs_create_mount(fc, 0); + put_fs_context(fc); if (IS_ERR(mnt)) return PTR_ERR(mnt); + spin_lock(&pin_fs_lock); if (!*mount) *mount = mnt; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 72e308c3e66b..3d91d9096b24 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -22,7 +22,8 @@ static int minix_write_inode(struct inode *inode, struct writeback_control *wbc); static int minix_statfs(struct dentry *dentry, struct kstatfs *buf); -static int minix_remount (struct super_block * sb, int * flags, char * data); +static int minix_remount (struct super_block * sb, int * flags, + char * data, size_t data_size); static void minix_evict_inode(struct inode *inode) { @@ -118,7 +119,8 @@ static const struct super_operations minix_sops = { .remount_fs = minix_remount, }; -static int minix_remount (struct super_block * sb, int * flags, char * data) +static int minix_remount (struct super_block * sb, int * flags, + char * data, size_t data_size) { struct minix_sb_info * sbi = minix_sb(sb); struct minix_super_block * ms; @@ -155,7 +157,8 @@ static int 
minix_remount (struct super_block * sb, int * flags, char * data) return 0; } -static int minix_fill_super(struct super_block *s, void *data, int silent) +static int minix_fill_super(struct super_block *s, void *data, size_t data_size, + int silent) { struct buffer_head *bh; struct buffer_head **map; @@ -651,9 +654,10 @@ void minix_truncate(struct inode * inode) } static struct dentry *minix_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, minix_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + minix_fill_super); } static struct file_system_type minix_fs_type = { diff --git a/fs/namei.c b/fs/namei.c index 0cab6494978c..fb913148d4d1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2333,8 +2333,8 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path return err; } -static int filename_lookup(int dfd, struct filename *name, unsigned flags, - struct path *path, struct path *root) +int filename_lookup(int dfd, struct filename *name, unsigned flags, + struct path *path, struct path *root) { int retval; struct nameidata nd; diff --git a/fs/namespace.c b/fs/namespace.c index a7f91265ea67..7669ead735b8 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -20,12 +20,15 @@ #include <linux/init.h> /* init_rootfs */ #include <linux/fs_struct.h> /* get_fs_root et.al. */ #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ +#include <linux/file.h> #include <linux/uaccess.h> #include <linux/proc_ns.h> #include <linux/magic.h> #include <linux/memblock.h> #include <linux/task_work.h> #include <linux/sched/task.h> +#include <uapi/linux/mount.h> +#include <linux/fs_context.h> #include "pnode.h" #include "internal.h" @@ -245,13 +248,9 @@ out_free_cache: * mnt_want/drop_write() will _keep_ the filesystem * r/w. 
*/ -int __mnt_is_readonly(struct vfsmount *mnt) +bool __mnt_is_readonly(struct vfsmount *mnt) { - if (mnt->mnt_flags & MNT_READONLY) - return 1; - if (sb_rdonly(mnt->mnt_sb)) - return 1; - return 0; + return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb); } EXPORT_SYMBOL_GPL(__mnt_is_readonly); @@ -507,11 +506,12 @@ static int mnt_make_readonly(struct mount *mnt) return ret; } -static void __mnt_unmake_readonly(struct mount *mnt) +static int __mnt_unmake_readonly(struct mount *mnt) { lock_mount_hash(); mnt->mnt.mnt_flags &= ~MNT_READONLY; unlock_mount_hash(); + return 0; } int sb_prepare_remount_readonly(struct super_block *sb) @@ -942,55 +942,6 @@ static struct mount *skip_mnt_tree(struct mount *p) return p; } -struct vfsmount * -vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) -{ - struct mount *mnt; - struct dentry *root; - - if (!type) - return ERR_PTR(-ENODEV); - - mnt = alloc_vfsmnt(name); - if (!mnt) - return ERR_PTR(-ENOMEM); - - if (flags & SB_KERNMOUNT) - mnt->mnt.mnt_flags = MNT_INTERNAL; - - root = mount_fs(type, flags, name, data); - if (IS_ERR(root)) { - mnt_free_id(mnt); - free_vfsmnt(mnt); - return ERR_CAST(root); - } - - mnt->mnt.mnt_root = root; - mnt->mnt.mnt_sb = root->d_sb; - mnt->mnt_mountpoint = mnt->mnt.mnt_root; - mnt->mnt_parent = mnt; - lock_mount_hash(); - list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); - unlock_mount_hash(); - return &mnt->mnt; -} -EXPORT_SYMBOL_GPL(vfs_kern_mount); - -struct vfsmount * -vfs_submount(const struct dentry *mountpoint, struct file_system_type *type, - const char *name, void *data) -{ - /* Until it is worked out how to pass the user namespace - * through from the parent mount to the submount don't support - * unprivileged mounts with submounts. 
- */ - if (mountpoint->d_sb->s_user_ns != &init_user_ns) - return ERR_PTR(-EPERM); - - return vfs_kern_mount(type, SB_SUBMOUNT, name, data); -} -EXPORT_SYMBOL_GPL(vfs_submount); - static struct mount *clone_mnt(struct mount *old, struct dentry *root, int flag) { @@ -1466,6 +1417,40 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) static void shrink_submounts(struct mount *mnt); +static int do_umount_root(struct super_block *sb) +{ + int ret = 0; + struct fs_context fc = { + .purpose = FS_CONTEXT_FOR_UMOUNT, + .fs_type = sb->s_type, + .root = sb->s_root, + .sb_flags = SB_RDONLY, + .sb_flags_mask = SB_RDONLY, + }; + + down_write(&sb->s_umount); + if (!sb_rdonly(sb)) { + int ret; + + if (fc.fs_type->init_fs_context) + ret = fc.fs_type->init_fs_context(&fc, NULL); + else + ret = legacy_init_fs_context(&fc, NULL); + + switch (ret) { + case 0: + ret = reconfigure_super(&fc); + fc.ops->free(&fc); + break; + case -EOPNOTSUPP: + ret = 0; + break; + } + } + up_write(&sb->s_umount); + return ret; +} + static int do_umount(struct mount *mnt, int flags) { struct super_block *sb = mnt->mnt.mnt_sb; @@ -1531,11 +1516,7 @@ static int do_umount(struct mount *mnt, int flags) */ if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; - down_write(&sb->s_umount); - if (!sb_rdonly(sb)) - retval = do_remount_sb(sb, SB_RDONLY, NULL, 0); - up_write(&sb->s_umount); - return retval; + return do_umount_root(sb); } namespace_lock(); @@ -1790,6 +1771,18 @@ struct vfsmount *collect_mounts(const struct path *path) return &tree->mnt; } +void dissolve_on_fput(struct vfsmount *mnt) +{ + namespace_lock(); + lock_mount_hash(); + if (!real_mount(mnt)->mnt_ns && !(mnt->mnt_flags & MNT_UMOUNT)) { + mntget(mnt); + umount_tree(real_mount(mnt), UMOUNT_CONNECTED); + } + unlock_mount_hash(); + namespace_unlock(); +} + void drop_collected_mounts(struct vfsmount *mnt) { namespace_lock(); @@ -1971,7 +1964,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, return 
PTR_ERR(smp); /* Is there space to add these mounts to the mount namespace? */ - if (!parent_path) { + if (!source_mnt->mnt_ns) { err = count_mounts(ns, source_mnt); if (err) goto out; @@ -2149,6 +2142,30 @@ static bool has_locked_children(struct mount *mnt, struct dentry *dentry) return false; } +static struct mount *__do_loopback(struct path *old_path, int recurse) +{ + struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt); + + if (IS_MNT_UNBINDABLE(old)) + return mnt; + + if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations) + return mnt; + + if (!recurse && has_locked_children(old, old_path->dentry)) + return mnt; + + if (recurse) + mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE); + else + mnt = clone_mnt(old, old_path->dentry, 0); + + if (!IS_ERR(mnt)) + mnt->mnt.mnt_flags &= ~MNT_LOCKED; + + return mnt; +} + /* * do loopback mount. */ @@ -2156,7 +2173,7 @@ static int do_loopback(struct path *path, const char *old_name, int recurse) { struct path old_path; - struct mount *mnt = NULL, *old, *parent; + struct mount *mnt = NULL, *parent; struct mountpoint *mp; int err; if (!old_name || !*old_name) @@ -2170,38 +2187,21 @@ static int do_loopback(struct path *path, const char *old_name, goto out; mp = lock_mount(path); - err = PTR_ERR(mp); - if (IS_ERR(mp)) + if (IS_ERR(mp)) { + err = PTR_ERR(mp); goto out; + } - old = real_mount(old_path.mnt); parent = real_mount(path->mnt); - - err = -EINVAL; - if (IS_MNT_UNBINDABLE(old)) - goto out2; - if (!check_mnt(parent)) goto out2; - if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations) - goto out2; - - if (!recurse && has_locked_children(old, old_path.dentry)) - goto out2; - - if (recurse) - mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE); - else - mnt = clone_mnt(old, old_path.dentry, 0); - + mnt = __do_loopback(&old_path, recurse); if (IS_ERR(mnt)) { err = PTR_ERR(mnt); goto out2; } - mnt->mnt.mnt_flags &= ~MNT_LOCKED; - err = graft_tree(mnt, 
parent, mp); if (err) { lock_mount_hash(); @@ -2215,21 +2215,174 @@ out: return err; } -static int change_mount_flags(struct vfsmount *mnt, int ms_flags) +SYSCALL_DEFINE3(open_tree, int, dfd, const char *, filename, unsigned, flags) { - int error = 0; - int readonly_request = 0; + struct file *file; + struct path path; + int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; + bool detached = flags & OPEN_TREE_CLONE; + int error; + int fd; + + BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC); + + if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE | + AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE | + OPEN_TREE_CLOEXEC)) + return -EINVAL; + + if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE) + return -EINVAL; + + if (flags & AT_NO_AUTOMOUNT) + lookup_flags &= ~LOOKUP_AUTOMOUNT; + if (flags & AT_SYMLINK_NOFOLLOW) + lookup_flags &= ~LOOKUP_FOLLOW; + if (flags & AT_EMPTY_PATH) + lookup_flags |= LOOKUP_EMPTY; + + if (detached && !may_mount()) + return -EPERM; + + fd = get_unused_fd_flags(flags & O_CLOEXEC); + if (fd < 0) + return fd; - if (ms_flags & MS_RDONLY) - readonly_request = 1; - if (readonly_request == __mnt_is_readonly(mnt)) + error = user_path_at(dfd, filename, lookup_flags, &path); + if (error) + goto out; + + if (detached) { + struct mount *mnt = __do_loopback(&path, flags & AT_RECURSIVE); + if (IS_ERR(mnt)) { + error = PTR_ERR(mnt); + goto out2; + } + mntput(path.mnt); + path.mnt = &mnt->mnt; + } + + file = dentry_open(&path, O_PATH, current_cred()); + if (IS_ERR(file)) { + error = PTR_ERR(file); + goto out3; + } + + if (detached) + file->f_mode |= FMODE_NEED_UNMOUNT; + path_put(&path); + fd_install(fd, file); + return fd; + +out3: + if (detached) + dissolve_on_fput(path.mnt); +out2: + path_put(&path); +out: + put_unused_fd(fd); + return error; +} + +/* + * Don't allow locked mount flags to be cleared. + * + * No locks need to be held here while testing the various MNT_LOCK + * flags because those flags can never be cleared once they are set. 
+ */ +static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags) +{ + unsigned int fl = mnt->mnt.mnt_flags; + + if ((fl & MNT_LOCK_READONLY) && + !(mnt_flags & MNT_READONLY)) + return false; + + if ((fl & MNT_LOCK_NODEV) && + !(mnt_flags & MNT_NODEV)) + return false; + + if ((fl & MNT_LOCK_NOSUID) && + !(mnt_flags & MNT_NOSUID)) + return false; + + if ((fl & MNT_LOCK_NOEXEC) && + !(mnt_flags & MNT_NOEXEC)) + return false; + + if ((fl & MNT_LOCK_ATIME) && + ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) + return false; + + return true; +} + +static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags) +{ + bool readonly_request = (mnt_flags & MNT_READONLY); + + if (readonly_request == __mnt_is_readonly(&mnt->mnt)) return 0; if (readonly_request) - error = mnt_make_readonly(real_mount(mnt)); - else - __mnt_unmake_readonly(real_mount(mnt)); - return error; + return mnt_make_readonly(mnt); + + return __mnt_unmake_readonly(mnt); +} + +/* + * Update the user-settable attributes on a mount. The caller must hold + * sb->s_umount for writing. + */ +static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags) +{ + lock_mount_hash(); + mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; + mnt->mnt.mnt_flags = mnt_flags; + touch_mnt_namespace(mnt->mnt_ns); + unlock_mount_hash(); +} + +/* + * Handle reconfiguration of the mountpoint only without alteration of the + * superblock it refers to. This is triggered by specifying MS_REMOUNT|MS_BIND + * to mount(2). 
+ */ +static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags) +{ + struct super_block *sb = path->mnt->mnt_sb; + struct mount *mnt = real_mount(path->mnt); + int ret; + + if (!check_mnt(mnt)) + return -EINVAL; + + if (path->dentry != mnt->mnt.mnt_root) + return -EINVAL; + + if (!can_change_locked_flags(mnt, mnt_flags)) + return -EPERM; + + down_write(&sb->s_umount); + ret = change_mount_ro_state(mnt, mnt_flags); + if (ret == 0) + set_mount_attributes(mnt, mnt_flags); + up_write(&sb->s_umount); + return ret; +} + +/* + * Parse the monolithic page of mount data given to sys_mount(). + */ +int parse_monolithic_mount_data(struct fs_context *fc, void *data, size_t data_size) +{ + int (*monolithic_mount_data)(struct fs_context *, void *, size_t); + + monolithic_mount_data = fc->ops->parse_monolithic; + if (!monolithic_mount_data) + monolithic_mount_data = generic_parse_monolithic; + + return monolithic_mount_data(fc, data, data_size); } /* @@ -2238,11 +2391,12 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags) * on it - tough luck. */ static int do_remount(struct path *path, int ms_flags, int sb_flags, - int mnt_flags, void *data) + int mnt_flags, void *data, size_t data_size) { int err; struct super_block *sb = path->mnt->mnt_sb; struct mount *mnt = real_mount(path->mnt); + struct fs_context *fc; if (!check_mnt(mnt)) return -EINVAL; @@ -2250,52 +2404,41 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags, if (path->dentry != path->mnt->mnt_root) return -EINVAL; - /* Don't allow changing of locked mnt flags. - * - * No locks need to be held here while testing the various - * MNT_LOCK flags because those flags can never be cleared - * once they are set. 
- */ - if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) && - !(mnt_flags & MNT_READONLY)) { - return -EPERM; - } - if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && - !(mnt_flags & MNT_NODEV)) { - return -EPERM; - } - if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && - !(mnt_flags & MNT_NOSUID)) { - return -EPERM; - } - if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) && - !(mnt_flags & MNT_NOEXEC)) { - return -EPERM; - } - if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) && - ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) { + if (!can_change_locked_flags(mnt, mnt_flags)) return -EPERM; + + fc = vfs_new_fs_context(path->dentry->d_sb->s_type, + path->dentry, sb_flags, MS_RMT_MASK, + FS_CONTEXT_FOR_RECONFIGURE); + err = PTR_ERR(fc); + if (IS_ERR(fc)) + goto err; + + err = parse_monolithic_mount_data(fc, data, data_size); + if (err < 0) + goto err_fc; + + if (fc->ops->validate) { + err = fc->ops->validate(fc); + if (err < 0) + goto err_fc; } - err = security_sb_remount(sb, data); + err = security_fs_context_validate(fc); if (err) - return err; + goto err_fc; down_write(&sb->s_umount); - if (ms_flags & MS_BIND) - err = change_mount_flags(path->mnt, ms_flags); - else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) - err = -EPERM; - else - err = do_remount_sb(sb, sb_flags, data, 0); - if (!err) { - lock_mount_hash(); - mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; - mnt->mnt.mnt_flags = mnt_flags; - touch_mnt_namespace(mnt->mnt_ns); - unlock_mount_hash(); + err = -EPERM; + if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { + err = reconfigure_super(fc); + if (!err) + set_mount_attributes(mnt, mnt_flags); } up_write(&sb->s_umount); +err_fc: + put_fs_context(fc); +err: return err; } @@ -2309,48 +2452,74 @@ static inline int tree_contains_unbindable(struct mount *mnt) return 0; } -static int do_move_mount(struct path *path, const char *old_name) +/* + * Check that there aren't references to earlier/same mount namespaces in the + * specified subtree. 
Such references can act as pins for mount namespaces + * that aren't checked by the mount-cycle checking code, thereby allowing + * cycles to be made. + */ +static bool check_for_nsfs_mounts(struct mount *subtree) { - struct path old_path, parent_path; + struct mount *p; + bool ret = false; + + lock_mount_hash(); + for (p = subtree; p; p = next_mnt(p, subtree)) + if (mnt_ns_loop(p->mnt.mnt_root)) + goto out; + + ret = true; +out: + unlock_mount_hash(); + return ret; +} + +static int do_move_mount(struct path *old_path, struct path *new_path) +{ + struct path parent_path = {.mnt = NULL, .dentry = NULL}; struct mount *p; struct mount *old; struct mountpoint *mp; int err; - if (!old_name || !*old_name) - return -EINVAL; - err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); - if (err) - return err; + bool attached; - mp = lock_mount(path); + mp = lock_mount(new_path); err = PTR_ERR(mp); if (IS_ERR(mp)) goto out; - old = real_mount(old_path.mnt); - p = real_mount(path->mnt); + old = real_mount(old_path->mnt); + p = real_mount(new_path->mnt); err = -EINVAL; - if (!check_mnt(p) || !check_mnt(old)) + /* The mountpoint must be in our namespace. */ + if (!check_mnt(p)) + goto out1; + /* The thing moved should be either ours or completely unattached. */ + if (old->mnt_ns && !check_mnt(old)) goto out1; - if (old->mnt.mnt_flags & MNT_LOCKED) + attached = mnt_has_parent(old); + /* + * We need to allow open_tree(OPEN_TREE_CLONE) or fsmount() followed by + * move_mount(), but mustn't allow "/" to be moved. + */ + if (old->mnt_ns && !attached) goto out1; - err = -EINVAL; - if (old_path.dentry != old_path.mnt->mnt_root) + if (old->mnt.mnt_flags & (MNT_LOCKED | MNT_UMOUNT)) goto out1; - if (!mnt_has_parent(old)) + if (old_path->dentry != old_path->mnt->mnt_root) goto out1; - if (d_is_dir(path->dentry) != - d_is_dir(old_path.dentry)) + if (d_is_dir(new_path->dentry) != + d_is_dir(old_path->dentry)) goto out1; /* * Don't move a mount residing in a shared parent. 
*/ - if (IS_MNT_SHARED(old->mnt_parent)) + if (attached && IS_MNT_SHARED(old->mnt_parent)) goto out1; /* * Don't move a mount tree containing unbindable mounts to a destination @@ -2359,14 +2528,24 @@ static int do_move_mount(struct path *path, const char *old_name) if (IS_MNT_SHARED(p) && tree_contains_unbindable(old)) goto out1; err = -ELOOP; + if (!check_for_nsfs_mounts(old)) + goto out1; for (; mnt_has_parent(p); p = p->mnt_parent) if (p == old) goto out1; - err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path); + err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp, + attached ? &parent_path : NULL); if (err) goto out1; + /* If the mount wasn't already attached, we need to get an extra ref + * for it. We can do this post-attachment, even though we lost our + * ref, because we still hold the namespace lock. + */ + if (!attached) + mntget(&old->mnt); + /* if the mount is moved, it should no longer be expire * automatically */ list_del_init(&old->mnt_expire); @@ -2375,31 +2554,24 @@ out1: out: if (!err) path_put(&parent_path); - path_put(&old_path); return err; } -static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) +static int do_move_mount_old(struct path *path, const char *old_name) { + struct path old_path; int err; - const char *subtype = strchr(fstype, '.'); - if (subtype) { - subtype++; - err = -EINVAL; - if (!subtype[0]) - goto err; - } else - subtype = ""; - - mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL); - err = -ENOMEM; - if (!mnt->mnt_sb->s_subtype) - goto err; - return mnt; - err: - mntput(mnt); - return ERR_PTR(err); + if (!old_name || !*old_name) + return -EINVAL; + + err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); + if (err) + return err; + + err = do_move_mount(&old_path, path); + path_put(&old_path); + return err; } /* @@ -2446,43 +2618,109 @@ unlock: return err; } -static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags); +static bool 
mount_too_revealing(const struct super_block *sb, int *new_mnt_flags); + +/* + * Create a new mount using a superblock configuration and request it + * be added to the namespace tree. + */ +static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint, + unsigned int mnt_flags) +{ + struct vfsmount *mnt; + int ret; + + ret = security_sb_mountpoint(fc, mountpoint, + mnt_flags & ~MNT_INTERNAL_FLAGS); + if (ret < 0) + return ret; + + if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) { + pr_warn("VFS: Mount too revealing\n"); + return -EPERM; + } + + mnt = vfs_create_mount(fc, mnt_flags); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + + ret = do_add_mount(real_mount(mnt), mountpoint, mnt_flags); + if (ret < 0) + goto err_mnt; + return ret; + +err_mnt: + mntput(mnt); + return ret; +} /* * create a new mount for userspace and request it to be added into the * namespace's tree */ -static int do_new_mount(struct path *path, const char *fstype, int sb_flags, - int mnt_flags, const char *name, void *data) +static int do_new_mount(struct path *mountpoint, const char *fstype, + int sb_flags, int mnt_flags, const char *name, + void *data, size_t data_size) { - struct file_system_type *type; - struct vfsmount *mnt; + struct file_system_type *fs_type; + struct fs_context *fc; + const char *subtype = NULL; int err; if (!fstype) return -EINVAL; - type = get_fs_type(fstype); - if (!type) - return -ENODEV; + err = -ENODEV; + fs_type = get_fs_type(fstype); + if (!fs_type) + goto out; - mnt = vfs_kern_mount(type, sb_flags, name, data); - if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && - !mnt->mnt_sb->s_subtype) - mnt = fs_set_subtype(mnt, fstype); + if (fs_type->fs_flags & FS_HAS_SUBTYPE) { + subtype = strchr(fstype, '.'); + if (subtype) { + subtype++; + if (!subtype[0]) { + put_filesystem(fs_type); + return -EINVAL; + } + } else { + subtype = ""; + } + } - put_filesystem(type); - if (IS_ERR(mnt)) - return PTR_ERR(mnt); + fc = vfs_new_fs_context(fs_type, NULL, 
sb_flags, sb_flags, + FS_CONTEXT_FOR_USER_MOUNT); + put_filesystem(fs_type); + if (IS_ERR(fc)) { + err = PTR_ERR(fc); + goto out; + } - if (mount_too_revealing(mnt, &mnt_flags)) { - mntput(mnt); - return -EPERM; + if (subtype) { + err = vfs_parse_fs_string(fc, "subtype", + subtype, strlen(subtype)); + if (err < 0) + goto out; } - err = do_add_mount(real_mount(mnt), path, mnt_flags); - if (err) - mntput(mnt); + if (name) { + err = vfs_parse_fs_string(fc, "source", name, strlen(name)); + if (err < 0) + goto out_fc; + } + + err = parse_monolithic_mount_data(fc, data, data_size); + if (err < 0) + goto out_fc; + + err = vfs_get_tree(fc); + if (err < 0) + goto out_fc; + + err = do_new_mount_fc(fc, mountpoint, mnt_flags); +out_fc: + put_fs_context(fc); +out: return err; } @@ -2724,6 +2962,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, { struct path path; unsigned int mnt_flags = 0, sb_flags; + size_t data_size = data_page ? PAGE_SIZE : 0; int retval = 0; /* Discard magic */ @@ -2742,8 +2981,8 @@ long do_mount(const char *dev_name, const char __user *dir_name, if (retval) return retval; - retval = security_sb_mount(dev_name, &path, - type_page, flags, data_page); + retval = security_sb_mount(dev_name, &path, type_page, flags, + data_page, data_size); if (!retval && !may_mount()) retval = -EPERM; if (!retval && (flags & SB_MANDLOCK) && !may_mandlock()) @@ -2788,18 +3027,20 @@ long do_mount(const char *dev_name, const char __user *dir_name, SB_LAZYTIME | SB_I_VERSION); - if (flags & MS_REMOUNT) + if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND)) + retval = do_reconfigure_mnt(&path, mnt_flags); + else if (flags & MS_REMOUNT) retval = do_remount(&path, flags, sb_flags, mnt_flags, - data_page); + data_page, data_size); else if (flags & MS_BIND) retval = do_loopback(&path, dev_name, flags & MS_REC); else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) retval = do_change_type(&path, flags); else if (flags & MS_MOVE) - retval = 
do_move_mount(&path, dev_name); + retval = do_move_mount_old(&path, dev_name); else retval = do_new_mount(&path, type_page, sb_flags, mnt_flags, - dev_name, data_page); + dev_name, data_page, data_size); dput_out: path_put(&path); return retval; @@ -3031,6 +3272,293 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, return ksys_mount(dev_name, dir_name, type, flags, data); } +/** + * vfs_create_mount - Create a mount for a configured superblock + * @fc: The configuration context with the superblock attached + * @mnt_flags: The mount flags to apply + * + * Create a mount to an already configured superblock. If necessary, the + * caller should invoke vfs_get_tree() before calling this. + * + * Note that this does not attach the mount to anything. + */ +struct vfsmount *vfs_create_mount(struct fs_context *fc, unsigned int mnt_flags) +{ + struct mount *mnt; + + if (!fc->root) + return ERR_PTR(-EINVAL); + + mnt = alloc_vfsmnt(fc->source ?: "none"); + if (!mnt) + return ERR_PTR(-ENOMEM); + + if (fc->purpose == FS_CONTEXT_FOR_KERNEL_MOUNT) + /* It's a longterm mount, don't release mnt until we unmount + * before file sys is unregistered + */ + mnt_flags |= MNT_INTERNAL; + + atomic_inc(&fc->root->d_sb->s_active); + mnt->mnt.mnt_flags = mnt_flags; + mnt->mnt.mnt_sb = fc->root->d_sb; + mnt->mnt.mnt_root = dget(fc->root); + mnt->mnt_mountpoint = mnt->mnt.mnt_root; + mnt->mnt_parent = mnt; + + lock_mount_hash(); + list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts); + unlock_mount_hash(); + return &mnt->mnt; +} +EXPORT_SYMBOL(vfs_create_mount); + +struct vfsmount *vfs_kern_mount(struct file_system_type *type, + int sb_flags, const char *devname, + void *data, size_t data_size) +{ + struct fs_context *fc; + struct vfsmount *mnt; + int ret; + + if (!type) + return ERR_PTR(-EINVAL); + + fc = vfs_new_fs_context(type, NULL, sb_flags, sb_flags, + sb_flags & SB_KERNMOUNT ? 
+ FS_CONTEXT_FOR_KERNEL_MOUNT : + FS_CONTEXT_FOR_USER_MOUNT); + if (IS_ERR(fc)) + return ERR_CAST(fc); + + if (devname) { + ret = vfs_parse_fs_string(fc, "source", + devname, strlen(devname)); + if (ret < 0) + goto err_fc; + } + + ret = parse_monolithic_mount_data(fc, data, data_size); + if (ret < 0) + goto err_fc; + + ret = vfs_get_tree(fc); + if (ret < 0) + goto err_fc; + + mnt = vfs_create_mount(fc, 0); +out: + put_fs_context(fc); + return mnt; +err_fc: + mnt = ERR_PTR(ret); + goto out; +} +EXPORT_SYMBOL_GPL(vfs_kern_mount); + +struct vfsmount * +vfs_submount(const struct dentry *mountpoint, struct file_system_type *type, + const char *name, void *data, size_t data_size) +{ + /* Until it is worked out how to pass the user namespace + * through from the parent mount to the submount don't support + * unprivileged mounts with submounts. + */ + if (mountpoint->d_sb->s_user_ns != &init_user_ns) + return ERR_PTR(-EPERM); + + return vfs_kern_mount(type, SB_SUBMOUNT, name, data, data_size); +} +EXPORT_SYMBOL_GPL(vfs_submount); + +struct vfsmount *kern_mount(struct file_system_type *type) +{ + return vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL, 0); +} +EXPORT_SYMBOL_GPL(kern_mount); + +/* + * Create a kernel mount representation for a new, prepared superblock + * (specified by fs_fd) and attach to an open_tree-like file descriptor. 
+ */ +SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, + unsigned int, attr_flags) +{ + struct fs_context *fc; + struct file *file; + struct path newmount; + struct fd f; + unsigned int mnt_flags = 0; + long ret; + + if (!may_mount()) + return -EPERM; + + if ((flags & ~(FSMOUNT_CLOEXEC)) != 0) + return -EINVAL; + + if (attr_flags & ~(MOUNT_ATTR_RDONLY | + MOUNT_ATTR_NOSUID | + MOUNT_ATTR_NODEV | + MOUNT_ATTR_NOEXEC | + MOUNT_ATTR__ATIME | + MOUNT_ATTR_NODIRATIME)) + return -EINVAL; + + if (attr_flags & MOUNT_ATTR_RDONLY) + mnt_flags |= MNT_READONLY; + if (attr_flags & MOUNT_ATTR_NOSUID) + mnt_flags |= MNT_NOSUID; + if (attr_flags & MOUNT_ATTR_NODEV) + mnt_flags |= MNT_NODEV; + if (attr_flags & MOUNT_ATTR_NOEXEC) + mnt_flags |= MNT_NOEXEC; + if (attr_flags & MOUNT_ATTR_NODIRATIME) + mnt_flags |= MNT_NODIRATIME; + + switch (attr_flags & MOUNT_ATTR__ATIME) { + case MOUNT_ATTR_STRICTATIME: + break; + case MOUNT_ATTR_NOATIME: + mnt_flags |= MNT_NOATIME; + break; + case MOUNT_ATTR_RELATIME: + mnt_flags |= MNT_RELATIME; + break; + default: + return -EINVAL; + } + + f = fdget(fs_fd); + if (!f.file) + return -EBADF; + + ret = -EINVAL; + if (f.file->f_op != &fscontext_fops) + goto err_fsfd; + + fc = f.file->private_data; + + /* There must be a valid superblock or we can't mount it */ + ret = -EINVAL; + if (!fc->root) + goto err_fsfd; + + ret = -EPERM; + if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) { + pr_warn("VFS: Mount too revealing\n"); + goto err_fsfd; + } + + ret = mutex_lock_interruptible(&fc->uapi_mutex); + if (ret < 0) + goto err_fsfd; + + ret = -EBUSY; + if (fc->phase != FS_CONTEXT_AWAITING_MOUNT) + goto err_unlock; + + ret = -EPERM; + if ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock()) + goto err_unlock; + + newmount.mnt = vfs_create_mount(fc, mnt_flags); + if (IS_ERR(newmount.mnt)) { + ret = PTR_ERR(newmount.mnt); + goto err_unlock; + } + newmount.dentry = dget(fc->root); + + /* We've done the mount bit - now move the file context into more or + 
* less the same state as if we'd done an fspick(). We don't want to + * do any memory allocation or anything like that at this point as we + * don't want to have to handle any errors incurred. + */ + vfs_clean_context(fc); + + /* Attach to an apparent O_PATH fd with a note that we need to unmount + * it, not just simply put it. + */ + file = dentry_open(&newmount, O_PATH, fc->cred); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto err_path; + } + file->f_mode |= FMODE_NEED_UNMOUNT; + + ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0); + if (ret >= 0) + fd_install(ret, file); + else + fput(file); + +err_path: + path_put(&newmount); +err_unlock: + mutex_unlock(&fc->uapi_mutex); +err_fsfd: + fdput(f); + return ret; +} + +/* + * Move a mount from one place to another. In combination with + * fsopen()/fsmount() this is used to install a new mount and in combination + * with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be used to copy + * a mount subtree. + * + * Note the flags value is a combination of MOVE_MOUNT_* flags. + */ +SYSCALL_DEFINE5(move_mount, + int, from_dfd, const char *, from_pathname, + int, to_dfd, const char *, to_pathname, + unsigned int, flags) +{ + struct path from_path, to_path; + unsigned int lflags; + int ret = 0; + + if (!may_mount()) + return -EPERM; + + if (flags & ~MOVE_MOUNT__MASK) + return -EINVAL; + + /* If someone gives a pathname, they aren't permitted to move + * from an fd that requires unmount as we can't get at the flag + * to clear it afterwards. 
+ */ + lflags = 0; + if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW; + if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; + if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags |= LOOKUP_EMPTY; + + ret = user_path_at(from_dfd, from_pathname, lflags, &from_path); + if (ret < 0) + return ret; + + lflags = 0; + if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW; + if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; + if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags |= LOOKUP_EMPTY; + + ret = user_path_at(to_dfd, to_pathname, lflags, &to_path); + if (ret < 0) + goto out_from; + + ret = security_move_mount(&from_path, &to_path); + if (ret < 0) + goto out_to; + + ret = do_move_mount(&from_path, &to_path); + +out_to: + path_put(&to_path); +out_from: + path_put(&from_path); + return ret; +} + /* * Return true if path is reachable from root * @@ -3189,7 +3717,7 @@ static void __init init_mount_tree(void) type = get_fs_type("rootfs"); if (!type) panic("Can't find rootfs type"); - mnt = vfs_kern_mount(type, 0, "rootfs", NULL); + mnt = vfs_kern_mount(type, 0, "rootfs", NULL, 0); put_filesystem(type); if (IS_ERR(mnt)) panic("Can't create rootfs"); @@ -3251,21 +3779,6 @@ void put_mnt_ns(struct mnt_namespace *ns) free_mnt_ns(ns); } -struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) -{ - struct vfsmount *mnt; - mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, data); - if (!IS_ERR(mnt)) { - /* - * it is a longterm mount, don't release mnt until - * we unmount before file sys is unregistered - */ - real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL; - } - return mnt; -} -EXPORT_SYMBOL_GPL(kern_mount_data); - void kern_unmount(struct vfsmount *mnt) { /* release long term mount so mount point can be released */ @@ -3306,7 +3819,8 @@ bool current_chrooted(void) return chrooted; } -static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new, +static bool mnt_already_visible(struct mnt_namespace *ns, + const struct 
super_block *sb, int *new_mnt_flags) { int new_flags = *new_mnt_flags; @@ -3318,7 +3832,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new, struct mount *child; int mnt_flags; - if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type) + if (mnt->mnt.mnt_sb->s_type != sb->s_type) continue; /* This mount is not fully visible if it's root directory @@ -3369,7 +3883,7 @@ found: return visible; } -static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags) +static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags) { const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV; struct mnt_namespace *ns = current->nsproxy->mnt_ns; @@ -3379,7 +3893,7 @@ static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags) return false; /* Can this filesystem be too revealing? */ - s_iflags = mnt->mnt_sb->s_iflags; + s_iflags = sb->s_iflags; if (!(s_iflags & SB_I_USERNS_VISIBLE)) return false; @@ -3389,7 +3903,7 @@ static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags) return true; } - return !mnt_already_visible(ns, mnt, new_mnt_flags); + return !mnt_already_visible(ns, sb, new_mnt_flags); } bool mnt_may_suid(struct vfsmount *mnt) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8357ff69962f..db0f3ca3a35c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -405,7 +405,7 @@ int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_ int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *, struct nfs_mount_info *, struct nfs_subversion *); -struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); +struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *, size_t); struct dentry * nfs_xdev_mount_common(struct file_system_type *, int, const char *, struct nfs_mount_info *); void 
nfs_kill_super(struct super_block *); @@ -466,7 +466,7 @@ int nfs_show_options(struct seq_file *, struct dentry *); int nfs_show_devname(struct seq_file *, struct dentry *); int nfs_show_path(struct seq_file *, struct dentry *); int nfs_show_stats(struct seq_file *, struct dentry *); -int nfs_remount(struct super_block *sb, int *flags, char *raw_data); +int nfs_remount(struct super_block *sb, int *flags, char *raw_data, size_t data_size); /* write.c */ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index e5686be67be8..df9e87331558 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -216,7 +216,8 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, const char *devname, struct nfs_clone_mount *mountdata) { - return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata); + return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, + mountdata, 0); } /** diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 24f06dcc2b08..191cb4202056 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -278,7 +278,8 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, mountdata->hostname, mountdata->mnt_path); - mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata); + mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, + mountdata, 0); if (!IS_ERR(mnt)) break; } diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 6fb7cb6b3f4b..e72e5dbdfcd0 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -18,11 +18,11 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc); static void nfs4_evict_inode(struct inode *inode); static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); + int flags, const char *dev_name, void *raw_data, size_t data_size); static struct dentry 
*nfs4_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); + int flags, const char *dev_name, void *raw_data, size_t data_size); static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); + int flags, const char *dev_name, void *raw_data, size_t data_size); static struct file_system_type nfs4_remote_fs_type = { .owner = THIS_MODULE, @@ -105,7 +105,7 @@ static void nfs4_evict_inode(struct inode *inode) */ static struct dentry * nfs4_remote_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *info) + const char *dev_name, void *info, size_t data_size) { struct nfs_mount_info *mount_info = info; struct nfs_server *server; @@ -127,7 +127,7 @@ out: } static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, - int flags, void *data, const char *hostname) + int flags, void *data, size_t data_size, const char *hostname) { struct vfsmount *root_mnt; char *root_devname; @@ -142,7 +142,8 @@ static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, snprintf(root_devname, len, "[%s]:/", hostname); else snprintf(root_devname, len, "%s:/", hostname); - root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); + root_mnt = vfs_kern_mount(fs_type, flags, root_devname, + data, data_size); kfree(root_devname); return root_mnt; } @@ -247,8 +248,8 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, export_path = data->nfs_server.export_path; data->nfs_server.export_path = "/"; - root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, - data->nfs_server.hostname); + root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, 0, + data->nfs_server.hostname); data->nfs_server.export_path = export_path; res = nfs_follow_remote_path(root_mnt, export_path); @@ -261,7 +262,8 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, static struct dentry * 
nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) + const char *dev_name, + void *raw_data, size_t data_size) { struct nfs_mount_info mount_info = { .fill_super = nfs_fill_super, @@ -294,7 +296,8 @@ out: * Create an NFS4 server record on referral traversal */ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) + int flags, const char *dev_name, + void *raw_data, size_t data_size) { struct nfs_clone_mount *data = raw_data; char *export_path; @@ -306,8 +309,8 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, export_path = data->mnt_path; data->mnt_path = "/"; - root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type, - flags, data, data->hostname); + root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type, flags, + data, 0, data->hostname); data->mnt_path = export_path; res = nfs_follow_remote_path(root_mnt, export_path); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ac4b2f005778..21f12b79348a 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -287,7 +287,8 @@ static match_table_t nfs_vers_tokens = { }; static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); + int flags, const char *dev_name, + void *raw_data, size_t data_size); struct file_system_type nfs_fs_type = { .owner = THIS_MODULE, @@ -1203,7 +1204,7 @@ static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option, * skipped as they are encountered. If there were no errors, return 1; * otherwise return 0 (zero). 
*/ -static int nfs_parse_mount_options(char *raw, +static int nfs_parse_mount_options(char *raw, size_t raw_size, struct nfs_parsed_mount_data *mnt) { char *p, *string, *secdata; @@ -1221,7 +1222,7 @@ static int nfs_parse_mount_options(char *raw, if (!secdata) goto out_nomem; - rc = security_sb_copy_data(raw, secdata); + rc = security_sb_copy_data(raw, raw_size, secdata); if (rc) goto out_security_failure; @@ -2151,7 +2152,7 @@ static int nfs_validate_mount_data(struct file_system_type *fs_type, } #endif -static int nfs_validate_text_mount_data(void *options, +static int nfs_validate_text_mount_data(void *options, size_t data_size, struct nfs_parsed_mount_data *args, const char *dev_name) { @@ -2160,7 +2161,7 @@ static int nfs_validate_text_mount_data(void *options, int max_pathlen = NFS_MAXPATHLEN; struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; - if (nfs_parse_mount_options((char *)options, args) == 0) + if (nfs_parse_mount_options((char *)options, data_size, args) == 0) return -EINVAL; if (!nfs_verify_server_address(sap)) @@ -2243,7 +2244,7 @@ nfs_compare_remount_data(struct nfs_server *nfss, } int -nfs_remount(struct super_block *sb, int *flags, char *raw_data) +nfs_remount(struct super_block *sb, int *flags, char *raw_data, size_t data_size) { int error; struct nfs_server *nfss = sb->s_fs_info; @@ -2290,7 +2291,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) /* overwrite those values with any that were specified */ error = -EINVAL; - if (!nfs_parse_mount_options((char *)options, data)) + if (!nfs_parse_mount_options((char *)options, data_size, data)) goto out; /* @@ -2662,7 +2663,7 @@ error_splat_super: EXPORT_SYMBOL_GPL(nfs_fs_mount_common); struct dentry *nfs_fs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) + int flags, const char *dev_name, void *raw_data, size_t data_size) { struct nfs_mount_info mount_info = { .fill_super = nfs_fill_super, @@ -2680,7 +2681,8 @@ struct 
dentry *nfs_fs_mount(struct file_system_type *fs_type, /* Validate the mount data */ error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mount_info.mntfh, dev_name); if (error == NFS_TEXT_DATA) - error = nfs_validate_text_mount_data(raw_data, mount_info.parsed, dev_name); + error = nfs_validate_text_mount_data(raw_data, data_size, + mount_info.parsed, dev_name); if (error < 0) { mntroot = ERR_PTR(error); goto out; @@ -2724,7 +2726,7 @@ EXPORT_SYMBOL_GPL(nfs_kill_super); */ static struct dentry * nfs_xdev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) + const char *dev_name, void *raw_data, size_t data_size) { struct nfs_clone_mount *data = raw_data; struct nfs_mount_info mount_info = { diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 38b223c1378e..9246c26ba320 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1150,7 +1150,8 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) * populating the filesystem. 
*/ -static int nfsd_fill_super(struct super_block * sb, void * data, int silent) +static int nfsd_fill_super(struct super_block * sb, + void * data, size_t data_size, int silent) { static const struct tree_descr nfsd_files[] = { [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, @@ -1185,10 +1186,11 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) } static struct dentry *nfsd_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { struct net *net = current->nsproxy->net_ns; - return mount_ns(fs_type, flags, data, net, net->user_ns, nfsd_fill_super); + return mount_ns(fs_type, flags, data, data_size, + net, net->user_ns, nfsd_fill_super); } static void nfsd_umount(struct super_block *sb) diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 26290aa1023f..81b66c609ca6 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -60,7 +60,8 @@ struct kmem_cache *nilfs_segbuf_cachep; struct kmem_cache *nilfs_btree_path_cache; static int nilfs_setup_super(struct super_block *sb, int is_mount); -static int nilfs_remount(struct super_block *sb, int *flags, char *data); +static int nilfs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size); void __nilfs_msg(struct super_block *sb, const char *level, const char *fmt, ...) 
@@ -1109,7 +1110,8 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) return err; } -static int nilfs_remount(struct super_block *sb, int *flags, char *data) +static int nilfs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { struct the_nilfs *nilfs = sb->s_fs_info; unsigned long old_sb_flags; @@ -1269,7 +1271,7 @@ static int nilfs_test_bdev_super(struct super_block *s, void *data) static struct dentry * nilfs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) + const char *dev_name, void *data, size_t data_size) { struct nilfs_super_data sd; struct super_block *s; @@ -1337,7 +1339,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, * Try remount to setup mount states if the current * tree is not mounted and only snapshots use this sb. */ - err = nilfs_remount(s, &flags, data); + err = nilfs_remount(s, &flags, data, data_size); if (err) goto failed_super; } diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig index 41355ce74ac0..f5b0b3af32dd 100644 --- a/fs/notify/fanotify/Kconfig +++ b/fs/notify/fanotify/Kconfig @@ -1,7 +1,6 @@ config FANOTIFY bool "Filesystem wide access notification" select FSNOTIFY - select ANON_INODES default n ---help--- Say Y here to enable fanotify support. 
fanotify is a file access diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig index b981fc0c8379..0161c74e76e2 100644 --- a/fs/notify/inotify/Kconfig +++ b/fs/notify/inotify/Kconfig @@ -1,6 +1,5 @@ config INOTIFY_USER bool "Inotify support for userspace" - select ANON_INODES select FSNOTIFY default y ---help--- diff --git a/fs/nsfs.c b/fs/nsfs.c index 60702d677bd4..f069eb6495b0 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -263,7 +263,8 @@ static const struct super_operations nsfs_ops = { .show_path = nsfs_show_path, }; static struct dentry *nsfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { return mount_pseudo(fs_type, "nsfs:", &nsfs_ops, &ns_dentry_operations, NSFS_MAGIC); diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index bb7159f697f2..8501bbcceb5a 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -456,6 +456,7 @@ static inline int ntfs_clear_volume_flags(ntfs_volume *vol, VOLUME_FLAGS flags) * @sb: superblock of mounted ntfs filesystem * @flags: remount flags * @opt: remount options string + * @data_size: size of the options string * * Change the mount options of an already mounted ntfs filesystem. * @@ -463,7 +464,8 @@ static inline int ntfs_clear_volume_flags(ntfs_volume *vol, VOLUME_FLAGS flags) * ntfs_remount() returns successfully (i.e. returns 0). Otherwise, * @sb->s_flags are not changed. 
*/ -static int ntfs_remount(struct super_block *sb, int *flags, char *opt) +static int ntfs_remount(struct super_block *sb, int *flags, + char *opt, size_t data_size) { ntfs_volume *vol = NTFS_SB(sb); @@ -2694,6 +2696,7 @@ static const struct super_operations ntfs_sops = { * ntfs_fill_super - mount an ntfs filesystem * @sb: super block of ntfs filesystem to mount * @opt: string containing the mount options + * @data_size: size of the mount options string * @silent: silence error output * * ntfs_fill_super() is called by the VFS to mount the device described by @sb @@ -2708,7 +2711,8 @@ static const struct super_operations ntfs_sops = { * * NOTE: @sb->s_flags contains the mount options flags. */ -static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) +static int ntfs_fill_super(struct super_block *sb, void *opt, size_t data_size, + const int silent) { ntfs_volume *vol; struct buffer_head *bh; @@ -3060,9 +3064,10 @@ struct kmem_cache *ntfs_index_ctx_cache; DEFINE_MUTEX(ntfs_lock); static struct dentry *ntfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, ntfs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + ntfs_fill_super); } static struct file_system_type ntfs_fs_type = { diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 602c71f32740..642e471a6472 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -568,6 +568,7 @@ bail: static int dlmfs_fill_super(struct super_block * sb, void * data, + size_t data_size, int silent) { sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -617,9 +618,9 @@ static const struct inode_operations dlmfs_file_inode_operations = { }; static struct dentry *dlmfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - 
return mount_nodev(fs_type, flags, data, dlmfs_fill_super); + return mount_nodev(fs_type, flags, data, data_size, dlmfs_fill_super); } static struct file_system_type dlmfs_fs_type = { diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 3415e0b09398..62237837a098 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -107,7 +107,8 @@ static int ocfs2_check_set_options(struct super_block *sb, static int ocfs2_show_options(struct seq_file *s, struct dentry *root); static void ocfs2_put_super(struct super_block *sb); static int ocfs2_mount_volume(struct super_block *sb); -static int ocfs2_remount(struct super_block *sb, int *flags, char *data); +static int ocfs2_remount(struct super_block *sb, int *flags, + char *data, size_t data_size); static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err); static int ocfs2_initialize_mem_caches(void); static void ocfs2_free_mem_caches(void); @@ -633,7 +634,8 @@ static unsigned long long ocfs2_max_file_offset(unsigned int bbits, return (((unsigned long long)bytes) << bitshift) - trim; } -static int ocfs2_remount(struct super_block *sb, int *flags, char *data) +static int ocfs2_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { int incompat_features; int ret = 0; @@ -999,7 +1001,8 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) } } -static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) +static int ocfs2_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct dentry *root; int status, sector_size; @@ -1236,9 +1239,10 @@ read_super_error: static struct dentry *ocfs2_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + ocfs2_fill_super); } static struct file_system_type ocfs2_fs_type = { diff --git a/fs/omfs/inode.c 
b/fs/omfs/inode.c index ee14af9e26f2..e5258fefcd2b 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -454,7 +454,8 @@ static int parse_options(char *options, struct omfs_sb_info *sbi) return 1; } -static int omfs_fill_super(struct super_block *sb, void *data, int silent) +static int omfs_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct buffer_head *bh, *bh2; struct omfs_super_block *omfs_sb; @@ -596,9 +597,11 @@ end: } static struct dentry *omfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, omfs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + omfs_fill_super); } static struct file_system_type omfs_fs_type = { diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 1b2d0d2fe2ee..6b8e009cca17 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -366,7 +366,8 @@ static struct inode *openprom_iget(struct super_block *sb, ino_t ino) return inode; } -static int openprom_remount(struct super_block *sb, int *flags, char *data) +static int openprom_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { sync_filesystem(sb); *flags |= SB_NOATIME; @@ -380,7 +381,8 @@ static const struct super_operations openprom_sops = { .remount_fs = openprom_remount, }; -static int openprom_fill_super(struct super_block *s, void *data, int silent) +static int openprom_fill_super(struct super_block *s, + void *data, size_t data_size, int silent) { struct inode *root_inode; struct op_inode_info *oi; @@ -415,9 +417,10 @@ out_no_root: } static struct dentry *openprom_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_single(fs_type, flags, data, openprom_fill_super); + return mount_single(fs_type, flags, 
data, data_size, + openprom_fill_super); } static struct file_system_type openprom_fs_type = { diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 17b24ad6b264..ed38b9a5e43a 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -318,7 +318,7 @@ extern uint64_t orangefs_features; struct dentry *orangefs_mount(struct file_system_type *fst, int flags, const char *devname, - void *data); + void *data, size_t data_size); void orangefs_kill_sb(struct super_block *sb); int orangefs_remount(struct orangefs_sb_info_s *); diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index dfaee90d30bd..784daf6667d1 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -207,7 +207,8 @@ out_op_release: * Remount as initiated by VFS layer. We just need to reparse the mount * options, no need to signal pvfs2-client-core about it. */ -static int orangefs_remount_fs(struct super_block *sb, int *flags, char *data) +static int orangefs_remount_fs(struct super_block *sb, int *flags, + char *data, size_t data_size) { gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_remount_fs: called\n"); return parse_mount_options(sb, data, 1); @@ -457,7 +458,7 @@ static int orangefs_fill_sb(struct super_block *sb, struct dentry *orangefs_mount(struct file_system_type *fst, int flags, const char *devname, - void *data) + void *data, size_t data_size) { int ret = -EINVAL; struct super_block *sb = ERR_PTR(-EINVAL); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 0116735cc321..daeb8c4e6ee3 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -365,7 +365,8 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) return 0; } -static int ovl_remount(struct super_block *sb, int *flags, char *data) +static int ovl_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { struct ovl_fs *ofs = sb->s_fs_info; @@ -1418,7 +1419,8 @@ out_err: goto out; } -static int ovl_fill_super(struct super_block 
*sb, void *data, int silent) +static int ovl_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct path upperpath = { }; struct dentry *root_dentry; @@ -1563,9 +1565,10 @@ out: } static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) + const char *dev_name, + void *raw_data, size_t data_size) { - return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); + return mount_nodev(fs_type, flags, raw_data, data_size, ovl_fill_super); } static struct file_system_type ovl_fs_type = { diff --git a/fs/pipe.c b/fs/pipe.c index bdc5d3c0977d..d1b2ba38ebb2 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1158,7 +1158,8 @@ static const struct super_operations pipefs_ops = { * d_name - pipe: will go nicely and kill the special-casing in procfs. */ static struct dentry *pipefs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { return mount_pseudo(fs_type, "pipe:", &pipefs_ops, &pipefs_dentry_operations, PIPEFS_MAGIC); diff --git a/fs/pnode.c b/fs/pnode.c index 53d411a371ce..1100e810d855 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -10,6 +10,7 @@ #include <linux/mount.h> #include <linux/fs.h> #include <linux/nsproxy.h> +#include <uapi/linux/mount.h> #include "internal.h" #include "pnode.h" diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 5792f9e39466..4e38156e2531 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -24,7 +24,6 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/mount.h> -#include <linux/magic.h> #include <linux/uaccess.h> @@ -124,13 +123,12 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root) return 0; } -static const struct super_operations proc_sops = { +const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, .drop_inode = generic_delete_inode, .evict_inode = 
proc_evict_inode, .statfs = simple_statfs, - .remount_fs = proc_remount, .show_options = proc_show_options, }; @@ -490,51 +488,3 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) pde_put(de); return inode; } - -int proc_fill_super(struct super_block *s, void *data, int silent) -{ - struct pid_namespace *ns = get_pid_ns(s->s_fs_info); - struct inode *root_inode; - int ret; - - if (!proc_parse_options(data, ns)) - return -EINVAL; - - /* User space would break if executables or devices appear on proc */ - s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; - s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC; - s->s_blocksize = 1024; - s->s_blocksize_bits = 10; - s->s_magic = PROC_SUPER_MAGIC; - s->s_op = &proc_sops; - s->s_time_gran = 1; - - /* - * procfs isn't actually a stacking filesystem; however, there is - * too much magic going on inside it to permit stacking things on - * top of it - */ - s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; - - /* procfs dentries and inodes don't require IO to create */ - s->s_shrink.seeks = 0; - - pde_get(&proc_root); - root_inode = proc_get_inode(s, &proc_root); - if (!root_inode) { - pr_err("proc_fill_super: get root inode failed\n"); - return -ENOMEM; - } - - s->s_root = d_make_root(root_inode); - if (!s->s_root) { - pr_err("proc_fill_super: allocate dentry failed\n"); - return -ENOMEM; - } - - ret = proc_setup_self(s); - if (ret) { - return ret; - } - return proc_setup_thread_self(s); -} diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5185d7f6a51e..40f905143d39 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -205,13 +205,12 @@ struct pde_opener { struct completion *c; } __randomize_layout; extern const struct inode_operations proc_link_inode_operations; - extern const struct inode_operations proc_pid_link_inode_operations; +extern const struct super_operations proc_sops; void proc_init_kmemcache(void); void set_proc_pid_nlink(void); extern struct inode 
*proc_get_inode(struct super_block *, struct proc_dir_entry *); -extern int proc_fill_super(struct super_block *, void *data, int flags); extern void proc_entry_rundown(struct proc_dir_entry *); /* @@ -269,10 +268,8 @@ static inline void proc_tty_init(void) {} * root.c */ extern struct proc_dir_entry proc_root; -extern int proc_parse_options(char *options, struct pid_namespace *pid); extern void proc_self_init(void); -extern int proc_remount(struct super_block *, int *, char *); /* * task_[no]mmu.c diff --git a/fs/proc/root.c b/fs/proc/root.c index f4b1a9d2eca6..b0627e622850 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -19,86 +19,189 @@ #include <linux/module.h> #include <linux/bitops.h> #include <linux/user_namespace.h> +#include <linux/fs_context.h> #include <linux/mount.h> #include <linux/pid_namespace.h> -#include <linux/parser.h> +#include <linux/fs_parser.h> #include <linux/cred.h> +#include <linux/magic.h> +#include <linux/slab.h> #include "internal.h" -enum { - Opt_gid, Opt_hidepid, Opt_err, +struct proc_fs_context { + struct pid_namespace *pid_ns; + unsigned int mask; + int hidepid; + int gid; }; -static const match_table_t tokens = { - {Opt_hidepid, "hidepid=%u"}, - {Opt_gid, "gid=%u"}, - {Opt_err, NULL}, +enum proc_param { + Opt_gid, + Opt_hidepid, + nr__proc_params }; -int proc_parse_options(char *options, struct pid_namespace *pid) +static const struct fs_parameter_spec proc_param_specs[nr__proc_params] = { + [Opt_gid] = { fs_param_is_u32 }, + [Opt_hidepid] = { fs_param_is_u32 }, +}; + +static const char *const proc_param_keys[nr__proc_params] = { + [Opt_gid] = "gid", + [Opt_hidepid] = "hidepid", +}; + +static const struct fs_parameter_description proc_fs_parameters = { + .name = "proc", + .nr_params = nr__proc_params, + .keys = proc_param_keys, + .specs = proc_param_specs, + .no_source = true, +}; + +static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct proc_fs_context *ctx = fc->fs_private; + struct 
fs_parse_result result; + int opt; + + opt = fs_parse(fc, &proc_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_gid: + ctx->gid = result.uint_32; + break; + + case Opt_hidepid: + ctx->hidepid = result.uint_32; + if (ctx->hidepid < HIDEPID_OFF || + ctx->hidepid > HIDEPID_INVISIBLE) + return invalf(fc, "proc: hidepid value must be between 0 and 2.\n"); + break; + + default: + return -EINVAL; + } + + ctx->mask |= 1 << result.key; + return 0; +} + +static void proc_apply_options(struct super_block *s, + struct fs_context *fc, + struct pid_namespace *pid_ns, + struct user_namespace *user_ns) +{ + struct proc_fs_context *ctx = fc->fs_private; + + if (ctx->mask & (1 << Opt_gid)) + pid_ns->pid_gid = make_kgid(user_ns, ctx->gid); + if (ctx->mask & (1 << Opt_hidepid)) + pid_ns->hide_pid = ctx->hidepid; +} + +static int proc_fill_super(struct super_block *s, struct fs_context *fc) { - char *p; - substring_t args[MAX_OPT_ARGS]; - int option; - - if (!options) - return 1; - - while ((p = strsep(&options, ",")) != NULL) { - int token; - if (!*p) - continue; - - args[0].to = args[0].from = NULL; - token = match_token(p, tokens, args); - switch (token) { - case Opt_gid: - if (match_int(&args[0], &option)) - return 0; - pid->pid_gid = make_kgid(current_user_ns(), option); - break; - case Opt_hidepid: - if (match_int(&args[0], &option)) - return 0; - if (option < HIDEPID_OFF || - option > HIDEPID_INVISIBLE) { - pr_err("proc: hidepid value must be between 0 and 2.\n"); - return 0; - } - pid->hide_pid = option; - break; - default: - pr_err("proc: unrecognized mount option \"%s\" " - "or missing value\n", p); - return 0; - } + struct pid_namespace *pid_ns = get_pid_ns(s->s_fs_info); + struct inode *root_inode; + int ret; + + proc_apply_options(s, fc, pid_ns, current_user_ns()); + + /* User space would break if executables or devices appear on proc */ + s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; + s->s_flags |= SB_NODIRATIME 
| SB_NOSUID | SB_NOEXEC; + s->s_blocksize = 1024; + s->s_blocksize_bits = 10; + s->s_magic = PROC_SUPER_MAGIC; + s->s_op = &proc_sops; + s->s_time_gran = 1; + + /* + * procfs isn't actually a stacking filesystem; however, there is + * too much magic going on inside it to permit stacking things on + * top of it + */ + s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; + + pde_get(&proc_root); + root_inode = proc_get_inode(s, &proc_root); + if (!root_inode) { + pr_err("proc_fill_super: get root inode failed\n"); + return -ENOMEM; } - return 1; + s->s_root = d_make_root(root_inode); + if (!s->s_root) { + pr_err("proc_fill_super: allocate dentry failed\n"); + return -ENOMEM; + } + + ret = proc_setup_self(s); + if (ret) { + return ret; + } + return proc_setup_thread_self(s); } -int proc_remount(struct super_block *sb, int *flags, char *data) +static int proc_reconfigure(struct fs_context *fc) { + struct super_block *sb = fc->root->d_sb; struct pid_namespace *pid = sb->s_fs_info; sync_filesystem(sb); - return !proc_parse_options(data, pid); + + proc_apply_options(sb, fc, pid, current_user_ns()); + return 0; } -static struct dentry *proc_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int proc_get_tree(struct fs_context *fc) { - struct pid_namespace *ns; + struct proc_fs_context *ctx = fc->fs_private; + + fc->s_fs_info = ctx->pid_ns; + return vfs_get_super(fc, vfs_get_keyed_super, proc_fill_super); +} - if (flags & SB_KERNMOUNT) { - ns = data; - data = NULL; - } else { - ns = task_active_pid_ns(current); +static void proc_fs_context_free(struct fs_context *fc) +{ + struct proc_fs_context *ctx = fc->fs_private; + + if (ctx->pid_ns) + put_pid_ns(ctx->pid_ns); + kfree(ctx); +} + +static const struct fs_context_operations proc_fs_context_ops = { + .free = proc_fs_context_free, + .parse_param = proc_parse_param, + .get_tree = proc_get_tree, + .reconfigure = proc_reconfigure, +}; + +static int proc_init_fs_context(struct fs_context 
*fc, struct dentry *reference) +{ + struct proc_fs_context *ctx; + + switch (fc->purpose) { + case FS_CONTEXT_FOR_UMOUNT: + case FS_CONTEXT_FOR_EMERGENCY_RO: + return -EOPNOTSUPP; + default: + break; } - return mount_ns(fs_type, flags, data, ns, ns->user_ns, proc_fill_super); + ctx = kzalloc(sizeof(struct proc_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->pid_ns = get_pid_ns(task_active_pid_ns(current)); + fc->fs_private = ctx; + fc->ops = &proc_fs_context_ops; + return 0; } static void proc_kill_sb(struct super_block *sb) @@ -115,10 +218,11 @@ static void proc_kill_sb(struct super_block *sb) } static struct file_system_type proc_fs_type = { - .name = "proc", - .mount = proc_mount, - .kill_sb = proc_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .name = "proc", + .init_fs_context = proc_init_fs_context, + .parameters = &proc_fs_parameters, + .kill_sb = proc_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; void __init proc_root_init(void) @@ -156,7 +260,7 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr { if (!proc_pid_lookup(dir, dentry, flags)) return NULL; - + return proc_lookup(dir, dentry, flags); } @@ -209,9 +313,36 @@ struct proc_dir_entry proc_root = { int pid_ns_prepare_proc(struct pid_namespace *ns) { + struct proc_fs_context *ctx; + struct fs_context *fc; struct vfsmount *mnt; + int ret; + + fc = vfs_new_fs_context(&proc_fs_type, NULL, 0, 0, + FS_CONTEXT_FOR_KERNEL_MOUNT); + if (IS_ERR(fc)) + return PTR_ERR(fc); + + if (fc->user_ns != ns->user_ns) { + put_user_ns(fc->user_ns); + fc->user_ns = get_user_ns(ns->user_ns); + } + + ctx = fc->fs_private; + if (ctx->pid_ns != ns) { + put_pid_ns(ctx->pid_ns); + get_pid_ns(ns); + ctx->pid_ns = ns; + } + + ret = vfs_get_tree(fc); + if (ret < 0) { + put_fs_context(fc); + return ret; + } - mnt = kern_mount_data(&proc_fs_type, ns); + mnt = vfs_create_mount(fc, 0); + put_fs_context(fc); if (IS_ERR(mnt)) return PTR_ERR(mnt); diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c 
index 8cf2218b46a7..3306232c399a 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -271,7 +271,8 @@ static int pstore_show_options(struct seq_file *m, struct dentry *root) return 0; } -static int pstore_remount(struct super_block *sb, int *flags, char *data) +static int pstore_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { sync_filesystem(sb); parse_options(data); @@ -432,7 +433,8 @@ void pstore_get_records(int quiet) inode_unlock(d_inode(root)); } -static int pstore_fill_super(struct super_block *sb, void *data, int silent) +static int pstore_fill_super(struct super_block *sb, + void *data, size_t data_size, int silent) { struct inode *inode; @@ -464,9 +466,9 @@ static int pstore_fill_super(struct super_block *sb, void *data, int silent) } static struct dentry *pstore_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_single(fs_type, flags, data, pstore_fill_super); + return mount_single(fs_type, flags, data, data_size, pstore_fill_super); } static void pstore_kill_sb(struct super_block *sb) diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 3d46fe302fcb..be35529c8052 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -29,7 +29,8 @@ static const struct super_operations qnx4_sops; static struct inode *qnx4_alloc_inode(struct super_block *sb); static void qnx4_destroy_inode(struct inode *inode); -static int qnx4_remount(struct super_block *sb, int *flags, char *data); +static int qnx4_remount(struct super_block *sb, int *flags, + char *data, size_t data_size); static int qnx4_statfs(struct dentry *, struct kstatfs *); static const struct super_operations qnx4_sops = @@ -40,7 +41,8 @@ static const struct super_operations qnx4_sops = .remount_fs = qnx4_remount, }; -static int qnx4_remount(struct super_block *sb, int *flags, char *data) +static int qnx4_remount(struct super_block *sb, int *flags, + char *data, 
size_t data_size) { struct qnx4_sb_info *qs; @@ -183,7 +185,8 @@ static const char *qnx4_checkroot(struct super_block *sb, return "bitmap file not found."; } -static int qnx4_fill_super(struct super_block *s, void *data, int silent) +static int qnx4_fill_super(struct super_block *s, void *data, size_t data_size, + int silent) { struct buffer_head *bh; struct inode *root; @@ -383,9 +386,10 @@ static void destroy_inodecache(void) } static struct dentry *qnx4_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, qnx4_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + qnx4_fill_super); } static struct file_system_type qnx4_fs_type = { diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 4aeb26bcb4d0..a415c1b5f936 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -30,7 +30,8 @@ static const struct super_operations qnx6_sops; static void qnx6_put_super(struct super_block *sb); static struct inode *qnx6_alloc_inode(struct super_block *sb); static void qnx6_destroy_inode(struct inode *inode); -static int qnx6_remount(struct super_block *sb, int *flags, char *data); +static int qnx6_remount(struct super_block *sb, int *flags, + char *data, size_t data_size); static int qnx6_statfs(struct dentry *dentry, struct kstatfs *buf); static int qnx6_show_options(struct seq_file *seq, struct dentry *root); @@ -53,7 +54,8 @@ static int qnx6_show_options(struct seq_file *seq, struct dentry *root) return 0; } -static int qnx6_remount(struct super_block *sb, int *flags, char *data) +static int qnx6_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { sync_filesystem(sb); *flags |= SB_RDONLY; @@ -294,7 +296,8 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s, static struct inode *qnx6_private_inode(struct super_block *s, struct qnx6_root_node *p); -static 
int qnx6_fill_super(struct super_block *s, void *data, int silent) +static int qnx6_fill_super(struct super_block *s, void *data, size_t data_size, + int silent) { struct buffer_head *bh1 = NULL, *bh2 = NULL; struct qnx6_super_block *sb1 = NULL, *sb2 = NULL; @@ -643,9 +646,10 @@ static void destroy_inodecache(void) } static struct dentry *qnx6_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, qnx6_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + qnx6_fill_super); } static struct file_system_type qnx6_fs_type = { diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 11201b2d06b9..2e9b23b4a98b 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -217,7 +217,7 @@ static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts) return 0; } -int ramfs_fill_super(struct super_block *sb, void *data, int silent) +int ramfs_fill_super(struct super_block *sb, void *data, size_t data_size, int silent) { struct ramfs_fs_info *fsi; struct inode *inode; @@ -248,9 +248,9 @@ int ramfs_fill_super(struct super_block *sb, void *data, int silent) } struct dentry *ramfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_nodev(fs_type, flags, data, ramfs_fill_super); + return mount_nodev(fs_type, flags, data, data_size, ramfs_fill_super); } static void ramfs_kill_sb(struct super_block *sb) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 1fc934d24459..d8631cb38485 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -61,7 +61,8 @@ static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs) is_reiserfs_jr(rs)); } -static int reiserfs_remount(struct super_block *s, int *flags, char *data); +static int reiserfs_remount(struct super_block *s, int 
*flags, + char *data, size_t data_size); static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf); static int reiserfs_sync_fs(struct super_block *s, int wait) @@ -1433,7 +1434,8 @@ static void handle_quota_files(struct super_block *s, char **qf_names, } #endif -static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) +static int reiserfs_remount(struct super_block *s, int *mount_flags, + char *arg, size_t data_size) { struct reiserfs_super_block *rs; struct reiserfs_transaction_handle th; @@ -1898,7 +1900,8 @@ static int function2code(hashf_t func) if (!(silent)) \ reiserfs_warning(s, id, __VA_ARGS__) -static int reiserfs_fill_super(struct super_block *s, void *data, int silent) +static int reiserfs_fill_super(struct super_block *s, void *data, size_t data_size, + int silent) { struct inode *root_inode; struct reiserfs_transaction_handle th; @@ -2600,9 +2603,10 @@ out: static struct dentry *get_super_block(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + reiserfs_fill_super); } static int __init init_reiserfs_fs(void) diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 6ccb51993a76..a6a53403a035 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -430,7 +430,8 @@ static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) /* * remounting must involve read-only */ -static int romfs_remount(struct super_block *sb, int *flags, char *data) +static int romfs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { sync_filesystem(sb); *flags |= SB_RDONLY; @@ -464,7 +465,8 @@ static __u32 romfs_checksum(const void *data, int size) /* * fill in the superblock */ -static int romfs_fill_super(struct super_block *sb, void *data, int silent) +static int romfs_fill_super(struct super_block *sb, void 
*data, size_t data_size, + int silent) { struct romfs_super_block *rsb; struct inode *root; @@ -557,16 +559,17 @@ error_rsb: */ static struct dentry *romfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, size_t data_size) { struct dentry *ret = ERR_PTR(-EINVAL); #ifdef CONFIG_ROMFS_ON_MTD - ret = mount_mtd(fs_type, flags, dev_name, data, romfs_fill_super); + ret = mount_mtd(fs_type, flags, dev_name, data, data_size, + romfs_fill_super); #endif #ifdef CONFIG_ROMFS_ON_BLOCK if (ret == ERR_PTR(-EINVAL)) - ret = mount_bdev(fs_type, flags, dev_name, data, + ret = mount_bdev(fs_type, flags, dev_name, data, data_size, romfs_fill_super); #endif return ret; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 40e657386fa5..6e27e56bd4f2 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -76,7 +76,8 @@ static const struct squashfs_decompressor *supported_squashfs_filesystem(short } -static int squashfs_fill_super(struct super_block *sb, void *data, int silent) +static int squashfs_fill_super(struct super_block *sb, + void *data, size_t data_size, int silent) { struct squashfs_sb_info *msblk; struct squashfs_super_block *sblk = NULL; @@ -371,7 +372,8 @@ static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf) } -static int squashfs_remount(struct super_block *sb, int *flags, char *data) +static int squashfs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { sync_filesystem(sb); *flags |= SB_RDONLY; @@ -399,9 +401,11 @@ static void squashfs_put_super(struct super_block *sb) static struct dentry *squashfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + squashfs_fill_super); } diff --git a/fs/statfs.c b/fs/statfs.c index 
f0216629621d..0e348b6c8241 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -9,6 +9,8 @@ #include <linux/security.h> #include <linux/uaccess.h> #include <linux/compat.h> +#include <linux/fsinfo.h> +#include <linux/fs_parser.h> #include "internal.h" static int flags_by_mnt(int mnt_flags) @@ -394,3 +396,588 @@ COMPAT_SYSCALL_DEFINE2(ustat, unsigned, dev, struct compat_ustat __user *, u) return 0; } #endif + +/* + * Get basic filesystem stats from statfs. + */ +static int fsinfo_generic_statfs(struct dentry *dentry, + struct fsinfo_statfs *p) +{ + struct kstatfs buf; + int ret; + + ret = statfs_by_dentry(dentry, &buf); + if (ret < 0) + return ret; + + p->f_blocks = buf.f_blocks; + p->f_bfree = buf.f_bfree; + p->f_bavail = buf.f_bavail; + p->f_files = buf.f_files; + p->f_ffree = buf.f_ffree; + p->f_favail = buf.f_ffree; + p->f_bsize = buf.f_bsize; + p->f_frsize = buf.f_frsize; + return sizeof(*p); +} + +static int fsinfo_generic_ids(struct dentry *dentry, + struct fsinfo_ids *p) +{ + struct super_block *sb; + struct kstatfs buf; + int ret; + + ret = statfs_by_dentry(dentry, &buf); + if (ret < 0) + return ret; + + sb = dentry->d_sb; + p->f_fstype = sb->s_magic; + p->f_dev_major = MAJOR(sb->s_dev); + p->f_dev_minor = MINOR(sb->s_dev); + p->f_flags = ST_VALID | flags_by_sb(sb->s_flags); + + memcpy(&p->f_fsid, &buf.f_fsid, sizeof(p->f_fsid)); + strlcpy(p->f_fs_name, dentry->d_sb->s_type->name, sizeof(p->f_fs_name)); + return sizeof(*p); +} + +static int fsinfo_generic_limits(struct dentry *dentry, + struct fsinfo_limits *lim) +{ + struct super_block *sb = dentry->d_sb; + + lim->max_file_size = sb->s_maxbytes; + lim->max_hard_links = sb->s_max_links; + lim->max_uid = UINT_MAX; + lim->max_gid = UINT_MAX; + lim->max_projid = UINT_MAX; + lim->max_filename_len = NAME_MAX; + lim->max_symlink_len = PAGE_SIZE; + lim->max_xattr_name_len = XATTR_NAME_MAX; + lim->max_xattr_body_len = XATTR_SIZE_MAX; + lim->max_dev_major = 0xffffff; + lim->max_dev_minor = 0xff; + return sizeof(*lim); +} 
+ +static int fsinfo_generic_supports(struct dentry *dentry, + struct fsinfo_supports *c) +{ + struct super_block *sb = dentry->d_sb; + + c->stx_mask = STATX_BASIC_STATS; + if (sb->s_d_op && sb->s_d_op->d_automount) + c->stx_attributes |= STATX_ATTR_AUTOMOUNT; + return sizeof(*c); +} + +static int fsinfo_generic_capabilities(struct dentry *dentry, + struct fsinfo_capabilities *c) +{ + struct super_block *sb = dentry->d_sb; + + if (sb->s_mtd) + fsinfo_set_cap(c, FSINFO_CAP_IS_FLASH_FS); + else if (sb->s_bdev) + fsinfo_set_cap(c, FSINFO_CAP_IS_BLOCK_FS); + + if (sb->s_quota_types & QTYPE_MASK_USR) + fsinfo_set_cap(c, FSINFO_CAP_USER_QUOTAS); + if (sb->s_quota_types & QTYPE_MASK_GRP) + fsinfo_set_cap(c, FSINFO_CAP_GROUP_QUOTAS); + if (sb->s_quota_types & QTYPE_MASK_PRJ) + fsinfo_set_cap(c, FSINFO_CAP_PROJECT_QUOTAS); + if (sb->s_d_op && sb->s_d_op->d_automount) + fsinfo_set_cap(c, FSINFO_CAP_AUTOMOUNTS); + if (sb->s_id[0]) + fsinfo_set_cap(c, FSINFO_CAP_VOLUME_ID); + + fsinfo_set_cap(c, FSINFO_CAP_HAS_ATIME); + fsinfo_set_cap(c, FSINFO_CAP_HAS_CTIME); + fsinfo_set_cap(c, FSINFO_CAP_HAS_MTIME); + return sizeof(*c); +} + +static int fsinfo_generic_timestamp_info(struct dentry *dentry, + struct fsinfo_timestamp_info *ts) +{ + struct super_block *sb = dentry->d_sb; + + /* If unset, assume 1s granularity */ + u16 mantissa = 1; + s8 exponent = 0; + + ts->minimum_timestamp = S64_MIN; + ts->maximum_timestamp = S64_MAX; + if (sb->s_time_gran < 1000000000) { + if (sb->s_time_gran < 1000) + exponent = -9; + else if (sb->s_time_gran < 1000000) + exponent = -6; + else + exponent = -3; + } +#define set_gran(x) \ + do { \ + ts->x##_mantissa = mantissa; \ + ts->x##_exponent = exponent; \ + } while (0) + set_gran(atime_gran); + set_gran(btime_gran); + set_gran(ctime_gran); + set_gran(mtime_gran); + return sizeof(*ts); +} + +static int fsinfo_generic_volume_uuid(struct dentry *dentry, + struct fsinfo_volume_uuid *vu) +{ + struct super_block *sb = dentry->d_sb; + + memcpy(vu, 
&sb->s_uuid, sizeof(*vu)); + return sizeof(*vu); +} + +static int fsinfo_generic_volume_id(struct dentry *dentry, char *buf) +{ + struct super_block *sb = dentry->d_sb; + size_t len = strlen(sb->s_id); + + memcpy(buf, sb->s_id, len + 1); + return len; +} + +static int fsinfo_generic_name_encoding(struct dentry *dentry, char *buf) +{ + static const char encoding[] = "utf8"; + + memcpy(buf, encoding, sizeof(encoding) - 1); + return sizeof(encoding) - 1; +} + +static int fsinfo_generic_io_size(struct dentry *dentry, + struct fsinfo_io_size *c) +{ + struct super_block *sb = dentry->d_sb; + struct kstatfs buf; + int ret; + + if (sb->s_op->statfs == simple_statfs) { + c->dio_size_gran = 1; + c->dio_mem_align = 1; + } else { + ret = statfs_by_dentry(dentry, &buf); + if (ret < 0) + return ret; + c->dio_size_gran = buf.f_bsize; + c->dio_mem_align = buf.f_bsize; + } + return sizeof(*c); +} + +static int fsinfo_generic_param_description(struct file_system_type *f, + struct fsinfo_kparams *params) +{ + const struct fs_parameter_description *desc = f->parameters; + struct fsinfo_param_description *p = params->buffer; + + if (!desc) + return -ENODATA; + + p->nr_params = desc->nr_params; + p->nr_names = desc->nr_params + desc->nr_alt_keys; + p->nr_enum_names = desc->nr_enums; + p->source_param = desc->no_source ? 
UINT_MAX : desc->source_param; + return sizeof(*p); +} + +static int fsinfo_generic_param_specification(struct file_system_type *f, + struct fsinfo_kparams *params) +{ + const struct fs_parameter_description *desc = f->parameters; + struct fsinfo_param_specification *p = params->buffer; + + if (!desc || !desc->specs || params->Nth >= desc->nr_params) + return -ENODATA; + + p->type = desc->specs[params->Nth].type; + p->flags = desc->specs[params->Nth].flags; + return sizeof(*p); +} + +static int fsinfo_generic_param_name(struct file_system_type *f, + struct fsinfo_kparams *params) +{ + const struct fs_parameter_description *desc = f->parameters; + struct fsinfo_param_name *p = params->buffer; + const char *name; + unsigned int n = params->Nth; + + if (!desc || !desc->keys) + return -ENODATA; + + if (n < desc->nr_params) { + p->param_index = n; + name = desc->keys[n]; + goto out; + } + + n -= desc->nr_params; + if (n < desc->nr_alt_keys) { + p->param_index = desc->alt_keys[n].value; + name = desc->alt_keys[n].name; + goto out; + } + return -ENODATA; + +out: + strcpy(p->name, name); + return sizeof(*p); +} + +static int fsinfo_generic_param_enum(struct file_system_type *f, + struct fsinfo_kparams *params) +{ + const struct fs_parameter_description *desc = f->parameters; + struct fsinfo_param_enum *p = params->buffer; + + if (!desc || !desc->enums || params->Nth >= desc->nr_enums) + return -ENODATA; + + p->param_index = desc->enums[params->Nth].param_id; + strcpy(p->name, desc->enums[params->Nth].name); + return sizeof(*p); +} + +/* + * Implement some queries generically from stuff in the superblock. 
+ *
+ * The request in params->request selects one of the fsinfo_generic_*()
+ * helpers, whose return value is passed straight back to the caller.  Any
+ * request that has no generic implementation yields -EOPNOTSUPP.
+ */
+int generic_fsinfo(struct path *path, struct fsinfo_kparams *params)
+{
+	struct dentry *dentry = path->dentry;
+	struct file_system_type *f = dentry->d_sb->s_type;
+
+	/* Each macro expands to "<case label>: return <helper>(...)", so every
+	 * "case _gen(...)" line below is a complete switch arm.  _gen helpers
+	 * are passed the dentry and the raw result buffer; _genf helpers are
+	 * passed the filesystem type and the whole parameter block.  Both
+	 * macros rely on the local names dentry, f and params, and neither is
+	 * undefined afterwards.
+	 */
+#define _gen(X, Y) FSINFO_ATTR_##X: return fsinfo_generic_##Y(dentry, params->buffer)
+#define _genf(X, Y) FSINFO_ATTR_##X: return fsinfo_generic_##Y(f, params)
+
+	switch (params->request) {
+	case _gen(STATFS,		statfs);
+	case _gen(IDS,			ids);
+	case _gen(LIMITS,		limits);
+	case _gen(SUPPORTS,		supports);
+	case _gen(CAPABILITIES,		capabilities);
+	case _gen(TIMESTAMP_INFO,	timestamp_info);
+	case _gen(VOLUME_UUID,		volume_uuid);
+	case _gen(VOLUME_ID,		volume_id);
+	case _gen(NAME_ENCODING,	name_encoding);
+	case _gen(IO_SIZE,		io_size);
+	case _genf(PARAM_DESCRIPTION,	param_description);
+	case _genf(PARAM_SPECIFICATION,	param_specification);
+	case _genf(PARAM_NAME,		param_name);
+	case _genf(PARAM_ENUM,		param_enum);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+EXPORT_SYMBOL(generic_fsinfo);
+
+/*
+ * Retrieve the filesystem info. We make some stuff up if the operation is not
+ * supported. 
+ */
+int vfs_fsinfo(struct path *path, struct fsinfo_kparams *params)
+{
+	int (*query)(struct path *, struct fsinfo_kparams *);
+	struct dentry *dentry = path->dentry;
+	struct fsinfo_ids *ids;
+	int ret;
+
+	/* The attribute and capability limits are answered here, before the
+	 * filesystem or the security hook is consulted.
+	 */
+	if (params->request == FSINFO_ATTR_FSINFO) {
+		struct fsinfo_fsinfo *info = params->buffer;
+
+		info->max_attr = FSINFO_ATTR__NR;
+		info->max_cap = FSINFO_CAP__NR;
+		return sizeof(*info);
+	}
+
+	/* Use the filesystem's own handler if it has one; otherwise fall back
+	 * to the generic implementation, provided statfs is available.
+	 */
+	query = dentry->d_sb->s_op->fsinfo;
+	if (!query) {
+		if (!dentry->d_sb->s_op->statfs)
+			return -EOPNOTSUPP;
+		query = generic_fsinfo;
+	}
+
+	ret = security_sb_statfs(dentry);
+	if (ret)
+		return ret;
+
+	ret = query(path, params);
+	if (ret < 0)
+		return ret;
+
+	/* Mount flags are only merged into an IDS result, and only when there
+	 * is both a result buffer and a mount to take them from.
+	 */
+	if (params->request != FSINFO_ATTR_IDS ||
+	    !params->buffer ||
+	    !path->mnt)
+		return ret;
+
+	ids = params->buffer;
+	ids->f_flags |= flags_by_mnt(path->mnt->mnt_flags);
+	return ret;
+}
+
+/*
+ * Look up @filename relative to @dfd, honouring the AT_* flags in
+ * params->at_flags, and query the filesystem found there.  The lookup is
+ * repeated with LOOKUP_REVAL if retry_estale() asks for it.
+ */
+static int vfs_fsinfo_path(int dfd, const char __user *filename,
+			   struct fsinfo_kparams *params)
+{
+	unsigned lookup_flags = 0;
+	struct path path;
+	int ret;
+
+	if ((params->at_flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
+				  AT_EMPTY_PATH)) != 0)
+		return -EINVAL;
+
+	/* Following symlinks and automounting are on unless switched off. */
+	if (!(params->at_flags & AT_SYMLINK_NOFOLLOW))
+		lookup_flags |= LOOKUP_FOLLOW;
+	if (!(params->at_flags & AT_NO_AUTOMOUNT))
+		lookup_flags |= LOOKUP_AUTOMOUNT;
+	if (params->at_flags & AT_EMPTY_PATH)
+		lookup_flags |= LOOKUP_EMPTY;
+
+	for (;;) {
+		ret = user_path_at(dfd, filename, lookup_flags, &path);
+		if (ret)
+			break;
+
+		ret = vfs_fsinfo(&path, params);
+		path_put(&path);
+		if (!retry_estale(ret, lookup_flags))
+			break;
+		lookup_flags |= LOOKUP_REVAL;
+	}
+	return ret;
+}
+
+/*
+ * Query through a filesystem context.  Parameter-description requests are
+ * answered from the filesystem type; anything else requires the context to
+ * have a root and is answered under fc->uapi_mutex.
+ */
+static int vfs_fsinfo_fscontext(struct fs_context *fc,
+				struct fsinfo_kparams *params)
+{
+	struct file_system_type *f = fc->fs_type;
+	int ret;
+
+	if (fc->ops == &legacy_fs_context_ops)
+		return -EOPNOTSUPP;
+
+	/* Filesystem parameter query is static information and doesn't need a
+	 * lock to read it.  Each _genf() arm returns directly.
+	 */
+	switch (params->request) {
+	case _genf(PARAM_DESCRIPTION,	param_description);
+	case _genf(PARAM_SPECIFICATION,	param_specification);
+	case _genf(PARAM_NAME,		param_name);
+	case _genf(PARAM_ENUM,		param_enum);
+	default:
+		break;
+	}
+
+	ret = mutex_lock_interruptible(&fc->uapi_mutex);
+	if (ret < 0)
+		return ret;
+
+	if (fc->root) {
+		struct path root = { .dentry = fc->root };
+
+		ret = vfs_fsinfo(&root, params);
+	} else {
+		ret = -EIO;
+	}
+
+	mutex_unlock(&fc->uapi_mutex);
+	return ret;
+}
+
+/*
+ * Query by file descriptor.  An fd that refers to a filesystem context is
+ * routed to vfs_fsinfo_fscontext(); any other fd is queried through the path
+ * of its open file.
+ */
+static int vfs_fsinfo_fd(unsigned int fd, struct fsinfo_kparams *params)
+{
+	struct fd f = fdget_raw(fd);
+	int ret;
+
+	if (!f.file)
+		return -EBADF;
+
+	if (f.file->f_op == &fscontext_fops)
+		ret = vfs_fsinfo_fscontext(f.file->private_data, params);
+	else
+		ret = vfs_fsinfo(&f.file->f_path, params);
+	fdput(f);
+	return ret;
+}
+
+/*
+ * Return buffer information by requestable attribute.
+ *
+ * STRUCT indicates a fixed-size structure with only one instance.
+ * STRUCT_N indicates a 1D array of STRUCT, indexed by Nth
+ * STRUCT_NM indicates a 2D-array of STRUCT, indexed by Nth, Mth
+ * STRING indicates a string with only one instance.
+ * STRING_N indicates a 1D array of STRING, indexed by Nth
+ * STRING_NM indicates a 2D-array of STRING, indexed by Nth, Mth
+ *
+ * If an entry is marked STRUCT, STRUCT_N or STRUCT_NM then if no buffer is
+ * supplied to sys_fsinfo(), sys_fsinfo() will handle returning the buffer size
+ * without calling vfs_fsinfo() and the filesystem.
+ *
+ * No struct may have more than 252 bytes (ie. 
0x3f * 4)
+ *
+ * Each table entry is a u16 that combines two things: the bits under mask
+ * 0xc000 say how the attribute is indexed (0x0000 = single instance, 0x4000 =
+ * indexed by Nth, 0x8000 = indexed by Nth and Mth) and the remaining bits hold
+ * sizeof() the result structure, or zero if the value is a string.  An
+ * attribute with no entry in the table is left as zero and so reads the same
+ * as a FSINFO_STRING entry.
+ */
+#define FSINFO_STRING(X,Y)	 [FSINFO_ATTR_##X] = 0x0000
+#define FSINFO_STRUCT(X,Y)	 [FSINFO_ATTR_##X] = sizeof(struct fsinfo_##Y)
+#define FSINFO_STRING_N(X,Y)	 [FSINFO_ATTR_##X] = 0x4000
+#define FSINFO_STRUCT_N(X,Y)	 [FSINFO_ATTR_##X] = 0x4000 | sizeof(struct fsinfo_##Y)
+#define FSINFO_STRUCT_NM(X,Y)	 [FSINFO_ATTR_##X] = 0x8000 | sizeof(struct fsinfo_##Y)
+#define FSINFO_STRING_NM(X,Y)	 [FSINFO_ATTR_##X] = 0x8000
+/* Indexed by FSINFO_ATTR_* request number. */
+static const u16 fsinfo_buffer_sizes[FSINFO_ATTR__NR] = {
+	FSINFO_STRUCT		(STATFS,		statfs),
+	FSINFO_STRUCT		(FSINFO,		fsinfo),
+	FSINFO_STRUCT		(IDS,			ids),
+	FSINFO_STRUCT		(LIMITS,		limits),
+	FSINFO_STRUCT		(CAPABILITIES,		capabilities),
+	FSINFO_STRUCT		(SUPPORTS,		supports),
+	FSINFO_STRUCT		(TIMESTAMP_INFO,	timestamp_info),
+	FSINFO_STRING		(VOLUME_ID,		volume_id),
+	FSINFO_STRUCT		(VOLUME_UUID,		volume_uuid),
+	FSINFO_STRING		(VOLUME_NAME,		volume_name),
+	FSINFO_STRING		(CELL_NAME,		cell_name),
+	FSINFO_STRING		(DOMAIN_NAME,		domain_name),
+	FSINFO_STRING_N		(SERVER_NAME,		server_name),
+	FSINFO_STRUCT_NM	(SERVER_ADDRESS,	server_address),
+	FSINFO_STRING_NM	(PARAMETER,		parameter),
+	FSINFO_STRING_N		(SOURCE,		source),
+	FSINFO_STRING		(NAME_ENCODING,		name_encoding),
+	FSINFO_STRING		(NAME_CODEPAGE,		name_codepage),
+	FSINFO_STRUCT		(IO_SIZE,		io_size),
+	FSINFO_STRUCT		(PARAM_DESCRIPTION,	param_description),
+	FSINFO_STRUCT_N		(PARAM_SPECIFICATION,	param_specification),
+	FSINFO_STRUCT_N		(PARAM_NAME,		param_name),
+	FSINFO_STRUCT_N		(PARAM_ENUM,		param_enum),
+};
+
+/**
+ * sys_fsinfo - System call to get filesystem information
+ * @dfd: Base directory to pathwalk from or fd referring to filesystem.
+ * @filename: Filesystem to query or NULL.
+ * @_params: Parameters to define request (or NULL for enhanced statfs).
+ * @_buffer: Result buffer.
+ * @buf_size: Size of result buffer.
+ *
+ * Get information on a filesystem. 
The filesystem attribute to be queried is
+ * indicated by @_params->request, and some of the attributes can have multiple
+ * values, indexed by @_params->Nth and @_params->Mth. If @_params is NULL,
+ * then the 0th fsinfo_attr_statfs attribute is queried. If an attribute does
+ * not exist, EOPNOTSUPP is returned; if the Nth,Mth value does not exist,
+ * ENODATA is returned.
+ *
+ * On success, the size of the attribute's value is returned. If @buf_size is
+ * 0 or @_buffer is NULL, only the size is returned. If the size of the value
+ * is larger than @buf_size, it will be truncated by the copy. If the value is
+ * a structure rather than a string and is smaller than @buf_size then the
+ * excess buffer space will be cleared. The full size of the value will be
+ * returned, irrespective of how much data is actually placed in the buffer.
+ *
+ * Other errors: EFAULT if @_params or @_buffer cannot be accessed, EINVAL if
+ * a reserved field of @_params is non-zero, ENOBUFS if the attribute's table
+ * entry has both index bits set, and ENOMEM if the kernel buffer cannot be
+ * allocated.
+ */
+SYSCALL_DEFINE5(fsinfo,
+		int, dfd, const char __user *, filename,
+		struct fsinfo_params __user *, _params,
+		void __user *, _buffer, size_t, buf_size)
+{
+	struct fsinfo_params user_params;
+	struct fsinfo_kparams params;
+	size_t size, n;
+	int ret;
+
+	if (_params) {
+		if (copy_from_user(&user_params, _params, sizeof(user_params)))
+			return -EFAULT;
+		/* Reserved fields must be zero so they can be given a
+		 * meaning later.
+		 */
+		if (user_params.__reserved[0] ||
+		    user_params.__reserved[1] ||
+		    user_params.__reserved[2] ||
+		    user_params.__reserved[3] ||
+		    user_params.__reserved[4] ||
+		    user_params.__reserved[5])
+			return -EINVAL;
+		/* Also keeps the fsinfo_buffer_sizes[] lookup in bounds. */
+		if (user_params.request >= FSINFO_ATTR__NR)
+			return -EOPNOTSUPP;
+		params.at_flags = user_params.at_flags;
+		params.request = user_params.request;
+		params.Nth = user_params.Nth;
+		params.Mth = user_params.Mth;
+	} else {
+		params.at_flags = 0;
+		params.request = FSINFO_ATTR_STATFS;
+		params.Nth = 0;
+		params.Mth = 0;
+	}
+
+	/* A missing buffer and a zero-length buffer both mean "size only". */
+	if (!_buffer || !buf_size) {
+		buf_size = 0;
+		_buffer = NULL;
+	}
+
+	/* Allocate an appropriately-sized buffer. We will truncate the
+	 * contents when we write the contents back to userspace.
+	 */
+	size = fsinfo_buffer_sizes[params.request];
+	/* The bits under 0xc000 give the index class: reject an index the
+	 * attribute does not have.
+	 */
+	switch (size & 0xc000) {
+	case 0x0000:
+		if (params.Nth != 0)
+			return -ENODATA;
+		/* Fall through */
+	case 0x4000:
+		if (params.Mth != 0)
+			return -ENODATA;
+		/* Fall through */
+	case 0x8000:
+		break;
+	case 0xc000:
+		return -ENOBUFS;
+	}
+
+	size &= ~0xc000;
+	if (size == 0) {
+		params.string_val = true;
+		params.buf_size = 4096;
+	} else {
+		params.string_val = false;
+		params.buf_size = size;
+		if (buf_size == 0)
+			return size; /* We know how big the buffer should be */
+	}
+
+	/* We always allocate a buffer for a string, even if buf_size == 0 and
+	 * we're not going to return any data. This means that the filesystem
+	 * code needn't care about whether the buffer actually exists or not.
+	 */
+	params.buffer = kzalloc(params.buf_size, GFP_KERNEL);
+	if (!params.buffer)
+		return -ENOMEM;
+
+	if (filename)
+		ret = vfs_fsinfo_path(dfd, filename, &params);
+	else
+		ret = vfs_fsinfo_fd(dfd, &params);
+	if (ret < 0)
+		goto error;
+
+	/* Copy out no more than the caller asked for, and never more than the
+	 * kernel buffer holds: the reported size may exceed what was stored,
+	 * and reading beyond the allocation would disclose unrelated kernel
+	 * memory to userspace.
+	 */
+	n = ret;
+	if (n > buf_size)
+		n = buf_size;
+	if (n > params.buf_size)
+		n = params.buf_size;
+
+	if (n > 0 && copy_to_user(_buffer, params.buffer, n)) {
+		ret = -EFAULT;
+		goto error;
+	}
+
+	/* Clear any part of the buffer that we won't fill if we're putting a
+	 * struct in there rather than a string.  A failure here must still go
+	 * through the common exit so that the kernel buffer is freed.
+	 */
+	if (buf_size > n && !params.string_val &&
+	    clear_user(_buffer + n, buf_size - n) != 0)
+		ret = -EFAULT;
+error:
+	kfree(params.buffer);
+	return ret;
+}
diff --git a/fs/super.c b/fs/super.c index ca53a08497ed..94c84776bb88 100644 --- a/fs/super.c +++ b/fs/super.c @@ -35,6 +35,8 @@ #include <linux/fsnotify.h> #include <linux/lockdep.h> #include <linux/user_namespace.h> +#include <linux/fs_context.h> +#include <uapi/linux/mount.h> #include "internal.h" static int thaw_super_locked(struct super_block *sb); @@ -186,16 +188,13 @@ static void destroy_unused_super(struct super_block *s) } /** - * alloc_super - create new superblock - * @type: filesystem type superblock should belong to - * @flags: the mount flags - * @user_ns: User namespace for the super_block + * alloc_super - Create new superblock + * @fc: The filesystem configuration context * * Allocates and initializes a new &struct super_block. alloc_super() * returns a pointer new superblock or %NULL if allocation had failed. */ -static struct super_block *alloc_super(struct file_system_type *type, int flags, - struct user_namespace *user_ns) +static struct super_block *alloc_super(struct fs_context *fc) { struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); static const struct super_operations default_op; @@ -205,9 +204,9 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, return NULL; INIT_LIST_HEAD(&s->s_mounts); - s->s_user_ns = get_user_ns(user_ns); + s->s_user_ns = get_user_ns(fc->user_ns); init_rwsem(&s->s_umount); - lockdep_set_class(&s->s_umount, &type->s_umount_key); + lockdep_set_class(&s->s_umount, &fc->fs_type->s_umount_key); /* * sget() can have s_umount recursion. 
* @@ -231,12 +230,12 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, for (i = 0; i < SB_FREEZE_LEVELS; i++) { if (__percpu_init_rwsem(&s->s_writers.rw_sem[i], sb_writers_name[i], - &type->s_writers_key[i])) + &fc->fs_type->s_writers_key[i])) goto fail; } init_waitqueue_head(&s->s_writers.wait_unfrozen); s->s_bdi = &noop_backing_dev_info; - s->s_flags = flags; + s->s_flags = fc->sb_flags; if (s->s_user_ns != &init_user_ns) s->s_iflags |= SB_I_NODEV; INIT_HLIST_NODE(&s->s_instances); @@ -250,7 +249,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, s->s_count = 1; atomic_set(&s->s_active, 1); mutex_init(&s->s_vfs_rename_mutex); - lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); + lockdep_set_class(&s->s_vfs_rename_mutex, &fc->fs_type->s_vfs_rename_key); init_rwsem(&s->s_dquot.dqio_sem); s->s_maxbytes = MAX_NON_LFS; s->s_op = &default_op; @@ -475,6 +474,91 @@ void generic_shutdown_super(struct super_block *sb) EXPORT_SYMBOL(generic_shutdown_super); /** + * sget_fc - Find or create a superblock + * @fc: Filesystem context. + * @test: Comparison callback + * @set: Setup callback + * + * Find or create a superblock using the parameters stored in the filesystem + * context and the two callback functions. + * + * If an extant superblock is matched, then that will be returned with an + * elevated reference count that the caller must transfer or discard. + * + * If no match is made, a new superblock will be allocated and basic + * initialisation will be performed (s_type, s_fs_info and s_id will be set and + * the set() callback will be invoked), the superblock will be published and it + * will be returned in a partially constructed state with SB_BORN and SB_ACTIVE + * as yet unset. 
+ */ +struct super_block *sget_fc(struct fs_context *fc, + int (*test)(struct super_block *, struct fs_context *), + int (*set)(struct super_block *, struct fs_context *)) +{ + struct super_block *s = NULL; + struct super_block *old; + int err; + + if (!(fc->sb_flags & SB_KERNMOUNT) && + fc->purpose != FS_CONTEXT_FOR_SUBMOUNT) { + /* Don't allow mounting unless the caller has CAP_SYS_ADMIN + * over the namespace. + */ + if (!(fc->fs_type->fs_flags & FS_USERNS_MOUNT) && + !capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + else if (!ns_capable(fc->user_ns, CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + } + +retry: + spin_lock(&sb_lock); + if (test) { + hlist_for_each_entry(old, &fc->fs_type->fs_supers, s_instances) { + if (test(old, fc)) + goto share_extant_sb; + } + } + if (!s) { + spin_unlock(&sb_lock); + s = alloc_super(fc); + if (!s) + return ERR_PTR(-ENOMEM); + goto retry; + } + + s->s_fs_info = fc->s_fs_info; + err = set(s, fc); + if (err) { + s->s_fs_info = NULL; + spin_unlock(&sb_lock); + destroy_unused_super(s); + return ERR_PTR(err); + } + fc->s_fs_info = NULL; + s->s_type = fc->fs_type; + strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id)); + list_add_tail(&s->s_list, &super_blocks); + hlist_add_head(&s->s_instances, &s->s_type->fs_supers); + spin_unlock(&sb_lock); + get_filesystem(s->s_type); + register_shrinker_prepared(&s->s_shrink); + return s; + +share_extant_sb: + if (fc->user_ns != old->s_user_ns) { + spin_unlock(&sb_lock); + destroy_unused_super(s); + return ERR_PTR(-EBUSY); + } + if (!grab_super(old)) + goto retry; + destroy_unused_super(s); + return old; +} +EXPORT_SYMBOL(sget_fc); + +/** * sget_userns - find or create a superblock * @type: filesystem type superblock should belong to * @test: comparison callback @@ -516,7 +600,14 @@ retry: } if (!s) { spin_unlock(&sb_lock); - s = alloc_super(type, (flags & ~SB_SUBMOUNT), user_ns); + { + struct fs_context fc = { + .fs_type = type, + .sb_flags = flags & ~SB_SUBMOUNT, + .user_ns = user_ns, + }; + s = 
alloc_super(&fc); + } if (!s) return ERR_PTR(-ENOMEM); goto retry; @@ -834,28 +925,30 @@ rescan: } /** - * do_remount_sb - asks filesystem to change mount options. - * @sb: superblock in question - * @sb_flags: revised superblock flags - * @data: the rest of options - * @force: whether or not to force the change + * reconfigure_super - asks filesystem to change superblock parameters + * @fc: The superblock and configuration * - * Alters the mount options of a mounted file system. + * Alters the configuration parameters of a live superblock. */ -int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) +int reconfigure_super(struct fs_context *fc) { + struct super_block *sb = fc->root->d_sb; int retval; - int remount_ro; + int remount_ro = false; + if (fc->sb_flags_mask & ~MS_RMT_MASK) + return -EINVAL; if (sb->s_writers.frozen != SB_UNFROZEN) return -EBUSY; + if (fc->sb_flags_mask & SB_RDONLY) { #ifdef CONFIG_BLOCK - if (!(sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev)) - return -EACCES; + if (!(fc->sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev)) + return -EACCES; #endif - remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb); + remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb); + } if (remount_ro) { if (!hlist_empty(&sb->s_pins)) { @@ -866,15 +959,16 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) return 0; if (sb->s_writers.frozen != SB_UNFROZEN) return -EBUSY; - remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb); + remount_ro = !sb_rdonly(sb); } } shrink_dcache_sb(sb); - /* If we are remounting RDONLY and current sb is read/write, - make sure there are no rw files opened */ + /* If we are reconfiguring to RDONLY and current sb is read/write, + * make sure there are no files open for writing. 
+ */ if (remount_ro) { - if (force) { + if (fc->purpose == FS_CONTEXT_FOR_EMERGENCY_RO) { sb->s_readonly_remount = 1; smp_wmb(); } else { @@ -884,17 +978,21 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force) } } - if (sb->s_op->remount_fs) { - retval = sb->s_op->remount_fs(sb, &sb_flags, data); - if (retval) { - if (!force) + if (fc->ops->reconfigure) { + retval = fc->ops->reconfigure(fc); + if (retval == 0) { + security_sb_reconfigure(fc); + } else { + if (fc->purpose != FS_CONTEXT_FOR_EMERGENCY_RO) goto cancel_readonly; /* If forced remount, go ahead despite any errors */ WARN(1, "forced remount of a %s fs returned %i\n", sb->s_type->name, retval); } } - sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (sb_flags & MS_RMT_MASK); + + WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) | + (fc->sb_flags & fc->sb_flags_mask))); /* Needs to be ordered wrt mnt_is_readonly() */ smp_wmb(); sb->s_readonly_remount = 0; @@ -918,13 +1016,29 @@ cancel_readonly: static void do_emergency_remount_callback(struct super_block *sb) { + struct fs_context fc = { + .purpose = FS_CONTEXT_FOR_EMERGENCY_RO, + .fs_type = sb->s_type, + .root = sb->s_root, + .sb_flags = SB_RDONLY, + .sb_flags_mask = SB_RDONLY, + }; + down_write(&sb->s_umount); if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) && !sb_rdonly(sb)) { + int ret; + + if (fc.fs_type->init_fs_context) + ret = fc.fs_type->init_fs_context(&fc, NULL); + else + ret = legacy_init_fs_context(&fc, NULL); + /* * What lock protects sb->s_flags?? 
*/ - do_remount_sb(sb, SB_RDONLY, NULL, 1); + if (ret == 0) + reconfigure_super(&fc); } up_write(&sb->s_umount); } @@ -1054,8 +1168,9 @@ static int ns_set_super(struct super_block *sb, void *data) } struct dentry *mount_ns(struct file_system_type *fs_type, - int flags, void *data, void *ns, struct user_namespace *user_ns, - int (*fill_super)(struct super_block *, void *, int)) + int flags, void *data, size_t data_size, + void *ns, struct user_namespace *user_ns, + int (*fill_super)(struct super_block *, void *, size_t, int)) { struct super_block *sb; @@ -1072,7 +1187,7 @@ struct dentry *mount_ns(struct file_system_type *fs_type, if (!sb->s_root) { int err; - err = fill_super(sb, data, flags & SB_SILENT ? 1 : 0); + err = fill_super(sb, data, data_size, flags & SB_SILENT ? 1 : 0); if (err) { deactivate_locked_super(sb); return ERR_PTR(err); @@ -1086,6 +1201,89 @@ struct dentry *mount_ns(struct file_system_type *fs_type, EXPORT_SYMBOL(mount_ns); +int set_anon_super_fc(struct super_block *sb, struct fs_context *fc) +{ + return set_anon_super(sb, NULL); +} +EXPORT_SYMBOL(set_anon_super_fc); + +static int test_keyed_super(struct super_block *sb, struct fs_context *fc) +{ + return sb->s_fs_info == fc->s_fs_info; +} + +static int test_single_super(struct super_block *s, struct fs_context *fc) +{ + return 1; +} + +/** + * vfs_get_super - Get a superblock with a search key set in s_fs_info. + * @fc: The filesystem context holding the parameters + * @keying: How to distinguish superblocks + * @fill_super: Helper to initialise a new superblock + * + * Search for a superblock and create a new one if not found. The search + * criterion is controlled by @keying. If the search fails, a new superblock + * is created and @fill_super() is called to initialise it. + * + * @keying can take one of a number of values: + * + * (1) vfs_get_single_super - Only one superblock of this type may exist on the + * system. This is typically used for special system filesystems. 
+ * + * (2) vfs_get_keyed_super - Multiple superblocks may exist, but they must have + * distinct keys (where the key is in s_fs_info). Searching for the same + * key again will turn up the superblock for that key. + * + * (3) vfs_get_independent_super - Multiple superblocks may exist and are + * unkeyed. Each call will get a new superblock. + * + * A permissions check is made by sget_fc() unless we're getting a superblock + * for a kernel-internal mount or a submount. + */ +int vfs_get_super(struct fs_context *fc, + enum vfs_get_super_keying keying, + int (*fill_super)(struct super_block *sb, + struct fs_context *fc)) +{ + int (*test)(struct super_block *, struct fs_context *); + struct super_block *sb; + + switch (keying) { + case vfs_get_single_super: + test = test_single_super; + break; + case vfs_get_keyed_super: + test = test_keyed_super; + break; + case vfs_get_independent_super: + test = NULL; + break; + default: + BUG(); + } + + sb = sget_fc(fc, test, set_anon_super_fc); + if (IS_ERR(sb)) + return PTR_ERR(sb); + + if (!sb->s_root) { + int err = fill_super(sb, fc); + if (err) { + deactivate_locked_super(sb); + return err; + } + + sb->s_flags |= SB_ACTIVE; + } + + BUG_ON(fc->root); + fc->root = dget(sb->s_root); + return 0; +} +EXPORT_SYMBOL(vfs_get_super); + #ifdef CONFIG_BLOCK static int set_bdev_super(struct super_block *s, void *data) { @@ -1102,8 +1300,8 @@ static int test_bdev_super(struct super_block *s, void *data) } struct dentry *mount_bdev(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int)) + int flags, const char *dev_name, void *data, size_t data_size, + int (*fill_super)(struct super_block *, void *, size_t, int)) { struct block_device *bdev; struct super_block *s; @@ -1155,7 +1353,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, s->s_mode = mode; snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); sb_set_blocksize(s, block_size(bdev)); - error = 
fill_super(s, data, flags & SB_SILENT ? 1 : 0); + error = fill_super(s, data, data_size, flags & SB_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); goto error; @@ -1192,8 +1390,8 @@ EXPORT_SYMBOL(kill_block_super); #endif struct dentry *mount_nodev(struct file_system_type *fs_type, - int flags, void *data, - int (*fill_super)(struct super_block *, void *, int)) + int flags, void *data, size_t data_size, + int (*fill_super)(struct super_block *, void *, size_t, int)) { int error; struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL); @@ -1201,7 +1399,7 @@ struct dentry *mount_nodev(struct file_system_type *fs_type, if (IS_ERR(s)) return ERR_CAST(s); - error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); + error = fill_super(s, data, data_size, flags & SB_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); return ERR_PTR(error); @@ -1211,14 +1409,50 @@ struct dentry *mount_nodev(struct file_system_type *fs_type, } EXPORT_SYMBOL(mount_nodev); +static int reconfigure_single(struct super_block *s, + int flags, void *data, size_t data_size) +{ + struct fs_context *fc; + int ret; + + /* The caller really need to be passing fc down into mount_single(), + * then a chunk of this can be removed. Better yet, reconfiguration + * shouldn't happen, but rather the second mount should be rejected if + * the parameters are not compatible. 
+ */ + fc = vfs_new_fs_context(s->s_type, s->s_root, flags, MS_RMT_MASK, + FS_CONTEXT_FOR_RECONFIGURE); + if (IS_ERR(fc)) + return PTR_ERR(fc); + + ret = parse_monolithic_mount_data(fc, data, data_size); + if (ret < 0) + goto out; + + if (fc->ops->validate) { + ret = fc->ops->validate(fc); + if (ret < 0) + goto out; + } + + ret = security_fs_context_validate(fc); + if (ret) + goto out; + + ret = reconfigure_super(fc); +out: + put_fs_context(fc); + return ret; +} + static int compare_single(struct super_block *s, void *p) { return 1; } struct dentry *mount_single(struct file_system_type *fs_type, - int flags, void *data, - int (*fill_super)(struct super_block *, void *, int)) + int flags, void *data, size_t data_size, + int (*fill_super)(struct super_block *, void *, size_t, int)) { struct super_block *s; int error; @@ -1227,79 +1461,22 @@ struct dentry *mount_single(struct file_system_type *fs_type, if (IS_ERR(s)) return ERR_CAST(s); if (!s->s_root) { - error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); - if (error) { - deactivate_locked_super(s); - return ERR_PTR(error); - } + error = fill_super(s, data, data_size, flags & SB_SILENT ? 
1 : 0); + if (error) + goto error; s->s_flags |= SB_ACTIVE; } else { - do_remount_sb(s, flags, data, 0); - } - return dget(s->s_root); -} -EXPORT_SYMBOL(mount_single); - -struct dentry * -mount_fs(struct file_system_type *type, int flags, const char *name, void *data) -{ - struct dentry *root; - struct super_block *sb; - char *secdata = NULL; - int error = -ENOMEM; - - if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { - secdata = alloc_secdata(); - if (!secdata) - goto out; - - error = security_sb_copy_data(data, secdata); + error = reconfigure_single(s, flags, data, data_size); if (error) - goto out_free_secdata; - } - - root = type->mount(type, flags, name, data); - if (IS_ERR(root)) { - error = PTR_ERR(root); - goto out_free_secdata; + goto error; } - sb = root->d_sb; - BUG_ON(!sb); - WARN_ON(!sb->s_bdi); - - /* - * Write barrier is for super_cache_count(). We place it before setting - * SB_BORN as the data dependency between the two functions is the - * superblock structure contents that we just set up, not the SB_BORN - * flag. - */ - smp_wmb(); - sb->s_flags |= SB_BORN; - - error = security_sb_kern_mount(sb, flags, secdata); - if (error) - goto out_sb; - - /* - * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE - * but s_maxbytes was an unsigned long long for many releases. Throw - * this warning for a little while to try and catch filesystems that - * violate this rule. - */ - WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " - "negative value (%lld)\n", type->name, sb->s_maxbytes); + return dget(s->s_root); - up_write(&sb->s_umount); - free_secdata(secdata); - return root; -out_sb: - dput(root); - deactivate_locked_super(sb); -out_free_secdata: - free_secdata(secdata); -out: +error: + deactivate_locked_super(s); return ERR_PTR(error); } +EXPORT_SYMBOL(mount_single); /* * Setup private BDI for given superblock. 
It gets automatically cleaned up @@ -1580,3 +1757,92 @@ int thaw_super(struct super_block *sb) return thaw_super_locked(sb); } EXPORT_SYMBOL(thaw_super); + +/** + * vfs_get_tree - Get the mountable root + * @fc: The superblock configuration context. + * + * The filesystem is invoked to get or create a superblock which can then later + * be used for mounting. The filesystem places a pointer to the root to be + * used for mounting in @fc->root. + */ +int vfs_get_tree(struct fs_context *fc) +{ + struct super_block *sb; + int ret; + + if (fc->fs_type->fs_flags & FS_REQUIRES_DEV && !fc->source) { + errorf(fc, "Filesystem requires source device"); + return -ENOENT; + } + + if (fc->root) + return -EBUSY; + + if (fc->ops->validate) { + ret = fc->ops->validate(fc); + if (ret < 0) + return ret; + } + + ret = security_fs_context_validate(fc); + if (ret < 0) + return ret; + + /* Get the mountable root in fc->root, with a ref on the root and a ref + * on the superblock. + */ + ret = fc->ops->get_tree(fc); + if (ret < 0) + return ret; + + if (!fc->root) { + pr_err("Filesystem %s get_tree() didn't set fc->root\n", + fc->fs_type->name); + /* We don't know what the locking state of the superblock is - + * if there is a superblock. + */ + BUG(); + } + + sb = fc->root->d_sb; + WARN_ON(!sb->s_bdi); + + ret = security_sb_get_tree(fc); + if (ret < 0) + goto err_sb; + + ret = -ENOMEM; + if (fc->subtype && !sb->s_subtype) { + sb->s_subtype = kstrdup(fc->subtype, GFP_KERNEL); + if (!sb->s_subtype) + goto err_sb; + } + + /* Write barrier is for super_cache_count(). We place it before setting + * SB_BORN as the data dependency between the two functions is the + * superblock structure contents that we just set up, not the SB_BORN + * flag. + */ + smp_wmb(); + sb->s_flags |= SB_BORN; + + /* Filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE + * but s_maxbytes was an unsigned long long for many releases. 
Throw + * this warning for a little while to try and catch filesystems that + * violate this rule. + */ + WARN(sb->s_maxbytes < 0, + "%s set sb->s_maxbytes to negative value (%lld)\n", + fc->fs_type->name, sb->s_maxbytes); + + up_write(&sb->s_umount); + return 0; + +err_sb: + dput(fc->root); + fc->root = NULL; + deactivate_locked_super(sb); + return ret; +} +EXPORT_SYMBOL(vfs_get_tree); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 92682fcc41f6..1e1c0ccc6a36 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -13,6 +13,7 @@ #include <linux/magic.h> #include <linux/mount.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/user_namespace.h> #include "sysfs.h" @@ -20,27 +21,55 @@ static struct kernfs_root *sysfs_root; struct kernfs_node *sysfs_root_kn; -static struct dentry *sysfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int sysfs_get_tree(struct fs_context *fc) { - struct dentry *root; - void *ns; - bool new_sb = false; + struct kernfs_fs_context *kfc = fc->fs_private; + int ret; - if (!(flags & SB_KERNMOUNT)) { + ret = kernfs_get_tree(fc); + if (ret) + return ret; + + if (kfc->new_sb_created) + fc->root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE; + return 0; +} + +static void sysfs_fs_context_free(struct fs_context *fc) +{ + struct kernfs_fs_context *kfc = fc->fs_private; + + if (kfc->ns_tag) + kobj_ns_drop(KOBJ_NS_TYPE_NET, kfc->ns_tag); + kernfs_free_fs_context(fc); + kfree(kfc); +} + +static const struct fs_context_operations sysfs_fs_context_ops = { + .free = sysfs_fs_context_free, + .get_tree = sysfs_get_tree, +}; + +static int sysfs_init_fs_context(struct fs_context *fc, + struct dentry *reference) +{ + struct kernfs_fs_context *kfc; + + if (!(fc->sb_flags & SB_KERNMOUNT)) { if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) - return ERR_PTR(-EPERM); + return -EPERM; } - ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); - root = kernfs_mount_ns(fs_type, flags, sysfs_root, - SYSFS_MAGIC, 
&new_sb, ns); - if (!new_sb) - kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); - else if (!IS_ERR(root)) - root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE; + kfc = kzalloc(sizeof(struct kernfs_fs_context), GFP_KERNEL); + if (!kfc) + return -ENOMEM; - return root; + kfc->ns_tag = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); + kfc->root = sysfs_root; + kfc->magic = SYSFS_MAGIC; + fc->fs_private = kfc; + fc->ops = &sysfs_fs_context_ops; + return 0; } static void sysfs_kill_sb(struct super_block *sb) @@ -52,10 +81,10 @@ static void sysfs_kill_sb(struct super_block *sb) } static struct file_system_type sysfs_fs_type = { - .name = "sysfs", - .mount = sysfs_mount, - .kill_sb = sysfs_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .name = "sysfs", + .init_fs_context = sysfs_init_fs_context, + .kill_sb = sysfs_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; int __init sysfs_init(void) diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 273736f41be3..8a0e5e36e5e7 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -57,7 +57,8 @@ static int sysv_sync_fs(struct super_block *sb, int wait) return 0; } -static int sysv_remount(struct super_block *sb, int *flags, char *data) +static int sysv_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { struct sysv_sb_info *sbi = SYSV_SB(sb); diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 89765ddfb738..275c7038eecd 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -349,7 +349,8 @@ static int complete_read_super(struct super_block *sb, int silent, int size) return 1; } -static int sysv_fill_super(struct super_block *sb, void *data, int silent) +static int sysv_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct buffer_head *bh1, *bh = NULL; struct sysv_sb_info *sbi; @@ -470,7 +471,8 @@ static int v7_sanity_check(struct super_block *sb, struct buffer_head *bh) return 1; } -static int v7_fill_super(struct super_block *sb, void *data, int silent) +static int v7_fill_super(struct super_block *sb, void 
*data, size_t data_size, + int silent) { struct sysv_sb_info *sbi; struct buffer_head *bh; @@ -528,15 +530,17 @@ failed: /* Every kernel module contains stuff like this. */ static struct dentry *sysv_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, sysv_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + sysv_fill_super); } static struct dentry *v7_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, v7_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + v7_fill_super); } static struct file_system_type sysv_fs_type = { diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 7098c49f3693..aa423e446765 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -225,7 +225,8 @@ static int tracefs_apply_options(struct super_block *sb) return 0; } -static int tracefs_remount(struct super_block *sb, int *flags, char *data) +static int tracefs_remount(struct super_block *sb, int *flags, + char *data, size_t data_size) { int err; struct tracefs_fs_info *fsi = sb->s_fs_info; @@ -264,7 +265,8 @@ static const struct super_operations tracefs_super_operations = { .show_options = tracefs_show_options, }; -static int trace_fill_super(struct super_block *sb, void *data, int silent) +static int trace_fill_super(struct super_block *sb, + void *data, size_t data_size, int silent) { static const struct tree_descr trace_files[] = {{""}}; struct tracefs_fs_info *fsi; @@ -299,9 +301,9 @@ fail: static struct dentry *trace_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, size_t data_size) { - return mount_single(fs_type, flags, data, trace_fill_super); + return 
mount_single(fs_type, flags, data, data_size, trace_fill_super); } static struct file_system_type trace_fs_type = { diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 1fac1133dadd..403351619076 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1927,7 +1927,8 @@ static void ubifs_put_super(struct super_block *sb) mutex_unlock(&c->umount_mutex); } -static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) +static int ubifs_remount_fs(struct super_block *sb, int *flags, + char *data, size_t data_size) { int err; struct ubifs_info *c = sb->s_fs_info; @@ -2198,7 +2199,7 @@ static int sb_set(struct super_block *sb, void *data) } static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, - const char *name, void *data) + const char *name, void *data, size_t data_size) { struct ubi_volume_desc *ubi; struct ubifs_info *c; diff --git a/fs/udf/super.c b/fs/udf/super.c index e3d684ea3203..d51a876ac384 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -87,10 +87,10 @@ enum { enum { UDF_MAX_LINKS = 0xffff }; /* These are the "meat" - everything else is stuffing */ -static int udf_fill_super(struct super_block *, void *, int); +static int udf_fill_super(struct super_block *, void *, size_t, int); static void udf_put_super(struct super_block *); static int udf_sync_fs(struct super_block *, int); -static int udf_remount_fs(struct super_block *, int *, char *); +static int udf_remount_fs(struct super_block *, int *, char *, size_t); static void udf_load_logicalvolint(struct super_block *, struct kernel_extent_ad); static int udf_find_fileset(struct super_block *, struct kernel_lb_addr *, struct kernel_lb_addr *); @@ -126,9 +126,11 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb) /* UDF filesystem type */ static struct dentry *udf_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { - return 
mount_bdev(fs_type, flags, dev_name, data, udf_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + udf_fill_super); } static struct file_system_type udf_fstype = { @@ -604,7 +606,8 @@ static int udf_parse_options(char *options, struct udf_options *uopt, return 1; } -static int udf_remount_fs(struct super_block *sb, int *flags, char *options) +static int udf_remount_fs(struct super_block *sb, int *flags, + char *options, size_t data_size) { struct udf_options uopt; struct udf_sb_info *sbi = UDF_SB(sb); @@ -2054,7 +2057,8 @@ u64 lvid_get_unique_id(struct super_block *sb) return ret; } -static int udf_fill_super(struct super_block *sb, void *options, int silent) +static int udf_fill_super(struct super_block *sb, + void *options, size_t data_size, int silent) { int ret = -EINVAL; struct inode *inode = NULL; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index a4e07e910f1b..f48a5b802221 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -774,7 +774,8 @@ static u64 ufs_max_bytes(struct super_block *sb) return res << uspi->s_bshift; } -static int ufs_fill_super(struct super_block *sb, void *data, int silent) +static int ufs_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct ufs_sb_info * sbi; struct ufs_sb_private_info * uspi; @@ -1297,7 +1298,8 @@ failed_nomem: return -ENOMEM; } -static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) +static int ufs_remount (struct super_block *sb, int *mount_flags, + char *data, size_t data_size) { struct ufs_sb_private_info * uspi; struct ufs_super_block_first * usb1; @@ -1505,9 +1507,10 @@ static const struct super_operations ufs_super_ops = { }; static struct dentry *ufs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, ufs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, 
data_size, + ufs_fill_super); } static struct file_system_type ufs_fs_type = { diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index d3e6cd063688..696b4657d17e 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1270,7 +1270,8 @@ STATIC int xfs_fs_remount( struct super_block *sb, int *flags, - char *options) + char *options, + size_t data_size) { struct xfs_mount *mp = XFS_M(sb); xfs_sb_t *sbp = &mp->m_sb; @@ -1601,6 +1602,7 @@ STATIC int xfs_fs_fill_super( struct super_block *sb, void *data, + size_t data_size, int silent) { struct inode *root; @@ -1814,9 +1816,11 @@ xfs_fs_mount( struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, + size_t data_size) { - return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, data_size, + xfs_fs_fill_super); } static long diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9d12757a65b0..bb0c7da50ed2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -865,10 +865,11 @@ copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, #endif /* !CONFIG_CGROUPS */ -static inline void get_cgroup_ns(struct cgroup_namespace *ns) +static inline struct cgroup_namespace *get_cgroup_ns(struct cgroup_namespace *ns) { if (ns) refcount_inc(&ns->count); + return ns; } static inline void put_cgroup_ns(struct cgroup_namespace *ns) diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 3b0ba54cc4d5..a02de1b397ca 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -75,11 +75,11 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent); struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, const char *dest); -typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); +typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *, size_t); struct dentry *debugfs_create_automount(const char 
*name, struct dentry *parent, debugfs_automount_t f, - void *data); + void *data, size_t data_size); void debugfs_remove(struct dentry *dentry); void debugfs_remove_recursive(struct dentry *dentry); @@ -204,8 +204,8 @@ static inline struct dentry *debugfs_create_symlink(const char *name, static inline struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, - struct vfsmount *(*f)(void *), - void *data) + struct vfsmount *(*f)(void *, size_t), + void *data, size_t data_size) { return ERR_PTR(-ENODEV); } diff --git a/include/linux/errno.h b/include/linux/errno.h index 3cba627577d6..d73f597a2484 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h @@ -18,6 +18,7 @@ #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ #define EPROBE_DEFER 517 /* Driver requests probe retry */ #define EOPENSTALE 518 /* open found a stale dentry */ +#define ENOPARAM 519 /* Parameter not supported */ /* Defined for the NFSv3 protocol */ #define EBADHANDLE 521 /* Illegal NFS file handle */ diff --git a/include/linux/fs.h b/include/linux/fs.h index fd41ec161e6e..62451269d951 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -62,6 +62,10 @@ struct workqueue_struct; struct iov_iter; struct fscrypt_info; struct fscrypt_operations; +struct fs_context; +struct fs_parameter_description; +struct fsinfo_kparams; +enum fsinfo_attribute; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -158,10 +162,13 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) /* File is capable of returning -EAGAIN if I/O will block */ -#define FMODE_NOWAIT ((__force fmode_t)0x8000000) +#define FMODE_NOWAIT ((__force fmode_t)0x8000000) + +/* File represents mount that needs unmounting */ +#define FMODE_NEED_UNMOUNT ((__force fmode_t)0x10000000) /* File does not contribute to nr_files count */ -#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) +#define 
FMODE_NOACCOUNT ((__force fmode_t)0x20000000) /* * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector @@ -1896,7 +1903,8 @@ struct super_operations { int (*thaw_super) (struct super_block *); int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); - int (*remount_fs) (struct super_block *, int *, char *); + int (*fsinfo) (struct path *, struct fsinfo_kparams *); + int (*remount_fs) (struct super_block *, int *, char *, size_t); void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct dentry *); @@ -2157,8 +2165,10 @@ struct file_system_type { #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ + int (*init_fs_context)(struct fs_context *, struct dentry *); + const struct fs_parameter_description *parameters; struct dentry *(*mount) (struct file_system_type *, int, - const char *, void *); + const char *, void *, size_t); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; @@ -2177,26 +2187,27 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) extern struct dentry *mount_ns(struct file_system_type *fs_type, - int flags, void *data, void *ns, struct user_namespace *user_ns, - int (*fill_super)(struct super_block *, void *, int)); + int flags, void *data, size_t data_size, + void *ns, struct user_namespace *user_ns, + int (*fill_super)(struct super_block *, void *, size_t, int)); #ifdef CONFIG_BLOCK extern struct dentry *mount_bdev(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int)); + int flags, const char *dev_name, void *data, size_t data_size, + int (*fill_super)(struct super_block *, void *, size_t, int)); #else static inline struct dentry *mount_bdev(struct file_system_type *fs_type, - int 
flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int)) + int flags, const char *dev_name, void *data, size_t data_size, + int (*fill_super)(struct super_block *, void *, size_t, int)) { return ERR_PTR(-ENODEV); } #endif extern struct dentry *mount_single(struct file_system_type *fs_type, - int flags, void *data, - int (*fill_super)(struct super_block *, void *, int)); + int flags, void *data, size_t data_size, + int (*fill_super)(struct super_block *, void *, size_t, int)); extern struct dentry *mount_nodev(struct file_system_type *fs_type, - int flags, void *data, - int (*fill_super)(struct super_block *, void *, int)); + int flags, void *data, size_t data_size, + int (*fill_super)(struct super_block *, void *, size_t, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void generic_shutdown_super(struct super_block *sb); #ifdef CONFIG_BLOCK @@ -2212,8 +2223,12 @@ void kill_litter_super(struct super_block *sb); void deactivate_super(struct super_block *sb); void deactivate_locked_super(struct super_block *sb); int set_anon_super(struct super_block *s, void *data); +int set_anon_super_fc(struct super_block *s, struct fs_context *fc); int get_anon_bdev(dev_t *); void free_anon_bdev(dev_t); +struct super_block *sget_fc(struct fs_context *fc, + int (*test)(struct super_block *, struct fs_context *), + int (*set)(struct super_block *, struct fs_context *)); struct super_block *sget_userns(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), @@ -2256,8 +2271,7 @@ mount_pseudo(struct file_system_type *fs_type, char *name, extern int register_filesystem(struct file_system_type *); extern int unregister_filesystem(struct file_system_type *); -extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); -#define kern_mount(type) kern_mount_data(type, NULL) +extern struct vfsmount *kern_mount(struct file_system_type *); 
extern void kern_unmount(struct vfsmount *mnt); extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); @@ -2270,6 +2284,7 @@ extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, extern int vfs_statfs(const struct path *, struct kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); +extern int vfs_fsinfo(struct path *, struct fsinfo_kparams *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); extern bool our_mnt(struct vfsmount *mnt); diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h new file mode 100644 index 000000000000..bb584db982ff --- /dev/null +++ b/include/linux/fs_context.h @@ -0,0 +1,215 @@ +/* Filesystem superblock creation and reconfiguration context. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ + +#ifndef _LINUX_FS_CONTEXT_H +#define _LINUX_FS_CONTEXT_H + +#include <linux/kernel.h> +#include <linux/refcount.h> +#include <linux/errno.h> +#include <linux/mutex.h> + +struct cred; +struct dentry; +struct file_operations; +struct file_system_type; +struct mnt_namespace; +struct net; +struct pid_namespace; +struct super_block; +struct user_namespace; +struct vfsmount; +struct path; + +enum fs_context_purpose { + FS_CONTEXT_FOR_USER_MOUNT, /* New superblock for user-specified mount */ + FS_CONTEXT_FOR_KERNEL_MOUNT, /* New superblock for kernel-internal mount */ + FS_CONTEXT_FOR_SUBMOUNT, /* New superblock for automatic submount */ + FS_CONTEXT_FOR_ROOT_MOUNT, /* New superblock for internal root mount */ + FS_CONTEXT_FOR_RECONFIGURE, /* Superblock reconfiguration (remount) */ + FS_CONTEXT_FOR_UMOUNT, /* Reconfiguration to R/O for unmount */ + FS_CONTEXT_FOR_EMERGENCY_RO, /* Emergency reconfiguration to R/O */ +}; + +/* + * Userspace usage phase for fsopen/fspick. + */ +enum fs_context_phase { + FS_CONTEXT_CREATE_PARAMS, /* Loading params for sb creation */ + FS_CONTEXT_CREATING, /* A superblock is being created */ + FS_CONTEXT_AWAITING_MOUNT, /* Superblock created, awaiting fsmount() */ + FS_CONTEXT_AWAITING_RECONF, /* Awaiting initialisation for reconfiguration */ + FS_CONTEXT_RECONF_PARAMS, /* Loading params for reconfiguration */ + FS_CONTEXT_RECONFIGURING, /* Reconfiguring the superblock */ + FS_CONTEXT_FAILED, /* Failed to correctly transition a context */ +}; + +/* + * Type of parameter value. + */ +enum fs_value_type { + fs_value_is_undefined, + fs_value_is_flag, /* Value not given a value */ + fs_value_is_string, /* Value is a string */ + fs_value_is_blob, /* Value is a binary blob */ + fs_value_is_filename, /* Value is a filename* + dirfd */ + fs_value_is_filename_empty, /* Value is a filename* + dirfd + AT_EMPTY_PATH */ + fs_value_is_file, /* Value is a file* */ +}; + +/* + * Configuration parameter. 
+ */ +struct fs_parameter { + const char *key; /* Parameter name */ + enum fs_value_type type:8; /* The type of value here */ + union { + char *string; + void *blob; + struct filename *name; + struct file *file; + }; + size_t size; + int dirfd; +}; + +/* + * Filesystem context for holding the parameters used in the creation or + * reconfiguration of a superblock. + * + * Superblock creation fills in ->root whereas reconfiguration begins with this + * already set. + * + * See Documentation/filesystems/mount_api.txt + */ +struct fs_context { + const struct fs_context_operations *ops; + struct mutex uapi_mutex; /* Userspace access mutex */ + struct file_system_type *fs_type; + void *fs_private; /* The filesystem's context */ + struct dentry *root; /* The root and superblock */ + struct user_namespace *user_ns; /* The user namespace for this mount */ + struct net *net_ns; /* The network namespace for this mount */ + const struct cred *cred; /* The mounter's credentials */ + struct fc_log *log; /* Logging buffer */ + char *source; /* The source name (eg. 
dev path) */ + char *subtype; /* The subtype to set on the superblock */ + void *security; /* The LSM context */ + void *s_fs_info; /* Proposed s_fs_info */ + unsigned int sb_flags; /* Proposed superblock flags (SB_*) */ + unsigned int sb_flags_mask; /* Superblock flags that were changed */ + unsigned int lsm_flags; /* Information flags from the fs to the LSM */ + enum fs_context_purpose purpose:8; + enum fs_context_phase phase:8; /* The phase the context is in */ + bool sloppy:1; /* T if unrecognised options are okay */ + bool silent:1; /* T if "o silent" specified */ + bool need_free:1; /* Need to call ops->free() */ +}; + +struct fs_context_operations { + void (*free)(struct fs_context *fc); + int (*dup)(struct fs_context *fc, struct fs_context *src_fc); + int (*parse_param)(struct fs_context *fc, struct fs_parameter *param); + int (*parse_monolithic)(struct fs_context *fc, void *data, size_t data_size); + int (*validate)(struct fs_context *fc); + int (*get_tree)(struct fs_context *fc); + int (*reconfigure)(struct fs_context *fc); +}; + +/* + * fs_context manipulation functions. + */ +extern struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type, + struct dentry *reference, + unsigned int sb_flags, + unsigned int sb_flags_mask, + enum fs_context_purpose purpose); +extern struct fs_context *vfs_dup_fs_context(struct fs_context *src, + enum fs_context_purpose purpose); +extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param); +extern int vfs_parse_fs_string(struct fs_context *fc, const char *key, + const char *value, size_t v_size); +extern int generic_parse_monolithic(struct fs_context *fc, void *data, size_t data_size); +extern int vfs_get_tree(struct fs_context *fc); +extern void put_fs_context(struct fs_context *fc); + +/* + * sget() wrapper to be called from the ->get_tree() op. 
+ */ +enum vfs_get_super_keying { + vfs_get_single_super, /* Only one such superblock may exist */ + vfs_get_keyed_super, /* Superblocks with different s_fs_info keys may exist */ + vfs_get_independent_super, /* Multiple independent superblocks may exist */ +}; +extern int vfs_get_super(struct fs_context *fc, + enum vfs_get_super_keying keying, + int (*fill_super)(struct super_block *sb, + struct fs_context *fc)); + +extern const struct file_operations fscontext_fops; + +/* + * Mount error, warning and informational message logging. This structure is + * shareable between a mount and a subordinate mount. + */ +struct fc_log { + refcount_t usage; + u8 head; /* Insertion index in buffer[] */ + u8 tail; /* Removal index in buffer[] */ + u8 need_free; /* Mask of kfree'able items in buffer[] */ + struct module *owner; /* Owner module for strings that don't then need freeing */ + char *buffer[8]; +}; + +extern __attribute__((format(printf, 2, 3))) +void logfc(struct fs_context *fc, const char *fmt, ...); + +/** + * infof - Store supplementary informational message + * @fc: The context in which to log the informational message + * @fmt: The format string + * + * Store the supplementary informational message for the process if the process + * has enabled the facility. + */ +#define infof(fc, fmt, ...) ({ logfc(fc, "i "fmt, ## __VA_ARGS__); }) + +/** + * warnf - Store supplementary warning message + * @fc: The context in which to log the error message + * @fmt: The format string + * + * Store the supplementary warning message for the process if the process has + * enabled the facility. + */ +#define warnf(fc, fmt, ...) ({ logfc(fc, "w "fmt, ## __VA_ARGS__); }) + +/** + * errorf - Store supplementary error message + * @fc: The context in which to log the error message + * @fmt: The format string + * + * Store the supplementary error message for the process if the process has + * enabled the facility. + */ +#define errorf(fc, fmt, ...) 
({ logfc(fc, "e "fmt, ## __VA_ARGS__); }) + +/** + * invalf - Store supplementary invalid argument error message + * @fc: The context in which to log the error message + * @fmt: The format string + * + * Store the supplementary error message for the process if the process has + * enabled the facility and return -EINVAL. + */ +#define invalf(fc, fmt, ...) ({ errorf(fc, fmt, ## __VA_ARGS__); -EINVAL; }) + +#endif /* _LINUX_FS_CONTEXT_H */ diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h new file mode 100644 index 000000000000..e21792a6fc33 --- /dev/null +++ b/include/linux/fs_parser.h @@ -0,0 +1,119 @@ +/* Filesystem parameter description and parser + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _LINUX_FS_PARSER_H +#define _LINUX_FS_PARSER_H + +#include <linux/fs_context.h> + +struct path; + +struct constant_table { + const char *name; + int value; +}; + +#define fsconfig_key_removed 0xff /* Parameter name is no longer valid */ + +/* + * The type of parameter expected. + */ +enum fs_parameter_type { + __fs_param_wasnt_defined, + fs_param_is_flag, + fs_param_is_bool, + fs_param_is_u32, + fs_param_is_u32_octal, + fs_param_is_u32_hex, + fs_param_is_s32, + fs_param_is_u64, + fs_param_is_enum, + fs_param_is_string, + fs_param_is_blob, + fs_param_is_blockdev, + fs_param_is_path, + fs_param_is_fd, + nr__fs_parameter_type, +}; + +/* + * Specification of the type of value a parameter wants. 
+ */ +struct fs_parameter_spec { + enum fs_parameter_type type:8; /* The desired parameter type */ + u8 flags; +#define fs_param_v_optional 0x01 /* The value is optional */ +#define fs_param_neg_with_no 0x02 /* "noxxx" is negative param */ +#define fs_param_neg_with_empty 0x04 /* "xxx=" is negative param */ +#define fs_param_deprecated 0x08 /* The param is deprecated */ +}; + +struct fs_parameter_enum { + u8 param_id; + char name[14]; + u8 value; +}; + +struct fs_parameter_description { + const char name[16]; /* Name for logging purposes */ + u8 nr_params; /* Number of parameter IDs */ + u8 nr_alt_keys; /* Number of alt_keys[] */ + u8 nr_enums; /* Number of enum value names */ + u8 source_param; /* Index of source parameter */ + bool no_source; /* Set if no source is expected */ + const char *const *keys; /* Sorted list of key names, one per nr_params */ + const struct constant_table *alt_keys; /* Sorted list of alternate key names */ + const struct fs_parameter_spec *specs; /* List of param specifications */ + const struct fs_parameter_enum *enums; /* Enum values */ +}; + +/* + * Result of parse. 
+ */ +struct fs_parse_result { + struct fs_parameter_spec t; + u8 key; /* Looked up key ID */ + bool negated; /* T if param was "noxxx" */ + bool has_value; /* T if value supplied to param */ + union { + bool boolean; /* For spec_bool */ + int int_32; /* For spec_s32/spec_enum */ + unsigned int uint_32; /* For spec_u32{,_octal,_hex}/spec_enum */ + u64 uint_64; /* For spec_u64 */ + }; +}; + +extern int fs_parse(struct fs_context *fc, + const struct fs_parameter_description *desc, + struct fs_parameter *value, + struct fs_parse_result *result); +extern int fs_lookup_param(struct fs_context *fc, + struct fs_parameter *param, + bool want_bdev, + struct path *_path); + +extern int __lookup_constant(const struct constant_table tbl[], size_t tbl_size, + const char *name, int not_found); +#define lookup_constant(t, n, nf) __lookup_constant(t, ARRAY_SIZE(t), (n), (nf)) + +#ifdef CONFIG_VALIDATE_FS_PARSER +extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, + int low, int high, int special); +extern bool fs_validate_description(const struct fs_parameter_description *desc); +#else +static inline bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, + int low, int high, int special) +{ return true; } +static inline bool fs_validate_description(const struct fs_parameter_description *desc) +{ return true; } +#endif + +#endif /* _LINUX_FS_PARSER_H */ diff --git a/include/linux/fsinfo.h b/include/linux/fsinfo.h new file mode 100644 index 000000000000..e488701c5c04 --- /dev/null +++ b/include/linux/fsinfo.h @@ -0,0 +1,41 @@ +/* Filesystem information query + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ + +#ifndef _LINUX_FSINFO_H +#define _LINUX_FSINFO_H + +#include <uapi/linux/fsinfo.h> + +struct fsinfo_kparams { + __u32 at_flags; /* AT_SYMLINK_NOFOLLOW and similar */ + enum fsinfo_attribute request; /* What is being asked for */ + __u32 Nth; /* Instance of it (some may have multiple) */ + __u32 Mth; /* Subinstance */ + bool string_val; /* T if variable-length string value */ + void *buffer; /* Where to place the reply */ + size_t buf_size; /* Size of the buffer */ +}; + +extern int generic_fsinfo(struct path *, struct fsinfo_kparams *); + +static inline void fsinfo_set_cap(struct fsinfo_capabilities *c, + enum fsinfo_capability cap) +{ + c->capabilities[cap / 8] |= 1 << (cap % 8); +} + +static inline void fsinfo_clear_cap(struct fsinfo_capabilities *c, + enum fsinfo_capability cap) +{ + c->capabilities[cap / 8] &= ~(1 << (cap % 8)); +} + +#endif /* _LINUX_FSINFO_H */ diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5b36b1287a5a..625c19ce86bb 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -17,6 +17,7 @@ #include <linux/atomic.h> #include <linux/uidgid.h> #include <linux/wait.h> +#include <linux/fs_context.h> struct file; struct dentry; @@ -25,7 +26,10 @@ struct seq_file; struct vm_area_struct; struct super_block; struct file_system_type; +struct fs_context; +struct fsinfo_kparams; +struct kernfs_fs_context; struct kernfs_open_node; struct kernfs_iattrs; @@ -167,8 +171,9 @@ struct kernfs_node { * kernfs_node parameter. */ struct kernfs_syscall_ops { - int (*remount_fs)(struct kernfs_root *root, int *flags, char *data); + int (*reconfigure)(struct kernfs_root *root, struct fs_context *fc); int (*show_options)(struct seq_file *sf, struct kernfs_root *root); + int (*fsinfo)(struct kernfs_root *root, struct fsinfo_kparams *params); int (*mkdir)(struct kernfs_node *parent, const char *name, umode_t mode); @@ -268,6 +273,18 @@ struct kernfs_ops { #endif }; +/* + * The kernfs superblock creation/mount parameter context. 
+ */ +struct kernfs_fs_context { + struct kernfs_root *root; /* Root of the hierarchy being mounted */ + void *ns_tag; /* Namespace tag of the mount (or NULL) */ + unsigned long magic; /* File system specific magic number */ + + /* The following are set/used by kernfs_get_tree() */ + bool new_sb_created; /* Set to T if we allocated a new sb */ +}; + #ifdef CONFIG_KERNFS static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) @@ -353,11 +370,11 @@ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr); void kernfs_notify(struct kernfs_node *kn); const void *kernfs_super_ns(struct super_block *sb); -struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, unsigned long magic, - bool *new_sb_created, const void *ns); +int kernfs_get_tree(struct fs_context *fc); +void kernfs_free_fs_context(struct fs_context *fc); void kernfs_kill_sb(struct super_block *sb); struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns); +int kernfs_reconfigure(struct fs_context *fc); void kernfs_init(void); @@ -459,11 +476,10 @@ static inline void kernfs_notify(struct kernfs_node *kn) { } static inline const void *kernfs_super_ns(struct super_block *sb) { return NULL; } -static inline struct dentry * -kernfs_mount_ns(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, unsigned long magic, - bool *new_sb_created, const void *ns) -{ return ERR_PTR(-ENOSYS); } +static inline int kernfs_get_tree(struct fs_context *fc) +{ return -ENOSYS; } + +static inline void kernfs_free_fs_context(struct fs_context *fc) { } static inline void kernfs_kill_sb(struct super_block *sb) { } @@ -546,13 +562,4 @@ static inline int kernfs_rename(struct kernfs_node *kn, return kernfs_rename_ns(kn, new_parent, new_name, NULL); } -static inline struct dentry * -kernfs_mount(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, unsigned long magic, - bool *new_sb_created) -{ - return 
kernfs_mount_ns(fs_type, flags, root, + magic, new_sb_created, NULL); -} - #endif /* __LINUX_KERNFS_H */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index aaeb7fa24dc4..e99495acb037 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -76,6 +76,49 @@ * changes on the process such as clearing out non-inheritable signal * state. This is called immediately after commit_creds(). * + * Security hooks for mount using fs_context. + * [See also Documentation/filesystems/mount_api.txt] + * + * @fs_context_alloc: + * Allocate and attach a security structure to fc->security. This pointer + * is initialised to NULL by the caller. + * @fc indicates the new filesystem context. + * @reference indicates the source dentry of a submount or start of reconfig. + * @fs_context_dup: + * Allocate and attach a security structure to fc->security. This pointer + * is initialised to NULL by the caller. + * @fc indicates the new filesystem context. + * @src_fc indicates the original filesystem context. + * @fs_context_free: + * Clean up a filesystem context. + * @fc indicates the filesystem context. + * @fs_context_parse_param: + * Userspace provided a parameter to configure a superblock. The LSM may + * reject it with an error and may use it for itself, in which case it + * should return 0; otherwise it should return -ENOPARAM to pass it on to + * the filesystem. + * @fc indicates the filesystem context. + * @param The parameter + * @fs_context_validate: + * Validate the filesystem context preparatory to applying it. This is + * done after all the options have been parsed. + * @fc indicates the filesystem context. + * @sb_get_tree: + * Assign the security to a newly created superblock. + * @fc indicates the filesystem context. + * @fc->root indicates the root that will be mounted. + * @fc->root->d_sb points to the superblock. + * @sb_reconfigure: + * Apply reconfiguration to the security on a superblock. + * @fc indicates the filesystem context. 
+ * @fc->root indicates a dentry in the mount. + * @fc->root->d_sb points to the superblock. + * @sb_mountpoint: + * Equivalent of sb_mount, but with an fs_context. + * @fc indicates the filesystem context. + * @mountpoint indicates the path on which the mount will take place. + * @mnt_flags indicates the MNT_* flags specified. + * * Security hooks for filesystem operations. * * @sb_alloc_security: @@ -104,6 +147,7 @@ * @type contains the filesystem type. * @flags contains the mount flags. * @data contains the filesystem-specific data. + * @data_size contains the size of the data. * Return 0 if permission is granted. * @sb_copy_data: * Allow mount option data to be copied prior to parsing by the filesystem, @@ -113,14 +157,9 @@ * specific options to avoid having to make filesystems aware of them. * @type the type of filesystem being mounted. * @orig the original mount data copied from userspace. + * @orig_size is the size of the original data * @copy copied data which will be passed to the security module. * Returns 0 if the copy was successful. - * @sb_remount: - * Extracts security system specific mount options and verifies no changes - * are being made to those options. - * @sb superblock being remounted - * @data contains the filesystem-specific data. - * Return 0 if permission is granted. * @sb_umount: * Check permission before the @mnt file system is unmounted. * @mnt contains the mounted file system. @@ -144,6 +183,10 @@ * Parse a string of security data filling in the opts structure * @options string containing all mount options known by the LSM * @opts binary data structure usable by the LSM + * @move_mount: + * Check permission before a mount is moved. + * @from_path indicates the mount that is going to be moved. + * @to_path indicates the mountpoint that will be mounted upon. * @dentry_init_security: * Compute a context for a dentry as the inode is not yet available * since NFSv4 has no label backed by an EA anyway. 
@@ -1459,15 +1502,24 @@ union security_list_options { void (*bprm_committing_creds)(struct linux_binprm *bprm); void (*bprm_committed_creds)(struct linux_binprm *bprm); + int (*fs_context_alloc)(struct fs_context *fc, struct dentry *reference); + int (*fs_context_dup)(struct fs_context *fc, struct fs_context *src_sc); + void (*fs_context_free)(struct fs_context *fc); + int (*fs_context_parse_param)(struct fs_context *fc, struct fs_parameter *param); + int (*fs_context_validate)(struct fs_context *fc); + int (*sb_get_tree)(struct fs_context *fc); + void (*sb_reconfigure)(struct fs_context *fc); + int (*sb_mountpoint)(struct fs_context *fc, struct path *mountpoint, + unsigned int mnt_flags); + int (*sb_alloc_security)(struct super_block *sb); void (*sb_free_security)(struct super_block *sb); - int (*sb_copy_data)(char *orig, char *copy); - int (*sb_remount)(struct super_block *sb, void *data); - int (*sb_kern_mount)(struct super_block *sb, int flags, void *data); + int (*sb_copy_data)(char *orig, size_t orig_size, char *copy); int (*sb_show_options)(struct seq_file *m, struct super_block *sb); int (*sb_statfs)(struct dentry *dentry); int (*sb_mount)(const char *dev_name, const struct path *path, - const char *type, unsigned long flags, void *data); + const char *type, unsigned long flags, + void *data, size_t data_size); int (*sb_umount)(struct vfsmount *mnt, int flags); int (*sb_pivotroot)(const struct path *old_path, const struct path *new_path); int (*sb_set_mnt_opts)(struct super_block *sb, @@ -1479,6 +1531,7 @@ union security_list_options { unsigned long kern_flags, unsigned long *set_kern_flags); int (*sb_parse_opts_str)(char *options, struct security_mnt_opts *opts); + int (*move_mount)(const struct path *from_path, const struct path *to_path); int (*dentry_init_security)(struct dentry *dentry, int mode, const struct qstr *name, void **ctx, u32 *ctxlen); @@ -1798,11 +1851,17 @@ struct security_hook_heads { struct hlist_head bprm_check_security; struct 
hlist_head bprm_committing_creds; struct hlist_head bprm_committed_creds; + struct hlist_head fs_context_alloc; + struct hlist_head fs_context_dup; + struct hlist_head fs_context_free; + struct hlist_head fs_context_parse_param; + struct hlist_head fs_context_validate; + struct hlist_head sb_get_tree; + struct hlist_head sb_reconfigure; + struct hlist_head sb_mountpoint; struct hlist_head sb_alloc_security; struct hlist_head sb_free_security; struct hlist_head sb_copy_data; - struct hlist_head sb_remount; - struct hlist_head sb_kern_mount; struct hlist_head sb_show_options; struct hlist_head sb_statfs; struct hlist_head sb_mount; @@ -1811,6 +1870,7 @@ struct security_hook_heads { struct hlist_head sb_set_mnt_opts; struct hlist_head sb_clone_mnt_opts; struct hlist_head sb_parse_opts_str; + struct hlist_head move_mount; struct hlist_head dentry_init_security; struct hlist_head dentry_create_files_as; #ifdef CONFIG_SECURITY_PATH diff --git a/include/linux/module.h b/include/linux/module.h index fce6b4335e36..b9b4394ee0c7 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -682,6 +682,12 @@ static inline bool is_module_text_address(unsigned long addr) return false; } +static inline bool within_module_core(unsigned long addr, + const struct module *mod) +{ + return false; +} + /* Get/put a kernel symbol (calls should be symmetric) */ #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); }) #define symbol_put(x) do { } while (0) diff --git a/include/linux/mount.h b/include/linux/mount.h index 45b1f56c6c2f..41b6b080ffd0 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -21,6 +21,7 @@ struct super_block; struct vfsmount; struct dentry; struct mnt_namespace; +struct fs_context; #define MNT_NOSUID 0x01 #define MNT_NODEV 0x02 @@ -81,19 +82,22 @@ extern void mnt_drop_write_file(struct file *file); extern void mntput(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt); extern struct vfsmount 
*mnt_clone_internal(const struct path *path); -extern int __mnt_is_readonly(struct vfsmount *mnt); +extern bool __mnt_is_readonly(struct vfsmount *mnt); extern bool mnt_may_suid(struct vfsmount *mnt); struct path; extern struct vfsmount *clone_private_mount(const struct path *path); struct file_system_type; +extern struct vfsmount *vfs_create_mount(struct fs_context *fc, + unsigned int mnt_flags); extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, - void *data); + void *data, size_t data_size); extern struct vfsmount *vfs_submount(const struct dentry *mountpoint, struct file_system_type *type, - const char *name, void *data); + const char *name, + void *data, size_t data_size); extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list); extern void mark_mounts_for_expiry(struct list_head *mounts); diff --git a/include/linux/mtd/super.h b/include/linux/mtd/super.h index f456230f9330..3f37c7cd711c 100644 --- a/include/linux/mtd/super.h +++ b/include/linux/mtd/super.h @@ -19,8 +19,8 @@ #include <linux/mount.h> extern struct dentry *mount_mtd(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int)); + const char *dev_name, void *data, size_t data_size, + int (*fill_super)(struct super_block *, void *, size_t, int)); extern void kill_mtd_super(struct super_block *sb); diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index 5ef7d54caac2..6d64e6be9928 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -5,7 +5,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, const struct inode *dir, umode_t mode, dev_t dev); extern struct dentry *ramfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data); + int flags, const char *dev_name, void *data, size_t data_size); #ifdef CONFIG_MMU static inline int @@ -21,6 +21,6 @@ extern const struct file_operations 
ramfs_file_operations; extern const struct vm_operations_struct generic_file_vm_ops; extern int __init init_ramfs_fs(void); -int ramfs_fill_super(struct super_block *sb, void *data, int silent); +int ramfs_fill_super(struct super_block *sb, void *data, size_t data_size, int silent); #endif diff --git a/include/linux/security.h b/include/linux/security.h index d170a5b031f3..ac0ef467d25e 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -53,12 +53,15 @@ struct msg_msg; struct xattr; struct xfrm_sec_ctx; struct mm_struct; +struct fs_context; +struct fs_parameter; +enum fs_value_type; /* If capable should audit the security request */ #define SECURITY_CAP_NOAUDIT 0 #define SECURITY_CAP_AUDIT 1 -/* LSM Agnostic defines for sb_set_mnt_opts */ +/* LSM Agnostic defines for fs_context::lsm_flags */ #define SECURITY_LSM_NATIVE_LABELS 1 struct ctl_table; @@ -246,15 +249,22 @@ int security_bprm_set_creds(struct linux_binprm *bprm); int security_bprm_check(struct linux_binprm *bprm); void security_bprm_committing_creds(struct linux_binprm *bprm); void security_bprm_committed_creds(struct linux_binprm *bprm); +int security_fs_context_alloc(struct fs_context *fc, struct dentry *reference); +int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc); +void security_fs_context_free(struct fs_context *fc); +int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param); +int security_fs_context_validate(struct fs_context *fc); +int security_sb_get_tree(struct fs_context *fc); +void security_sb_reconfigure(struct fs_context *fc); +int security_sb_mountpoint(struct fs_context *fc, struct path *mountpoint, + unsigned int mnt_flags); int security_sb_alloc(struct super_block *sb); void security_sb_free(struct super_block *sb); -int security_sb_copy_data(char *orig, char *copy); -int security_sb_remount(struct super_block *sb, void *data); -int security_sb_kern_mount(struct super_block *sb, int flags, void *data); +int 
security_sb_copy_data(char *orig, size_t orig_size, char *copy); int security_sb_show_options(struct seq_file *m, struct super_block *sb); int security_sb_statfs(struct dentry *dentry); int security_sb_mount(const char *dev_name, const struct path *path, - const char *type, unsigned long flags, void *data); + const char *type, unsigned long flags, void *data, size_t data_size); int security_sb_umount(struct vfsmount *mnt, int flags); int security_sb_pivotroot(const struct path *old_path, const struct path *new_path); int security_sb_set_mnt_opts(struct super_block *sb, @@ -266,6 +276,7 @@ int security_sb_clone_mnt_opts(const struct super_block *oldsb, unsigned long kern_flags, unsigned long *set_kern_flags); int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts); +int security_move_mount(const struct path *from_path, const struct path *to_path); int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, void **ctx, u32 *ctxlen); @@ -547,25 +558,50 @@ static inline void security_bprm_committed_creds(struct linux_binprm *bprm) { } -static inline int security_sb_alloc(struct super_block *sb) +static inline int security_fs_context_alloc(struct fs_context *fc, + struct dentry *reference) { return 0; } - -static inline void security_sb_free(struct super_block *sb) -{ } - -static inline int security_sb_copy_data(char *orig, char *copy) +static inline int security_fs_context_dup(struct fs_context *fc, + struct fs_context *src_fc) +{ + return 0; +} +static inline void security_fs_context_free(struct fs_context *fc) +{ +} +static inline int security_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param) +{ + return -ENOPARAM; +} +static inline int security_fs_context_validate(struct fs_context *fc) +{ + return 0; +} +static inline int security_sb_get_tree(struct fs_context *fc) +{ + return 0; +} +static inline void security_sb_reconfigure(struct fs_context *fc) +{ +} +static inline int 
security_sb_mountpoint(struct fs_context *fc, struct path *mountpoint, + unsigned int mnt_flags) { return 0; } -static inline int security_sb_remount(struct super_block *sb, void *data) +static inline int security_sb_alloc(struct super_block *sb) { return 0; } -static inline int security_sb_kern_mount(struct super_block *sb, int flags, void *data) +static inline void security_sb_free(struct super_block *sb) +{ } + +static inline int security_sb_copy_data(char *orig, size_t orig_size, char *copy) { return 0; } @@ -583,7 +619,7 @@ static inline int security_sb_statfs(struct dentry *dentry) static inline int security_sb_mount(const char *dev_name, const struct path *path, const char *type, unsigned long flags, - void *data) + void *data, size_t data_size) { return 0; } @@ -620,6 +656,12 @@ static inline int security_sb_parse_opts_str(char *options, struct security_mnt_ return 0; } +static inline int security_move_mount(const struct path *from_path, + const struct path *to_path) +{ + return 0; +} + static inline int security_inode_alloc(struct inode *inode) { return 0; diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index f155dc607112..66772728cb74 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -49,7 +49,8 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) * Functions in mm/shmem.c called directly from elsewhere: */ extern int shmem_init(void); -extern int shmem_fill_super(struct super_block *sb, void *data, int silent); +extern int shmem_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent); extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); extern struct file *shmem_kernel_file_setup(const char *name, loff_t size, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2ac3d13a915b..82682b69435e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -50,6 +50,7 @@ struct stat64; struct statfs; struct statfs64; 
struct statx; +struct fsinfo_params; struct __sysctl_args; struct sysinfo; struct timespec; @@ -907,6 +908,18 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, unsigned mask, struct statx __user *buffer); asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, int flags, uint32_t sig); +asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags); +asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path, + int to_dfd, const char __user *to_path, + unsigned int ms_flags); +asmlinkage long sys_fsopen(const char __user *fs_name, unsigned int flags); +asmlinkage long sys_fsconfig(int fs_fd, unsigned int cmd, const char __user *key, + const void __user *value, int aux); +asmlinkage long sys_fsmount(int fs_fd, unsigned int flags, unsigned int ms_flags); +asmlinkage long sys_fspick(int dfd, const char __user *path, unsigned int flags); +asmlinkage long sys_fsinfo(int dfd, const char __user *path, + struct fsinfo_params __user *params, + void __user *buffer, size_t buf_size); /* * Architecture-specific system calls diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 6448cdd9a350..594b85f7cb86 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -90,5 +90,7 @@ #define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ #define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ + #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index a441ea1bfe6d..53a22e8e0408 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -14,6 +14,11 @@ #include <linux/ioctl.h> #include <linux/types.h> +/* Use of MS_* flags within the kernel is restricted to core mount(2) code. 
*/ +#if !defined(__KERNEL__) +#include <linux/mount.h> +#endif + /* * It's silly to have NR_OPEN bigger than NR_FILE, but you can change * the file limit at runtime and only root can increase the per-process @@ -101,57 +106,6 @@ struct inodes_stat_t { #define NR_FILE 8192 /* this can well be larger on a larger system */ - -/* - * These are the fs-independent mount-flags: up to 32 flags are supported - */ -#define MS_RDONLY 1 /* Mount read-only */ -#define MS_NOSUID 2 /* Ignore suid and sgid bits */ -#define MS_NODEV 4 /* Disallow access to device special files */ -#define MS_NOEXEC 8 /* Disallow program execution */ -#define MS_SYNCHRONOUS 16 /* Writes are synced at once */ -#define MS_REMOUNT 32 /* Alter flags of a mounted FS */ -#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ -#define MS_DIRSYNC 128 /* Directory modifications are synchronous */ -#define MS_NOATIME 1024 /* Do not update access times. */ -#define MS_NODIRATIME 2048 /* Do not update directory access times */ -#define MS_BIND 4096 -#define MS_MOVE 8192 -#define MS_REC 16384 -#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. - MS_VERBOSE is deprecated. */ -#define MS_SILENT 32768 -#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ -#define MS_UNBINDABLE (1<<17) /* change to unbindable */ -#define MS_PRIVATE (1<<18) /* change to private */ -#define MS_SLAVE (1<<19) /* change to slave */ -#define MS_SHARED (1<<20) /* change to shared */ -#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. 
*/ -#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ -#define MS_I_VERSION (1<<23) /* Update inode I_version field */ -#define MS_STRICTATIME (1<<24) /* Always perform atime updates */ -#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ - -/* These sb flags are internal to the kernel */ -#define MS_SUBMOUNT (1<<26) -#define MS_NOREMOTELOCK (1<<27) -#define MS_NOSEC (1<<28) -#define MS_BORN (1<<29) -#define MS_ACTIVE (1<<30) -#define MS_NOUSER (1<<31) - -/* - * Superblock flags that can be altered by MS_REMOUNT - */ -#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\ - MS_LAZYTIME) - -/* - * Old magic mount flag and mask - */ -#define MS_MGC_VAL 0xC0ED0000 -#define MS_MGC_MSK 0xffff0000 - /* * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR. */ diff --git a/include/uapi/linux/fsinfo.h b/include/uapi/linux/fsinfo.h new file mode 100644 index 000000000000..50e6cd50fe63 --- /dev/null +++ b/include/uapi/linux/fsinfo.h @@ -0,0 +1,303 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* fsinfo() definitions. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ +#ifndef _UAPI_LINUX_FSINFO_H +#define _UAPI_LINUX_FSINFO_H + +#include <linux/types.h> +#include <linux/socket.h> + +/* + * The filesystem attributes that can be requested. Note that some attributes + * may have multiple instances which can be switched in the parameter block. + */ +enum fsinfo_attribute { + FSINFO_ATTR_STATFS = 0, /* statfs()-style state */ + FSINFO_ATTR_FSINFO = 1, /* Information about fsinfo() */ + FSINFO_ATTR_IDS = 2, /* Filesystem IDs */ + FSINFO_ATTR_LIMITS = 3, /* Filesystem limits */ + FSINFO_ATTR_SUPPORTS = 4, /* What's supported in statx, iocflags, ... 
*/ + FSINFO_ATTR_CAPABILITIES = 5, /* Filesystem capabilities (bits) */ + FSINFO_ATTR_TIMESTAMP_INFO = 6, /* Inode timestamp info */ + FSINFO_ATTR_VOLUME_ID = 7, /* Volume ID (string) */ + FSINFO_ATTR_VOLUME_UUID = 8, /* Volume UUID (LE uuid) */ + FSINFO_ATTR_VOLUME_NAME = 9, /* Volume name (string) */ + FSINFO_ATTR_CELL_NAME = 10, /* Cell name (string) */ + FSINFO_ATTR_DOMAIN_NAME = 11, /* Domain name (string) */ + FSINFO_ATTR_SERVER_NAME = 12, /* Name of the Nth server */ + FSINFO_ATTR_SERVER_ADDRESS = 13, /* Mth address of the Nth server */ + FSINFO_ATTR_PARAMETER = 14, /* Nth mount parameter (string) */ + FSINFO_ATTR_SOURCE = 15, /* Nth mount source name (string) */ + FSINFO_ATTR_NAME_ENCODING = 16, /* Filename encoding (string) */ + FSINFO_ATTR_NAME_CODEPAGE = 17, /* Filename codepage (string) */ + FSINFO_ATTR_IO_SIZE = 18, /* Optimal I/O sizes */ + FSINFO_ATTR_PARAM_DESCRIPTION = 19, /* General fs parameter description */ + FSINFO_ATTR_PARAM_SPECIFICATION = 20, /* Nth parameter specification */ + FSINFO_ATTR_PARAM_NAME = 21, /* Nth name to param index */ + FSINFO_ATTR_PARAM_ENUM = 22, /* Nth enum-to-val */ + FSINFO_ATTR__NR +}; + +/* + * Optional fsinfo() parameter structure. + * + * If this is not given, it is assumed that fsinfo_attr_statfs instance 0,0 is + * desired. + */ +struct fsinfo_params { + __u32 at_flags; /* AT_SYMLINK_NOFOLLOW and similar flags */ + __u32 request; /* What is being asked for (enum fsinfo_attribute) */ + __u32 Nth; /* Instance of it (some may have multiple) */ + __u32 Mth; /* Subinstance of Nth instance */ + __u32 __reserved[6]; /* Reserved params; all must be 0 */ +}; + +/* + * Information struct for fsinfo(fsinfo_attr_statfs). + * - This gives extended filesystem information.
+ */ +struct fsinfo_statfs { + __u64 f_blocks; /* Total number of blocks in fs */ + __u64 f_bfree; /* Total number of free blocks */ + __u64 f_bavail; /* Number of free blocks available to ordinary user */ + __u64 f_files; /* Total number of file nodes in fs */ + __u64 f_ffree; /* Number of free file nodes */ + __u64 f_favail; /* Number of free file nodes available to ordinary user */ + __u32 f_bsize; /* Optimal block size */ + __u32 f_frsize; /* Fragment size */ +}; + +/* + * Information struct for fsinfo(fsinfo_attr_ids). + * + * List of basic identifiers as is normally found in statfs(). + */ +struct fsinfo_ids { + char f_fs_name[15 + 1]; + __u64 f_flags; /* Filesystem mount flags (MS_*) */ + __u64 f_fsid; /* Short 64-bit Filesystem ID (as statfs) */ + __u64 f_sb_id; /* Internal superblock ID for sbnotify()/mntnotify() */ + __u32 f_fstype; /* Filesystem type from linux/magic.h [uncond] */ + __u32 f_dev_major; /* As st_dev_* from struct statx [uncond] */ + __u32 f_dev_minor; + __u32 __reserved[1]; +}; + +/* + * Information struct for fsinfo(fsinfo_attr_limits). + * + * List of supported filesystem limits. + */ +struct fsinfo_limits { + __u64 max_file_size; /* Maximum file size */ + __u64 max_uid; /* Maximum UID supported */ + __u64 max_gid; /* Maximum GID supported */ + __u64 max_projid; /* Maximum project ID supported */ + __u32 max_dev_major; /* Maximum device major representable */ + __u32 max_dev_minor; /* Maximum device minor representable */ + __u32 max_hard_links; /* Maximum number of hard links on a file */ + __u32 max_xattr_body_len; /* Maximum xattr content length */ + __u32 max_xattr_name_len; /* Maximum xattr name length */ + __u32 max_filename_len; /* Maximum filename length */ + __u32 max_symlink_len; /* Maximum symlink content length */ + __u32 __reserved[1]; +}; + +/* + * Information struct for fsinfo(fsinfo_attr_supports). + * + * What's supported in various masks, such as statx() attribute and mask bits + * and IOC flags. 
+ */ +struct fsinfo_supports { + __u64 stx_attributes; /* What statx::stx_attributes are supported */ + __u32 stx_mask; /* What statx::stx_mask bits are supported */ + __u32 ioc_flags; /* What FS_IOC_* flags are supported */ + __u32 win_file_attrs; /* What DOS/Windows FILE_* attributes are supported */ + __u32 __reserved[1]; +}; + +/* + * Information struct for fsinfo(fsinfo_attr_capabilities). + * + * Bitmask indicating filesystem capabilities where renderable as single bits. + */ +enum fsinfo_capability { + FSINFO_CAP_IS_KERNEL_FS = 0, /* fs is kernel-special filesystem */ + FSINFO_CAP_IS_BLOCK_FS = 1, /* fs is block-based filesystem */ + FSINFO_CAP_IS_FLASH_FS = 2, /* fs is flash filesystem */ + FSINFO_CAP_IS_NETWORK_FS = 3, /* fs is network filesystem */ + FSINFO_CAP_IS_AUTOMOUNTER_FS = 4, /* fs is automounter special filesystem */ + FSINFO_CAP_AUTOMOUNTS = 5, /* fs supports automounts */ + FSINFO_CAP_ADV_LOCKS = 6, /* fs supports advisory file locking */ + FSINFO_CAP_MAND_LOCKS = 7, /* fs supports mandatory file locking */ + FSINFO_CAP_LEASES = 8, /* fs supports file leases */ + FSINFO_CAP_UIDS = 9, /* fs supports numeric uids */ + FSINFO_CAP_GIDS = 10, /* fs supports numeric gids */ + FSINFO_CAP_PROJIDS = 11, /* fs supports numeric project ids */ + FSINFO_CAP_ID_NAMES = 12, /* fs supports user names */ + FSINFO_CAP_ID_GUIDS = 13, /* fs supports user guids */ + FSINFO_CAP_WINDOWS_ATTRS = 14, /* fs has windows attributes */ + FSINFO_CAP_USER_QUOTAS = 15, /* fs has per-user quotas */ + FSINFO_CAP_GROUP_QUOTAS = 16, /* fs has per-group quotas */ + FSINFO_CAP_PROJECT_QUOTAS = 17, /* fs has per-project quotas */ + FSINFO_CAP_XATTRS = 18, /* fs has xattrs */ + FSINFO_CAP_JOURNAL = 19, /* fs has a journal */ + FSINFO_CAP_DATA_IS_JOURNALLED = 20, /* fs is using data journalling */ + FSINFO_CAP_O_SYNC = 21, /* fs supports O_SYNC */ + FSINFO_CAP_O_DIRECT = 22, /* fs supports O_DIRECT */ + FSINFO_CAP_VOLUME_ID = 23, /* fs has a volume ID */ + FSINFO_CAP_VOLUME_UUID = 24, 
/* fs has a volume UUID */ + FSINFO_CAP_VOLUME_NAME = 25, /* fs has a volume name */ + FSINFO_CAP_VOLUME_FSID = 26, /* fs has a volume FSID */ + FSINFO_CAP_CELL_NAME = 27, /* fs has a cell name */ + FSINFO_CAP_DOMAIN_NAME = 28, /* fs has a domain name */ + FSINFO_CAP_REALM_NAME = 29, /* fs has a realm name */ + FSINFO_CAP_IVER_ALL_CHANGE = 30, /* i_version represents data + meta changes */ + FSINFO_CAP_IVER_DATA_CHANGE = 31, /* i_version represents data changes only */ + FSINFO_CAP_IVER_MONO_INCR = 32, /* i_version incremented monotonically */ + FSINFO_CAP_SYMLINKS = 33, /* fs supports symlinks */ + FSINFO_CAP_HARD_LINKS = 34, /* fs supports hard links */ + FSINFO_CAP_HARD_LINKS_1DIR = 35, /* fs supports hard links in same dir only */ + FSINFO_CAP_DEVICE_FILES = 36, /* fs supports bdev, cdev */ + FSINFO_CAP_UNIX_SPECIALS = 37, /* fs supports pipe, fifo, socket */ + FSINFO_CAP_RESOURCE_FORKS = 38, /* fs supports resource forks/streams */ + FSINFO_CAP_NAME_CASE_INDEP = 39, /* Filename case independence is mandatory */ + FSINFO_CAP_NAME_NON_UTF8 = 40, /* fs has non-utf8 names */ + FSINFO_CAP_NAME_HAS_CODEPAGE = 41, /* fs has a filename codepage */ + FSINFO_CAP_SPARSE = 42, /* fs supports sparse files */ + FSINFO_CAP_NOT_PERSISTENT = 43, /* fs is not persistent */ + FSINFO_CAP_NO_UNIX_MODE = 44, /* fs does not support unix mode bits */ + FSINFO_CAP_HAS_ATIME = 45, /* fs supports access time */ + FSINFO_CAP_HAS_BTIME = 46, /* fs supports birth/creation time */ + FSINFO_CAP_HAS_CTIME = 47, /* fs supports change time */ + FSINFO_CAP_HAS_MTIME = 48, /* fs supports modification time */ + FSINFO_CAP__NR +}; + +struct fsinfo_capabilities { + __u8 capabilities[(FSINFO_CAP__NR + 7) / 8]; +}; + +/* + * Information struct for fsinfo(fsinfo_attr_timestamp_info). 
+ */ +struct fsinfo_timestamp_info { + __s64 minimum_timestamp; /* Minimum timestamp value in seconds */ + __s64 maximum_timestamp; /* Maximum timestamp value in seconds */ + __u16 atime_gran_mantissa; /* Granularity(secs) = mant * 10^exp */ + __u16 btime_gran_mantissa; + __u16 ctime_gran_mantissa; + __u16 mtime_gran_mantissa; + __s8 atime_gran_exponent; + __s8 btime_gran_exponent; + __s8 ctime_gran_exponent; + __s8 mtime_gran_exponent; + __u32 __reserved[1]; +}; + +/* + * Information struct for fsinfo(fsinfo_attr_volume_uuid). + */ +struct fsinfo_volume_uuid { + __u8 uuid[16]; +}; + +/* + * Information struct for fsinfo(fsinfo_attr_server_addresses). + * + * Find the Mth address of the Nth server for a network mount. + */ +struct fsinfo_server_address { + struct __kernel_sockaddr_storage address; +}; + +/* + * Information struct for fsinfo(fsinfo_attr_io_size). + * + * Retrieve I/O size hints for a filesystem. + */ +struct fsinfo_io_size { + __u32 dio_size_gran; /* Size granularity for O_DIRECT */ + __u32 dio_mem_align; /* Memory alignment for O_DIRECT */ +}; + +/* + * Information struct for fsinfo(fsinfo_attr_fsinfo). + * + * This gives information about fsinfo() itself. + */ +struct fsinfo_fsinfo { + __u32 max_attr; /* Number of supported attributes (fsinfo_attr__nr) */ + __u32 max_cap; /* Number of supported capabilities (fsinfo_cap__nr) */ +}; + +/* + * Information struct for fsinfo(fsinfo_attr_param_description). + * + * Query the parameter set for a filesystem. + */ +struct fsinfo_param_description { + __u32 nr_params; /* Number of individual parameters */ + __u32 nr_names; /* Number of parameter names */ + __u32 nr_enum_names; /* Number of enum names */ + __u32 source_param; /* Source parameter index (or UINT_MAX) */ +}; + +/* + * Information struct for fsinfo(fsinfo_attr_param_specification). + * + * Query the specification of the Nth filesystem parameter. 
+ */ +struct fsinfo_param_specification { + __u32 type; /* enum fsinfo_param_specification_type */ + __u32 flags; /* Qualifiers */ +}; + +enum fsinfo_param_specification_type { + FSINFO_PARAM_SPEC_NOT_DEFINED, + FSINFO_PARAM_SPEC_TAKES_NO_VALUE, + FSINFO_PARAM_SPEC_IS_BOOL, + FSINFO_PARAM_SPEC_IS_U32, + FSINFO_PARAM_SPEC_IS_U32_OCTAL, + FSINFO_PARAM_SPEC_IS_U32_HEX, + FSINFO_PARAM_SPEC_IS_S32, + FSINFO_PARAM_SPEC_IS_U64, + FSINFO_PARAM_SPEC_IS_ENUM, + FSINFO_PARAM_SPEC_IS_STRING, + FSINFO_PARAM_SPEC_IS_BLOB, + FSINFO_PARAM_SPEC_IS_BLOCKDEV, + FSINFO_PARAM_SPEC_IS_PATH, + FSINFO_PARAM_SPEC_IS_FD, + NR__FSINFO_PARAM_SPEC +}; + +#define FSINFO_PARAM_SPEC_VALUE_IS_OPTIONAL 0X00000001 +#define FSINFO_PARAM_SPEC_PREFIX_NO_IS_NEG 0X00000002 +#define FSINFO_PARAM_SPEC_EMPTY_STRING_IS_NEG 0X00000004 +#define FSINFO_PARAM_SPEC_DEPRECATED 0X00000008 + +/* + * Information struct for fsinfo(fsinfo_attr_param_name). + * + * Query the Nth filesystem parameter name + */ +struct fsinfo_param_name { + __u32 param_index; /* Index of the parameter specification */ + char name[252]; /* Name of the parameter */ +}; + +/* + * Information struct for fsinfo(fsinfo_attr_param_enum). + * + * Query the Nth filesystem enum parameter value name. + */ +struct fsinfo_param_enum { + __u32 param_index; /* Index of the relevant parameter specification */ + char name[252]; /* Name of the enum value */ +}; + +#endif /* _UAPI_LINUX_FSINFO_H */ diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h new file mode 100644 index 000000000000..96a0240f23fe --- /dev/null +++ b/include/uapi/linux/mount.h @@ -0,0 +1,120 @@ +#ifndef _UAPI_LINUX_MOUNT_H +#define _UAPI_LINUX_MOUNT_H + +/* + * These are the fs-independent mount-flags: up to 32 flags are supported + * + * Usage of these is restricted within the kernel to core mount(2) code and + * callers of sys_mount() only. Filesystems should be using the SB_* + * equivalent instead. 
+ */ +#define MS_RDONLY 1 /* Mount read-only */ +#define MS_NOSUID 2 /* Ignore suid and sgid bits */ +#define MS_NODEV 4 /* Disallow access to device special files */ +#define MS_NOEXEC 8 /* Disallow program execution */ +#define MS_SYNCHRONOUS 16 /* Writes are synced at once */ +#define MS_REMOUNT 32 /* Alter flags of a mounted FS */ +#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ +#define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOATIME 1024 /* Do not update access times. */ +#define MS_NODIRATIME 2048 /* Do not update directory access times */ +#define MS_BIND 4096 +#define MS_MOVE 8192 +#define MS_REC 16384 +#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. + MS_VERBOSE is deprecated. */ +#define MS_SILENT 32768 +#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define MS_UNBINDABLE (1<<17) /* change to unbindable */ +#define MS_PRIVATE (1<<18) /* change to private */ +#define MS_SLAVE (1<<19) /* change to slave */ +#define MS_SHARED (1<<20) /* change to shared */ +#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ +#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ +#define MS_I_VERSION (1<<23) /* Update inode I_version field */ +#define MS_STRICTATIME (1<<24) /* Always perform atime updates */ +#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ + +/* These sb flags are internal to the kernel */ +#define MS_SUBMOUNT (1<<26) +#define MS_NOREMOTELOCK (1<<27) +#define MS_NOSEC (1<<28) +#define MS_BORN (1<<29) +#define MS_ACTIVE (1<<30) +#define MS_NOUSER (1<<31) + +/* + * Superblock flags that can be altered by MS_REMOUNT + */ +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\ + MS_LAZYTIME) + +/* + * Old magic mount flag and mask + */ +#define MS_MGC_VAL 0xC0ED0000 +#define MS_MGC_MSK 0xffff0000 + +/* + * open_tree() flags. 
+ */ +#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ +#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ + +/* + * move_mount() flags. + */ +#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */ +#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */ +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ +#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#define MOVE_MOUNT__MASK 0x00000077 + +/* + * fsopen() flags. + */ +#define FSOPEN_CLOEXEC 0x00000001 + +/* + * fspick() flags. + */ +#define FSPICK_CLOEXEC 0x00000001 +#define FSPICK_SYMLINK_NOFOLLOW 0x00000002 +#define FSPICK_NO_AUTOMOUNT 0x00000004 +#define FSPICK_EMPTY_PATH 0x00000008 + +/* + * The type of fsconfig() call made. + */ +enum fsconfig_command { + FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ + FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ + FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ + FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ + FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ + FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ + FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ + FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ +}; + +/* + * fsmount() flags. + */ +#define FSMOUNT_CLOEXEC 0x00000001 + +/* + * Mount attributes. 
+ */ +#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ +#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */ +#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */ +#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */ +#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */ +#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ +#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ +#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ +#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ + +#endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/init/Kconfig b/init/Kconfig index d5fb0ee1400a..52ce82c4c8c1 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1159,9 +1159,6 @@ config LD_DEAD_CODE_DATA_ELIMINATION config SYSCTL bool -config ANON_INODES - bool - config HAVE_UID16 bool @@ -1366,14 +1363,12 @@ config HAVE_FUTEX_CMPXCHG config EPOLL bool "Enable eventpoll support" if EXPERT default y - select ANON_INODES help Disabling this option will cause the kernel to be built without support for epoll family of system calls. 
config SIGNALFD bool "Enable signalfd() system call" if EXPERT - select ANON_INODES default y help Enable the signalfd() system call that allows to receive signals @@ -1383,7 +1378,6 @@ config SIGNALFD config TIMERFD bool "Enable timerfd() system call" if EXPERT - select ANON_INODES default y help Enable the timerfd() system call that allows to receive timer @@ -1393,7 +1387,6 @@ config TIMERFD config EVENTFD bool "Enable eventfd() system call" if EXPERT - select ANON_INODES default y help Enable the eventfd() system call that allows to receive both @@ -1495,7 +1488,6 @@ config KALLSYMS_BASE_RELATIVE # syscall, maps, verifier config BPF_SYSCALL bool "Enable bpf() system call" - select ANON_INODES select BPF select IRQ_WORK default n @@ -1512,7 +1504,6 @@ config BPF_JIT_ALWAYS_ON config USERFAULTFD bool "Enable userfaultfd() system call" - select ANON_INODES depends on MMU help Enable the userfaultfd() system call that allows to intercept and @@ -1579,7 +1570,6 @@ config PERF_EVENTS bool "Kernel performance events and counters" default y if PROFILING depends on HAVE_PERF_EVENTS - select ANON_INODES select IRQ_WORK select SRCU help diff --git a/init/do_mounts.c b/init/do_mounts.c index a754e3ba9831..ea9f18047087 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -22,6 +22,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_fs_sb.h> #include <linux/nfs_mount.h> +#include <uapi/linux/mount.h> #include "do_mounts.h" @@ -626,7 +627,7 @@ out: static bool is_tmpfs; static struct dentry *rootfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { static unsigned long once; void *fill = ramfs_fill_super; @@ -637,7 +638,7 @@ static struct dentry *rootfs_mount(struct file_system_type *fs_type, if (IS_ENABLED(CONFIG_TMPFS) && is_tmpfs) fill = shmem_fill_super; - return mount_nodev(fs_type, flags, data, fill); + return mount_nodev(fs_type, flags, data, data_size, fill); } 
static struct file_system_type rootfs_fs_type = { diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index d1a5d885ce13..56a557403d39 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -8,6 +8,7 @@ #include <linux/sched.h> #include <linux/freezer.h> #include <linux/kmod.h> +#include <uapi/linux/mount.h> #include "do_mounts.h" diff --git a/ipc/mqueue.c b/ipc/mqueue.c index c595bed7bfcb..5799c871acb7 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -18,6 +18,7 @@ #include <linux/pagemap.h> #include <linux/file.h> #include <linux/mount.h> +#include <linux/fs_context.h> #include <linux/namei.h> #include <linux/sysctl.h> #include <linux/poll.h> @@ -42,6 +43,10 @@ #include <net/sock.h> #include "util.h" +struct mqueue_fs_context { + struct ipc_namespace *ipc_ns; +}; + #define MQUEUE_MAGIC 0x19800202 #define DIRENT_SIZE 20 #define FILENT_SIZE 80 @@ -87,9 +92,11 @@ struct mqueue_inode_info { unsigned long qsize; /* size of queue in memory (sum of all msgs) */ }; +static struct file_system_type mqueue_fs_type; static const struct inode_operations mqueue_dir_inode_operations; static const struct file_operations mqueue_file_operations; static const struct super_operations mqueue_super_ops; +static const struct fs_context_operations mqueue_fs_context_ops; static void remove_notification(struct mqueue_inode_info *info); static struct kmem_cache *mqueue_inode_cachep; @@ -322,7 +329,7 @@ err: return ERR_PTR(ret); } -static int mqueue_fill_super(struct super_block *sb, void *data, int silent) +static int mqueue_fill_super(struct super_block *sb, struct fs_context *fc) { struct inode *inode; struct ipc_namespace *ns = sb->s_fs_info; @@ -343,18 +350,70 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static struct dentry *mqueue_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int mqueue_get_tree(struct fs_context *fc) { - struct ipc_namespace *ns; - if (flags & 
SB_KERNMOUNT) { - ns = data; - data = NULL; - } else { - ns = current->nsproxy->ipc_ns; + struct mqueue_fs_context *ctx = fc->fs_private; + + fc->s_fs_info = ctx->ipc_ns; + return vfs_get_super(fc, vfs_get_keyed_super, mqueue_fill_super); +} + +static void mqueue_fs_context_free(struct fs_context *fc) +{ + struct mqueue_fs_context *ctx = fc->fs_private; + + if (ctx->ipc_ns) + put_ipc_ns(ctx->ipc_ns); + kfree(ctx); +} + +static int mqueue_init_fs_context(struct fs_context *fc, + struct dentry *reference) +{ + struct mqueue_fs_context *ctx; + + ctx = kzalloc(sizeof(struct mqueue_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns); + fc->fs_private = ctx; + fc->ops = &mqueue_fs_context_ops; + return 0; +} + +static struct vfsmount *mq_create_mount(struct ipc_namespace *ns) +{ + struct mqueue_fs_context *ctx; + struct fs_context *fc; + struct vfsmount *mnt; + int ret; + + fc = vfs_new_fs_context(&mqueue_fs_type, NULL, 0, 0, + FS_CONTEXT_FOR_KERNEL_MOUNT); + if (IS_ERR(fc)) + return ERR_CAST(fc); + + ctx = fc->fs_private; + put_ipc_ns(ctx->ipc_ns); + ctx->ipc_ns = get_ipc_ns(ns); + + ret = vfs_get_tree(fc); + if (ret < 0) + goto err_fc; + + mnt = vfs_create_mount(fc, 0); + if (IS_ERR(mnt)) { + ret = PTR_ERR(mnt); + goto err_fc; } - return mount_ns(fs_type, flags, data, ns, ns->user_ns, mqueue_fill_super); + + put_fs_context(fc); + return mnt; + +err_fc: + put_fs_context(fc); + return ERR_PTR(ret); } static void init_once(void *foo) @@ -1522,15 +1581,22 @@ static const struct super_operations mqueue_super_ops = { .statfs = simple_statfs, }; +static const struct fs_context_operations mqueue_fs_context_ops = { + .free = mqueue_fs_context_free, + .get_tree = mqueue_get_tree, +}; + static struct file_system_type mqueue_fs_type = { - .name = "mqueue", - .mount = mqueue_mount, - .kill_sb = kill_litter_super, - .fs_flags = FS_USERNS_MOUNT, + .name = "mqueue", + .init_fs_context = mqueue_init_fs_context, + .kill_sb = 
kill_litter_super, + .fs_flags = FS_USERNS_MOUNT, }; int mq_init_ns(struct ipc_namespace *ns) { + struct vfsmount *m; + ns->mq_queues_count = 0; ns->mq_queues_max = DFLT_QUEUESMAX; ns->mq_msg_max = DFLT_MSGMAX; @@ -1538,12 +1604,10 @@ int mq_init_ns(struct ipc_namespace *ns) ns->mq_msg_default = DFLT_MSG; ns->mq_msgsize_default = DFLT_MSGSIZE; - ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns); - if (IS_ERR(ns->mq_mnt)) { - int err = PTR_ERR(ns->mq_mnt); - ns->mq_mnt = NULL; - return err; - } + m = mq_create_mount(ns); + if (IS_ERR(m)) + return PTR_ERR(m); + ns->mq_mnt = m; return 0; } diff --git a/ipc/namespace.c b/ipc/namespace.c index 21607791d62c..b3ca1476ca51 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -42,7 +42,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, goto fail; err = -ENOMEM; - ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL); + ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL); if (ns == NULL) goto fail_dec; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 2ada5e21dfa6..f4aad0b704de 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -629,7 +629,7 @@ static int bpf_parse_options(char *data, struct bpf_mount_opts *opts) return 0; } -static int bpf_fill_super(struct super_block *sb, void *data, int silent) +static int bpf_fill_super(struct super_block *sb, void *data, size_t data_size, int silent) { static const struct tree_descr bpf_rfiles[] = { { "" } }; struct bpf_mount_opts opts; @@ -655,9 +655,10 @@ static int bpf_fill_super(struct super_block *sb, void *data, int silent) } static struct dentry *bpf_mount(struct file_system_type *type, int flags, - const char *dev_name, void *data) + const char *dev_name, void *data, + size_t data_size) { - return mount_nodev(type, flags, data, bpf_fill_super); + return mount_nodev(type, flags, data, data_size, bpf_fill_super); } static struct file_system_type bpf_fs_type = { diff --git a/kernel/cgroup/cgroup-internal.h 
b/kernel/cgroup/cgroup-internal.h index 75568fcf2180..35012d2aca97 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -35,6 +35,33 @@ extern char trace_cgroup_path[TRACE_CGROUP_PATH_LEN]; } while (0) /* + * The cgroup filesystem superblock creation/mount context. + */ +struct cgroup_fs_context { + struct kernfs_fs_context kfc; + struct cgroup_root *root; + struct cgroup_namespace *ns; + u8 version; /* cgroups version */ + unsigned int flags; /* CGRP_ROOT_* flags */ + + /* cgroup1 bits */ + bool cpuset_clone_children; + bool none; /* User explicitly requested empty subsystem */ + bool all_ss; /* Seen 'all' option */ + bool one_ss; /* Seen a subsystem name option */ + u16 subsys_mask; /* Selected subsystems */ + char *name; /* Hierarchy name */ + char *release_agent; /* Path for release notifications */ +}; + +static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) +{ + struct kernfs_fs_context *kfc = fc->fs_private; + + return container_of(kfc, struct cgroup_fs_context, kfc); +} + +/* * A cgroup can be associated with multiple css_sets as different tasks may * belong to different cgroups on different hierarchies. In the other * direction, a css_set is naturally associated with multiple cgroups. 
@@ -115,16 +142,6 @@ struct cgroup_mgctx { #define DEFINE_CGROUP_MGCTX(name) \ struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) -struct cgroup_sb_opts { - u16 subsys_mask; - unsigned int flags; - char *release_agent; - bool cpuset_clone_children; - char *name; - /* User explicitly requested empty subsystem */ - bool none; -}; - extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; @@ -195,12 +212,10 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); void cgroup_free_root(struct cgroup_root *root); -void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts); +void init_cgroup_root(struct cgroup_fs_context *ctx); int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); -struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, - struct cgroup_root *root, unsigned long magic, - struct cgroup_namespace *ns); +int cgroup_do_get_tree(struct fs_context *fc); int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp); void cgroup_migrate_finish(struct cgroup_mgctx *mgctx); @@ -244,14 +259,15 @@ extern const struct proc_ns_operations cgroupns_operations; */ extern struct cftype cgroup1_base_files[]; extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops; +extern const struct fs_parameter_description cgroup1_fs_parameters; int proc_cgroupstats_show(struct seq_file *m, void *v); bool cgroup1_ssid_disabled(int ssid); void cgroup1_pidlist_destroy_all(struct cgroup *cgrp); void cgroup1_release_agent(struct work_struct *work); void cgroup1_check_for_release(struct cgroup *cgrp); -struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, - void *data, unsigned long magic, - struct cgroup_namespace *ns); +int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param); +int cgroup1_validate(struct fs_context *fc); +int 
cgroup1_get_tree(struct fs_context *fc); #endif /* __CGROUP_INTERNAL_H */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 51063e7a93c2..d20128d00fbe 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -13,9 +13,13 @@ #include <linux/delayacct.h> #include <linux/pid_namespace.h> #include <linux/cgroupstats.h> +#include <linux/fs_parser.h> +#include <linux/fsinfo.h> #include <trace/events/cgroup.h> +#define cg_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__) + /* * pidlists linger the following amount before being destroyed. The goal * is avoiding frequent destruction in the middle of consecutive read calls @@ -903,92 +907,131 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo return 0; } -static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) -{ - char *token, *o = data; - bool all_ss = false, one_ss = false; - u16 mask = U16_MAX; - struct cgroup_subsys *ss; - int nr_opts = 0; - int i; +enum cgroup1_param { + Opt_all, + Opt_clone_children, + Opt_cpuset_v2_mode, + Opt_name, + Opt_none, + Opt_noprefix, + Opt_release_agent, + Opt_xattr, + nr__cgroup1_params +}; -#ifdef CONFIG_CPUSETS - mask = ~((u16)1 << cpuset_cgrp_id); -#endif +static const struct fs_parameter_spec cgroup1_param_specs[nr__cgroup1_params] = { + [Opt_all] = { fs_param_is_flag }, + [Opt_clone_children] = { fs_param_is_flag }, + [Opt_cpuset_v2_mode] = { fs_param_is_flag }, + [Opt_name] = { fs_param_is_string }, + [Opt_none] = { fs_param_is_flag }, + [Opt_noprefix] = { fs_param_is_flag }, + [Opt_release_agent] = { fs_param_is_string }, + [Opt_xattr] = { fs_param_is_flag }, +}; - memset(opts, 0, sizeof(*opts)); +static const char *const cgroup1_param_keys[nr__cgroup1_params] = { + [Opt_all] = "all", + [Opt_clone_children] = "clone_children", + [Opt_cpuset_v2_mode] = "cpuset_v2_mode", + [Opt_name] = "name", + [Opt_none] = "none", + [Opt_noprefix] = "noprefix", + [Opt_release_agent] = "release_agent", + 
[Opt_xattr] = "xattr", +}; - while ((token = strsep(&o, ",")) != NULL) { - nr_opts++; +const struct fs_parameter_description cgroup1_fs_parameters = { + .name = "cgroup1", + .nr_params = nr__cgroup1_params, + .keys = cgroup1_param_keys, + .specs = cgroup1_param_specs, + .no_source = true, +}; - if (!*token) - return -EINVAL; - if (!strcmp(token, "none")) { - /* Explicitly have no subsystems */ - opts->none = true; - continue; - } - if (!strcmp(token, "all")) { - /* Mutually exclusive option 'all' + subsystem name */ - if (one_ss) - return -EINVAL; - all_ss = true; - continue; - } - if (!strcmp(token, "noprefix")) { - opts->flags |= CGRP_ROOT_NOPREFIX; - continue; - } - if (!strcmp(token, "clone_children")) { - opts->cpuset_clone_children = true; - continue; - } - if (!strcmp(token, "cpuset_v2_mode")) { - opts->flags |= CGRP_ROOT_CPUSET_V2_MODE; - continue; - } - if (!strcmp(token, "xattr")) { - opts->flags |= CGRP_ROOT_XATTR; - continue; - } - if (!strncmp(token, "release_agent=", 14)) { - /* Specifying two release agents is forbidden */ - if (opts->release_agent) - return -EINVAL; - opts->release_agent = - kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); - if (!opts->release_agent) - return -ENOMEM; - continue; - } - if (!strncmp(token, "name=", 5)) { - const char *name = token + 5; - /* Can't specify an empty name */ - if (!strlen(name)) - return -EINVAL; - /* Must match [\w.-]+ */ - for (i = 0; i < strlen(name); i++) { - char c = name[i]; - if (isalnum(c)) - continue; - if ((c == '.') || (c == '-') || (c == '_')) - continue; - return -EINVAL; +static int cgroup1_fsinfo(struct kernfs_root *kf_root, struct fsinfo_kparams *params) +{ + struct cgroup_root *root = cgroup_root_from_kf(kf_root); + struct cgroup_subsys *ss; + const char *str = NULL; + unsigned int Mth; + int ret = 0, ssid; + + switch (params->request) { + case FSINFO_ATTR_PARAMETER: + if (params->Mth && params->Nth != nr__cgroup1_params) + return -ENODATA; + switch (params->Nth) { + case Opt_all: + 
return 0; + case Opt_clone_children: + if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags)) + str = "clone_children"; + goto string; + case Opt_cpuset_v2_mode: + if (root->flags & CGRP_ROOT_CPUSET_V2_MODE) + str = "cpuset_v2_mode"; + goto string; + case Opt_name: + if (strlen(root->name)) + return sprintf(params->buffer, "name=%s", root->name); + return 0; + case Opt_none: + return 0; + case Opt_noprefix: + if (root->flags & CGRP_ROOT_NOPREFIX) + str = "noprefix"; + goto string; + case Opt_release_agent: + spin_lock(&release_agent_path_lock); + if (strlen(root->release_agent_path)) + ret = sprintf(params->buffer, "release_agent=%s", + root->release_agent_path); + spin_unlock(&release_agent_path_lock); + return ret; + case Opt_xattr: + if (root->flags & CGRP_ROOT_XATTR) + str = "xattr"; + goto string; + case nr__cgroup1_params: + Mth = params->Mth; + for_each_subsys(ss, ssid) { + if (Mth == 0) { + if (root->subsys_mask & (1 << ssid)) + str = ss->legacy_name; + goto string; + } + Mth--; } - /* Specifying two names is forbidden */ - if (opts->name) - return -EINVAL; - opts->name = kstrndup(name, - MAX_CGROUP_ROOT_NAMELEN - 1, - GFP_KERNEL); - if (!opts->name) - return -ENOMEM; - - continue; + return -ENODATA; + default: + return -ENODATA; } + default: + return -EAGAIN; /* Tell kernfs to call generic_fsinfo() */ + } + +string: + if (!str) + return 0; + strcpy(params->buffer, str); + return strlen(params->buffer); +} + +int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + struct cgroup_subsys *ss; + struct fs_parse_result result; + int opt, i; + + opt = fs_parse(fc, &cgroup1_fs_parameters, param, &result); + if (opt == -ENOPARAM) { + if (strcmp(param->key, "source") == 0) + return 0; for_each_subsys(ss, i) { - if (strcmp(token, ss->legacy_name)) + if (strcmp(param->key, ss->legacy_name) != 0) continue; if (!cgroup_ssid_enabled(i)) continue; @@ -996,75 +1039,144 @@ static int 
parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) continue; /* Mutually exclusive option 'all' + subsystem name */ - if (all_ss) - return -EINVAL; - opts->subsys_mask |= (1 << i); - one_ss = true; + if (ctx->all_ss) + return cg_invalf(fc, "cgroup1: subsys name conflicts with all"); + ctx->subsys_mask |= (1 << i); + ctx->one_ss = true; + return 0; + } - break; + return cg_invalf(fc, "cgroup1: Unknown subsys name '%s'", param->key); + } + if (opt < 0) + return opt; + + switch (opt) { + case Opt_none: + /* Explicitly have no subsystems */ + ctx->none = true; + return 0; + case Opt_all: + /* Mutually exclusive option 'all' + subsystem name */ + if (ctx->one_ss) + return cg_invalf(fc, "cgroup1: all conflicts with subsys name"); + ctx->all_ss = true; + return 0; + case Opt_noprefix: + ctx->flags |= CGRP_ROOT_NOPREFIX; + return 0; + case Opt_clone_children: + ctx->cpuset_clone_children = true; + return 0; + case Opt_cpuset_v2_mode: + ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; + return 0; + case Opt_xattr: + ctx->flags |= CGRP_ROOT_XATTR; + return 0; + case Opt_release_agent: + /* Specifying two release agents is forbidden */ + if (ctx->release_agent) + return cg_invalf(fc, "cgroup1: release_agent respecified"); + ctx->release_agent = param->string; + param->string = NULL; + if (!ctx->release_agent) + return -ENOMEM; + return 0; + + case Opt_name: + /* Can't specify an empty name */ + if (!param->size) + return cg_invalf(fc, "cgroup1: Empty name"); + if (param->size > MAX_CGROUP_ROOT_NAMELEN - 1) + return cg_invalf(fc, "cgroup1: Name too long"); + /* Must match [\w.-]+ */ + for (i = 0; i < param->size; i++) { + char c = param->string[i]; + if (isalnum(c)) + continue; + if ((c == '.') || (c == '-') || (c == '_')) + continue; + return cg_invalf(fc, "cgroup1: Invalid name"); } - if (i == CGROUP_SUBSYS_COUNT) - return -ENOENT; + /* Specifying two names is forbidden */ + if (ctx->name) + return cg_invalf(fc, "cgroup1: name respecified"); + ctx->name = param->string; 
+ param->string = NULL; + return 0; } + return 0; +} + +/* + * Validate the options that have been parsed. + */ +int cgroup1_validate(struct fs_context *fc) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + struct cgroup_subsys *ss; + u16 mask = U16_MAX; + int i; + +#ifdef CONFIG_CPUSETS + mask = ~((u16)1 << cpuset_cgrp_id); +#endif + /* * If the 'all' option was specified select all the subsystems, * otherwise if 'none', 'name=' and a subsystem name options were * not specified, let's default to 'all' */ - if (all_ss || (!one_ss && !opts->none && !opts->name)) + if (ctx->all_ss || (!ctx->one_ss && !ctx->none && !ctx->name)) for_each_subsys(ss, i) if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i)) - opts->subsys_mask |= (1 << i); + ctx->subsys_mask |= (1 << i); /* * We either have to specify by name or by subsystems. (So all * empty hierarchies must have a name). */ - if (!opts->subsys_mask && !opts->name) - return -EINVAL; + if (!ctx->subsys_mask && !ctx->name) + return cg_invalf(fc, "cgroup1: Need name or subsystem set"); /* * Option noprefix was introduced just for backward compatibility * with the old cpuset, so we allow noprefix only if mounting just * the cpuset subsystem. 
*/ - if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask)) - return -EINVAL; + if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask)) + return cg_invalf(fc, "cgroup1: noprefix used incorrectly"); /* Can't specify "none" and some subsystems */ - if (opts->subsys_mask && opts->none) - return -EINVAL; + if (ctx->subsys_mask && ctx->none) + return cg_invalf(fc, "cgroup1: none used incorrectly"); return 0; } -static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data) +static int cgroup1_reconfigure(struct kernfs_root *kf_root, struct fs_context *fc) { - int ret = 0; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); struct cgroup_root *root = cgroup_root_from_kf(kf_root); - struct cgroup_sb_opts opts; u16 added_mask, removed_mask; + int ret = 0; cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); - /* See what subsystems are wanted */ - ret = parse_cgroupfs_options(data, &opts); - if (ret) - goto out_unlock; - - if (opts.subsys_mask != root->subsys_mask || opts.release_agent) + if (ctx->subsys_mask != root->subsys_mask || ctx->release_agent) pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n", task_tgid_nr(current), current->comm); - added_mask = opts.subsys_mask & ~root->subsys_mask; - removed_mask = root->subsys_mask & ~opts.subsys_mask; + added_mask = ctx->subsys_mask & ~root->subsys_mask; + removed_mask = root->subsys_mask & ~ctx->subsys_mask; /* Don't allow flags or name to change at remount */ - if ((opts.flags ^ root->flags) || - (opts.name && strcmp(opts.name, root->name))) { - pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n", - opts.flags, opts.name ?: "", root->flags, root->name); + if ((ctx->flags ^ root->flags) || + (ctx->name && strcmp(ctx->name, root->name))) { + cg_invalf(fc, "option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"", + ctx->flags, ctx->name ?: "", root->flags, root->name); ret = -EINVAL; goto out_unlock; } @@ -1081,17 +1193,15 @@ static 
int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data) WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask)); - if (opts.release_agent) { + if (ctx->release_agent) { spin_lock(&release_agent_path_lock); - strcpy(root->release_agent_path, opts.release_agent); + strcpy(root->release_agent_path, ctx->release_agent); spin_unlock(&release_agent_path_lock); } trace_cgroup_remount(root); out_unlock: - kfree(opts.release_agent); - kfree(opts.name); mutex_unlock(&cgroup_mutex); return ret; } @@ -1099,31 +1209,27 @@ static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data) struct kernfs_syscall_ops cgroup1_kf_syscall_ops = { .rename = cgroup1_rename, .show_options = cgroup1_show_options, - .remount_fs = cgroup1_remount, + .fsinfo = cgroup1_fsinfo, + .reconfigure = cgroup1_reconfigure, .mkdir = cgroup_mkdir, .rmdir = cgroup_rmdir, .show_path = cgroup_show_path, }; -struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, - void *data, unsigned long magic, - struct cgroup_namespace *ns) +/* + * Find or create a v1 cgroups superblock. + */ +int cgroup1_get_tree(struct fs_context *fc) { + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); struct super_block *pinned_sb = NULL; - struct cgroup_sb_opts opts; struct cgroup_root *root; struct cgroup_subsys *ss; - struct dentry *dentry; int i, ret; bool new_root = false; cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); - /* First find the desired set of subsystems */ - ret = parse_cgroupfs_options(data, &opts); - if (ret) - goto out_unlock; - /* * Destruction of cgroup root is asynchronous, so subsystems may * still be dying after the previous unmount. Let's drain the @@ -1132,15 +1238,13 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, * starting. Testing ref liveliness is good enough. 
*/ for_each_subsys(ss, i) { - if (!(opts.subsys_mask & (1 << i)) || + if (!(ctx->subsys_mask & (1 << i)) || ss->root == &cgrp_dfl_root) continue; if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) { mutex_unlock(&cgroup_mutex); - msleep(10); - ret = restart_syscall(); - goto out_free; + goto err_restart; } cgroup_put(&ss->root->cgrp); } @@ -1156,8 +1260,8 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, * name matches but sybsys_mask doesn't, we should fail. * Remember whether name matched. */ - if (opts.name) { - if (strcmp(opts.name, root->name)) + if (ctx->name) { + if (strcmp(ctx->name, root->name)) continue; name_match = true; } @@ -1166,15 +1270,15 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, * If we asked for subsystems (or explicitly for no * subsystems) then they must match. */ - if ((opts.subsys_mask || opts.none) && - (opts.subsys_mask != root->subsys_mask)) { + if ((ctx->subsys_mask || ctx->none) && + (ctx->subsys_mask != root->subsys_mask)) { if (!name_match) continue; ret = -EBUSY; - goto out_unlock; + goto err_unlock; } - if (root->flags ^ opts.flags) + if (root->flags ^ ctx->flags) pr_warn("new mount options do not match the existing superblock, will be ignored\n"); /* @@ -1195,11 +1299,10 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, mutex_unlock(&cgroup_mutex); if (!IS_ERR_OR_NULL(pinned_sb)) deactivate_super(pinned_sb); - msleep(10); - ret = restart_syscall(); - goto out_free; + goto err_restart; } + ctx->root = root; ret = 0; goto out_unlock; } @@ -1209,41 +1312,35 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, * specification is allowed for already existing hierarchies but we * can't create new one without subsys specification. 
*/ - if (!opts.subsys_mask && !opts.none) { - ret = -EINVAL; - goto out_unlock; + if (!ctx->subsys_mask && !ctx->none) { + ret = cg_invalf(fc, "cgroup1: No subsys list or none specified"); + goto err_unlock; } /* Hierarchies may only be created in the initial cgroup namespace. */ - if (ns != &init_cgroup_ns) { + if (ctx->ns != &init_cgroup_ns) { ret = -EPERM; - goto out_unlock; + goto err_unlock; } root = kzalloc(sizeof(*root), GFP_KERNEL); if (!root) { ret = -ENOMEM; - goto out_unlock; + goto err_unlock; } new_root = true; + ctx->root = root; - init_cgroup_root(root, &opts); + init_cgroup_root(ctx); - ret = cgroup_setup_root(root, opts.subsys_mask, PERCPU_REF_INIT_DEAD); + ret = cgroup_setup_root(root, ctx->subsys_mask, PERCPU_REF_INIT_DEAD); if (ret) - cgroup_free_root(root); + goto err_unlock; out_unlock: mutex_unlock(&cgroup_mutex); -out_free: - kfree(opts.release_agent); - kfree(opts.name); - - if (ret) - return ERR_PTR(ret); - dentry = cgroup_do_mount(&cgroup_fs_type, flags, root, - CGROUP_SUPER_MAGIC, ns); + ret = cgroup_do_get_tree(fc); /* * There's a race window after we release cgroup_mutex and before @@ -1256,6 +1353,7 @@ out_free: percpu_ref_reinit(&root->cgrp.self.refcnt); mutex_unlock(&cgroup_mutex); } + cgroup_get(&root->cgrp); /* * If @pinned_sb, we're reusing an existing root and holding an @@ -1264,7 +1362,14 @@ out_free: if (pinned_sb) deactivate_super(pinned_sb); - return dentry; + return ret; + +err_restart: + msleep(10); + return restart_syscall(); +err_unlock: + mutex_unlock(&cgroup_mutex); + return ret; } static int __init cgroup1_wq_init(void) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 6aaf5dd5383b..1a9fa3de5916 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -54,6 +54,8 @@ #include <linux/proc_ns.h> #include <linux/nsproxy.h> #include <linux/file.h> +#include <linux/fs_parser.h> +#include <linux/fsinfo.h> #include <linux/sched/cputime.h> #include <linux/psi.h> #include <net/sock.h> @@ -1738,26 
+1740,81 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, return len; } -static int parse_cgroup_root_flags(char *data, unsigned int *root_flags) +enum cgroup2_param { + Opt_nsdelegate, + nr__cgroup2_params +}; + +static const struct fs_parameter_spec cgroup2_param_specs[nr__cgroup2_params] = { + [Opt_nsdelegate] = { fs_param_is_flag }, +}; + +static const char *const cgroup2_param_keys[nr__cgroup2_params] = { + [Opt_nsdelegate] = "nsdelegate", +}; + +static const struct fs_parameter_description cgroup2_fs_parameters = { + .name = "cgroup2", + .nr_params = nr__cgroup2_params, + .keys = cgroup2_param_keys, + .specs = cgroup2_param_specs, + .no_source = true, +}; + +static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param) { - char *token; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + struct fs_parse_result result; + int opt; - *root_flags = 0; + opt = fs_parse(fc, &cgroup2_fs_parameters, param, &result); + if (opt < 0) + return opt; - if (!data) + switch (opt) { + case Opt_nsdelegate: + ctx->flags |= CGRP_ROOT_NS_DELEGATE; return 0; + } - while ((token = strsep(&data, ",")) != NULL) { - if (!strcmp(token, "nsdelegate")) { - *root_flags |= CGRP_ROOT_NS_DELEGATE; - continue; + return -EINVAL; +} + +static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root) +{ + if (current->nsproxy->cgroup_ns == &init_cgroup_ns && + cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) + seq_puts(seq, ",nsdelegate"); + return 0; +} + +static int cgroup_fsinfo(struct kernfs_root *kf_root, struct fsinfo_kparams *params) +{ + const char *str = NULL; + + switch (params->request) { + case FSINFO_ATTR_PARAMETER: + if (params->Mth) + return -ENODATA; + switch (params->Nth) { + case Opt_nsdelegate: + if (current->nsproxy->cgroup_ns == &init_cgroup_ns && + cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) + str = "nsdelegate"; + goto string; + default: + return -ENODATA; } - pr_err("cgroup2: unknown option \"%s\"\n", 
token); - return -EINVAL; + default: + return -EAGAIN; /* Tell kernfs to call generic_fsinfo() */ } - return 0; +string: + if (!str) + return 0; + strcpy(params->buffer, str); + return strlen(params->buffer); } static void apply_cgroup_root_flags(unsigned int root_flags) @@ -1770,23 +1827,11 @@ static void apply_cgroup_root_flags(unsigned int root_flags) } } -static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root) +static int cgroup_reconfigure(struct kernfs_root *kf_root, struct fs_context *fc) { - if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) - seq_puts(seq, ",nsdelegate"); - return 0; -} + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); -static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) -{ - unsigned int root_flags; - int ret; - - ret = parse_cgroup_root_flags(data, &root_flags); - if (ret) - return ret; - - apply_cgroup_root_flags(root_flags); + apply_cgroup_root_flags(ctx->flags); return 0; } @@ -1874,8 +1919,9 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); } -void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) +void init_cgroup_root(struct cgroup_fs_context *ctx) { + struct cgroup_root *root = ctx->root; struct cgroup *cgrp = &root->cgrp; INIT_LIST_HEAD(&root->root_list); @@ -1884,12 +1930,12 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) init_cgroup_housekeeping(cgrp); idr_init(&root->cgroup_idr); - root->flags = opts->flags; - if (opts->release_agent) - strscpy(root->release_agent_path, opts->release_agent, PATH_MAX); - if (opts->name) - strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN); - if (opts->cpuset_clone_children) + root->flags = ctx->flags; + if (ctx->release_agent) + strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX); + if (ctx->name) + strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN); + if (ctx->cpuset_clone_children) 
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } @@ -1994,57 +2040,53 @@ out: return ret; } -struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, - struct cgroup_root *root, unsigned long magic, - struct cgroup_namespace *ns) +int cgroup_do_get_tree(struct fs_context *fc) { - struct dentry *dentry; - bool new_sb; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + int ret; - dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb); + ctx->kfc.root = ctx->root->kf_root; + + ret = kernfs_get_tree(fc); + if (ret < 0) + goto out_cgrp; /* * In non-init cgroup namespace, instead of root cgroup's dentry, * we return the dentry corresponding to the cgroupns->root_cgrp. */ - if (!IS_ERR(dentry) && ns != &init_cgroup_ns) { + if (ctx->ns != &init_cgroup_ns) { struct dentry *nsdentry; struct cgroup *cgrp; mutex_lock(&cgroup_mutex); spin_lock_irq(&css_set_lock); - cgrp = cset_cgroup_from_root(ns->root_cset, root); + cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root); spin_unlock_irq(&css_set_lock); mutex_unlock(&cgroup_mutex); - nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb); - dput(dentry); - dentry = nsdentry; + nsdentry = kernfs_node_dentry(cgrp->kn, fc->root->d_sb); + if (IS_ERR(nsdentry)) + return PTR_ERR(nsdentry); + dput(fc->root); + fc->root = nsdentry; } - if (IS_ERR(dentry) || !new_sb) - cgroup_put(&root->cgrp); + ret = 0; + if (ctx->kfc.new_sb_created) + goto out_cgrp; + apply_cgroup_root_flags(ctx->flags); + return 0; - return dentry; +out_cgrp: + return ret; } -static struct dentry *cgroup_mount(struct file_system_type *fs_type, - int flags, const char *unused_dev_name, - void *data) +static int cgroup_get_tree(struct fs_context *fc) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; - struct dentry *dentry; - int ret; - - get_cgroup_ns(ns); - - /* Check if the caller has permission to mount. 
*/ - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) { - put_cgroup_ns(ns); - return ERR_PTR(-EPERM); - } + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); /* * The first time anyone tries to mount a cgroup, enable the list @@ -2053,29 +2095,96 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, if (!use_task_css_set_links) cgroup_enable_task_cg_lists(); - if (fs_type == &cgroup2_fs_type) { - unsigned int root_flags; - - ret = parse_cgroup_root_flags(data, &root_flags); - if (ret) { - put_cgroup_ns(ns); - return ERR_PTR(ret); - } + switch (ctx->version) { + case 1: + return cgroup1_get_tree(fc); + case 2: cgrp_dfl_visible = true; cgroup_get_live(&cgrp_dfl_root.cgrp); - dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root, - CGROUP2_SUPER_MAGIC, ns); - if (!IS_ERR(dentry)) - apply_cgroup_root_flags(root_flags); - } else { - dentry = cgroup1_mount(&cgroup_fs_type, flags, data, - CGROUP_SUPER_MAGIC, ns); + ctx->root = &cgrp_dfl_root; + return cgroup_do_get_tree(fc); + + default: + BUG(); + } +} + +static int cgroup_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + + if (ctx->version == 1) + return cgroup1_parse_param(fc, param); + + return cgroup2_parse_param(fc, param); +} + +static int cgroup_validate(struct fs_context *fc) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + + if (ctx->version == 1) + return cgroup1_validate(fc); + return 0; +} + +/* + * Destroy a cgroup filesystem context. 
+ */ +static void cgroup_fs_context_free(struct fs_context *fc) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + + kfree(ctx->name); + kfree(ctx->release_agent); + if (ctx->root) + cgroup_put(&ctx->root->cgrp); + put_cgroup_ns(ctx->ns); + kernfs_free_fs_context(fc); + kfree(ctx); +} + +static const struct fs_context_operations cgroup_fs_context_ops = { + .free = cgroup_fs_context_free, + .parse_param = cgroup_parse_param, + .validate = cgroup_validate, + .get_tree = cgroup_get_tree, + .reconfigure = kernfs_reconfigure, +}; + +/* + * Initialise the cgroup filesystem creation/reconfiguration context. Notably, + * we select the namespace we're going to use. + */ +static int cgroup_init_fs_context(struct fs_context *fc, struct dentry *reference) +{ + struct cgroup_fs_context *ctx; + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + + switch (fc->purpose) { + case FS_CONTEXT_FOR_UMOUNT: + case FS_CONTEXT_FOR_EMERGENCY_RO: + return -EOPNOTSUPP; + default: + break; } - put_cgroup_ns(ns); - return dentry; + /* Check if the caller has permission to mount. */ + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + ctx = kzalloc(sizeof(struct cgroup_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->ns = get_cgroup_ns(ns); + ctx->version = (fc->fs_type == &cgroup2_fs_type) ? 2 : 1; + ctx->kfc.magic = (ctx->version == 2) ? 
CGROUP2_SUPER_MAGIC : CGROUP_SUPER_MAGIC; + fc->fs_private = &ctx->kfc; + fc->ops = &cgroup_fs_context_ops; + return 0; } static void cgroup_kill_sb(struct super_block *sb) @@ -2100,17 +2209,19 @@ static void cgroup_kill_sb(struct super_block *sb) } struct file_system_type cgroup_fs_type = { - .name = "cgroup", - .mount = cgroup_mount, - .kill_sb = cgroup_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .name = "cgroup", + .init_fs_context = cgroup_init_fs_context, + .parameters = &cgroup1_fs_parameters, + .kill_sb = cgroup_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; static struct file_system_type cgroup2_fs_type = { - .name = "cgroup2", - .mount = cgroup_mount, - .kill_sb = cgroup_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .name = "cgroup2", + .init_fs_context = cgroup_init_fs_context, + .parameters = &cgroup2_fs_parameters, + .kill_sb = cgroup_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, @@ -5227,7 +5338,8 @@ int cgroup_rmdir(struct kernfs_node *kn) static struct kernfs_syscall_ops cgroup_kf_syscall_ops = { .show_options = cgroup_show_options, - .remount_fs = cgroup_remount, + .fsinfo = cgroup_fsinfo, + .reconfigure = cgroup_reconfigure, .mkdir = cgroup_mkdir, .rmdir = cgroup_rmdir, .show_path = cgroup_show_path, @@ -5294,11 +5406,12 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) */ int __init cgroup_init_early(void) { - static struct cgroup_sb_opts __initdata opts; + static struct cgroup_fs_context __initdata ctx; struct cgroup_subsys *ss; int i; - init_cgroup_root(&cgrp_dfl_root, &opts); + ctx.root = &cgrp_dfl_root; + init_cgroup_root(&ctx); cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF; RCU_INIT_POINTER(init_task.cgroups, &init_css_set); diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 266f10cb7222..5583b6d69748 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -38,7 +38,7 @@ #include <linux/mm.h> #include <linux/memory.h> #include 
<linux/export.h> -#include <linux/mount.h> +#include <linux/fs_context.h> #include <linux/namei.h> #include <linux/pagemap.h> #include <linux/proc_fs.h> @@ -315,25 +315,80 @@ static inline bool is_in_v2_mode(void) * users. If someone tries to mount the "cpuset" filesystem, we * silently switch it to mount "cgroup" instead */ -static struct dentry *cpuset_mount(struct file_system_type *fs_type, - int flags, const char *unused_dev_name, void *data) -{ - struct file_system_type *cgroup_fs = get_fs_type("cgroup"); - struct dentry *ret = ERR_PTR(-ENODEV); - if (cgroup_fs) { - char mountopts[] = - "cpuset,noprefix," - "release_agent=/sbin/cpuset_release_agent"; - ret = cgroup_fs->mount(cgroup_fs, flags, - unused_dev_name, mountopts); - put_filesystem(cgroup_fs); +static int cpuset_get_tree(struct fs_context *fc) +{ + static const char opts[] = "cpuset,noprefix,release_agent=/sbin/cpuset_release_agent"; + struct file_system_type *cgroup_fs; + struct fs_context *cg_fc; + char *p; + int ret = -ENODEV; + + cgroup_fs = get_fs_type("cgroup"); + if (!cgroup_fs) + goto out; + + cg_fc = vfs_new_fs_context(cgroup_fs, NULL, fc->sb_flags, fc->sb_flags, + fc->purpose); + put_filesystem(cgroup_fs); + if (IS_ERR(cg_fc)) { + ret = PTR_ERR(cg_fc); + goto out; + } + + ret = -ENOMEM; + p = kstrdup(opts, GFP_KERNEL); + if (!p) + goto out_fc; + + ret = generic_parse_monolithic(cg_fc, p, sizeof(opts) - 1); + kfree(p); + if (ret < 0) + goto out_fc; + + /* We can't call vfs_get_tree() as that will do various post-get-tree + * things that we want our caller to do. + */ + if (cg_fc->ops->validate) { + ret = cg_fc->ops->validate(cg_fc); + if (ret < 0) + goto out_fc; } + + ret = security_fs_context_validate(cg_fc); + if (ret < 0) + goto out_fc; + + /* Get the mountable root in fc->root, with a ref on the root and a ref + * on the superblock. 
+ */ + ret = cg_fc->ops->get_tree(cg_fc); + if (ret < 0) + return ret; + + fc->root = cg_fc->root; + cg_fc->root = NULL; + ret = 0; + +out_fc: + put_fs_context(cg_fc); +out: return ret; } +static const struct fs_context_operations cpuset_fs_context_ops = { + .get_tree = cpuset_get_tree, +}; + +static int cpuset_init_fs_context(struct fs_context *fc, + struct dentry *reference) +{ + fc->ops = &cpuset_fs_context_ops; + return 0; +} + static struct file_system_type cpuset_fs_type = { - .name = "cpuset", - .mount = cpuset_mount, + .name = "cpuset", + .init_fs_context = cpuset_init_fs_context, }; /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ff1c4b20cd0a..0d89a15e0944 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7991,7 +7991,8 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) ftrace_init_tracefs(tr, d_tracer); } -static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) +static struct vfsmount *trace_automount(struct dentry *mntpt, + void *data, size_t data_size) { struct vfsmount *mnt; struct file_system_type *type; @@ -8004,7 +8005,7 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) type = get_fs_type("tracefs"); if (!type) return NULL; - mnt = vfs_submount(mntpt, type, "tracefs", NULL); + mnt = vfs_submount(mntpt, type, "tracefs", NULL, 0); put_filesystem(type); if (IS_ERR(mnt)) return NULL; @@ -8040,7 +8041,7 @@ struct dentry *tracing_init_dentry(void) * work with the newer kerenl. 
*/ tr->dir = debugfs_create_automount("tracing", NULL, - trace_automount, NULL); + trace_automount, NULL, 0); if (!tr->dir) { pr_warn_once("Could not create debugfs directory 'tracing'\n"); return ERR_PTR(-ENOMEM); diff --git a/mm/shmem.c b/mm/shmem.c index d44991ea5ed4..7a73c7b4f571 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3347,7 +3347,8 @@ error: } -static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) +static int shmem_remount_fs(struct super_block *sb, int *flags, + char *data, size_t data_size) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); struct shmem_sb_info config = *sbinfo; @@ -3430,7 +3431,8 @@ static void shmem_put_super(struct super_block *sb) sb->s_fs_info = NULL; } -int shmem_fill_super(struct super_block *sb, void *data, int silent) +int shmem_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct inode *inode; struct shmem_sb_info *sbinfo; @@ -3644,9 +3646,9 @@ static const struct vm_operations_struct shmem_vm_ops = { }; static struct dentry *shmem_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { - return mount_nodev(fs_type, flags, data, shmem_fill_super); + return mount_nodev(fs_type, flags, data, data_size, shmem_fill_super); } static struct file_system_type shmem_fs_type = { diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 0787d33b80d8..fd4b3a96c004 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1815,7 +1815,8 @@ static void lock_zspage(struct zspage *zspage) } static struct dentry *zs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { static const struct dentry_operations ops = { .d_dname = simple_dname, diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 1506e1632394..10dfd20c748e 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ 
-27,6 +27,7 @@ #include <linux/export.h> #include <linux/utsname.h> #include <linux/sched.h> +#include <linux/compat.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> @@ -919,7 +920,7 @@ static int hci_sock_blacklist_del(struct hci_dev *hdev, void __user *arg) /* Ioctls that require bound socket */ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, - unsigned long arg) + void __user *argp) { struct hci_dev *hdev = hci_pi(sk)->hdev; @@ -942,20 +943,20 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, return -EOPNOTSUPP; case HCIGETCONNINFO: - return hci_get_conn_info(hdev, (void __user *)arg); + return hci_get_conn_info(hdev, argp); case HCIGETAUTHINFO: - return hci_get_auth_info(hdev, (void __user *)arg); + return hci_get_auth_info(hdev, argp); case HCIBLOCKADDR: if (!capable(CAP_NET_ADMIN)) return -EPERM; - return hci_sock_blacklist_add(hdev, (void __user *)arg); + return hci_sock_blacklist_add(hdev, argp); case HCIUNBLOCKADDR: if (!capable(CAP_NET_ADMIN)) return -EPERM; - return hci_sock_blacklist_del(hdev, (void __user *)arg); + return hci_sock_blacklist_del(hdev, argp); } return -ENOIOCTLCMD; @@ -1048,13 +1049,29 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, lock_sock(sk); - err = hci_sock_bound_ioctl(sk, cmd, arg); + err = hci_sock_bound_ioctl(sk, cmd, argp); done: release_sock(sk); return err; } +#ifdef CONFIG_COMPAT +static int hci_sock_compat_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case HCIDEVUP: + case HCIDEVDOWN: + case HCIDEVRESET: + case HCIDEVRESTAT: + return hci_sock_ioctl(sock, cmd, arg); + default: + return hci_sock_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); + } +} +#endif + static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { @@ -1975,6 +1992,9 @@ static const struct proto_ops hci_sock_ops = { .sendmsg = hci_sock_sendmsg, .recvmsg = hci_sock_recvmsg, .ioctl = hci_sock_ioctl, +#ifdef 
CONFIG_COMPAT + .compat_ioctl = hci_sock_compat_ioctl, +#endif .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/socket.c b/net/socket.c index 334fcc617ef2..f1ede2a64985 100644 --- a/net/socket.c +++ b/net/socket.c @@ -352,7 +352,8 @@ static const struct xattr_handler *sockfs_xattr_handlers[] = { }; static struct dentry *sockfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops, sockfs_xattr_handlers, diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 4fda18d47e2c..023c2a6389e7 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1367,7 +1367,7 @@ rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry) } static int -rpc_fill_super(struct super_block *sb, void *data, int silent) +rpc_fill_super(struct super_block *sb, void *data, size_t data_size, int silent) { struct inode *inode; struct dentry *root, *gssd_dentry; @@ -1430,10 +1430,11 @@ EXPORT_SYMBOL_GPL(gssd_running); static struct dentry * rpc_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, size_t data_size) { struct net *net = current->nsproxy->net_ns; - return mount_ns(fs_type, flags, data, net, net->user_ns, rpc_fill_super); + return mount_ns(fs_type, flags, data, data_size, + net, net->user_ns, rpc_fill_super); } static void rpc_kill_sb(struct super_block *sb) diff --git a/samples/Kconfig b/samples/Kconfig index ad1ec7016d4c..dc4eb5355fad 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -147,10 +147,11 @@ config SAMPLE_VFIO_MDEV_MBOCHS Specifically it does *not* include any legacy vga stuff. Device looks a lot like "qemu -device secondary-vga". 
-config SAMPLE_STATX - bool "Build example extended-stat using code" - depends on BROKEN +config SAMPLE_VFS + bool "Build example programs that use new VFS system calls" help - Build example userspace program to use the new extended-stat syscall. + Build example userspace programs that use new VFS system calls such + as mount API and statx(). Note that this is restricted to the x86 + arch whilst it accesses system calls that aren't yet in all arches. endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile index bd601c038b86..c5a6175c2d3f 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -3,4 +3,4 @@ obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \ hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \ configfs/ connector/ v4l/ trace_printk/ \ - vfio-mdev/ statx/ qmi/ + vfio-mdev/ vfs/ qmi/ diff --git a/samples/statx/Makefile b/samples/statx/Makefile deleted file mode 100644 index 59df7c25a9d1..000000000000 --- a/samples/statx/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# List of programs to build -hostprogs-$(CONFIG_SAMPLE_STATX) := test-statx - -# Tell kbuild to always build the programs -always := $(hostprogs-y) - -HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include diff --git a/samples/vfs/Makefile b/samples/vfs/Makefile new file mode 100644 index 000000000000..8552a347ccc2 --- /dev/null +++ b/samples/vfs/Makefile @@ -0,0 +1,16 @@ +# List of programs to build +hostprogs-$(CONFIG_SAMPLE_VFS) := \ + test-fsinfo \ + test-fs-query \ + test-fsmount \ + test-statx + +# Tell kbuild to always build the programs +always := $(hostprogs-y) + +HOSTCFLAGS_test-fsinfo.o += -I$(objtree)/usr/include +HOSTLDLIBS_test-fsinfo += -lm + +HOSTCFLAGS_test-fs-query.o += -I$(objtree)/usr/include +HOSTCFLAGS_test-fsmount.o += -I$(objtree)/usr/include +HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include diff --git a/samples/vfs/test-fs-query.c b/samples/vfs/test-fs-query.c new file mode 100644 index 000000000000..4635bf1eb3d4 --- /dev/null +++ 
b/samples/vfs/test-fs-query.c @@ -0,0 +1,145 @@ +/* Test using the fsinfo() system call to query mount parameters. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define _GNU_SOURCE +#define _ATFILE_SOURCE +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> +#include <errno.h> +#include <time.h> +#include <math.h> +#include <fcntl.h> +#include <sys/syscall.h> +#include <linux/fsinfo.h> +#include <linux/socket.h> +#include <sys/stat.h> + +#ifndef __NR_fsopen +#define __NR_fsopen -1 +#endif +#ifndef __NR_fsinfo +#define __NR_fsinfo -1 +#endif + +static int fsopen(const char *fs_name, unsigned int flags) +{ + return syscall(__NR_fsopen, fs_name, flags); +} + +static ssize_t fsinfo(int dfd, const char *filename, struct fsinfo_params *params, + void *buffer, size_t buf_size) +{ + return syscall(__NR_fsinfo, dfd, filename, params, buffer, buf_size); +} + +static const char *param_types[NR__FSINFO_PARAM_SPEC] = { + [FSINFO_PARAM_SPEC_NOT_DEFINED] = "?undef", + [FSINFO_PARAM_SPEC_TAKES_NO_VALUE] = "no-val", + [FSINFO_PARAM_SPEC_IS_BOOL] = "bool", + [FSINFO_PARAM_SPEC_IS_U32] = "u32", + [FSINFO_PARAM_SPEC_IS_U32_OCTAL] = "octal", + [FSINFO_PARAM_SPEC_IS_U32_HEX] = "hex", + [FSINFO_PARAM_SPEC_IS_S32] = "s32", + [FSINFO_PARAM_SPEC_IS_U64] = "u64", + [FSINFO_PARAM_SPEC_IS_ENUM] = "enum", + [FSINFO_PARAM_SPEC_IS_STRING] = "string", + [FSINFO_PARAM_SPEC_IS_BLOB] = "binary", + [FSINFO_PARAM_SPEC_IS_BLOCKDEV] = "blockdev", + [FSINFO_PARAM_SPEC_IS_PATH] = "path", + [FSINFO_PARAM_SPEC_IS_FD] = "fd", +}; + +/* + * + */ +int main(int argc, char **argv) +{ + struct 
fsinfo_param_description desc; + struct fsinfo_param_specification spec; + struct fsinfo_param_name name; + struct fsinfo_param_enum enum_name; + + struct fsinfo_params params = { + .at_flags = AT_SYMLINK_NOFOLLOW, + }; + int fd; + + if (argc != 2) { + printf("Format: test-fs-query <fs_name>\n"); + exit(2); + } + + fd = fsopen(argv[1], 0); + if (fd == -1) { + perror(argv[1]); + exit(1); + } + + params.request = FSINFO_ATTR_PARAM_DESCRIPTION; + if (fsinfo(fd, NULL, ¶ms, &desc, sizeof(desc)) == -1) { + perror("fsinfo/desc"); + exit(1); + } + + printf("Filesystem %s has %u parameters\n", argv[1], desc.nr_params); + + params.request = FSINFO_ATTR_PARAM_SPECIFICATION; + for (params.Nth = 0; params.Nth < desc.nr_params; params.Nth++) { + if (fsinfo(fd, NULL, ¶ms, &spec, sizeof(spec)) == -1) { + if (errno == ENODATA) + break; + perror("fsinfo/spec"); + exit(1); + } + printf("- PARAM[%3u] type=%u(%s)%s%s%s%s\n", + params.Nth, + spec.type, + spec.type < NR__FSINFO_PARAM_SPEC ? param_types[spec.type] : "?type", + spec.flags & FSINFO_PARAM_SPEC_VALUE_IS_OPTIONAL ? " -opt" : "", + spec.flags & FSINFO_PARAM_SPEC_PREFIX_NO_IS_NEG ? " -neg-no" : "", + spec.flags & FSINFO_PARAM_SPEC_EMPTY_STRING_IS_NEG ? " -neg-empty" : "", + spec.flags & FSINFO_PARAM_SPEC_DEPRECATED ? 
" -dep" : ""); + } + + printf("Filesystem has %u parameter names\n", desc.nr_names); + + params.request = FSINFO_ATTR_PARAM_NAME; + for (params.Nth = 0; params.Nth < desc.nr_names; params.Nth++) { + if (fsinfo(fd, NULL, ¶ms, &name, sizeof(name)) == -1) { + if (errno == ENODATA) + break; + perror("fsinfo/name"); + exit(1); + } + printf("- NAME[%3u] %s -> %u\n", + params.Nth, name.name, name.param_index); + } + + printf("Filesystem has %u enumeration values\n", desc.nr_enum_names); + + params.request = FSINFO_ATTR_PARAM_ENUM; + for (params.Nth = 0; params.Nth < desc.nr_enum_names; params.Nth++) { + if (fsinfo(fd, NULL, ¶ms, &enum_name, sizeof(enum_name)) == -1) { + if (errno == ENODATA) + break; + perror("fsinfo/enum"); + exit(1); + } + printf("- ENUM[%3u] %3u.%s\n", + params.Nth, enum_name.param_index, enum_name.name); + } + return 0; +} diff --git a/samples/vfs/test-fsinfo.c b/samples/vfs/test-fsinfo.c new file mode 100644 index 000000000000..125010212eee --- /dev/null +++ b/samples/vfs/test-fsinfo.c @@ -0,0 +1,593 @@ +/* Test the fsinfo() system call + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ + +#define _GNU_SOURCE +#define _ATFILE_SOURCE +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> +#include <errno.h> +#include <time.h> +#include <math.h> +#include <fcntl.h> +#include <sys/syscall.h> +#include <linux/fsinfo.h> +#include <linux/socket.h> +#include <sys/stat.h> +#include <arpa/inet.h> + +#ifndef __NR_fsinfo +#define __NR_fsinfo -1 +#endif + +static bool debug = 0; + +static __attribute__((unused)) +ssize_t fsinfo(int dfd, const char *filename, struct fsinfo_params *params, + void *buffer, size_t buf_size) +{ + return syscall(__NR_fsinfo, dfd, filename, params, buffer, buf_size); +} + +#define FSINFO_STRING(X,Y) [FSINFO_ATTR_##X] = 0x0000 +#define FSINFO_STRUCT(X,Y) [FSINFO_ATTR_##X] = sizeof(struct fsinfo_##Y) +#define FSINFO_STRING_N(X,Y) [FSINFO_ATTR_##X] = 0x4000 +#define FSINFO_STRUCT_N(X,Y) [FSINFO_ATTR_##X] = 0x4000 | sizeof(struct fsinfo_##Y) +#define FSINFO_STRUCT_NM(X,Y) [FSINFO_ATTR_##X] = 0x8000 | sizeof(struct fsinfo_##Y) +#define FSINFO_STRING_NM(X,Y) [FSINFO_ATTR_##X] = 0x8000 +static const __u16 fsinfo_buffer_sizes[FSINFO_ATTR__NR] = { + FSINFO_STRUCT (STATFS, statfs), + FSINFO_STRUCT (FSINFO, fsinfo), + FSINFO_STRUCT (IDS, ids), + FSINFO_STRUCT (LIMITS, limits), + FSINFO_STRUCT (CAPABILITIES, capabilities), + FSINFO_STRUCT (SUPPORTS, supports), + FSINFO_STRUCT (TIMESTAMP_INFO, timestamp_info), + FSINFO_STRING (VOLUME_ID, volume_id), + FSINFO_STRUCT (VOLUME_UUID, volume_uuid), + FSINFO_STRING (VOLUME_NAME, volume_name), + FSINFO_STRING (CELL_NAME, cell_name), + FSINFO_STRING (DOMAIN_NAME, domain_name), + FSINFO_STRING_N (SERVER_NAME, server_name), + FSINFO_STRUCT_NM (SERVER_ADDRESS, server_address), + FSINFO_STRING_NM (PARAMETER, parameter), + FSINFO_STRING_N (SOURCE, source), + FSINFO_STRING (NAME_ENCODING, name_encoding), + FSINFO_STRING (NAME_CODEPAGE, name_codepage), + FSINFO_STRUCT (IO_SIZE, io_size), + FSINFO_STRUCT 
(PARAM_DESCRIPTION, param_description), + FSINFO_STRUCT_N (PARAM_SPECIFICATION, param_specification), + FSINFO_STRUCT_N (PARAM_NAME, param_name), + FSINFO_STRUCT_N (PARAM_ENUM, param_enum), +}; + +#define FSINFO_NAME(X,Y) [FSINFO_ATTR_##X] = #Y +static const char *fsinfo_attr_names[FSINFO_ATTR__NR] = { + FSINFO_NAME (STATFS, statfs), + FSINFO_NAME (FSINFO, fsinfo), + FSINFO_NAME (IDS, ids), + FSINFO_NAME (LIMITS, limits), + FSINFO_NAME (CAPABILITIES, capabilities), + FSINFO_NAME (SUPPORTS, supports), + FSINFO_NAME (TIMESTAMP_INFO, timestamp_info), + FSINFO_NAME (VOLUME_ID, volume_id), + FSINFO_NAME (VOLUME_UUID, volume_uuid), + FSINFO_NAME (VOLUME_NAME, volume_name), + FSINFO_NAME (CELL_NAME, cell_name), + FSINFO_NAME (DOMAIN_NAME, domain_name), + FSINFO_NAME (SERVER_NAME, server_name), + FSINFO_NAME (SERVER_ADDRESS, server_address), + FSINFO_NAME (PARAMETER, parameter), + FSINFO_NAME (SOURCE, source), + FSINFO_NAME (NAME_ENCODING, name_encoding), + FSINFO_NAME (NAME_CODEPAGE, name_codepage), + FSINFO_NAME (IO_SIZE, io_size), + FSINFO_NAME (PARAM_DESCRIPTION, param_description), + FSINFO_NAME (PARAM_SPECIFICATION, param_specification), + FSINFO_NAME (PARAM_NAME, param_name), + FSINFO_NAME (PARAM_ENUM, param_enum), +}; + +union reply { + char buffer[4096]; + struct fsinfo_statfs statfs; + struct fsinfo_fsinfo fsinfo; + struct fsinfo_ids ids; + struct fsinfo_limits limits; + struct fsinfo_supports supports; + struct fsinfo_capabilities caps; + struct fsinfo_timestamp_info timestamps; + struct fsinfo_volume_uuid uuid; + struct fsinfo_server_address srv_addr; + struct fsinfo_io_size io_size; +}; + +static void dump_hex(unsigned int *data, int from, int to) +{ + unsigned offset, print_offset = 1, col = 0; + + from /= 4; + to = (to + 3) / 4; + + for (offset = from; offset < to; offset++) { + if (print_offset) { + printf("%04x: ", offset * 8); + print_offset = 0; + } + printf("%08x", data[offset]); + col++; + if ((col & 3) == 0) { + printf("\n"); + print_offset = 1; + } 
else { + printf(" "); + } + } + + if (!print_offset) + printf("\n"); +} + +static void dump_attr_STATFS(union reply *r, int size) +{ + struct fsinfo_statfs *f = &r->statfs; + + printf("\n"); + printf("\tblocks: n=%llu fr=%llu av=%llu\n", + (unsigned long long)f->f_blocks, + (unsigned long long)f->f_bfree, + (unsigned long long)f->f_bavail); + + printf("\tfiles : n=%llu fr=%llu av=%llu\n", + (unsigned long long)f->f_files, + (unsigned long long)f->f_ffree, + (unsigned long long)f->f_favail); + printf("\tbsize : %u\n", f->f_bsize); + printf("\tfrsize: %u\n", f->f_frsize); +} + +static void dump_attr_FSINFO(union reply *r, int size) +{ + struct fsinfo_fsinfo *f = &r->fsinfo; + + printf("max_attr=%u max_cap=%u\n", f->max_attr, f->max_cap); +} + +static void dump_attr_IDS(union reply *r, int size) +{ + struct fsinfo_ids *f = &r->ids; + + printf("\n"); + printf("\tdev : %02x:%02x\n", f->f_dev_major, f->f_dev_minor); + printf("\tfs : type=%x name=%s\n", f->f_fstype, f->f_fs_name); + printf("\tflags : %llx\n", (unsigned long long)f->f_flags); + printf("\tfsid : %llx\n", (unsigned long long)f->f_fsid); +} + +static void dump_attr_LIMITS(union reply *r, int size) +{ + struct fsinfo_limits *f = &r->limits; + + printf("\n"); + printf("\tmax file size: %llx\n", + (unsigned long long)f->max_file_size); + printf("\tmax ids : u=%llx g=%llx p=%llx\n", + (unsigned long long)f->max_uid, + (unsigned long long)f->max_gid, + (unsigned long long)f->max_projid); + printf("\tmax dev : maj=%x min=%x\n", + f->max_dev_major, f->max_dev_minor); + printf("\tmax links : %x\n", f->max_hard_links); + printf("\tmax xattr : n=%x b=%x\n", + f->max_xattr_name_len, f->max_xattr_body_len); + printf("\tmax len : file=%x sym=%x\n", + f->max_filename_len, f->max_symlink_len); +} + +static void dump_attr_SUPPORTS(union reply *r, int size) +{ + struct fsinfo_supports *f = &r->supports; + + printf("\n"); + printf("\tstx_attr=%llx\n", (unsigned long long)f->stx_attributes); + printf("\tstx_mask=%x\n", 
f->stx_mask); + printf("\tioc_flags=%x\n", f->ioc_flags); + printf("\twin_fattrs=%x\n", f->win_file_attrs); +} + +#define FSINFO_CAP_NAME(C) [FSINFO_CAP_##C] = #C +static const char *fsinfo_cap_names[FSINFO_CAP__NR] = { + FSINFO_CAP_NAME(IS_KERNEL_FS), + FSINFO_CAP_NAME(IS_BLOCK_FS), + FSINFO_CAP_NAME(IS_FLASH_FS), + FSINFO_CAP_NAME(IS_NETWORK_FS), + FSINFO_CAP_NAME(IS_AUTOMOUNTER_FS), + FSINFO_CAP_NAME(AUTOMOUNTS), + FSINFO_CAP_NAME(ADV_LOCKS), + FSINFO_CAP_NAME(MAND_LOCKS), + FSINFO_CAP_NAME(LEASES), + FSINFO_CAP_NAME(UIDS), + FSINFO_CAP_NAME(GIDS), + FSINFO_CAP_NAME(PROJIDS), + FSINFO_CAP_NAME(ID_NAMES), + FSINFO_CAP_NAME(ID_GUIDS), + FSINFO_CAP_NAME(WINDOWS_ATTRS), + FSINFO_CAP_NAME(USER_QUOTAS), + FSINFO_CAP_NAME(GROUP_QUOTAS), + FSINFO_CAP_NAME(PROJECT_QUOTAS), + FSINFO_CAP_NAME(XATTRS), + FSINFO_CAP_NAME(JOURNAL), + FSINFO_CAP_NAME(DATA_IS_JOURNALLED), + FSINFO_CAP_NAME(O_SYNC), + FSINFO_CAP_NAME(O_DIRECT), + FSINFO_CAP_NAME(VOLUME_ID), + FSINFO_CAP_NAME(VOLUME_UUID), + FSINFO_CAP_NAME(VOLUME_NAME), + FSINFO_CAP_NAME(VOLUME_FSID), + FSINFO_CAP_NAME(CELL_NAME), + FSINFO_CAP_NAME(DOMAIN_NAME), + FSINFO_CAP_NAME(REALM_NAME), + FSINFO_CAP_NAME(IVER_ALL_CHANGE), + FSINFO_CAP_NAME(IVER_DATA_CHANGE), + FSINFO_CAP_NAME(IVER_MONO_INCR), + FSINFO_CAP_NAME(SYMLINKS), + FSINFO_CAP_NAME(HARD_LINKS), + FSINFO_CAP_NAME(HARD_LINKS_1DIR), + FSINFO_CAP_NAME(DEVICE_FILES), + FSINFO_CAP_NAME(UNIX_SPECIALS), + FSINFO_CAP_NAME(RESOURCE_FORKS), + FSINFO_CAP_NAME(NAME_CASE_INDEP), + FSINFO_CAP_NAME(NAME_NON_UTF8), + FSINFO_CAP_NAME(NAME_HAS_CODEPAGE), + FSINFO_CAP_NAME(SPARSE), + FSINFO_CAP_NAME(NOT_PERSISTENT), + FSINFO_CAP_NAME(NO_UNIX_MODE), + FSINFO_CAP_NAME(HAS_ATIME), + FSINFO_CAP_NAME(HAS_BTIME), + FSINFO_CAP_NAME(HAS_CTIME), + FSINFO_CAP_NAME(HAS_MTIME), +}; + +static void dump_attr_CAPABILITIES(union reply *r, int size) +{ + struct fsinfo_capabilities *f = &r->caps; + int i; + + for (i = 0; i < sizeof(f->capabilities); i++) + printf("%02x", f->capabilities[i]); + 
printf("\n"); + for (i = 0; i < FSINFO_CAP__NR; i++) + if (f->capabilities[i / 8] & (1 << (i % 8))) + printf("\t- %s\n", fsinfo_cap_names[i]); +} + +static void dump_attr_TIMESTAMP_INFO(union reply *r, int size) +{ + struct fsinfo_timestamp_info *f = &r->timestamps; + + printf("range=%llx-%llx\n", + (unsigned long long)f->minimum_timestamp, + (unsigned long long)f->maximum_timestamp); + +#define print_time(G) \ + printf("\t"#G"time : gran=%gs\n", \ + (f->G##time_gran_mantissa * \ + pow(10., f->G##time_gran_exponent))) + print_time(a); + print_time(b); + print_time(c); + print_time(m); +} + +static void dump_attr_VOLUME_UUID(union reply *r, int size) +{ + struct fsinfo_volume_uuid *f = &r->uuid; + + printf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x" + "-%02x%02x%02x%02x%02x%02x\n", + f->uuid[ 0], f->uuid[ 1], + f->uuid[ 2], f->uuid[ 3], + f->uuid[ 4], f->uuid[ 5], + f->uuid[ 6], f->uuid[ 7], + f->uuid[ 8], f->uuid[ 9], + f->uuid[10], f->uuid[11], + f->uuid[12], f->uuid[13], + f->uuid[14], f->uuid[15]); +} + +static void dump_attr_SERVER_ADDRESS(union reply *r, int size) +{ + struct fsinfo_server_address *f = &r->srv_addr; + struct sockaddr_in6 *sin6; + struct sockaddr_in *sin; + char buf[1024]; + + switch (f->address.ss_family) { + case AF_INET: + sin = (struct sockaddr_in *)&f->address; + if (!inet_ntop(AF_INET, &sin->sin_addr, buf, sizeof(buf))) + break; + printf("IPv4: %s\n", buf); + return; + case AF_INET6: + sin6 = (struct sockaddr_in6 *)&f->address; + if (!inet_ntop(AF_INET6, &sin6->sin6_addr, buf, sizeof(buf))) + break; + printf("IPv6: %s\n", buf); + return; + } + + printf("family=%u\n", f->address.ss_family); +} + +static void dump_attr_IO_SIZE(union reply *r, int size) +{ + struct fsinfo_io_size *f = &r->io_size; + + printf("dio_size=%u\n", f->dio_size_gran); +} + +/* + * + */ +typedef void (*dumper_t)(union reply *r, int size); + +#define FSINFO_DUMPER(N) [FSINFO_ATTR_##N] = dump_attr_##N +static const dumper_t fsinfo_attr_dumper[FSINFO_ATTR__NR] = { + 
FSINFO_DUMPER(STATFS), + FSINFO_DUMPER(FSINFO), + FSINFO_DUMPER(IDS), + FSINFO_DUMPER(LIMITS), + FSINFO_DUMPER(SUPPORTS), + FSINFO_DUMPER(CAPABILITIES), + FSINFO_DUMPER(TIMESTAMP_INFO), + FSINFO_DUMPER(VOLUME_UUID), + FSINFO_DUMPER(SERVER_ADDRESS), + FSINFO_DUMPER(IO_SIZE), +}; + +static void dump_fsinfo(enum fsinfo_attribute attr, __u8 about, + union reply *r, int size) +{ + dumper_t dumper = fsinfo_attr_dumper[attr]; + unsigned int len; + + if (!dumper) { + printf("<no dumper>\n"); + return; + } + + len = about & 0x3fff; + if (size < len) { + printf("<short data %u/%u>\n", size, len); + return; + } + + dumper(r, size); +} + +/* + * Try one subinstance of an attribute. + */ +static int try_one(const char *file, struct fsinfo_params *params, bool raw) +{ + union reply r; + char *p; + int ret; + __u16 about; + + memset(&r.buffer, 0xbd, sizeof(r.buffer)); + + errno = 0; + ret = fsinfo(AT_FDCWD, file, params, r.buffer, sizeof(r.buffer)); + if (params->request >= FSINFO_ATTR__NR) { + if (ret == -1 && errno == EOPNOTSUPP) + exit(0); + fprintf(stderr, "Unexpected error for too-large command %u: %m\n", + params->request); + exit(1); + } + + if (debug) + printf("fsinfo(%s,%s,%u,%u) = %d: %m\n", + file, fsinfo_attr_names[params->request], + params->Nth, params->Mth, ret); + + about = fsinfo_buffer_sizes[params->request]; + if (ret == -1) { + if (errno == ENODATA) { + switch (about & 0xc000) { + case 0x0000: + if (params->Nth == 0 && params->Mth == 0) { + fprintf(stderr, + "Unexpected ENODATA1 (%u[%u][%u])\n", + params->request, params->Nth, params->Mth); + exit(1); + } + break; + case 0x4000: + if (params->Nth == 0 && params->Mth == 0) { + fprintf(stderr, + "Unexpected ENODATA2 (%u[%u][%u])\n", + params->request, params->Nth, params->Mth); + exit(1); + } + break; + } + return (params->Mth == 0) ? 
2 : 1; + } + if (errno == EOPNOTSUPP) { + if (params->Nth > 0 || params->Mth > 0) { + fprintf(stderr, + "Should return -ENODATA (%u[%u][%u])\n", + params->request, params->Nth, params->Mth); + exit(1); + } + //printf("\e[33m%s\e[m: <not supported>\n", + // fsinfo_attr_names[attr]); + return 2; + } + perror(file); + exit(1); + } + + if (raw) { + if (ret > 4096) + ret = 4096; + dump_hex((unsigned int *)&r.buffer, 0, ret); + return 0; + } + + switch (params->request) { + case FSINFO_ATTR_PARAMETER: + if (ret == 0) + return 0; + } + + switch (about & 0xc000) { + case 0x0000: + printf("\e[33m%s\e[m: ", + fsinfo_attr_names[params->request]); + break; + case 0x4000: + printf("\e[33m%s[%u]\e[m: ", + fsinfo_attr_names[params->request], + params->Nth); + break; + case 0x8000: + printf("\e[33m%s[%u][%u]\e[m: ", + fsinfo_attr_names[params->request], + params->Nth, params->Mth); + break; + } + + switch (about) { + /* Struct */ + case 0x0001 ... 0x3fff: + case 0x4001 ... 0x7fff: + case 0x8001 ... 0xbfff: + dump_fsinfo(params->request, about, &r, ret); + return 0; + + /* String */ + case 0x0000: + case 0x4000: + case 0x8000: + if (ret >= 4096) { + ret = 4096; + r.buffer[4092] = '.'; + r.buffer[4093] = '.'; + r.buffer[4094] = '.'; + r.buffer[4095] = 0; + } else { + r.buffer[ret] = 0; + } + for (p = r.buffer; *p; p++) { + if (!isprint(*p)) { + printf("<non-printable>\n"); + continue; + } + } + printf("%s\n", r.buffer); + return 0; + + default: + fprintf(stderr, "Fishy about %u %02x\n", params->request, about); + exit(1); + } +} + +/* + * + */ +int main(int argc, char **argv) +{ + struct fsinfo_params params = { + .at_flags = AT_SYMLINK_NOFOLLOW, + }; + unsigned int attr; + int raw = 0, opt, Nth, Mth; + + while ((opt = getopt(argc, argv, "adlr"))) { + switch (opt) { + case 'a': + params.at_flags |= AT_NO_AUTOMOUNT; + continue; + case 'd': + debug = true; + continue; + case 'l': + params.at_flags &= ~AT_SYMLINK_NOFOLLOW; + continue; + case 'r': + raw = 1; + continue; + } + break; + } 
+ + argc -= optind; + argv += optind; + + if (argc != 1) { + printf("Format: test-fsinfo [-alr] <file>\n"); + exit(2); + } + + for (attr = 0; attr <= FSINFO_ATTR__NR; attr++) { + switch (attr) { + case FSINFO_ATTR_PARAM_DESCRIPTION: + case FSINFO_ATTR_PARAM_SPECIFICATION: + case FSINFO_ATTR_PARAM_NAME: + case FSINFO_ATTR_PARAM_ENUM: + /* See test-fs-query.c instead */ + continue; + } + + Nth = 0; + do { + Mth = 0; + do { + params.request = attr; + params.Nth = Nth; + params.Mth = Mth; + + switch (try_one(argv[0], &params, raw)) { + case 0: + continue; + case 1: + goto done_M; + case 2: + goto done_N; + } + } while (++Mth < 100); + + done_M: + if (Mth >= 100) { + fprintf(stderr, "Fishy: Mth == %u\n", Mth); + break; + } + + } while (++Nth < 100); + + done_N: + if (Nth >= 100) { + fprintf(stderr, "Fishy: Nth == %u\n", Nth); + break; + } + } + + return 0; +} diff --git a/samples/vfs/test-fsmount.c b/samples/vfs/test-fsmount.c new file mode 100644 index 000000000000..266d72b3dce4 --- /dev/null +++ b/samples/vfs/test-fsmount.c @@ -0,0 +1,133 @@ +/* fd-based mount test. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <linux/mount.h> +#include <linux/unistd.h> + +#define E(x) do { if ((x) == -1) { perror(#x); exit(1); } } while(0) + +static void check_messages(int fd) +{ + char buf[4096]; + int err, n; + + err = errno; + + for (;;) { + n = read(fd, buf, sizeof(buf)); + if (n < 0) + break; + n -= 2; + + switch (buf[0]) { + case 'e': + fprintf(stderr, "Error: %*.*s\n", n, n, buf + 2); + break; + case 'w': + fprintf(stderr, "Warning: %*.*s\n", n, n, buf + 2); + break; + case 'i': + fprintf(stderr, "Info: %*.*s\n", n, n, buf + 2); + break; + } + } + + errno = err; +} + +static __attribute__((noreturn)) +void mount_error(int fd, const char *s) +{ + check_messages(fd); + fprintf(stderr, "%s: %m\n", s); + exit(1); +} + +/* Hope -1 isn't a syscall */ +#ifndef __NR_fsopen +#define __NR_fsopen -1 +#endif +#ifndef __NR_fsmount +#define __NR_fsmount -1 +#endif +#ifndef __NR_fsconfig +#define __NR_fsconfig -1 +#endif +#ifndef __NR_move_mount +#define __NR_move_mount -1 +#endif + + +static inline int fsopen(const char *fs_name, unsigned int flags) +{ + return syscall(__NR_fsopen, fs_name, flags); +} + +static inline int fsmount(int fsfd, unsigned int flags, unsigned int ms_flags) +{ + return syscall(__NR_fsmount, fsfd, flags, ms_flags); +} + +static inline int fsconfig(int fsfd, unsigned int cmd, + const char *key, const void *val, int aux) +{ + return syscall(__NR_fsconfig, fsfd, cmd, key, val, aux); +} + +static inline int move_mount(int from_dfd, const char *from_pathname, + int to_dfd, const char *to_pathname, + unsigned int flags) +{ + return syscall(__NR_move_mount, + from_dfd, from_pathname, + to_dfd, to_pathname, flags); +} + +#define E_fsconfig(fd, cmd, key, val, aux) \ + do { \ + if (fsconfig(fd, cmd, key, val, aux) == -1) \ + mount_error(fd, key ?: "create"); \ + } while (0) + +int main(int argc, char *argv[]) +{ + int 
fsfd, mfd; + + /* Mount a publicly available AFS filesystem */ + fsfd = fsopen("afs", 0); + if (fsfd == -1) { + perror("fsopen"); + exit(1); + } + + E_fsconfig(fsfd, FSCONFIG_SET_STRING, "source", "#grand.central.org:root.cell.", 0); + E_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); + + mfd = fsmount(fsfd, 0, MOUNT_ATTR_RDONLY); + if (mfd < 0) + mount_error(fsfd, "fsmount"); + E(close(fsfd)); + + if (move_mount(mfd, "", AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH) < 0) { + perror("move_mount"); + exit(1); + } + + E(close(mfd)); + exit(0); +} diff --git a/samples/statx/test-statx.c b/samples/vfs/test-statx.c index d4d77b09412c..4ef0c914a62a 100644 --- a/samples/statx/test-statx.c +++ b/samples/vfs/test-statx.c @@ -32,6 +32,10 @@ #define AT_STATX_FORCE_SYNC 0x2000 #define AT_STATX_DONT_SYNC 0x4000 +#ifndef __NR_statx +#define __NR_statx -1 +#endif + static __attribute__((unused)) ssize_t statx(int dfd, const char *filename, unsigned flags, unsigned int mask, struct statx *buffer) @@ -157,7 +161,8 @@ static void dump_statx(struct statx *stx) "?dai?c??" 
/* 7- 0 0x00000000-000000ff */ ; - printf("Attributes: %016llx (", stx->stx_attributes); + printf("Attributes: %016llx (", + (unsigned long long)stx->stx_attributes); for (byte = 64 - 8; byte >= 0; byte -= 8) { bits = stx->stx_attributes >> byte; mbits = stx->stx_attributes_mask >> byte; diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 8963203319ea..33f89b3f28a4 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -137,7 +137,8 @@ static const struct super_operations aafs_super_ops = { .show_path = aafs_show_path, }; -static int fill_super(struct super_block *sb, void *data, int silent) +static int fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { static struct tree_descr files[] = { {""} }; int error; @@ -151,9 +152,10 @@ static int fill_super(struct super_block *sb, void *data, int silent) } static struct dentry *aafs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, + size_t data_size) { - return mount_single(fs_type, flags, data, fill_super); + return mount_single(fs_type, flags, data, data_size, fill_super); } static struct file_system_type aafs_ops = { diff --git a/security/apparmor/include/mount.h b/security/apparmor/include/mount.h index 25d6067fa6ef..0441bfae30fa 100644 --- a/security/apparmor/include/mount.h +++ b/security/apparmor/include/mount.h @@ -16,6 +16,7 @@ #include <linux/fs.h> #include <linux/path.h> +#include <linux/fs_context.h> #include "domain.h" #include "policy.h" @@ -27,7 +28,13 @@ #define AA_AUDIT_DATA 0x40 #define AA_MNT_CONT_MATCH 0x40 -#define AA_MS_IGNORE_MASK (MS_KERNMOUNT | MS_NOSEC | MS_ACTIVE | MS_BORN) +#define AA_SB_IGNORE_MASK (SB_KERNMOUNT | SB_NOSEC | SB_ACTIVE | SB_BORN) + +struct apparmor_fs_context { + struct fs_context fc; + char *saved_options; + size_t saved_size; +}; int aa_remount(struct aa_label *label, const struct path *path, unsigned 
long flags, void *data); @@ -45,6 +52,8 @@ int aa_move_mount(struct aa_label *label, const struct path *path, int aa_new_mount(struct aa_label *label, const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data); +int aa_new_mount_fc(struct aa_label *label, struct fs_context *fc, + const struct path *mountpoint); int aa_umount(struct aa_label *label, struct vfsmount *mnt, int flags); diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 42446a216f3b..11a64ab17c76 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -26,6 +26,7 @@ #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <net/sock.h> +#include <uapi/linux/mount.h> #include "include/apparmor.h" #include "include/apparmorfs.h" @@ -521,8 +522,108 @@ static int apparmor_file_mprotect(struct vm_area_struct *vma, !(vma->vm_flags & VM_SHARED) ? MAP_PRIVATE : 0); } +static int apparmor_fs_context_alloc(struct fs_context *fc, struct dentry *reference) +{ + struct apparmor_fs_context *afc; + + afc = kzalloc(sizeof(*afc), GFP_KERNEL); + if (!afc) + return -ENOMEM; + + fc->security = afc; + return 0; +} + +static int apparmor_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) +{ + fc->security = NULL; + return 0; +} + +static void apparmor_fs_context_free(struct fs_context *fc) +{ + struct apparmor_fs_context *afc = fc->security; + + if (afc) { + kfree(afc->saved_options); + kfree(afc); + } +} + +/* + * As a temporary hack, we buffer all the options. The problem is that we need + * to pass them to the DFA evaluator *after* mount point parameters, which + * means deferring the entire check to the sb_mountpoint hook. 
+ */ +static int apparmor_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param) +{ + struct apparmor_fs_context *afc = fc->security; + const char *value; + size_t space = 0, k_len = strlen(param->key), len = k_len, v_len; + char *p, *q; + + if (afc->saved_size > 0) + space = 1; + + switch (param->type) { + case fs_value_is_string: + value = param->string; + v_len = param->size; + len += 1 + v_len; + break; + case fs_value_is_filename: + case fs_value_is_filename_empty: { + value = param->name->name; + v_len = param->size; + len += 1 + v_len; + break; + } + default: + value = NULL; + v_len = 0; + break; + } + + p = krealloc(afc->saved_options, afc->saved_size + space + len + 1, + GFP_KERNEL); + if (!p) + return -ENOMEM; + + q = p + afc->saved_size; + if (q != p) + *q++ = ' '; + memcpy(q, param->key, k_len); + q += k_len; + if (value) { + *q++ = '='; + memcpy(q, value, v_len); + q += v_len; + } + *q = 0; + + afc->saved_options = p; + afc->saved_size += 1 + len; + return -ENOPARAM; +} + +static int apparmor_sb_mountpoint(struct fs_context *fc, struct path *mountpoint, + unsigned int mnt_flags) +{ + struct aa_label *label; + int error = 0; + + label = __begin_current_label_crit_section(); + if (!unconfined(label)) + error = aa_new_mount_fc(label, fc, mountpoint); + __end_current_label_crit_section(label); + + return error; +} + static int apparmor_sb_mount(const char *dev_name, const struct path *path, - const char *type, unsigned long flags, void *data) + const char *type, unsigned long flags, + void *data, size_t data_size) { struct aa_label *label; int error = 0; @@ -531,7 +632,7 @@ static int apparmor_sb_mount(const char *dev_name, const struct path *path, if ((flags & MS_MGC_MSK) == MS_MGC_VAL) flags &= ~MS_MGC_MSK; - flags &= ~AA_MS_IGNORE_MASK; + flags &= ~AA_SB_IGNORE_MASK; label = __begin_current_label_crit_section(); if (!unconfined(label)) { @@ -1156,6 +1257,12 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { 
LSM_HOOK_INIT(capget, apparmor_capget), LSM_HOOK_INIT(capable, apparmor_capable), + LSM_HOOK_INIT(fs_context_alloc, apparmor_fs_context_alloc), + LSM_HOOK_INIT(fs_context_dup, apparmor_fs_context_dup), + LSM_HOOK_INIT(fs_context_free, apparmor_fs_context_free), + LSM_HOOK_INIT(fs_context_parse_param, apparmor_fs_context_parse_param), + LSM_HOOK_INIT(sb_mountpoint, apparmor_sb_mountpoint), + LSM_HOOK_INIT(sb_mount, apparmor_sb_mount), LSM_HOOK_INIT(sb_umount, apparmor_sb_umount), LSM_HOOK_INIT(sb_pivotroot, apparmor_sb_pivotroot), diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index c1da22482bfb..3c95fffb76ac 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -15,6 +15,7 @@ #include <linux/fs.h> #include <linux/mount.h> #include <linux/namei.h> +#include <uapi/linux/mount.h> #include "include/apparmor.h" #include "include/audit.h" @@ -553,6 +554,52 @@ int aa_new_mount(struct aa_label *label, const char *dev_name, return error; } +int aa_new_mount_fc(struct aa_label *label, struct fs_context *fc, + const struct path *mountpoint) +{ + struct apparmor_fs_context *afc = fc->security; + struct aa_profile *profile; + char *buffer = NULL, *dev_buffer = NULL; + bool binary; + int error; + struct path tmp_path, *dev_path = NULL; + + AA_BUG(!label); + AA_BUG(!mountpoint); + + binary = fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA; + + if (fc->fs_type->fs_flags & FS_REQUIRES_DEV) { + if (!fc->source) + return -ENOENT; + + error = kern_path(fc->source, LOOKUP_FOLLOW, &tmp_path); + if (error) + return error; + dev_path = &tmp_path; + } + + get_buffers(buffer, dev_buffer); + if (dev_path) { + error = fn_for_each_confined(label, profile, + match_mnt(profile, mountpoint, buffer, dev_path, dev_buffer, + fc->fs_type->name, + fc->sb_flags & ~AA_SB_IGNORE_MASK, + afc->saved_options, binary)); + } else { + error = fn_for_each_confined(label, profile, + match_mnt_path_str(profile, mountpoint, buffer, + fc->source, fc->fs_type->name, + fc->sb_flags 
& ~AA_SB_IGNORE_MASK, + afc->saved_options, binary, NULL)); + } + put_buffers(buffer, dev_buffer); + if (dev_path) + path_put(dev_path); + + return error; +} + static int profile_umount(struct aa_profile *profile, struct path *path, char *buffer) { diff --git a/security/inode.c b/security/inode.c index 8dd9ca8848e4..a89a00714f33 100644 --- a/security/inode.c +++ b/security/inode.c @@ -39,7 +39,8 @@ static const struct super_operations securityfs_super_operations = { .evict_inode = securityfs_evict_inode, }; -static int fill_super(struct super_block *sb, void *data, int silent) +static int fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { static const struct tree_descr files[] = {{""}}; int error; @@ -55,9 +56,9 @@ static int fill_super(struct super_block *sb, void *data, int silent) static struct dentry *get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, size_t data_size) { - return mount_single(fs_type, flags, data, fill_super); + return mount_single(fs_type, flags, data, data_size, fill_super); } static struct file_system_type fs_type = { diff --git a/security/security.c b/security/security.c index 04d173eb93f6..c66625345cc4 100644 --- a/security/security.c +++ b/security/security.c @@ -374,32 +374,63 @@ void security_bprm_committed_creds(struct linux_binprm *bprm) call_void_hook(bprm_committed_creds, bprm); } -int security_sb_alloc(struct super_block *sb) +int security_fs_context_alloc(struct fs_context *fc, struct dentry *reference) { - return call_int_hook(sb_alloc_security, 0, sb); + return call_int_hook(fs_context_alloc, 0, fc, reference); } -void security_sb_free(struct super_block *sb) +int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) { - call_void_hook(sb_free_security, sb); + return call_int_hook(fs_context_dup, 0, fc, src_fc); } -int security_sb_copy_data(char *orig, char *copy) +void security_fs_context_free(struct fs_context *fc) { - return 
call_int_hook(sb_copy_data, 0, orig, copy); + call_void_hook(fs_context_free, fc); +} + +int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + return call_int_hook(fs_context_parse_param, -ENOPARAM, fc, param); } -EXPORT_SYMBOL(security_sb_copy_data); -int security_sb_remount(struct super_block *sb, void *data) +int security_fs_context_validate(struct fs_context *fc) { - return call_int_hook(sb_remount, 0, sb, data); + return call_int_hook(fs_context_validate, 0, fc); } -int security_sb_kern_mount(struct super_block *sb, int flags, void *data) +int security_sb_get_tree(struct fs_context *fc) { - return call_int_hook(sb_kern_mount, 0, sb, flags, data); + return call_int_hook(sb_get_tree, 0, fc); } +void security_sb_reconfigure(struct fs_context *fc) +{ + call_void_hook(sb_reconfigure, fc); +} + +int security_sb_mountpoint(struct fs_context *fc, struct path *mountpoint, + unsigned int mnt_flags) +{ + return call_int_hook(sb_mountpoint, 0, fc, mountpoint, mnt_flags); +} + +int security_sb_alloc(struct super_block *sb) +{ + return call_int_hook(sb_alloc_security, 0, sb); +} + +void security_sb_free(struct super_block *sb) +{ + call_void_hook(sb_free_security, sb); +} + +int security_sb_copy_data(char *orig, size_t data_size, char *copy) +{ + return call_int_hook(sb_copy_data, 0, orig, data_size, copy); +} +EXPORT_SYMBOL(security_sb_copy_data); + int security_sb_show_options(struct seq_file *m, struct super_block *sb) { return call_int_hook(sb_show_options, 0, m, sb); @@ -411,9 +442,11 @@ int security_sb_statfs(struct dentry *dentry) } int security_sb_mount(const char *dev_name, const struct path *path, - const char *type, unsigned long flags, void *data) + const char *type, unsigned long flags, + void *data, size_t data_size) { - return call_int_hook(sb_mount, 0, dev_name, path, type, flags, data); + return call_int_hook(sb_mount, 0, dev_name, path, type, flags, + data, data_size); } int security_sb_umount(struct vfsmount *mnt, int 
flags) @@ -453,6 +486,11 @@ int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts) } EXPORT_SYMBOL(security_sb_parse_opts_str); +int security_move_mount(const struct path *from_path, const struct path *to_path) +{ + return call_int_hook(move_mount, 0, from_path, to_path); +} + int security_inode_alloc(struct inode *inode) { inode->i_security = NULL; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a67459eb62d5..bfedda63fb42 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -48,6 +48,8 @@ #include <linux/fdtable.h> #include <linux/namei.h> #include <linux/mount.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <linux/tty.h> @@ -88,6 +90,7 @@ #include <linux/msg.h> #include <linux/shm.h> #include <linux/bpf.h> +#include <uapi/linux/mount.h> #include "avc.h" #include "objsec.h" @@ -438,24 +441,23 @@ static inline int inode_doinit(struct inode *inode) } enum { - Opt_error = -1, - Opt_context = 1, + Opt_context = 0, + Opt_defcontext = 1, Opt_fscontext = 2, - Opt_defcontext = 3, - Opt_rootcontext = 4, - Opt_labelsupport = 5, - Opt_nextmntopt = 6, + Opt_rootcontext = 3, + Opt_seclabel = 4, + nr__selinux_params }; -#define NUM_SEL_MNT_OPTS (Opt_nextmntopt - 1) +#define NUM_SEL_MNT_OPTS (nr__selinux_params - 1) static const match_table_t tokens = { - {Opt_context, CONTEXT_STR "%s"}, - {Opt_fscontext, FSCONTEXT_STR "%s"}, - {Opt_defcontext, DEFCONTEXT_STR "%s"}, - {Opt_rootcontext, ROOTCONTEXT_STR "%s"}, - {Opt_labelsupport, LABELSUPP_STR}, - {Opt_error, NULL}, + {Opt_context, CONTEXT_STR "=%s"}, + {Opt_fscontext, FSCONTEXT_STR "=%s"}, + {Opt_defcontext, DEFCONTEXT_STR "=%s"}, + {Opt_rootcontext, ROOTCONTEXT_STR "=%s"}, + {Opt_seclabel, SECLABEL_STR}, + {-1, NULL}, }; #define SEL_MOUNT_FAIL_MSG "SELinux: duplicate or incompatible mount options\n" @@ -614,15 +616,11 @@ static int selinux_get_mnt_opts(const struct 
super_block *sb, if (!selinux_state.initialized) return -EINVAL; - /* make sure we always check enough bits to cover the mask */ - BUILD_BUG_ON(SE_MNTMASK >= (1 << NUM_SEL_MNT_OPTS)); - tmp = sbsec->flags & SE_MNTMASK; /* count the number of mount options for this sb */ for (i = 0; i < NUM_SEL_MNT_OPTS; i++) { - if (tmp & 0x01) + if (tmp & (1 << i)) opts->num_mnt_opts++; - tmp >>= 1; } /* Check if the Label support flag is set */ if (sbsec->flags & SBLABEL_MNT) @@ -1153,7 +1151,7 @@ static int selinux_parse_opts_str(char *options, goto out_err; } break; - case Opt_labelsupport: + case Opt_seclabel: break; default: rc = -EINVAL; @@ -1258,7 +1256,7 @@ static void selinux_write_opts(struct seq_file *m, break; case SBLABEL_MNT: seq_putc(m, ','); - seq_puts(m, LABELSUPP_STR); + seq_puts(m, SECLABEL_STR); continue; default: BUG(); @@ -1267,6 +1265,7 @@ static void selinux_write_opts(struct seq_file *m, /* we need a comma before each option */ seq_putc(m, ','); seq_puts(m, prefix); + seq_putc(m, '='); if (has_comma) seq_putc(m, '\"'); seq_escape(m, opts->mnt_opts[i], "\"\n\\"); @@ -2757,11 +2756,11 @@ static inline int match_prefix(char *prefix, int plen, char *option, int olen) static inline int selinux_option(char *option, int len) { - return (match_prefix(CONTEXT_STR, sizeof(CONTEXT_STR)-1, option, len) || - match_prefix(FSCONTEXT_STR, sizeof(FSCONTEXT_STR)-1, option, len) || - match_prefix(DEFCONTEXT_STR, sizeof(DEFCONTEXT_STR)-1, option, len) || - match_prefix(ROOTCONTEXT_STR, sizeof(ROOTCONTEXT_STR)-1, option, len) || - match_prefix(LABELSUPP_STR, sizeof(LABELSUPP_STR)-1, option, len)); + return (match_prefix(CONTEXT_STR"=", sizeof(CONTEXT_STR)-1, option, len) || + match_prefix(FSCONTEXT_STR"=", sizeof(FSCONTEXT_STR)-1, option, len) || + match_prefix(DEFCONTEXT_STR"=", sizeof(DEFCONTEXT_STR)-1, option, len) || + match_prefix(ROOTCONTEXT_STR"=", sizeof(ROOTCONTEXT_STR)-1, option, len) || + match_prefix(SECLABEL_STR"=", sizeof(SECLABEL_STR)-1, option, len)); } static 
inline void take_option(char **to, char *from, int *first, int len) @@ -2796,7 +2795,7 @@ static inline void take_selinux_option(char **to, char *from, int *first, } } -static int selinux_sb_copy_data(char *orig, char *copy) +static int selinux_sb_copy_data(char *orig, size_t data_size, char *copy) { int fnosec, fsec, rc = 0; char *in_save, *in_curr, *in_end; @@ -2838,141 +2837,316 @@ out: return rc; } -static int selinux_sb_remount(struct super_block *sb, void *data) +static int selinux_sb_statfs(struct dentry *dentry) { - int rc, i, *flags; - struct security_mnt_opts opts; - char *secdata, **mount_options; - struct superblock_security_struct *sbsec = sb->s_security; + const struct cred *cred = current_cred(); + struct common_audit_data ad; - if (!(sbsec->flags & SE_SBINITIALIZED)) - return 0; + ad.type = LSM_AUDIT_DATA_DENTRY; + ad.u.dentry = dentry->d_sb->s_root; + return superblock_has_perm(cred, dentry->d_sb, FILESYSTEM__GETATTR, &ad); +} - if (!data) - return 0; +static int selinux_mount(const char *dev_name, + const struct path *path, + const char *type, + unsigned long flags, + void *data, + size_t data_size) +{ + const struct cred *cred = current_cred(); + + if (flags & MS_REMOUNT) + return superblock_has_perm(cred, path->dentry->d_sb, + FILESYSTEM__REMOUNT, NULL); + else + return path_has_perm(cred, path, FILE__MOUNTON); +} + +static int selinux_umount(struct vfsmount *mnt, int flags) +{ + const struct cred *cred = current_cred(); + + return superblock_has_perm(cred, mnt->mnt_sb, + FILESYSTEM__UNMOUNT, NULL); +} + +/* fsopen mount context operations */ + +static int selinux_fs_context_alloc(struct fs_context *fc, + struct dentry *reference) +{ + struct security_mnt_opts *opts; - if (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA) + opts = kzalloc(sizeof(*opts), GFP_KERNEL); + if (!opts) + return -ENOMEM; + + fc->security = opts; + return 0; +} + +static int selinux_fs_context_dup(struct fs_context *fc, + struct fs_context *src_fc) +{ + const struct 
security_mnt_opts *src = src_fc->security; + struct security_mnt_opts *opts; + int i, n; + + opts = kzalloc(sizeof(*opts), GFP_KERNEL); + if (!opts) + return -ENOMEM; + fc->security = opts; + + if (!src || !src->num_mnt_opts) return 0; + n = opts->num_mnt_opts = src->num_mnt_opts; - security_init_mnt_opts(&opts); - secdata = alloc_secdata(); - if (!secdata) + if (src->mnt_opts) { + opts->mnt_opts = kcalloc(n, sizeof(char *), GFP_KERNEL); + if (!opts->mnt_opts) + return -ENOMEM; + + for (i = 0; i < n; i++) { + if (src->mnt_opts[i]) { + opts->mnt_opts[i] = kstrdup(src->mnt_opts[i], + GFP_KERNEL); + if (!opts->mnt_opts[i]) + return -ENOMEM; + } + } + } + + if (src->mnt_opts_flags) { + opts->mnt_opts_flags = kmemdup(src->mnt_opts_flags, + n * sizeof(int), GFP_KERNEL); + if (!opts->mnt_opts_flags) + return -ENOMEM; + } + + return 0; +} + +static void selinux_fs_context_free(struct fs_context *fc) +{ + struct security_mnt_opts *opts = fc->security; + + if (opts) { + security_free_mnt_opts(opts); + fc->security = NULL; + } +} + +static const struct fs_parameter_spec selinux_param_specs[nr__selinux_params] = { + [Opt_context] = { fs_param_is_string }, + [Opt_defcontext] = { fs_param_is_string }, + [Opt_fscontext] = { fs_param_is_string }, + [Opt_rootcontext] = { fs_param_is_string }, + [Opt_seclabel] = { fs_param_is_flag }, +}; + +static const char *const selinux_param_keys[nr__selinux_params] = { + [Opt_context] = CONTEXT_STR, + [Opt_defcontext] = DEFCONTEXT_STR, + [Opt_fscontext] = FSCONTEXT_STR, + [Opt_rootcontext] = ROOTCONTEXT_STR, + [Opt_seclabel] = SECLABEL_STR, +}; + +static const struct fs_parameter_description selinux_fs_parameters = { + .name = "SELinux", + .nr_params = nr__selinux_params, + .keys = selinux_param_keys, + .specs = selinux_param_specs, + .no_source = true, +}; + +static int selinux_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param) +{ + struct security_mnt_opts *opts = fc->security; + struct fs_parse_result result; + 
unsigned int have; + char **oo; + int opt, ctx, i, *of; + + opt = fs_parse(fc, &selinux_fs_parameters, param, &result); + if (opt < 0) + return opt; + + have = 0; + for (i = 0; i < opts->num_mnt_opts; i++) + have |= 1 << opts->mnt_opts_flags[i]; + if (have & (1 << opt)) + return -EINVAL; + + switch (opt) { + case Opt_context: + if (have & (1 << Opt_defcontext)) + goto incompatible; + ctx = CONTEXT_MNT; + goto copy_context_string; + + case Opt_fscontext: + ctx = FSCONTEXT_MNT; + goto copy_context_string; + + case Opt_rootcontext: + ctx = ROOTCONTEXT_MNT; + goto copy_context_string; + + case Opt_defcontext: + if (have & (1 << Opt_context)) + goto incompatible; + ctx = DEFCONTEXT_MNT; + goto copy_context_string; + + case Opt_seclabel: + return 1; + + default: + return -EINVAL; + } + +copy_context_string: + if (opts->num_mnt_opts > 3) + return -EINVAL; + + of = krealloc(opts->mnt_opts_flags, + (opts->num_mnt_opts + 1) * sizeof(int), GFP_KERNEL); + if (!of) return -ENOMEM; - rc = selinux_sb_copy_data(data, secdata); - if (rc) - goto out_free_secdata; + of[opts->num_mnt_opts] = 0; + opts->mnt_opts_flags = of; - rc = selinux_parse_opts_str(secdata, &opts); - if (rc) - goto out_free_secdata; + oo = krealloc(opts->mnt_opts, + (opts->num_mnt_opts + 1) * sizeof(char *), GFP_KERNEL); + if (!oo) + return -ENOMEM; + oo[opts->num_mnt_opts] = NULL; + opts->mnt_opts = oo; + + opts->mnt_opts[opts->num_mnt_opts] = param->string; + opts->mnt_opts_flags[opts->num_mnt_opts] = ctx; + opts->num_mnt_opts++; + param->string = NULL; + return 1; - mount_options = opts.mnt_opts; - flags = opts.mnt_opts_flags; +incompatible: + return -EINVAL; +} - for (i = 0; i < opts.num_mnt_opts; i++) { +/* + * Validate the security parameters supplied for a reconfiguration/remount + * event. 
+ */ +static int selinux_validate_for_sb_reconfigure(struct fs_context *fc) +{ + struct super_block *sb = fc->root->d_sb; + struct superblock_security_struct *sbsec = sb->s_security; + struct security_mnt_opts *opts = fc->security; + int rc, i, *flags; + char **mount_options; + + if (!(sbsec->flags & SE_SBINITIALIZED)) + return 0; + + mount_options = opts->mnt_opts; + flags = opts->mnt_opts_flags; + + for (i = 0; i < opts->num_mnt_opts; i++) { u32 sid; if (flags[i] == SBLABEL_MNT) continue; - rc = security_context_str_to_sid(&selinux_state, - mount_options[i], &sid, - GFP_KERNEL); + + rc = security_context_str_to_sid(&selinux_state, mount_options[i], + &sid, GFP_KERNEL); if (rc) { pr_warn("SELinux: security_context_str_to_sid" - "(%s) failed for (dev %s, type %s) errno=%d\n", - mount_options[i], sb->s_id, sb->s_type->name, rc); - goto out_free_opts; + "(%s) failed for (dev %s, type %s) errno=%d\n", + mount_options[i], sb->s_id, sb->s_type->name, rc); + goto inval; } - rc = -EINVAL; + switch (flags[i]) { case FSCONTEXT_MNT: if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) - goto out_bad_option; + goto bad_option; break; case CONTEXT_MNT: if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) - goto out_bad_option; + goto bad_option; break; case ROOTCONTEXT_MNT: { struct inode_security_struct *root_isec; root_isec = backing_inode_security(sb->s_root); if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) - goto out_bad_option; + goto bad_option; break; } case DEFCONTEXT_MNT: if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid)) - goto out_bad_option; + goto bad_option; break; default: - goto out_free_opts; + goto inval; } } rc = 0; -out_free_opts: - security_free_mnt_opts(&opts); -out_free_secdata: - free_secdata(secdata); +out: return rc; -out_bad_option: + +bad_option: pr_warn("SELinux: unable to change security options " - "during remount (dev %s, type=%s)\n", sb->s_id, - sb->s_type->name); - goto out_free_opts; + "during remount (dev %s, 
type=%s)\n", + sb->s_id, sb->s_type->name); +inval: + rc = -EINVAL; + goto out; +} + +/* + * Validate the security context assembled from the option data supplied to + * mount. + */ +static int selinux_fs_context_validate(struct fs_context *fc) +{ + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) + return selinux_validate_for_sb_reconfigure(fc); + return 0; } -static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) +/* + * Set the security context on a superblock. + */ +static int selinux_sb_get_tree(struct fs_context *fc) { const struct cred *cred = current_cred(); struct common_audit_data ad; int rc; - rc = superblock_doinit(sb, data); + rc = selinux_set_mnt_opts(fc->root->d_sb, fc->security, 0, NULL); if (rc) return rc; /* Allow all mounts performed by the kernel */ - if (flags & MS_KERNMOUNT) + if (fc->purpose == FS_CONTEXT_FOR_KERNEL_MOUNT) return 0; ad.type = LSM_AUDIT_DATA_DENTRY; - ad.u.dentry = sb->s_root; - return superblock_has_perm(cred, sb, FILESYSTEM__MOUNT, &ad); -} - -static int selinux_sb_statfs(struct dentry *dentry) -{ - const struct cred *cred = current_cred(); - struct common_audit_data ad; - - ad.type = LSM_AUDIT_DATA_DENTRY; - ad.u.dentry = dentry->d_sb->s_root; - return superblock_has_perm(cred, dentry->d_sb, FILESYSTEM__GETATTR, &ad); + ad.u.dentry = fc->root; + return superblock_has_perm(cred, fc->root->d_sb, FILESYSTEM__MOUNT, &ad); } -static int selinux_mount(const char *dev_name, - const struct path *path, - const char *type, - unsigned long flags, - void *data) +static int selinux_sb_mountpoint(struct fs_context *fc, struct path *mountpoint, + unsigned int mnt_flags) { const struct cred *cred = current_cred(); - if (flags & MS_REMOUNT) - return superblock_has_perm(cred, path->dentry->d_sb, - FILESYSTEM__REMOUNT, NULL); - else - return path_has_perm(cred, path, FILE__MOUNTON); -} - -static int selinux_umount(struct vfsmount *mnt, int flags) -{ - const struct cred *cred = current_cred(); - - return 
superblock_has_perm(cred, mnt->mnt_sb, - FILESYSTEM__UNMOUNT, NULL); + return path_has_perm(cred, mountpoint, FILE__MOUNTON); } /* inode security operations */ @@ -6924,11 +7098,17 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(bprm_committing_creds, selinux_bprm_committing_creds), LSM_HOOK_INIT(bprm_committed_creds, selinux_bprm_committed_creds), + LSM_HOOK_INIT(fs_context_alloc, selinux_fs_context_alloc), + LSM_HOOK_INIT(fs_context_dup, selinux_fs_context_dup), + LSM_HOOK_INIT(fs_context_free, selinux_fs_context_free), + LSM_HOOK_INIT(fs_context_parse_param, selinux_fs_context_parse_param), + LSM_HOOK_INIT(fs_context_validate, selinux_fs_context_validate), + LSM_HOOK_INIT(sb_get_tree, selinux_sb_get_tree), + LSM_HOOK_INIT(sb_mountpoint, selinux_sb_mountpoint), + LSM_HOOK_INIT(sb_alloc_security, selinux_sb_alloc_security), LSM_HOOK_INIT(sb_free_security, selinux_sb_free_security), LSM_HOOK_INIT(sb_copy_data, selinux_sb_copy_data), - LSM_HOOK_INIT(sb_remount, selinux_sb_remount), - LSM_HOOK_INIT(sb_kern_mount, selinux_sb_kern_mount), LSM_HOOK_INIT(sb_show_options, selinux_sb_show_options), LSM_HOOK_INIT(sb_statfs, selinux_sb_statfs), LSM_HOOK_INIT(sb_mount, selinux_mount), @@ -7191,6 +7371,8 @@ static __init int selinux_init(void) else pr_debug("SELinux: Starting in permissive mode\n"); + fs_validate_description(&selinux_fs_parameters); + return 0; } diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 23e762d529fa..7c100283b66f 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -50,20 +50,20 @@ /* Super block security struct flags for mount options */ /* BE CAREFUL, these need to be the low order bits for selinux_get_mnt_opts */ #define CONTEXT_MNT 0x01 -#define FSCONTEXT_MNT 0x02 -#define ROOTCONTEXT_MNT 0x04 -#define DEFCONTEXT_MNT 0x08 +#define DEFCONTEXT_MNT 0x02 +#define FSCONTEXT_MNT 0x04 +#define ROOTCONTEXT_MNT 0x08 #define 
SBLABEL_MNT 0x10 /* Non-mount related flags */ #define SE_SBINITIALIZED 0x0100 #define SE_SBPROC 0x0200 #define SE_SBGENFS 0x0400 -#define CONTEXT_STR "context=" -#define FSCONTEXT_STR "fscontext=" -#define ROOTCONTEXT_STR "rootcontext=" -#define DEFCONTEXT_STR "defcontext=" -#define LABELSUPP_STR "seclabel" +#define CONTEXT_STR "context" +#define FSCONTEXT_STR "fscontext" +#define ROOTCONTEXT_STR "rootcontext" +#define DEFCONTEXT_STR "defcontext" +#define SECLABEL_STR "seclabel" struct netlbl_lsm_secattr; diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index f3a5a138a096..c3914eeb695c 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1893,7 +1893,8 @@ static struct dentry *sel_make_dir(struct dentry *dir, const char *name, #define NULL_FILE_NAME "null" -static int sel_fill_super(struct super_block *sb, void *data, int silent) +static int sel_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { struct selinux_fs_info *fsi; int ret; @@ -2010,9 +2011,10 @@ err: } static struct dentry *sel_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { - return mount_single(fs_type, flags, data, sel_fill_super); + return mount_single(fs_type, flags, data, data_size, sel_fill_super); } static void sel_kill_sb(struct super_block *sb) diff --git a/security/smack/smack.h b/security/smack/smack.h index f7db791fb566..891a307a2029 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -195,21 +195,22 @@ struct smack_known_list_elem { enum { Opt_error = -1, - Opt_fsdefault = 1, - Opt_fsfloor = 2, - Opt_fshat = 3, - Opt_fsroot = 4, - Opt_fstransmute = 5, + Opt_fsdefault = 0, + Opt_fsfloor = 1, + Opt_fshat = 2, + Opt_fsroot = 3, + Opt_fstransmute = 4, + nr__smack_params }; /* * Mount options */ -#define SMK_FSDEFAULT "smackfsdef=" -#define SMK_FSFLOOR "smackfsfloor=" -#define SMK_FSHAT 
"smackfshat=" -#define SMK_FSROOT "smackfsroot=" -#define SMK_FSTRANS "smackfstransmute=" +#define SMK_FSDEFAULT "smackfsdef" +#define SMK_FSFLOOR "smackfsfloor" +#define SMK_FSHAT "smackfshat" +#define SMK_FSROOT "smackfsroot" +#define SMK_FSTRANS "smackfstransmute" #define SMACK_DELETE_OPTION "-DELETE" #define SMACK_CIPSO_OPTION "-CIPSO" diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 81fb4c1631e9..8a1f5e7f60f3 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -43,6 +43,8 @@ #include <linux/shm.h> #include <linux/binfmts.h> #include <linux/parser.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> #include "smack.h" #define TRANS_TRUE "TRUE" @@ -60,11 +62,11 @@ static struct kmem_cache *smack_inode_cache; int smack_enabled; static const match_table_t smk_mount_tokens = { - {Opt_fsdefault, SMK_FSDEFAULT "%s"}, - {Opt_fsfloor, SMK_FSFLOOR "%s"}, - {Opt_fshat, SMK_FSHAT "%s"}, - {Opt_fsroot, SMK_FSROOT "%s"}, - {Opt_fstransmute, SMK_FSTRANS "%s"}, + {Opt_fsdefault, SMK_FSDEFAULT "=%s"}, + {Opt_fsfloor, SMK_FSFLOOR "=%s"}, + {Opt_fshat, SMK_FSHAT "=%s"}, + {Opt_fsroot, SMK_FSROOT "=%s"}, + {Opt_fstransmute, SMK_FSTRANS "=%s"}, {Opt_error, NULL}, }; @@ -524,6 +526,319 @@ static int smack_syslog(int typefrom_file) return rc; } +/* + * Mount context operations + */ + +struct smack_fs_context { + union { + struct { + char *fsdefault; + char *fsfloor; + char *fshat; + char *fsroot; + char *fstransmute; + }; + char *ptrs[5]; + + }; + struct superblock_smack *sbsp; + struct inode_smack *isp; + bool transmute; +}; + +/** + * smack_fs_context_free - Free the security data from a filesystem context + * @fc: The filesystem context to be cleaned up. 
+ */ +static void smack_fs_context_free(struct fs_context *fc) +{ + struct smack_fs_context *ctx = fc->security; + int i; + + if (ctx) { + for (i = 0; i < ARRAY_SIZE(ctx->ptrs); i++) + kfree(ctx->ptrs[i]); + kfree(ctx->isp); + kfree(ctx->sbsp); + kfree(ctx); + fc->security = NULL; + } +} + +/** + * smack_fs_context_alloc - Allocate security data for a filesystem context + * @fc: The filesystem context. + * @reference: Reference dentry (automount/reconfigure) or NULL + * + * Returns 0 on success or -ENOMEM on error. + */ +static int smack_fs_context_alloc(struct fs_context *fc, + struct dentry *reference) +{ + struct smack_fs_context *ctx; + struct superblock_smack *sbsp; + struct inode_smack *isp; + struct smack_known *skp; + + ctx = kzalloc(sizeof(struct smack_fs_context), GFP_KERNEL); + if (!ctx) + goto nomem; + fc->security = ctx; + + sbsp = kzalloc(sizeof(struct superblock_smack), GFP_KERNEL); + if (!sbsp) + goto nomem_free; + ctx->sbsp = sbsp; + + isp = new_inode_smack(NULL); + if (!isp) + goto nomem_free; + ctx->isp = isp; + + if (reference) { + if (reference->d_sb->s_security) + memcpy(sbsp, reference->d_sb->s_security, sizeof(*sbsp)); + } else if (!smack_privileged(CAP_MAC_ADMIN)) { + /* Unprivileged mounts get root and default from the caller. */ + skp = smk_of_current(); + sbsp->smk_root = skp; + sbsp->smk_default = skp; + } else { + sbsp->smk_root = &smack_known_floor; + sbsp->smk_default = &smack_known_floor; + sbsp->smk_floor = &smack_known_floor; + sbsp->smk_hat = &smack_known_hat; + /* SMK_SB_INITIALIZED will be zero from kzalloc. */ + } + + return 0; + +nomem_free: + smack_fs_context_free(fc); +nomem: + return -ENOMEM; +} + +/** + * smack_fs_context_dup - Duplicate the security data on fs_context duplication + * @fc: The new filesystem context. + * @src_fc: The source filesystem context being duplicated. + * + * Returns 0 on success or -ENOMEM on error. 
+ */ +static int smack_fs_context_dup(struct fs_context *fc, + struct fs_context *src_fc) +{ + struct smack_fs_context *dst, *src = src_fc->security; + int i; + + dst = kzalloc(sizeof(struct smack_fs_context), GFP_KERNEL); + if (!dst) + goto nomem; + fc->security = dst; + + dst->sbsp = kmemdup(src->sbsp, sizeof(struct superblock_smack), + GFP_KERNEL); + if (!dst->sbsp) + goto nomem_free; + + for (i = 0; i < ARRAY_SIZE(dst->ptrs); i++) { + if (src->ptrs[i]) { + dst->ptrs[i] = kstrdup(src->ptrs[i], GFP_KERNEL); + if (!dst->ptrs[i]) + goto nomem_free; + } + } + + return 0; + +nomem_free: + smack_fs_context_free(fc); +nomem: + return -ENOMEM; +} + +static const struct fs_parameter_spec smack_param_specs[nr__smack_params] = { + [Opt_fsdefault] = { fs_param_is_string }, + [Opt_fsfloor] = { fs_param_is_string }, + [Opt_fshat] = { fs_param_is_string }, + [Opt_fsroot] = { fs_param_is_string }, + [Opt_fstransmute] = { fs_param_is_string }, +}; + +static const char *const smack_param_keys[nr__smack_params] = { + [Opt_fsdefault] = SMK_FSDEFAULT, + [Opt_fsfloor] = SMK_FSFLOOR, + [Opt_fshat] = SMK_FSHAT, + [Opt_fsroot] = SMK_FSROOT, + [Opt_fstransmute] = SMK_FSTRANS, +}; + +static const struct fs_parameter_description smack_fs_parameters = { + .name = "smack", + .nr_params = nr__smack_params, + .keys = smack_param_keys, + .specs = smack_param_specs, + .no_source = true, +}; + +/** + * smack_fs_context_parse_param - Parse a single mount parameter + * @fc: The new filesystem context being constructed. + * @param: The parameter. + * + * Returns 0 on success or -ENOMEM on error. + */ +static int smack_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param) +{ + struct smack_fs_context *ctx = fc->security; + struct fs_parse_result result; + int opt; + + /* Unprivileged mounts don't get to specify Smack values. 
*/ + if (!smack_privileged(CAP_MAC_ADMIN)) + return -EPERM; + + opt = fs_parse(fc, &smack_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_fsdefault: + if (ctx->fsdefault) + goto error_dup; + ctx->fsdefault = param->string; + break; + case Opt_fsfloor: + if (ctx->fsfloor) + goto error_dup; + ctx->fsfloor = param->string; + break; + case Opt_fshat: + if (ctx->fshat) + goto error_dup; + ctx->fshat = param->string; + break; + case Opt_fsroot: + if (ctx->fsroot) + goto error_dup; + ctx->fsroot = param->string; + break; + case Opt_fstransmute: + if (ctx->fstransmute) + goto error_dup; + ctx->fstransmute = param->string; + break; + default: + return invalf(fc, "Smack: unknown mount option\n"); + } + + param->string = NULL; + return 0; + +error_dup: + return invalf(fc, "Smack: duplicate mount option\n"); +} + +/** + * smack_fs_context_validate - Validate the filesystem context security data + * @fc: The filesystem context. + * + * Returns 0 on success or -ENOMEM on error. 
+ */ +static int smack_fs_context_validate(struct fs_context *fc) +{ + struct smack_fs_context *ctx = fc->security; + struct superblock_smack *sbsp = ctx->sbsp; + struct inode_smack *isp = ctx->isp; + struct smack_known *skp; + + if (ctx->fsdefault) { + skp = smk_import_entry(ctx->fsdefault, 0); + if (IS_ERR(skp)) + return PTR_ERR(skp); + sbsp->smk_default = skp; + } + + if (ctx->fsfloor) { + skp = smk_import_entry(ctx->fsfloor, 0); + if (IS_ERR(skp)) + return PTR_ERR(skp); + sbsp->smk_floor = skp; + } + + if (ctx->fshat) { + skp = smk_import_entry(ctx->fshat, 0); + if (IS_ERR(skp)) + return PTR_ERR(skp); + sbsp->smk_hat = skp; + } + + if (ctx->fsroot || ctx->fstransmute) { + skp = smk_import_entry(ctx->fstransmute ?: ctx->fsroot, 0); + if (IS_ERR(skp)) + return PTR_ERR(skp); + sbsp->smk_root = skp; + ctx->transmute = !!ctx->fstransmute; + } + + isp->smk_inode = sbsp->smk_root; + return 0; +} + +/** + * smack_sb_get_tree - Assign the context to a newly created superblock + * @fc: The new filesystem context. + * + * Returns 0 on success or -ENOMEM on error. + */ +static int smack_sb_get_tree(struct fs_context *fc) +{ + struct smack_fs_context *ctx = fc->security; + struct superblock_smack *sbsp = ctx->sbsp; + struct dentry *root = fc->root; + struct inode *inode = d_backing_inode(root); + struct super_block *sb = root->d_sb; + struct inode_smack *isp; + bool transmute = ctx->transmute; + + if (sb->s_security) + return 0; + + if (!smack_privileged(CAP_MAC_ADMIN)) { + /* + * For a handful of fs types with no user-controlled + * backing store it's okay to trust security labels + * in the filesystem. The rest are untrusted. + */ + if (fc->user_ns != &init_user_ns && + sb->s_magic != SYSFS_MAGIC && sb->s_magic != TMPFS_MAGIC && + sb->s_magic != RAMFS_MAGIC) { + transmute = true; + sbsp->smk_flags |= SMK_SB_UNTRUSTED; + } + } + + sbsp->smk_flags |= SMK_SB_INITIALIZED; + sb->s_security = sbsp; + ctx->sbsp = NULL; + + /* Initialize the root inode. 
*/ + isp = inode->i_security; + if (isp == NULL) { + isp = ctx->isp; + ctx->isp = NULL; + inode->i_security = isp; + } else + isp->smk_inode = sbsp->smk_root; + + if (transmute) + isp->smk_flags |= SMK_INODE_TRANSMUTE; + + return 0; +} /* * Superblock Hooks. @@ -570,6 +885,7 @@ static void smack_sb_free_security(struct super_block *sb) /** * smack_sb_copy_data - copy mount options data for processing * @orig: where to start + * @orig_size: Size of orig buffer * @smackopts: mount options string * * Returns 0 on success or -ENOMEM on error. @@ -577,7 +893,7 @@ static void smack_sb_free_security(struct super_block *sb) * Copy the Smack specific mount options out of the mount * options list. */ -static int smack_sb_copy_data(char *orig, char *smackopts) +static int smack_sb_copy_data(char *orig, size_t orig_size, char *smackopts) { char *cp, *commap, *otheropts, *dp; @@ -851,37 +1167,6 @@ static int smack_set_mnt_opts(struct super_block *sb, } /** - * smack_sb_kern_mount - Smack specific mount processing - * @sb: the file system superblock - * @flags: the mount flags - * @data: the smack mount options - * - * Returns 0 on success, an error code on failure - */ -static int smack_sb_kern_mount(struct super_block *sb, int flags, void *data) -{ - int rc = 0; - char *options = data; - struct security_mnt_opts opts; - - security_init_mnt_opts(&opts); - - if (!options) - goto out; - - rc = smack_parse_opts_str(options, &opts); - if (rc) - goto out_err; - -out: - rc = smack_set_mnt_opts(sb, &opts, 0, NULL); - -out_err: - security_free_mnt_opts(&opts); - return rc; -} - -/** * smack_sb_statfs - Smack check on statfs * @dentry: identifies the file system in question * @@ -4665,10 +4950,16 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ptrace_traceme, smack_ptrace_traceme), LSM_HOOK_INIT(syslog, smack_syslog), + LSM_HOOK_INIT(fs_context_alloc, smack_fs_context_alloc), + LSM_HOOK_INIT(fs_context_dup, smack_fs_context_dup), + 
LSM_HOOK_INIT(fs_context_free, smack_fs_context_free), + LSM_HOOK_INIT(fs_context_parse_param, smack_fs_context_parse_param), + LSM_HOOK_INIT(fs_context_validate, smack_fs_context_validate), + LSM_HOOK_INIT(sb_get_tree, smack_sb_get_tree), + LSM_HOOK_INIT(sb_alloc_security, smack_sb_alloc_security), LSM_HOOK_INIT(sb_free_security, smack_sb_free_security), LSM_HOOK_INIT(sb_copy_data, smack_sb_copy_data), - LSM_HOOK_INIT(sb_kern_mount, smack_sb_kern_mount), LSM_HOOK_INIT(sb_statfs, smack_sb_statfs), LSM_HOOK_INIT(sb_set_mnt_opts, smack_set_mnt_opts), LSM_HOOK_INIT(sb_parse_opts_str, smack_parse_opts_str), diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 06b517075ec0..8972da495dfe 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -2844,13 +2844,15 @@ static const struct file_operations smk_ptrace_ops = { * smk_fill_super - fill the smackfs superblock * @sb: the empty superblock * @data: unused + * @data_size: size of data buffer * @silent: unused * * Fill in the well known entries for the smack filesystem * * Returns 0 on success, an error code on failure */ -static int smk_fill_super(struct super_block *sb, void *data, int silent) +static int smk_fill_super(struct super_block *sb, void *data, size_t data_size, + int silent) { int rc; @@ -2931,9 +2933,10 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent) * Returns what the lower level code does. 
*/ static struct dentry *smk_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, + void *data, size_t data_size) { - return mount_single(fs_type, flags, data, smk_fill_super); + return mount_single(fs_type, flags, data, data_size, smk_fill_super); } static struct file_system_type smk_fs_type = { diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h index 539bcdd30bb8..e637ce73f7f9 100644 --- a/security/tomoyo/common.h +++ b/security/tomoyo/common.h @@ -971,6 +971,9 @@ int tomoyo_init_request_info(struct tomoyo_request_info *r, const u8 index); int tomoyo_mkdev_perm(const u8 operation, const struct path *path, const unsigned int mode, unsigned int dev); +int tomoyo_mount_permission_fc(struct fs_context *fc, + const struct path *mountpoint, + unsigned int mnt_flags); int tomoyo_mount_permission(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data_page); diff --git a/security/tomoyo/mount.c b/security/tomoyo/mount.c index 807fd91dbb54..9ec84ab6f5e1 100644 --- a/security/tomoyo/mount.c +++ b/security/tomoyo/mount.c @@ -6,6 +6,8 @@ */ #include <linux/slab.h> +#include <linux/fs_context.h> +#include <uapi/linux/mount.h> #include "common.h" /* String table for special mount operations. */ @@ -235,3 +237,47 @@ int tomoyo_mount_permission(const char *dev_name, const struct path *path, tomoyo_read_unlock(idx); return error; } + +/** + * tomoyo_mount_permission_fc - Check permission to create a new mount. + * @fc: Context describing the object to be mounted. + * @mountpoint: The target object to mount on. + * @mnt: The MNT_* flags to be set on the mountpoint. + * + * Check the permission to create a mount of the object described in @fc. Note + * that the source object may be a newly created superblock or may be an + * existing one picked from the filesystem (bind mount). + * + * Returns 0 on success, negative value otherwise. 
+ */ +int tomoyo_mount_permission_fc(struct fs_context *fc, + const struct path *mountpoint, + unsigned int mnt_flags) +{ + struct tomoyo_request_info r; + unsigned int ms_flags = 0; + int error; + int idx; + + if (tomoyo_init_request_info(&r, NULL, TOMOYO_MAC_FILE_MOUNT) == + TOMOYO_CONFIG_DISABLED) + return 0; + + /* Convert MNT_* flags to MS_* equivalents. */ + if (mnt_flags & MNT_NOSUID) ms_flags |= MS_NOSUID; + if (mnt_flags & MNT_NODEV) ms_flags |= MS_NODEV; + if (mnt_flags & MNT_NOEXEC) ms_flags |= MS_NOEXEC; + if (mnt_flags & MNT_NOATIME) ms_flags |= MS_NOATIME; + if (mnt_flags & MNT_NODIRATIME) ms_flags |= MS_NODIRATIME; + if (mnt_flags & MNT_RELATIME) ms_flags |= MS_RELATIME; + if (mnt_flags & MNT_READONLY) ms_flags |= MS_RDONLY; + + idx = tomoyo_read_lock(); + /* TODO: There may be multiple sources; for the moment, just pick the + * first if there is one. + */ + error = tomoyo_mount_acl(&r, fc->source, mountpoint, fc->fs_type->name, + ms_flags); + tomoyo_read_unlock(idx); + return error; +} diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 1b5b5097efd7..dcb7980ef255 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -392,6 +392,20 @@ static int tomoyo_path_chroot(const struct path *path) } /** + * tomoyo_sb_mount - Target for security_sb_mountpoint(). + * @fc: Context describing the object to be mounted. + * @mountpoint: The target object to mount on. + * @mnt_flags: Mountpoint specific options (as MNT_* flags). + * + * Returns 0 on success, negative value otherwise. + */ +static int tomoyo_sb_mountpoint(struct fs_context *fc, struct path *mountpoint, + unsigned int mnt_flags) +{ + return tomoyo_mount_permission_fc(fc, mountpoint, mnt_flags); +} + +/** * tomoyo_sb_mount - Target for security_sb_mount(). * * @dev_name: Name of device file. Maybe NULL. @@ -399,11 +413,13 @@ static int tomoyo_path_chroot(const struct path *path) * @type: Name of filesystem type. Maybe NULL. * @flags: Mount options. 
* @data: Optional data. Maybe NULL. + * @data_size: Size of data. * * Returns 0 on success, negative value otherwise. */ static int tomoyo_sb_mount(const char *dev_name, const struct path *path, - const char *type, unsigned long flags, void *data) + const char *type, unsigned long flags, + void *data, size_t data_size) { return tomoyo_mount_permission(dev_name, path, type, flags, data); } @@ -519,6 +535,7 @@ static struct security_hook_list tomoyo_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(path_chmod, tomoyo_path_chmod), LSM_HOOK_INIT(path_chown, tomoyo_path_chown), LSM_HOOK_INIT(path_chroot, tomoyo_path_chroot), + LSM_HOOK_INIT(sb_mountpoint, tomoyo_sb_mountpoint), LSM_HOOK_INIT(sb_mount, tomoyo_sb_mount), LSM_HOOK_INIT(sb_umount, tomoyo_sb_umount), LSM_HOOK_INIT(sb_pivotroot, tomoyo_sb_pivotroot), |