diff options
author | Miklos Szeredi <miklos@szeredi.hu> | 2005-06-02 13:03:09 +0000 |
---|---|---|
committer | Miklos Szeredi <miklos@szeredi.hu> | 2005-06-02 13:03:09 +0000 |
commit | 106aa7664c13aa3c76de4b57a67e3bd603055910 (patch) | |
tree | 6476a2413b6fb75201a276d23248650aef160f0e | |
parent | 33d1cfc63c8fd0bbfd85a7b90e8b1fc7b557bc9f (diff) | |
download | fuse-106aa7664c13aa3c76de4b57a67e3bd603055910.tar.gz |
revert non-bugfix things
-rw-r--r-- | ChangeLog | 18 | ||||
-rw-r--r-- | Makefile.am | 3 | ||||
-rw-r--r-- | doc/kernel.txt | 130 | ||||
-rw-r--r-- | kernel/dev.c | 160 | ||||
-rw-r--r-- | kernel/dir.c | 2 | ||||
-rw-r--r-- | kernel/file.c | 6 | ||||
-rw-r--r-- | kernel/fuse_i.h | 3 | ||||
-rw-r--r-- | kernel/fuse_kernel.h | 18 | ||||
-rw-r--r-- | lib/fuse.c | 1 |
9 files changed, 39 insertions, 302 deletions
@@ -1,21 +1,3 @@ -2005-03-31 Miklos Szeredi <miklos@szeredi.hu> - - * kernel API: add padding to structures, so 64bit and 32bit - compiler will return the same size - - * kernel API: add offset field to fuse_dirent. This will allow - more sophisticated readdir interface for userspace - - * kernel API: change major number to 6 - - * kernel: fix warnings on 64bit archs - - * kernel: in case of API version mismatch, return ECONNREFUSED - -2005-03-24 Miklos Szeredi <miklos@szeredi.hu> - - * kernel: trivial cleanups - 2005-03-19 Miklos Szeredi <miklos@szeredi.hu> * kernel: add locking to background list (fixes previous fix) diff --git a/Makefile.am b/Makefile.am index caeaed7..28f5b32 100644 --- a/Makefile.am +++ b/Makefile.am @@ -7,8 +7,7 @@ EXTRA_DIST = \ README* \ Filesystems \ FAQ \ - doc/how-fuse-works \ - doc/kernel.txt + doc/how-fuse-works pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = fuse.pc diff --git a/doc/kernel.txt b/doc/kernel.txt deleted file mode 100644 index 6fcfada..0000000 --- a/doc/kernel.txt +++ /dev/null @@ -1,130 +0,0 @@ -The following diagram shows how a filesystem operation (in this -example unlink) is performed in FUSE. - -NOTE: everything in this description is greatly simplified - - | "rm /mnt/fuse/file" | FUSE filesystem daemon - | | - | | >sys_read() - | | >fuse_dev_read() - | | >request_wait() - | | [sleep on fc->waitq] - | | - | >sys_unlink() | - | >fuse_unlink() | - | [get request from | - | fc->unused_list] | - | >request_send() | - | [queue req on fc->pending] | - | [wake up fc->waitq] | [woken up] - | >request_wait_answer() | - | [sleep on req->waitq] | - | | <request_wait() - | | [remove req from fc->pending] - | | [copy req to read buffer] - | | [add req to fc->processing] - | | <fuse_dev_read() - | | <sys_read() - | | - | | [perform unlink] - | | - | | >sys_write() - | | >fuse_dev_write() - | | [look up req in fc->processing] - | | [remove from fc->processing] - | | [copy write buffer to req] - | [woken up] | [wake up req->waitq] - | | <fuse_dev_write() - | | <sys_write() - | <request_wait_answer() | - | <request_send() | - | [add request to | - | fc->unused_list] | - | <fuse_unlink() | - | <sys_unlink() | - -There are a couple of ways in which to deadlock a FUSE filesystem. -Since we are talking about unprivileged userspace programs, -something must be done about these. - -Scenario 1 - Simple deadlock ------------------------------ - - | "rm /mnt/fuse/file" | FUSE filesystem daemon - | | - | >sys_unlink("/mnt/fuse/file") | - | [acquire inode semaphore | - | for "file"] | - | >fuse_unlink() | - | [sleep on req->waitq] | - | | <sys_read() - | | >sys_unlink("/mnt/fuse/file") - | | [acquire inode semaphore - | | for "file"] - | | *DEADLOCK* - -The solution for this is to allow requests to be interrupted while -they are in userspace: - - | [interrupted by signal] | - | <fuse_unlink() | - | [release semaphore] | [semaphore acquired] - | <sys_unlink() | - | | >fuse_unlink() - | | [queue req on fc->pending] - | | [wake up fc->waitq] - | | [sleep on req->waitq] - -If the filesystem daemon was single threaded, this will stop here, -since there's no other thread to dequeue and execute the request. -In this case the solution is to kill the FUSE daemon as well. If -there are multiple serving threads, you just have to kill them as -long as any remain. - -Moral: a filesystem which deadlocks, can soon find itself dead. - -Scenario 2 - Tricky deadlock ----------------------------- - -This one needs a carefully crafted filesystem. It's a variation on -the above, only the call back to the filesystem is not explicit, -but is caused by a pagefault. - - | Kamikaze filesystem thread 1 | Kamikaze filesystem thread 2 - | | - | [fd = open("/mnt/fuse/file")] | [request served normally] - | [mmap fd to 'addr'] | - | [close fd] | [FLUSH triggers 'magic' flag] - | [read a byte from addr] | - | >do_page_fault() | - | [find or create page] | - | [lock page] | - | >fuse_readpage() | - | [queue READ request] | - | [sleep on req->waitq] | - | | [read request to buffer] - | | [create reply header before addr] - | | >sys_write(addr - headerlength) - | | >fuse_dev_write() - | | [look up req in fc->processing] - | | [remove from fc->processing] - | | [copy write buffer to req] - | | >do_page_fault() - | | [find or create page] - | | [lock page] - | | * DEADLOCK * - -Solution is again to let the the request be interrupted (not -elaborated further). - -An additional problem is that while the write buffer is being -copied to the request, the request must not be interrupted. This -is because the destination address of the copy may not be valid -after the request is interrupted. - -This is solved with doing the copy atomically, and allowing -interruption while the page(s) belonging to the write buffer are -faulted with get_user_pages(). The 'req->locked' flag indicates -when the copy is taking place, and interruption is delayed until -this flag is unset. - diff --git a/kernel/dev.c b/kernel/dev.c index 29a9b63..56a51cd 100644 --- a/kernel/dev.c +++ b/kernel/dev.c @@ -146,11 +146,6 @@ struct fuse_req *fuse_get_request(struct fuse_conn *fc) return do_get_request(fc); } -/* - * Non-interruptible version of the above function is for operations - * which can't legally return -ERESTART{SYS,NOINTR}. This can still - * return NULL, but only in case the signal is SIGKILL. - */ struct fuse_req *fuse_get_request_nonint(struct fuse_conn *fc) { int intr; @@ -170,7 +165,6 @@ static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req) else fuse_request_free(req); - /* If we are in debt decrease that first */ if (fc->outstanding_debt) fc->outstanding_debt--; else @@ -195,17 +189,7 @@ void fuse_release_background(struct fuse_req *req) spin_unlock(&fuse_lock); } -/* - * This function is called when a request is finished. Either a reply - * has arrived or it was interrupted (and not yet sent) or some error - * occured during communication with userspace, or the device file was - * closed. It decreases the referece count for the request. In case - * of a background request the referece to the stored objects are - * released. The requester thread is woken up (if still waiting), and - * finally the request is either freed or put on the unused_list - * - * Called with fuse_lock, unlocks it - */ +/* Called with fuse_lock, unlocks it */ static void request_end(struct fuse_conn *fc, struct fuse_req *req) { int putback; @@ -221,10 +205,6 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) wake_up(&req->waitq); if (req->in.h.opcode == FUSE_INIT) { int i; - - if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION) - fc->conn_error = 1; - /* After INIT reply is received other requests can go out. So do (FUSE_MAX_OUTSTANDING - 1) number of up()s on outstanding_sem. The last up() is done in @@ -236,37 +216,10 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) fuse_putback_request(fc, req); } -/* - * Unfortunately request interruption not just solves the deadlock - * problem, it causes problems too. These stem from the fact, that an - * interrupted request is continued to be processed in userspace, - * while all the locks and object references (inode and file) held - * during the operation are released. - * - * To release the locks is exactly why there's a need to interrupt the - * request, so there's not a lot that can be done about this, except - * introduce additional locking in userspace. - * - * More important is to keep inode and file references until userspace - * has replied, otherwise FORGET and RELEASE could be sent while the - * inode/file is still used by the filesystem. - * - * For this reason the concept of "background" request is introduced. - * An interrupted request is backgrounded if it has been already sent - * to userspace. Backgrounding involves getting an extra reference to - * inode(s) or file used in the request, and adding the request to - * fc->background list. When a reply is received for a background - * request, the object references are released, and the request is - * removed from the list. If the filesystem is unmounted while there - * are still background requests, the list is walked and references - * are released as if a reply was received. - * - * There's one more use for a background request. The RELEASE message is - * always sent as background, since it doesn't return an error or - * data. - */ static void background_request(struct fuse_conn *fc, struct fuse_req *req) { + /* Need to get hold of the inode(s) and/or file used in the + request, so FORGET and RELEASE are not sent too early */ req->background = 1; list_add(&req->bg_entry, &fc->background); if (req->inode) @@ -318,7 +271,7 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req, if (req->locked) { /* This is uninterruptible sleep, because data is being copied to/from the buffers of req. During - locked state, there mustn't be any filesystem + locked state, there musn't be any filesystem operation (e.g. page fault), since that could lead to deadlock */ spin_unlock(&fuse_lock); @@ -353,12 +306,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) req->in.h.len = sizeof(struct fuse_in_header) + len_args(req->in.numargs, (struct fuse_arg *) req->in.args); if (!req->preallocated) { - /* If request is not preallocated (either FORGET or - RELEASE), then still decrease outstanding_sem, so - user can't open infinite number of files while not - processing the RELEASE requests. However for - efficiency do it without blocking, so if down() - would block, just increase the debt instead */ + /* decrease outstanding_sem, but without blocking... */ if (down_trylock(&fc->outstanding_sem)) fc->outstanding_debt++; } @@ -371,11 +319,8 @@ static void request_send_wait(struct fuse_conn *fc, struct fuse_req *req, { req->isreply = 1; spin_lock(&fuse_lock); - if (!fc->file) - req->out.h.error = -ENOTCONN; - else if (fc->conn_error) - req->out.h.error = -ECONNREFUSED; - else { + req->out.h.error = -ENOTCONN; + if (fc->file) { queue_request(fc, req); /* acquire extra reference, since request is still needed after request_end() */ @@ -391,11 +336,6 @@ void request_send(struct fuse_conn *fc, struct fuse_req *req) request_send_wait(fc, req, 1); } -/* - * Non-interruptible version of the above function is for operations - * which can't legally return -ERESTART{SYS,NOINTR}. This can still - * be interrupted but only with SIGKILL. - */ void request_send_nonint(struct fuse_conn *fc, struct fuse_req *req) { request_send_wait(fc, req, 0); @@ -446,11 +386,6 @@ void fuse_send_init(struct fuse_conn *fc) request_send_background(fc, req); } -/* - * Lock the request. Up to the next unlock_request() there mustn't be - * anything that could cause a page-fault. If the request was already - * interrupted bail out. - */ static inline int lock_request(struct fuse_req *req) { int err = 0; @@ -465,11 +400,6 @@ static inline int lock_request(struct fuse_req *req) return err; } -/* - * Unlock request. If it was interrupted during being locked, the - * requester thread is currently waiting for it to be unlocked, so - * wake it up. - */ static inline void unlock_request(struct fuse_req *req) { if (req) { @@ -481,6 +411,15 @@ static inline void unlock_request(struct fuse_req *req) } } +/* Why all this complex one-page-at-a-time copying needed instead of + just copy_to/from_user()? The reason is that blocking on a page + fault must be avoided while the request is locked. This is because + if servicing that pagefault happens to be done by this filesystem, + an unbreakable deadlock can occur. So the code is careful to allow + request interruption during get_user_pages(), and only lock the + request while doing kmapped copying, which cannot block. + */ + struct fuse_copy_state { int write; struct fuse_req *req; @@ -494,18 +433,26 @@ struct fuse_copy_state { unsigned len; }; -static void fuse_copy_init(struct fuse_copy_state *cs, int write, - struct fuse_req *req, const struct iovec *iov, - unsigned long nr_segs) +static unsigned fuse_copy_init(struct fuse_copy_state *cs, int write, + struct fuse_req *req, const struct iovec *iov, + unsigned long nr_segs) { + unsigned i; + unsigned nbytes; + memset(cs, 0, sizeof(*cs)); cs->write = write; cs->req = req; cs->iov = iov; cs->nr_segs = nr_segs; + + nbytes = 0; + for (i = 0; i < nr_segs; i++) + nbytes += iov[i].iov_len; + + return nbytes; } -/* Unmap and put previous page of userspace buffer */ static inline void fuse_copy_finish(struct fuse_copy_state *cs) { if (cs->mapaddr) { @@ -519,10 +466,6 @@ static inline void fuse_copy_finish(struct fuse_copy_state *cs) } } -/* - * Get another pagefull of userspace buffer, and map it to kernel - * address space, and lock request - */ static int fuse_copy_fill(struct fuse_copy_state *cs) { unsigned long offset; @@ -554,7 +497,6 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) return lock_request(cs->req); } -/* Do as much copy to/from userspace buffer as we can */ static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size) { @@ -572,10 +514,6 @@ static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val, return ncpy; } -/* - * Copy a page in the request to/from the userspace buffer. Must be - * done atomically - */ static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page, unsigned offset, unsigned count, int zeroing) { @@ -601,7 +539,6 @@ static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page, return 0; } -/* Copy pages in the request to/from userspace buffer */ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, int zeroing) { @@ -623,7 +560,6 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, return 0; } -/* Copy a single argument in the request to/from userspace buffer */ static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size) { while (size) { @@ -635,7 +571,6 @@ static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size) return 0; } -/* Copy request arguments to/from userspace buffer */ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, unsigned argpages, struct fuse_arg *args, int zeroing) @@ -653,7 +588,6 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, return err; } -/* Wait until a request is available on the pending list */ static void request_wait(struct fuse_conn *fc) { DECLARE_WAITQUEUE(wait, current); @@ -672,26 +606,6 @@ static void request_wait(struct fuse_conn *fc) remove_wait_queue(&fc->waitq, &wait); } -#ifndef KERNEL_2_6 -static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs) -{ - unsigned long seg; - size_t ret = 0; - - for (seg = 0; seg < nr_segs; seg++) - ret += iov[seg].iov_len; - return ret; -} -#endif -/* - * Read a single request into the userspace filesystem's buffer. This - * function waits until a request is available, then removes it from - * the pending list and copies request data to userspace buffer. If - * no reply is needed (FORGET) or request has been interrupted or - * there was an error during the copying then it's finished by calling - * request_end(). Otherwise add it to the processing list, and set - * the 'sent' flag. - */ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t *off) { @@ -700,6 +614,7 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, struct fuse_req *req; struct fuse_in *in; struct fuse_copy_state cs; + unsigned nbytes; unsigned reqsize; spin_lock(&fuse_lock); @@ -721,9 +636,9 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, in = &req->in; reqsize = req->in.h.len; - fuse_copy_init(&cs, 1, req, iov, nr_segs); + nbytes = fuse_copy_init(&cs, 1, req, iov, nr_segs); err = -EINVAL; - if (iov_length(iov, nr_segs) >= reqsize) { + if (nbytes >= reqsize) { err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); if (!err) err = fuse_copy_args(&cs, in->numargs, in->argpages, @@ -764,7 +679,6 @@ static ssize_t fuse_dev_read(struct file *file, char __user *buf, return fuse_dev_readv(file, &iov, 1, off); } -/* Look up request on processing list by unique ID */ static struct fuse_req *request_find(struct fuse_conn *fc, unsigned unique) { struct list_head *entry; @@ -801,18 +715,11 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, out->page_zeroing); } -/* - * Write a single reply to a request. First the header is copied from - * the write buffer. The request is then searched on the processing - * list by the unique ID found in the header. If found, then remove - * it from the list and copy the rest of the buffer to the request. - * The request is finished by calling request_end() - */ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t *off) { int err; - unsigned nbytes = iov_length(iov, nr_segs); + unsigned nbytes; struct fuse_req *req; struct fuse_out_header oh; struct fuse_copy_state cs; @@ -820,7 +727,7 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov, if (!fc) return -ENODEV; - fuse_copy_init(&cs, 0, NULL, iov, nr_segs); + nbytes = fuse_copy_init(&cs, 0, NULL, iov, nr_segs); if (nbytes < sizeof(struct fuse_out_header)) return -EINVAL; @@ -897,7 +804,6 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) return mask; } -/* Abort all requests on the given list (pending or processing) */ static void end_requests(struct fuse_conn *fc, struct list_head *head) { while (!list_empty(head)) { diff --git a/kernel/dir.c b/kernel/dir.c index 2e02023..be51309 100644 --- a/kernel/dir.c +++ b/kernel/dir.c @@ -512,7 +512,7 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file, break; over = filldir(dstbuf, dirent->name, dirent->namelen, - dirent->off, dirent->ino, dirent->type); + file->f_pos, dirent->ino, dirent->type); if (over) break; diff --git a/kernel/file.c b/kernel/file.c index 88bf5f4..74921d6 100644 --- a/kernel/file.c +++ b/kernel/file.c @@ -519,7 +519,7 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, { struct inode *inode = file->f_dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - size_t nmax = write ? fc->max_write : fc->max_read; + unsigned nmax = write ? fc->max_write : fc->max_read; loff_t pos = *ppos; ssize_t res = 0; struct fuse_req *req = fuse_get_request(fc); @@ -527,8 +527,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, return -ERESTARTSYS; while (count) { - size_t tmp; - size_t nres; + unsigned tmp; + unsigned nres; size_t nbytes = min(count, nmax); int err = fuse_get_user_pages(req, buf, nbytes, !write); if (err) { diff --git a/kernel/fuse_i.h b/kernel/fuse_i.h index 759278d..d877071 100644 --- a/kernel/fuse_i.h +++ b/kernel/fuse_i.h @@ -325,9 +325,6 @@ struct fuse_conn { /** Is removexattr not implemented by fs? */ unsigned no_removexattr : 1; - /** Connection failed (version mismatch) */ - unsigned conn_error : 1; - #ifdef KERNEL_2_6 /** Backing dev info */ struct backing_dev_info bdi; diff --git a/kernel/fuse_kernel.h b/kernel/fuse_kernel.h index 6dcf9c6..870ce59 100644 --- a/kernel/fuse_kernel.h +++ b/kernel/fuse_kernel.h @@ -11,7 +11,7 @@ #include <asm/types.h> /** Version number of this interface */ -#define FUSE_KERNEL_VERSION 6 +#define FUSE_KERNEL_VERSION 5 /** Minor version number of this interface */ #define FUSE_KERNEL_MINOR_VERSION 1 @@ -25,9 +25,6 @@ /** The minor number of the fuse character device */ #define FUSE_MINOR 229 -/* Make sure all structures are padded to 64bit boundary, so 32bit - userspace works under 64bit kernels */ - struct fuse_attr { __u64 ino; __u64 size; @@ -129,7 +126,6 @@ struct fuse_mknod_in { struct fuse_mkdir_in { __u32 mode; - __u32 padding; }; struct fuse_rename_in { @@ -142,38 +138,32 @@ struct fuse_link_in { struct fuse_setattr_in { __u32 valid; - __u32 padding; struct fuse_attr attr; }; struct fuse_open_in { __u32 flags; - __u32 padding; }; struct fuse_open_out { __u64 fh; __u32 open_flags; - __u32 padding; }; struct fuse_release_in { __u64 fh; __u32 flags; - __u32 padding; }; struct fuse_flush_in { __u64 fh; __u32 flush_flags; - __u32 padding; }; struct fuse_read_in { __u64 fh; __u64 offset; __u32 size; - __u32 padding; }; struct fuse_write_in { @@ -185,7 +175,6 @@ struct fuse_write_in { struct fuse_write_out { __u32 size; - __u32 padding; }; struct fuse_statfs_out { @@ -195,7 +184,6 @@ struct fuse_statfs_out { struct fuse_fsync_in { __u64 fh; __u32 fsync_flags; - __u32 padding; }; struct fuse_setxattr_in { @@ -205,12 +193,10 @@ struct fuse_setxattr_in { struct fuse_getxattr_in { __u32 size; - __u32 padding; }; struct fuse_getxattr_out { __u32 size; - __u32 padding; }; struct fuse_init_in_out { @@ -226,7 +212,6 @@ struct fuse_in_header { __u32 uid; __u32 gid; __u32 pid; - __u32 padding; }; struct fuse_out_header { @@ -237,7 +222,6 @@ struct fuse_out_header { struct fuse_dirent { __u64 ino; - __u64 off; __u32 namelen; __u32 type; char name[0]; @@ -464,7 +464,6 @@ static int fill_dir(struct fuse_dirhandle *dh, const char *name, int type, dirent->namelen = namelen; strncpy(dirent->name, name, namelen); dirent->type = type; - dirent->off = ftell(dh->fp); reclen = FUSE_DIRENT_SIZE(dirent); res = fwrite(dirent, reclen, 1, dh->fp); free(dirent); |