/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2006  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/uio.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/slab.h>

MODULE_ALIAS_MISCDEV(FUSE_MINOR);

static kmem_cache_t *fuse_req_cachep;

static struct fuse_conn *fuse_get_conn(struct file *file)
{
	/*
	 * Lockless access is OK, because file->private_data is set
	 * once during mount and is valid until the file is released.
	 */
	return file->private_data;
}

static void fuse_request_init(struct fuse_req *req)
{
	memset(req, 0, sizeof(*req));
	INIT_LIST_HEAD(&req->list);
	init_waitqueue_head(&req->waitq);
	atomic_set(&req->count, 1);
}

struct fuse_req *fuse_request_alloc(void)
{
	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
	if (req)
		fuse_request_init(req);
	return req;
}

void fuse_request_free(struct fuse_req *req)
{
	kmem_cache_free(fuse_req_cachep, req);
}

static void block_sigs(sigset_t *oldset)
{
	sigset_t mask;

	siginitsetinv(&mask, sigmask(SIGKILL));
	sigprocmask(SIG_BLOCK, &mask, oldset);
}

static void restore_sigs(sigset_t *oldset)
{
	sigprocmask(SIG_SETMASK, oldset, NULL);
}

/*
 * Reset request, so that it can be reused
 *
 * The caller must be _very_ careful to make sure that it holds the
 * only reference to req
 */
void fuse_reset_request(struct fuse_req *req)
{
	BUG_ON(atomic_read(&req->count) != 1);
	fuse_request_init(req);
}

static void __fuse_get_request(struct fuse_req *req)
{
	atomic_inc(&req->count);
}

/* Must be called with > 1 refcount */
static void __fuse_put_request(struct fuse_req *req)
{
	BUG_ON(atomic_read(&req->count) < 2);
	atomic_dec(&req->count);
}
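
/*
 * Note: fuse_get_req() below allocates a request and fills in the
 * credentials of the current task.  It may sleep while the connection
 * is blocked (fc->blocked); since all signals except SIGKILL are
 * blocked around the wait, only a SIGKILL can interrupt it, in which
 * case ERR_PTR(-EINTR) is returned.
 */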

struct fuse_req *fuse_get_req(struct fuse_conn *fc)
{
	struct fuse_req *req;
	sigset_t oldset;
	int err;

	block_sigs(&oldset);
	err = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
	restore_sigs(&oldset);
	if (err)
		return ERR_PTR(-EINTR);

	req = fuse_request_alloc();
	if (!req)
		return ERR_PTR(-ENOMEM);

	atomic_inc(&fc->num_waiting);
	fuse_request_init(req);
	req->in.h.uid = current->fsuid;
	req->in.h.gid = current->fsgid;
	req->in.h.pid = current->pid;
	return req;
}

void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
	if (atomic_dec_and_test(&req->count)) {
		atomic_dec(&fc->num_waiting);
		fuse_request_free(req);
	}
}
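
/*
 * Note on reference counting: fuse_request_init() starts the count at
 * one, __fuse_get_request()/__fuse_put_request() adjust it while the
 * request is still needed after request_end(), and fuse_put_request()
 * drops the caller's reference; when the count reaches zero it also
 * decrements fc->num_waiting and frees the request.
 */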

void fuse_remove_background(struct fuse_conn *fc, struct fuse_req *req)
{
	list_del_init(&req->bg_entry);
	if (fc->num_background == FUSE_MAX_BACKGROUND) {
		fc->blocked = 0;
		wake_up_all(&fc->blocked_waitq);
	}
	fc->num_background--;
}

/*
 * This function is called when a request is finished.  Either a reply
 * has arrived or it was interrupted (and not yet sent) or some error
 * occurred during communication with userspace, or the device file
 * was closed.  In case of a background request the references to the
 * stored objects are released.  The requester thread is woken up (if
 * still waiting), the 'end' callback is called if given, else the
 * reference to the request is released.
 *
 * Releasing the extra reference for foreground requests must be done
 * within the same locked region as setting the state to finished.
 * This is because fuse_reset_request() may be called after the
 * request is finished and it must be the sole possessor.  If the
 * request is interrupted and put in the background, it will return
 * with an error and hence never be reset and reused.
 *
 * Called with fc->lock, unlocks it
 */
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
{
	list_del(&req->list);
	req->state = FUSE_REQ_FINISHED;
	if (!req->background) {
		spin_unlock(&fc->lock);
		wake_up(&req->waitq);
		fuse_put_request(fc, req);
	} else {
		struct inode *inode = req->inode;
		struct inode *inode2 = req->inode2;
		struct file *file = req->file;
		void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
		req->end = NULL;
		req->inode = NULL;
		req->inode2 = NULL;
		req->file = NULL;
		if (!list_empty(&req->bg_entry))
			fuse_remove_background(fc, req);
		spin_unlock(&fc->lock);

		if (end)
			end(fc, req);
		else
			fuse_put_request(fc, req);

		if (file)
			fput(file);
		iput(inode);
		iput(inode2);
	}
}

/*
 * Unfortunately request interruption does not just solve the deadlock
 * problem, it also causes problems.  These stem from the fact that an
 * interrupted request continues to be processed in userspace, while
 * all the locks and object references (inode and file) held during
 * the operation have been released.
 *
 * Releasing the locks is exactly why the request needs to be
 * interruptible, so there's not a lot that can be done about this,
 * except introduce additional locking in userspace.
 *
 * More important is to keep the inode and file references until
 * userspace has replied, otherwise FORGET and RELEASE could be sent
 * while the inode/file is still used by the filesystem.
 *
 * For this reason the concept of a "background" request is
 * introduced.  An interrupted request is backgrounded if it has
 * already been sent to userspace.  Backgrounding involves getting an
 * extra reference to the inode(s) or file used in the request, and
 * adding the request to the fc->background list.  When a reply is
 * received for a background request, the object references are
 * released, and the request is removed from the list.  If the
 * filesystem is unmounted while there are still background requests,
 * the list is walked and the references are released as if a reply
 * had been received.
 *
 * There's one more use for a background request.  The RELEASE message
 * is always sent as a background request, since it doesn't return an
 * error or invalidate the page cache.
 */
static void background_request(struct fuse_conn *fc, struct fuse_req *req)
{
	req->background = 1;
	list_add(&req->bg_entry, &fc->background);
	fc->num_background++;
	if (fc->num_background == FUSE_MAX_BACKGROUND)
		fc->blocked = 1;
	if (req->inode)
		req->inode = igrab(req->inode);
	if (req->inode2)
		req->inode2 = igrab(req->inode2);
	if (req->file)
		get_file(req->file);
}

/* Called with fc->lock held.  Releases, and then reacquires it. */
static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
{
	sigset_t oldset;

	spin_unlock(&fc->lock);
	block_sigs(&oldset);
	wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
	restore_sigs(&oldset);
	spin_lock(&fc->lock);
	if (req->state == FUSE_REQ_FINISHED && !req->interrupted)
		return;

	if (!req->interrupted) {
		req->out.h.error = -EINTR;
		req->interrupted = 1;
	}
	if (req->locked) {
		/* This is uninterruptible sleep, because data is
		   being copied to/from the buffers of req.  During
		   locked state, there mustn't be any filesystem
		   operation (e.g. page fault), since that could lead
		   to a deadlock */
		spin_unlock(&fc->lock);
		wait_event(req->waitq, !req->locked);
		spin_lock(&fc->lock);
	}
	if (req->state == FUSE_REQ_PENDING) {
		list_del(&req->list);
		__fuse_put_request(req);
	} else if (req->state == FUSE_REQ_SENT)
		background_request(fc, req);
}

static unsigned len_args(unsigned numargs, struct fuse_arg *args)
{
	unsigned nbytes = 0;
	unsigned i;

	for (i = 0; i < numargs; i++)
		nbytes += args[i].size;

	return nbytes;
}

static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
{
	fc->reqctr++;
	/* zero is special */
	if (fc->reqctr == 0)
		fc->reqctr = 1;
	req->in.h.unique = fc->reqctr;
	req->in.h.len = sizeof(struct fuse_in_header) +
		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
	list_add_tail(&req->list, &fc->pending);
	req->state = FUSE_REQ_PENDING;
	wake_up(&fc->waitq);
	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

/*
 * This can only be interrupted by a SIGKILL
 */
void request_send(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;
	spin_lock(&fc->lock);
	if (!fc->connected)
		req->out.h.error = -ENOTCONN;
	else if (fc->conn_error)
		req->out.h.error = -ECONNREFUSED;
	else {
		queue_request(fc, req);
		/* acquire extra reference, since request is still needed
		   after request_end() */
		__fuse_get_request(req);

		request_wait_answer(fc, req);
	}
	spin_unlock(&fc->lock);
}
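
/*
 * Typical caller pattern, as an illustrative sketch only (real call
 * sites live elsewhere in fs/fuse, e.g. dir.c and file.c, and differ
 * in opcode and argument layout; 'outarg' and 'nodeid' here are
 * placeholders):
 *
 *	struct fuse_req *req = fuse_get_req(fc);
 *	if (IS_ERR(req))
 *		return PTR_ERR(req);
 *	req->in.h.opcode = FUSE_GETATTR;
 *	req->in.h.nodeid = nodeid;
 *	req->out.numargs = 1;
 *	req->out.args[0].size = sizeof(struct fuse_attr_out);
 *	req->out.args[0].value = &outarg;
 *	request_send(fc, req);          // queues and waits for the reply
 *	err = req->out.h.error;
 *	fuse_put_request(fc, req);
 */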

static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
{
	spin_lock(&fc->lock);
	background_request(fc, req);
	if (fc->connected) {
		queue_request(fc, req);
		spin_unlock(&fc->lock);
	} else {
		req->out.h.error = -ENOTCONN;
		request_end(fc, req);
	}
}

void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 0;
	request_send_nowait(fc, req);
}

void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;
	request_send_nowait(fc, req);
}

/*
 * Lock the request.  Up to the next unlock_request() there mustn't be
 * anything that could cause a page-fault.  If the request was already
 * interrupted bail out.
 */
static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
{
	int err = 0;
	if (req) {
		spin_lock(&fc->lock);
		if (req->interrupted)
			err = -ENOENT;
		else
			req->locked = 1;
		spin_unlock(&fc->lock);
	}
	return err;
}

/*
 * Unlock request.  If it was interrupted while being locked, the
 * requester thread is currently waiting for it to be unlocked, so
 * wake it up.
 */
static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
{
	if (req) {
		spin_lock(&fc->lock);
		req->locked = 0;
		if (req->interrupted)
			wake_up(&req->waitq);
		spin_unlock(&fc->lock);
	}
}

struct fuse_copy_state {
	struct fuse_conn *fc;
	int write;
	struct fuse_req *req;
	const struct iovec *iov;
	unsigned long nr_segs;
	unsigned long seglen;
	unsigned long addr;
	struct page *pg;
	void *mapaddr;
	void *buf;
	unsigned len;
};

static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
			   int write, struct fuse_req *req,
			   const struct iovec *iov, unsigned long nr_segs)
{
	memset(cs, 0, sizeof(*cs));
	cs->fc = fc;
	cs->write = write;
	cs->req = req;
	cs->iov = iov;
	cs->nr_segs = nr_segs;
}

/* Unmap and put previous page of userspace buffer */
static void fuse_copy_finish(struct fuse_copy_state *cs)
{
	if (cs->mapaddr) {
		kunmap_atomic(cs->mapaddr, KM_USER0);
		if (cs->write) {
			flush_dcache_page(cs->pg);
			set_page_dirty_lock(cs->pg);
		}
		put_page(cs->pg);
		cs->mapaddr = NULL;
	}
}

/*
 * Get another pagefull of userspace buffer, and map it to kernel
 * address space, and lock request
 */
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
	unsigned long offset;
	int err;

	unlock_request(cs->fc, cs->req);
	fuse_copy_finish(cs);
	if (!cs->seglen) {
		BUG_ON(!cs->nr_segs);
		cs->seglen = cs->iov[0].iov_len;
		cs->addr = (unsigned long) cs->iov[0].iov_base;
		cs->iov++;
		cs->nr_segs--;
	}
	down_read(&current->mm->mmap_sem);
	err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
			     &cs->pg, NULL);
	up_read(&current->mm->mmap_sem);
	if (err < 0)
		return err;
	BUG_ON(err != 1);
	offset = cs->addr % PAGE_SIZE;
	cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
	cs->buf = cs->mapaddr + offset;
	cs->len = min(PAGE_SIZE - offset, cs->seglen);
	cs->seglen -= cs->len;
	cs->addr += cs->len;

	return lock_request(cs->fc, cs->req);
}

/* Do as much copy to/from userspace buffer as we can */
static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
{
	unsigned ncpy = min(*size, cs->len);
	if (val) {
		if (cs->write)
			memcpy(cs->buf, *val, ncpy);
		else
			memcpy(*val, cs->buf, ncpy);
		*val += ncpy;
	}
	*size -= ncpy;
	cs->len -= ncpy;
	cs->buf += ncpy;
	return ncpy;
}

/*
 * Copy a page in the request to/from the userspace buffer.  Must be
 * done atomically
 */
static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
			  unsigned offset, unsigned count, int zeroing)
{
	if (page && zeroing && count < PAGE_SIZE) {
		void *mapaddr = kmap_atomic(page, KM_USER1);
		memset(mapaddr, 0, PAGE_SIZE);
		kunmap_atomic(mapaddr, KM_USER1);
	}
	while (count) {
		int err;
		if (!cs->len && (err = fuse_copy_fill(cs)))
			return err;
		if (page) {
			void *mapaddr = kmap_atomic(page, KM_USER1);
			void *buf = mapaddr + offset;
			offset += fuse_copy_do(cs, &buf, &count);
			kunmap_atomic(mapaddr, KM_USER1);
		} else
			offset += fuse_copy_do(cs, NULL, &count);
	}
	if (page && !cs->write)
		flush_dcache_page(page);
	return 0;
}

/* Copy pages in the request to/from userspace buffer */
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
			   int zeroing)
{
	unsigned i;
	struct fuse_req *req = cs->req;
	unsigned offset = req->page_offset;
	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);

	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
		struct page *page = req->pages[i];
		int err = fuse_copy_page(cs, page, offset, count, zeroing);
		if (err)
			return err;

		nbytes -= count;
		count = min(nbytes, (unsigned) PAGE_SIZE);
		offset = 0;
	}
	return 0;
}

/* Copy a single argument in the request to/from userspace buffer */
static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
{
	while (size) {
		int err;
		if (!cs->len && (err = fuse_copy_fill(cs)))
			return err;
		fuse_copy_do(cs, &val, &size);
	}
	return 0;
}

/* Copy request arguments to/from userspace buffer */
static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
			  unsigned argpages, struct fuse_arg *args,
			  int zeroing)
{
	int err = 0;
	unsigned i;

	for (i = 0; !err && i < numargs; i++) {
		struct fuse_arg *arg = &args[i];
		if (i == numargs - 1 && argpages)
			err = fuse_copy_pages(cs, arg->size, zeroing);
		else
			err = fuse_copy_one(cs, arg->value, arg->size);
	}
	return err;
}
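
/*
 * The copy helpers above are driven the same way by both device read
 * and write paths below: fuse_copy_init() sets up the iovec cursor,
 * fuse_copy_one() moves the fixed-size header, fuse_copy_args() moves
 * the opcode-specific arguments (the last one possibly page-backed),
 * and fuse_copy_finish() unmaps the last mapped user page.
 */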

/* Wait until a request is available on the pending list */
static void request_wait(struct fuse_conn *fc)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue_exclusive(&fc->waitq, &wait);
	while (fc->connected && list_empty(&fc->pending)) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (signal_pending(current))
			break;

		spin_unlock(&fc->lock);
		schedule();
		spin_lock(&fc->lock);
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(&fc->waitq, &wait);
}

/*
 * Read a single request into the userspace filesystem's buffer.  This
 * function waits until a request is available, then removes it from
 * the pending list and copies request data to the userspace buffer.
 * If no reply is needed (FORGET) or the request has been interrupted
 * or there was an error during the copying then it's finished by
 * calling request_end().  Otherwise add it to the processing list,
 * and set the 'sent' state.
 */
static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
			      unsigned long nr_segs, loff_t *off)
{
	int err;
	struct fuse_req *req;
	struct fuse_in *in;
	struct fuse_copy_state cs;
	unsigned reqsize;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -EPERM;

 restart:
	spin_lock(&fc->lock);
	err = -EAGAIN;
	if ((file->f_flags & O_NONBLOCK) && fc->connected &&
	    list_empty(&fc->pending))
		goto err_unlock;

	request_wait(fc);
	err = -ENODEV;
	if (!fc->connected)
		goto err_unlock;
	err = -ERESTARTSYS;
	if (list_empty(&fc->pending))
		goto err_unlock;

	req = list_entry(fc->pending.next, struct fuse_req, list);
	req->state = FUSE_REQ_READING;
	list_move(&req->list, &fc->io);

	in = &req->in;
	reqsize = in->h.len;
	/* If request is too large, reply with an error and restart the read */
	if (iov_length(iov, nr_segs) < reqsize) {
		req->out.h.error = -EIO;
		/* SETXATTR is special, since it may contain too large data */
		if (in->h.opcode == FUSE_SETXATTR)
			req->out.h.error = -E2BIG;
		request_end(fc, req);
		goto restart;
	}
	spin_unlock(&fc->lock);
	fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
	err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
	if (!err)
		err = fuse_copy_args(&cs, in->numargs, in->argpages,
				     (struct fuse_arg *) in->args, 0);
	fuse_copy_finish(&cs);
	spin_lock(&fc->lock);
	req->locked = 0;
	if (!err && req->interrupted)
		err = -ENOENT;
	if (err) {
		if (!req->interrupted)
			req->out.h.error = -EIO;
		request_end(fc, req);
		return err;
	}
	if (!req->isreply)
		request_end(fc, req);
	else {
		req->state = FUSE_REQ_SENT;
		list_move_tail(&req->list, &fc->processing);
		spin_unlock(&fc->lock);
	}
	return reqsize;

 err_unlock:
	spin_unlock(&fc->lock);
	return err;
}

static ssize_t fuse_dev_read(struct file *file, char __user *buf,
			     size_t nbytes, loff_t *off)
{
	struct iovec iov;
	iov.iov_len = nbytes;
	iov.iov_base = buf;
	return fuse_dev_readv(file, &iov, 1, off);
}

/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
{
	struct list_head *entry;

	list_for_each(entry, &fc->processing) {
		struct fuse_req *req;
		req = list_entry(entry, struct fuse_req, list);
		if (req->in.h.unique == unique)
			return req;
	}
	return NULL;
}

static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
			 unsigned nbytes)
{
	unsigned reqsize = sizeof(struct fuse_out_header);

	if (out->h.error)
		return nbytes != reqsize ? -EINVAL : 0;

	reqsize += len_args(out->numargs, out->args);

	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
		return -EINVAL;
	else if (reqsize > nbytes) {
		struct fuse_arg *lastarg = &out->args[out->numargs-1];
		unsigned diffsize = reqsize - nbytes;
		if (diffsize > lastarg->size)
			return -EINVAL;
		lastarg->size -= diffsize;
	}
	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
			      out->page_zeroing);
}

/*
 * Write a single reply to a request.  First the header is copied from
 * the write buffer.  The request is then searched on the processing
 * list by the unique ID found in the header.  If found, then remove
 * it from the list and copy the rest of the buffer to the request.
 * The request is finished by calling request_end().
 */
static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
			       unsigned long nr_segs, loff_t *off)
{
	int err;
	unsigned nbytes = iov_length(iov, nr_segs);
	struct fuse_req *req;
	struct fuse_out_header oh;
	struct fuse_copy_state cs;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -EPERM;

	fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
	if (nbytes < sizeof(struct fuse_out_header))
		return -EINVAL;

	err = fuse_copy_one(&cs, &oh, sizeof(oh));
	if (err)
		goto err_finish;
	err = -EINVAL;
	if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
	    oh.len != nbytes)
		goto err_finish;

	spin_lock(&fc->lock);
	err = -ENOENT;
	if (!fc->connected)
		goto err_unlock;

	req = request_find(fc, oh.unique);
	err = -EINVAL;
	if (!req)
		goto err_unlock;

	if (req->interrupted) {
		spin_unlock(&fc->lock);
		fuse_copy_finish(&cs);
		spin_lock(&fc->lock);
		request_end(fc, req);
		return -ENOENT;
	}
	list_move(&req->list, &fc->io);
	req->out.h = oh;
	req->locked = 1;
	cs.req = req;
	spin_unlock(&fc->lock);

	err = copy_out_args(&cs, &req->out, nbytes);
	fuse_copy_finish(&cs);

	spin_lock(&fc->lock);
	req->locked = 0;
	if (!err) {
		if (req->interrupted)
			err = -ENOENT;
	} else if (!req->interrupted)
		req->out.h.error = -EIO;
	request_end(fc, req);

	return err ? err : nbytes;

 err_unlock:
	spin_unlock(&fc->lock);
 err_finish:
	fuse_copy_finish(&cs);
	return err;
}

static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
			      size_t nbytes, loff_t *off)
{
	struct iovec iov;
	iov.iov_len = nbytes;
	iov.iov_base = (char __user *) buf;
	return fuse_dev_writev(file, &iov, 1, off);
}
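
/*
 * For reference, the userspace side of this read/write protocol looks
 * roughly like the sketch below (normally hidden behind libfuse; the
 * buffer size, error handling and opcode dispatch are simplified and
 * only illustrative):
 *
 *	char buf[65536];	// large enough for one request
 *	for (;;) {
 *		ssize_t n = read(fd, buf, sizeof(buf));   // one request
 *		struct fuse_in_header *in = (struct fuse_in_header *) buf;
 *		// ... dispatch on in->opcode, build the reply payload ...
 *		struct fuse_out_header out;
 *		out.unique = in->unique;  // must match the request
 *		out.error = 0;
 *		out.len = sizeof(out) + payload_len;
 *		// write header + payload back in a single writev()
 *	}
 */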

static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
	unsigned mask = POLLOUT | POLLWRNORM;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return POLLERR;

	poll_wait(file, &fc->waitq, wait);

	spin_lock(&fc->lock);
	if (!fc->connected)
		mask = POLLERR;
	else if (!list_empty(&fc->pending))
		mask |= POLLIN | POLLRDNORM;
	spin_unlock(&fc->lock);

	return mask;
}
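
/*
 * A daemon that doesn't want to block in read() can open the device
 * with O_NONBLOCK and wait for POLLIN here instead: fuse_dev_readv()
 * returns -EAGAIN while the pending list is empty, and readiness is
 * signalled through fc->waitq and kill_fasync() when a request is
 * queued.
 */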

/*
 * Abort all requests on the given list (pending or processing)
 *
 * This function releases and reacquires fc->lock
 */
static void end_requests(struct fuse_conn *fc, struct list_head *head)
{
	while (!list_empty(head)) {
		struct fuse_req *req;
		req = list_entry(head->next, struct fuse_req, list);
		req->out.h.error = -ECONNABORTED;
		request_end(fc, req);
		spin_lock(&fc->lock);
	}
}

/*
 * Abort requests under I/O
 *
 * The requests are set to interrupted and finished, and the request
 * waiter is woken up.  This will make request_wait_answer() wait
 * until the request is unlocked and then return.
 *
 * If the request is asynchronous, then the end function needs to be
 * called after waiting for the request to be unlocked (if it was
 * locked).
 */
static void end_io_requests(struct fuse_conn *fc)
{
	while (!list_empty(&fc->io)) {
		struct fuse_req *req =
			list_entry(fc->io.next, struct fuse_req, list);
		void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;

		req->interrupted = 1;
		req->out.h.error = -ECONNABORTED;
		req->state = FUSE_REQ_FINISHED;
		list_del_init(&req->list);
		wake_up(&req->waitq);
		if (end) {
			req->end = NULL;
			/* The end function will consume this reference */
			__fuse_get_request(req);
			spin_unlock(&fc->lock);
			wait_event(req->waitq, !req->locked);
			end(fc, req);
			spin_lock(&fc->lock);
		}
	}
}

/*
 * Abort all requests.
 *
 * Emergency exit in case of a malicious or accidental deadlock, or
 * just a hung filesystem.
 *
 * The same effect is usually achievable through killing the
 * filesystem daemon and all users of the filesystem.  The exception
 * is the combination of an asynchronous request and the tricky
 * deadlock (see Documentation/filesystems/fuse.txt).
 *
 * During the aborting, progression of requests from the pending and
 * processing lists onto the io list, and progression of new requests
 * onto the pending list is prevented by fc->connected being false.
 *
 * Progression of requests under I/O to the processing list is
 * prevented by the req->interrupted flag being true for these
 * requests.  For this reason requests on the io list must be aborted
 * first.
 */
void fuse_abort_conn(struct fuse_conn *fc)
{
	spin_lock(&fc->lock);
	if (fc->connected) {
		fc->connected = 0;
		end_io_requests(fc);
		end_requests(fc, &fc->pending);
		end_requests(fc, &fc->processing);
		wake_up_all(&fc->waitq);
		kill_fasync(&fc->fasync, SIGIO, POLL_IN);
	}
	spin_unlock(&fc->lock);
}
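
/*
 * fuse_abort_conn() is not reachable through the device file itself;
 * in this version it is expected to be triggered from userspace via
 * the connection's kobject interface (fc->kobj, exported by the mount
 * code and not shown in this file).  Killing the daemon instead goes
 * through the fuse_dev_release() path below.
 */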

static int fuse_dev_release(struct inode *inode, struct file *file)
{
	struct fuse_conn *fc = fuse_get_conn(file);
	if (fc) {
		spin_lock(&fc->lock);
		fc->connected = 0;
		end_requests(fc, &fc->pending);
		end_requests(fc, &fc->processing);
		spin_unlock(&fc->lock);
		fasync_helper(-1, file, 0, &fc->fasync);
		kobject_put(&fc->kobj);
	}

	return 0;
}

static int fuse_dev_fasync(int fd, struct file *file, int on)
{
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -EPERM;

	/* No locking - fasync_helper does its own locking */
	return fasync_helper(fd, file, on, &fc->fasync);
}

const struct file_operations fuse_dev_operations = {
	.owner = THIS_MODULE,
	.llseek = no_llseek,
	.read = fuse_dev_read,
	.readv = fuse_dev_readv,
	.write = fuse_dev_write,
	.writev = fuse_dev_writev,
	.poll = fuse_dev_poll,
	.release = fuse_dev_release,
	.fasync = fuse_dev_fasync,
};

static struct miscdevice fuse_miscdevice = {
	.minor = FUSE_MINOR,
	.name = "fuse",
	.fops = &fuse_dev_operations,
};

int __init fuse_dev_init(void)
{
	int err = -ENOMEM;
	fuse_req_cachep = kmem_cache_create("fuse_request",
					    sizeof(struct fuse_req),
					    0, 0, NULL, NULL);
	if (!fuse_req_cachep)
		goto out;

	err = misc_register(&fuse_miscdevice);
	if (err)
		goto out_cache_clean;

	return 0;

 out_cache_clean:
	kmem_cache_destroy(fuse_req_cachep);
 out:
	return err;
}

void fuse_dev_cleanup(void)
{
	misc_deregister(&fuse_miscdevice);
	kmem_cache_destroy(fuse_req_cachep);
}