
TOMOYO Linux Cross Reference
Linux/fs/fuse/dev.c


  1 /*
  2   FUSE: Filesystem in Userspace
  3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
  4 
  5   This program can be distributed under the terms of the GNU GPL.
  6   See the file COPYING.
  7 */
  8 
  9 #include "fuse_i.h"
 10 
 11 #include <linux/init.h>
 12 #include <linux/module.h>
 13 #include <linux/poll.h>
 14 #include <linux/sched/signal.h>
 15 #include <linux/uio.h>
 16 #include <linux/miscdevice.h>
 17 #include <linux/pagemap.h>
 18 #include <linux/file.h>
 19 #include <linux/slab.h>
 20 #include <linux/pipe_fs_i.h>
 21 #include <linux/swap.h>
 22 #include <linux/splice.h>
 23 #include <linux/sched.h>
 24 
 25 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 26 MODULE_ALIAS("devname:fuse");
 27 
 28 /* Ordinary requests have even IDs, while interrupt IDs are odd */
 29 #define FUSE_INT_REQ_BIT (1ULL << 0)
 30 #define FUSE_REQ_ID_STEP (1ULL << 1)
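/*
 * Illustrative sketch, not part of the original file; the concrete values
 * assume the per-queue counter starts at zero (see fuse_get_unique() below):
 *
 *   ordinary request IDs:  2, 4, 6, ...  (even, stepped by FUSE_REQ_ID_STEP)
 *   interrupt ID:          req->in.h.unique | FUSE_INT_REQ_BIT  (odd)
 *
 * e.g. the interrupt for request 6 is sent to userspace with unique 7, and
 * fuse_req_hash() masks FUSE_INT_REQ_BIT off again, so a reply addressed to
 * either ID can be matched back to the same request.
 */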
 31 
 32 static struct kmem_cache *fuse_req_cachep;
 33 
 34 static void end_requests(struct list_head *head);
 35 
 36 static struct fuse_dev *fuse_get_dev(struct file *file)
 37 {
 38         /*
 39          * Lockless access is OK, because file->private_data is set
 40          * once during mount and is valid until the file is released.
 41          */
 42         return READ_ONCE(file->private_data);
 43 }
 44 
 45 static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
 46 {
 47         INIT_LIST_HEAD(&req->list);
 48         INIT_LIST_HEAD(&req->intr_entry);
 49         init_waitqueue_head(&req->waitq);
 50         refcount_set(&req->count, 1);
 51         __set_bit(FR_PENDING, &req->flags);
 52         req->fm = fm;
 53 }
 54 
 55 static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
 56 {
 57         struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
 58         if (req)
 59                 fuse_request_init(fm, req);
 60 
 61         return req;
 62 }
 63 
 64 static void fuse_request_free(struct fuse_req *req)
 65 {
 66         kmem_cache_free(fuse_req_cachep, req);
 67 }
 68 
 69 static void __fuse_get_request(struct fuse_req *req)
 70 {
 71         refcount_inc(&req->count);
 72 }
 73 
 74 /* Must be called with > 1 refcount */
 75 static void __fuse_put_request(struct fuse_req *req)
 76 {
 77         refcount_dec(&req->count);
 78 }
 79 
 80 void fuse_set_initialized(struct fuse_conn *fc)
 81 {
 82         /* Make sure stores before this are seen on another CPU */
 83         smp_wmb();
 84         fc->initialized = 1;
 85 }
 86 
 87 static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
 88 {
 89         return !fc->initialized || (for_background && fc->blocked);
 90 }
 91 
 92 static void fuse_drop_waiting(struct fuse_conn *fc)
 93 {
 94         /*
 95          * Lockless check of fc->connected is okay, because atomic_dec_and_test()
 96          * provides a memory barrier matched with the one in fuse_wait_aborted()
 97          * to ensure no wake-up is missed.
 98          */
 99         if (atomic_dec_and_test(&fc->num_waiting) &&
100             !READ_ONCE(fc->connected)) {
101                 /* wake up aborters */
102                 wake_up_all(&fc->blocked_waitq);
103         }
104 }
105 
106 static void fuse_put_request(struct fuse_req *req);
107 
108 static struct fuse_req *fuse_get_req(struct fuse_mount *fm, bool for_background)
109 {
110         struct fuse_conn *fc = fm->fc;
111         struct fuse_req *req;
112         int err;
113         atomic_inc(&fc->num_waiting);
114 
115         if (fuse_block_alloc(fc, for_background)) {
116                 err = -EINTR;
117                 if (wait_event_killable_exclusive(fc->blocked_waitq,
118                                 !fuse_block_alloc(fc, for_background)))
119                         goto out;
120         }
121         /* Matches smp_wmb() in fuse_set_initialized() */
122         smp_rmb();
123 
124         err = -ENOTCONN;
125         if (!fc->connected)
126                 goto out;
127 
128         err = -ECONNREFUSED;
129         if (fc->conn_error)
130                 goto out;
131 
132         req = fuse_request_alloc(fm, GFP_KERNEL);
133         err = -ENOMEM;
134         if (!req) {
135                 if (for_background)
136                         wake_up(&fc->blocked_waitq);
137                 goto out;
138         }
139 
140         req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
141         req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
142         req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
143 
144         __set_bit(FR_WAITING, &req->flags);
145         if (for_background)
146                 __set_bit(FR_BACKGROUND, &req->flags);
147 
148         if (unlikely(req->in.h.uid == ((uid_t)-1) ||
149                      req->in.h.gid == ((gid_t)-1))) {
150                 fuse_put_request(req);
151                 return ERR_PTR(-EOVERFLOW);
152         }
153         return req;
154 
155  out:
156         fuse_drop_waiting(fc);
157         return ERR_PTR(err);
158 }
159 
160 static void fuse_put_request(struct fuse_req *req)
161 {
162         struct fuse_conn *fc = req->fm->fc;
163 
164         if (refcount_dec_and_test(&req->count)) {
165                 if (test_bit(FR_BACKGROUND, &req->flags)) {
166                         /*
167                          * We get here in the unlikely case that a background
168                          * request was allocated but not sent
169                          */
170                         spin_lock(&fc->bg_lock);
171                         if (!fc->blocked)
172                                 wake_up(&fc->blocked_waitq);
173                         spin_unlock(&fc->bg_lock);
174                 }
175 
176                 if (test_bit(FR_WAITING, &req->flags)) {
177                         __clear_bit(FR_WAITING, &req->flags);
178                         fuse_drop_waiting(fc);
179                 }
180 
181                 fuse_request_free(req);
182         }
183 }
184 
185 unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
186 {
187         unsigned nbytes = 0;
188         unsigned i;
189 
190         for (i = 0; i < numargs; i++)
191                 nbytes += args[i].size;
192 
193         return nbytes;
194 }
195 EXPORT_SYMBOL_GPL(fuse_len_args);
196 
197 u64 fuse_get_unique(struct fuse_iqueue *fiq)
198 {
199         fiq->reqctr += FUSE_REQ_ID_STEP;
200         return fiq->reqctr;
201 }
202 EXPORT_SYMBOL_GPL(fuse_get_unique);
203 
204 static unsigned int fuse_req_hash(u64 unique)
205 {
206         return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
207 }
208 
209 /*
210  * A new request is available, wake fiq->waitq
211  */
212 static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq)
213 __releases(fiq->lock)
214 {
215         wake_up(&fiq->waitq);
216         kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
217         spin_unlock(&fiq->lock);
218 }
219 
220 const struct fuse_iqueue_ops fuse_dev_fiq_ops = {
221         .wake_forget_and_unlock         = fuse_dev_wake_and_unlock,
222         .wake_interrupt_and_unlock      = fuse_dev_wake_and_unlock,
223         .wake_pending_and_unlock        = fuse_dev_wake_and_unlock,
224 };
225 EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops);
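/*
 * Illustrative sketch (hypothetical, not from this file): a transport other
 * than /dev/fuse may install its own fuse_iqueue_ops so that queued requests
 * kick that transport instead of waking fiq->waitq.  Each callback is entered
 * with fiq->lock held and must release it, mirroring
 * fuse_dev_wake_and_unlock() above.
 */
static void example_wake_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	/* e.g. notify a worker thread or a hardware queue, then drop the lock */
	spin_unlock(&fiq->lock);
}

static const struct fuse_iqueue_ops example_fiq_ops = {
	.wake_forget_and_unlock		= example_wake_and_unlock,
	.wake_interrupt_and_unlock	= example_wake_and_unlock,
	.wake_pending_and_unlock	= example_wake_and_unlock,
};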
226 
227 static void queue_request_and_unlock(struct fuse_iqueue *fiq,
228                                      struct fuse_req *req)
229 __releases(fiq->lock)
230 {
231         req->in.h.len = sizeof(struct fuse_in_header) +
232                 fuse_len_args(req->args->in_numargs,
233                               (struct fuse_arg *) req->args->in_args);
234         list_add_tail(&req->list, &fiq->pending);
235         fiq->ops->wake_pending_and_unlock(fiq);
236 }
237 
238 void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
239                        u64 nodeid, u64 nlookup)
240 {
241         struct fuse_iqueue *fiq = &fc->iq;
242 
243         forget->forget_one.nodeid = nodeid;
244         forget->forget_one.nlookup = nlookup;
245 
246         spin_lock(&fiq->lock);
247         if (fiq->connected) {
248                 fiq->forget_list_tail->next = forget;
249                 fiq->forget_list_tail = forget;
250                 fiq->ops->wake_forget_and_unlock(fiq);
251         } else {
252                 kfree(forget);
253                 spin_unlock(&fiq->lock);
254         }
255 }
256 
257 static void flush_bg_queue(struct fuse_conn *fc)
258 {
259         struct fuse_iqueue *fiq = &fc->iq;
260 
261         while (fc->active_background < fc->max_background &&
262                !list_empty(&fc->bg_queue)) {
263                 struct fuse_req *req;
264 
265                 req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
266                 list_del(&req->list);
267                 fc->active_background++;
268                 spin_lock(&fiq->lock);
269                 req->in.h.unique = fuse_get_unique(fiq);
270                 queue_request_and_unlock(fiq, req);
271         }
272 }
273 
274 /*
275  * This function is called when a request is finished.  Either a reply
276  * has arrived or it was aborted (and not yet sent) or some error
277  * occurred during communication with userspace, or the device file
278  * was closed.  The requester thread is woken up (if still waiting),
279  * the 'end' callback is called if given, else the reference to the
280  * request is released
281  */
282 void fuse_request_end(struct fuse_req *req)
283 {
284         struct fuse_mount *fm = req->fm;
285         struct fuse_conn *fc = fm->fc;
286         struct fuse_iqueue *fiq = &fc->iq;
287 
288         if (test_and_set_bit(FR_FINISHED, &req->flags))
289                 goto put_request;
290 
291         /*
292          * test_and_set_bit() implies smp_mb() between bit
293          * changing and below FR_INTERRUPTED check. Pairs with
294          * smp_mb() from queue_interrupt().
295          */
296         if (test_bit(FR_INTERRUPTED, &req->flags)) {
297                 spin_lock(&fiq->lock);
298                 list_del_init(&req->intr_entry);
299                 spin_unlock(&fiq->lock);
300         }
301         WARN_ON(test_bit(FR_PENDING, &req->flags));
302         WARN_ON(test_bit(FR_SENT, &req->flags));
303         if (test_bit(FR_BACKGROUND, &req->flags)) {
304                 spin_lock(&fc->bg_lock);
305                 clear_bit(FR_BACKGROUND, &req->flags);
306                 if (fc->num_background == fc->max_background) {
307                         fc->blocked = 0;
308                         wake_up(&fc->blocked_waitq);
309                 } else if (!fc->blocked) {
310                         /*
311                          * Wake up next waiter, if any.  It's okay to use
312                          * waitqueue_active(), as we've already synced up
313                          * fc->blocked with waiters with the wake_up() call
314                          * above.
315                          */
316                         if (waitqueue_active(&fc->blocked_waitq))
317                                 wake_up(&fc->blocked_waitq);
318                 }
319 
320                 fc->num_background--;
321                 fc->active_background--;
322                 flush_bg_queue(fc);
323                 spin_unlock(&fc->bg_lock);
324         } else {
325                 /* Wake up waiter sleeping in request_wait_answer() */
326                 wake_up(&req->waitq);
327         }
328 
329         if (test_bit(FR_ASYNC, &req->flags))
330                 req->args->end(fm, req->args, req->out.h.error);
331 put_request:
332         fuse_put_request(req);
333 }
334 EXPORT_SYMBOL_GPL(fuse_request_end);
335 
336 static int queue_interrupt(struct fuse_req *req)
337 {
338         struct fuse_iqueue *fiq = &req->fm->fc->iq;
339 
340         spin_lock(&fiq->lock);
 341         /* Check that this request has really been marked interrupted */
342         if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) {
343                 spin_unlock(&fiq->lock);
344                 return -EINVAL;
345         }
346 
347         if (list_empty(&req->intr_entry)) {
348                 list_add_tail(&req->intr_entry, &fiq->interrupts);
349                 /*
350                  * Pairs with smp_mb() implied by test_and_set_bit()
351                  * from fuse_request_end().
352                  */
353                 smp_mb();
354                 if (test_bit(FR_FINISHED, &req->flags)) {
355                         list_del_init(&req->intr_entry);
356                         spin_unlock(&fiq->lock);
357                         return 0;
358                 }
359                 fiq->ops->wake_interrupt_and_unlock(fiq);
360         } else {
361                 spin_unlock(&fiq->lock);
362         }
363         return 0;
364 }
365 
366 static void request_wait_answer(struct fuse_req *req)
367 {
368         struct fuse_conn *fc = req->fm->fc;
369         struct fuse_iqueue *fiq = &fc->iq;
370         int err;
371 
372         if (!fc->no_interrupt) {
373                 /* Any signal may interrupt this */
374                 err = wait_event_interruptible(req->waitq,
375                                         test_bit(FR_FINISHED, &req->flags));
376                 if (!err)
377                         return;
378 
379                 set_bit(FR_INTERRUPTED, &req->flags);
380                 /* matches barrier in fuse_dev_do_read() */
381                 smp_mb__after_atomic();
382                 if (test_bit(FR_SENT, &req->flags))
383                         queue_interrupt(req);
384         }
385 
386         if (!test_bit(FR_FORCE, &req->flags)) {
387                 /* Only fatal signals may interrupt this */
388                 err = wait_event_killable(req->waitq,
389                                         test_bit(FR_FINISHED, &req->flags));
390                 if (!err)
391                         return;
392 
393                 spin_lock(&fiq->lock);
394                 /* Request is not yet in userspace, bail out */
395                 if (test_bit(FR_PENDING, &req->flags)) {
396                         list_del(&req->list);
397                         spin_unlock(&fiq->lock);
398                         __fuse_put_request(req);
399                         req->out.h.error = -EINTR;
400                         return;
401                 }
402                 spin_unlock(&fiq->lock);
403         }
404 
405         /*
406          * Either request is already in userspace, or it was forced.
407          * Wait it out.
408          */
409         wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
410 }
411 
412 static void __fuse_request_send(struct fuse_req *req)
413 {
414         struct fuse_iqueue *fiq = &req->fm->fc->iq;
415 
416         BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
417         spin_lock(&fiq->lock);
418         if (!fiq->connected) {
419                 spin_unlock(&fiq->lock);
420                 req->out.h.error = -ENOTCONN;
421         } else {
422                 req->in.h.unique = fuse_get_unique(fiq);
423                 /* acquire extra reference, since request is still needed
424                    after fuse_request_end() */
425                 __fuse_get_request(req);
426                 queue_request_and_unlock(fiq, req);
427 
428                 request_wait_answer(req);
429                 /* Pairs with smp_wmb() in fuse_request_end() */
430                 smp_rmb();
431         }
432 }
433 
434 static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
435 {
436         if (fc->minor < 4 && args->opcode == FUSE_STATFS)
437                 args->out_args[0].size = FUSE_COMPAT_STATFS_SIZE;
438 
439         if (fc->minor < 9) {
440                 switch (args->opcode) {
441                 case FUSE_LOOKUP:
442                 case FUSE_CREATE:
443                 case FUSE_MKNOD:
444                 case FUSE_MKDIR:
445                 case FUSE_SYMLINK:
446                 case FUSE_LINK:
447                         args->out_args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
448                         break;
449                 case FUSE_GETATTR:
450                 case FUSE_SETATTR:
451                         args->out_args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
452                         break;
453                 }
454         }
455         if (fc->minor < 12) {
456                 switch (args->opcode) {
457                 case FUSE_CREATE:
458                         args->in_args[0].size = sizeof(struct fuse_open_in);
459                         break;
460                 case FUSE_MKNOD:
461                         args->in_args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
462                         break;
463                 }
464         }
465 }
466 
467 static void fuse_force_creds(struct fuse_req *req)
468 {
469         struct fuse_conn *fc = req->fm->fc;
470 
471         req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
472         req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
473         req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
474 }
475 
476 static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
477 {
478         req->in.h.opcode = args->opcode;
479         req->in.h.nodeid = args->nodeid;
480         req->args = args;
481         if (args->is_ext)
482                 req->in.h.total_extlen = args->in_args[args->ext_idx].size / 8;
483         if (args->end)
484                 __set_bit(FR_ASYNC, &req->flags);
485 }
486 
487 ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args)
488 {
489         struct fuse_conn *fc = fm->fc;
490         struct fuse_req *req;
491         ssize_t ret;
492 
493         if (args->force) {
494                 atomic_inc(&fc->num_waiting);
495                 req = fuse_request_alloc(fm, GFP_KERNEL | __GFP_NOFAIL);
496 
497                 if (!args->nocreds)
498                         fuse_force_creds(req);
499 
500                 __set_bit(FR_WAITING, &req->flags);
501                 __set_bit(FR_FORCE, &req->flags);
502         } else {
503                 WARN_ON(args->nocreds);
504                 req = fuse_get_req(fm, false);
505                 if (IS_ERR(req))
506                         return PTR_ERR(req);
507         }
508 
509         /* Needs to be done after fuse_get_req() so that fc->minor is valid */
510         fuse_adjust_compat(fc, args);
511         fuse_args_to_req(req, args);
512 
513         if (!args->noreply)
514                 __set_bit(FR_ISREPLY, &req->flags);
515         __fuse_request_send(req);
516         ret = req->out.h.error;
517         if (!ret && args->out_argvar) {
518                 BUG_ON(args->out_numargs == 0);
519                 ret = args->out_args[args->out_numargs - 1].size;
520         }
521         fuse_put_request(req);
522 
523         return ret;
524 }
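/*
 * Illustrative sketch (hypothetical, not from this file): how a caller
 * elsewhere in fs/fuse/ issues a synchronous request.  FUSE_STATFS is used
 * here because it needs no input arguments; real callers live in inode.c,
 * dir.c, file.c, etc.
 */
static int example_statfs(struct fuse_mount *fm, u64 nodeid,
			  struct fuse_statfs_out *outarg)
{
	FUSE_ARGS(args);

	args.opcode = FUSE_STATFS;
	args.nodeid = nodeid;
	args.in_numargs = 0;
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(*outarg);
	args.out_args[0].value = outarg;

	/* Blocks until the daemon replies; a negative return is its error. */
	return fuse_simple_request(fm, &args);
}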
525 
526 static bool fuse_request_queue_background(struct fuse_req *req)
527 {
528         struct fuse_mount *fm = req->fm;
529         struct fuse_conn *fc = fm->fc;
530         bool queued = false;
531 
532         WARN_ON(!test_bit(FR_BACKGROUND, &req->flags));
533         if (!test_bit(FR_WAITING, &req->flags)) {
534                 __set_bit(FR_WAITING, &req->flags);
535                 atomic_inc(&fc->num_waiting);
536         }
537         __set_bit(FR_ISREPLY, &req->flags);
538         spin_lock(&fc->bg_lock);
539         if (likely(fc->connected)) {
540                 fc->num_background++;
541                 if (fc->num_background == fc->max_background)
542                         fc->blocked = 1;
543                 list_add_tail(&req->list, &fc->bg_queue);
544                 flush_bg_queue(fc);
545                 queued = true;
546         }
547         spin_unlock(&fc->bg_lock);
548 
549         return queued;
550 }
551 
552 int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
553                             gfp_t gfp_flags)
554 {
555         struct fuse_req *req;
556 
557         if (args->force) {
558                 WARN_ON(!args->nocreds);
559                 req = fuse_request_alloc(fm, gfp_flags);
560                 if (!req)
561                         return -ENOMEM;
562                 __set_bit(FR_BACKGROUND, &req->flags);
563         } else {
564                 WARN_ON(args->nocreds);
565                 req = fuse_get_req(fm, true);
566                 if (IS_ERR(req))
567                         return PTR_ERR(req);
568         }
569 
570         fuse_args_to_req(req, args);
571 
572         if (!fuse_request_queue_background(req)) {
573                 fuse_put_request(req);
574                 return -ENOTCONN;
575         }
576 
577         return 0;
578 }
579 EXPORT_SYMBOL_GPL(fuse_simple_background);
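/*
 * Illustrative sketch (hypothetical, not from this file): submitting a
 * request asynchronously.  "struct example_async" is made up; real users
 * embed struct fuse_args (or fuse_args_pages) in a per-request allocation
 * the same way and free it from the ->end() callback, which
 * fuse_request_end() invokes once the reply arrives (FR_ASYNC is set by
 * fuse_args_to_req() because args->end is non-NULL).
 */
struct example_async {
	struct fuse_args args;
	/* opcode-specific in/out structures would live here */
};

static void example_async_end(struct fuse_mount *fm, struct fuse_args *args,
			      int error)
{
	kfree(container_of(args, struct example_async, args));
}

static int example_async_submit(struct fuse_mount *fm, struct example_async *ea)
{
	/* opcode, nodeid and in/out args are set up as in the sync case */
	ea->args.end = example_async_end;

	/* on error the request was never queued; the caller still owns *ea */
	return fuse_simple_background(fm, &ea->args, GFP_KERNEL);
}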
580 
581 static int fuse_simple_notify_reply(struct fuse_mount *fm,
582                                     struct fuse_args *args, u64 unique)
583 {
584         struct fuse_req *req;
585         struct fuse_iqueue *fiq = &fm->fc->iq;
586         int err = 0;
587 
588         req = fuse_get_req(fm, false);
589         if (IS_ERR(req))
590                 return PTR_ERR(req);
591 
592         __clear_bit(FR_ISREPLY, &req->flags);
593         req->in.h.unique = unique;
594 
595         fuse_args_to_req(req, args);
596 
597         spin_lock(&fiq->lock);
598         if (fiq->connected) {
599                 queue_request_and_unlock(fiq, req);
600         } else {
601                 err = -ENODEV;
602                 spin_unlock(&fiq->lock);
603                 fuse_put_request(req);
604         }
605 
606         return err;
607 }
608 
609 /*
610  * Lock the request.  Up to the next unlock_request() there mustn't be
611  * anything that could cause a page-fault.  If the request was already
 612  * aborted, bail out.
613  */
614 static int lock_request(struct fuse_req *req)
615 {
616         int err = 0;
617         if (req) {
618                 spin_lock(&req->waitq.lock);
619                 if (test_bit(FR_ABORTED, &req->flags))
620                         err = -ENOENT;
621                 else
622                         set_bit(FR_LOCKED, &req->flags);
623                 spin_unlock(&req->waitq.lock);
624         }
625         return err;
626 }
627 
628 /*
629  * Unlock request.  If it was aborted while locked, caller is responsible
630  * for unlocking and ending the request.
631  */
632 static int unlock_request(struct fuse_req *req)
633 {
634         int err = 0;
635         if (req) {
636                 spin_lock(&req->waitq.lock);
637                 if (test_bit(FR_ABORTED, &req->flags))
638                         err = -ENOENT;
639                 else
640                         clear_bit(FR_LOCKED, &req->flags);
641                 spin_unlock(&req->waitq.lock);
642         }
643         return err;
644 }
645 
646 struct fuse_copy_state {
647         int write;
648         struct fuse_req *req;
649         struct iov_iter *iter;
650         struct pipe_buffer *pipebufs;
651         struct pipe_buffer *currbuf;
652         struct pipe_inode_info *pipe;
653         unsigned long nr_segs;
654         struct page *pg;
655         unsigned len;
656         unsigned offset;
657         unsigned move_pages:1;
658 };
659 
660 static void fuse_copy_init(struct fuse_copy_state *cs, int write,
661                            struct iov_iter *iter)
662 {
663         memset(cs, 0, sizeof(*cs));
664         cs->write = write;
665         cs->iter = iter;
666 }
667 
668 /* Unmap and put previous page of userspace buffer */
669 static void fuse_copy_finish(struct fuse_copy_state *cs)
670 {
671         if (cs->currbuf) {
672                 struct pipe_buffer *buf = cs->currbuf;
673 
674                 if (cs->write)
675                         buf->len = PAGE_SIZE - cs->len;
676                 cs->currbuf = NULL;
677         } else if (cs->pg) {
678                 if (cs->write) {
679                         flush_dcache_page(cs->pg);
680                         set_page_dirty_lock(cs->pg);
681                 }
682                 put_page(cs->pg);
683         }
684         cs->pg = NULL;
685 }
686 
687 /*
688  * Get another pagefull of userspace buffer, and map it to kernel
689  * address space, and lock request
690  */
691 static int fuse_copy_fill(struct fuse_copy_state *cs)
692 {
693         struct page *page;
694         int err;
695 
696         err = unlock_request(cs->req);
697         if (err)
698                 return err;
699 
700         fuse_copy_finish(cs);
701         if (cs->pipebufs) {
702                 struct pipe_buffer *buf = cs->pipebufs;
703 
704                 if (!cs->write) {
705                         err = pipe_buf_confirm(cs->pipe, buf);
706                         if (err)
707                                 return err;
708 
709                         BUG_ON(!cs->nr_segs);
710                         cs->currbuf = buf;
711                         cs->pg = buf->page;
712                         cs->offset = buf->offset;
713                         cs->len = buf->len;
714                         cs->pipebufs++;
715                         cs->nr_segs--;
716                 } else {
717                         if (cs->nr_segs >= cs->pipe->max_usage)
718                                 return -EIO;
719 
720                         page = alloc_page(GFP_HIGHUSER);
721                         if (!page)
722                                 return -ENOMEM;
723 
724                         buf->page = page;
725                         buf->offset = 0;
726                         buf->len = 0;
727 
728                         cs->currbuf = buf;
729                         cs->pg = page;
730                         cs->offset = 0;
731                         cs->len = PAGE_SIZE;
732                         cs->pipebufs++;
733                         cs->nr_segs++;
734                 }
735         } else {
736                 size_t off;
737                 err = iov_iter_get_pages2(cs->iter, &page, PAGE_SIZE, 1, &off);
738                 if (err < 0)
739                         return err;
740                 BUG_ON(!err);
741                 cs->len = err;
742                 cs->offset = off;
743                 cs->pg = page;
744         }
745 
746         return lock_request(cs->req);
747 }
748 
749 /* Do as much copy to/from userspace buffer as we can */
750 static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
751 {
752         unsigned ncpy = min(*size, cs->len);
753         if (val) {
754                 void *pgaddr = kmap_local_page(cs->pg);
755                 void *buf = pgaddr + cs->offset;
756 
757                 if (cs->write)
758                         memcpy(buf, *val, ncpy);
759                 else
760                         memcpy(*val, buf, ncpy);
761 
762                 kunmap_local(pgaddr);
763                 *val += ncpy;
764         }
765         *size -= ncpy;
766         cs->len -= ncpy;
767         cs->offset += ncpy;
768         return ncpy;
769 }
770 
771 static int fuse_check_folio(struct folio *folio)
772 {
773         if (folio_mapped(folio) ||
774             folio->mapping != NULL ||
775             (folio->flags & PAGE_FLAGS_CHECK_AT_PREP &
776              ~(1 << PG_locked |
777                1 << PG_referenced |
778                1 << PG_lru |
779                1 << PG_active |
780                1 << PG_workingset |
781                1 << PG_reclaim |
782                1 << PG_waiters |
783                LRU_GEN_MASK | LRU_REFS_MASK))) {
784                 dump_page(&folio->page, "fuse: trying to steal weird page");
785                 return 1;
786         }
787         return 0;
788 }
789 
790 static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
791 {
792         int err;
793         struct folio *oldfolio = page_folio(*pagep);
794         struct folio *newfolio;
795         struct pipe_buffer *buf = cs->pipebufs;
796 
797         folio_get(oldfolio);
798         err = unlock_request(cs->req);
799         if (err)
800                 goto out_put_old;
801 
802         fuse_copy_finish(cs);
803 
804         err = pipe_buf_confirm(cs->pipe, buf);
805         if (err)
806                 goto out_put_old;
807 
808         BUG_ON(!cs->nr_segs);
809         cs->currbuf = buf;
810         cs->len = buf->len;
811         cs->pipebufs++;
812         cs->nr_segs--;
813 
814         if (cs->len != PAGE_SIZE)
815                 goto out_fallback;
816 
817         if (!pipe_buf_try_steal(cs->pipe, buf))
818                 goto out_fallback;
819 
820         newfolio = page_folio(buf->page);
821 
822         folio_clear_uptodate(newfolio);
823         folio_clear_mappedtodisk(newfolio);
824 
825         if (fuse_check_folio(newfolio) != 0)
826                 goto out_fallback_unlock;
827 
828         /*
 830          * This is a new and locked page; it shouldn't be mapped or
830          * have any special flags on it
831          */
832         if (WARN_ON(folio_mapped(oldfolio)))
833                 goto out_fallback_unlock;
834         if (WARN_ON(folio_has_private(oldfolio)))
835                 goto out_fallback_unlock;
836         if (WARN_ON(folio_test_dirty(oldfolio) ||
837                                 folio_test_writeback(oldfolio)))
838                 goto out_fallback_unlock;
839         if (WARN_ON(folio_test_mlocked(oldfolio)))
840                 goto out_fallback_unlock;
841 
842         replace_page_cache_folio(oldfolio, newfolio);
843 
844         folio_get(newfolio);
845 
846         if (!(buf->flags & PIPE_BUF_FLAG_LRU))
847                 folio_add_lru(newfolio);
848 
849         /*
850          * Release while we have extra ref on stolen page.  Otherwise
851          * anon_pipe_buf_release() might think the page can be reused.
852          */
853         pipe_buf_release(cs->pipe, buf);
854 
855         err = 0;
856         spin_lock(&cs->req->waitq.lock);
857         if (test_bit(FR_ABORTED, &cs->req->flags))
858                 err = -ENOENT;
859         else
860                 *pagep = &newfolio->page;
861         spin_unlock(&cs->req->waitq.lock);
862 
863         if (err) {
864                 folio_unlock(newfolio);
865                 folio_put(newfolio);
866                 goto out_put_old;
867         }
868 
869         folio_unlock(oldfolio);
870         /* Drop ref for ap->pages[] array */
871         folio_put(oldfolio);
872         cs->len = 0;
873 
874         err = 0;
875 out_put_old:
876         /* Drop ref obtained in this function */
877         folio_put(oldfolio);
878         return err;
879 
880 out_fallback_unlock:
881         folio_unlock(newfolio);
882 out_fallback:
883         cs->pg = buf->page;
884         cs->offset = buf->offset;
885 
886         err = lock_request(cs->req);
887         if (!err)
888                 err = 1;
889 
890         goto out_put_old;
891 }
892 
893 static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
894                          unsigned offset, unsigned count)
895 {
896         struct pipe_buffer *buf;
897         int err;
898 
899         if (cs->nr_segs >= cs->pipe->max_usage)
900                 return -EIO;
901 
902         get_page(page);
903         err = unlock_request(cs->req);
904         if (err) {
905                 put_page(page);
906                 return err;
907         }
908 
909         fuse_copy_finish(cs);
910 
911         buf = cs->pipebufs;
912         buf->page = page;
913         buf->offset = offset;
914         buf->len = count;
915 
916         cs->pipebufs++;
917         cs->nr_segs++;
918         cs->len = 0;
919 
920         return 0;
921 }
922 
923 /*
924  * Copy a page in the request to/from the userspace buffer.  Must be
925  * done atomically
926  */
927 static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
928                           unsigned offset, unsigned count, int zeroing)
929 {
930         int err;
931         struct page *page = *pagep;
932 
933         if (page && zeroing && count < PAGE_SIZE)
934                 clear_highpage(page);
935 
936         while (count) {
937                 if (cs->write && cs->pipebufs && page) {
938                         /*
939                          * Can't control lifetime of pipe buffers, so always
940                          * copy user pages.
941                          */
942                         if (cs->req->args->user_pages) {
943                                 err = fuse_copy_fill(cs);
944                                 if (err)
945                                         return err;
946                         } else {
947                                 return fuse_ref_page(cs, page, offset, count);
948                         }
949                 } else if (!cs->len) {
950                         if (cs->move_pages && page &&
951                             offset == 0 && count == PAGE_SIZE) {
952                                 err = fuse_try_move_page(cs, pagep);
953                                 if (err <= 0)
954                                         return err;
955                         } else {
956                                 err = fuse_copy_fill(cs);
957                                 if (err)
958                                         return err;
959                         }
960                 }
961                 if (page) {
962                         void *mapaddr = kmap_local_page(page);
963                         void *buf = mapaddr + offset;
964                         offset += fuse_copy_do(cs, &buf, &count);
965                         kunmap_local(mapaddr);
966                 } else
967                         offset += fuse_copy_do(cs, NULL, &count);
968         }
969         if (page && !cs->write)
970                 flush_dcache_page(page);
971         return 0;
972 }
973 
974 /* Copy pages in the request to/from userspace buffer */
975 static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
976                            int zeroing)
977 {
978         unsigned i;
979         struct fuse_req *req = cs->req;
980         struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
981 
982 
983         for (i = 0; i < ap->num_pages && (nbytes || zeroing); i++) {
984                 int err;
985                 unsigned int offset = ap->descs[i].offset;
986                 unsigned int count = min(nbytes, ap->descs[i].length);
987 
988                 err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing);
989                 if (err)
990                         return err;
991 
992                 nbytes -= count;
993         }
994         return 0;
995 }
996 
997 /* Copy a single argument in the request to/from userspace buffer */
998 static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
999 {
1000         while (size) {
1001                 if (!cs->len) {
1002                         int err = fuse_copy_fill(cs);
1003                         if (err)
1004                                 return err;
1005                 }
1006                 fuse_copy_do(cs, &val, &size);
1007         }
1008         return 0;
1009 }
1010 
1011 /* Copy request arguments to/from userspace buffer */
1012 static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
1013                           unsigned argpages, struct fuse_arg *args,
1014                           int zeroing)
1015 {
1016         int err = 0;
1017         unsigned i;
1018 
1019         for (i = 0; !err && i < numargs; i++)  {
1020                 struct fuse_arg *arg = &args[i];
1021                 if (i == numargs - 1 && argpages)
1022                         err = fuse_copy_pages(cs, arg->size, zeroing);
1023                 else
1024                         err = fuse_copy_one(cs, arg->value, arg->size);
1025         }
1026         return err;
1027 }
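/*
 * Worked example (not in the original source): for a request with two inline
 * input arguments, what userspace reads from /dev/fuse is simply the
 * concatenation
 *
 *   struct fuse_in_header | in_args[0] | in_args[1]
 *
 * and in.h.len, set in queue_request_and_unlock(), is the size of that
 * concatenation.  When in_pages is set, the data of the last argument is
 * copied from the request's pages (fuse_copy_pages()) rather than from a
 * kernel buffer.
 */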
1028 
1029 static int forget_pending(struct fuse_iqueue *fiq)
1030 {
1031         return fiq->forget_list_head.next != NULL;
1032 }
1033 
1034 static int request_pending(struct fuse_iqueue *fiq)
1035 {
1036         return !list_empty(&fiq->pending) || !list_empty(&fiq->interrupts) ||
1037                 forget_pending(fiq);
1038 }
1039 
1040 /*
1041  * Transfer an interrupt request to userspace
1042  *
1043  * Unlike other requests this is assembled on demand, without a need
1044  * to allocate a separate fuse_req structure.
1045  *
1046  * Called with fiq->lock held, releases it
1047  */
1048 static int fuse_read_interrupt(struct fuse_iqueue *fiq,
1049                                struct fuse_copy_state *cs,
1050                                size_t nbytes, struct fuse_req *req)
1051 __releases(fiq->lock)
1052 {
1053         struct fuse_in_header ih;
1054         struct fuse_interrupt_in arg;
1055         unsigned reqsize = sizeof(ih) + sizeof(arg);
1056         int err;
1057 
1058         list_del_init(&req->intr_entry);
1059         memset(&ih, 0, sizeof(ih));
1060         memset(&arg, 0, sizeof(arg));
1061         ih.len = reqsize;
1062         ih.opcode = FUSE_INTERRUPT;
1063         ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT);
1064         arg.unique = req->in.h.unique;
1065 
1066         spin_unlock(&fiq->lock);
1067         if (nbytes < reqsize)
1068                 return -EINVAL;
1069 
1070         err = fuse_copy_one(cs, &ih, sizeof(ih));
1071         if (!err)
1072                 err = fuse_copy_one(cs, &arg, sizeof(arg));
1073         fuse_copy_finish(cs);
1074 
1075         return err ? err : reqsize;
1076 }
1077 
1078 struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
1079                                              unsigned int max,
1080                                              unsigned int *countp)
1081 {
1082         struct fuse_forget_link *head = fiq->forget_list_head.next;
1083         struct fuse_forget_link **newhead = &head;
1084         unsigned count;
1085 
1086         for (count = 0; *newhead != NULL && count < max; count++)
1087                 newhead = &(*newhead)->next;
1088 
1089         fiq->forget_list_head.next = *newhead;
1090         *newhead = NULL;
1091         if (fiq->forget_list_head.next == NULL)
1092                 fiq->forget_list_tail = &fiq->forget_list_head;
1093 
1094         if (countp != NULL)
1095                 *countp = count;
1096 
1097         return head;
1098 }
1099 EXPORT_SYMBOL(fuse_dequeue_forget);
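/*
 * Worked example (not in the original source): with a queued forget list
 * A -> B -> C -> D and max == 3, fuse_dequeue_forget() hands back the chain
 * A -> B -> C (*countp == 3) and leaves D as the new list head; a later call
 * that drains D also resets forget_list_tail back to &fiq->forget_list_head.
 */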
1100 
1101 static int fuse_read_single_forget(struct fuse_iqueue *fiq,
1102                                    struct fuse_copy_state *cs,
1103                                    size_t nbytes)
1104 __releases(fiq->lock)
1105 {
1106         int err;
1107         struct fuse_forget_link *forget = fuse_dequeue_forget(fiq, 1, NULL);
1108         struct fuse_forget_in arg = {
1109                 .nlookup = forget->forget_one.nlookup,
1110         };
1111         struct fuse_in_header ih = {
1112                 .opcode = FUSE_FORGET,
1113                 .nodeid = forget->forget_one.nodeid,
1114                 .unique = fuse_get_unique(fiq),
1115                 .len = sizeof(ih) + sizeof(arg),
1116         };
1117 
1118         spin_unlock(&fiq->lock);
1119         kfree(forget);
1120         if (nbytes < ih.len)
1121                 return -EINVAL;
1122 
1123         err = fuse_copy_one(cs, &ih, sizeof(ih));
1124         if (!err)
1125                 err = fuse_copy_one(cs, &arg, sizeof(arg));
1126         fuse_copy_finish(cs);
1127 
1128         if (err)
1129                 return err;
1130 
1131         return ih.len;
1132 }
1133 
1134 static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
1135                                    struct fuse_copy_state *cs, size_t nbytes)
1136 __releases(fiq->lock)
1137 {
1138         int err;
1139         unsigned max_forgets;
1140         unsigned count;
1141         struct fuse_forget_link *head;
1142         struct fuse_batch_forget_in arg = { .count = 0 };
1143         struct fuse_in_header ih = {
1144                 .opcode = FUSE_BATCH_FORGET,
1145                 .unique = fuse_get_unique(fiq),
1146                 .len = sizeof(ih) + sizeof(arg),
1147         };
1148 
1149         if (nbytes < ih.len) {
1150                 spin_unlock(&fiq->lock);
1151                 return -EINVAL;
1152         }
1153 
1154         max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1155         head = fuse_dequeue_forget(fiq, max_forgets, &count);
1156         spin_unlock(&fiq->lock);
1157 
1158         arg.count = count;
1159         ih.len += count * sizeof(struct fuse_forget_one);
1160         err = fuse_copy_one(cs, &ih, sizeof(ih));
1161         if (!err)
1162                 err = fuse_copy_one(cs, &arg, sizeof(arg));
1163 
1164         while (head) {
1165                 struct fuse_forget_link *forget = head;
1166 
1167                 if (!err) {
1168                         err = fuse_copy_one(cs, &forget->forget_one,
1169                                             sizeof(forget->forget_one));
1170                 }
1171                 head = forget->next;
1172                 kfree(forget);
1173         }
1174 
1175         fuse_copy_finish(cs);
1176 
1177         if (err)
1178                 return err;
1179 
1180         return ih.len;
1181 }
1182 
1183 static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq,
1184                             struct fuse_copy_state *cs,
1185                             size_t nbytes)
1186 __releases(fiq->lock)
1187 {
1188         if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL)
1189                 return fuse_read_single_forget(fiq, cs, nbytes);
1190         else
1191                 return fuse_read_batch_forget(fiq, cs, nbytes);
1192 }
1193 
1194 /*
1195  * Read a single request into the userspace filesystem's buffer.  This
1196  * function waits until a request is available, then removes it from
1197  * the pending list and copies request data to userspace buffer.  If
1198  * no reply is needed (FORGET) or request has been aborted or there
1199  * was an error during the copying then it's finished by calling
1200  * fuse_request_end().  Otherwise add it to the processing list, and set
1201  * the 'sent' flag.
1202  */
1203 static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
1204                                 struct fuse_copy_state *cs, size_t nbytes)
1205 {
1206         ssize_t err;
1207         struct fuse_conn *fc = fud->fc;
1208         struct fuse_iqueue *fiq = &fc->iq;
1209         struct fuse_pqueue *fpq = &fud->pq;
1210         struct fuse_req *req;
1211         struct fuse_args *args;
1212         unsigned reqsize;
1213         unsigned int hash;
1214 
1215         /*
1216          * Require sane minimum read buffer - that has capacity for fixed part
1217          * of any request header + negotiated max_write room for data.
1218          *
1219          * Historically libfuse reserves 4K for fixed header room, but e.g.
1220          * GlusterFS reserves only 80 bytes
1221          *
1222          *      = `sizeof(fuse_in_header) + sizeof(fuse_write_in)`
1223          *
1224          * which is the absolute minimum any sane filesystem should be using
1225          * for header room.
1226          */
1227         if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER,
1228                            sizeof(struct fuse_in_header) +
1229                            sizeof(struct fuse_write_in) +
1230                            fc->max_write))
1231                 return -EINVAL;
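	/*
	 * Worked numbers (not in the original source, assuming the current
	 * ABI layout): struct fuse_in_header and struct fuse_write_in are
	 * 40 bytes each, giving the 80-byte header room mentioned above;
	 * on top of that the buffer must hold max_write bytes of WRITE
	 * payload and can never be smaller than FUSE_MIN_READ_BUFFER (8192).
	 */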
1232 
1233  restart:
1234         for (;;) {
1235                 spin_lock(&fiq->lock);
1236                 if (!fiq->connected || request_pending(fiq))
1237                         break;
1238                 spin_unlock(&fiq->lock);
1239 
1240                 if (file->f_flags & O_NONBLOCK)
1241                         return -EAGAIN;
1242                 err = wait_event_interruptible_exclusive(fiq->waitq,
1243                                 !fiq->connected || request_pending(fiq));
1244                 if (err)
1245                         return err;
1246         }
1247 
1248         if (!fiq->connected) {
1249                 err = fc->aborted ? -ECONNABORTED : -ENODEV;
1250                 goto err_unlock;
1251         }
1252 
1253         if (!list_empty(&fiq->interrupts)) {
1254                 req = list_entry(fiq->interrupts.next, struct fuse_req,
1255                                  intr_entry);
1256                 return fuse_read_interrupt(fiq, cs, nbytes, req);
1257         }
1258 
1259         if (forget_pending(fiq)) {
1260                 if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0)
1261                         return fuse_read_forget(fc, fiq, cs, nbytes);
1262 
1263                 if (fiq->forget_batch <= -8)
1264                         fiq->forget_batch = 16;
1265         }
1266 
1267         req = list_entry(fiq->pending.next, struct fuse_req, list);
1268         clear_bit(FR_PENDING, &req->flags);
1269         list_del_init(&req->list);
1270         spin_unlock(&fiq->lock);
1271 
1272         args = req->args;
1273         reqsize = req->in.h.len;
1274 
1275         /* If request is too large, reply with an error and restart the read */
1276         if (nbytes < reqsize) {
1277                 req->out.h.error = -EIO;
1278                 /* SETXATTR is special, since it may contain too large data */
1279                 if (args->opcode == FUSE_SETXATTR)
1280                         req->out.h.error = -E2BIG;
1281                 fuse_request_end(req);
1282                 goto restart;
1283         }
1284         spin_lock(&fpq->lock);
1285         /*
1286          *  Must not put request on fpq->io queue after having been shut down by
1287          *  fuse_abort_conn()
1288          */
1289         if (!fpq->connected) {
1290                 req->out.h.error = err = -ECONNABORTED;
1291                 goto out_end;
1292 
1293         }
1294         list_add(&req->list, &fpq->io);
1295         spin_unlock(&fpq->lock);
1296         cs->req = req;
1297         err = fuse_copy_one(cs, &req->in.h, sizeof(req->in.h));
1298         if (!err)
1299                 err = fuse_copy_args(cs, args->in_numargs, args->in_pages,
1300                                      (struct fuse_arg *) args->in_args, 0);
1301         fuse_copy_finish(cs);
1302         spin_lock(&fpq->lock);
1303         clear_bit(FR_LOCKED, &req->flags);
1304         if (!fpq->connected) {
1305                 err = fc->aborted ? -ECONNABORTED : -ENODEV;
1306                 goto out_end;
1307         }
1308         if (err) {
1309                 req->out.h.error = -EIO;
1310                 goto out_end;
1311         }
1312         if (!test_bit(FR_ISREPLY, &req->flags)) {
1313                 err = reqsize;
1314                 goto out_end;
1315         }
1316         hash = fuse_req_hash(req->in.h.unique);
1317         list_move_tail(&req->list, &fpq->processing[hash]);
1318         __fuse_get_request(req);
1319         set_bit(FR_SENT, &req->flags);
1320         spin_unlock(&fpq->lock);
1321         /* matches barrier in request_wait_answer() */
1322         smp_mb__after_atomic();
1323         if (test_bit(FR_INTERRUPTED, &req->flags))
1324                 queue_interrupt(req);
1325         fuse_put_request(req);
1326 
1327         return reqsize;
1328 
1329 out_end:
1330         if (!test_bit(FR_PRIVATE, &req->flags))
1331                 list_del_init(&req->list);
1332         spin_unlock(&fpq->lock);
1333         fuse_request_end(req);
1334         return err;
1335 
1336  err_unlock:
1337         spin_unlock(&fiq->lock);
1338         return err;
1339 }
1340 
1341 static int fuse_dev_open(struct inode *inode, struct file *file)
1342 {
1343         /*
1344          * The fuse device's file's private_data is used to hold
1345          * the fuse_dev once the device has been mounted (or cloned), and
1346          * to keep track of whether the file has been mounted already.
1347          */
1348         file->private_data = NULL;
1349         return 0;
1350 }
1351 
1352 static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
1353 {
1354         struct fuse_copy_state cs;
1355         struct file *file = iocb->ki_filp;
1356         struct fuse_dev *fud = fuse_get_dev(file);
1357 
1358         if (!fud)
1359                 return -EPERM;
1360 
1361         if (!user_backed_iter(to))
1362                 return -EINVAL;
1363 
1364         fuse_copy_init(&cs, 1, to);
1365 
1366         return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
1367 }
1368 
1369 static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1370                                     struct pipe_inode_info *pipe,
1371                                     size_t len, unsigned int flags)
1372 {
1373         int total, ret;
1374         int page_nr = 0;
1375         struct pipe_buffer *bufs;
1376         struct fuse_copy_state cs;
1377         struct fuse_dev *fud = fuse_get_dev(in);
1378 
1379         if (!fud)
1380                 return -EPERM;
1381 
1382         bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer),
1383                               GFP_KERNEL);
1384         if (!bufs)
1385                 return -ENOMEM;
1386 
1387         fuse_copy_init(&cs, 1, NULL);
1388         cs.pipebufs = bufs;
1389         cs.pipe = pipe;
1390         ret = fuse_dev_do_read(fud, in, &cs, len);
1391         if (ret < 0)
1392                 goto out;
1393 
1394         if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->max_usage) {
1395                 ret = -EIO;
1396                 goto out;
1397         }
1398 
1399         for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
1400                 /*
1401                  * Need to be careful about this.  Having buf->ops in module
1402                  * code can Oops if the buffer persists after module unload.
1403                  */
1404                 bufs[page_nr].ops = &nosteal_pipe_buf_ops;
1405                 bufs[page_nr].flags = 0;
1406                 ret = add_to_pipe(pipe, &bufs[page_nr++]);
1407                 if (unlikely(ret < 0))
1408                         break;
1409         }
1410         if (total)
1411                 ret = total;
1412 out:
1413         for (; page_nr < cs.nr_segs; page_nr++)
1414                 put_page(bufs[page_nr].page);
1415 
1416         kvfree(bufs);
1417         return ret;
1418 }
1419 
1420 static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
1421                             struct fuse_copy_state *cs)
1422 {
1423         struct fuse_notify_poll_wakeup_out outarg;
1424         int err = -EINVAL;
1425 
1426         if (size != sizeof(outarg))
1427                 goto err;
1428 
1429         err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1430         if (err)
1431                 goto err;
1432 
1433         fuse_copy_finish(cs);
1434         return fuse_notify_poll_wakeup(fc, &outarg);
1435 
1436 err:
1437         fuse_copy_finish(cs);
1438         return err;
1439 }
1440 
1441 static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
1442                                    struct fuse_copy_state *cs)
1443 {
1444         struct fuse_notify_inval_inode_out outarg;
1445         int err = -EINVAL;
1446 
1447         if (size != sizeof(outarg))
1448                 goto err;
1449 
1450         err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1451         if (err)
1452                 goto err;
1453         fuse_copy_finish(cs);
1454 
1455         down_read(&fc->killsb);
1456         err = fuse_reverse_inval_inode(fc, outarg.ino,
1457                                        outarg.off, outarg.len);
1458         up_read(&fc->killsb);
1459         return err;
1460 
1461 err:
1462         fuse_copy_finish(cs);
1463         return err;
1464 }
1465 
1466 static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1467                                    struct fuse_copy_state *cs)
1468 {
1469         struct fuse_notify_inval_entry_out outarg;
1470         int err = -ENOMEM;
1471         char *buf;
1472         struct qstr name;
1473 
1474         buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1475         if (!buf)
1476                 goto err;
1477 
1478         err = -EINVAL;
1479         if (size < sizeof(outarg))
1480                 goto err;
1481 
1482         err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1483         if (err)
1484                 goto err;
1485 
1486         err = -ENAMETOOLONG;
1487         if (outarg.namelen > FUSE_NAME_MAX)
1488                 goto err;
1489 
1490         err = -EINVAL;
1491         if (size != sizeof(outarg) + outarg.namelen + 1)
1492                 goto err;
1493 
1494         name.name = buf;
1495         name.len = outarg.namelen;
1496         err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1497         if (err)
1498                 goto err;
1499         fuse_copy_finish(cs);
1500         buf[outarg.namelen] = 0;
1501 
1502         down_read(&fc->killsb);
1503         err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name, outarg.flags);
1504         up_read(&fc->killsb);
1505         kfree(buf);
1506         return err;
1507 
1508 err:
1509         kfree(buf);
1510         fuse_copy_finish(cs);
1511         return err;
1512 }
1513 
1514 static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
1515                               struct fuse_copy_state *cs)
1516 {
1517         struct fuse_notify_delete_out outarg;
1518         int err = -ENOMEM;
1519         char *buf;
1520         struct qstr name;
1521 
1522         buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1523         if (!buf)
1524                 goto err;
1525 
1526         err = -EINVAL;
1527         if (size < sizeof(outarg))
1528                 goto err;
1529 
1530         err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1531         if (err)
1532                 goto err;
1533 
1534         err = -ENAMETOOLONG;
1535         if (outarg.namelen > FUSE_NAME_MAX)
1536                 goto err;
1537 
1538         err = -EINVAL;
1539         if (size != sizeof(outarg) + outarg.namelen + 1)
1540                 goto err;
1541 
1542         name.name = buf;
1543         name.len = outarg.namelen;
1544         err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1545         if (err)
1546                 goto err;
1547         fuse_copy_finish(cs);
1548         buf[outarg.namelen] = 0;
1549 
1550         down_read(&fc->killsb);
1551         err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name, 0);
1552         up_read(&fc->killsb);
1553         kfree(buf);
1554         return err;
1555 
1556 err:
1557         kfree(buf);
1558         fuse_copy_finish(cs);
1559         return err;
1560 }
1561 
1562 static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1563                              struct fuse_copy_state *cs)
1564 {
1565         struct fuse_notify_store_out outarg;
1566         struct inode *inode;
1567         struct address_space *mapping;
1568         u64 nodeid;
1569         int err;
1570         pgoff_t index;
1571         unsigned int offset;
1572         unsigned int num;
1573         loff_t file_size;
1574         loff_t end;
1575 
1576         err = -EINVAL;
1577         if (size < sizeof(outarg))
1578                 goto out_finish;
1579 
1580         err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1581         if (err)
1582                 goto out_finish;
1583 
1584         err = -EINVAL;
1585         if (size - sizeof(outarg) != outarg.size)
1586                 goto out_finish;
1587 
1588         nodeid = outarg.nodeid;
1589 
1590         down_read(&fc->killsb);
1591 
1592         err = -ENOENT;
1593         inode = fuse_ilookup(fc, nodeid, NULL);
1594         if (!inode)
1595                 goto out_up_killsb;
1596 
1597         mapping = inode->i_mapping;
1598         index = outarg.offset >> PAGE_SHIFT;
1599         offset = outarg.offset & ~PAGE_MASK;
1600         file_size = i_size_read(inode);
1601         end = outarg.offset + outarg.size;
1602         if (end > file_size) {
1603                 file_size = end;
1604                 fuse_write_update_attr(inode, file_size, outarg.size);
1605         }
1606 
1607         num = outarg.size;
1608         while (num) {
1609                 struct page *page;
1610                 unsigned int this_num;
1611 
1612                 err = -ENOMEM;
1613                 page = find_or_create_page(mapping, index,
1614                                            mapping_gfp_mask(mapping));
1615                 if (!page)
1616                         goto out_iput;
1617 
1618                 this_num = min_t(unsigned, num, PAGE_SIZE - offset);
1619                 err = fuse_copy_page(cs, &page, offset, this_num, 0);
1620                 if (!PageUptodate(page) && !err && offset == 0 &&
1621                     (this_num == PAGE_SIZE || file_size == end)) {
1622                         zero_user_segment(page, this_num, PAGE_SIZE);
1623                         SetPageUptodate(page);
1624                 }
1625                 unlock_page(page);
1626                 put_page(page);
1627 
1628                 if (err)
1629                         goto out_iput;
1630 
1631                 num -= this_num;
1632                 offset = 0;
1633                 index++;
1634         }
1635 
1636         err = 0;
1637 
1638 out_iput:
1639         iput(inode);
1640 out_up_killsb:
1641         up_read(&fc->killsb);
1642 out_finish:
1643         fuse_copy_finish(cs);
1644         return err;
1645 }
1646 
1647 struct fuse_retrieve_args {
1648         struct fuse_args_pages ap;
1649         struct fuse_notify_retrieve_in inarg;
1650 };
1651 
1652 static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args,
1653                               int error)
1654 {
1655         struct fuse_retrieve_args *ra =
1656                 container_of(args, typeof(*ra), ap.args);
1657 
1658         release_pages(ra->ap.pages, ra->ap.num_pages);
1659         kfree(ra);
1660 }
1661 
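     /*
      * Build and send a FUSE_NOTIFY_REPLY carrying the requested byte range
      * from the inode's page cache.  Only pages already present are included;
      * the scan stops at the first missing page, and the range is capped by
      * max_write and max_pages.
      */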
1662 static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
1663                          struct fuse_notify_retrieve_out *outarg)
1664 {
1665         int err;
1666         struct address_space *mapping = inode->i_mapping;
1667         pgoff_t index;
1668         loff_t file_size;
1669         unsigned int num;
1670         unsigned int offset;
1671         size_t total_len = 0;
1672         unsigned int num_pages;
1673         struct fuse_conn *fc = fm->fc;
1674         struct fuse_retrieve_args *ra;
1675         size_t args_size = sizeof(*ra);
1676         struct fuse_args_pages *ap;
1677         struct fuse_args *args;
1678 
1679         offset = outarg->offset & ~PAGE_MASK;
1680         file_size = i_size_read(inode);
1681 
1682         num = min(outarg->size, fc->max_write);
1683         if (outarg->offset > file_size)
1684                 num = 0;
1685         else if (outarg->offset + num > file_size)
1686                 num = file_size - outarg->offset;
1687 
1688         num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1689         num_pages = min(num_pages, fc->max_pages);
1690 
1691         args_size += num_pages * (sizeof(ap->pages[0]) + sizeof(ap->descs[0]));
1692 
1693         ra = kzalloc(args_size, GFP_KERNEL);
1694         if (!ra)
1695                 return -ENOMEM;
1696 
1697         ap = &ra->ap;
1698         ap->pages = (void *) (ra + 1);
1699         ap->descs = (void *) (ap->pages + num_pages);
1700 
1701         args = &ap->args;
1702         args->nodeid = outarg->nodeid;
1703         args->opcode = FUSE_NOTIFY_REPLY;
1704         args->in_numargs = 2;
1705         args->in_pages = true;
1706         args->end = fuse_retrieve_end;
1707 
1708         index = outarg->offset >> PAGE_SHIFT;
1709 
1710         while (num && ap->num_pages < num_pages) {
1711                 struct page *page;
1712                 unsigned int this_num;
1713 
1714                 page = find_get_page(mapping, index);
1715                 if (!page)
1716                         break;
1717 
1718                 this_num = min_t(unsigned, num, PAGE_SIZE - offset);
1719                 ap->pages[ap->num_pages] = page;
1720                 ap->descs[ap->num_pages].offset = offset;
1721                 ap->descs[ap->num_pages].length = this_num;
1722                 ap->num_pages++;
1723 
1724                 offset = 0;
1725                 num -= this_num;
1726                 total_len += this_num;
1727                 index++;
1728         }
1729         ra->inarg.offset = outarg->offset;
1730         ra->inarg.size = total_len;
1731         args->in_args[0].size = sizeof(ra->inarg);
1732         args->in_args[0].value = &ra->inarg;
1733         args->in_args[1].size = total_len;
1734 
1735         err = fuse_simple_notify_reply(fm, args, outarg->notify_unique);
1736         if (err)
1737                 fuse_retrieve_end(fm, args, err);
1738 
1739         return err;
1740 }
1741 
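     /*
      * FUSE_NOTIFY_RETRIEVE: look up the inode by nodeid and answer with the
      * cached data via fuse_retrieve().
      */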
1742 static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1743                                 struct fuse_copy_state *cs)
1744 {
1745         struct fuse_notify_retrieve_out outarg;
1746         struct fuse_mount *fm;
1747         struct inode *inode;
1748         u64 nodeid;
1749         int err;
1750 
1751         err = -EINVAL;
1752         if (size != sizeof(outarg))
1753                 goto copy_finish;
1754 
1755         err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1756         if (err)
1757                 goto copy_finish;
1758 
1759         fuse_copy_finish(cs);
1760 
1761         down_read(&fc->killsb);
1762         err = -ENOENT;
1763         nodeid = outarg.nodeid;
1764 
1765         inode = fuse_ilookup(fc, nodeid, &fm);
1766         if (inode) {
1767                 err = fuse_retrieve(fm, inode, &outarg);
1768                 iput(inode);
1769         }
1770         up_read(&fc->killsb);
1771 
1772         return err;
1773 
1774 copy_finish:
1775         fuse_copy_finish(cs);
1776         return err;
1777 }
1778 
1779 /*
1780  * Resend all processing queue requests.
1781  *
1782  * When a FUSE daemon panics and fails over, some in-flight requests may be
1783  * lost and never answered, leaving the applications that wait for replies
1784  * stuck forever.  To address this, the daemon can use this notification to
1785  * trigger resending of those requests, ensuring they are properly processed
1786  * again.
1787  *
1788  * Note that this strategy is applicable only to idempotent requests, or when
1789  * the FUSE daemon takes careful measures to avoid processing duplicated
1790  * non-idempotent requests.
1791  */
1792 static void fuse_resend(struct fuse_conn *fc)
1793 {
1794         struct fuse_dev *fud;
1795         struct fuse_req *req, *next;
1796         struct fuse_iqueue *fiq = &fc->iq;
1797         LIST_HEAD(to_queue);
1798         unsigned int i;
1799 
1800         spin_lock(&fc->lock);
1801         if (!fc->connected) {
1802                 spin_unlock(&fc->lock);
1803                 return;
1804         }
1805 
1806         list_for_each_entry(fud, &fc->devices, entry) {
1807                 struct fuse_pqueue *fpq = &fud->pq;
1808 
1809                 spin_lock(&fpq->lock);
1810                 for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
1811                         list_splice_tail_init(&fpq->processing[i], &to_queue);
1812                 spin_unlock(&fpq->lock);
1813         }
1814         spin_unlock(&fc->lock);
1815 
1816         list_for_each_entry_safe(req, next, &to_queue, list) {
1817                 set_bit(FR_PENDING, &req->flags);
1818                 clear_bit(FR_SENT, &req->flags);
1819                 /* mark the request as a resend request */
1820                 req->in.h.unique |= FUSE_UNIQUE_RESEND;
1821         }
1822 
1823         spin_lock(&fiq->lock);
1824         if (!fiq->connected) {
1825                 spin_unlock(&fiq->lock);
1826                 list_for_each_entry(req, &to_queue, list)
1827                         clear_bit(FR_PENDING, &req->flags);
1828                 end_requests(&to_queue);
1829                 return;
1830         }
1831         /* iq and pq requests are both oldest to newest */
1832         list_splice(&to_queue, &fiq->pending);
1833         fiq->ops->wake_pending_and_unlock(fiq);
1834 }
1835 
1836 static int fuse_notify_resend(struct fuse_conn *fc)
1837 {
1838         fuse_resend(fc);
1839         return 0;
1840 }
1841 
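     /* Dispatch an unsolicited notification from the daemon to its handler */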
1842 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1843                        unsigned int size, struct fuse_copy_state *cs)
1844 {
1845         /* Don't try to move pages (yet) */
1846         cs->move_pages = 0;
1847 
1848         switch (code) {
1849         case FUSE_NOTIFY_POLL:
1850                 return fuse_notify_poll(fc, size, cs);
1851 
1852         case FUSE_NOTIFY_INVAL_INODE:
1853                 return fuse_notify_inval_inode(fc, size, cs);
1854 
1855         case FUSE_NOTIFY_INVAL_ENTRY:
1856                 return fuse_notify_inval_entry(fc, size, cs);
1857 
1858         case FUSE_NOTIFY_STORE:
1859                 return fuse_notify_store(fc, size, cs);
1860 
1861         case FUSE_NOTIFY_RETRIEVE:
1862                 return fuse_notify_retrieve(fc, size, cs);
1863 
1864         case FUSE_NOTIFY_DELETE:
1865                 return fuse_notify_delete(fc, size, cs);
1866 
1867         case FUSE_NOTIFY_RESEND:
1868                 return fuse_notify_resend(fc);
1869 
1870         default:
1871                 fuse_copy_finish(cs);
1872                 return -EINVAL;
1873         }
1874 }
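
     /*
      * Illustrative userspace-side sketch (not part of this file): a daemon
      * emits one of the notifications handled above by writing a
      * fuse_out_header with unique == 0 and the notification code in the
      * error field, followed by the code-specific payload.  For example, for
      * FUSE_NOTIFY_INVAL_ENTRY (struct layout per <uapi/linux/fuse.h>;
      * parent_nodeid and name are placeholders):
      *
      *     struct fuse_notify_inval_entry_out outarg = {
      *             .parent  = parent_nodeid,
      *             .namelen = strlen(name),
      *     };
      *     struct fuse_out_header hdr = {
      *             .len    = sizeof(hdr) + sizeof(outarg) + outarg.namelen + 1,
      *             .error  = FUSE_NOTIFY_INVAL_ENTRY,
      *             .unique = 0,
      *     };
      *
      * The daemon then writes hdr, outarg and the NUL-terminated name to the
      * /dev/fuse fd in a single write(); fuse_notify_inval_entry() rejects any
      * size that is not exactly sizeof(outarg) + namelen + 1.
      */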
1875 
1876 /* Look up request on processing list by unique ID */
1877 static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
1878 {
1879         unsigned int hash = fuse_req_hash(unique);
1880         struct fuse_req *req;
1881 
1882         list_for_each_entry(req, &fpq->processing[hash], list) {
1883                 if (req->in.h.unique == unique)
1884                         return req;
1885         }
1886         return NULL;
1887 }
1888 
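     /*
      * Copy the reply payload into the request's output arguments.  The reply
      * may be shorter than expected only if the last argument is variable
      * length (out_argvar); it may never be longer.
      */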
1889 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
1890                          unsigned nbytes)
1891 {
1892         unsigned reqsize = sizeof(struct fuse_out_header);
1893 
1894         reqsize += fuse_len_args(args->out_numargs, args->out_args);
1895 
1896         if (reqsize < nbytes || (reqsize > nbytes && !args->out_argvar))
1897                 return -EINVAL;
1898         else if (reqsize > nbytes) {
1899                 struct fuse_arg *lastarg = &args->out_args[args->out_numargs-1];
1900                 unsigned diffsize = reqsize - nbytes;
1901 
1902                 if (diffsize > lastarg->size)
1903                         return -EINVAL;
1904                 lastarg->size -= diffsize;
1905         }
1906         return fuse_copy_args(cs, args->out_numargs, args->out_pages,
1907                               args->out_args, args->page_zeroing);
1908 }
1909 
1910 /*
1911  * Write a single reply to a request.  First the header is copied from
1912  * the write buffer.  The request is then looked up on the processing
1913  * list by the unique ID found in the header.  If found, it is removed
1914  * from the list, the rest of the buffer is copied to it, and the
1915  * request is finished by calling fuse_request_end().
1916  */
1917 static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
1918                                  struct fuse_copy_state *cs, size_t nbytes)
1919 {
1920         int err;
1921         struct fuse_conn *fc = fud->fc;
1922         struct fuse_pqueue *fpq = &fud->pq;
1923         struct fuse_req *req;
1924         struct fuse_out_header oh;
1925 
1926         err = -EINVAL;
1927         if (nbytes < sizeof(struct fuse_out_header))
1928                 goto out;
1929 
1930         err = fuse_copy_one(cs, &oh, sizeof(oh));
1931         if (err)
1932                 goto copy_finish;
1933 
1934         err = -EINVAL;
1935         if (oh.len != nbytes)
1936                 goto copy_finish;
1937 
1938         /*
1939          * A zero oh.unique indicates an unsolicited notification message,
1940          * and oh.error then contains the notification code.
1941          */
1942         if (!oh.unique) {
1943                 err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1944                 goto out;
1945         }
1946 
1947         err = -EINVAL;
1948         if (oh.error <= -512 || oh.error > 0)
1949                 goto copy_finish;
1950 
1951         spin_lock(&fpq->lock);
1952         req = NULL;
1953         if (fpq->connected)
1954                 req = request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
1955 
1956         err = -ENOENT;
1957         if (!req) {
1958                 spin_unlock(&fpq->lock);
1959                 goto copy_finish;
1960         }
1961 
1962         /* Is it an interrupt reply ID? */
1963         if (oh.unique & FUSE_INT_REQ_BIT) {
1964                 __fuse_get_request(req);
1965                 spin_unlock(&fpq->lock);
1966 
1967                 err = 0;
1968                 if (nbytes != sizeof(struct fuse_out_header))
1969                         err = -EINVAL;
1970                 else if (oh.error == -ENOSYS)
1971                         fc->no_interrupt = 1;
1972                 else if (oh.error == -EAGAIN)
1973                         err = queue_interrupt(req);
1974 
1975                 fuse_put_request(req);
1976 
1977                 goto copy_finish;
1978         }
1979 
1980         clear_bit(FR_SENT, &req->flags);
1981         list_move(&req->list, &fpq->io);
1982         req->out.h = oh;
1983         set_bit(FR_LOCKED, &req->flags);
1984         spin_unlock(&fpq->lock);
1985         cs->req = req;
1986         if (!req->args->page_replace)
1987                 cs->move_pages = 0;
1988 
1989         if (oh.error)
1990                 err = nbytes != sizeof(oh) ? -EINVAL : 0;
1991         else
1992                 err = copy_out_args(cs, req->args, nbytes);
1993         fuse_copy_finish(cs);
1994 
1995         spin_lock(&fpq->lock);
1996         clear_bit(FR_LOCKED, &req->flags);
1997         if (!fpq->connected)
1998                 err = -ENOENT;
1999         else if (err)
2000                 req->out.h.error = -EIO;
2001         if (!test_bit(FR_PRIVATE, &req->flags))
2002                 list_del_init(&req->list);
2003         spin_unlock(&fpq->lock);
2004 
2005         fuse_request_end(req);
2006 out:
2007         return err ? err : nbytes;
2008 
2009 copy_finish:
2010         fuse_copy_finish(cs);
2011         goto out;
2012 }
2013 
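     /*
      * write(2)/writev(2) on the device: the reply must come from user-backed
      * memory and is handed to fuse_dev_do_write() in one go.
      */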
2014 static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
2015 {
2016         struct fuse_copy_state cs;
2017         struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp);
2018 
2019         if (!fud)
2020                 return -EPERM;
2021 
2022         if (!user_backed_iter(from))
2023                 return -EINVAL;
2024 
2025         fuse_copy_init(&cs, 0, from);
2026 
2027         return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
2028 }
2029 
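     /*
      * splice(2) from a pipe into the device: collect the pipe buffers that
      * make up the reply (detaching full buffers, splitting a final partial
      * one) and feed them to fuse_dev_do_write().  SPLICE_F_MOVE allows pages
      * to be moved into the page cache instead of copied where possible.
      */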
2030 static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
2031                                      struct file *out, loff_t *ppos,
2032                                      size_t len, unsigned int flags)
2033 {
2034         unsigned int head, tail, mask, count;
2035         unsigned nbuf;
2036         unsigned idx;
2037         struct pipe_buffer *bufs;
2038         struct fuse_copy_state cs;
2039         struct fuse_dev *fud;
2040         size_t rem;
2041         ssize_t ret;
2042 
2043         fud = fuse_get_dev(out);
2044         if (!fud)
2045                 return -EPERM;
2046 
2047         pipe_lock(pipe);
2048 
2049         head = pipe->head;
2050         tail = pipe->tail;
2051         mask = pipe->ring_size - 1;
2052         count = head - tail;
2053 
2054         bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL);
2055         if (!bufs) {
2056                 pipe_unlock(pipe);
2057                 return -ENOMEM;
2058         }
2059 
2060         nbuf = 0;
2061         rem = 0;
2062         for (idx = tail; idx != head && rem < len; idx++)
2063                 rem += pipe->bufs[idx & mask].len;
2064 
2065         ret = -EINVAL;
2066         if (rem < len)
2067                 goto out_free;
2068 
2069         rem = len;
2070         while (rem) {
2071                 struct pipe_buffer *ibuf;
2072                 struct pipe_buffer *obuf;
2073 
2074                 if (WARN_ON(nbuf >= count || tail == head))
2075                         goto out_free;
2076 
2077                 ibuf = &pipe->bufs[tail & mask];
2078                 obuf = &bufs[nbuf];
2079 
2080                 if (rem >= ibuf->len) {
2081                         *obuf = *ibuf;
2082                         ibuf->ops = NULL;
2083                         tail++;
2084                         pipe->tail = tail;
2085                 } else {
2086                         if (!pipe_buf_get(pipe, ibuf))
2087                                 goto out_free;
2088 
2089                         *obuf = *ibuf;
2090                         obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
2091                         obuf->len = rem;
2092                         ibuf->offset += obuf->len;
2093                         ibuf->len -= obuf->len;
2094                 }
2095                 nbuf++;
2096                 rem -= obuf->len;
2097         }
2098         pipe_unlock(pipe);
2099 
2100         fuse_copy_init(&cs, 0, NULL);
2101         cs.pipebufs = bufs;
2102         cs.nr_segs = nbuf;
2103         cs.pipe = pipe;
2104 
2105         if (flags & SPLICE_F_MOVE)
2106                 cs.move_pages = 1;
2107 
2108         ret = fuse_dev_do_write(fud, &cs, len);
2109 
2110         pipe_lock(pipe);
2111 out_free:
2112         for (idx = 0; idx < nbuf; idx++) {
2113                 struct pipe_buffer *buf = &bufs[idx];
2114 
2115                 if (buf->ops)
2116                         pipe_buf_release(pipe, buf);
2117         }
2118         pipe_unlock(pipe);
2119 
2120         kvfree(bufs);
2121         return ret;
2122 }
2123 
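     /*
      * The device is always writable; it becomes readable when a request is
      * pending on the input queue, and reports EPOLLERR once the connection
      * is gone.
      */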
2124 static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
2125 {
2126         __poll_t mask = EPOLLOUT | EPOLLWRNORM;
2127         struct fuse_iqueue *fiq;
2128         struct fuse_dev *fud = fuse_get_dev(file);
2129 
2130         if (!fud)
2131                 return EPOLLERR;
2132 
2133         fiq = &fud->fc->iq;
2134         poll_wait(file, &fiq->waitq, wait);
2135 
2136         spin_lock(&fiq->lock);
2137         if (!fiq->connected)
2138                 mask = EPOLLERR;
2139         else if (request_pending(fiq))
2140                 mask |= EPOLLIN | EPOLLRDNORM;
2141         spin_unlock(&fiq->lock);
2142 
2143         return mask;
2144 }
2145 
2146 /* Abort all requests on the given list (pending or processing) */
2147 static void end_requests(struct list_head *head)
2148 {
2149         while (!list_empty(head)) {
2150                 struct fuse_req *req;
2151                 req = list_entry(head->next, struct fuse_req, list);
2152                 req->out.h.error = -ECONNABORTED;
2153                 clear_bit(FR_SENT, &req->flags);
2154                 list_del_init(&req->list);
2155                 fuse_request_end(req);
2156         }
2157 }
2158 
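     /* Wake up everyone waiting in poll on a FUSE file, e.g. after an abort */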
2159 static void end_polls(struct fuse_conn *fc)
2160 {
2161         struct rb_node *p;
2162 
2163         p = rb_first(&fc->polled_files);
2164 
2165         while (p) {
2166                 struct fuse_file *ff;
2167                 ff = rb_entry(p, struct fuse_file, polled_node);
2168                 wake_up_interruptible_all(&ff->poll_wait);
2169 
2170                 p = rb_next(p);
2171         }
2172 }
2173 
2174 /*
2175  * Abort all requests.
2176  *
2177  * Emergency exit in case of a malicious or accidental deadlock, or just a hung
2178  * filesystem.
2179  *
2180  * The same effect is usually achievable through killing the filesystem daemon
2181  * and all users of the filesystem.  The exception is the combination of an
2182  * asynchronous request and the tricky deadlock (see
2183  * Documentation/filesystems/fuse.rst).
2184  *
2185  * Aborting requests under I/O goes as follows: 1: Separate out unlocked
2186  * requests; they should be finished off immediately.  Locked requests will be
2187  * finished after unlock; see unlock_request(). 2: Finish off the unlocked
2188  * requests.  It is possible that some request will finish before we can; this
2189  * is OK, since in that case the request will be removed from the list before
2190  * we touch it.
2191  */
2192 void fuse_abort_conn(struct fuse_conn *fc)
2193 {
2194         struct fuse_iqueue *fiq = &fc->iq;
2195 
2196         spin_lock(&fc->lock);
2197         if (fc->connected) {
2198                 struct fuse_dev *fud;
2199                 struct fuse_req *req, *next;
2200                 LIST_HEAD(to_end);
2201                 unsigned int i;
2202 
2203                 /* Background queuing checks fc->connected under bg_lock */
2204                 spin_lock(&fc->bg_lock);
2205                 fc->connected = 0;
2206                 spin_unlock(&fc->bg_lock);
2207 
2208                 fuse_set_initialized(fc);
2209                 list_for_each_entry(fud, &fc->devices, entry) {
2210                         struct fuse_pqueue *fpq = &fud->pq;
2211 
2212                         spin_lock(&fpq->lock);
2213                         fpq->connected = 0;
2214                         list_for_each_entry_safe(req, next, &fpq->io, list) {
2215                                 req->out.h.error = -ECONNABORTED;
2216                                 spin_lock(&req->waitq.lock);
2217                                 set_bit(FR_ABORTED, &req->flags);
2218                                 if (!test_bit(FR_LOCKED, &req->flags)) {
2219                                         set_bit(FR_PRIVATE, &req->flags);
2220                                         __fuse_get_request(req);
2221                                         list_move(&req->list, &to_end);
2222                                 }
2223                                 spin_unlock(&req->waitq.lock);
2224                         }
2225                         for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
2226                                 list_splice_tail_init(&fpq->processing[i],
2227                                                       &to_end);
2228                         spin_unlock(&fpq->lock);
2229                 }
2230                 spin_lock(&fc->bg_lock);
2231                 fc->blocked = 0;
2232                 fc->max_background = UINT_MAX;
2233                 flush_bg_queue(fc);
2234                 spin_unlock(&fc->bg_lock);
2235 
2236                 spin_lock(&fiq->lock);
2237                 fiq->connected = 0;
2238                 list_for_each_entry(req, &fiq->pending, list)
2239                         clear_bit(FR_PENDING, &req->flags);
2240                 list_splice_tail_init(&fiq->pending, &to_end);
2241                 while (forget_pending(fiq))
2242                         kfree(fuse_dequeue_forget(fiq, 1, NULL));
2243                 wake_up_all(&fiq->waitq);
2244                 spin_unlock(&fiq->lock);
2245                 kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
2246                 end_polls(fc);
2247                 wake_up_all(&fc->blocked_waitq);
2248                 spin_unlock(&fc->lock);
2249 
2250                 end_requests(&to_end);
2251         } else {
2252                 spin_unlock(&fc->lock);
2253         }
2254 }
2255 EXPORT_SYMBOL_GPL(fuse_abort_conn);
2256 
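     /* After an abort: wait until no request holds a num_waiting reference */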
2257 void fuse_wait_aborted(struct fuse_conn *fc)
2258 {
2259         /* matches implicit memory barrier in fuse_drop_waiting() */
2260         smp_mb();
2261         wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
2262 }
2263 
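     /*
      * Device fd is being closed: end any requests still on this device's
      * processing queues and abort the connection if this was the last open
      * device.
      */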
2264 int fuse_dev_release(struct inode *inode, struct file *file)
2265 {
2266         struct fuse_dev *fud = fuse_get_dev(file);
2267 
2268         if (fud) {
2269                 struct fuse_conn *fc = fud->fc;
2270                 struct fuse_pqueue *fpq = &fud->pq;
2271                 LIST_HEAD(to_end);
2272                 unsigned int i;
2273 
2274                 spin_lock(&fpq->lock);
2275                 WARN_ON(!list_empty(&fpq->io));
2276                 for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
2277                         list_splice_init(&fpq->processing[i], &to_end);
2278                 spin_unlock(&fpq->lock);
2279 
2280                 end_requests(&to_end);
2281 
2282                 /* Are we the last open device? */
2283                 if (atomic_dec_and_test(&fc->dev_count)) {
2284                         WARN_ON(fc->iq.fasync != NULL);
2285                         fuse_abort_conn(fc);
2286                 }
2287                 fuse_dev_free(fud);
2288         }
2289         return 0;
2290 }
2291 EXPORT_SYMBOL_GPL(fuse_dev_release);
2292 
2293 static int fuse_dev_fasync(int fd, struct file *file, int on)
2294 {
2295         struct fuse_dev *fud = fuse_get_dev(file);
2296 
2297         if (!fud)
2298                 return -EPERM;
2299 
2300         /* No locking - fasync_helper does its own locking */
2301         return fasync_helper(fd, file, on, &fud->fc->iq.fasync);
2302 }
2303 
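     /* Install a new fuse_dev on @new so another fd can serve the same connection */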
2304 static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
2305 {
2306         struct fuse_dev *fud;
2307 
2308         if (new->private_data)
2309                 return -EINVAL;
2310 
2311         fud = fuse_dev_alloc_install(fc);
2312         if (!fud)
2313                 return -ENOMEM;
2314 
2315         new->private_data = fud;
2316         atomic_inc(&fc->dev_count);
2317 
2318         return 0;
2319 }
2320 
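     /*
      * FUSE_DEV_IOC_CLONE: clone the connection of an existing /dev/fuse fd
      * (passed in by the caller) onto this file.
      */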
2321 static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp)
2322 {
2323         int res;
2324         int oldfd;
2325         struct fuse_dev *fud = NULL;
2326         struct fd f;
2327 
2328         if (get_user(oldfd, argp))
2329                 return -EFAULT;
2330 
2331         f = fdget(oldfd);
2332         if (!f.file)
2333                 return -EINVAL;
2334 
2335         /*
2336          * Check against file->f_op because CUSE
2337          * uses the same ioctl handler.
2338          */
2339         if (f.file->f_op == file->f_op)
2340                 fud = fuse_get_dev(f.file);
2341 
2342         res = -EINVAL;
2343         if (fud) {
2344                 mutex_lock(&fuse_mutex);
2345                 res = fuse_device_clone(fud->fc, file);
2346                 mutex_unlock(&fuse_mutex);
2347         }
2348 
2349         fdput(f);
2350         return res;
2351 }
2352 
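     /*
      * FUSE_DEV_IOC_BACKING_OPEN: register a backing file for passthrough
      * (CONFIG_FUSE_PASSTHROUGH only); FUSE_DEV_IOC_BACKING_CLOSE below drops
      * it again by id.
      */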
2353 static long fuse_dev_ioctl_backing_open(struct file *file,
2354                                         struct fuse_backing_map __user *argp)
2355 {
2356         struct fuse_dev *fud = fuse_get_dev(file);
2357         struct fuse_backing_map map;
2358 
2359         if (!fud)
2360                 return -EPERM;
2361 
2362         if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
2363                 return -EOPNOTSUPP;
2364 
2365         if (copy_from_user(&map, argp, sizeof(map)))
2366                 return -EFAULT;
2367 
2368         return fuse_backing_open(fud->fc, &map);
2369 }
2370 
2371 static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp)
2372 {
2373         struct fuse_dev *fud = fuse_get_dev(file);
2374         int backing_id;
2375 
2376         if (!fud)
2377                 return -EPERM;
2378 
2379         if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
2380                 return -EOPNOTSUPP;
2381 
2382         if (get_user(backing_id, argp))
2383                 return -EFAULT;
2384 
2385         return fuse_backing_close(fud->fc, backing_id);
2386 }
2387 
2388 static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
2389                            unsigned long arg)
2390 {
2391         void __user *argp = (void __user *)arg;
2392 
2393         switch (cmd) {
2394         case FUSE_DEV_IOC_CLONE:
2395                 return fuse_dev_ioctl_clone(file, argp);
2396 
2397         case FUSE_DEV_IOC_BACKING_OPEN:
2398                 return fuse_dev_ioctl_backing_open(file, argp);
2399 
2400         case FUSE_DEV_IOC_BACKING_CLOSE:
2401                 return fuse_dev_ioctl_backing_close(file, argp);
2402 
2403         default:
2404                 return -ENOTTY;
2405         }
2406 }
2407 
2408 const struct file_operations fuse_dev_operations = {
2409         .owner          = THIS_MODULE,
2410         .open           = fuse_dev_open,
2411         .llseek         = no_llseek,
2412         .read_iter      = fuse_dev_read,
2413         .splice_read    = fuse_dev_splice_read,
2414         .write_iter     = fuse_dev_write,
2415         .splice_write   = fuse_dev_splice_write,
2416         .poll           = fuse_dev_poll,
2417         .release        = fuse_dev_release,
2418         .fasync         = fuse_dev_fasync,
2419         .unlocked_ioctl = fuse_dev_ioctl,
2420         .compat_ioctl   = compat_ptr_ioctl,
2421 };
2422 EXPORT_SYMBOL_GPL(fuse_dev_operations);
2423 
2424 static struct miscdevice fuse_miscdevice = {
2425         .minor = FUSE_MINOR,
2426         .name  = "fuse",
2427         .fops = &fuse_dev_operations,
2428 };
2429 
2430 int __init fuse_dev_init(void)
2431 {
2432         int err = -ENOMEM;
2433         fuse_req_cachep = kmem_cache_create("fuse_request",
2434                                             sizeof(struct fuse_req),
2435                                             0, 0, NULL);
2436         if (!fuse_req_cachep)
2437                 goto out;
2438 
2439         err = misc_register(&fuse_miscdevice);
2440         if (err)
2441                 goto out_cache_clean;
2442 
2443         return 0;
2444 
2445  out_cache_clean:
2446         kmem_cache_destroy(fuse_req_cachep);
2447  out:
2448         return err;
2449 }
2450 
2451 void fuse_dev_cleanup(void)
2452 {
2453         misc_deregister(&fuse_miscdevice);
2454         kmem_cache_destroy(fuse_req_cachep);
2455 }
2456 
