
TOMOYO Linux Cross Reference
Linux/fs/fuse/virtio_fs.c


  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  * virtio-fs: Virtio Filesystem
  4  * Copyright (C) 2018 Red Hat, Inc.
  5  */
  6 
  7 #include <linux/fs.h>
  8 #include <linux/dax.h>
  9 #include <linux/pci.h>
 10 #include <linux/interrupt.h>
 11 #include <linux/group_cpus.h>
 12 #include <linux/pfn_t.h>
 13 #include <linux/memremap.h>
 14 #include <linux/module.h>
 15 #include <linux/virtio.h>
 16 #include <linux/virtio_fs.h>
 17 #include <linux/delay.h>
 18 #include <linux/fs_context.h>
 19 #include <linux/fs_parser.h>
 20 #include <linux/highmem.h>
 21 #include <linux/cleanup.h>
 22 #include <linux/uio.h>
 23 #include "fuse_i.h"
 24 
 25 /* Used to help calculate the FUSE connection's max_pages limit for a request's
 26  * size. Parts of the struct fuse_req are sliced into scattergather lists in
 27  * addition to the pages used, so this can help account for that overhead.
 28  */
 29 #define FUSE_HEADER_OVERHEAD    4
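/*
 * Annotation: the value 4 covers the non-page scatterlist elements a request
 * with a reply can use: fuse_in_header, the packed in args, fuse_out_header
 * and the packed out args (see sg_count_fuse_req()). virtio_fs_get_tree()
 * subtracts this overhead from the virtqueue size when capping
 * fc->max_pages_limit.
 */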
 30 
 31 /* List of virtio-fs device instances and a lock for the list. Also provides
 32  * mutual exclusion between the device removal and mounting paths.
 33  */
 34 static DEFINE_MUTEX(virtio_fs_mutex);
 35 static LIST_HEAD(virtio_fs_instances);
 36 
 37 /* The /sys/fs/virtiofs/ kset */
 38 static struct kset *virtio_fs_kset;
 39 
 40 enum {
 41         VQ_HIPRIO,
 42         VQ_REQUEST
 43 };
 44 
 45 #define VQ_NAME_LEN     24
 46 
 47 /* Per-virtqueue state */
 48 struct virtio_fs_vq {
 49         spinlock_t lock;
 50         struct virtqueue *vq;     /* protected by ->lock */
 51         struct work_struct done_work;
 52         struct list_head queued_reqs;
 53         struct list_head end_reqs;      /* End these requests */
 54         struct work_struct dispatch_work;
 55         struct fuse_dev *fud;
 56         bool connected;
 57         long in_flight;
 58         struct completion in_flight_zero; /* No inflight requests */
 59         char name[VQ_NAME_LEN];
 60 } ____cacheline_aligned_in_smp;
 61 
 62 /* A virtio-fs device instance */
 63 struct virtio_fs {
 64         struct kobject kobj;
 65         struct list_head list;    /* on virtio_fs_instances */
 66         char *tag;
 67         struct virtio_fs_vq *vqs;
 68         unsigned int nvqs;               /* number of virtqueues */
 69         unsigned int num_request_queues; /* number of request queues */
 70         struct dax_device *dax_dev;
 71 
 72         unsigned int *mq_map; /* index = cpu id, value = request vq id */
 73 
 74         /* DAX memory window where file contents are mapped */
 75         void *window_kaddr;
 76         phys_addr_t window_phys_addr;
 77         size_t window_len;
 78 };
 79 
 80 struct virtio_fs_forget_req {
 81         struct fuse_in_header ih;
 82         struct fuse_forget_in arg;
 83 };
 84 
 85 struct virtio_fs_forget {
 86         /* This request can be temporarily queued on virt queue */
 87         struct list_head list;
 88         struct virtio_fs_forget_req req;
 89 };
 90 
 91 struct virtio_fs_req_work {
 92         struct fuse_req *req;
 93         struct virtio_fs_vq *fsvq;
 94         struct work_struct done_work;
 95 };
 96 
 97 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
 98                                  struct fuse_req *req, bool in_flight);
 99 
100 static const struct constant_table dax_param_enums[] = {
101         {"always",      FUSE_DAX_ALWAYS },
102         {"never",       FUSE_DAX_NEVER },
103         {"inode",       FUSE_DAX_INODE_USER },
104         {}
105 };
106 
107 enum {
108         OPT_DAX,
109         OPT_DAX_ENUM,
110 };
111 
112 static const struct fs_parameter_spec virtio_fs_parameters[] = {
113         fsparam_flag("dax", OPT_DAX),
114         fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums),
115         {}
116 };
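/*
 * These correspond to virtiofs-specific mount options. A bare "dax" behaves
 * like "dax=always" (see virtio_fs_parse_param() below). Usage sketch, with
 * the tag name purely illustrative:
 *
 *   mount -t virtiofs -o dax=inode myfs /mnt
 *
 * DAX additionally requires the device to expose a shared memory cache
 * region (see virtio_fs_setup_dax()).
 */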
117 
118 static int virtio_fs_parse_param(struct fs_context *fsc,
119                                  struct fs_parameter *param)
120 {
121         struct fs_parse_result result;
122         struct fuse_fs_context *ctx = fsc->fs_private;
123         int opt;
124 
125         opt = fs_parse(fsc, virtio_fs_parameters, param, &result);
126         if (opt < 0)
127                 return opt;
128 
129         switch (opt) {
130         case OPT_DAX:
131                 ctx->dax_mode = FUSE_DAX_ALWAYS;
132                 break;
133         case OPT_DAX_ENUM:
134                 ctx->dax_mode = result.uint_32;
135                 break;
136         default:
137                 return -EINVAL;
138         }
139 
140         return 0;
141 }
142 
143 static void virtio_fs_free_fsc(struct fs_context *fsc)
144 {
145         struct fuse_fs_context *ctx = fsc->fs_private;
146 
147         kfree(ctx);
148 }
149 
150 static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
151 {
152         struct virtio_fs *fs = vq->vdev->priv;
153 
154         return &fs->vqs[vq->index];
155 }
156 
157 /* Should be called with fsvq->lock held. */
158 static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
159 {
160         fsvq->in_flight++;
161 }
162 
163 /* Should be called with fsvq->lock held. */
164 static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
165 {
166         WARN_ON(fsvq->in_flight <= 0);
167         fsvq->in_flight--;
168         if (!fsvq->in_flight)
169                 complete(&fsvq->in_flight_zero);
170 }
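/*
 * in_flight counts requests currently owned by the device plus requests
 * parked on queued_reqs awaiting a retry; both helpers above require
 * fsvq->lock. virtio_fs_drain_queue() waits on in_flight_zero, which
 * dec_in_flight_req() completes when the counter reaches zero.
 */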
171 
172 static ssize_t tag_show(struct kobject *kobj,
173                 struct kobj_attribute *attr, char *buf)
174 {
175         struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);
176 
177         return sysfs_emit(buf, "%s\n", fs->tag);
178 }
179 
180 static struct kobj_attribute virtio_fs_tag_attr = __ATTR_RO(tag);
181 
182 static struct attribute *virtio_fs_attrs[] = {
183         &virtio_fs_tag_attr.attr,
184         NULL
185 };
186 ATTRIBUTE_GROUPS(virtio_fs);
187 
188 static void virtio_fs_ktype_release(struct kobject *kobj)
189 {
190         struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj);
191 
192         kfree(vfs->mq_map);
193         kfree(vfs->vqs);
194         kfree(vfs);
195 }
196 
197 static const struct kobj_type virtio_fs_ktype = {
198         .release = virtio_fs_ktype_release,
199         .sysfs_ops = &kobj_sysfs_ops,
200         .default_groups = virtio_fs_groups,
201 };
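/*
 * Each instance shows up as /sys/fs/virtiofs/<virtio device index>/ with a
 * read-only "tag" attribute and a "device" symlink to the virtio device (see
 * virtio_fs_add_instance()). The release callback frees the instance once
 * the last reference, held by the driver or by a mount, is dropped.
 */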
202 
203 /* Make sure virtio_fs_mutex is held */
204 static void virtio_fs_put(struct virtio_fs *fs)
205 {
206         kobject_put(&fs->kobj);
207 }
208 
209 static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
210 {
211         struct virtio_fs *vfs = fiq->priv;
212 
213         mutex_lock(&virtio_fs_mutex);
214         virtio_fs_put(vfs);
215         mutex_unlock(&virtio_fs_mutex);
216 }
217 
218 static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
219 {
220         WARN_ON(fsvq->in_flight < 0);
221 
222         /* Wait for in flight requests to finish. */
223         spin_lock(&fsvq->lock);
224         if (fsvq->in_flight) {
225                 /* We are holding virtio_fs_mutex. There should not be any
226                  * waiters waiting for completion.
227                  */
228                 reinit_completion(&fsvq->in_flight_zero);
229                 spin_unlock(&fsvq->lock);
230                 wait_for_completion(&fsvq->in_flight_zero);
231         } else {
232                 spin_unlock(&fsvq->lock);
233         }
234 
235         flush_work(&fsvq->done_work);
236         flush_work(&fsvq->dispatch_work);
237 }
238 
239 static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs)
240 {
241         struct virtio_fs_vq *fsvq;
242         int i;
243 
244         for (i = 0; i < fs->nvqs; i++) {
245                 fsvq = &fs->vqs[i];
246                 virtio_fs_drain_queue(fsvq);
247         }
248 }
249 
250 static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
251 {
252         /* Provides mutual exclusion between ->remove and ->kill_sb
253          * paths. We don't want both of these draining queue at the
254          * same time. Current completion logic reinits completion
255          * and that means there should not be any other thread
256          * doing reinit or waiting for completion already.
257          */
258         mutex_lock(&virtio_fs_mutex);
259         virtio_fs_drain_all_queues_locked(fs);
260         mutex_unlock(&virtio_fs_mutex);
261 }
262 
263 static void virtio_fs_start_all_queues(struct virtio_fs *fs)
264 {
265         struct virtio_fs_vq *fsvq;
266         int i;
267 
268         for (i = 0; i < fs->nvqs; i++) {
269                 fsvq = &fs->vqs[i];
270                 spin_lock(&fsvq->lock);
271                 fsvq->connected = true;
272                 spin_unlock(&fsvq->lock);
273         }
274 }
275 
276 /* Add a new instance to the list or return -EEXIST if tag name exists */
277 static int virtio_fs_add_instance(struct virtio_device *vdev,
278                                   struct virtio_fs *fs)
279 {
280         struct virtio_fs *fs2;
281         int ret;
282 
283         mutex_lock(&virtio_fs_mutex);
284 
285         list_for_each_entry(fs2, &virtio_fs_instances, list) {
286                 if (strcmp(fs->tag, fs2->tag) == 0) {
287                         mutex_unlock(&virtio_fs_mutex);
288                         return -EEXIST;
289                 }
290         }
291 
292         /* Use the virtio_device's index as a unique identifier, there is no
293          * need to allocate our own identifiers because the virtio_fs instance
294          * is only visible to userspace as long as the underlying virtio_device
295          * exists.
296          */
297         fs->kobj.kset = virtio_fs_kset;
298         ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index);
299         if (ret < 0) {
300                 mutex_unlock(&virtio_fs_mutex);
301                 return ret;
302         }
303 
304         ret = sysfs_create_link(&fs->kobj, &vdev->dev.kobj, "device");
305         if (ret < 0) {
306                 kobject_del(&fs->kobj);
307                 mutex_unlock(&virtio_fs_mutex);
308                 return ret;
309         }
310 
311         list_add_tail(&fs->list, &virtio_fs_instances);
312 
313         mutex_unlock(&virtio_fs_mutex);
314 
315         kobject_uevent(&fs->kobj, KOBJ_ADD);
316 
317         return 0;
318 }
319 
320 /* Return the virtio_fs with a given tag, or NULL */
321 static struct virtio_fs *virtio_fs_find_instance(const char *tag)
322 {
323         struct virtio_fs *fs;
324 
325         mutex_lock(&virtio_fs_mutex);
326 
327         list_for_each_entry(fs, &virtio_fs_instances, list) {
328                 if (strcmp(fs->tag, tag) == 0) {
329                         kobject_get(&fs->kobj);
330                         goto found;
331                 }
332         }
333 
334         fs = NULL; /* not found */
335 
336 found:
337         mutex_unlock(&virtio_fs_mutex);
338 
339         return fs;
340 }
341 
342 static void virtio_fs_free_devs(struct virtio_fs *fs)
343 {
344         unsigned int i;
345 
346         for (i = 0; i < fs->nvqs; i++) {
347                 struct virtio_fs_vq *fsvq = &fs->vqs[i];
348 
349                 if (!fsvq->fud)
350                         continue;
351 
352                 fuse_dev_free(fsvq->fud);
353                 fsvq->fud = NULL;
354         }
355 }
356 
357 /* Read the filesystem tag from virtio config into fs->tag (devm-allocated). */
358 static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
359 {
360         char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
361         char *end;
362         size_t len;
363 
364         virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
365                            &tag_buf, sizeof(tag_buf));
366         end = memchr(tag_buf, '\0', sizeof(tag_buf));
367         if (end == tag_buf)
368                 return -EINVAL; /* empty tag */
369         if (!end)
370                 end = &tag_buf[sizeof(tag_buf)];
371 
372         len = end - tag_buf;
373         fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
374         if (!fs->tag)
375                 return -ENOMEM;
376         memcpy(fs->tag, tag_buf, len);
377         fs->tag[len] = '\0';
378 
379         /* While the VIRTIO specification allows any character, newlines are
380          * awkward on mount(8) command-lines and cause problems in the sysfs
381          * "tag" attr and uevent TAG= properties. Forbid them.
382          */
383         if (strchr(fs->tag, '\n')) {
384                 dev_dbg(&vdev->dev, "refusing virtiofs tag with newline character\n");
385                 return -EINVAL;
386         }
387 
388         return 0;
389 }
390 
391 /* Work function for hiprio completion */
392 static void virtio_fs_hiprio_done_work(struct work_struct *work)
393 {
394         struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
395                                                  done_work);
396         struct virtqueue *vq = fsvq->vq;
397 
398         /* Free completed FUSE_FORGET requests */
399         spin_lock(&fsvq->lock);
400         do {
401                 unsigned int len;
402                 void *req;
403 
404                 virtqueue_disable_cb(vq);
405 
406                 while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
407                         kfree(req);
408                         dec_in_flight_req(fsvq);
409                 }
410         } while (!virtqueue_enable_cb(vq));
411 
412         if (!list_empty(&fsvq->queued_reqs))
413                 schedule_work(&fsvq->dispatch_work);
414 
415         spin_unlock(&fsvq->lock);
416 }
417 
418 static void virtio_fs_request_dispatch_work(struct work_struct *work)
419 {
420         struct fuse_req *req;
421         struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
422                                                  dispatch_work);
423         int ret;
424 
425         pr_debug("virtio-fs: worker %s called.\n", __func__);
426         while (1) {
427                 spin_lock(&fsvq->lock);
428                 req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
429                                                list);
430                 if (!req) {
431                         spin_unlock(&fsvq->lock);
432                         break;
433                 }
434 
435                 list_del_init(&req->list);
436                 spin_unlock(&fsvq->lock);
437                 fuse_request_end(req);
438         }
439 
440         /* Dispatch pending requests */
441         while (1) {
442                 spin_lock(&fsvq->lock);
443                 req = list_first_entry_or_null(&fsvq->queued_reqs,
444                                                struct fuse_req, list);
445                 if (!req) {
446                         spin_unlock(&fsvq->lock);
447                         return;
448                 }
449                 list_del_init(&req->list);
450                 spin_unlock(&fsvq->lock);
451 
452                 ret = virtio_fs_enqueue_req(fsvq, req, true);
453                 if (ret < 0) {
454                         if (ret == -ENOSPC) {
455                                 spin_lock(&fsvq->lock);
456                                 list_add_tail(&req->list, &fsvq->queued_reqs);
457                                 spin_unlock(&fsvq->lock);
458                                 return;
459                         }
460                         req->out.h.error = ret;
461                         spin_lock(&fsvq->lock);
462                         dec_in_flight_req(fsvq);
463                         spin_unlock(&fsvq->lock);
464                         pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
465                                ret);
466                         fuse_request_end(req);
467                 }
468         }
469 }
470 
471 /*
472  * Returns 1 if queue is full and sender should wait a bit before sending
473  * next request, 0 otherwise.
474  */
475 static int send_forget_request(struct virtio_fs_vq *fsvq,
476                                struct virtio_fs_forget *forget,
477                                bool in_flight)
478 {
479         struct scatterlist sg;
480         struct virtqueue *vq;
481         int ret = 0;
482         bool notify;
483         struct virtio_fs_forget_req *req = &forget->req;
484 
485         spin_lock(&fsvq->lock);
486         if (!fsvq->connected) {
487                 if (in_flight)
488                         dec_in_flight_req(fsvq);
489                 kfree(forget);
490                 goto out;
491         }
492 
493         sg_init_one(&sg, req, sizeof(*req));
494         vq = fsvq->vq;
495         dev_dbg(&vq->vdev->dev, "%s\n", __func__);
496 
497         ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC);
498         if (ret < 0) {
499                 if (ret == -ENOSPC) {
500                         pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
501                                  ret);
502                         list_add_tail(&forget->list, &fsvq->queued_reqs);
503                         if (!in_flight)
504                                 inc_in_flight_req(fsvq);
505                         /* Queue is full */
506                         ret = 1;
507                 } else {
508                         pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
509                                  ret);
510                         kfree(forget);
511                         if (in_flight)
512                                 dec_in_flight_req(fsvq);
513                 }
514                 goto out;
515         }
516 
517         if (!in_flight)
518                 inc_in_flight_req(fsvq);
519         notify = virtqueue_kick_prepare(vq);
520         spin_unlock(&fsvq->lock);
521 
522         if (notify)
523                 virtqueue_notify(vq);
524         return ret;
525 out:
526         spin_unlock(&fsvq->lock);
527         return ret;
528 }
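/*
 * FORGET requests carry no reply, so only an out buffer is added above. On
 * -ENOSPC the request is parked on queued_reqs and retried from
 * virtio_fs_hiprio_dispatch_work() once completions free up descriptors;
 * virtio_fs_hiprio_done_work() schedules that dispatch work.
 */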
529 
530 static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
531 {
532         struct virtio_fs_forget *forget;
533         struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
534                                                  dispatch_work);
535         pr_debug("virtio-fs: worker %s called.\n", __func__);
536         while (1) {
537                 spin_lock(&fsvq->lock);
538                 forget = list_first_entry_or_null(&fsvq->queued_reqs,
539                                         struct virtio_fs_forget, list);
540                 if (!forget) {
541                         spin_unlock(&fsvq->lock);
542                         return;
543                 }
544 
545                 list_del(&forget->list);
546                 spin_unlock(&fsvq->lock);
547                 if (send_forget_request(fsvq, forget, true))
548                         return;
549         }
550 }
551 
552 /* Allocate and copy args into req->argbuf */
553 static int copy_args_to_argbuf(struct fuse_req *req)
554 {
555         struct fuse_args *args = req->args;
556         unsigned int offset = 0;
557         unsigned int num_in;
558         unsigned int num_out;
559         unsigned int len;
560         unsigned int i;
561 
562         num_in = args->in_numargs - args->in_pages;
563         num_out = args->out_numargs - args->out_pages;
564         len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
565               fuse_len_args(num_out, args->out_args);
566 
567         req->argbuf = kmalloc(len, GFP_ATOMIC);
568         if (!req->argbuf)
569                 return -ENOMEM;
570 
571         for (i = 0; i < num_in; i++) {
572                 memcpy(req->argbuf + offset,
573                        args->in_args[i].value,
574                        args->in_args[i].size);
575                 offset += args->in_args[i].size;
576         }
577 
578         return 0;
579 }
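/*
 * argbuf layout: the non-page in args are packed first and the device writes
 * the non-page out args right after them, which is why
 * copy_args_from_argbuf() starts reading at offset fuse_len_args(num_in, ...).
 * The bounce buffer is needed because arg values may live on the caller's
 * stack, which cannot be used in a scatter-gather list.
 */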
580 
581 /* Copy args out of and free req->argbuf */
582 static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
583 {
584         unsigned int remaining;
585         unsigned int offset;
586         unsigned int num_in;
587         unsigned int num_out;
588         unsigned int i;
589 
590         remaining = req->out.h.len - sizeof(req->out.h);
591         num_in = args->in_numargs - args->in_pages;
592         num_out = args->out_numargs - args->out_pages;
593         offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);
594 
595         for (i = 0; i < num_out; i++) {
596                 unsigned int argsize = args->out_args[i].size;
597 
598                 if (args->out_argvar &&
599                     i == args->out_numargs - 1 &&
600                     argsize > remaining) {
601                         argsize = remaining;
602                 }
603 
604                 memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
605                 offset += argsize;
606 
607                 if (i != args->out_numargs - 1)
608                         remaining -= argsize;
609         }
610 
611         /* Store the actual size of the variable-length arg */
612         if (args->out_argvar)
613                 args->out_args[args->out_numargs - 1].size = remaining;
614 
615         kfree(req->argbuf);
616         req->argbuf = NULL;
617 }
618 
619 /* Work function for request completion */
620 static void virtio_fs_request_complete(struct fuse_req *req,
621                                        struct virtio_fs_vq *fsvq)
622 {
623         struct fuse_pqueue *fpq = &fsvq->fud->pq;
624         struct fuse_args *args;
625         struct fuse_args_pages *ap;
626         unsigned int len, i, thislen;
627         struct page *page;
628 
629         /*
630          * TODO verify that server properly follows FUSE protocol
631          * (oh.uniq, oh.len)
632          */
633         args = req->args;
634         copy_args_from_argbuf(args, req);
635 
636         if (args->out_pages && args->page_zeroing) {
637                 len = args->out_args[args->out_numargs - 1].size;
638                 ap = container_of(args, typeof(*ap), args);
639                 for (i = 0; i < ap->num_pages; i++) {
640                         thislen = ap->descs[i].length;
641                         if (len < thislen) {
642                                 WARN_ON(ap->descs[i].offset);
643                                 page = ap->pages[i];
644                                 zero_user_segment(page, len, thislen);
645                                 len = 0;
646                         } else {
647                                 len -= thislen;
648                         }
649                 }
650         }
651 
652         spin_lock(&fpq->lock);
653         clear_bit(FR_SENT, &req->flags);
654         spin_unlock(&fpq->lock);
655 
656         fuse_request_end(req);
657         spin_lock(&fsvq->lock);
658         dec_in_flight_req(fsvq);
659         spin_unlock(&fsvq->lock);
660 }
661 
662 static void virtio_fs_complete_req_work(struct work_struct *work)
663 {
664         struct virtio_fs_req_work *w =
665                 container_of(work, typeof(*w), done_work);
666 
667         virtio_fs_request_complete(w->req, w->fsvq);
668         kfree(w);
669 }
670 
671 static void virtio_fs_requests_done_work(struct work_struct *work)
672 {
673         struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
674                                                  done_work);
675         struct fuse_pqueue *fpq = &fsvq->fud->pq;
676         struct virtqueue *vq = fsvq->vq;
677         struct fuse_req *req;
678         struct fuse_req *next;
679         unsigned int len;
680         LIST_HEAD(reqs);
681 
682         /* Collect completed requests off the virtqueue */
683         spin_lock(&fsvq->lock);
684         do {
685                 virtqueue_disable_cb(vq);
686 
687                 while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
688                         spin_lock(&fpq->lock);
689                         list_move_tail(&req->list, &reqs);
690                         spin_unlock(&fpq->lock);
691                 }
692         } while (!virtqueue_enable_cb(vq));
693         spin_unlock(&fsvq->lock);
694 
695         /* End requests */
696         list_for_each_entry_safe(req, next, &reqs, list) {
697                 list_del_init(&req->list);
698 
699                 /* blocking async request completes in a worker context */
700                 if (req->args->may_block) {
701                         struct virtio_fs_req_work *w;
702 
703                         w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
704                         INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
705                         w->fsvq = fsvq;
706                         w->req = req;
707                         schedule_work(&w->done_work);
708                 } else {
709                         virtio_fs_request_complete(req, fsvq);
710                 }
711         }
712 
713         /* Try to push previously queued requests, as the queue might no longer be full */
714         spin_lock(&fsvq->lock);
715         if (!list_empty(&fsvq->queued_reqs))
716                 schedule_work(&fsvq->dispatch_work);
717         spin_unlock(&fsvq->lock);
718 }
719 
720 static void virtio_fs_map_queues(struct virtio_device *vdev, struct virtio_fs *fs)
721 {
722         const struct cpumask *mask, *masks;
723         unsigned int q, cpu;
724 
725         /* First attempt to map using existing transport layer affinities
726          * e.g. PCIe MSI-X
727          */
728         if (!vdev->config->get_vq_affinity)
729                 goto fallback;
730 
731         for (q = 0; q < fs->num_request_queues; q++) {
732                 mask = vdev->config->get_vq_affinity(vdev, VQ_REQUEST + q);
733                 if (!mask)
734                         goto fallback;
735 
736                 for_each_cpu(cpu, mask)
737                         fs->mq_map[cpu] = q;
738         }
739 
740         return;
741 fallback:
742         /* Attempt to map evenly in groups over the CPUs */
743         masks = group_cpus_evenly(fs->num_request_queues);
744         /* If even this fails, fall back to mapping all CPUs to queue zero */
745         if (!masks) {
746                 for_each_possible_cpu(cpu)
747                         fs->mq_map[cpu] = 0;
748                 return;
749         }
750 
751         for (q = 0; q < fs->num_request_queues; q++) {
752                 for_each_cpu(cpu, &masks[q])
753                         fs->mq_map[cpu] = q;
754         }
755         kfree(masks);
756 }
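/*
 * mq_map[] is consulted at submission time: virtio_fs_wake_pending_and_unlock()
 * uses mq_map[raw_smp_processor_id()] to pick a request queue for the current
 * CPU. The mapping prefers the transport's interrupt affinities, then an even
 * spread from group_cpus_evenly(), and finally queue 0 for every CPU.
 */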
757 
758 /* Virtqueue interrupt handler */
759 static void virtio_fs_vq_done(struct virtqueue *vq)
760 {
761         struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);
762 
763         dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);
764 
765         schedule_work(&fsvq->done_work);
766 }
767 
768 static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name,
769                               int vq_type)
770 {
771         strscpy(fsvq->name, name, VQ_NAME_LEN);
772         spin_lock_init(&fsvq->lock);
773         INIT_LIST_HEAD(&fsvq->queued_reqs);
774         INIT_LIST_HEAD(&fsvq->end_reqs);
775         init_completion(&fsvq->in_flight_zero);
776 
777         if (vq_type == VQ_REQUEST) {
778                 INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work);
779                 INIT_WORK(&fsvq->dispatch_work,
780                                 virtio_fs_request_dispatch_work);
781         } else {
782                 INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work);
783                 INIT_WORK(&fsvq->dispatch_work,
784                                 virtio_fs_hiprio_dispatch_work);
785         }
786 }
787 
788 /* Initialize virtqueues */
789 static int virtio_fs_setup_vqs(struct virtio_device *vdev,
790                                struct virtio_fs *fs)
791 {
792         struct virtqueue_info *vqs_info;
793         struct virtqueue **vqs;
794         /* Specify pre_vectors to ensure that the queues before the
795          * request queues (e.g. hiprio) don't claim any of the CPUs in
796          * the multi-queue mapping and interrupt affinities
797          */
798         struct irq_affinity desc = { .pre_vectors = VQ_REQUEST };
799         unsigned int i;
800         int ret = 0;
801 
802         virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues,
803                         &fs->num_request_queues);
804         if (fs->num_request_queues == 0)
805                 return -EINVAL;
806 
807         /* Truncate the number of request queues to nr_cpu_ids */
808         fs->num_request_queues = min_t(unsigned int, fs->num_request_queues,
809                                         nr_cpu_ids);
810         fs->nvqs = VQ_REQUEST + fs->num_request_queues;
811         fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
812         if (!fs->vqs)
813                 return -ENOMEM;
814 
815         vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
816         fs->mq_map = kcalloc_node(nr_cpu_ids, sizeof(*fs->mq_map), GFP_KERNEL,
817                                         dev_to_node(&vdev->dev));
818         vqs_info = kcalloc(fs->nvqs, sizeof(*vqs_info), GFP_KERNEL);
819         if (!vqs || !vqs_info || !fs->mq_map) {
820                 ret = -ENOMEM;
821                 goto out;
822         }
823 
824         /* Initialize the hiprio/forget request virtqueue */
825         vqs_info[VQ_HIPRIO].callback = virtio_fs_vq_done;
826         virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
827         vqs_info[VQ_HIPRIO].name = fs->vqs[VQ_HIPRIO].name;
828 
829         /* Initialize the requests virtqueues */
830         for (i = VQ_REQUEST; i < fs->nvqs; i++) {
831                 char vq_name[VQ_NAME_LEN];
832 
833                 snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST);
834                 virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
835                 vqs_info[i].callback = virtio_fs_vq_done;
836                 vqs_info[i].name = fs->vqs[i].name;
837         }
838 
839         ret = virtio_find_vqs(vdev, fs->nvqs, vqs, vqs_info, &desc);
840         if (ret < 0)
841                 goto out;
842 
843         for (i = 0; i < fs->nvqs; i++)
844                 fs->vqs[i].vq = vqs[i];
845 
846         virtio_fs_start_all_queues(fs);
847 out:
848         kfree(vqs_info);
849         kfree(vqs);
850         if (ret) {
851                 kfree(fs->vqs);
852                 kfree(fs->mq_map);
853         }
854         return ret;
855 }
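/*
 * Resulting layout: vqs[VQ_HIPRIO] is the "hiprio" queue used for FORGET
 * requests, and vqs[VQ_REQUEST..nvqs-1] are the "requests.N" queues carrying
 * ordinary FUSE requests.
 */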
856 
857 /* Free virtqueues (device must already be reset) */
858 static void virtio_fs_cleanup_vqs(struct virtio_device *vdev)
859 {
860         vdev->config->del_vqs(vdev);
861 }
862 
863 /* Map a window offset to a page frame number.  The window offset will have
864  * been produced by .iomap_begin(), which maps a file offset to a window
865  * offset.
866  */
867 static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
868                                     long nr_pages, enum dax_access_mode mode,
869                                     void **kaddr, pfn_t *pfn)
870 {
871         struct virtio_fs *fs = dax_get_private(dax_dev);
872         phys_addr_t offset = PFN_PHYS(pgoff);
873         size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff;
874 
875         if (kaddr)
876                 *kaddr = fs->window_kaddr + offset;
877         if (pfn)
878                 *pfn = phys_to_pfn_t(fs->window_phys_addr + offset,
879                                         PFN_DEV | PFN_MAP);
880         return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
881 }
882 
883 static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
884                                      pgoff_t pgoff, size_t nr_pages)
885 {
886         long rc;
887         void *kaddr;
888 
889         rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr,
890                                NULL);
891         if (rc < 0)
892                 return dax_mem2blk_err(rc);
893 
894         memset(kaddr, 0, nr_pages << PAGE_SHIFT);
895         dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
896         return 0;
897 }
898 
899 static const struct dax_operations virtio_fs_dax_ops = {
900         .direct_access = virtio_fs_direct_access,
901         .zero_page_range = virtio_fs_zero_page_range,
902 };
903 
904 static void virtio_fs_cleanup_dax(void *data)
905 {
906         struct dax_device *dax_dev = data;
907 
908         kill_dax(dax_dev);
909         put_dax(dax_dev);
910 }
911 
912 DEFINE_FREE(cleanup_dax, struct dax_device *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T))
913 
914 static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
915 {
916         struct dax_device *dax_dev __free(cleanup_dax) = NULL;
917         struct virtio_shm_region cache_reg;
918         struct dev_pagemap *pgmap;
919         bool have_cache;
920 
921         if (!IS_ENABLED(CONFIG_FUSE_DAX))
922                 return 0;
923 
924         dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
925         if (IS_ERR(dax_dev)) {
926                 int rc = PTR_ERR(dax_dev);
927                 return rc == -EOPNOTSUPP ? 0 : rc;
928         }
929 
930         /* Get cache region */
931         have_cache = virtio_get_shm_region(vdev, &cache_reg,
932                                            (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
933         if (!have_cache) {
934                 dev_notice(&vdev->dev, "%s: No cache capability\n", __func__);
935                 return 0;
936         }
937 
938         if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len,
939                                      dev_name(&vdev->dev))) {
940                 dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n",
941                          cache_reg.addr, cache_reg.len);
942                 return -EBUSY;
943         }
944 
945         dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len,
946                    cache_reg.addr);
947 
948         pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL);
949         if (!pgmap)
950                 return -ENOMEM;
951 
952         pgmap->type = MEMORY_DEVICE_FS_DAX;
953 
954         /* Ideally we would directly use the PCI BAR resource but
955          * devm_memremap_pages() wants its own copy in pgmap.  So
956          * initialize a struct resource from scratch (only the start
957          * and end fields will be used).
958          */
959         pgmap->range = (struct range) {
960                 .start = (phys_addr_t) cache_reg.addr,
961                 .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1,
962         };
963         pgmap->nr_range = 1;
964 
965         fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap);
966         if (IS_ERR(fs->window_kaddr))
967                 return PTR_ERR(fs->window_kaddr);
968 
969         fs->window_phys_addr = (phys_addr_t) cache_reg.addr;
970         fs->window_len = (phys_addr_t) cache_reg.len;
971 
972         dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
973                 __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
974 
975         fs->dax_dev = no_free_ptr(dax_dev);
976         return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
977                                         fs->dax_dev);
978 }
979 
980 static int virtio_fs_probe(struct virtio_device *vdev)
981 {
982         struct virtio_fs *fs;
983         int ret;
984 
985         fs = kzalloc(sizeof(*fs), GFP_KERNEL);
986         if (!fs)
987                 return -ENOMEM;
988         kobject_init(&fs->kobj, &virtio_fs_ktype);
989         vdev->priv = fs;
990 
991         ret = virtio_fs_read_tag(vdev, fs);
992         if (ret < 0)
993                 goto out;
994 
995         ret = virtio_fs_setup_vqs(vdev, fs);
996         if (ret < 0)
997                 goto out;
998 
999         virtio_fs_map_queues(vdev, fs);
1000 
1001         ret = virtio_fs_setup_dax(vdev, fs);
1002         if (ret < 0)
1003                 goto out_vqs;
1004 
1005         /* Bring the device online in case the filesystem is mounted and
1006          * requests need to be sent before we return.
1007          */
1008         virtio_device_ready(vdev);
1009 
1010         ret = virtio_fs_add_instance(vdev, fs);
1011         if (ret < 0)
1012                 goto out_vqs;
1013 
1014         return 0;
1015 
1016 out_vqs:
1017         virtio_reset_device(vdev);
1018         virtio_fs_cleanup_vqs(vdev);
1019 
1020 out:
1021         vdev->priv = NULL;
1022         kobject_put(&fs->kobj);
1023         return ret;
1024 }
1025 
1026 static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
1027 {
1028         struct virtio_fs_vq *fsvq;
1029         int i;
1030 
1031         for (i = 0; i < fs->nvqs; i++) {
1032                 fsvq = &fs->vqs[i];
1033                 spin_lock(&fsvq->lock);
1034                 fsvq->connected = false;
1035                 spin_unlock(&fsvq->lock);
1036         }
1037 }
1038 
1039 static void virtio_fs_remove(struct virtio_device *vdev)
1040 {
1041         struct virtio_fs *fs = vdev->priv;
1042 
1043         mutex_lock(&virtio_fs_mutex);
1044         /* This device is going away. No one should get new reference */
1045         list_del_init(&fs->list);
1046         sysfs_remove_link(&fs->kobj, "device");
1047         kobject_del(&fs->kobj);
1048         virtio_fs_stop_all_queues(fs);
1049         virtio_fs_drain_all_queues_locked(fs);
1050         virtio_reset_device(vdev);
1051         virtio_fs_cleanup_vqs(vdev);
1052 
1053         vdev->priv = NULL;
1054         /* Put device reference on virtio_fs object */
1055         virtio_fs_put(fs);
1056         mutex_unlock(&virtio_fs_mutex);
1057 }
1058 
1059 #ifdef CONFIG_PM_SLEEP
1060 static int virtio_fs_freeze(struct virtio_device *vdev)
1061 {
1062         /* TODO need to save state here */
1063         pr_warn("virtio-fs: suspend/resume not yet supported\n");
1064         return -EOPNOTSUPP;
1065 }
1066 
1067 static int virtio_fs_restore(struct virtio_device *vdev)
1068 {
1069          /* TODO need to restore state here */
1070         return 0;
1071 }
1072 #endif /* CONFIG_PM_SLEEP */
1073 
1074 static const struct virtio_device_id id_table[] = {
1075         { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
1076         {},
1077 };
1078 
1079 static const unsigned int feature_table[] = {};
1080 
1081 static struct virtio_driver virtio_fs_driver = {
1082         .driver.name            = KBUILD_MODNAME,
1083         .id_table               = id_table,
1084         .feature_table          = feature_table,
1085         .feature_table_size     = ARRAY_SIZE(feature_table),
1086         .probe                  = virtio_fs_probe,
1087         .remove                 = virtio_fs_remove,
1088 #ifdef CONFIG_PM_SLEEP
1089         .freeze                 = virtio_fs_freeze,
1090         .restore                = virtio_fs_restore,
1091 #endif
1092 };
1093 
1094 static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
1095 __releases(fiq->lock)
1096 {
1097         struct fuse_forget_link *link;
1098         struct virtio_fs_forget *forget;
1099         struct virtio_fs_forget_req *req;
1100         struct virtio_fs *fs;
1101         struct virtio_fs_vq *fsvq;
1102         u64 unique;
1103 
1104         link = fuse_dequeue_forget(fiq, 1, NULL);
1105         unique = fuse_get_unique(fiq);
1106 
1107         fs = fiq->priv;
1108         fsvq = &fs->vqs[VQ_HIPRIO];
1109         spin_unlock(&fiq->lock);
1110 
1111         /* Allocate a buffer for the request */
1112         forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
1113         req = &forget->req;
1114 
1115         req->ih = (struct fuse_in_header){
1116                 .opcode = FUSE_FORGET,
1117                 .nodeid = link->forget_one.nodeid,
1118                 .unique = unique,
1119                 .len = sizeof(*req),
1120         };
1121         req->arg = (struct fuse_forget_in){
1122                 .nlookup = link->forget_one.nlookup,
1123         };
1124 
1125         send_forget_request(fsvq, forget, false);
1126         kfree(link);
1127 }
1128 
1129 static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
1130 __releases(fiq->lock)
1131 {
1132         /*
1133          * TODO interrupts.
1134          *
1135          * Normal fs operations on a local filesystem aren't interruptible.
1136          * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
1137          * with shared lock between host and guest.
1138          */
1139         spin_unlock(&fiq->lock);
1140 }
1141 
1142 /* Count number of scatter-gather elements required */
1143 static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs,
1144                                        unsigned int num_pages,
1145                                        unsigned int total_len)
1146 {
1147         unsigned int i;
1148         unsigned int this_len;
1149 
1150         for (i = 0; i < num_pages && total_len; i++) {
1151                 this_len =  min(page_descs[i].length, total_len);
1152                 total_len -= this_len;
1153         }
1154 
1155         return i;
1156 }
1157 
1158 /* Return the number of scatter-gather list elements required */
1159 static unsigned int sg_count_fuse_req(struct fuse_req *req)
1160 {
1161         struct fuse_args *args = req->args;
1162         struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
1163         unsigned int size, total_sgs = 1 /* fuse_in_header */;
1164 
1165         if (args->in_numargs - args->in_pages)
1166                 total_sgs += 1;
1167 
1168         if (args->in_pages) {
1169                 size = args->in_args[args->in_numargs - 1].size;
1170                 total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
1171                                                  size);
1172         }
1173 
1174         if (!test_bit(FR_ISREPLY, &req->flags))
1175                 return total_sgs;
1176 
1177         total_sgs += 1 /* fuse_out_header */;
1178 
1179         if (args->out_numargs - args->out_pages)
1180                 total_sgs += 1;
1181 
1182         if (args->out_pages) {
1183                 size = args->out_args[args->out_numargs - 1].size;
1184                 total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
1185                                                  size);
1186         }
1187 
1188         return total_sgs;
1189 }
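/*
 * Example count for a hypothetical read-like request that expects a reply and
 * receives data into out pages: 1 (fuse_in_header) + 1 (packed in args) +
 * 1 (fuse_out_header) + one element per data page. The non-page elements are
 * what FUSE_HEADER_OVERHEAD accounts for.
 */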
1190 
1191 /* Add pages to scatter-gather list and return number of elements used */
1192 static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
1193                                        struct page **pages,
1194                                        struct fuse_page_desc *page_descs,
1195                                        unsigned int num_pages,
1196                                        unsigned int total_len)
1197 {
1198         unsigned int i;
1199         unsigned int this_len;
1200 
1201         for (i = 0; i < num_pages && total_len; i++) {
1202                 sg_init_table(&sg[i], 1);
1203                 this_len =  min(page_descs[i].length, total_len);
1204                 sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
1205                 total_len -= this_len;
1206         }
1207 
1208         return i;
1209 }
1210 
1211 /* Add args to scatter-gather list and return number of elements used */
1212 static unsigned int sg_init_fuse_args(struct scatterlist *sg,
1213                                       struct fuse_req *req,
1214                                       struct fuse_arg *args,
1215                                       unsigned int numargs,
1216                                       bool argpages,
1217                                       void *argbuf,
1218                                       unsigned int *len_used)
1219 {
1220         struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
1221         unsigned int total_sgs = 0;
1222         unsigned int len;
1223 
1224         len = fuse_len_args(numargs - argpages, args);
1225         if (len)
1226                 sg_init_one(&sg[total_sgs++], argbuf, len);
1227 
1228         if (argpages)
1229                 total_sgs += sg_init_fuse_pages(&sg[total_sgs],
1230                                                 ap->pages, ap->descs,
1231                                                 ap->num_pages,
1232                                                 args[numargs - 1].size);
1233 
1234         if (len_used)
1235                 *len_used = len;
1236 
1237         return total_sgs;
1238 }
1239 
1240 /* Add a request to a virtqueue and kick the device */
1241 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
1242                                  struct fuse_req *req, bool in_flight)
1243 {
1244         /* requests need at least 4 elements */
1245         struct scatterlist *stack_sgs[6];
1246         struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
1247         struct scatterlist **sgs = stack_sgs;
1248         struct scatterlist *sg = stack_sg;
1249         struct virtqueue *vq;
1250         struct fuse_args *args = req->args;
1251         unsigned int argbuf_used = 0;
1252         unsigned int out_sgs = 0;
1253         unsigned int in_sgs = 0;
1254         unsigned int total_sgs;
1255         unsigned int i;
1256         int ret;
1257         bool notify;
1258         struct fuse_pqueue *fpq;
1259 
1260         /* Does the sglist fit on the stack? */
1261         total_sgs = sg_count_fuse_req(req);
1262         if (total_sgs > ARRAY_SIZE(stack_sgs)) {
1263                 sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
1264                 sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
1265                 if (!sgs || !sg) {
1266                         ret = -ENOMEM;
1267                         goto out;
1268                 }
1269         }
1270 
1271         /* Use a bounce buffer since stack args cannot be mapped */
1272         ret = copy_args_to_argbuf(req);
1273         if (ret < 0)
1274                 goto out;
1275 
1276         /* Request elements */
1277         sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
1278         out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
1279                                      (struct fuse_arg *)args->in_args,
1280                                      args->in_numargs, args->in_pages,
1281                                      req->argbuf, &argbuf_used);
1282 
1283         /* Reply elements */
1284         if (test_bit(FR_ISREPLY, &req->flags)) {
1285                 sg_init_one(&sg[out_sgs + in_sgs++],
1286                             &req->out.h, sizeof(req->out.h));
1287                 in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
1288                                             args->out_args, args->out_numargs,
1289                                             args->out_pages,
1290                                             req->argbuf + argbuf_used, NULL);
1291         }
1292 
1293         WARN_ON(out_sgs + in_sgs != total_sgs);
1294 
1295         for (i = 0; i < total_sgs; i++)
1296                 sgs[i] = &sg[i];
1297 
1298         spin_lock(&fsvq->lock);
1299 
1300         if (!fsvq->connected) {
1301                 spin_unlock(&fsvq->lock);
1302                 ret = -ENOTCONN;
1303                 goto out;
1304         }
1305 
1306         vq = fsvq->vq;
1307         ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
1308         if (ret < 0) {
1309                 spin_unlock(&fsvq->lock);
1310                 goto out;
1311         }
1312 
1313         /* Request successfully sent. */
1314         fpq = &fsvq->fud->pq;
1315         spin_lock(&fpq->lock);
1316         list_add_tail(&req->list, fpq->processing);
1317         spin_unlock(&fpq->lock);
1318         set_bit(FR_SENT, &req->flags);
1319         /* matches barrier in request_wait_answer() */
1320         smp_mb__after_atomic();
1321 
1322         if (!in_flight)
1323                 inc_in_flight_req(fsvq);
1324         notify = virtqueue_kick_prepare(vq);
1325 
1326         spin_unlock(&fsvq->lock);
1327 
1328         if (notify)
1329                 virtqueue_notify(vq);
1330 
1331 out:
1332         if (ret < 0 && req->argbuf) {
1333                 kfree(req->argbuf);
1334                 req->argbuf = NULL;
1335         }
1336         if (sgs != stack_sgs) {
1337                 kfree(sgs);
1338                 kfree(sg);
1339         }
1340 
1341         return ret;
1342 }
1343 
1344 static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
1345 __releases(fiq->lock)
1346 {
1347         unsigned int queue_id;
1348         struct virtio_fs *fs;
1349         struct fuse_req *req;
1350         struct virtio_fs_vq *fsvq;
1351         int ret;
1352 
1353         WARN_ON(list_empty(&fiq->pending));
1354         req = list_last_entry(&fiq->pending, struct fuse_req, list);
1355         clear_bit(FR_PENDING, &req->flags);
1356         list_del_init(&req->list);
1357         WARN_ON(!list_empty(&fiq->pending));
1358         spin_unlock(&fiq->lock);
1359 
1360         fs = fiq->priv;
1361         queue_id = VQ_REQUEST + fs->mq_map[raw_smp_processor_id()];
1362 
1363         pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u queue_id %u\n",
1364                  __func__, req->in.h.opcode, req->in.h.unique,
1365                  req->in.h.nodeid, req->in.h.len,
1366                  fuse_len_args(req->args->out_numargs, req->args->out_args),
1367                  queue_id);
1368 
1369         fsvq = &fs->vqs[queue_id];
1370         ret = virtio_fs_enqueue_req(fsvq, req, false);
1371         if (ret < 0) {
1372                 if (ret == -ENOSPC) {
1373                         /*
1374                          * Virtqueue full. Retry submission from worker
1375                          * context as we might be holding fc->bg_lock.
1376                          */
1377                         spin_lock(&fsvq->lock);
1378                         list_add_tail(&req->list, &fsvq->queued_reqs);
1379                         inc_in_flight_req(fsvq);
1380                         spin_unlock(&fsvq->lock);
1381                         return;
1382                 }
1383                 req->out.h.error = ret;
1384                 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
1385 
1386                 /* Can't end request in submission context. Use a worker */
1387                 spin_lock(&fsvq->lock);
1388                 list_add_tail(&req->list, &fsvq->end_reqs);
1389                 schedule_work(&fsvq->dispatch_work);
1390                 spin_unlock(&fsvq->lock);
1391                 return;
1392         }
1393 }
1394 
1395 static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
1396         .wake_forget_and_unlock         = virtio_fs_wake_forget_and_unlock,
1397         .wake_interrupt_and_unlock      = virtio_fs_wake_interrupt_and_unlock,
1398         .wake_pending_and_unlock        = virtio_fs_wake_pending_and_unlock,
1399         .release                        = virtio_fs_fiq_release,
1400 };
1401 
1402 static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
1403 {
1404         ctx->rootmode = S_IFDIR;
1405         ctx->default_permissions = 1;
1406         ctx->allow_other = 1;
1407         ctx->max_read = UINT_MAX;
1408         ctx->blksize = 512;
1409         ctx->destroy = true;
1410         ctx->no_control = true;
1411         ctx->no_force_umount = true;
1412 }
1413 
1414 static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
1415 {
1416         struct fuse_mount *fm = get_fuse_mount_super(sb);
1417         struct fuse_conn *fc = fm->fc;
1418         struct virtio_fs *fs = fc->iq.priv;
1419         struct fuse_fs_context *ctx = fsc->fs_private;
1420         unsigned int i;
1421         int err;
1422 
1423         virtio_fs_ctx_set_defaults(ctx);
1424         mutex_lock(&virtio_fs_mutex);
1425 
1426         /* After taking the mutex, make sure the virtiofs device is still there.
1427          * Though we are holding a reference to it, the driver's ->remove might
1428          * already have cleaned up the virtqueues. In that case bail out.
1429          */
1430         err = -EINVAL;
1431         if (list_empty(&fs->list)) {
1432                 pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
1433                 goto err;
1434         }
1435 
1436         err = -ENOMEM;
1437         /* Allocate a fuse_dev for each queue (hiprio and request queues) */
1438         for (i = 0; i < fs->nvqs; i++) {
1439                 struct virtio_fs_vq *fsvq = &fs->vqs[i];
1440 
1441                 fsvq->fud = fuse_dev_alloc();
1442                 if (!fsvq->fud)
1443                         goto err_free_fuse_devs;
1444         }
1445 
1446         /* virtiofs allocates and installs its own fuse devices */
1447         ctx->fudptr = NULL;
1448         if (ctx->dax_mode != FUSE_DAX_NEVER) {
1449                 if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) {
1450                         err = -EINVAL;
1451                         pr_err("virtio-fs: dax can't be enabled as filesystem"
1452                                " device does not support it.\n");
1453                         goto err_free_fuse_devs;
1454                 }
1455                 ctx->dax_dev = fs->dax_dev;
1456         }
1457         err = fuse_fill_super_common(sb, ctx);
1458         if (err < 0)
1459                 goto err_free_fuse_devs;
1460 
1461         for (i = 0; i < fs->nvqs; i++) {
1462                 struct virtio_fs_vq *fsvq = &fs->vqs[i];
1463 
1464                 fuse_dev_install(fsvq->fud, fc);
1465         }
1466 
1467         /* Previous unmount will stop all queues. Start these again */
1468         virtio_fs_start_all_queues(fs);
1469         fuse_send_init(fm);
1470         mutex_unlock(&virtio_fs_mutex);
1471         return 0;
1472 
1473 err_free_fuse_devs:
1474         virtio_fs_free_devs(fs);
1475 err:
1476         mutex_unlock(&virtio_fs_mutex);
1477         return err;
1478 }
1479 
1480 static void virtio_fs_conn_destroy(struct fuse_mount *fm)
1481 {
1482         struct fuse_conn *fc = fm->fc;
1483         struct virtio_fs *vfs = fc->iq.priv;
1484         struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO];
1485 
1486         /* Stop dax worker. Soon evict_inodes() will be called which
1487          * will free all memory ranges belonging to all inodes.
1488          */
1489         if (IS_ENABLED(CONFIG_FUSE_DAX))
1490                 fuse_dax_cancel_work(fc);
1491 
1492         /* Stop forget queue. Soon destroy will be sent */
1493         spin_lock(&fsvq->lock);
1494         fsvq->connected = false;
1495         spin_unlock(&fsvq->lock);
1496         virtio_fs_drain_all_queues(vfs);
1497 
1498         fuse_conn_destroy(fm);
1499 
1500         /* fuse_conn_destroy() must have sent destroy. Stop all queues
1501          * and drain one more time and free fuse devices. Freeing fuse
1502          * devices will drop their reference on fuse_conn and that in
1503          * turn will drop its reference on virtio_fs object.
1504          */
1505         virtio_fs_stop_all_queues(vfs);
1506         virtio_fs_drain_all_queues(vfs);
1507         virtio_fs_free_devs(vfs);
1508 }
1509 
1510 static void virtio_kill_sb(struct super_block *sb)
1511 {
1512         struct fuse_mount *fm = get_fuse_mount_super(sb);
1513         bool last;
1514 
1515         /* If mount failed, we can still be called without any fc */
1516         if (sb->s_root) {
1517                 last = fuse_mount_remove(fm);
1518                 if (last)
1519                         virtio_fs_conn_destroy(fm);
1520         }
1521         kill_anon_super(sb);
1522         fuse_mount_destroy(fm);
1523 }
1524 
1525 static int virtio_fs_test_super(struct super_block *sb,
1526                                 struct fs_context *fsc)
1527 {
1528         struct fuse_mount *fsc_fm = fsc->s_fs_info;
1529         struct fuse_mount *sb_fm = get_fuse_mount_super(sb);
1530 
1531         return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv;
1532 }
1533 
1534 static int virtio_fs_get_tree(struct fs_context *fsc)
1535 {
1536         struct virtio_fs *fs;
1537         struct super_block *sb;
1538         struct fuse_conn *fc = NULL;
1539         struct fuse_mount *fm;
1540         unsigned int virtqueue_size;
1541         int err = -EIO;
1542 
1543         /* This takes a reference on the virtio_fs object. The pointer is
1544          * installed in fc->iq.priv. Once the fuse_conn goes away, the fiq
1545          * ->release() callback drops the reference to this object.
1546          */
1547         fs = virtio_fs_find_instance(fsc->source);
1548         if (!fs) {
1549                 pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
1550                 return -EINVAL;
1551         }
1552 
1553         virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq);
1554         if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD))
1555                 goto out_err;
1556 
1557         err = -ENOMEM;
1558         fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
1559         if (!fc)
1560                 goto out_err;
1561 
1562         fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
1563         if (!fm)
1564                 goto out_err;
1565 
1566         fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs);
1567         fc->release = fuse_free_conn;
1568         fc->delete_stale = true;
1569         fc->auto_submounts = true;
1570         fc->sync_fs = true;
1571 
1572         /* Tell FUSE to split requests that exceed the virtqueue's size */
1573         fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
1574                                     virtqueue_size - FUSE_HEADER_OVERHEAD);
1575 
1576         fsc->s_fs_info = fm;
1577         sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
1578         if (fsc->s_fs_info)
1579                 fuse_mount_destroy(fm);
1580         if (IS_ERR(sb))
1581                 return PTR_ERR(sb);
1582 
1583         if (!sb->s_root) {
1584                 err = virtio_fs_fill_super(sb, fsc);
1585                 if (err) {
1586                         deactivate_locked_super(sb);
1587                         return err;
1588                 }
1589 
1590                 sb->s_flags |= SB_ACTIVE;
1591         }
1592 
1593         WARN_ON(fsc->root);
1594         fsc->root = dget(sb->s_root);
1595         return 0;
1596 
1597 out_err:
1598         kfree(fc);
1599         mutex_lock(&virtio_fs_mutex);
1600         virtio_fs_put(fs);
1601         mutex_unlock(&virtio_fs_mutex);
1602         return err;
1603 }
1604 
1605 static const struct fs_context_operations virtio_fs_context_ops = {
1606         .free           = virtio_fs_free_fsc,
1607         .parse_param    = virtio_fs_parse_param,
1608         .get_tree       = virtio_fs_get_tree,
1609 };
1610 
1611 static int virtio_fs_init_fs_context(struct fs_context *fsc)
1612 {
1613         struct fuse_fs_context *ctx;
1614 
1615         if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT)
1616                 return fuse_init_fs_context_submount(fsc);
1617 
1618         ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
1619         if (!ctx)
1620                 return -ENOMEM;
1621         fsc->fs_private = ctx;
1622         fsc->ops = &virtio_fs_context_ops;
1623         return 0;
1624 }
1625 
1626 static struct file_system_type virtio_fs_type = {
1627         .owner          = THIS_MODULE,
1628         .name           = "virtiofs",
1629         .init_fs_context = virtio_fs_init_fs_context,
1630         .kill_sb        = virtio_kill_sb,
1631 };
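/*
 * Usage sketch (the tag name is illustrative): a device exposing tag "myfs"
 * can be mounted with
 *
 *   mount -t virtiofs myfs /mnt
 *
 * The tag is matched against fsc->source in virtio_fs_get_tree() and is also
 * visible in /sys/fs/virtiofs/<N>/tag and the TAG= uevent property.
 */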
1632 
1633 static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
1634 {
1635         const struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);
1636 
1637         add_uevent_var(env, "TAG=%s", fs->tag);
1638         return 0;
1639 }
1640 
1641 static const struct kset_uevent_ops virtio_fs_uevent_ops = {
1642         .uevent = virtio_fs_uevent,
1643 };
1644 
1645 static int __init virtio_fs_sysfs_init(void)
1646 {
1647         virtio_fs_kset = kset_create_and_add("virtiofs", &virtio_fs_uevent_ops,
1648                                              fs_kobj);
1649         if (!virtio_fs_kset)
1650                 return -ENOMEM;
1651         return 0;
1652 }
1653 
1654 static void virtio_fs_sysfs_exit(void)
1655 {
1656         kset_unregister(virtio_fs_kset);
1657         virtio_fs_kset = NULL;
1658 }
1659 
1660 static int __init virtio_fs_init(void)
1661 {
1662         int ret;
1663 
1664         ret = virtio_fs_sysfs_init();
1665         if (ret < 0)
1666                 return ret;
1667 
1668         ret = register_virtio_driver(&virtio_fs_driver);
1669         if (ret < 0)
1670                 goto sysfs_exit;
1671 
1672         ret = register_filesystem(&virtio_fs_type);
1673         if (ret < 0)
1674                 goto unregister_virtio_driver;
1675 
1676         return 0;
1677 
1678 unregister_virtio_driver:
1679         unregister_virtio_driver(&virtio_fs_driver);
1680 sysfs_exit:
1681         virtio_fs_sysfs_exit();
1682         return ret;
1683 }
1684 module_init(virtio_fs_init);
1685 
1686 static void __exit virtio_fs_exit(void)
1687 {
1688         unregister_filesystem(&virtio_fs_type);
1689         unregister_virtio_driver(&virtio_fs_driver);
1690         virtio_fs_sysfs_exit();
1691 }
1692 module_exit(virtio_fs_exit);
1693 
1694 MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
1695 MODULE_DESCRIPTION("Virtio Filesystem");
1696 MODULE_LICENSE("GPL");
1697 MODULE_ALIAS_FS(KBUILD_MODNAME);
1698 MODULE_DEVICE_TABLE(virtio, id_table);
1699 
