~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/fcntl.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  *  linux/fs/fcntl.c
  4  *
  5  *  Copyright (C) 1991, 1992  Linus Torvalds
  6  */
  7 
  8 #include <linux/syscalls.h>
  9 #include <linux/init.h>
 10 #include <linux/mm.h>
 11 #include <linux/sched/task.h>
 12 #include <linux/fs.h>
 13 #include <linux/filelock.h>
 14 #include <linux/file.h>
 15 #include <linux/fdtable.h>
 16 #include <linux/capability.h>
 17 #include <linux/dnotify.h>
 18 #include <linux/slab.h>
 19 #include <linux/module.h>
 20 #include <linux/pipe_fs_i.h>
 21 #include <linux/security.h>
 22 #include <linux/ptrace.h>
 23 #include <linux/signal.h>
 24 #include <linux/rcupdate.h>
 25 #include <linux/pid_namespace.h>
 26 #include <linux/user_namespace.h>
 27 #include <linux/memfd.h>
 28 #include <linux/compat.h>
 29 #include <linux/mount.h>
 30 #include <linux/rw_hint.h>
 31 
 32 #include <linux/poll.h>
 33 #include <asm/siginfo.h>
 34 #include <linux/uaccess.h>
 35 
 36 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
 37 
 38 static int setfl(int fd, struct file * filp, unsigned int arg)
 39 {
 40         struct inode * inode = file_inode(filp);
 41         int error = 0;
 42 
 43         /*
 44          * O_APPEND cannot be cleared if the file is marked as append-only
 45          * and the file is open for write.
 46          */
 47         if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
 48                 return -EPERM;
 49 
 50         /* O_NOATIME can only be set by the owner or superuser */
 51         if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
 52                 if (!inode_owner_or_capable(file_mnt_idmap(filp), inode))
 53                         return -EPERM;
 54 
 55         /* required for strict SunOS emulation */
 56         if (O_NONBLOCK != O_NDELAY)
 57                if (arg & O_NDELAY)
 58                    arg |= O_NONBLOCK;
 59 
 60         /* Pipe packetized mode is controlled by O_DIRECT flag */
 61         if (!S_ISFIFO(inode->i_mode) &&
 62             (arg & O_DIRECT) &&
 63             !(filp->f_mode & FMODE_CAN_ODIRECT))
 64                 return -EINVAL;
 65 
 66         if (filp->f_op->check_flags)
 67                 error = filp->f_op->check_flags(arg);
 68         if (error)
 69                 return error;
 70 
 71         /*
 72          * ->fasync() is responsible for setting the FASYNC bit.
 73          */
 74         if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
 75                 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
 76                 if (error < 0)
 77                         goto out;
 78                 if (error > 0)
 79                         error = 0;
 80         }
 81         spin_lock(&filp->f_lock);
 82         filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
 83         filp->f_iocb_flags = iocb_flags(filp);
 84         spin_unlock(&filp->f_lock);
 85 
 86  out:
 87         return error;
 88 }
 89 
 90 void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
 91                 int force)
 92 {
 93         write_lock_irq(&filp->f_owner.lock);
 94         if (force || !filp->f_owner.pid) {
 95                 put_pid(filp->f_owner.pid);
 96                 filp->f_owner.pid = get_pid(pid);
 97                 filp->f_owner.pid_type = type;
 98 
 99                 if (pid) {
100                         const struct cred *cred = current_cred();
101                         security_file_set_fowner(filp);
102                         filp->f_owner.uid = cred->uid;
103                         filp->f_owner.euid = cred->euid;
104                 }
105         }
106         write_unlock_irq(&filp->f_owner.lock);
107 }
108 EXPORT_SYMBOL(__f_setown);
109 
110 int f_setown(struct file *filp, int who, int force)
111 {
112         enum pid_type type;
113         struct pid *pid = NULL;
114         int ret = 0;
115 
116         type = PIDTYPE_TGID;
117         if (who < 0) {
118                 /* avoid overflow below */
119                 if (who == INT_MIN)
120                         return -EINVAL;
121 
122                 type = PIDTYPE_PGID;
123                 who = -who;
124         }
125 
126         rcu_read_lock();
127         if (who) {
128                 pid = find_vpid(who);
129                 if (!pid)
130                         ret = -ESRCH;
131         }
132 
133         if (!ret)
134                 __f_setown(filp, pid, type, force);
135         rcu_read_unlock();
136 
137         return ret;
138 }
139 EXPORT_SYMBOL(f_setown);
140 
/*
 * f_delown - clear the owner of @filp.
 *
 * Forces __f_setown() to replace whatever owner is recorded with a NULL pid.
 */
void f_delown(struct file *filp)
{
        __f_setown(filp, NULL, PIDTYPE_TGID, 1);
}
145 
146 pid_t f_getown(struct file *filp)
147 {
148         pid_t pid = 0;
149 
150         read_lock_irq(&filp->f_owner.lock);
151         rcu_read_lock();
152         if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) {
153                 pid = pid_vnr(filp->f_owner.pid);
154                 if (filp->f_owner.pid_type == PIDTYPE_PGID)
155                         pid = -pid;
156         }
157         rcu_read_unlock();
158         read_unlock_irq(&filp->f_owner.lock);
159         return pid;
160 }
161 
162 static int f_setown_ex(struct file *filp, unsigned long arg)
163 {
164         struct f_owner_ex __user *owner_p = (void __user *)arg;
165         struct f_owner_ex owner;
166         struct pid *pid;
167         int type;
168         int ret;
169 
170         ret = copy_from_user(&owner, owner_p, sizeof(owner));
171         if (ret)
172                 return -EFAULT;
173 
174         switch (owner.type) {
175         case F_OWNER_TID:
176                 type = PIDTYPE_PID;
177                 break;
178 
179         case F_OWNER_PID:
180                 type = PIDTYPE_TGID;
181                 break;
182 
183         case F_OWNER_PGRP:
184                 type = PIDTYPE_PGID;
185                 break;
186 
187         default:
188                 return -EINVAL;
189         }
190 
191         rcu_read_lock();
192         pid = find_vpid(owner.pid);
193         if (owner.pid && !pid)
194                 ret = -ESRCH;
195         else
196                  __f_setown(filp, pid, type, 1);
197         rcu_read_unlock();
198 
199         return ret;
200 }
201 
202 static int f_getown_ex(struct file *filp, unsigned long arg)
203 {
204         struct f_owner_ex __user *owner_p = (void __user *)arg;
205         struct f_owner_ex owner = {};
206         int ret = 0;
207 
208         read_lock_irq(&filp->f_owner.lock);
209         rcu_read_lock();
210         if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type))
211                 owner.pid = pid_vnr(filp->f_owner.pid);
212         rcu_read_unlock();
213         switch (filp->f_owner.pid_type) {
214         case PIDTYPE_PID:
215                 owner.type = F_OWNER_TID;
216                 break;
217 
218         case PIDTYPE_TGID:
219                 owner.type = F_OWNER_PID;
220                 break;
221 
222         case PIDTYPE_PGID:
223                 owner.type = F_OWNER_PGRP;
224                 break;
225 
226         default:
227                 WARN_ON(1);
228                 ret = -EINVAL;
229                 break;
230         }
231         read_unlock_irq(&filp->f_owner.lock);
232 
233         if (!ret) {
234                 ret = copy_to_user(owner_p, &owner, sizeof(owner));
235                 if (ret)
236                         ret = -EFAULT;
237         }
238         return ret;
239 }
240 
241 #ifdef CONFIG_CHECKPOINT_RESTORE
242 static int f_getowner_uids(struct file *filp, unsigned long arg)
243 {
244         struct user_namespace *user_ns = current_user_ns();
245         uid_t __user *dst = (void __user *)arg;
246         uid_t src[2];
247         int err;
248 
249         read_lock_irq(&filp->f_owner.lock);
250         src[0] = from_kuid(user_ns, filp->f_owner.uid);
251         src[1] = from_kuid(user_ns, filp->f_owner.euid);
252         read_unlock_irq(&filp->f_owner.lock);
253 
254         err  = put_user(src[0], &dst[0]);
255         err |= put_user(src[1], &dst[1]);
256 
257         return err;
258 }
259 #else
260 static int f_getowner_uids(struct file *filp, unsigned long arg)
261 {
262         return -EINVAL;
263 }
264 #endif
265 
266 static bool rw_hint_valid(u64 hint)
267 {
268         BUILD_BUG_ON(WRITE_LIFE_NOT_SET != RWH_WRITE_LIFE_NOT_SET);
269         BUILD_BUG_ON(WRITE_LIFE_NONE != RWH_WRITE_LIFE_NONE);
270         BUILD_BUG_ON(WRITE_LIFE_SHORT != RWH_WRITE_LIFE_SHORT);
271         BUILD_BUG_ON(WRITE_LIFE_MEDIUM != RWH_WRITE_LIFE_MEDIUM);
272         BUILD_BUG_ON(WRITE_LIFE_LONG != RWH_WRITE_LIFE_LONG);
273         BUILD_BUG_ON(WRITE_LIFE_EXTREME != RWH_WRITE_LIFE_EXTREME);
274 
275         switch (hint) {
276         case RWH_WRITE_LIFE_NOT_SET:
277         case RWH_WRITE_LIFE_NONE:
278         case RWH_WRITE_LIFE_SHORT:
279         case RWH_WRITE_LIFE_MEDIUM:
280         case RWH_WRITE_LIFE_LONG:
281         case RWH_WRITE_LIFE_EXTREME:
282                 return true;
283         default:
284                 return false;
285         }
286 }
287 
288 static long fcntl_get_rw_hint(struct file *file, unsigned int cmd,
289                               unsigned long arg)
290 {
291         struct inode *inode = file_inode(file);
292         u64 __user *argp = (u64 __user *)arg;
293         u64 hint = READ_ONCE(inode->i_write_hint);
294 
295         if (copy_to_user(argp, &hint, sizeof(*argp)))
296                 return -EFAULT;
297         return 0;
298 }
299 
300 static long fcntl_set_rw_hint(struct file *file, unsigned int cmd,
301                               unsigned long arg)
302 {
303         struct inode *inode = file_inode(file);
304         u64 __user *argp = (u64 __user *)arg;
305         u64 hint;
306 
307         if (copy_from_user(&hint, argp, sizeof(hint)))
308                 return -EFAULT;
309         if (!rw_hint_valid(hint))
310                 return -EINVAL;
311 
312         WRITE_ONCE(inode->i_write_hint, hint);
313 
314         /*
315          * file->f_mapping->host may differ from inode. As an example,
316          * blkdev_open() modifies file->f_mapping.
317          */
318         if (file->f_mapping->host != inode)
319                 WRITE_ONCE(file->f_mapping->host->i_write_hint, hint);
320 
321         return 0;
322 }
323 
324 /* Is the file descriptor a dup of the file? */
325 static long f_dupfd_query(int fd, struct file *filp)
326 {
327         CLASS(fd_raw, f)(fd);
328 
329         /*
330          * We can do the 'fdput()' immediately, as the only thing that
331          * matters is the pointer value which isn't changed by the fdput.
332          *
333          * Technically we didn't need a ref at all, and 'fdget()' was
334          * overkill, but given our lockless file pointer lookup, the
335          * alternatives are complicated.
336          */
337         return f.file == filp;
338 }
339 
340 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
341                 struct file *filp)
342 {
343         void __user *argp = (void __user *)arg;
344         int argi = (int)arg;
345         struct flock flock;
346         long err = -EINVAL;
347 
348         switch (cmd) {
349         case F_DUPFD:
350                 err = f_dupfd(argi, filp, 0);
351                 break;
352         case F_DUPFD_CLOEXEC:
353                 err = f_dupfd(argi, filp, O_CLOEXEC);
354                 break;
355         case F_DUPFD_QUERY:
356                 err = f_dupfd_query(argi, filp);
357                 break;
358         case F_GETFD:
359                 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
360                 break;
361         case F_SETFD:
362                 err = 0;
363                 set_close_on_exec(fd, argi & FD_CLOEXEC);
364                 break;
365         case F_GETFL:
366                 err = filp->f_flags;
367                 break;
368         case F_SETFL:
369                 err = setfl(fd, filp, argi);
370                 break;
371 #if BITS_PER_LONG != 32
372         /* 32-bit arches must use fcntl64() */
373         case F_OFD_GETLK:
374 #endif
375         case F_GETLK:
376                 if (copy_from_user(&flock, argp, sizeof(flock)))
377                         return -EFAULT;
378                 err = fcntl_getlk(filp, cmd, &flock);
379                 if (!err && copy_to_user(argp, &flock, sizeof(flock)))
380                         return -EFAULT;
381                 break;
382 #if BITS_PER_LONG != 32
383         /* 32-bit arches must use fcntl64() */
384         case F_OFD_SETLK:
385         case F_OFD_SETLKW:
386                 fallthrough;
387 #endif
388         case F_SETLK:
389         case F_SETLKW:
390                 if (copy_from_user(&flock, argp, sizeof(flock)))
391                         return -EFAULT;
392                 err = fcntl_setlk(fd, filp, cmd, &flock);
393                 break;
394         case F_GETOWN:
395                 /*
396                  * XXX If f_owner is a process group, the
397                  * negative return value will get converted
398                  * into an error.  Oops.  If we keep the
399                  * current syscall conventions, the only way
400                  * to fix this will be in libc.
401                  */
402                 err = f_getown(filp);
403                 force_successful_syscall_return();
404                 break;
405         case F_SETOWN:
406                 err = f_setown(filp, argi, 1);
407                 break;
408         case F_GETOWN_EX:
409                 err = f_getown_ex(filp, arg);
410                 break;
411         case F_SETOWN_EX:
412                 err = f_setown_ex(filp, arg);
413                 break;
414         case F_GETOWNER_UIDS:
415                 err = f_getowner_uids(filp, arg);
416                 break;
417         case F_GETSIG:
418                 err = filp->f_owner.signum;
419                 break;
420         case F_SETSIG:
421                 /* arg == 0 restores default behaviour. */
422                 if (!valid_signal(argi)) {
423                         break;
424                 }
425                 err = 0;
426                 filp->f_owner.signum = argi;
427                 break;
428         case F_GETLEASE:
429                 err = fcntl_getlease(filp);
430                 break;
431         case F_SETLEASE:
432                 err = fcntl_setlease(fd, filp, argi);
433                 break;
434         case F_NOTIFY:
435                 err = fcntl_dirnotify(fd, filp, argi);
436                 break;
437         case F_SETPIPE_SZ:
438         case F_GETPIPE_SZ:
439                 err = pipe_fcntl(filp, cmd, argi);
440                 break;
441         case F_ADD_SEALS:
442         case F_GET_SEALS:
443                 err = memfd_fcntl(filp, cmd, argi);
444                 break;
445         case F_GET_RW_HINT:
446                 err = fcntl_get_rw_hint(filp, cmd, arg);
447                 break;
448         case F_SET_RW_HINT:
449                 err = fcntl_set_rw_hint(filp, cmd, arg);
450                 break;
451         default:
452                 break;
453         }
454         return err;
455 }
456 
457 static int check_fcntl_cmd(unsigned cmd)
458 {
459         switch (cmd) {
460         case F_DUPFD:
461         case F_DUPFD_CLOEXEC:
462         case F_DUPFD_QUERY:
463         case F_GETFD:
464         case F_SETFD:
465         case F_GETFL:
466                 return 1;
467         }
468         return 0;
469 }
470 
471 SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
472 {       
473         struct fd f = fdget_raw(fd);
474         long err = -EBADF;
475 
476         if (!f.file)
477                 goto out;
478 
479         if (unlikely(f.file->f_mode & FMODE_PATH)) {
480                 if (!check_fcntl_cmd(cmd))
481                         goto out1;
482         }
483 
484         err = security_file_fcntl(f.file, cmd, arg);
485         if (!err)
486                 err = do_fcntl(fd, cmd, arg, f.file);
487 
488 out1:
489         fdput(f);
490 out:
491         return err;
492 }
493 
#if BITS_PER_LONG == 32
/*
 * fcntl64() system call entry point (only built when BITS_PER_LONG == 32).
 *
 * Handles the struct flock64 based locking commands itself and forwards
 * every other command to do_fcntl(). Returns -EBADF for an unusable @fd,
 * -EFAULT when the lock structure cannot be copied, or the result of the
 * command.
 */
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
                unsigned long, arg)
{
        void __user *uptr = (void __user *)arg;
        struct fd f = fdget_raw(fd);
        struct flock64 lk;
        long ret = -EBADF;

        if (!f.file)
                return ret;

        if (unlikely(f.file->f_mode & FMODE_PATH) && !check_fcntl_cmd(cmd))
                goto put;

        ret = security_file_fcntl(f.file, cmd, arg);
        if (ret)
                goto put;

        switch (cmd) {
        case F_GETLK64:
        case F_OFD_GETLK:
                if (copy_from_user(&lk, uptr, sizeof(lk))) {
                        ret = -EFAULT;
                        break;
                }
                ret = fcntl_getlk64(f.file, cmd, &lk);
                if (!ret && copy_to_user(uptr, &lk, sizeof(lk)))
                        ret = -EFAULT;
                break;
        case F_SETLK64:
        case F_SETLKW64:
        case F_OFD_SETLK:
        case F_OFD_SETLKW:
                if (copy_from_user(&lk, uptr, sizeof(lk))) {
                        ret = -EFAULT;
                        break;
                }
                ret = fcntl_setlk64(fd, f.file, cmd, &lk);
                break;
        default:
                ret = do_fcntl(fd, cmd, arg, f.file);
                break;
        }
put:
        fdput(f);
        return ret;
}
#endif
544 
545 #ifdef CONFIG_COMPAT
/*
 * copy_flock_fields - copy l_type, l_whence, l_start, l_len and l_pid
 * from *@src to *@dst.
 *
 * Both arguments are evaluated once per field, so they must be free of
 * side effects. Wrapped in do { } while (0) so the expansion is a single
 * statement and is safe under an unbraced if/else.
 *
 * careful - don't use anywhere else
 */
#define copy_flock_fields(dst, src)                     \
        do {                                            \
                (dst)->l_type = (src)->l_type;          \
                (dst)->l_whence = (src)->l_whence;      \
                (dst)->l_start = (src)->l_start;        \
                (dst)->l_len = (src)->l_len;            \
                (dst)->l_pid = (src)->l_pid;            \
        } while (0)
553 
554 static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl)
555 {
556         struct compat_flock fl;
557 
558         if (copy_from_user(&fl, ufl, sizeof(struct compat_flock)))
559                 return -EFAULT;
560         copy_flock_fields(kfl, &fl);
561         return 0;
562 }
563 
564 static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl)
565 {
566         struct compat_flock64 fl;
567 
568         if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64)))
569                 return -EFAULT;
570         copy_flock_fields(kfl, &fl);
571         return 0;
572 }
573 
574 static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl)
575 {
576         struct compat_flock fl;
577 
578         memset(&fl, 0, sizeof(struct compat_flock));
579         copy_flock_fields(&fl, kfl);
580         if (copy_to_user(ufl, &fl, sizeof(struct compat_flock)))
581                 return -EFAULT;
582         return 0;
583 }
584 
585 static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl)
586 {
587         struct compat_flock64 fl;
588 
589         BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start));
590         BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len));
591 
592         memset(&fl, 0, sizeof(struct compat_flock64));
593         copy_flock_fields(&fl, kfl);
594         if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64)))
595                 return -EFAULT;
596         return 0;
597 }
598 #undef copy_flock_fields
599 
600 static unsigned int
601 convert_fcntl_cmd(unsigned int cmd)
602 {
603         switch (cmd) {
604         case F_GETLK64:
605                 return F_GETLK;
606         case F_SETLK64:
607                 return F_SETLK;
608         case F_SETLKW64:
609                 return F_SETLKW;
610         }
611 
612         return cmd;
613 }
614 
615 /*
616  * GETLK was successful and we need to return the data, but it needs to fit in
617  * the compat structure.
618  * l_start shouldn't be too big, unless the original start + end is greater than
619  * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return
620  * -EOVERFLOW in that case.  l_len could be too big, in which case we just
621  * truncate it, and only allow the app to see that part of the conflicting lock
622  * that might make sense to it anyway
623  */
624 static int fixup_compat_flock(struct flock *flock)
625 {
626         if (flock->l_start > COMPAT_OFF_T_MAX)
627                 return -EOVERFLOW;
628         if (flock->l_len > COMPAT_OFF_T_MAX)
629                 flock->l_len = COMPAT_OFF_T_MAX;
630         return 0;
631 }
632 
633 static long do_compat_fcntl64(unsigned int fd, unsigned int cmd,
634                              compat_ulong_t arg)
635 {
636         struct fd f = fdget_raw(fd);
637         struct flock flock;
638         long err = -EBADF;
639 
640         if (!f.file)
641                 return err;
642 
643         if (unlikely(f.file->f_mode & FMODE_PATH)) {
644                 if (!check_fcntl_cmd(cmd))
645                         goto out_put;
646         }
647 
648         err = security_file_fcntl(f.file, cmd, arg);
649         if (err)
650                 goto out_put;
651 
652         switch (cmd) {
653         case F_GETLK:
654                 err = get_compat_flock(&flock, compat_ptr(arg));
655                 if (err)
656                         break;
657                 err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
658                 if (err)
659                         break;
660                 err = fixup_compat_flock(&flock);
661                 if (!err)
662                         err = put_compat_flock(&flock, compat_ptr(arg));
663                 break;
664         case F_GETLK64:
665         case F_OFD_GETLK:
666                 err = get_compat_flock64(&flock, compat_ptr(arg));
667                 if (err)
668                         break;
669                 err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
670                 if (!err)
671                         err = put_compat_flock64(&flock, compat_ptr(arg));
672                 break;
673         case F_SETLK:
674         case F_SETLKW:
675                 err = get_compat_flock(&flock, compat_ptr(arg));
676                 if (err)
677                         break;
678                 err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
679                 break;
680         case F_SETLK64:
681         case F_SETLKW64:
682         case F_OFD_SETLK:
683         case F_OFD_SETLKW:
684                 err = get_compat_flock64(&flock, compat_ptr(arg));
685                 if (err)
686                         break;
687                 err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
688                 break;
689         default:
690                 err = do_fcntl(fd, cmd, arg, f.file);
691                 break;
692         }
693 out_put:
694         fdput(f);
695         return err;
696 }
697 
/*
 * Compat fcntl64() entry point: accepts every command and hands it
 * straight to do_compat_fcntl64().
 */
COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
                       compat_ulong_t, arg)
{
        return do_compat_fcntl64(fd, cmd, arg);
}
703 
704 COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
705                        compat_ulong_t, arg)
706 {
707         switch (cmd) {
708         case F_GETLK64:
709         case F_SETLK64:
710         case F_SETLKW64:
711         case F_OFD_GETLK:
712         case F_OFD_SETLK:
713         case F_OFD_SETLKW:
714                 return -EINVAL;
715         }
716         return do_compat_fcntl64(fd, cmd, arg);
717 }
718 #endif
719 
720 /* Table to convert sigio signal codes into poll band bitmaps */
721 
722 static const __poll_t band_table[NSIGPOLL] = {
723         EPOLLIN | EPOLLRDNORM,                  /* POLL_IN */
724         EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND,   /* POLL_OUT */
725         EPOLLIN | EPOLLRDNORM | EPOLLMSG,               /* POLL_MSG */
726         EPOLLERR,                               /* POLL_ERR */
727         EPOLLPRI | EPOLLRDBAND,                 /* POLL_PRI */
728         EPOLLHUP | EPOLLERR                     /* POLL_HUP */
729 };
730 
731 static inline int sigio_perm(struct task_struct *p,
732                              struct fown_struct *fown, int sig)
733 {
734         const struct cred *cred;
735         int ret;
736 
737         rcu_read_lock();
738         cred = __task_cred(p);
739         ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
740                 uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
741                 uid_eq(fown->uid,  cred->suid) || uid_eq(fown->uid,  cred->uid)) &&
742                !security_file_send_sigiotask(p, fown, sig));
743         rcu_read_unlock();
744         return ret;
745 }
746 
747 static void send_sigio_to_task(struct task_struct *p,
748                                struct fown_struct *fown,
749                                int fd, int reason, enum pid_type type)
750 {
751         /*
752          * F_SETSIG can change ->signum lockless in parallel, make
753          * sure we read it once and use the same value throughout.
754          */
755         int signum = READ_ONCE(fown->signum);
756 
757         if (!sigio_perm(p, fown, signum))
758                 return;
759 
760         switch (signum) {
761                 default: {
762                         kernel_siginfo_t si;
763 
764                         /* Queue a rt signal with the appropriate fd as its
765                            value.  We use SI_SIGIO as the source, not 
766                            SI_KERNEL, since kernel signals always get 
767                            delivered even if we can't queue.  Failure to
768                            queue in this case _should_ be reported; we fall
769                            back to SIGIO in that case. --sct */
770                         clear_siginfo(&si);
771                         si.si_signo = signum;
772                         si.si_errno = 0;
773                         si.si_code  = reason;
774                         /*
775                          * Posix definies POLL_IN and friends to be signal
776                          * specific si_codes for SIG_POLL.  Linux extended
777                          * these si_codes to other signals in a way that is
778                          * ambiguous if other signals also have signal
779                          * specific si_codes.  In that case use SI_SIGIO instead
780                          * to remove the ambiguity.
781                          */
782                         if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
783                                 si.si_code = SI_SIGIO;
784 
785                         /* Make sure we are called with one of the POLL_*
786                            reasons, otherwise we could leak kernel stack into
787                            userspace.  */
788                         BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL));
789                         if (reason - POLL_IN >= NSIGPOLL)
790                                 si.si_band  = ~0L;
791                         else
792                                 si.si_band = mangle_poll(band_table[reason - POLL_IN]);
793                         si.si_fd    = fd;
794                         if (!do_send_sig_info(signum, &si, p, type))
795                                 break;
796                 }
797                         fallthrough;    /* fall back on the old plain SIGIO signal */
798                 case 0:
799                         do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
800         }
801 }
802 
803 void send_sigio(struct fown_struct *fown, int fd, int band)
804 {
805         struct task_struct *p;
806         enum pid_type type;
807         unsigned long flags;
808         struct pid *pid;
809         
810         read_lock_irqsave(&fown->lock, flags);
811 
812         type = fown->pid_type;
813         pid = fown->pid;
814         if (!pid)
815                 goto out_unlock_fown;
816 
817         if (type <= PIDTYPE_TGID) {
818                 rcu_read_lock();
819                 p = pid_task(pid, PIDTYPE_PID);
820                 if (p)
821                         send_sigio_to_task(p, fown, fd, band, type);
822                 rcu_read_unlock();
823         } else {
824                 read_lock(&tasklist_lock);
825                 do_each_pid_task(pid, type, p) {
826                         send_sigio_to_task(p, fown, fd, band, type);
827                 } while_each_pid_task(pid, type, p);
828                 read_unlock(&tasklist_lock);
829         }
830  out_unlock_fown:
831         read_unlock_irqrestore(&fown->lock, flags);
832 }
833 
834 static void send_sigurg_to_task(struct task_struct *p,
835                                 struct fown_struct *fown, enum pid_type type)
836 {
837         if (sigio_perm(p, fown, SIGURG))
838                 do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
839 }
840 
841 int send_sigurg(struct fown_struct *fown)
842 {
843         struct task_struct *p;
844         enum pid_type type;
845         struct pid *pid;
846         unsigned long flags;
847         int ret = 0;
848         
849         read_lock_irqsave(&fown->lock, flags);
850 
851         type = fown->pid_type;
852         pid = fown->pid;
853         if (!pid)
854                 goto out_unlock_fown;
855 
856         ret = 1;
857 
858         if (type <= PIDTYPE_TGID) {
859                 rcu_read_lock();
860                 p = pid_task(pid, PIDTYPE_PID);
861                 if (p)
862                         send_sigurg_to_task(p, fown, type);
863                 rcu_read_unlock();
864         } else {
865                 read_lock(&tasklist_lock);
866                 do_each_pid_task(pid, type, p) {
867                         send_sigurg_to_task(p, fown, type);
868                 } while_each_pid_task(pid, type, p);
869                 read_unlock(&tasklist_lock);
870         }
871  out_unlock_fown:
872         read_unlock_irqrestore(&fown->lock, flags);
873         return ret;
874 }
875 
876 static DEFINE_SPINLOCK(fasync_lock);
877 static struct kmem_cache *fasync_cache __ro_after_init;
878 
879 /*
880  * Remove a fasync entry. If successfully removed, return
881  * positive and clear the FASYNC flag. If no entry exists,
882  * do nothing and return 0.
883  *
884  * NOTE! It is very important that the FASYNC flag always
885  * match the state "is the filp on a fasync list".
886  *
887  */
888 int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
889 {
890         struct fasync_struct *fa, **fp;
891         int result = 0;
892 
893         spin_lock(&filp->f_lock);
894         spin_lock(&fasync_lock);
895         for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
896                 if (fa->fa_file != filp)
897                         continue;
898 
899                 write_lock_irq(&fa->fa_lock);
900                 fa->fa_file = NULL;
901                 write_unlock_irq(&fa->fa_lock);
902 
903                 *fp = fa->fa_next;
904                 kfree_rcu(fa, fa_rcu);
905                 filp->f_flags &= ~FASYNC;
906                 result = 1;
907                 break;
908         }
909         spin_unlock(&fasync_lock);
910         spin_unlock(&filp->f_lock);
911         return result;
912 }
913 
914 struct fasync_struct *fasync_alloc(void)
915 {
916         return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
917 }
918 
919 /*
920  * NOTE! This can be used only for unused fasync entries:
921  * entries that actually got inserted on the fasync list
922  * need to be released by rcu - see fasync_remove_entry.
923  */
924 void fasync_free(struct fasync_struct *new)
925 {
926         kmem_cache_free(fasync_cache, new);
927 }
928 
929 /*
930  * Insert a new entry into the fasync list.  Return the pointer to the
931  * old one if we didn't use the new one.
932  *
933  * NOTE! It is very important that the FASYNC flag always
934  * match the state "is the filp on a fasync list".
935  */
936 struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
937 {
938         struct fasync_struct *fa, **fp;
939 
940         spin_lock(&filp->f_lock);
941         spin_lock(&fasync_lock);
942         for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
943                 if (fa->fa_file != filp)
944                         continue;
945 
946                 write_lock_irq(&fa->fa_lock);
947                 fa->fa_fd = fd;
948                 write_unlock_irq(&fa->fa_lock);
949                 goto out;
950         }
951 
952         rwlock_init(&new->fa_lock);
953         new->magic = FASYNC_MAGIC;
954         new->fa_file = filp;
955         new->fa_fd = fd;
956         new->fa_next = *fapp;
957         rcu_assign_pointer(*fapp, new);
958         filp->f_flags |= FASYNC;
959 
960 out:
961         spin_unlock(&fasync_lock);
962         spin_unlock(&filp->f_lock);
963         return fa;
964 }
965 
966 /*
967  * Add a fasync entry. Return negative on error, positive if
968  * added, and zero if did nothing but change an existing one.
969  */
970 static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
971 {
972         struct fasync_struct *new;
973 
974         new = fasync_alloc();
975         if (!new)
976                 return -ENOMEM;
977 
978         /*
979          * fasync_insert_entry() returns the old (update) entry if
980          * it existed.
981          *
982          * So free the (unused) new entry and return 0 to let the
983          * caller know that we didn't add any new fasync entries.
984          */
985         if (fasync_insert_entry(fd, filp, fapp, new)) {
986                 fasync_free(new);
987                 return 0;
988         }
989 
990         return 1;
991 }
992 
/*
 * fasync_helper() is the common entry point for maintaining a fasync
 * queue; it is used by almost all character device drivers, and by the
 * file lease code for regular files.
 *
 * A non-zero @on adds (or updates) the entry for @filp, zero removes
 * it.  Returns negative on error, 0 if nothing was added or deleted,
 * and positive if an entry was added/deleted.
 */
int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
        if (on)
                return fasync_add_entry(fd, filp, fapp);

        return fasync_remove_entry(filp, fapp);
}

EXPORT_SYMBOL(fasync_helper);
1007 
1008 /*
1009  * rcu_read_lock() is held
1010  */
1011 static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
1012 {
1013         while (fa) {
1014                 struct fown_struct *fown;
1015                 unsigned long flags;
1016 
1017                 if (fa->magic != FASYNC_MAGIC) {
1018                         printk(KERN_ERR "kill_fasync: bad magic number in "
1019                                "fasync_struct!\n");
1020                         return;
1021                 }
1022                 read_lock_irqsave(&fa->fa_lock, flags);
1023                 if (fa->fa_file) {
1024                         fown = &fa->fa_file->f_owner;
1025                         /* Don't send SIGURG to processes which have not set a
1026                            queued signum: SIGURG has its own default signalling
1027                            mechanism. */
1028                         if (!(sig == SIGURG && fown->signum == 0))
1029                                 send_sigio(fown, fa->fa_fd, band);
1030                 }
1031                 read_unlock_irqrestore(&fa->fa_lock, flags);
1032                 fa = rcu_dereference(fa->fa_next);
1033         }
1034 }
1035 
/*
 * Notify every entry on the fasync list at @fp.  @sig and @band are
 * passed on to kill_fasync_rcu().
 */
void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
        /*
         * Cheap unlocked check first: the list is usually empty, in
         * which case there is nothing to do.
         */
        if (!*fp)
                return;

        rcu_read_lock();
        kill_fasync_rcu(rcu_dereference(*fp), sig, band);
        rcu_read_unlock();
}
EXPORT_SYMBOL(kill_fasync);
1048 
1049 static int __init fcntl_init(void)
1050 {
1051         /*
1052          * Please add new bits here to ensure allocation uniqueness.
1053          * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
1054          * is defined as O_NONBLOCK on some platforms and not on others.
1055          */
1056         BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
1057                 HWEIGHT32(
1058                         (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
1059                         __FMODE_EXEC | __FMODE_NONOTIFY));
1060 
1061         fasync_cache = kmem_cache_create("fasync_cache",
1062                                          sizeof(struct fasync_struct), 0,
1063                                          SLAB_PANIC | SLAB_ACCOUNT, NULL);
1064         return 0;
1065 }
1066 
1067 module_init(fcntl_init)
1068 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php