~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/open.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /fs/open.c (Architecture i386) and /fs/open.c (Architecture sparc64)


  1 // SPDX-License-Identifier: GPL-2.0-only            1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*                                                  2 /*
  3  *  linux/fs/open.c                                 3  *  linux/fs/open.c
  4  *                                                  4  *
  5  *  Copyright (C) 1991, 1992  Linus Torvalds        5  *  Copyright (C) 1991, 1992  Linus Torvalds
  6  */                                                 6  */
  7                                                     7 
  8 #include <linux/string.h>                           8 #include <linux/string.h>
  9 #include <linux/mm.h>                               9 #include <linux/mm.h>
 10 #include <linux/file.h>                            10 #include <linux/file.h>
 11 #include <linux/fdtable.h>                         11 #include <linux/fdtable.h>
 12 #include <linux/fsnotify.h>                        12 #include <linux/fsnotify.h>
 13 #include <linux/module.h>                          13 #include <linux/module.h>
 14 #include <linux/tty.h>                             14 #include <linux/tty.h>
 15 #include <linux/namei.h>                           15 #include <linux/namei.h>
 16 #include <linux/backing-dev.h>                     16 #include <linux/backing-dev.h>
 17 #include <linux/capability.h>                      17 #include <linux/capability.h>
 18 #include <linux/securebits.h>                      18 #include <linux/securebits.h>
 19 #include <linux/security.h>                        19 #include <linux/security.h>
 20 #include <linux/mount.h>                           20 #include <linux/mount.h>
 21 #include <linux/fcntl.h>                           21 #include <linux/fcntl.h>
 22 #include <linux/slab.h>                            22 #include <linux/slab.h>
 23 #include <linux/uaccess.h>                         23 #include <linux/uaccess.h>
 24 #include <linux/fs.h>                              24 #include <linux/fs.h>
 25 #include <linux/personality.h>                     25 #include <linux/personality.h>
 26 #include <linux/pagemap.h>                         26 #include <linux/pagemap.h>
 27 #include <linux/syscalls.h>                        27 #include <linux/syscalls.h>
 28 #include <linux/rcupdate.h>                        28 #include <linux/rcupdate.h>
 29 #include <linux/audit.h>                           29 #include <linux/audit.h>
 30 #include <linux/falloc.h>                          30 #include <linux/falloc.h>
 31 #include <linux/fs_struct.h>                       31 #include <linux/fs_struct.h>
 32 #include <linux/dnotify.h>                         32 #include <linux/dnotify.h>
 33 #include <linux/compat.h>                          33 #include <linux/compat.h>
 34 #include <linux/mnt_idmapping.h>                   34 #include <linux/mnt_idmapping.h>
 35 #include <linux/filelock.h>                        35 #include <linux/filelock.h>
 36                                                    36 
 37 #include "internal.h"                              37 #include "internal.h"
 38                                                    38 
 39 int do_truncate(struct mnt_idmap *idmap, struc     39 int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry,
 40                 loff_t length, unsigned int ti     40                 loff_t length, unsigned int time_attrs, struct file *filp)
 41 {                                                  41 {
 42         int ret;                                   42         int ret;
 43         struct iattr newattrs;                     43         struct iattr newattrs;
 44                                                    44 
 45         /* Not pretty: "inode->i_size" shouldn     45         /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
 46         if (length < 0)                            46         if (length < 0)
 47                 return -EINVAL;                    47                 return -EINVAL;
 48                                                    48 
 49         newattrs.ia_size = length;                 49         newattrs.ia_size = length;
 50         newattrs.ia_valid = ATTR_SIZE | time_a     50         newattrs.ia_valid = ATTR_SIZE | time_attrs;
 51         if (filp) {                                51         if (filp) {
 52                 newattrs.ia_file = filp;           52                 newattrs.ia_file = filp;
 53                 newattrs.ia_valid |= ATTR_FILE     53                 newattrs.ia_valid |= ATTR_FILE;
 54         }                                          54         }
 55                                                    55 
 56         /* Remove suid, sgid, and file capabil     56         /* Remove suid, sgid, and file capabilities on truncate too */
 57         ret = dentry_needs_remove_privs(idmap,     57         ret = dentry_needs_remove_privs(idmap, dentry);
 58         if (ret < 0)                               58         if (ret < 0)
 59                 return ret;                        59                 return ret;
 60         if (ret)                                   60         if (ret)
 61                 newattrs.ia_valid |= ret | ATT     61                 newattrs.ia_valid |= ret | ATTR_FORCE;
 62                                                    62 
 63         inode_lock(dentry->d_inode);               63         inode_lock(dentry->d_inode);
 64         /* Note any delegations or leases have     64         /* Note any delegations or leases have already been broken: */
 65         ret = notify_change(idmap, dentry, &ne     65         ret = notify_change(idmap, dentry, &newattrs, NULL);
 66         inode_unlock(dentry->d_inode);             66         inode_unlock(dentry->d_inode);
 67         return ret;                                67         return ret;
 68 }                                                  68 }
 69                                                    69 
 70 long vfs_truncate(const struct path *path, lof     70 long vfs_truncate(const struct path *path, loff_t length)
 71 {                                                  71 {
 72         struct mnt_idmap *idmap;                   72         struct mnt_idmap *idmap;
 73         struct inode *inode;                       73         struct inode *inode;
 74         long error;                                74         long error;
 75                                                    75 
 76         inode = path->dentry->d_inode;             76         inode = path->dentry->d_inode;
 77                                                    77 
 78         /* For directories it's -EISDIR, for o     78         /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
 79         if (S_ISDIR(inode->i_mode))                79         if (S_ISDIR(inode->i_mode))
 80                 return -EISDIR;                    80                 return -EISDIR;
 81         if (!S_ISREG(inode->i_mode))               81         if (!S_ISREG(inode->i_mode))
 82                 return -EINVAL;                    82                 return -EINVAL;
 83                                                    83 
 84         error = mnt_want_write(path->mnt);         84         error = mnt_want_write(path->mnt);
 85         if (error)                                 85         if (error)
 86                 goto out;                          86                 goto out;
 87                                                    87 
 88         idmap = mnt_idmap(path->mnt);              88         idmap = mnt_idmap(path->mnt);
 89         error = inode_permission(idmap, inode,     89         error = inode_permission(idmap, inode, MAY_WRITE);
 90         if (error)                                 90         if (error)
 91                 goto mnt_drop_write_and_out;       91                 goto mnt_drop_write_and_out;
 92                                                    92 
 93         error = -EPERM;                            93         error = -EPERM;
 94         if (IS_APPEND(inode))                      94         if (IS_APPEND(inode))
 95                 goto mnt_drop_write_and_out;       95                 goto mnt_drop_write_and_out;
 96                                                    96 
 97         error = get_write_access(inode);           97         error = get_write_access(inode);
 98         if (error)                                 98         if (error)
 99                 goto mnt_drop_write_and_out;       99                 goto mnt_drop_write_and_out;
100                                                   100 
101         /*                                        101         /*
102          * Make sure that there are no leases.    102          * Make sure that there are no leases.  get_write_access() protects
103          * against the truncate racing with a     103          * against the truncate racing with a lease-granting setlease().
104          */                                       104          */
105         error = break_lease(inode, O_WRONLY);     105         error = break_lease(inode, O_WRONLY);
106         if (error)                                106         if (error)
107                 goto put_write_and_out;           107                 goto put_write_and_out;
108                                                   108 
109         error = security_path_truncate(path);     109         error = security_path_truncate(path);
110         if (!error)                               110         if (!error)
111                 error = do_truncate(idmap, pat    111                 error = do_truncate(idmap, path->dentry, length, 0, NULL);
112                                                   112 
113 put_write_and_out:                                113 put_write_and_out:
114         put_write_access(inode);                  114         put_write_access(inode);
115 mnt_drop_write_and_out:                           115 mnt_drop_write_and_out:
116         mnt_drop_write(path->mnt);                116         mnt_drop_write(path->mnt);
117 out:                                              117 out:
118         return error;                             118         return error;
119 }                                                 119 }
120 EXPORT_SYMBOL_GPL(vfs_truncate);                  120 EXPORT_SYMBOL_GPL(vfs_truncate);
121                                                   121 
122 long do_sys_truncate(const char __user *pathna    122 long do_sys_truncate(const char __user *pathname, loff_t length)
123 {                                                 123 {
124         unsigned int lookup_flags = LOOKUP_FOL    124         unsigned int lookup_flags = LOOKUP_FOLLOW;
125         struct path path;                         125         struct path path;
126         int error;                                126         int error;
127                                                   127 
128         if (length < 0) /* sorry, but loff_t s    128         if (length < 0) /* sorry, but loff_t says... */
129                 return -EINVAL;                   129                 return -EINVAL;
130                                                   130 
131 retry:                                            131 retry:
132         error = user_path_at(AT_FDCWD, pathnam    132         error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
133         if (!error) {                             133         if (!error) {
134                 error = vfs_truncate(&path, le    134                 error = vfs_truncate(&path, length);
135                 path_put(&path);                  135                 path_put(&path);
136         }                                         136         }
137         if (retry_estale(error, lookup_flags))    137         if (retry_estale(error, lookup_flags)) {
138                 lookup_flags |= LOOKUP_REVAL;     138                 lookup_flags |= LOOKUP_REVAL;
139                 goto retry;                       139                 goto retry;
140         }                                         140         }
141         return error;                             141         return error;
142 }                                                 142 }
143                                                   143 
144 SYSCALL_DEFINE2(truncate, const char __user *,    144 SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
145 {                                                 145 {
146         return do_sys_truncate(path, length);     146         return do_sys_truncate(path, length);
147 }                                                 147 }
148                                                   148 
149 #ifdef CONFIG_COMPAT                              149 #ifdef CONFIG_COMPAT
150 COMPAT_SYSCALL_DEFINE2(truncate, const char __    150 COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length)
151 {                                                 151 {
152         return do_sys_truncate(path, length);     152         return do_sys_truncate(path, length);
153 }                                                 153 }
154 #endif                                            154 #endif
155                                                   155 
156 long do_ftruncate(struct file *file, loff_t le    156 long do_ftruncate(struct file *file, loff_t length, int small)
157 {                                                 157 {
158         struct inode *inode;                      158         struct inode *inode;
159         struct dentry *dentry;                    159         struct dentry *dentry;
160         int error;                                160         int error;
161                                                   161 
162         /* explicitly opened as large or we ar    162         /* explicitly opened as large or we are on 64-bit box */
163         if (file->f_flags & O_LARGEFILE)          163         if (file->f_flags & O_LARGEFILE)
164                 small = 0;                        164                 small = 0;
165                                                   165 
166         dentry = file->f_path.dentry;             166         dentry = file->f_path.dentry;
167         inode = dentry->d_inode;                  167         inode = dentry->d_inode;
168         if (!S_ISREG(inode->i_mode) || !(file-    168         if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
169                 return -EINVAL;                   169                 return -EINVAL;
170                                                   170 
171         /* Cannot ftruncate over 2^31 bytes wi    171         /* Cannot ftruncate over 2^31 bytes without large file support */
172         if (small && length > MAX_NON_LFS)        172         if (small && length > MAX_NON_LFS)
173                 return -EINVAL;                   173                 return -EINVAL;
174                                                   174 
175         /* Check IS_APPEND on real upper inode    175         /* Check IS_APPEND on real upper inode */
176         if (IS_APPEND(file_inode(file)))          176         if (IS_APPEND(file_inode(file)))
177                 return -EPERM;                    177                 return -EPERM;
178         sb_start_write(inode->i_sb);              178         sb_start_write(inode->i_sb);
179         error = security_file_truncate(file);     179         error = security_file_truncate(file);
180         if (!error)                               180         if (!error)
181                 error = do_truncate(file_mnt_i    181                 error = do_truncate(file_mnt_idmap(file), dentry, length,
182                                     ATTR_MTIME    182                                     ATTR_MTIME | ATTR_CTIME, file);
183         sb_end_write(inode->i_sb);                183         sb_end_write(inode->i_sb);
184                                                   184 
185         return error;                             185         return error;
186 }                                                 186 }
187                                                   187 
188 long do_sys_ftruncate(unsigned int fd, loff_t     188 long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
189 {                                                 189 {
190         struct fd f;                              190         struct fd f;
191         int error;                                191         int error;
192                                                   192 
193         if (length < 0)                           193         if (length < 0)
194                 return -EINVAL;                   194                 return -EINVAL;
195         f = fdget(fd);                            195         f = fdget(fd);
196         if (!f.file)                              196         if (!f.file)
197                 return -EBADF;                    197                 return -EBADF;
198                                                   198 
199         error = do_ftruncate(f.file, length, s    199         error = do_ftruncate(f.file, length, small);
200                                                   200 
201         fdput(f);                                 201         fdput(f);
202         return error;                             202         return error;
203 }                                                 203 }
204                                                   204 
205 SYSCALL_DEFINE2(ftruncate, unsigned int, fd, o    205 SYSCALL_DEFINE2(ftruncate, unsigned int, fd, off_t, length)
206 {                                                 206 {
207         return do_sys_ftruncate(fd, length, 1)    207         return do_sys_ftruncate(fd, length, 1);
208 }                                                 208 }
209                                                   209 
210 #ifdef CONFIG_COMPAT                              210 #ifdef CONFIG_COMPAT
211 COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int    211 COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_off_t, length)
212 {                                                 212 {
213         return do_sys_ftruncate(fd, length, 1)    213         return do_sys_ftruncate(fd, length, 1);
214 }                                                 214 }
215 #endif                                            215 #endif
216                                                   216 
217 /* LFS versions of truncate are only needed on    217 /* LFS versions of truncate are only needed on 32 bit machines */
218 #if BITS_PER_LONG == 32                           218 #if BITS_PER_LONG == 32
219 SYSCALL_DEFINE2(truncate64, const char __user     219 SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length)
220 {                                                 220 {
221         return do_sys_truncate(path, length);     221         return do_sys_truncate(path, length);
222 }                                                 222 }
223                                                   223 
224 SYSCALL_DEFINE2(ftruncate64, unsigned int, fd,    224 SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length)
225 {                                                 225 {
226         return do_sys_ftruncate(fd, length, 0)    226         return do_sys_ftruncate(fd, length, 0);
227 }                                                 227 }
228 #endif /* BITS_PER_LONG == 32 */                  228 #endif /* BITS_PER_LONG == 32 */
229                                                   229 
230 #if defined(CONFIG_COMPAT) && defined(__ARCH_W    230 #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_TRUNCATE64)
231 COMPAT_SYSCALL_DEFINE3(truncate64, const char     231 COMPAT_SYSCALL_DEFINE3(truncate64, const char __user *, pathname,
232                        compat_arg_u64_dual(len    232                        compat_arg_u64_dual(length))
233 {                                                 233 {
234         return ksys_truncate(pathname, compat_    234         return ksys_truncate(pathname, compat_arg_u64_glue(length));
235 }                                                 235 }
236 #endif                                            236 #endif
237                                                   237 
238 #if defined(CONFIG_COMPAT) && defined(__ARCH_W    238 #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FTRUNCATE64)
239 COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned i    239 COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd,
240                        compat_arg_u64_dual(len    240                        compat_arg_u64_dual(length))
241 {                                                 241 {
242         return ksys_ftruncate(fd, compat_arg_u    242         return ksys_ftruncate(fd, compat_arg_u64_glue(length));
243 }                                                 243 }
244 #endif                                            244 #endif
245                                                   245 
246 int vfs_fallocate(struct file *file, int mode,    246 int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
247 {                                                 247 {
248         struct inode *inode = file_inode(file)    248         struct inode *inode = file_inode(file);
249         long ret;                                 249         long ret;
250         loff_t sum;                               250         loff_t sum;
251                                                   251 
252         if (offset < 0 || len <= 0)               252         if (offset < 0 || len <= 0)
253                 return -EINVAL;                   253                 return -EINVAL;
254                                                   254 
255         /* Return error if mode is not support    255         /* Return error if mode is not supported */
256         if (mode & ~FALLOC_FL_SUPPORTED_MASK)     256         if (mode & ~FALLOC_FL_SUPPORTED_MASK)
257                 return -EOPNOTSUPP;               257                 return -EOPNOTSUPP;
258                                                   258 
259         /* Punch hole and zero range are mutua    259         /* Punch hole and zero range are mutually exclusive */
260         if ((mode & (FALLOC_FL_PUNCH_HOLE | FA    260         if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
261             (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_    261             (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
262                 return -EOPNOTSUPP;               262                 return -EOPNOTSUPP;
263                                                   263 
264         /* Punch hole must have keep size set     264         /* Punch hole must have keep size set */
265         if ((mode & FALLOC_FL_PUNCH_HOLE) &&      265         if ((mode & FALLOC_FL_PUNCH_HOLE) &&
266             !(mode & FALLOC_FL_KEEP_SIZE))        266             !(mode & FALLOC_FL_KEEP_SIZE))
267                 return -EOPNOTSUPP;               267                 return -EOPNOTSUPP;
268                                                   268 
269         /* Collapse range should only be used     269         /* Collapse range should only be used exclusively. */
270         if ((mode & FALLOC_FL_COLLAPSE_RANGE)     270         if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
271             (mode & ~FALLOC_FL_COLLAPSE_RANGE)    271             (mode & ~FALLOC_FL_COLLAPSE_RANGE))
272                 return -EINVAL;                   272                 return -EINVAL;
273                                                   273 
274         /* Insert range should only be used ex    274         /* Insert range should only be used exclusively. */
275         if ((mode & FALLOC_FL_INSERT_RANGE) &&    275         if ((mode & FALLOC_FL_INSERT_RANGE) &&
276             (mode & ~FALLOC_FL_INSERT_RANGE))     276             (mode & ~FALLOC_FL_INSERT_RANGE))
277                 return -EINVAL;                   277                 return -EINVAL;
278                                                   278 
279         /* Unshare range should only be used w    279         /* Unshare range should only be used with allocate mode. */
280         if ((mode & FALLOC_FL_UNSHARE_RANGE) &    280         if ((mode & FALLOC_FL_UNSHARE_RANGE) &&
281             (mode & ~(FALLOC_FL_UNSHARE_RANGE     281             (mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE)))
282                 return -EINVAL;                   282                 return -EINVAL;
283                                                   283 
284         if (!(file->f_mode & FMODE_WRITE))        284         if (!(file->f_mode & FMODE_WRITE))
285                 return -EBADF;                    285                 return -EBADF;
286                                                   286 
287         /*                                        287         /*
288          * We can only allow pure fallocate on    288          * We can only allow pure fallocate on append only files
289          */                                       289          */
290         if ((mode & ~FALLOC_FL_KEEP_SIZE) && I    290         if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
291                 return -EPERM;                    291                 return -EPERM;
292                                                   292 
293         if (IS_IMMUTABLE(inode))                  293         if (IS_IMMUTABLE(inode))
294                 return -EPERM;                    294                 return -EPERM;
295                                                   295 
296         /*                                        296         /*
297          * We cannot allow any fallocate opera    297          * We cannot allow any fallocate operation on an active swapfile
298          */                                       298          */
299         if (IS_SWAPFILE(inode))                   299         if (IS_SWAPFILE(inode))
300                 return -ETXTBSY;                  300                 return -ETXTBSY;
301                                                   301 
302         /*                                        302         /*
303          * Revalidate the write permissions, i    303          * Revalidate the write permissions, in case security policy has
304          * changed since the files were opened    304          * changed since the files were opened.
305          */                                       305          */
306         ret = security_file_permission(file, M    306         ret = security_file_permission(file, MAY_WRITE);
307         if (ret)                                  307         if (ret)
308                 return ret;                       308                 return ret;
309                                                   309 
310         ret = fsnotify_file_area_perm(file, MA    310         ret = fsnotify_file_area_perm(file, MAY_WRITE, &offset, len);
311         if (ret)                                  311         if (ret)
312                 return ret;                       312                 return ret;
313                                                   313 
314         if (S_ISFIFO(inode->i_mode))              314         if (S_ISFIFO(inode->i_mode))
315                 return -ESPIPE;                   315                 return -ESPIPE;
316                                                   316 
317         if (S_ISDIR(inode->i_mode))               317         if (S_ISDIR(inode->i_mode))
318                 return -EISDIR;                   318                 return -EISDIR;
319                                                   319 
320         if (!S_ISREG(inode->i_mode) && !S_ISBL    320         if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
321                 return -ENODEV;                   321                 return -ENODEV;
322                                                   322 
323         /* Check for wraparound */                323         /* Check for wraparound */
324         if (check_add_overflow(offset, len, &s    324         if (check_add_overflow(offset, len, &sum))
325                 return -EFBIG;                    325                 return -EFBIG;
326                                                   326 
327         if (sum > inode->i_sb->s_maxbytes)        327         if (sum > inode->i_sb->s_maxbytes)
328                 return -EFBIG;                    328                 return -EFBIG;
329                                                   329 
330         if (!file->f_op->fallocate)               330         if (!file->f_op->fallocate)
331                 return -EOPNOTSUPP;               331                 return -EOPNOTSUPP;
332                                                   332 
333         file_start_write(file);                   333         file_start_write(file);
334         ret = file->f_op->fallocate(file, mode    334         ret = file->f_op->fallocate(file, mode, offset, len);
335                                                   335 
336         /*                                        336         /*
337          * Create inotify and fanotify events.    337          * Create inotify and fanotify events.
338          *                                        338          *
339          * To keep the logic simple always cre    339          * To keep the logic simple always create events if fallocate succeeds.
340          * This implies that events are even c    340          * This implies that events are even created if the file size remains
341          * unchanged, e.g. when using flag FAL    341          * unchanged, e.g. when using flag FALLOC_FL_KEEP_SIZE.
342          */                                       342          */
343         if (ret == 0)                             343         if (ret == 0)
344                 fsnotify_modify(file);            344                 fsnotify_modify(file);
345                                                   345 
346         file_end_write(file);                     346         file_end_write(file);
347         return ret;                               347         return ret;
348 }                                                 348 }
349 EXPORT_SYMBOL_GPL(vfs_fallocate);                 349 EXPORT_SYMBOL_GPL(vfs_fallocate);
350                                                   350 
351 int ksys_fallocate(int fd, int mode, loff_t of    351 int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len)
352 {                                                 352 {
353         struct fd f = fdget(fd);                  353         struct fd f = fdget(fd);
354         int error = -EBADF;                       354         int error = -EBADF;
355                                                   355 
356         if (f.file) {                             356         if (f.file) {
357                 error = vfs_fallocate(f.file,     357                 error = vfs_fallocate(f.file, mode, offset, len);
358                 fdput(f);                         358                 fdput(f);
359         }                                         359         }
360         return error;                             360         return error;
361 }                                                 361 }
362                                                   362 
363 SYSCALL_DEFINE4(fallocate, int, fd, int, mode,    363 SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
364 {                                                 364 {
365         return ksys_fallocate(fd, mode, offset    365         return ksys_fallocate(fd, mode, offset, len);
366 }                                                 366 }
367                                                   367 
#if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FALLOCATE)
/*
 * Compat flavour of fallocate(2), only built when both CONFIG_COMPAT and
 * __ARCH_WANT_COMPAT_FALLOCATE are defined.
 *
 * @offset and @len are declared through compat_arg_u64_dual() and turned
 * back into single values with compat_arg_u64_glue() before being handed to
 * ksys_fallocate().
 * NOTE(review): the macros are defined elsewhere; presumably each 64-bit
 * argument arrives as two halves that the glue macro recombines - confirm
 * against the macro definitions.
 */
COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, compat_arg_u64_dual(offset),
		       compat_arg_u64_dual(len))
{
	loff_t start = compat_arg_u64_glue(offset);
	loff_t length = compat_arg_u64_glue(len);

	return ksys_fallocate(fd, mode, start, length);
}
#endif
376                                                   376 
377 /*                                                377 /*
378  * access() needs to use the real uid/gid, not    378  * access() needs to use the real uid/gid, not the effective uid/gid.
379  * We do this by temporarily clearing all FS-r    379  * We do this by temporarily clearing all FS-related capabilities and
380  * switching the fsuid/fsgid around to the rea    380  * switching the fsuid/fsgid around to the real ones.
381  *                                                381  *
382  * Creating new credentials is expensive, so w    382  * Creating new credentials is expensive, so we try to skip doing it,
383  * which we can if the result would match what    383  * which we can if the result would match what we already got.
384  */                                               384  */
385 static bool access_need_override_creds(int fla    385 static bool access_need_override_creds(int flags)
386 {                                                 386 {
387         const struct cred *cred;                  387         const struct cred *cred;
388                                                   388 
389         if (flags & AT_EACCESS)                   389         if (flags & AT_EACCESS)
390                 return false;                     390                 return false;
391                                                   391 
392         cred = current_cred();                    392         cred = current_cred();
393         if (!uid_eq(cred->fsuid, cred->uid) ||    393         if (!uid_eq(cred->fsuid, cred->uid) ||
394             !gid_eq(cred->fsgid, cred->gid))      394             !gid_eq(cred->fsgid, cred->gid))
395                 return true;                      395                 return true;
396                                                   396 
397         if (!issecure(SECURE_NO_SETUID_FIXUP))    397         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
398                 kuid_t root_uid = make_kuid(cr    398                 kuid_t root_uid = make_kuid(cred->user_ns, 0);
399                 if (!uid_eq(cred->uid, root_ui    399                 if (!uid_eq(cred->uid, root_uid)) {
400                         if (!cap_isclear(cred-    400                         if (!cap_isclear(cred->cap_effective))
401                                 return true;      401                                 return true;
402                 } else {                          402                 } else {
403                         if (!cap_isidentical(c    403                         if (!cap_isidentical(cred->cap_effective,
404                             cred->cap_permitte    404                             cred->cap_permitted))
405                                 return true;      405                                 return true;
406                 }                                 406                 }
407         }                                         407         }
408                                                   408 
409         return false;                             409         return false;
410 }                                                 410 }
411                                                   411 
412 static const struct cred *access_override_cred    412 static const struct cred *access_override_creds(void)
413 {                                                 413 {
414         const struct cred *old_cred;              414         const struct cred *old_cred;
415         struct cred *override_cred;               415         struct cred *override_cred;
416                                                   416 
417         override_cred = prepare_creds();          417         override_cred = prepare_creds();
418         if (!override_cred)                       418         if (!override_cred)
419                 return NULL;                      419                 return NULL;
420                                                   420 
421         /*                                        421         /*
422          * XXX access_need_override_creds perf    422          * XXX access_need_override_creds performs checks in hopes of skipping
423          * this work. Make sure it stays in sy    423          * this work. Make sure it stays in sync if making any changes in this
424          * routine.                               424          * routine.
425          */                                       425          */
426                                                   426 
427         override_cred->fsuid = override_cred->    427         override_cred->fsuid = override_cred->uid;
428         override_cred->fsgid = override_cred->    428         override_cred->fsgid = override_cred->gid;
429                                                   429 
430         if (!issecure(SECURE_NO_SETUID_FIXUP))    430         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
431                 /* Clear the capabilities if w    431                 /* Clear the capabilities if we switch to a non-root user */
432                 kuid_t root_uid = make_kuid(ov    432                 kuid_t root_uid = make_kuid(override_cred->user_ns, 0);
433                 if (!uid_eq(override_cred->uid    433                 if (!uid_eq(override_cred->uid, root_uid))
434                         cap_clear(override_cre    434                         cap_clear(override_cred->cap_effective);
435                 else                              435                 else
436                         override_cred->cap_eff    436                         override_cred->cap_effective =
437                                 override_cred-    437                                 override_cred->cap_permitted;
438         }                                         438         }
439                                                   439 
440         /*                                        440         /*
441          * The new set of credentials can *onl    441          * The new set of credentials can *only* be used in
442          * task-synchronous circumstances, and    442          * task-synchronous circumstances, and does not need
443          * RCU freeing, unless somebody then t    443          * RCU freeing, unless somebody then takes a separate
444          * reference to it.                       444          * reference to it.
445          *                                        445          *
446          * NOTE! This is _only_ true because t    446          * NOTE! This is _only_ true because this credential
447          * is used purely for override_creds()    447          * is used purely for override_creds() that installs
448          * it as the subjective cred. Other th    448          * it as the subjective cred. Other threads will be
449          * accessing ->real_cred, not the subj    449          * accessing ->real_cred, not the subjective cred.
450          *                                        450          *
451          * If somebody _does_ make a copy of t    451          * If somebody _does_ make a copy of this (using the
452          * 'get_current_cred()' function), tha    452          * 'get_current_cred()' function), that will clear the
453          * non_rcu field, because now that oth    453          * non_rcu field, because now that other user may be
454          * expecting RCU freeing. But normal t    454          * expecting RCU freeing. But normal thread-synchronous
455          * cred accesses will keep things non-    455          * cred accesses will keep things non-racy to avoid RCU
456          * freeing.                               456          * freeing.
457          */                                       457          */
458         override_cred->non_rcu = 1;               458         override_cred->non_rcu = 1;
459                                                   459 
460         old_cred = override_creds(override_cre    460         old_cred = override_creds(override_cred);
461                                                   461 
462         /* override_cred() gets its own ref */    462         /* override_cred() gets its own ref */
463         put_cred(override_cred);                  463         put_cred(override_cred);
464                                                   464 
465         return old_cred;                          465         return old_cred;
466 }                                                 466 }
467                                                   467 
468 static long do_faccessat(int dfd, const char _    468 static long do_faccessat(int dfd, const char __user *filename, int mode, int flags)
469 {                                                 469 {
470         struct path path;                         470         struct path path;
471         struct inode *inode;                      471         struct inode *inode;
472         int res;                                  472         int res;
473         unsigned int lookup_flags = LOOKUP_FOL    473         unsigned int lookup_flags = LOOKUP_FOLLOW;
474         const struct cred *old_cred = NULL;       474         const struct cred *old_cred = NULL;
475                                                   475 
476         if (mode & ~S_IRWXO)    /* where's F_O    476         if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
477                 return -EINVAL;                   477                 return -EINVAL;
478                                                   478 
479         if (flags & ~(AT_EACCESS | AT_SYMLINK_    479         if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
480                 return -EINVAL;                   480                 return -EINVAL;
481                                                   481 
482         if (flags & AT_SYMLINK_NOFOLLOW)          482         if (flags & AT_SYMLINK_NOFOLLOW)
483                 lookup_flags &= ~LOOKUP_FOLLOW    483                 lookup_flags &= ~LOOKUP_FOLLOW;
484         if (flags & AT_EMPTY_PATH)                484         if (flags & AT_EMPTY_PATH)
485                 lookup_flags |= LOOKUP_EMPTY;     485                 lookup_flags |= LOOKUP_EMPTY;
486                                                   486 
487         if (access_need_override_creds(flags))    487         if (access_need_override_creds(flags)) {
488                 old_cred = access_override_cre    488                 old_cred = access_override_creds();
489                 if (!old_cred)                    489                 if (!old_cred)
490                         return -ENOMEM;           490                         return -ENOMEM;
491         }                                         491         }
492                                                   492 
493 retry:                                            493 retry:
494         res = user_path_at(dfd, filename, look    494         res = user_path_at(dfd, filename, lookup_flags, &path);
495         if (res)                                  495         if (res)
496                 goto out;                         496                 goto out;
497                                                   497 
498         inode = d_backing_inode(path.dentry);     498         inode = d_backing_inode(path.dentry);
499                                                   499 
500         if ((mode & MAY_EXEC) && S_ISREG(inode    500         if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
501                 /*                                501                 /*
502                  * MAY_EXEC on regular files i    502                  * MAY_EXEC on regular files is denied if the fs is mounted
503                  * with the "noexec" flag.        503                  * with the "noexec" flag.
504                  */                               504                  */
505                 res = -EACCES;                    505                 res = -EACCES;
506                 if (path_noexec(&path))           506                 if (path_noexec(&path))
507                         goto out_path_release;    507                         goto out_path_release;
508         }                                         508         }
509                                                   509 
510         res = inode_permission(mnt_idmap(path.    510         res = inode_permission(mnt_idmap(path.mnt), inode, mode | MAY_ACCESS);
511         /* SuS v2 requires we report a read on    511         /* SuS v2 requires we report a read only fs too */
512         if (res || !(mode & S_IWOTH) || specia    512         if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
513                 goto out_path_release;            513                 goto out_path_release;
514         /*                                        514         /*
515          * This is a rare case where using __m    515          * This is a rare case where using __mnt_is_readonly()
516          * is OK without a mnt_want/drop_write    516          * is OK without a mnt_want/drop_write() pair.  Since
517          * no actual write to the fs is perfor    517          * no actual write to the fs is performed here, we do
518          * not need to telegraph to that to an    518          * not need to telegraph to that to anyone.
519          *                                        519          *
520          * By doing this, we accept that this     520          * By doing this, we accept that this access is
521          * inherently racy and know that the f    521          * inherently racy and know that the fs may change
522          * state before we even see this resul    522          * state before we even see this result.
523          */                                       523          */
524         if (__mnt_is_readonly(path.mnt))          524         if (__mnt_is_readonly(path.mnt))
525                 res = -EROFS;                     525                 res = -EROFS;
526                                                   526 
527 out_path_release:                                 527 out_path_release:
528         path_put(&path);                          528         path_put(&path);
529         if (retry_estale(res, lookup_flags)) {    529         if (retry_estale(res, lookup_flags)) {
530                 lookup_flags |= LOOKUP_REVAL;     530                 lookup_flags |= LOOKUP_REVAL;
531                 goto retry;                       531                 goto retry;
532         }                                         532         }
533 out:                                              533 out:
534         if (old_cred)                             534         if (old_cred)
535                 revert_creds(old_cred);           535                 revert_creds(old_cred);
536                                                   536 
537         return res;                               537         return res;
538 }                                                 538 }
539                                                   539 
540 SYSCALL_DEFINE3(faccessat, int, dfd, const cha    540 SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
541 {                                                 541 {
542         return do_faccessat(dfd, filename, mod    542         return do_faccessat(dfd, filename, mode, 0);
543 }                                                 543 }
544                                                   544 
545 SYSCALL_DEFINE4(faccessat2, int, dfd, const ch    545 SYSCALL_DEFINE4(faccessat2, int, dfd, const char __user *, filename, int, mode,
546                 int, flags)                       546                 int, flags)
547 {                                                 547 {
548         return do_faccessat(dfd, filename, mod    548         return do_faccessat(dfd, filename, mode, flags);
549 }                                                 549 }
550                                                   550 
551 SYSCALL_DEFINE2(access, const char __user *, f    551 SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
552 {                                                 552 {
553         return do_faccessat(AT_FDCWD, filename    553         return do_faccessat(AT_FDCWD, filename, mode, 0);
554 }                                                 554 }
555                                                   555 
556 SYSCALL_DEFINE1(chdir, const char __user *, fi    556 SYSCALL_DEFINE1(chdir, const char __user *, filename)
557 {                                                 557 {
558         struct path path;                         558         struct path path;
559         int error;                                559         int error;
560         unsigned int lookup_flags = LOOKUP_FOL    560         unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
561 retry:                                            561 retry:
562         error = user_path_at(AT_FDCWD, filenam    562         error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
563         if (error)                                563         if (error)
564                 goto out;                         564                 goto out;
565                                                   565 
566         error = path_permission(&path, MAY_EXE    566         error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
567         if (error)                                567         if (error)
568                 goto dput_and_out;                568                 goto dput_and_out;
569                                                   569 
570         set_fs_pwd(current->fs, &path);           570         set_fs_pwd(current->fs, &path);
571                                                   571 
572 dput_and_out:                                     572 dput_and_out:
573         path_put(&path);                          573         path_put(&path);
574         if (retry_estale(error, lookup_flags))    574         if (retry_estale(error, lookup_flags)) {
575                 lookup_flags |= LOOKUP_REVAL;     575                 lookup_flags |= LOOKUP_REVAL;
576                 goto retry;                       576                 goto retry;
577         }                                         577         }
578 out:                                              578 out:
579         return error;                             579         return error;
580 }                                                 580 }
581                                                   581 
582 SYSCALL_DEFINE1(fchdir, unsigned int, fd)         582 SYSCALL_DEFINE1(fchdir, unsigned int, fd)
583 {                                                 583 {
584         struct fd f = fdget_raw(fd);              584         struct fd f = fdget_raw(fd);
585         int error;                                585         int error;
586                                                   586 
587         error = -EBADF;                           587         error = -EBADF;
588         if (!f.file)                              588         if (!f.file)
589                 goto out;                         589                 goto out;
590                                                   590 
591         error = -ENOTDIR;                         591         error = -ENOTDIR;
592         if (!d_can_lookup(f.file->f_path.dentr    592         if (!d_can_lookup(f.file->f_path.dentry))
593                 goto out_putf;                    593                 goto out_putf;
594                                                   594 
595         error = file_permission(f.file, MAY_EX    595         error = file_permission(f.file, MAY_EXEC | MAY_CHDIR);
596         if (!error)                               596         if (!error)
597                 set_fs_pwd(current->fs, &f.fil    597                 set_fs_pwd(current->fs, &f.file->f_path);
598 out_putf:                                         598 out_putf:
599         fdput(f);                                 599         fdput(f);
600 out:                                              600 out:
601         return error;                             601         return error;
602 }                                                 602 }
603                                                   603 
604 SYSCALL_DEFINE1(chroot, const char __user *, f    604 SYSCALL_DEFINE1(chroot, const char __user *, filename)
605 {                                                 605 {
606         struct path path;                         606         struct path path;
607         int error;                                607         int error;
608         unsigned int lookup_flags = LOOKUP_FOL    608         unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
609 retry:                                            609 retry:
610         error = user_path_at(AT_FDCWD, filenam    610         error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
611         if (error)                                611         if (error)
612                 goto out;                         612                 goto out;
613                                                   613 
614         error = path_permission(&path, MAY_EXE    614         error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
615         if (error)                                615         if (error)
616                 goto dput_and_out;                616                 goto dput_and_out;
617                                                   617 
618         error = -EPERM;                           618         error = -EPERM;
619         if (!ns_capable(current_user_ns(), CAP    619         if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT))
620                 goto dput_and_out;                620                 goto dput_and_out;
621         error = security_path_chroot(&path);      621         error = security_path_chroot(&path);
622         if (error)                                622         if (error)
623                 goto dput_and_out;                623                 goto dput_and_out;
624                                                   624 
625         set_fs_root(current->fs, &path);          625         set_fs_root(current->fs, &path);
626         error = 0;                                626         error = 0;
627 dput_and_out:                                     627 dput_and_out:
628         path_put(&path);                          628         path_put(&path);
629         if (retry_estale(error, lookup_flags))    629         if (retry_estale(error, lookup_flags)) {
630                 lookup_flags |= LOOKUP_REVAL;     630                 lookup_flags |= LOOKUP_REVAL;
631                 goto retry;                       631                 goto retry;
632         }                                         632         }
633 out:                                              633 out:
634         return error;                             634         return error;
635 }                                                 635 }
636                                                   636 
637 int chmod_common(const struct path *path, umod    637 int chmod_common(const struct path *path, umode_t mode)
638 {                                                 638 {
639         struct inode *inode = path->dentry->d_    639         struct inode *inode = path->dentry->d_inode;
640         struct inode *delegated_inode = NULL;     640         struct inode *delegated_inode = NULL;
641         struct iattr newattrs;                    641         struct iattr newattrs;
642         int error;                                642         int error;
643                                                   643 
644         error = mnt_want_write(path->mnt);        644         error = mnt_want_write(path->mnt);
645         if (error)                                645         if (error)
646                 return error;                     646                 return error;
647 retry_deleg:                                      647 retry_deleg:
648         inode_lock(inode);                        648         inode_lock(inode);
649         error = security_path_chmod(path, mode    649         error = security_path_chmod(path, mode);
650         if (error)                                650         if (error)
651                 goto out_unlock;                  651                 goto out_unlock;
652         newattrs.ia_mode = (mode & S_IALLUGO)     652         newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
653         newattrs.ia_valid = ATTR_MODE | ATTR_C    653         newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
654         error = notify_change(mnt_idmap(path->    654         error = notify_change(mnt_idmap(path->mnt), path->dentry,
655                               &newattrs, &dele    655                               &newattrs, &delegated_inode);
656 out_unlock:                                       656 out_unlock:
657         inode_unlock(inode);                      657         inode_unlock(inode);
658         if (delegated_inode) {                    658         if (delegated_inode) {
659                 error = break_deleg_wait(&dele    659                 error = break_deleg_wait(&delegated_inode);
660                 if (!error)                       660                 if (!error)
661                         goto retry_deleg;         661                         goto retry_deleg;
662         }                                         662         }
663         mnt_drop_write(path->mnt);                663         mnt_drop_write(path->mnt);
664         return error;                             664         return error;
665 }                                                 665 }
666                                                   666 
667 int vfs_fchmod(struct file *file, umode_t mode    667 int vfs_fchmod(struct file *file, umode_t mode)
668 {                                                 668 {
669         audit_file(file);                         669         audit_file(file);
670         return chmod_common(&file->f_path, mod    670         return chmod_common(&file->f_path, mode);
671 }                                                 671 }
672                                                   672 
673 SYSCALL_DEFINE2(fchmod, unsigned int, fd, umod    673 SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
674 {                                                 674 {
675         struct fd f = fdget(fd);                  675         struct fd f = fdget(fd);
676         int err = -EBADF;                         676         int err = -EBADF;
677                                                   677 
678         if (f.file) {                             678         if (f.file) {
679                 err = vfs_fchmod(f.file, mode)    679                 err = vfs_fchmod(f.file, mode);
680                 fdput(f);                         680                 fdput(f);
681         }                                         681         }
682         return err;                               682         return err;
683 }                                                 683 }
684                                                   684 
685 static int do_fchmodat(int dfd, const char __u    685 static int do_fchmodat(int dfd, const char __user *filename, umode_t mode,
686                        unsigned int flags)        686                        unsigned int flags)
687 {                                                 687 {
688         struct path path;                         688         struct path path;
689         int error;                                689         int error;
690         unsigned int lookup_flags;                690         unsigned int lookup_flags;
691                                                   691 
692         if (unlikely(flags & ~(AT_SYMLINK_NOFO    692         if (unlikely(flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)))
693                 return -EINVAL;                   693                 return -EINVAL;
694                                                   694 
695         lookup_flags = (flags & AT_SYMLINK_NOF    695         lookup_flags = (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
696         if (flags & AT_EMPTY_PATH)                696         if (flags & AT_EMPTY_PATH)
697                 lookup_flags |= LOOKUP_EMPTY;     697                 lookup_flags |= LOOKUP_EMPTY;
698                                                   698 
699 retry:                                            699 retry:
700         error = user_path_at(dfd, filename, lo    700         error = user_path_at(dfd, filename, lookup_flags, &path);
701         if (!error) {                             701         if (!error) {
702                 error = chmod_common(&path, mo    702                 error = chmod_common(&path, mode);
703                 path_put(&path);                  703                 path_put(&path);
704                 if (retry_estale(error, lookup    704                 if (retry_estale(error, lookup_flags)) {
705                         lookup_flags |= LOOKUP    705                         lookup_flags |= LOOKUP_REVAL;
706                         goto retry;               706                         goto retry;
707                 }                                 707                 }
708         }                                         708         }
709         return error;                             709         return error;
710 }                                                 710 }
711                                                   711 
712 SYSCALL_DEFINE4(fchmodat2, int, dfd, const cha    712 SYSCALL_DEFINE4(fchmodat2, int, dfd, const char __user *, filename,
713                 umode_t, mode, unsigned int, f    713                 umode_t, mode, unsigned int, flags)
714 {                                                 714 {
715         return do_fchmodat(dfd, filename, mode    715         return do_fchmodat(dfd, filename, mode, flags);
716 }                                                 716 }
717                                                   717 
718 SYSCALL_DEFINE3(fchmodat, int, dfd, const char    718 SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename,
719                 umode_t, mode)                    719                 umode_t, mode)
720 {                                                 720 {
721         return do_fchmodat(dfd, filename, mode    721         return do_fchmodat(dfd, filename, mode, 0);
722 }                                                 722 }
723                                                   723 
724 SYSCALL_DEFINE2(chmod, const char __user *, fi    724 SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
725 {                                                 725 {
726         return do_fchmodat(AT_FDCWD, filename,    726         return do_fchmodat(AT_FDCWD, filename, mode, 0);
727 }                                                 727 }
728                                                   728 
729 /*                                                729 /*
730  * Check whether @kuid is valid and if so gene    730  * Check whether @kuid is valid and if so generate and set vfsuid_t in
731  * ia_vfsuid.                                     731  * ia_vfsuid.
732  *                                                732  *
733  * Return: true if @kuid is valid, false if no    733  * Return: true if @kuid is valid, false if not.
734  */                                               734  */
735 static inline bool setattr_vfsuid(struct iattr    735 static inline bool setattr_vfsuid(struct iattr *attr, kuid_t kuid)
736 {                                                 736 {
737         if (!uid_valid(kuid))                     737         if (!uid_valid(kuid))
738                 return false;                     738                 return false;
739         attr->ia_valid |= ATTR_UID;               739         attr->ia_valid |= ATTR_UID;
740         attr->ia_vfsuid = VFSUIDT_INIT(kuid);     740         attr->ia_vfsuid = VFSUIDT_INIT(kuid);
741         return true;                              741         return true;
742 }                                                 742 }
743                                                   743 
744 /*                                                744 /*
745  * Check whether @kgid is valid and if so gene    745  * Check whether @kgid is valid and if so generate and set vfsgid_t in
746  * ia_vfsgid.                                     746  * ia_vfsgid.
747  *                                                747  *
748  * Return: true if @kgid is valid, false if no    748  * Return: true if @kgid is valid, false if not.
749  */                                               749  */
750 static inline bool setattr_vfsgid(struct iattr    750 static inline bool setattr_vfsgid(struct iattr *attr, kgid_t kgid)
751 {                                                 751 {
752         if (!gid_valid(kgid))                     752         if (!gid_valid(kgid))
753                 return false;                     753                 return false;
754         attr->ia_valid |= ATTR_GID;               754         attr->ia_valid |= ATTR_GID;
755         attr->ia_vfsgid = VFSGIDT_INIT(kgid);     755         attr->ia_vfsgid = VFSGIDT_INIT(kgid);
756         return true;                              756         return true;
757 }                                                 757 }
758                                                   758 
759 int chown_common(const struct path *path, uid_    759 int chown_common(const struct path *path, uid_t user, gid_t group)
760 {                                                 760 {
761         struct mnt_idmap *idmap;                  761         struct mnt_idmap *idmap;
762         struct user_namespace *fs_userns;         762         struct user_namespace *fs_userns;
763         struct inode *inode = path->dentry->d_    763         struct inode *inode = path->dentry->d_inode;
764         struct inode *delegated_inode = NULL;     764         struct inode *delegated_inode = NULL;
765         int error;                                765         int error;
766         struct iattr newattrs;                    766         struct iattr newattrs;
767         kuid_t uid;                               767         kuid_t uid;
768         kgid_t gid;                               768         kgid_t gid;
769                                                   769 
770         uid = make_kuid(current_user_ns(), use    770         uid = make_kuid(current_user_ns(), user);
771         gid = make_kgid(current_user_ns(), gro    771         gid = make_kgid(current_user_ns(), group);
772                                                   772 
773         idmap = mnt_idmap(path->mnt);             773         idmap = mnt_idmap(path->mnt);
774         fs_userns = i_user_ns(inode);             774         fs_userns = i_user_ns(inode);
775                                                   775 
776 retry_deleg:                                      776 retry_deleg:
777         newattrs.ia_vfsuid = INVALID_VFSUID;      777         newattrs.ia_vfsuid = INVALID_VFSUID;
778         newattrs.ia_vfsgid = INVALID_VFSGID;      778         newattrs.ia_vfsgid = INVALID_VFSGID;
779         newattrs.ia_valid =  ATTR_CTIME;          779         newattrs.ia_valid =  ATTR_CTIME;
780         if ((user != (uid_t)-1) && !setattr_vf    780         if ((user != (uid_t)-1) && !setattr_vfsuid(&newattrs, uid))
781                 return -EINVAL;                   781                 return -EINVAL;
782         if ((group != (gid_t)-1) && !setattr_v    782         if ((group != (gid_t)-1) && !setattr_vfsgid(&newattrs, gid))
783                 return -EINVAL;                   783                 return -EINVAL;
784         inode_lock(inode);                        784         inode_lock(inode);
785         if (!S_ISDIR(inode->i_mode))              785         if (!S_ISDIR(inode->i_mode))
786                 newattrs.ia_valid |= ATTR_KILL    786                 newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV |
787                                      setattr_s    787                                      setattr_should_drop_sgid(idmap, inode);
788         /* Continue to send actual fs values,     788         /* Continue to send actual fs values, not the mount values. */
789         error = security_path_chown(              789         error = security_path_chown(
790                 path,                             790                 path,
791                 from_vfsuid(idmap, fs_userns,     791                 from_vfsuid(idmap, fs_userns, newattrs.ia_vfsuid),
792                 from_vfsgid(idmap, fs_userns,     792                 from_vfsgid(idmap, fs_userns, newattrs.ia_vfsgid));
793         if (!error)                               793         if (!error)
794                 error = notify_change(idmap, p    794                 error = notify_change(idmap, path->dentry, &newattrs,
795                                       &delegat    795                                       &delegated_inode);
796         inode_unlock(inode);                      796         inode_unlock(inode);
797         if (delegated_inode) {                    797         if (delegated_inode) {
798                 error = break_deleg_wait(&dele    798                 error = break_deleg_wait(&delegated_inode);
799                 if (!error)                       799                 if (!error)
800                         goto retry_deleg;         800                         goto retry_deleg;
801         }                                         801         }
802         return error;                             802         return error;
803 }                                                 803 }
804                                                   804 
805 int do_fchownat(int dfd, const char __user *fi    805 int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
806                 int flag)                         806                 int flag)
807 {                                                 807 {
808         struct path path;                         808         struct path path;
809         int error = -EINVAL;                      809         int error = -EINVAL;
810         int lookup_flags;                         810         int lookup_flags;
811                                                   811 
812         if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT    812         if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
813                 goto out;                         813                 goto out;
814                                                   814 
815         lookup_flags = (flag & AT_SYMLINK_NOFO    815         lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
816         if (flag & AT_EMPTY_PATH)                 816         if (flag & AT_EMPTY_PATH)
817                 lookup_flags |= LOOKUP_EMPTY;     817                 lookup_flags |= LOOKUP_EMPTY;
818 retry:                                            818 retry:
819         error = user_path_at(dfd, filename, lo    819         error = user_path_at(dfd, filename, lookup_flags, &path);
820         if (error)                                820         if (error)
821                 goto out;                         821                 goto out;
822         error = mnt_want_write(path.mnt);         822         error = mnt_want_write(path.mnt);
823         if (error)                                823         if (error)
824                 goto out_release;                 824                 goto out_release;
825         error = chown_common(&path, user, grou    825         error = chown_common(&path, user, group);
826         mnt_drop_write(path.mnt);                 826         mnt_drop_write(path.mnt);
827 out_release:                                      827 out_release:
828         path_put(&path);                          828         path_put(&path);
829         if (retry_estale(error, lookup_flags))    829         if (retry_estale(error, lookup_flags)) {
830                 lookup_flags |= LOOKUP_REVAL;     830                 lookup_flags |= LOOKUP_REVAL;
831                 goto retry;                       831                 goto retry;
832         }                                         832         }
833 out:                                              833 out:
834         return error;                             834         return error;
835 }                                                 835 }
836                                                   836 
837 SYSCALL_DEFINE5(fchownat, int, dfd, const char    837 SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
838                 gid_t, group, int, flag)          838                 gid_t, group, int, flag)
839 {                                                 839 {
840         return do_fchownat(dfd, filename, user    840         return do_fchownat(dfd, filename, user, group, flag);
841 }                                                 841 }
842                                                   842 
843 SYSCALL_DEFINE3(chown, const char __user *, fi    843 SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
844 {                                                 844 {
845         return do_fchownat(AT_FDCWD, filename,    845         return do_fchownat(AT_FDCWD, filename, user, group, 0);
846 }                                                 846 }
847                                                   847 
848 SYSCALL_DEFINE3(lchown, const char __user *, f    848 SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
849 {                                                 849 {
850         return do_fchownat(AT_FDCWD, filename,    850         return do_fchownat(AT_FDCWD, filename, user, group,
851                            AT_SYMLINK_NOFOLLOW    851                            AT_SYMLINK_NOFOLLOW);
852 }                                                 852 }
853                                                   853 
854 int vfs_fchown(struct file *file, uid_t user,     854 int vfs_fchown(struct file *file, uid_t user, gid_t group)
855 {                                                 855 {
856         int error;                                856         int error;
857                                                   857 
858         error = mnt_want_write_file(file);        858         error = mnt_want_write_file(file);
859         if (error)                                859         if (error)
860                 return error;                     860                 return error;
861         audit_file(file);                         861         audit_file(file);
862         error = chown_common(&file->f_path, us    862         error = chown_common(&file->f_path, user, group);
863         mnt_drop_write_file(file);                863         mnt_drop_write_file(file);
864         return error;                             864         return error;
865 }                                                 865 }
866                                                   866 
867 int ksys_fchown(unsigned int fd, uid_t user, g    867 int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
868 {                                                 868 {
869         struct fd f = fdget(fd);                  869         struct fd f = fdget(fd);
870         int error = -EBADF;                       870         int error = -EBADF;
871                                                   871 
872         if (f.file) {                             872         if (f.file) {
873                 error = vfs_fchown(f.file, use    873                 error = vfs_fchown(f.file, user, group);
874                 fdput(f);                         874                 fdput(f);
875         }                                         875         }
876         return error;                             876         return error;
877 }                                                 877 }
878                                                   878 
879 SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_    879 SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
880 {                                                 880 {
881         return ksys_fchown(fd, user, group);      881         return ksys_fchown(fd, user, group);
882 }                                                 882 }
883                                                   883 
884 static inline int file_get_write_access(struct    884 static inline int file_get_write_access(struct file *f)
885 {                                                 885 {
886         int error;                                886         int error;
887                                                   887 
888         error = get_write_access(f->f_inode);     888         error = get_write_access(f->f_inode);
889         if (unlikely(error))                      889         if (unlikely(error))
890                 return error;                     890                 return error;
891         error = mnt_get_write_access(f->f_path    891         error = mnt_get_write_access(f->f_path.mnt);
892         if (unlikely(error))                      892         if (unlikely(error))
893                 goto cleanup_inode;               893                 goto cleanup_inode;
894         if (unlikely(f->f_mode & FMODE_BACKING    894         if (unlikely(f->f_mode & FMODE_BACKING)) {
895                 error = mnt_get_write_access(b    895                 error = mnt_get_write_access(backing_file_user_path(f)->mnt);
896                 if (unlikely(error))              896                 if (unlikely(error))
897                         goto cleanup_mnt;         897                         goto cleanup_mnt;
898         }                                         898         }
899         return 0;                                 899         return 0;
900                                                   900 
901 cleanup_mnt:                                      901 cleanup_mnt:
902         mnt_put_write_access(f->f_path.mnt);      902         mnt_put_write_access(f->f_path.mnt);
903 cleanup_inode:                                    903 cleanup_inode:
904         put_write_access(f->f_inode);             904         put_write_access(f->f_inode);
905         return error;                             905         return error;
906 }                                                 906 }
907                                                   907 
908 static int do_dentry_open(struct file *f,         908 static int do_dentry_open(struct file *f,
909                           int (*open)(struct i    909                           int (*open)(struct inode *, struct file *))
910 {                                                 910 {
911         static const struct file_operations em    911         static const struct file_operations empty_fops = {};
912         struct inode *inode = f->f_path.dentry    912         struct inode *inode = f->f_path.dentry->d_inode;
913         int error;                                913         int error;
914                                                   914 
915         path_get(&f->f_path);                     915         path_get(&f->f_path);
916         f->f_inode = inode;                       916         f->f_inode = inode;
917         f->f_mapping = inode->i_mapping;          917         f->f_mapping = inode->i_mapping;
918         f->f_wb_err = filemap_sample_wb_err(f-    918         f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
919         f->f_sb_err = file_sample_sb_err(f);      919         f->f_sb_err = file_sample_sb_err(f);
920                                                   920 
921         if (unlikely(f->f_flags & O_PATH)) {      921         if (unlikely(f->f_flags & O_PATH)) {
922                 f->f_mode = FMODE_PATH | FMODE    922                 f->f_mode = FMODE_PATH | FMODE_OPENED;
923                 f->f_op = &empty_fops;            923                 f->f_op = &empty_fops;
924                 return 0;                         924                 return 0;
925         }                                         925         }
926                                                   926 
927         if ((f->f_mode & (FMODE_READ | FMODE_W    927         if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) {
928                 i_readcount_inc(inode);           928                 i_readcount_inc(inode);
929         } else if (f->f_mode & FMODE_WRITE &&     929         } else if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
930                 error = file_get_write_access(    930                 error = file_get_write_access(f);
931                 if (unlikely(error))              931                 if (unlikely(error))
932                         goto cleanup_file;        932                         goto cleanup_file;
933                 f->f_mode |= FMODE_WRITER;        933                 f->f_mode |= FMODE_WRITER;
934         }                                         934         }
935                                                   935 
936         /* POSIX.1-2008/SUSv4 Section XSI 2.9.    936         /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
937         if (S_ISREG(inode->i_mode) || S_ISDIR(    937         if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
938                 f->f_mode |= FMODE_ATOMIC_POS;    938                 f->f_mode |= FMODE_ATOMIC_POS;
939                                                   939 
940         f->f_op = fops_get(inode->i_fop);         940         f->f_op = fops_get(inode->i_fop);
941         if (WARN_ON(!f->f_op)) {                  941         if (WARN_ON(!f->f_op)) {
942                 error = -ENODEV;                  942                 error = -ENODEV;
943                 goto cleanup_all;                 943                 goto cleanup_all;
944         }                                         944         }
945                                                   945 
946         error = security_file_open(f);            946         error = security_file_open(f);
947         if (error)                                947         if (error)
948                 goto cleanup_all;                 948                 goto cleanup_all;
949                                                   949 
950         error = break_lease(file_inode(f), f->    950         error = break_lease(file_inode(f), f->f_flags);
951         if (error)                                951         if (error)
952                 goto cleanup_all;                 952                 goto cleanup_all;
953                                                   953 
954         /* normally all 3 are set; ->open() ca    954         /* normally all 3 are set; ->open() can clear them if needed */
955         f->f_mode |= FMODE_LSEEK | FMODE_PREAD    955         f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
956         if (!open)                                956         if (!open)
957                 open = f->f_op->open;             957                 open = f->f_op->open;
958         if (open) {                               958         if (open) {
959                 error = open(inode, f);           959                 error = open(inode, f);
960                 if (error)                        960                 if (error)
961                         goto cleanup_all;         961                         goto cleanup_all;
962         }                                         962         }
963         f->f_mode |= FMODE_OPENED;                963         f->f_mode |= FMODE_OPENED;
964         if ((f->f_mode & FMODE_READ) &&           964         if ((f->f_mode & FMODE_READ) &&
965              likely(f->f_op->read || f->f_op->    965              likely(f->f_op->read || f->f_op->read_iter))
966                 f->f_mode |= FMODE_CAN_READ;      966                 f->f_mode |= FMODE_CAN_READ;
967         if ((f->f_mode & FMODE_WRITE) &&          967         if ((f->f_mode & FMODE_WRITE) &&
968              likely(f->f_op->write || f->f_op-    968              likely(f->f_op->write || f->f_op->write_iter))
969                 f->f_mode |= FMODE_CAN_WRITE;     969                 f->f_mode |= FMODE_CAN_WRITE;
970         if ((f->f_mode & FMODE_LSEEK) && !f->f    970         if ((f->f_mode & FMODE_LSEEK) && !f->f_op->llseek)
971                 f->f_mode &= ~FMODE_LSEEK;        971                 f->f_mode &= ~FMODE_LSEEK;
972         if (f->f_mapping->a_ops && f->f_mappin    972         if (f->f_mapping->a_ops && f->f_mapping->a_ops->direct_IO)
973                 f->f_mode |= FMODE_CAN_ODIRECT    973                 f->f_mode |= FMODE_CAN_ODIRECT;
974                                                   974 
975         f->f_flags &= ~(O_CREAT | O_EXCL | O_N    975         f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
976         f->f_iocb_flags = iocb_flags(f);          976         f->f_iocb_flags = iocb_flags(f);
977                                                   977 
978         file_ra_state_init(&f->f_ra, f->f_mapp    978         file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
979                                                   979 
980         if ((f->f_flags & O_DIRECT) && !(f->f_    980         if ((f->f_flags & O_DIRECT) && !(f->f_mode & FMODE_CAN_ODIRECT))
981                 return -EINVAL;                   981                 return -EINVAL;
982                                                   982 
983         /*                                        983         /*
984          * XXX: Huge page cache doesn't suppor    984          * XXX: Huge page cache doesn't support writing yet. Drop all page
985          * cache for this file before processi    985          * cache for this file before processing writes.
986          */                                       986          */
987         if (f->f_mode & FMODE_WRITE) {            987         if (f->f_mode & FMODE_WRITE) {
988                 /*                                988                 /*
989                  * Depends on full fence from     989                  * Depends on full fence from get_write_access() to synchronize
990                  * against collapse_file() reg    990                  * against collapse_file() regarding i_writecount and nr_thps
991                  * updates. Ensures subsequent    991                  * updates. Ensures subsequent insertion of THPs into the page
992                  * cache will fail.               992                  * cache will fail.
993                  */                               993                  */
994                 if (filemap_nr_thps(inode->i_m    994                 if (filemap_nr_thps(inode->i_mapping)) {
995                         struct address_space *    995                         struct address_space *mapping = inode->i_mapping;
996                                                   996 
997                         filemap_invalidate_loc    997                         filemap_invalidate_lock(inode->i_mapping);
998                         /*                        998                         /*
999                          * unmap_mapping_range    999                          * unmap_mapping_range just need to be called once
1000                          * here, because the     1000                          * here, because the private pages is not need to be
1001                          * unmapped mapping (    1001                          * unmapped mapping (e.g. data segment of dynamic
1002                          * shared libraries h    1002                          * shared libraries here).
1003                          */                      1003                          */
1004                         unmap_mapping_range(m    1004                         unmap_mapping_range(mapping, 0, 0, 0);
1005                         truncate_inode_pages(    1005                         truncate_inode_pages(mapping, 0);
1006                         filemap_invalidate_un    1006                         filemap_invalidate_unlock(inode->i_mapping);
1007                 }                                1007                 }
1008         }                                        1008         }
1009                                                  1009 
1010         return 0;                                1010         return 0;
1011                                                  1011 
1012 cleanup_all:                                     1012 cleanup_all:
1013         if (WARN_ON_ONCE(error > 0))             1013         if (WARN_ON_ONCE(error > 0))
1014                 error = -EINVAL;                 1014                 error = -EINVAL;
1015         fops_put(f->f_op);                       1015         fops_put(f->f_op);
1016         put_file_access(f);                      1016         put_file_access(f);
1017 cleanup_file:                                    1017 cleanup_file:
1018         path_put(&f->f_path);                    1018         path_put(&f->f_path);
1019         f->f_path.mnt = NULL;                    1019         f->f_path.mnt = NULL;
1020         f->f_path.dentry = NULL;                 1020         f->f_path.dentry = NULL;
1021         f->f_inode = NULL;                       1021         f->f_inode = NULL;
1022         return error;                            1022         return error;
1023 }                                                1023 }
1024                                                  1024 
1025 /**                                              1025 /**
1026  * finish_open - finish opening a file           1026  * finish_open - finish opening a file
1027  * @file: file pointer                           1027  * @file: file pointer
1028  * @dentry: pointer to dentry                    1028  * @dentry: pointer to dentry
1029  * @open: open callback                          1029  * @open: open callback
1030  *                                               1030  *
1031  * This can be used to finish opening a file     1031  * This can be used to finish opening a file passed to i_op->atomic_open().
1032  *                                               1032  *
1033  * If the open callback is set to NULL, then     1033  * If the open callback is set to NULL, then the standard f_op->open()
1034  * filesystem callback is substituted.           1034  * filesystem callback is substituted.
1035  *                                               1035  *
1036  * NB: the dentry reference is _not_ consumed    1036  * NB: the dentry reference is _not_ consumed.  If, for example, the dentry is
1037  * the return value of d_splice_alias(), then    1037  * the return value of d_splice_alias(), then the caller needs to perform dput()
1038  * on it after finish_open().                    1038  * on it after finish_open().
1039  *                                               1039  *
1040  * Returns zero on success or -errno if the o    1040  * Returns zero on success or -errno if the open failed.
1041  */                                              1041  */
1042 int finish_open(struct file *file, struct den    1042 int finish_open(struct file *file, struct dentry *dentry,
1043                 int (*open)(struct inode *, s    1043                 int (*open)(struct inode *, struct file *))
1044 {                                                1044 {
1045         BUG_ON(file->f_mode & FMODE_OPENED);     1045         BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
1046                                                  1046 
1047         file->f_path.dentry = dentry;            1047         file->f_path.dentry = dentry;
1048         return do_dentry_open(file, open);       1048         return do_dentry_open(file, open);
1049 }                                                1049 }
1050 EXPORT_SYMBOL(finish_open);                      1050 EXPORT_SYMBOL(finish_open);
1051                                                  1051 
1052 /**                                              1052 /**
1053  * finish_no_open - finish ->atomic_open() wi    1053  * finish_no_open - finish ->atomic_open() without opening the file
1054  *                                               1054  *
1055  * @file: file pointer                           1055  * @file: file pointer
1056  * @dentry: dentry or NULL (as returned from     1056  * @dentry: dentry or NULL (as returned from ->lookup())
1057  *                                               1057  *
1058  * This can be used to set the result of a su    1058  * This can be used to set the result of a successful lookup in ->atomic_open().
1059  *                                               1059  *
1060  * NB: unlike finish_open() this function doe    1060  * NB: unlike finish_open() this function does consume the dentry reference and
1061  * the caller need not dput() it.                1061  * the caller need not dput() it.
1062  *                                               1062  *
1063  * Returns "" which must be the return value     1063  * Returns "" which must be the return value of ->atomic_open() after having
1064  * called this function.                         1064  * called this function.
1065  */                                              1065  */
1066 int finish_no_open(struct file *file, struct     1066 int finish_no_open(struct file *file, struct dentry *dentry)
1067 {                                                1067 {
1068         file->f_path.dentry = dentry;            1068         file->f_path.dentry = dentry;
1069         return 0;                                1069         return 0;
1070 }                                                1070 }
1071 EXPORT_SYMBOL(finish_no_open);                   1071 EXPORT_SYMBOL(finish_no_open);
1072                                                  1072 
1073 char *file_path(struct file *filp, char *buf,    1073 char *file_path(struct file *filp, char *buf, int buflen)
1074 {                                                1074 {
1075         return d_path(&filp->f_path, buf, buf    1075         return d_path(&filp->f_path, buf, buflen);
1076 }                                                1076 }
1077 EXPORT_SYMBOL(file_path);                        1077 EXPORT_SYMBOL(file_path);
1078                                                  1078 
1079 /**                                              1079 /**
1080  * vfs_open - open the file at the given path    1080  * vfs_open - open the file at the given path
1081  * @path: path to open                           1081  * @path: path to open
1082  * @file: newly allocated file with f_flag in    1082  * @file: newly allocated file with f_flag initialized
1083  */                                              1083  */
1084 int vfs_open(const struct path *path, struct     1084 int vfs_open(const struct path *path, struct file *file)
1085 {                                                1085 {
1086         int ret;                                 1086         int ret;
1087                                                  1087 
1088         file->f_path = *path;                    1088         file->f_path = *path;
1089         ret = do_dentry_open(file, NULL);        1089         ret = do_dentry_open(file, NULL);
1090         if (!ret) {                              1090         if (!ret) {
1091                 /*                               1091                 /*
1092                  * Once we return a file with    1092                  * Once we return a file with FMODE_OPENED, __fput() will call
1093                  * fsnotify_close(), so we ne    1093                  * fsnotify_close(), so we need fsnotify_open() here for
1094                  * symmetry.                     1094                  * symmetry.
1095                  */                              1095                  */
1096                 fsnotify_open(file);             1096                 fsnotify_open(file);
1097         }                                        1097         }
1098         return ret;                              1098         return ret;
1099 }                                                1099 }
1100                                                  1100 
1101 struct file *dentry_open(const struct path *p    1101 struct file *dentry_open(const struct path *path, int flags,
1102                          const struct cred *c    1102                          const struct cred *cred)
1103 {                                                1103 {
1104         int error;                               1104         int error;
1105         struct file *f;                          1105         struct file *f;
1106                                                  1106 
1107         /* We must always pass in a valid mou    1107         /* We must always pass in a valid mount pointer. */
1108         BUG_ON(!path->mnt);                      1108         BUG_ON(!path->mnt);
1109                                                  1109 
1110         f = alloc_empty_file(flags, cred);       1110         f = alloc_empty_file(flags, cred);
1111         if (!IS_ERR(f)) {                        1111         if (!IS_ERR(f)) {
1112                 error = vfs_open(path, f);       1112                 error = vfs_open(path, f);
1113                 if (error) {                     1113                 if (error) {
1114                         fput(f);                 1114                         fput(f);
1115                         f = ERR_PTR(error);      1115                         f = ERR_PTR(error);
1116                 }                                1116                 }
1117         }                                        1117         }
1118         return f;                                1118         return f;
1119 }                                                1119 }
1120 EXPORT_SYMBOL(dentry_open);                      1120 EXPORT_SYMBOL(dentry_open);
1121                                                  1121 
1122 /**                                              1122 /**
1123  * dentry_create - Create and open a file        1123  * dentry_create - Create and open a file
1124  * @path: path to create                         1124  * @path: path to create
1125  * @flags: O_ flags                              1125  * @flags: O_ flags
1126  * @mode: mode bits for new file                 1126  * @mode: mode bits for new file
1127  * @cred: credentials to use                     1127  * @cred: credentials to use
1128  *                                               1128  *
1129  * Caller must hold the parent directory's lo    1129  * Caller must hold the parent directory's lock, and have prepared
1130  * a negative dentry, placed in @path->dentry    1130  * a negative dentry, placed in @path->dentry, for the new file.
1131  *                                               1131  *
1132  * Caller sets @path->mnt to the vfsmount of     1132  * Caller sets @path->mnt to the vfsmount of the filesystem where
1133  * the new file is to be created. The parent     1133  * the new file is to be created. The parent directory and the
1134  * negative dentry must reside on the same fi    1134  * negative dentry must reside on the same filesystem instance.
1135  *                                               1135  *
1136  * On success, returns a "struct file *". Oth    1136  * On success, returns a "struct file *". Otherwise a ERR_PTR
1137  * is returned.                                  1137  * is returned.
1138  */                                              1138  */
1139 struct file *dentry_create(const struct path     1139 struct file *dentry_create(const struct path *path, int flags, umode_t mode,
1140                            const struct cred     1140                            const struct cred *cred)
1141 {                                                1141 {
1142         struct file *f;                          1142         struct file *f;
1143         int error;                               1143         int error;
1144                                                  1144 
1145         f = alloc_empty_file(flags, cred);       1145         f = alloc_empty_file(flags, cred);
1146         if (IS_ERR(f))                           1146         if (IS_ERR(f))
1147                 return f;                        1147                 return f;
1148                                                  1148 
1149         error = vfs_create(mnt_idmap(path->mn    1149         error = vfs_create(mnt_idmap(path->mnt),
1150                            d_inode(path->dent    1150                            d_inode(path->dentry->d_parent),
1151                            path->dentry, mode    1151                            path->dentry, mode, true);
1152         if (!error)                              1152         if (!error)
1153                 error = vfs_open(path, f);       1153                 error = vfs_open(path, f);
1154                                                  1154 
1155         if (unlikely(error)) {                   1155         if (unlikely(error)) {
1156                 fput(f);                         1156                 fput(f);
1157                 return ERR_PTR(error);           1157                 return ERR_PTR(error);
1158         }                                        1158         }
1159         return f;                                1159         return f;
1160 }                                                1160 }
1161 EXPORT_SYMBOL(dentry_create);                    1161 EXPORT_SYMBOL(dentry_create);
1162                                                  1162 
1163 /**                                              1163 /**
1164  * kernel_file_open - open a file for kernel     1164  * kernel_file_open - open a file for kernel internal use
1165  * @path:       path of the file to open         1165  * @path:       path of the file to open
1166  * @flags:      open flags                       1166  * @flags:      open flags
1167  * @cred:       credentials for open             1167  * @cred:       credentials for open
1168  *                                               1168  *
1169  * Open a file for use by in-kernel consumers    1169  * Open a file for use by in-kernel consumers. The file is not accounted
1170  * against nr_files and must not be installed    1170  * against nr_files and must not be installed into the file descriptor
1171  * table.                                        1171  * table.
1172  *                                               1172  *
1173  * Return: Opened file on success, an error p    1173  * Return: Opened file on success, an error pointer on failure.
1174  */                                              1174  */
1175 struct file *kernel_file_open(const struct pa    1175 struct file *kernel_file_open(const struct path *path, int flags,
1176                                 const struct     1176                                 const struct cred *cred)
1177 {                                                1177 {
1178         struct file *f;                          1178         struct file *f;
1179         int error;                               1179         int error;
1180                                                  1180 
1181         f = alloc_empty_file_noaccount(flags,    1181         f = alloc_empty_file_noaccount(flags, cred);
1182         if (IS_ERR(f))                           1182         if (IS_ERR(f))
1183                 return f;                        1183                 return f;
1184                                                  1184 
1185         f->f_path = *path;                       1185         f->f_path = *path;
1186         error = do_dentry_open(f, NULL);         1186         error = do_dentry_open(f, NULL);
1187         if (error) {                             1187         if (error) {
1188                 fput(f);                         1188                 fput(f);
1189                 return ERR_PTR(error);           1189                 return ERR_PTR(error);
1190         }                                        1190         }
1191                                                  1191 
1192         fsnotify_open(f);                        1192         fsnotify_open(f);
1193         return f;                                1193         return f;
1194 }                                                1194 }
1195 EXPORT_SYMBOL_GPL(kernel_file_open);             1195 EXPORT_SYMBOL_GPL(kernel_file_open);
1196                                                  1196 
1197 #define WILL_CREATE(flags)      (flags & (O_C    1197 #define WILL_CREATE(flags)      (flags & (O_CREAT | __O_TMPFILE))
1198 #define O_PATH_FLAGS            (O_DIRECTORY     1198 #define O_PATH_FLAGS            (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC)
1199                                                  1199 
1200 inline struct open_how build_open_how(int fla    1200 inline struct open_how build_open_how(int flags, umode_t mode)
1201 {                                                1201 {
1202         struct open_how how = {                  1202         struct open_how how = {
1203                 .flags = flags & VALID_OPEN_F    1203                 .flags = flags & VALID_OPEN_FLAGS,
1204                 .mode = mode & S_IALLUGO,        1204                 .mode = mode & S_IALLUGO,
1205         };                                       1205         };
1206                                                  1206 
1207         /* O_PATH beats everything else. */      1207         /* O_PATH beats everything else. */
1208         if (how.flags & O_PATH)                  1208         if (how.flags & O_PATH)
1209                 how.flags &= O_PATH_FLAGS;       1209                 how.flags &= O_PATH_FLAGS;
1210         /* Modes should only be set for creat    1210         /* Modes should only be set for create-like flags. */
1211         if (!WILL_CREATE(how.flags))             1211         if (!WILL_CREATE(how.flags))
1212                 how.mode = 0;                    1212                 how.mode = 0;
1213         return how;                              1213         return how;
1214 }                                                1214 }
1215                                                  1215 
1216 inline int build_open_flags(const struct open    1216 inline int build_open_flags(const struct open_how *how, struct open_flags *op)
1217 {                                                1217 {
1218         u64 flags = how->flags;                  1218         u64 flags = how->flags;
1219         u64 strip = __FMODE_NONOTIFY | O_CLOE    1219         u64 strip = __FMODE_NONOTIFY | O_CLOEXEC;
1220         int lookup_flags = 0;                    1220         int lookup_flags = 0;
1221         int acc_mode = ACC_MODE(flags);          1221         int acc_mode = ACC_MODE(flags);
1222                                                  1222 
1223         BUILD_BUG_ON_MSG(upper_32_bits(VALID_    1223         BUILD_BUG_ON_MSG(upper_32_bits(VALID_OPEN_FLAGS),
1224                          "struct open_flags d    1224                          "struct open_flags doesn't yet handle flags > 32 bits");
1225                                                  1225 
1226         /*                                       1226         /*
1227          * Strip flags that either shouldn't     1227          * Strip flags that either shouldn't be set by userspace like
1228          * FMODE_NONOTIFY or that aren't rele    1228          * FMODE_NONOTIFY or that aren't relevant in determining struct
1229          * open_flags like O_CLOEXEC.            1229          * open_flags like O_CLOEXEC.
1230          */                                      1230          */
1231         flags &= ~strip;                         1231         flags &= ~strip;
1232                                                  1232 
1233         /*                                       1233         /*
1234          * Older syscalls implicitly clear al    1234          * Older syscalls implicitly clear all of the invalid flags or argument
1235          * values before calling build_open_f    1235          * values before calling build_open_flags(), but openat2(2) checks all
1236          * of its arguments.                     1236          * of its arguments.
1237          */                                      1237          */
1238         if (flags & ~VALID_OPEN_FLAGS)           1238         if (flags & ~VALID_OPEN_FLAGS)
1239                 return -EINVAL;                  1239                 return -EINVAL;
1240         if (how->resolve & ~VALID_RESOLVE_FLA    1240         if (how->resolve & ~VALID_RESOLVE_FLAGS)
1241                 return -EINVAL;                  1241                 return -EINVAL;
1242                                                  1242 
1243         /* Scoping flags are mutually exclusi    1243         /* Scoping flags are mutually exclusive. */
1244         if ((how->resolve & RESOLVE_BENEATH)     1244         if ((how->resolve & RESOLVE_BENEATH) && (how->resolve & RESOLVE_IN_ROOT))
1245                 return -EINVAL;                  1245                 return -EINVAL;
1246                                                  1246 
1247         /* Deal with the mode. */                1247         /* Deal with the mode. */
1248         if (WILL_CREATE(flags)) {                1248         if (WILL_CREATE(flags)) {
1249                 if (how->mode & ~S_IALLUGO)      1249                 if (how->mode & ~S_IALLUGO)
1250                         return -EINVAL;          1250                         return -EINVAL;
1251                 op->mode = how->mode | S_IFRE    1251                 op->mode = how->mode | S_IFREG;
1252         } else {                                 1252         } else {
1253                 if (how->mode != 0)              1253                 if (how->mode != 0)
1254                         return -EINVAL;          1254                         return -EINVAL;
1255                 op->mode = 0;                    1255                 op->mode = 0;
1256         }                                        1256         }
1257                                                  1257 
1258         /*                                       1258         /*
1259          * Block bugs where O_DIRECTORY | O_C    1259          * Block bugs where O_DIRECTORY | O_CREAT created regular files.
1260          * Note, that blocking O_DIRECTORY |     1260          * Note, that blocking O_DIRECTORY | O_CREAT here also protects
1261          * O_TMPFILE below which requires O_D    1261          * O_TMPFILE below which requires O_DIRECTORY being raised.
1262          */                                      1262          */
1263         if ((flags & (O_DIRECTORY | O_CREAT))    1263         if ((flags & (O_DIRECTORY | O_CREAT)) == (O_DIRECTORY | O_CREAT))
1264                 return -EINVAL;                  1264                 return -EINVAL;
1265                                                  1265 
1266         /* Now handle the creative implementa    1266         /* Now handle the creative implementation of O_TMPFILE. */
1267         if (flags & __O_TMPFILE) {               1267         if (flags & __O_TMPFILE) {
1268                 /*                               1268                 /*
1269                  * In order to ensure program    1269                  * In order to ensure programs get explicit errors when trying
1270                  * to use O_TMPFILE on old ke    1270                  * to use O_TMPFILE on old kernels we enforce that O_DIRECTORY
1271                  * is raised alongside __O_TM    1271                  * is raised alongside __O_TMPFILE.
1272                  */                              1272                  */
1273                 if (!(flags & O_DIRECTORY))      1273                 if (!(flags & O_DIRECTORY))
1274                         return -EINVAL;          1274                         return -EINVAL;
1275                 if (!(acc_mode & MAY_WRITE))     1275                 if (!(acc_mode & MAY_WRITE))
1276                         return -EINVAL;          1276                         return -EINVAL;
1277         }                                        1277         }
1278         if (flags & O_PATH) {                    1278         if (flags & O_PATH) {
1279                 /* O_PATH only permits certai    1279                 /* O_PATH only permits certain other flags to be set. */
1280                 if (flags & ~O_PATH_FLAGS)       1280                 if (flags & ~O_PATH_FLAGS)
1281                         return -EINVAL;          1281                         return -EINVAL;
1282                 acc_mode = 0;                    1282                 acc_mode = 0;
1283         }                                        1283         }
1284                                                  1284 
1285         /*                                       1285         /*
1286          * O_SYNC is implemented as __O_SYNC|    1286          * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
1287          * check for O_DSYNC if the need any     1287          * check for O_DSYNC if the need any syncing at all we enforce it's
1288          * always set instead of having to de    1288          * always set instead of having to deal with possibly weird behaviour
1289          * for malicious applications setting    1289          * for malicious applications setting only __O_SYNC.
1290          */                                      1290          */
1291         if (flags & __O_SYNC)                    1291         if (flags & __O_SYNC)
1292                 flags |= O_DSYNC;                1292                 flags |= O_DSYNC;
1293                                                  1293 
1294         op->open_flag = flags;                   1294         op->open_flag = flags;
1295                                                  1295 
1296         /* O_TRUNC implies we need access che    1296         /* O_TRUNC implies we need access checks for write permissions */
1297         if (flags & O_TRUNC)                     1297         if (flags & O_TRUNC)
1298                 acc_mode |= MAY_WRITE;           1298                 acc_mode |= MAY_WRITE;
1299                                                  1299 
1300         /* Allow the LSM permission hook to d    1300         /* Allow the LSM permission hook to distinguish append
1301            access from general write access.     1301            access from general write access. */
1302         if (flags & O_APPEND)                    1302         if (flags & O_APPEND)
1303                 acc_mode |= MAY_APPEND;          1303                 acc_mode |= MAY_APPEND;
1304                                                  1304 
1305         op->acc_mode = acc_mode;                 1305         op->acc_mode = acc_mode;
1306                                                  1306 
1307         op->intent = flags & O_PATH ? 0 : LOO    1307         op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
1308                                                  1308 
1309         if (flags & O_CREAT) {                   1309         if (flags & O_CREAT) {
1310                 op->intent |= LOOKUP_CREATE;     1310                 op->intent |= LOOKUP_CREATE;
1311                 if (flags & O_EXCL) {            1311                 if (flags & O_EXCL) {
1312                         op->intent |= LOOKUP_    1312                         op->intent |= LOOKUP_EXCL;
1313                         flags |= O_NOFOLLOW;     1313                         flags |= O_NOFOLLOW;
1314                 }                                1314                 }
1315         }                                        1315         }
1316                                                  1316 
1317         if (flags & O_DIRECTORY)                 1317         if (flags & O_DIRECTORY)
1318                 lookup_flags |= LOOKUP_DIRECT    1318                 lookup_flags |= LOOKUP_DIRECTORY;
1319         if (!(flags & O_NOFOLLOW))               1319         if (!(flags & O_NOFOLLOW))
1320                 lookup_flags |= LOOKUP_FOLLOW    1320                 lookup_flags |= LOOKUP_FOLLOW;
1321                                                  1321 
1322         if (how->resolve & RESOLVE_NO_XDEV)      1322         if (how->resolve & RESOLVE_NO_XDEV)
1323                 lookup_flags |= LOOKUP_NO_XDE    1323                 lookup_flags |= LOOKUP_NO_XDEV;
1324         if (how->resolve & RESOLVE_NO_MAGICLI    1324         if (how->resolve & RESOLVE_NO_MAGICLINKS)
1325                 lookup_flags |= LOOKUP_NO_MAG    1325                 lookup_flags |= LOOKUP_NO_MAGICLINKS;
1326         if (how->resolve & RESOLVE_NO_SYMLINK    1326         if (how->resolve & RESOLVE_NO_SYMLINKS)
1327                 lookup_flags |= LOOKUP_NO_SYM    1327                 lookup_flags |= LOOKUP_NO_SYMLINKS;
1328         if (how->resolve & RESOLVE_BENEATH)      1328         if (how->resolve & RESOLVE_BENEATH)
1329                 lookup_flags |= LOOKUP_BENEAT    1329                 lookup_flags |= LOOKUP_BENEATH;
1330         if (how->resolve & RESOLVE_IN_ROOT)      1330         if (how->resolve & RESOLVE_IN_ROOT)
1331                 lookup_flags |= LOOKUP_IN_ROO    1331                 lookup_flags |= LOOKUP_IN_ROOT;
1332         if (how->resolve & RESOLVE_CACHED) {     1332         if (how->resolve & RESOLVE_CACHED) {
1333                 /* Don't bother even trying f    1333                 /* Don't bother even trying for create/truncate/tmpfile open */
1334                 if (flags & (O_TRUNC | O_CREA    1334                 if (flags & (O_TRUNC | O_CREAT | __O_TMPFILE))
1335                         return -EAGAIN;          1335                         return -EAGAIN;
1336                 lookup_flags |= LOOKUP_CACHED    1336                 lookup_flags |= LOOKUP_CACHED;
1337         }                                        1337         }
1338                                                  1338 
1339         op->lookup_flags = lookup_flags;         1339         op->lookup_flags = lookup_flags;
1340         return 0;                                1340         return 0;
1341 }                                                1341 }
1342                                                  1342 
1343 /**                                              1343 /**
1344  * file_open_name - open file and return file    1344  * file_open_name - open file and return file pointer
1345  *                                               1345  *
1346  * @name:       struct filename containing pa    1346  * @name:       struct filename containing path to open
1347  * @flags:      open flags as per the open(2)    1347  * @flags:      open flags as per the open(2) second argument
1348  * @mode:       mode for the new file if O_CR    1348  * @mode:       mode for the new file if O_CREAT is set, else ignored
1349  *                                               1349  *
1350  * This is the helper to open a file from ker    1350  * This is the helper to open a file from kernelspace if you really
1351  * have to.  But in generally you should not     1351  * have to.  But in generally you should not do this, so please move
1352  * along, nothing to see here..                  1352  * along, nothing to see here..
1353  */                                              1353  */
1354 struct file *file_open_name(struct filename *    1354 struct file *file_open_name(struct filename *name, int flags, umode_t mode)
1355 {                                                1355 {
1356         struct open_flags op;                    1356         struct open_flags op;
1357         struct open_how how = build_open_how(    1357         struct open_how how = build_open_how(flags, mode);
1358         int err = build_open_flags(&how, &op)    1358         int err = build_open_flags(&how, &op);
1359         if (err)                                 1359         if (err)
1360                 return ERR_PTR(err);             1360                 return ERR_PTR(err);
1361         return do_filp_open(AT_FDCWD, name, &    1361         return do_filp_open(AT_FDCWD, name, &op);
1362 }                                                1362 }
1363                                                  1363 
1364 /**                                              1364 /**
1365  * filp_open - open file and return file poin    1365  * filp_open - open file and return file pointer
1366  *                                               1366  *
1367  * @filename:   path to open                     1367  * @filename:   path to open
1368  * @flags:      open flags as per the open(2)    1368  * @flags:      open flags as per the open(2) second argument
1369  * @mode:       mode for the new file if O_CR    1369  * @mode:       mode for the new file if O_CREAT is set, else ignored
1370  *                                               1370  *
1371  * This is the helper to open a file from ker    1371  * This is the helper to open a file from kernelspace if you really
1372  * have to.  But in generally you should not     1372  * have to.  But in generally you should not do this, so please move
1373  * along, nothing to see here..                  1373  * along, nothing to see here..
1374  */                                              1374  */
1375 struct file *filp_open(const char *filename,     1375 struct file *filp_open(const char *filename, int flags, umode_t mode)
1376 {                                                1376 {
1377         struct filename *name = getname_kerne    1377         struct filename *name = getname_kernel(filename);
1378         struct file *file = ERR_CAST(name);      1378         struct file *file = ERR_CAST(name);
1379                                                  1379 
1380         if (!IS_ERR(name)) {                     1380         if (!IS_ERR(name)) {
1381                 file = file_open_name(name, f    1381                 file = file_open_name(name, flags, mode);
1382                 putname(name);                   1382                 putname(name);
1383         }                                        1383         }
1384         return file;                             1384         return file;
1385 }                                                1385 }
1386 EXPORT_SYMBOL(filp_open);                        1386 EXPORT_SYMBOL(filp_open);
1387                                                  1387 
1388 struct file *file_open_root(const struct path    1388 struct file *file_open_root(const struct path *root,
1389                             const char *filen    1389                             const char *filename, int flags, umode_t mode)
1390 {                                                1390 {
1391         struct open_flags op;                    1391         struct open_flags op;
1392         struct open_how how = build_open_how(    1392         struct open_how how = build_open_how(flags, mode);
1393         int err = build_open_flags(&how, &op)    1393         int err = build_open_flags(&how, &op);
1394         if (err)                                 1394         if (err)
1395                 return ERR_PTR(err);             1395                 return ERR_PTR(err);
1396         return do_file_open_root(root, filena    1396         return do_file_open_root(root, filename, &op);
1397 }                                                1397 }
1398 EXPORT_SYMBOL(file_open_root);                   1398 EXPORT_SYMBOL(file_open_root);
1399                                                  1399 
1400 static long do_sys_openat2(int dfd, const cha    1400 static long do_sys_openat2(int dfd, const char __user *filename,
1401                            struct open_how *h    1401                            struct open_how *how)
1402 {                                                1402 {
1403         struct open_flags op;                    1403         struct open_flags op;
1404         int fd = build_open_flags(how, &op);     1404         int fd = build_open_flags(how, &op);
1405         struct filename *tmp;                    1405         struct filename *tmp;
1406                                                  1406 
1407         if (fd)                                  1407         if (fd)
1408                 return fd;                       1408                 return fd;
1409                                                  1409 
1410         tmp = getname(filename);                 1410         tmp = getname(filename);
1411         if (IS_ERR(tmp))                         1411         if (IS_ERR(tmp))
1412                 return PTR_ERR(tmp);             1412                 return PTR_ERR(tmp);
1413                                                  1413 
1414         fd = get_unused_fd_flags(how->flags);    1414         fd = get_unused_fd_flags(how->flags);
1415         if (fd >= 0) {                           1415         if (fd >= 0) {
1416                 struct file *f = do_filp_open    1416                 struct file *f = do_filp_open(dfd, tmp, &op);
1417                 if (IS_ERR(f)) {                 1417                 if (IS_ERR(f)) {
1418                         put_unused_fd(fd);       1418                         put_unused_fd(fd);
1419                         fd = PTR_ERR(f);         1419                         fd = PTR_ERR(f);
1420                 } else {                         1420                 } else {
1421                         fd_install(fd, f);       1421                         fd_install(fd, f);
1422                 }                                1422                 }
1423         }                                        1423         }
1424         putname(tmp);                            1424         putname(tmp);
1425         return fd;                               1425         return fd;
1426 }                                                1426 }
1427                                                  1427 
1428 long do_sys_open(int dfd, const char __user *    1428 long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
1429 {                                                1429 {
1430         struct open_how how = build_open_how(    1430         struct open_how how = build_open_how(flags, mode);
1431         return do_sys_openat2(dfd, filename,     1431         return do_sys_openat2(dfd, filename, &how);
1432 }                                                1432 }
1433                                                  1433 
1434                                                  1434 
1435 SYSCALL_DEFINE3(open, const char __user *, fi    1435 SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
1436 {                                                1436 {
1437         if (force_o_largefile())                 1437         if (force_o_largefile())
1438                 flags |= O_LARGEFILE;            1438                 flags |= O_LARGEFILE;
1439         return do_sys_open(AT_FDCWD, filename    1439         return do_sys_open(AT_FDCWD, filename, flags, mode);
1440 }                                                1440 }
1441                                                  1441 
1442 SYSCALL_DEFINE4(openat, int, dfd, const char     1442 SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
1443                 umode_t, mode)                   1443                 umode_t, mode)
1444 {                                                1444 {
1445         if (force_o_largefile())                 1445         if (force_o_largefile())
1446                 flags |= O_LARGEFILE;            1446                 flags |= O_LARGEFILE;
1447         return do_sys_open(dfd, filename, fla    1447         return do_sys_open(dfd, filename, flags, mode);
1448 }                                                1448 }
1449                                                  1449 
1450 SYSCALL_DEFINE4(openat2, int, dfd, const char    1450 SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename,
1451                 struct open_how __user *, how    1451                 struct open_how __user *, how, size_t, usize)
1452 {                                                1452 {
1453         int err;                                 1453         int err;
1454         struct open_how tmp;                     1454         struct open_how tmp;
1455                                                  1455 
1456         BUILD_BUG_ON(sizeof(struct open_how)     1456         BUILD_BUG_ON(sizeof(struct open_how) < OPEN_HOW_SIZE_VER0);
1457         BUILD_BUG_ON(sizeof(struct open_how)     1457         BUILD_BUG_ON(sizeof(struct open_how) != OPEN_HOW_SIZE_LATEST);
1458                                                  1458 
1459         if (unlikely(usize < OPEN_HOW_SIZE_VE    1459         if (unlikely(usize < OPEN_HOW_SIZE_VER0))
1460                 return -EINVAL;                  1460                 return -EINVAL;
1461                                                  1461 
1462         err = copy_struct_from_user(&tmp, siz    1462         err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize);
1463         if (err)                                 1463         if (err)
1464                 return err;                      1464                 return err;
1465                                                  1465 
1466         audit_openat2_how(&tmp);                 1466         audit_openat2_how(&tmp);
1467                                                  1467 
1468         /* O_LARGEFILE is only allowed for no    1468         /* O_LARGEFILE is only allowed for non-O_PATH. */
1469         if (!(tmp.flags & O_PATH) && force_o_    1469         if (!(tmp.flags & O_PATH) && force_o_largefile())
1470                 tmp.flags |= O_LARGEFILE;        1470                 tmp.flags |= O_LARGEFILE;
1471                                                  1471 
1472         return do_sys_openat2(dfd, filename,     1472         return do_sys_openat2(dfd, filename, &tmp);
1473 }                                                1473 }
1474                                                  1474 
1475 #ifdef CONFIG_COMPAT                             1475 #ifdef CONFIG_COMPAT
1476 /*                                               1476 /*
1477  * Exactly like sys_open(), except that it do    1477  * Exactly like sys_open(), except that it doesn't set the
1478  * O_LARGEFILE flag.                             1478  * O_LARGEFILE flag.
1479  */                                              1479  */
1480 COMPAT_SYSCALL_DEFINE3(open, const char __use    1480 COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
1481 {                                                1481 {
1482         return do_sys_open(AT_FDCWD, filename    1482         return do_sys_open(AT_FDCWD, filename, flags, mode);
1483 }                                                1483 }
1484                                                  1484 
1485 /*                                               1485 /*
1486  * Exactly like sys_openat(), except that it     1486  * Exactly like sys_openat(), except that it doesn't set the
1487  * O_LARGEFILE flag.                             1487  * O_LARGEFILE flag.
1488  */                                              1488  */
1489 COMPAT_SYSCALL_DEFINE4(openat, int, dfd, cons    1489 COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode)
1490 {                                                1490 {
1491         return do_sys_open(dfd, filename, fla    1491         return do_sys_open(dfd, filename, flags, mode);
1492 }                                                1492 }
1493 #endif                                           1493 #endif
1494                                                  1494 
1495 #ifndef __alpha__                                1495 #ifndef __alpha__
1496                                                  1496 
1497 /*                                               1497 /*
1498  * For backward compatibility?  Maybe this sh    1498  * For backward compatibility?  Maybe this should be moved
1499  * into arch/i386 instead?                       1499  * into arch/i386 instead?
1500  */                                              1500  */
1501 SYSCALL_DEFINE2(creat, const char __user *, p    1501 SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
1502 {                                                1502 {
1503         int flags = O_CREAT | O_WRONLY | O_TR    1503         int flags = O_CREAT | O_WRONLY | O_TRUNC;
1504                                                  1504 
1505         if (force_o_largefile())                 1505         if (force_o_largefile())
1506                 flags |= O_LARGEFILE;            1506                 flags |= O_LARGEFILE;
1507         return do_sys_open(AT_FDCWD, pathname    1507         return do_sys_open(AT_FDCWD, pathname, flags, mode);
1508 }                                                1508 }
1509 #endif                                           1509 #endif
1510                                                  1510 
1511 /*                                               1511 /*
1512  * "id" is the POSIX thread ID. We use the       1512  * "id" is the POSIX thread ID. We use the
1513  * files pointer for this..                      1513  * files pointer for this..
1514  */                                              1514  */
1515 static int filp_flush(struct file *filp, fl_o    1515 static int filp_flush(struct file *filp, fl_owner_t id)
1516 {                                                1516 {
1517         int retval = 0;                          1517         int retval = 0;
1518                                                  1518 
1519         if (CHECK_DATA_CORRUPTION(file_count(    1519         if (CHECK_DATA_CORRUPTION(file_count(filp) == 0,
1520                         "VFS: Close: file cou    1520                         "VFS: Close: file count is 0 (f_op=%ps)",
1521                         filp->f_op)) {           1521                         filp->f_op)) {
1522                 return 0;                        1522                 return 0;
1523         }                                        1523         }
1524                                                  1524 
1525         if (filp->f_op->flush)                   1525         if (filp->f_op->flush)
1526                 retval = filp->f_op->flush(fi    1526                 retval = filp->f_op->flush(filp, id);
1527                                                  1527 
1528         if (likely(!(filp->f_mode & FMODE_PAT    1528         if (likely(!(filp->f_mode & FMODE_PATH))) {
1529                 dnotify_flush(filp, id);         1529                 dnotify_flush(filp, id);
1530                 locks_remove_posix(filp, id);    1530                 locks_remove_posix(filp, id);
1531         }                                        1531         }
1532         return retval;                           1532         return retval;
1533 }                                                1533 }
1534                                                  1534 
1535 int filp_close(struct file *filp, fl_owner_t     1535 int filp_close(struct file *filp, fl_owner_t id)
1536 {                                                1536 {
1537         int retval;                              1537         int retval;
1538                                                  1538 
1539         retval = filp_flush(filp, id);           1539         retval = filp_flush(filp, id);
1540         fput(filp);                              1540         fput(filp);
1541                                                  1541 
1542         return retval;                           1542         return retval;
1543 }                                                1543 }
1544 EXPORT_SYMBOL(filp_close);                       1544 EXPORT_SYMBOL(filp_close);
1545                                                  1545 
1546 /*                                               1546 /*
1547  * Careful here! We test whether the file poi    1547  * Careful here! We test whether the file pointer is NULL before
1548  * releasing the fd. This ensures that one cl    1548  * releasing the fd. This ensures that one clone task can't release
1549  * an fd while another clone is opening it.      1549  * an fd while another clone is opening it.
1550  */                                              1550  */
1551 SYSCALL_DEFINE1(close, unsigned int, fd)         1551 SYSCALL_DEFINE1(close, unsigned int, fd)
1552 {                                                1552 {
1553         int retval;                              1553         int retval;
1554         struct file *file;                       1554         struct file *file;
1555                                                  1555 
1556         file = file_close_fd(fd);                1556         file = file_close_fd(fd);
1557         if (!file)                               1557         if (!file)
1558                 return -EBADF;                   1558                 return -EBADF;
1559                                                  1559 
1560         retval = filp_flush(file, current->fi    1560         retval = filp_flush(file, current->files);
1561                                                  1561 
1562         /*                                       1562         /*
1563          * We're returning to user space. Don    1563          * We're returning to user space. Don't bother
1564          * with any delayed fput() cases.        1564          * with any delayed fput() cases.
1565          */                                      1565          */
1566         __fput_sync(file);                       1566         __fput_sync(file);
1567                                                  1567 
1568         /* can't restart close syscall becaus    1568         /* can't restart close syscall because file table entry was cleared */
1569         if (unlikely(retval == -ERESTARTSYS |    1569         if (unlikely(retval == -ERESTARTSYS ||
1570                      retval == -ERESTARTNOINT    1570                      retval == -ERESTARTNOINTR ||
1571                      retval == -ERESTARTNOHAN    1571                      retval == -ERESTARTNOHAND ||
1572                      retval == -ERESTART_REST    1572                      retval == -ERESTART_RESTARTBLOCK))
1573                 retval = -EINTR;                 1573                 retval = -EINTR;
1574                                                  1574 
1575         return retval;                           1575         return retval;
1576 }                                                1576 }
1577                                                  1577 
1578 /**                                              1578 /**
1579  * sys_close_range() - Close all file descrip    1579  * sys_close_range() - Close all file descriptors in a given range.
1580  *                                               1580  *
1581  * @fd:     starting file descriptor to close    1581  * @fd:     starting file descriptor to close
1582  * @max_fd: last file descriptor to close        1582  * @max_fd: last file descriptor to close
1583  * @flags:  reserved for future extensions       1583  * @flags:  reserved for future extensions
1584  *                                               1584  *
1585  * This closes a range of file descriptors. A    1585  * This closes a range of file descriptors. All file descriptors
1586  * from @fd up to and including @max_fd are c    1586  * from @fd up to and including @max_fd are closed.
1587  * Currently, errors to close a given file de    1587  * Currently, errors to close a given file descriptor are ignored.
1588  */                                              1588  */
1589 SYSCALL_DEFINE3(close_range, unsigned int, fd    1589 SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd,
1590                 unsigned int, flags)             1590                 unsigned int, flags)
1591 {                                                1591 {
1592         return __close_range(fd, max_fd, flag    1592         return __close_range(fd, max_fd, flags);
1593 }                                                1593 }
1594                                                  1594 
1595 /*                                               1595 /*
1596  * This routine simulates a hangup on the tty    1596  * This routine simulates a hangup on the tty, to arrange that users
1597  * are given clean terminals at login time.      1597  * are given clean terminals at login time.
1598  */                                              1598  */
1599 SYSCALL_DEFINE0(vhangup)                         1599 SYSCALL_DEFINE0(vhangup)
1600 {                                                1600 {
1601         if (!ccs_capable(CCS_SYS_VHANGUP))       1601         if (!ccs_capable(CCS_SYS_VHANGUP))
1602                 return -EPERM;                   1602                 return -EPERM;
1603         if (capable(CAP_SYS_TTY_CONFIG)) {       1603         if (capable(CAP_SYS_TTY_CONFIG)) {
1604                 tty_vhangup_self();              1604                 tty_vhangup_self();
1605                 return 0;                        1605                 return 0;
1606         }                                        1606         }
1607         return -EPERM;                           1607         return -EPERM;
1608 }                                                1608 }
1609                                                  1609 
1610 /*                                               1610 /*
1611  * Called when an inode is about to be open.     1611  * Called when an inode is about to be open.
1612  * We use this to disallow opening large file    1612  * We use this to disallow opening large files on 32bit systems if
1613  * the caller didn't specify O_LARGEFILE.  On    1613  * the caller didn't specify O_LARGEFILE.  On 64bit systems we force
1614  * on this flag in sys_open.                     1614  * on this flag in sys_open.
1615  */                                              1615  */
1616 int generic_file_open(struct inode * inode, s    1616 int generic_file_open(struct inode * inode, struct file * filp)
1617 {                                                1617 {
1618         if (!(filp->f_flags & O_LARGEFILE) &&    1618         if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
1619                 return -EOVERFLOW;               1619                 return -EOVERFLOW;
1620         return 0;                                1620         return 0;
1621 }                                                1621 }
1622                                                  1622 
1623 EXPORT_SYMBOL(generic_file_open);                1623 EXPORT_SYMBOL(generic_file_open);
1624                                                  1624 
1625 /*                                               1625 /*
1626  * This is used by subsystems that don't want    1626  * This is used by subsystems that don't want seekable
1627  * file descriptors. The function is not supp    1627  * file descriptors. The function is not supposed to ever fail, the only
1628  * reason it returns an 'int' and not 'void'     1628  * reason it returns an 'int' and not 'void' is so that it can be plugged
1629  * directly into file_operations structure.      1629  * directly into file_operations structure.
1630  */                                              1630  */
1631 int nonseekable_open(struct inode *inode, str    1631 int nonseekable_open(struct inode *inode, struct file *filp)
1632 {                                                1632 {
1633         filp->f_mode &= ~(FMODE_LSEEK | FMODE    1633         filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1634         return 0;                                1634         return 0;
1635 }                                                1635 }
1636                                                  1636 
1637 EXPORT_SYMBOL(nonseekable_open);                 1637 EXPORT_SYMBOL(nonseekable_open);
1638                                                  1638 
1639 /*                                               1639 /*
1640  * stream_open is used by subsystems that wan    1640  * stream_open is used by subsystems that want stream-like file descriptors.
1641  * Such file descriptors are not seekable and    1641  * Such file descriptors are not seekable and don't have notion of position
1642  * (file.f_pos is always 0 and ppos passed to    1642  * (file.f_pos is always 0 and ppos passed to .read()/.write() is always NULL).
1643  * Contrary to file descriptors of other regu    1643  * Contrary to file descriptors of other regular files, .read() and .write()
1644  * can run simultaneously.                       1644  * can run simultaneously.
1645  *                                               1645  *
1646  * stream_open never fails and is marked to r    1646  * stream_open never fails and is marked to return int so that it could be
1647  * directly used as file_operations.open .       1647  * directly used as file_operations.open .
1648  */                                              1648  */
1649 int stream_open(struct inode *inode, struct f    1649 int stream_open(struct inode *inode, struct file *filp)
1650 {                                                1650 {
1651         filp->f_mode &= ~(FMODE_LSEEK | FMODE    1651         filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS);
1652         filp->f_mode |= FMODE_STREAM;            1652         filp->f_mode |= FMODE_STREAM;
1653         return 0;                                1653         return 0;
1654 }                                                1654 }
1655                                                  1655 
1656 EXPORT_SYMBOL(stream_open);                      1656 EXPORT_SYMBOL(stream_open);
1657                                                  1657 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php