~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/file.c

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /fs/file.c (Version linux-6.12-rc7) and /fs/file.c (Version linux-5.10.229)


  1 // SPDX-License-Identifier: GPL-2.0                 1 // SPDX-License-Identifier: GPL-2.0
  2 /*                                                  2 /*
  3  *  linux/fs/file.c                                 3  *  linux/fs/file.c
  4  *                                                  4  *
  5  *  Copyright (C) 1998-1999, Stephen Tweedie a      5  *  Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
  6  *                                                  6  *
  7  *  Manage the dynamic fd arrays in the proces      7  *  Manage the dynamic fd arrays in the process files_struct.
  8  */                                                 8  */
  9                                                     9 
 10 #include <linux/syscalls.h>                        10 #include <linux/syscalls.h>
 11 #include <linux/export.h>                          11 #include <linux/export.h>
 12 #include <linux/fs.h>                              12 #include <linux/fs.h>
 13 #include <linux/kernel.h>                          13 #include <linux/kernel.h>
 14 #include <linux/mm.h>                              14 #include <linux/mm.h>
 15 #include <linux/sched/signal.h>                    15 #include <linux/sched/signal.h>
 16 #include <linux/slab.h>                            16 #include <linux/slab.h>
 17 #include <linux/file.h>                            17 #include <linux/file.h>
 18 #include <linux/fdtable.h>                         18 #include <linux/fdtable.h>
 19 #include <linux/bitops.h>                          19 #include <linux/bitops.h>
 20 #include <linux/spinlock.h>                        20 #include <linux/spinlock.h>
 21 #include <linux/rcupdate.h>                        21 #include <linux/rcupdate.h>
 22 #include <linux/close_range.h>                     22 #include <linux/close_range.h>
 23 #include <net/sock.h>                              23 #include <net/sock.h>
 24                                                    24 
 25 #include "internal.h"                              25 #include "internal.h"
 26                                                    26 
 27 unsigned int sysctl_nr_open __read_mostly = 10     27 unsigned int sysctl_nr_open __read_mostly = 1024*1024;
 28 unsigned int sysctl_nr_open_min = BITS_PER_LON     28 unsigned int sysctl_nr_open_min = BITS_PER_LONG;
 29 /* our min() is unusable in constant expressio     29 /* our min() is unusable in constant expressions ;-/ */
 30 #define __const_min(x, y) ((x) < (y) ? (x) : (     30 #define __const_min(x, y) ((x) < (y) ? (x) : (y))
 31 unsigned int sysctl_nr_open_max =                  31 unsigned int sysctl_nr_open_max =
 32         __const_min(INT_MAX, ~(size_t)0/sizeof     32         __const_min(INT_MAX, ~(size_t)0/sizeof(void *)) & -BITS_PER_LONG;
 33                                                    33 
 34 static void __free_fdtable(struct fdtable *fdt     34 static void __free_fdtable(struct fdtable *fdt)
 35 {                                                  35 {
 36         kvfree(fdt->fd);                           36         kvfree(fdt->fd);
 37         kvfree(fdt->open_fds);                     37         kvfree(fdt->open_fds);
 38         kfree(fdt);                                38         kfree(fdt);
 39 }                                                  39 }
 40                                                    40 
 41 static void free_fdtable_rcu(struct rcu_head *     41 static void free_fdtable_rcu(struct rcu_head *rcu)
 42 {                                                  42 {
 43         __free_fdtable(container_of(rcu, struc     43         __free_fdtable(container_of(rcu, struct fdtable, rcu));
 44 }                                                  44 }
 45                                                    45 
 46 #define BITBIT_NR(nr)   BITS_TO_LONGS(BITS_TO_     46 #define BITBIT_NR(nr)   BITS_TO_LONGS(BITS_TO_LONGS(nr))
 47 #define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeo     47 #define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long))
 48                                                    48 
 49 #define fdt_words(fdt) ((fdt)->max_fds / BITS_     49 #define fdt_words(fdt) ((fdt)->max_fds / BITS_PER_LONG) // words in ->open_fds
 50 /*                                                 50 /*
 51  * Copy 'count' fd bits from the old table to      51  * Copy 'count' fd bits from the old table to the new table and clear the extra
 52  * space if any.  This does not copy the file      52  * space if any.  This does not copy the file pointers.  Called with the files
 53  * spinlock held for write.                        53  * spinlock held for write.
 54  */                                                54  */
 55 static inline void copy_fd_bitmaps(struct fdta     55 static inline void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
 56                             unsigned int copy_     56                             unsigned int copy_words)
 57 {                                                  57 {
 58         unsigned int nwords = fdt_words(nfdt);     58         unsigned int nwords = fdt_words(nfdt);
 59                                                    59 
 60         bitmap_copy_and_extend(nfdt->open_fds,     60         bitmap_copy_and_extend(nfdt->open_fds, ofdt->open_fds,
 61                         copy_words * BITS_PER_     61                         copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG);
 62         bitmap_copy_and_extend(nfdt->close_on_     62         bitmap_copy_and_extend(nfdt->close_on_exec, ofdt->close_on_exec,
 63                         copy_words * BITS_PER_     63                         copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG);
 64         bitmap_copy_and_extend(nfdt->full_fds_     64         bitmap_copy_and_extend(nfdt->full_fds_bits, ofdt->full_fds_bits,
 65                         copy_words, nwords);       65                         copy_words, nwords);
 66 }                                                  66 }
 67                                                    67 
 68 /*                                                 68 /*
 69  * Copy all file descriptors from the old tabl     69  * Copy all file descriptors from the old table to the new, expanded table and
 70  * clear the extra space.  Called with the fil     70  * clear the extra space.  Called with the files spinlock held for write.
 71  */                                                71  */
 72 static void copy_fdtable(struct fdtable *nfdt,     72 static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
 73 {                                                  73 {
 74         size_t cpy, set;                           74         size_t cpy, set;
 75                                                    75 
 76         BUG_ON(nfdt->max_fds < ofdt->max_fds);     76         BUG_ON(nfdt->max_fds < ofdt->max_fds);
 77                                                    77 
 78         cpy = ofdt->max_fds * sizeof(struct fi     78         cpy = ofdt->max_fds * sizeof(struct file *);
 79         set = (nfdt->max_fds - ofdt->max_fds)      79         set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
 80         memcpy(nfdt->fd, ofdt->fd, cpy);           80         memcpy(nfdt->fd, ofdt->fd, cpy);
 81         memset((char *)nfdt->fd + cpy, 0, set)     81         memset((char *)nfdt->fd + cpy, 0, set);
 82                                                    82 
 83         copy_fd_bitmaps(nfdt, ofdt, fdt_words(     83         copy_fd_bitmaps(nfdt, ofdt, fdt_words(ofdt));
 84 }                                                  84 }
 85                                                    85 
 86 /*                                                 86 /*
 87  * Note how the fdtable bitmap allocations ver     87  * Note how the fdtable bitmap allocations very much have to be a multiple of
 88  * BITS_PER_LONG. This is not only because we      88  * BITS_PER_LONG. This is not only because we walk those things in chunks of
 89  * 'unsigned long' in some places, but simply      89  * 'unsigned long' in some places, but simply because that is how the Linux
 90  * kernel bitmaps are defined to work: they ar     90  * kernel bitmaps are defined to work: they are not "bits in an array of bytes",
 91  * they are very much "bits in an array of uns     91  * they are very much "bits in an array of unsigned long".
 92  *                                                 92  *
 93  * The ALIGN(nr, BITS_PER_LONG) here is for cl     93  * The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied
 94  * by that "1024/sizeof(ptr)" before, we alrea     94  * by that "1024/sizeof(ptr)" before, we already know there are sufficient
 95  * clear low bits. Clang seems to realize that     95  * clear low bits. Clang seems to realize that, gcc ends up being confused.
 96  *                                                 96  *
 97  * On a 128-bit machine, the ALIGN() would act     97  * On a 128-bit machine, the ALIGN() would actually matter. In the meantime,
 98  * let's consider it documentation (and maybe      98  * let's consider it documentation (and maybe a test-case for gcc to improve
 99  * its code generation ;)                          99  * its code generation ;)
100  */                                               100  */
101 static struct fdtable * alloc_fdtable(unsigned    101 static struct fdtable * alloc_fdtable(unsigned int nr)
102 {                                                 102 {
103         struct fdtable *fdt;                      103         struct fdtable *fdt;
104         void *data;                               104         void *data;
105                                                   105 
106         /*                                        106         /*
107          * Figure out how many fds we actually    107          * Figure out how many fds we actually want to support in this fdtable.
108          * Allocation steps are keyed to the s    108          * Allocation steps are keyed to the size of the fdarray, since it
109          * grows far faster than any of the ot    109          * grows far faster than any of the other dynamic data. We try to fit
110          * the fdarray into comfortable page-t    110          * the fdarray into comfortable page-tuned chunks: starting at 1024B
111          * and growing in powers of two from t    111          * and growing in powers of two from there on.
112          */                                       112          */
113         nr /= (1024 / sizeof(struct file *));     113         nr /= (1024 / sizeof(struct file *));
114         nr = roundup_pow_of_two(nr + 1);          114         nr = roundup_pow_of_two(nr + 1);
115         nr *= (1024 / sizeof(struct file *));     115         nr *= (1024 / sizeof(struct file *));
116         nr = ALIGN(nr, BITS_PER_LONG);            116         nr = ALIGN(nr, BITS_PER_LONG);
117         /*                                        117         /*
118          * Note that this can drive nr *below*    118          * Note that this can drive nr *below* what we had passed if sysctl_nr_open
119          * had been set lower between the chec    119          * had been set lower between the check in expand_files() and here.  Deal
120          * with that in caller, it's cheaper t    120          * with that in caller, it's cheaper that way.
121          *                                        121          *
122          * We make sure that nr remains a mult    122          * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
123          * bitmaps handling below becomes unpl    123          * bitmaps handling below becomes unpleasant, to put it mildly...
124          */                                       124          */
125         if (unlikely(nr > sysctl_nr_open))        125         if (unlikely(nr > sysctl_nr_open))
126                 nr = ((sysctl_nr_open - 1) | (    126                 nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
127                                                   127 
128         fdt = kmalloc(sizeof(struct fdtable),     128         fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT);
129         if (!fdt)                                 129         if (!fdt)
130                 goto out;                         130                 goto out;
131         fdt->max_fds = nr;                        131         fdt->max_fds = nr;
132         data = kvmalloc_array(nr, sizeof(struc    132         data = kvmalloc_array(nr, sizeof(struct file *), GFP_KERNEL_ACCOUNT);
133         if (!data)                                133         if (!data)
134                 goto out_fdt;                     134                 goto out_fdt;
135         fdt->fd = data;                           135         fdt->fd = data;
136                                                   136 
137         data = kvmalloc(max_t(size_t,             137         data = kvmalloc(max_t(size_t,
138                                  2 * nr / BITS    138                                  2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES),
139                                  GFP_KERNEL_AC    139                                  GFP_KERNEL_ACCOUNT);
140         if (!data)                                140         if (!data)
141                 goto out_arr;                     141                 goto out_arr;
142         fdt->open_fds = data;                     142         fdt->open_fds = data;
143         data += nr / BITS_PER_BYTE;               143         data += nr / BITS_PER_BYTE;
144         fdt->close_on_exec = data;                144         fdt->close_on_exec = data;
145         data += nr / BITS_PER_BYTE;               145         data += nr / BITS_PER_BYTE;
146         fdt->full_fds_bits = data;                146         fdt->full_fds_bits = data;
147                                                   147 
148         return fdt;                               148         return fdt;
149                                                   149 
150 out_arr:                                          150 out_arr:
151         kvfree(fdt->fd);                          151         kvfree(fdt->fd);
152 out_fdt:                                          152 out_fdt:
153         kfree(fdt);                               153         kfree(fdt);
154 out:                                              154 out:
155         return NULL;                              155         return NULL;
156 }                                                 156 }
157                                                   157 
158 /*                                                158 /*
159  * Expand the file descriptor table.              159  * Expand the file descriptor table.
160  * This function will allocate a new fdtable a    160  * This function will allocate a new fdtable and both fd array and fdset, of
161  * the given size.                                161  * the given size.
162  * Return <0 error code on error; 1 on success    162  * Return <0 error code on error; 1 on successful completion.
163  * The files->file_lock should be held on entr    163  * The files->file_lock should be held on entry, and will be held on exit.
164  */                                               164  */
165 static int expand_fdtable(struct files_struct     165 static int expand_fdtable(struct files_struct *files, unsigned int nr)
166         __releases(files->file_lock)              166         __releases(files->file_lock)
167         __acquires(files->file_lock)              167         __acquires(files->file_lock)
168 {                                                 168 {
169         struct fdtable *new_fdt, *cur_fdt;        169         struct fdtable *new_fdt, *cur_fdt;
170                                                   170 
171         spin_unlock(&files->file_lock);           171         spin_unlock(&files->file_lock);
172         new_fdt = alloc_fdtable(nr);              172         new_fdt = alloc_fdtable(nr);
173                                                   173 
174         /* make sure all fd_install() have see    174         /* make sure all fd_install() have seen resize_in_progress
175          * or have finished their rcu_read_loc    175          * or have finished their rcu_read_lock_sched() section.
176          */                                       176          */
177         if (atomic_read(&files->count) > 1)       177         if (atomic_read(&files->count) > 1)
178                 synchronize_rcu();                178                 synchronize_rcu();
179                                                   179 
180         spin_lock(&files->file_lock);             180         spin_lock(&files->file_lock);
181         if (!new_fdt)                             181         if (!new_fdt)
182                 return -ENOMEM;                   182                 return -ENOMEM;
183         /*                                        183         /*
184          * extremely unlikely race - sysctl_nr    184          * extremely unlikely race - sysctl_nr_open decreased between the check in
185          * caller and alloc_fdtable().  Cheape    185          * caller and alloc_fdtable().  Cheaper to catch it here...
186          */                                       186          */
187         if (unlikely(new_fdt->max_fds <= nr))     187         if (unlikely(new_fdt->max_fds <= nr)) {
188                 __free_fdtable(new_fdt);          188                 __free_fdtable(new_fdt);
189                 return -EMFILE;                   189                 return -EMFILE;
190         }                                         190         }
191         cur_fdt = files_fdtable(files);           191         cur_fdt = files_fdtable(files);
192         BUG_ON(nr < cur_fdt->max_fds);            192         BUG_ON(nr < cur_fdt->max_fds);
193         copy_fdtable(new_fdt, cur_fdt);           193         copy_fdtable(new_fdt, cur_fdt);
194         rcu_assign_pointer(files->fdt, new_fdt    194         rcu_assign_pointer(files->fdt, new_fdt);
195         if (cur_fdt != &files->fdtab)             195         if (cur_fdt != &files->fdtab)
196                 call_rcu(&cur_fdt->rcu, free_f    196                 call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
197         /* coupled with smp_rmb() in fd_instal    197         /* coupled with smp_rmb() in fd_install() */
198         smp_wmb();                                198         smp_wmb();
199         return 1;                                 199         return 1;
200 }                                                 200 }
201                                                   201 
202 /*                                                202 /*
203  * Expand files.                                  203  * Expand files.
204  * This function will expand the file structur    204  * This function will expand the file structures, if the requested size exceeds
205  * the current capacity and there is room for     205  * the current capacity and there is room for expansion.
206  * Return <0 error code on error; 0 when nothi    206  * Return <0 error code on error; 0 when nothing done; 1 when files were
207  * expanded and execution may have blocked.       207  * expanded and execution may have blocked.
208  * The files->file_lock should be held on entr    208  * The files->file_lock should be held on entry, and will be held on exit.
209  */                                               209  */
210 static int expand_files(struct files_struct *f    210 static int expand_files(struct files_struct *files, unsigned int nr)
211         __releases(files->file_lock)              211         __releases(files->file_lock)
212         __acquires(files->file_lock)              212         __acquires(files->file_lock)
213 {                                                 213 {
214         struct fdtable *fdt;                      214         struct fdtable *fdt;
215         int expanded = 0;                         215         int expanded = 0;
216                                                   216 
217 repeat:                                           217 repeat:
218         fdt = files_fdtable(files);               218         fdt = files_fdtable(files);
219                                                   219 
220         /* Do we need to expand? */               220         /* Do we need to expand? */
221         if (nr < fdt->max_fds)                    221         if (nr < fdt->max_fds)
222                 return expanded;                  222                 return expanded;
223                                                   223 
224         /* Can we expand? */                      224         /* Can we expand? */
225         if (nr >= sysctl_nr_open)                 225         if (nr >= sysctl_nr_open)
226                 return -EMFILE;                   226                 return -EMFILE;
227                                                   227 
228         if (unlikely(files->resize_in_progress    228         if (unlikely(files->resize_in_progress)) {
229                 spin_unlock(&files->file_lock)    229                 spin_unlock(&files->file_lock);
230                 expanded = 1;                     230                 expanded = 1;
231                 wait_event(files->resize_wait,    231                 wait_event(files->resize_wait, !files->resize_in_progress);
232                 spin_lock(&files->file_lock);     232                 spin_lock(&files->file_lock);
233                 goto repeat;                      233                 goto repeat;
234         }                                         234         }
235                                                   235 
236         /* All good, so we try */                 236         /* All good, so we try */
237         files->resize_in_progress = true;         237         files->resize_in_progress = true;
238         expanded = expand_fdtable(files, nr);     238         expanded = expand_fdtable(files, nr);
239         files->resize_in_progress = false;        239         files->resize_in_progress = false;
240                                                   240 
241         wake_up_all(&files->resize_wait);         241         wake_up_all(&files->resize_wait);
242         return expanded;                          242         return expanded;
243 }                                                 243 }
244                                                   244 
245 static inline void __set_close_on_exec(unsigne    245 static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt)
246 {                                                 246 {
247         __set_bit(fd, fdt->close_on_exec);        247         __set_bit(fd, fdt->close_on_exec);
248 }                                                 248 }
249                                                   249 
250 static inline void __clear_close_on_exec(unsig    250 static inline void __clear_close_on_exec(unsigned int fd, struct fdtable *fdt)
251 {                                                 251 {
252         if (test_bit(fd, fdt->close_on_exec))     252         if (test_bit(fd, fdt->close_on_exec))
253                 __clear_bit(fd, fdt->close_on_    253                 __clear_bit(fd, fdt->close_on_exec);
254 }                                                 254 }
255                                                   255 
256 static inline void __set_open_fd(unsigned int     256 static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
257 {                                                 257 {
258         __set_bit(fd, fdt->open_fds);             258         __set_bit(fd, fdt->open_fds);
259         fd /= BITS_PER_LONG;                      259         fd /= BITS_PER_LONG;
260         if (!~fdt->open_fds[fd])                  260         if (!~fdt->open_fds[fd])
261                 __set_bit(fd, fdt->full_fds_bi    261                 __set_bit(fd, fdt->full_fds_bits);
262 }                                                 262 }
263                                                   263 
264 static inline void __clear_open_fd(unsigned in    264 static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
265 {                                                 265 {
266         __clear_bit(fd, fdt->open_fds);           266         __clear_bit(fd, fdt->open_fds);
267         __clear_bit(fd / BITS_PER_LONG, fdt->f    267         __clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits);
268 }                                                 268 }
269                                                   269 
270 static inline bool fd_is_open(unsigned int fd, !! 270 static unsigned int count_open_files(struct fdtable *fdt)
271 {                                                 271 {
272         return test_bit(fd, fdt->open_fds);    !! 272         unsigned int size = fdt->max_fds;
                                                   >> 273         unsigned int i;
                                                   >> 274 
                                                   >> 275         /* Find the last open fd */
                                                   >> 276         for (i = size / BITS_PER_LONG; i > 0; ) {
                                                   >> 277                 if (fdt->open_fds[--i])
                                                   >> 278                         break;
                                                   >> 279         }
                                                   >> 280         i = (i + 1) * BITS_PER_LONG;
                                                   >> 281         return i;
273 }                                                 282 }
274                                                   283 
275 /*                                                284 /*
276  * Note that a sane fdtable size always has to    285  * Note that a sane fdtable size always has to be a multiple of
277  * BITS_PER_LONG, since we have bitmaps that a    286  * BITS_PER_LONG, since we have bitmaps that are sized by this.
278  *                                                287  *
279  * punch_hole is optional - when close_range() !! 288  * 'max_fds' will normally already be properly aligned, but it
280  * and close, we don't need to copy descriptor !! 289  * turns out that in the close_range() -> __close_range() ->
281  * a smaller cloned descriptor table might suf !! 290  * unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end
282  * currently opened descriptor falls into that !! 291  * up having a 'max_fds' value that isn't already aligned.
283  */                                            !! 292  *
284 static unsigned int sane_fdtable_size(struct f !! 293  * Rather than make close_range() have to worry about this,
285 {                                              !! 294  * just make that BITS_PER_LONG alignment be part of a sane
286         unsigned int last = find_last_bit(fdt- !! 295  * fdtable size. Because that's really what it is.
287                                                !! 296  */
288         if (last == fdt->max_fds)              !! 297 static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds)
289                 return NR_OPEN_DEFAULT;        !! 298 {
290         if (punch_hole && punch_hole->to >= la !! 299         unsigned int count;
291                 last = find_last_bit(fdt->open !! 300 
292                 if (last == punch_hole->from)  !! 301         count = count_open_files(fdt);
293                         return NR_OPEN_DEFAULT !! 302         if (max_fds < NR_OPEN_DEFAULT)
294         }                                      !! 303                 max_fds = NR_OPEN_DEFAULT;
295         return ALIGN(last + 1, BITS_PER_LONG); !! 304         return ALIGN(min(count, max_fds), BITS_PER_LONG);
296 }                                                 305 }
297                                                   306 
298 /*                                                307 /*
299  * Allocate a new descriptor table and copy co !! 308  * Allocate a new files structure and copy contents from the
300  * instance.  Returns a pointer to cloned tabl !! 309  * passed in files structure.
301  * on failure.  For 'punch_hole' see sane_fdta !! 310  * errorp will be valid only when the returned files_struct is NULL.
302  */                                               311  */
303 struct files_struct *dup_fd(struct files_struc !! 312 struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp)
304 {                                                 313 {
305         struct files_struct *newf;                314         struct files_struct *newf;
306         struct file **old_fds, **new_fds;         315         struct file **old_fds, **new_fds;
307         unsigned int open_files, i;               316         unsigned int open_files, i;
308         struct fdtable *old_fdt, *new_fdt;        317         struct fdtable *old_fdt, *new_fdt;
309         int error;                             << 
310                                                   318 
                                                   >> 319         *errorp = -ENOMEM;
311         newf = kmem_cache_alloc(files_cachep,     320         newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
312         if (!newf)                                321         if (!newf)
313                 return ERR_PTR(-ENOMEM);       !! 322                 goto out;
314                                                   323 
315         atomic_set(&newf->count, 1);              324         atomic_set(&newf->count, 1);
316                                                   325 
317         spin_lock_init(&newf->file_lock);         326         spin_lock_init(&newf->file_lock);
318         newf->resize_in_progress = false;         327         newf->resize_in_progress = false;
319         init_waitqueue_head(&newf->resize_wait    328         init_waitqueue_head(&newf->resize_wait);
320         newf->next_fd = 0;                        329         newf->next_fd = 0;
321         new_fdt = &newf->fdtab;                   330         new_fdt = &newf->fdtab;
322         new_fdt->max_fds = NR_OPEN_DEFAULT;       331         new_fdt->max_fds = NR_OPEN_DEFAULT;
323         new_fdt->close_on_exec = newf->close_o    332         new_fdt->close_on_exec = newf->close_on_exec_init;
324         new_fdt->open_fds = newf->open_fds_ini    333         new_fdt->open_fds = newf->open_fds_init;
325         new_fdt->full_fds_bits = newf->full_fd    334         new_fdt->full_fds_bits = newf->full_fds_bits_init;
326         new_fdt->fd = &newf->fd_array[0];         335         new_fdt->fd = &newf->fd_array[0];
327                                                   336 
328         spin_lock(&oldf->file_lock);              337         spin_lock(&oldf->file_lock);
329         old_fdt = files_fdtable(oldf);            338         old_fdt = files_fdtable(oldf);
330         open_files = sane_fdtable_size(old_fdt !! 339         open_files = sane_fdtable_size(old_fdt, max_fds);
331                                                   340 
332         /*                                        341         /*
333          * Check whether we need to allocate a    342          * Check whether we need to allocate a larger fd array and fd set.
334          */                                       343          */
335         while (unlikely(open_files > new_fdt->    344         while (unlikely(open_files > new_fdt->max_fds)) {
336                 spin_unlock(&oldf->file_lock);    345                 spin_unlock(&oldf->file_lock);
337                                                   346 
338                 if (new_fdt != &newf->fdtab)      347                 if (new_fdt != &newf->fdtab)
339                         __free_fdtable(new_fdt    348                         __free_fdtable(new_fdt);
340                                                   349 
341                 new_fdt = alloc_fdtable(open_f    350                 new_fdt = alloc_fdtable(open_files - 1);
342                 if (!new_fdt) {                   351                 if (!new_fdt) {
343                         error = -ENOMEM;       !! 352                         *errorp = -ENOMEM;
344                         goto out_release;         353                         goto out_release;
345                 }                                 354                 }
346                                                   355 
347                 /* beyond sysctl_nr_open; noth    356                 /* beyond sysctl_nr_open; nothing to do */
348                 if (unlikely(new_fdt->max_fds     357                 if (unlikely(new_fdt->max_fds < open_files)) {
349                         __free_fdtable(new_fdt    358                         __free_fdtable(new_fdt);
350                         error = -EMFILE;       !! 359                         *errorp = -EMFILE;
351                         goto out_release;         360                         goto out_release;
352                 }                                 361                 }
353                                                   362 
354                 /*                                363                 /*
355                  * Reacquire the oldf lock and    364                  * Reacquire the oldf lock and a pointer to its fd table
356                  * who knows it may have a new    365                  * who knows it may have a new bigger fd table. We need
357                  * the latest pointer.            366                  * the latest pointer.
358                  */                               367                  */
359                 spin_lock(&oldf->file_lock);      368                 spin_lock(&oldf->file_lock);
360                 old_fdt = files_fdtable(oldf);    369                 old_fdt = files_fdtable(oldf);
361                 open_files = sane_fdtable_size !! 370                 open_files = sane_fdtable_size(old_fdt, max_fds);
362         }                                         371         }
363                                                   372 
364         copy_fd_bitmaps(new_fdt, old_fdt, open    373         copy_fd_bitmaps(new_fdt, old_fdt, open_files / BITS_PER_LONG);
365                                                   374 
366         old_fds = old_fdt->fd;                    375         old_fds = old_fdt->fd;
367         new_fds = new_fdt->fd;                    376         new_fds = new_fdt->fd;
368                                                   377 
369         for (i = open_files; i != 0; i--) {       378         for (i = open_files; i != 0; i--) {
370                 struct file *f = *old_fds++;      379                 struct file *f = *old_fds++;
371                 if (f) {                          380                 if (f) {
372                         get_file(f);              381                         get_file(f);
373                 } else {                          382                 } else {
374                         /*                        383                         /*
375                          * The fd may be claim    384                          * The fd may be claimed in the fd bitmap but not yet
376                          * instantiated in the    385                          * instantiated in the files array if a sibling thread
377                          * is partway through     386                          * is partway through open().  So make sure that this
378                          * fd is available to     387                          * fd is available to the new process.
379                          */                       388                          */
380                         __clear_open_fd(open_f    389                         __clear_open_fd(open_files - i, new_fdt);
381                 }                                 390                 }
382                 rcu_assign_pointer(*new_fds++,    391                 rcu_assign_pointer(*new_fds++, f);
383         }                                         392         }
384         spin_unlock(&oldf->file_lock);            393         spin_unlock(&oldf->file_lock);
385                                                   394 
386         /* clear the remainder */                 395         /* clear the remainder */
387         memset(new_fds, 0, (new_fdt->max_fds -    396         memset(new_fds, 0, (new_fdt->max_fds - open_files) * sizeof(struct file *));
388                                                   397 
389         rcu_assign_pointer(newf->fdt, new_fdt)    398         rcu_assign_pointer(newf->fdt, new_fdt);
390                                                   399 
391         return newf;                              400         return newf;
392                                                   401 
393 out_release:                                      402 out_release:
394         kmem_cache_free(files_cachep, newf);      403         kmem_cache_free(files_cachep, newf);
395         return ERR_PTR(error);                 !! 404 out:
                                                   >> 405         return NULL;
396 }                                                 406 }
397                                                   407 
398 static struct fdtable *close_files(struct file    408 static struct fdtable *close_files(struct files_struct * files)
399 {                                                 409 {
400         /*                                        410         /*
401          * It is safe to dereference the fd ta    411          * It is safe to dereference the fd table without RCU or
402          * ->file_lock because this is the las    412          * ->file_lock because this is the last reference to the
403          * files structure.                       413          * files structure.
404          */                                       414          */
405         struct fdtable *fdt = rcu_dereference_    415         struct fdtable *fdt = rcu_dereference_raw(files->fdt);
406         unsigned int i, j = 0;                    416         unsigned int i, j = 0;
407                                                   417 
408         for (;;) {                                418         for (;;) {
409                 unsigned long set;                419                 unsigned long set;
410                 i = j * BITS_PER_LONG;            420                 i = j * BITS_PER_LONG;
411                 if (i >= fdt->max_fds)            421                 if (i >= fdt->max_fds)
412                         break;                    422                         break;
413                 set = fdt->open_fds[j++];         423                 set = fdt->open_fds[j++];
414                 while (set) {                     424                 while (set) {
415                         if (set & 1) {            425                         if (set & 1) {
416                                 struct file *     426                                 struct file * file = xchg(&fdt->fd[i], NULL);
417                                 if (file) {       427                                 if (file) {
418                                         filp_c    428                                         filp_close(file, files);
419                                         cond_r    429                                         cond_resched();
420                                 }                 430                                 }
421                         }                         431                         }
422                         i++;                      432                         i++;
423                         set >>= 1;                433                         set >>= 1;
424                 }                                 434                 }
425         }                                         435         }
426                                                   436 
427         return fdt;                               437         return fdt;
428 }                                                 438 }
429                                                   439 
                                                   >> 440 struct files_struct *get_files_struct(struct task_struct *task)
                                                   >> 441 {
                                                   >> 442         struct files_struct *files;
                                                   >> 443 
                                                   >> 444         task_lock(task);
                                                   >> 445         files = task->files;
                                                   >> 446         if (files)
                                                   >> 447                 atomic_inc(&files->count);
                                                   >> 448         task_unlock(task);
                                                   >> 449 
                                                   >> 450         return files;
                                                   >> 451 }
                                                   >> 452 
430 void put_files_struct(struct files_struct *fil    453 void put_files_struct(struct files_struct *files)
431 {                                                 454 {
432         if (atomic_dec_and_test(&files->count)    455         if (atomic_dec_and_test(&files->count)) {
433                 struct fdtable *fdt = close_fi    456                 struct fdtable *fdt = close_files(files);
434                                                   457 
435                 /* free the arrays if they are    458                 /* free the arrays if they are not embedded */
436                 if (fdt != &files->fdtab)         459                 if (fdt != &files->fdtab)
437                         __free_fdtable(fdt);      460                         __free_fdtable(fdt);
438                 kmem_cache_free(files_cachep,     461                 kmem_cache_free(files_cachep, files);
439         }                                         462         }
440 }                                                 463 }
441                                                   464 
442 void exit_files(struct task_struct *tsk)          465 void exit_files(struct task_struct *tsk)
443 {                                                 466 {
444         struct files_struct * files = tsk->fil    467         struct files_struct * files = tsk->files;
445                                                   468 
446         if (files) {                              469         if (files) {
447                 task_lock(tsk);                   470                 task_lock(tsk);
448                 tsk->files = NULL;                471                 tsk->files = NULL;
449                 task_unlock(tsk);                 472                 task_unlock(tsk);
450                 put_files_struct(files);          473                 put_files_struct(files);
451         }                                         474         }
452 }                                                 475 }
453                                                   476 
454 struct files_struct init_files = {                477 struct files_struct init_files = {
455         .count          = ATOMIC_INIT(1),         478         .count          = ATOMIC_INIT(1),
456         .fdt            = &init_files.fdtab,      479         .fdt            = &init_files.fdtab,
457         .fdtab          = {                       480         .fdtab          = {
458                 .max_fds        = NR_OPEN_DEFA    481                 .max_fds        = NR_OPEN_DEFAULT,
459                 .fd             = &init_files.    482                 .fd             = &init_files.fd_array[0],
460                 .close_on_exec  = init_files.c    483                 .close_on_exec  = init_files.close_on_exec_init,
461                 .open_fds       = init_files.o    484                 .open_fds       = init_files.open_fds_init,
462                 .full_fds_bits  = init_files.f    485                 .full_fds_bits  = init_files.full_fds_bits_init,
463         },                                        486         },
464         .file_lock      = __SPIN_LOCK_UNLOCKED    487         .file_lock      = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
465         .resize_wait    = __WAIT_QUEUE_HEAD_IN    488         .resize_wait    = __WAIT_QUEUE_HEAD_INITIALIZER(init_files.resize_wait),
466 };                                                489 };
467                                                   490 
468 static unsigned int find_next_fd(struct fdtabl    491 static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start)
469 {                                                 492 {
470         unsigned int maxfd = fdt->max_fds; /*     493         unsigned int maxfd = fdt->max_fds; /* always multiple of BITS_PER_LONG */
471         unsigned int maxbit = maxfd / BITS_PER    494         unsigned int maxbit = maxfd / BITS_PER_LONG;
472         unsigned int bitbit = start / BITS_PER    495         unsigned int bitbit = start / BITS_PER_LONG;
473                                                   496 
474         bitbit = find_next_zero_bit(fdt->full_    497         bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG;
475         if (bitbit >= maxfd)                      498         if (bitbit >= maxfd)
476                 return maxfd;                     499                 return maxfd;
477         if (bitbit > start)                       500         if (bitbit > start)
478                 start = bitbit;                   501                 start = bitbit;
479         return find_next_zero_bit(fdt->open_fd    502         return find_next_zero_bit(fdt->open_fds, maxfd, start);
480 }                                                 503 }
481                                                   504 
482 /*                                                505 /*
483  * allocate a file descriptor, mark it busy.      506  * allocate a file descriptor, mark it busy.
484  */                                               507  */
485 static int alloc_fd(unsigned start, unsigned e    508 static int alloc_fd(unsigned start, unsigned end, unsigned flags)
486 {                                                 509 {
487         struct files_struct *files = current->    510         struct files_struct *files = current->files;
488         unsigned int fd;                          511         unsigned int fd;
489         int error;                                512         int error;
490         struct fdtable *fdt;                      513         struct fdtable *fdt;
491                                                   514 
492         spin_lock(&files->file_lock);             515         spin_lock(&files->file_lock);
493 repeat:                                           516 repeat:
494         fdt = files_fdtable(files);               517         fdt = files_fdtable(files);
495         fd = start;                               518         fd = start;
496         if (fd < files->next_fd)                  519         if (fd < files->next_fd)
497                 fd = files->next_fd;              520                 fd = files->next_fd;
498                                                   521 
499         if (fd < fdt->max_fds)                    522         if (fd < fdt->max_fds)
500                 fd = find_next_fd(fdt, fd);       523                 fd = find_next_fd(fdt, fd);
501                                                   524 
502         /*                                        525         /*
503          * N.B. For clone tasks sharing a file    526          * N.B. For clone tasks sharing a files structure, this test
504          * will limit the total number of file    527          * will limit the total number of files that can be opened.
505          */                                       528          */
506         error = -EMFILE;                          529         error = -EMFILE;
507         if (fd >= end)                            530         if (fd >= end)
508                 goto out;                         531                 goto out;
509                                                   532 
510         error = expand_files(files, fd);          533         error = expand_files(files, fd);
511         if (error < 0)                            534         if (error < 0)
512                 goto out;                         535                 goto out;
513                                                   536 
514         /*                                        537         /*
515          * If we needed to expand the fs array    538          * If we needed to expand the fs array we
516          * might have blocked - try again.        539          * might have blocked - try again.
517          */                                       540          */
518         if (error)                                541         if (error)
519                 goto repeat;                      542                 goto repeat;
520                                                   543 
521         if (start <= files->next_fd)              544         if (start <= files->next_fd)
522                 files->next_fd = fd + 1;          545                 files->next_fd = fd + 1;
523                                                   546 
524         __set_open_fd(fd, fdt);                   547         __set_open_fd(fd, fdt);
525         if (flags & O_CLOEXEC)                    548         if (flags & O_CLOEXEC)
526                 __set_close_on_exec(fd, fdt);     549                 __set_close_on_exec(fd, fdt);
527         else                                      550         else
528                 __clear_close_on_exec(fd, fdt)    551                 __clear_close_on_exec(fd, fdt);
529         error = fd;                               552         error = fd;
530 #if 1                                             553 #if 1
531         /* Sanity check */                        554         /* Sanity check */
532         if (rcu_access_pointer(fdt->fd[fd]) !=    555         if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
533                 printk(KERN_WARNING "alloc_fd:    556                 printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
534                 rcu_assign_pointer(fdt->fd[fd]    557                 rcu_assign_pointer(fdt->fd[fd], NULL);
535         }                                         558         }
536 #endif                                            559 #endif
537                                                   560 
538 out:                                              561 out:
539         spin_unlock(&files->file_lock);           562         spin_unlock(&files->file_lock);
540         return error;                             563         return error;
541 }                                                 564 }
542                                                   565 
543 int __get_unused_fd_flags(unsigned flags, unsi    566 int __get_unused_fd_flags(unsigned flags, unsigned long nofile)
544 {                                                 567 {
545         return alloc_fd(0, nofile, flags);        568         return alloc_fd(0, nofile, flags);
546 }                                                 569 }
547                                                   570 
548 int get_unused_fd_flags(unsigned flags)           571 int get_unused_fd_flags(unsigned flags)
549 {                                                 572 {
550         return __get_unused_fd_flags(flags, rl    573         return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE));
551 }                                                 574 }
552 EXPORT_SYMBOL(get_unused_fd_flags);               575 EXPORT_SYMBOL(get_unused_fd_flags);
553                                                   576 
554 static void __put_unused_fd(struct files_struc    577 static void __put_unused_fd(struct files_struct *files, unsigned int fd)
555 {                                                 578 {
556         struct fdtable *fdt = files_fdtable(fi    579         struct fdtable *fdt = files_fdtable(files);
557         __clear_open_fd(fd, fdt);                 580         __clear_open_fd(fd, fdt);
558         if (fd < files->next_fd)                  581         if (fd < files->next_fd)
559                 files->next_fd = fd;              582                 files->next_fd = fd;
560 }                                                 583 }
561                                                   584 
562 void put_unused_fd(unsigned int fd)               585 void put_unused_fd(unsigned int fd)
563 {                                                 586 {
564         struct files_struct *files = current->    587         struct files_struct *files = current->files;
565         spin_lock(&files->file_lock);             588         spin_lock(&files->file_lock);
566         __put_unused_fd(files, fd);               589         __put_unused_fd(files, fd);
567         spin_unlock(&files->file_lock);           590         spin_unlock(&files->file_lock);
568 }                                                 591 }
569                                                   592 
570 EXPORT_SYMBOL(put_unused_fd);                     593 EXPORT_SYMBOL(put_unused_fd);
571                                                   594 
572 /*                                                595 /*
573  * Install a file pointer in the fd array.        596  * Install a file pointer in the fd array.
574  *                                                597  *
575  * The VFS is full of places where we drop the    598  * The VFS is full of places where we drop the files lock between
576  * setting the open_fds bitmap and installing     599  * setting the open_fds bitmap and installing the file in the file
577  * array.  At any such point, we are vulnerabl    600  * array.  At any such point, we are vulnerable to a dup2() race
578  * installing a file in the array before us.      601  * installing a file in the array before us.  We need to detect this and
579  * fput() the struct file we are about to over    602  * fput() the struct file we are about to overwrite in this case.
580  *                                                603  *
581  * It should never happen - if we allow dup2()    604  * It should never happen - if we allow dup2() do it, _really_ bad things
582  * will follow.                                   605  * will follow.
583  *                                                606  *
584  * This consumes the "file" refcount, so calle    607  * This consumes the "file" refcount, so callers should treat it
585  * as if they had called fput(file).              608  * as if they had called fput(file).
586  */                                               609  */
587                                                   610 
588 void fd_install(unsigned int fd, struct file *    611 void fd_install(unsigned int fd, struct file *file)
589 {                                                 612 {
590         struct files_struct *files = current->    613         struct files_struct *files = current->files;
591         struct fdtable *fdt;                      614         struct fdtable *fdt;
592                                                   615 
593         if (WARN_ON_ONCE(unlikely(file->f_mode << 
594                 return;                        << 
595                                                << 
596         rcu_read_lock_sched();                    616         rcu_read_lock_sched();
597                                                   617 
598         if (unlikely(files->resize_in_progress    618         if (unlikely(files->resize_in_progress)) {
599                 rcu_read_unlock_sched();          619                 rcu_read_unlock_sched();
600                 spin_lock(&files->file_lock);     620                 spin_lock(&files->file_lock);
601                 fdt = files_fdtable(files);       621                 fdt = files_fdtable(files);
602                 BUG_ON(fdt->fd[fd] != NULL);      622                 BUG_ON(fdt->fd[fd] != NULL);
603                 rcu_assign_pointer(fdt->fd[fd]    623                 rcu_assign_pointer(fdt->fd[fd], file);
604                 spin_unlock(&files->file_lock)    624                 spin_unlock(&files->file_lock);
605                 return;                           625                 return;
606         }                                         626         }
607         /* coupled with smp_wmb() in expand_fd    627         /* coupled with smp_wmb() in expand_fdtable() */
608         smp_rmb();                                628         smp_rmb();
609         fdt = rcu_dereference_sched(files->fdt    629         fdt = rcu_dereference_sched(files->fdt);
610         BUG_ON(fdt->fd[fd] != NULL);              630         BUG_ON(fdt->fd[fd] != NULL);
611         rcu_assign_pointer(fdt->fd[fd], file);    631         rcu_assign_pointer(fdt->fd[fd], file);
612         rcu_read_unlock_sched();                  632         rcu_read_unlock_sched();
613 }                                                 633 }
614                                                   634 
615 EXPORT_SYMBOL(fd_install);                        635 EXPORT_SYMBOL(fd_install);
616                                                   636 
617 /**                                            !! 637 static struct file *pick_file(struct files_struct *files, unsigned fd)
618  * file_close_fd_locked - return file associat << 
619  * @files: file struct to retrieve file from   << 
620  * @fd: file descriptor to retrieve file for   << 
621  *                                             << 
622  * Doesn't take a separate reference count.    << 
623  *                                             << 
624  * Context: files_lock must be held.           << 
625  *                                             << 
626  * Returns: The file associated with @fd (NULL << 
627  */                                            << 
628 struct file *file_close_fd_locked(struct files << 
629 {                                                 638 {
630         struct fdtable *fdt = files_fdtable(fi !! 639         struct file *file = NULL;
631         struct file *file;                     !! 640         struct fdtable *fdt;
632                                                << 
633         lockdep_assert_held(&files->file_lock) << 
634                                                   641 
                                                   >> 642         spin_lock(&files->file_lock);
                                                   >> 643         fdt = files_fdtable(files);
635         if (fd >= fdt->max_fds)                   644         if (fd >= fdt->max_fds)
636                 return NULL;                   !! 645                 goto out_unlock;
637                                                << 
638         fd = array_index_nospec(fd, fdt->max_f    646         fd = array_index_nospec(fd, fdt->max_fds);
639         file = fdt->fd[fd];                       647         file = fdt->fd[fd];
640         if (file) {                            !! 648         if (!file)
641                 rcu_assign_pointer(fdt->fd[fd] !! 649                 goto out_unlock;
642                 __put_unused_fd(files, fd);    !! 650         rcu_assign_pointer(fdt->fd[fd], NULL);
643         }                                      !! 651         __put_unused_fd(files, fd);
                                                   >> 652 
                                                   >> 653 out_unlock:
                                                   >> 654         spin_unlock(&files->file_lock);
644         return file;                              655         return file;
645 }                                                 656 }
646                                                   657 
647 int close_fd(unsigned fd)                         658 int close_fd(unsigned fd)
648 {                                                 659 {
649         struct files_struct *files = current->    660         struct files_struct *files = current->files;
650         struct file *file;                        661         struct file *file;
651                                                   662 
652         spin_lock(&files->file_lock);          !! 663         file = pick_file(files, fd);
653         file = file_close_fd_locked(files, fd) << 
654         spin_unlock(&files->file_lock);        << 
655         if (!file)                                664         if (!file)
656                 return -EBADF;                    665                 return -EBADF;
657                                                   666 
658         return filp_close(file, files);           667         return filp_close(file, files);
659 }                                                 668 }
660 EXPORT_SYMBOL(close_fd);                       !! 669 EXPORT_SYMBOL(close_fd); /* for ksys_close() */
661                                                << 
662 /**                                            << 
663  * last_fd - return last valid index into fd t << 
664  * @fdt: File descriptor table.                << 
665  *                                             << 
666  * Context: Either rcu read lock or files_lock << 
667  *                                             << 
668  * Returns: Last valid index into fdtable.     << 
669  */                                            << 
670 static inline unsigned last_fd(struct fdtable  << 
671 {                                              << 
672         return fdt->max_fds - 1;               << 
673 }                                              << 
674                                                << 
675 static inline void __range_cloexec(struct file << 
676                                    unsigned in << 
677 {                                              << 
678         struct fdtable *fdt;                   << 
679                                                << 
680         /* make sure we're using the correct m << 
681         spin_lock(&cur_fds->file_lock);        << 
682         fdt = files_fdtable(cur_fds);          << 
683         max_fd = min(last_fd(fdt), max_fd);    << 
684         if (fd <= max_fd)                      << 
685                 bitmap_set(fdt->close_on_exec, << 
686         spin_unlock(&cur_fds->file_lock);      << 
687 }                                              << 
688                                                << 
689 static inline void __range_close(struct files_ << 
690                                  unsigned int  << 
691 {                                              << 
692         struct file *file;                     << 
693         unsigned n;                            << 
694                                                << 
695         spin_lock(&files->file_lock);          << 
696         n = last_fd(files_fdtable(files));     << 
697         max_fd = min(max_fd, n);               << 
698                                                << 
699         for (; fd <= max_fd; fd++) {           << 
700                 file = file_close_fd_locked(fi << 
701                 if (file) {                    << 
702                         spin_unlock(&files->fi << 
703                         filp_close(file, files << 
704                         cond_resched();        << 
705                         spin_lock(&files->file << 
706                 } else if (need_resched()) {   << 
707                         spin_unlock(&files->fi << 
708                         cond_resched();        << 
709                         spin_lock(&files->file << 
710                 }                              << 
711         }                                      << 
712         spin_unlock(&files->file_lock);        << 
713 }                                              << 
714                                                   670 
715 /**                                               671 /**
716  * __close_range() - Close all file descriptor    672  * __close_range() - Close all file descriptors in a given range.
717  *                                                673  *
718  * @fd:     starting file descriptor to close     674  * @fd:     starting file descriptor to close
719  * @max_fd: last file descriptor to close         675  * @max_fd: last file descriptor to close
720  * @flags:  CLOSE_RANGE flags.                 << 
721  *                                                676  *
722  * This closes a range of file descriptors. Al    677  * This closes a range of file descriptors. All file descriptors
723  * from @fd up to and including @max_fd are cl    678  * from @fd up to and including @max_fd are closed.
724  */                                               679  */
725 int __close_range(unsigned fd, unsigned max_fd    680 int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
726 {                                                 681 {
                                                   >> 682         unsigned int cur_max;
727         struct task_struct *me = current;         683         struct task_struct *me = current;
728         struct files_struct *cur_fds = me->fil    684         struct files_struct *cur_fds = me->files, *fds = NULL;
729                                                   685 
730         if (flags & ~(CLOSE_RANGE_UNSHARE | CL !! 686         if (flags & ~CLOSE_RANGE_UNSHARE)
731                 return -EINVAL;                   687                 return -EINVAL;
732                                                   688 
733         if (fd > max_fd)                          689         if (fd > max_fd)
734                 return -EINVAL;                   690                 return -EINVAL;
735                                                   691 
736         if ((flags & CLOSE_RANGE_UNSHARE) && a !! 692         rcu_read_lock();
737                 struct fd_range range = {fd, m !! 693         cur_max = files_fdtable(cur_fds)->max_fds;
                                                   >> 694         rcu_read_unlock();
                                                   >> 695 
                                                   >> 696         /* cap to last valid index into fdtable */
                                                   >> 697         cur_max--;
                                                   >> 698 
                                                   >> 699         if (flags & CLOSE_RANGE_UNSHARE) {
                                                   >> 700                 int ret;
                                                   >> 701                 unsigned int max_unshare_fds = NR_OPEN_MAX;
738                                                   702 
739                 /*                                703                 /*
740                  * If the caller requested all !! 704                  * If the requested range is greater than the current maximum,
741                  * copy all of the file descri !! 705                  * we're closing everything so only copy all file descriptors
742                  * use them.                   !! 706                  * beneath the lowest file descriptor.
743                  */                               707                  */
744                 if (flags & CLOSE_RANGE_CLOEXE !! 708                 if (max_fd >= cur_max)
745                         punch_hole = NULL;     !! 709                         max_unshare_fds = fd;
                                                   >> 710 
                                                   >> 711                 ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds);
                                                   >> 712                 if (ret)
                                                   >> 713                         return ret;
746                                                   714 
747                 fds = dup_fd(cur_fds, punch_ho << 
748                 if (IS_ERR(fds))               << 
749                         return PTR_ERR(fds);   << 
750                 /*                                715                 /*
751                  * We used to share our file d    716                  * We used to share our file descriptor table, and have now
752                  * created a private one, make    717                  * created a private one, make sure we're using it below.
753                  */                               718                  */
754                 swap(cur_fds, fds);            !! 719                 if (fds)
                                                   >> 720                         swap(cur_fds, fds);
755         }                                         721         }
756                                                   722 
757         if (flags & CLOSE_RANGE_CLOEXEC)       !! 723         max_fd = min(max_fd, cur_max);
758                 __range_cloexec(cur_fds, fd, m !! 724         while (fd <= max_fd) {
759         else                                   !! 725                 struct file *file;
760                 __range_close(cur_fds, fd, max !! 726 
                                                   >> 727                 file = pick_file(cur_fds, fd++);
                                                   >> 728                 if (!file)
                                                   >> 729                         continue;
                                                   >> 730 
                                                   >> 731                 filp_close(file, cur_fds);
                                                   >> 732                 cond_resched();
                                                   >> 733         }
761                                                   734 
762         if (fds) {                                735         if (fds) {
763                 /*                                736                 /*
764                  * We're done closing the file    737                  * We're done closing the files we were supposed to. Time to install
765                  * the new file descriptor tab    738                  * the new file descriptor table and drop the old one.
766                  */                               739                  */
767                 task_lock(me);                    740                 task_lock(me);
768                 me->files = cur_fds;              741                 me->files = cur_fds;
769                 task_unlock(me);                  742                 task_unlock(me);
770                 put_files_struct(fds);            743                 put_files_struct(fds);
771         }                                         744         }
772                                                   745 
773         return 0;                                 746         return 0;
774 }                                                 747 }
775                                                   748 
776 /**                                            !! 749 /*
777  * file_close_fd - return file associated with !! 750  * See close_fd_get_file() below, this variant assumes current->files->file_lock
778  * @fd: file descriptor to retrieve file for   !! 751  * is held.
779  *                                             << 
780  * Doesn't take a separate reference count.    << 
781  *                                             << 
782  * Returns: The file associated with @fd (NULL << 
783  */                                               752  */
784 struct file *file_close_fd(unsigned int fd)    !! 753 int __close_fd_get_file(unsigned int fd, struct file **res)
785 {                                                 754 {
786         struct files_struct *files = current->    755         struct files_struct *files = current->files;
787         struct file *file;                        756         struct file *file;
                                                   >> 757         struct fdtable *fdt;
                                                   >> 758 
                                                   >> 759         fdt = files_fdtable(files);
                                                   >> 760         if (fd >= fdt->max_fds)
                                                   >> 761                 goto out_err;
                                                   >> 762         file = fdt->fd[fd];
                                                   >> 763         if (!file)
                                                   >> 764                 goto out_err;
                                                   >> 765         rcu_assign_pointer(fdt->fd[fd], NULL);
                                                   >> 766         __put_unused_fd(files, fd);
                                                   >> 767         get_file(file);
                                                   >> 768         *res = file;
                                                   >> 769         return 0;
                                                   >> 770 out_err:
                                                   >> 771         *res = NULL;
                                                   >> 772         return -ENOENT;
                                                   >> 773 }
                                                   >> 774 
                                                   >> 775 /*
                                                   >> 776  * variant of close_fd that gets a ref on the file for later fput.
                                                   >> 777  * The caller must ensure that filp_close() called on the file, and then
                                                   >> 778  * an fput().
                                                   >> 779  */
                                                   >> 780 int close_fd_get_file(unsigned int fd, struct file **res)
                                                   >> 781 {
                                                   >> 782         struct files_struct *files = current->files;
                                                   >> 783         int ret;
788                                                   784 
789         spin_lock(&files->file_lock);             785         spin_lock(&files->file_lock);
790         file = file_close_fd_locked(files, fd) !! 786         ret = __close_fd_get_file(fd, res);
791         spin_unlock(&files->file_lock);           787         spin_unlock(&files->file_lock);
792                                                   788 
793         return file;                           !! 789         return ret;
794 }                                                 790 }
795                                                   791 
796 void do_close_on_exec(struct files_struct *fil    792 void do_close_on_exec(struct files_struct *files)
797 {                                                 793 {
798         unsigned i;                               794         unsigned i;
799         struct fdtable *fdt;                      795         struct fdtable *fdt;
800                                                   796 
801         /* exec unshares first */                 797         /* exec unshares first */
802         spin_lock(&files->file_lock);             798         spin_lock(&files->file_lock);
803         for (i = 0; ; i++) {                      799         for (i = 0; ; i++) {
804                 unsigned long set;                800                 unsigned long set;
805                 unsigned fd = i * BITS_PER_LON    801                 unsigned fd = i * BITS_PER_LONG;
806                 fdt = files_fdtable(files);       802                 fdt = files_fdtable(files);
807                 if (fd >= fdt->max_fds)           803                 if (fd >= fdt->max_fds)
808                         break;                    804                         break;
809                 set = fdt->close_on_exec[i];      805                 set = fdt->close_on_exec[i];
810                 if (!set)                         806                 if (!set)
811                         continue;                 807                         continue;
812                 fdt->close_on_exec[i] = 0;        808                 fdt->close_on_exec[i] = 0;
813                 for ( ; set ; fd++, set >>= 1)    809                 for ( ; set ; fd++, set >>= 1) {
814                         struct file *file;        810                         struct file *file;
815                         if (!(set & 1))           811                         if (!(set & 1))
816                                 continue;         812                                 continue;
817                         file = fdt->fd[fd];       813                         file = fdt->fd[fd];
818                         if (!file)                814                         if (!file)
819                                 continue;         815                                 continue;
820                         rcu_assign_pointer(fdt    816                         rcu_assign_pointer(fdt->fd[fd], NULL);
821                         __put_unused_fd(files,    817                         __put_unused_fd(files, fd);
822                         spin_unlock(&files->fi    818                         spin_unlock(&files->file_lock);
823                         filp_close(file, files    819                         filp_close(file, files);
824                         cond_resched();           820                         cond_resched();
825                         spin_lock(&files->file    821                         spin_lock(&files->file_lock);
826                 }                                 822                 }
827                                                   823 
828         }                                         824         }
829         spin_unlock(&files->file_lock);           825         spin_unlock(&files->file_lock);
830 }                                                 826 }
831                                                   827 
832 static struct file *__get_file_rcu(struct file !! 828 static struct file *__fget_files(struct files_struct *files, unsigned int fd,
833 {                                              !! 829                                  fmode_t mask, unsigned int refs)
834         struct file __rcu *file;               << 
835         struct file __rcu *file_reloaded;      << 
836         struct file __rcu *file_reloaded_cmp;  << 
837                                                << 
838         file = rcu_dereference_raw(*f);        << 
839         if (!file)                             << 
840                 return NULL;                   << 
841                                                << 
842         if (unlikely(!atomic_long_inc_not_zero << 
843                 return ERR_PTR(-EAGAIN);       << 
844                                                << 
845         file_reloaded = rcu_dereference_raw(*f << 
846                                                << 
847         /*                                     << 
848          * Ensure that all accesses have a dep << 
849          * rcu_dereference_raw() above so we g << 
850          * between reuse/allocation and the po << 
851          */                                    << 
852         file_reloaded_cmp = file_reloaded;     << 
853         OPTIMIZER_HIDE_VAR(file_reloaded_cmp); << 
854                                                << 
855         /*                                     << 
856          * atomic_long_inc_not_zero() above pr << 
857          * barrier when we acquired a referenc << 
858          *                                     << 
859          * This is paired with the write barri << 
860          * __rcu protected file pointer so tha << 
861          * matches the current file, we know w << 
862          * acquired a reference to the right f << 
863          *                                     << 
864          * If the pointers don't match the fil << 
865          * SLAB_TYPESAFE_BY_RCU.               << 
866          */                                    << 
867         if (file == file_reloaded_cmp)         << 
868                 return file_reloaded;          << 
869                                                << 
870         fput(file);                            << 
871         return ERR_PTR(-EAGAIN);               << 
872 }                                              << 
873                                                << 
874 /**                                            << 
875  * get_file_rcu - try go get a reference to a  << 
876  * @f: the file to get a reference on          << 
877  *                                             << 
878  * This function tries to get a reference on @ << 
879  * @f hasn't been reused.                      << 
880  *                                             << 
881  * This function should rarely have to be used << 
882  * understand the implications of SLAB_TYPESAF << 
883  *                                             << 
884  * Return: Returns @f with the reference count << 
885  */                                            << 
886 struct file *get_file_rcu(struct file __rcu ** << 
887 {                                              << 
888         for (;;) {                             << 
889                 struct file __rcu *file;       << 
890                                                << 
891                 file = __get_file_rcu(f);      << 
892                 if (!IS_ERR(file))             << 
893                         return file;           << 
894         }                                      << 
895 }                                              << 
896 EXPORT_SYMBOL_GPL(get_file_rcu);               << 
897                                                << 
898 /**                                            << 
899  * get_file_active - try go get a reference to << 
900  * @f: the file to get a reference on          << 
901  *                                             << 
902  * In contast to get_file_rcu() the pointer it << 
903  * reference counting.                         << 
904  *                                             << 
905  * This function should rarely have to be used << 
906  * understand the implications of SLAB_TYPESAF << 
907  *                                             << 
908  * Return: Returns @f with the reference count << 
909  */                                            << 
910 struct file *get_file_active(struct file **f)  << 
911 {                                                 830 {
912         struct file __rcu *file;               !! 831         struct file *file;
913                                                   832 
914         rcu_read_lock();                          833         rcu_read_lock();
915         file = __get_file_rcu(f);              !! 834 loop:
916         rcu_read_unlock();                     !! 835         file = files_lookup_fd_rcu(files, fd);
917         if (IS_ERR(file))                      !! 836         if (file) {
918                 file = NULL;                   !! 837                 /* File object ref couldn't be taken.
919         return file;                           !! 838                  * dup2() atomicity guarantee is the reason
920 }                                              !! 839                  * we loop to catch the new file (or NULL pointer)
921 EXPORT_SYMBOL_GPL(get_file_active);            << 
922                                                << 
923 static inline struct file *__fget_files_rcu(st << 
924        unsigned int fd, fmode_t mask)          << 
925 {                                              << 
926         for (;;) {                             << 
927                 struct file *file;             << 
928                 struct fdtable *fdt = rcu_dere << 
929                 struct file __rcu **fdentry;   << 
930                 unsigned long nospec_mask;     << 
931                                                << 
932                 /* Mask is a 0 for invalid fd' << 
933                 nospec_mask = array_index_mask << 
934                                                << 
935                 /*                             << 
936                  * fdentry points to the 'fd'  << 
937                  * Loading from fdt->fd[0] is  << 
938                  * array always exists.        << 
939                  */                            << 
940                 fdentry = fdt->fd + (fd & nosp << 
941                                                << 
942                 /* Do the load, then mask any  << 
943                 file = rcu_dereference_raw(*fd << 
944                 file = (void *)(nospec_mask &  << 
945                 if (unlikely(!file))           << 
946                         return NULL;           << 
947                                                << 
948                 /*                             << 
949                  * Ok, we have a file pointer  << 
950                  * some point, but it might ha << 
951                  *                             << 
952                  * We need to confirm it by in << 
953                  * and then check the lookup a << 
954                  *                             << 
955                  * atomic_long_inc_not_zero()  << 
956                  * barrier. We only really nee << 
957                  * protect the loads below, bu << 
958                  */                            << 
959                 if (unlikely(!atomic_long_inc_ << 
960                         continue;              << 
961                                                << 
962                 /*                             << 
963                  * Such a race can take two fo << 
964                  *                             << 
965                  *  (a) the file ref already w << 
966                  *      file hasn't been reuse << 
967                  *      isn't zero but the fil << 
968                  *                             << 
969                  *  (b) the file table entry h << 
970                  *       Note that we don't ne << 
971                  *       pointer having change << 
972                  *       hand-in-hand with 'fd << 
973                  *                             << 
974                  * If so, we need to put our r << 
975                  */                            << 
976                 if (unlikely(file != rcu_deref << 
977                     unlikely(rcu_dereference_r << 
978                         fput(file);            << 
979                         continue;              << 
980                 }                              << 
981                                                << 
982                 /*                             << 
983                  * This isn't the file we're l << 
984                  * allowed to get a reference  << 
985                  */                               840                  */
986                 if (unlikely(file->f_mode & ma !! 841                 if (file->f_mode & mask)
987                         fput(file);            !! 842                         file = NULL;
988                         return NULL;           !! 843                 else if (!get_file_rcu_many(file, refs))
                                                   >> 844                         goto loop;
                                                   >> 845                 else if (files_lookup_fd_raw(files, fd) != file) {
                                                   >> 846                         fput_many(file, refs);
                                                   >> 847                         goto loop;
989                 }                                 848                 }
990                                                << 
991                 /*                             << 
992                  * Ok, we have a ref to the fi << 
993                  * still exists.               << 
994                  */                            << 
995                 return file;                   << 
996         }                                         849         }
997 }                                              << 
998                                                << 
999 static struct file *__fget_files(struct files_ << 
1000                                  fmode_t mask << 
1001 {                                             << 
1002         struct file *file;                    << 
1003                                               << 
1004         rcu_read_lock();                      << 
1005         file = __fget_files_rcu(files, fd, ma << 
1006         rcu_read_unlock();                       850         rcu_read_unlock();
1007                                                  851 
1008         return file;                             852         return file;
1009 }                                                853 }
1010                                                  854 
1011 static inline struct file *__fget(unsigned in !! 855 static inline struct file *__fget(unsigned int fd, fmode_t mask,
                                                   >> 856                                   unsigned int refs)
                                                   >> 857 {
                                                   >> 858         return __fget_files(current->files, fd, mask, refs);
                                                   >> 859 }
                                                   >> 860 
                                                   >> 861 struct file *fget_many(unsigned int fd, unsigned int refs)
1012 {                                                862 {
1013         return __fget_files(current->files, f !! 863         return __fget(fd, FMODE_PATH, refs);
1014 }                                                864 }
1015                                                  865 
1016 struct file *fget(unsigned int fd)               866 struct file *fget(unsigned int fd)
1017 {                                                867 {
1018         return __fget(fd, FMODE_PATH);        !! 868         return __fget(fd, FMODE_PATH, 1);
1019 }                                                869 }
1020 EXPORT_SYMBOL(fget);                             870 EXPORT_SYMBOL(fget);
1021                                                  871 
1022 struct file *fget_raw(unsigned int fd)           872 struct file *fget_raw(unsigned int fd)
1023 {                                                873 {
1024         return __fget(fd, 0);                 !! 874         return __fget(fd, 0, 1);
1025 }                                                875 }
1026 EXPORT_SYMBOL(fget_raw);                         876 EXPORT_SYMBOL(fget_raw);
1027                                                  877 
1028 struct file *fget_task(struct task_struct *ta    878 struct file *fget_task(struct task_struct *task, unsigned int fd)
1029 {                                                879 {
1030         struct file *file = NULL;                880         struct file *file = NULL;
1031                                                  881 
1032         task_lock(task);                         882         task_lock(task);
1033         if (task->files)                         883         if (task->files)
1034                 file = __fget_files(task->fil !! 884                 file = __fget_files(task->files, fd, 0, 1);
1035         task_unlock(task);                       885         task_unlock(task);
1036                                                  886 
1037         return file;                             887         return file;
1038 }                                                888 }
1039                                                  889 
1040 struct file *lookup_fdget_rcu(unsigned int fd !! 890 struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd)
1041 {                                             << 
1042         return __fget_files_rcu(current->file << 
1043                                               << 
1044 }                                             << 
1045 EXPORT_SYMBOL_GPL(lookup_fdget_rcu);          << 
1046                                               << 
1047 struct file *task_lookup_fdget_rcu(struct tas << 
1048 {                                                891 {
1049         /* Must be called with rcu_read_lock     892         /* Must be called with rcu_read_lock held */
1050         struct files_struct *files;              893         struct files_struct *files;
1051         struct file *file = NULL;                894         struct file *file = NULL;
1052                                                  895 
1053         task_lock(task);                         896         task_lock(task);
1054         files = task->files;                     897         files = task->files;
1055         if (files)                               898         if (files)
1056                 file = __fget_files_rcu(files !! 899                 file = files_lookup_fd_rcu(files, fd);
1057         task_unlock(task);                       900         task_unlock(task);
1058                                                  901 
1059         return file;                             902         return file;
1060 }                                                903 }
1061                                                  904 
1062 struct file *task_lookup_next_fdget_rcu(struc !! 905 struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret_fd)
1063 {                                                906 {
1064         /* Must be called with rcu_read_lock     907         /* Must be called with rcu_read_lock held */
1065         struct files_struct *files;              908         struct files_struct *files;
1066         unsigned int fd = *ret_fd;               909         unsigned int fd = *ret_fd;
1067         struct file *file = NULL;                910         struct file *file = NULL;
1068                                                  911 
1069         task_lock(task);                         912         task_lock(task);
1070         files = task->files;                     913         files = task->files;
1071         if (files) {                             914         if (files) {
1072                 for (; fd < files_fdtable(fil    915                 for (; fd < files_fdtable(files)->max_fds; fd++) {
1073                         file = __fget_files_r !! 916                         file = files_lookup_fd_rcu(files, fd);
1074                         if (file)                917                         if (file)
1075                                 break;           918                                 break;
1076                 }                                919                 }
1077         }                                        920         }
1078         task_unlock(task);                       921         task_unlock(task);
1079         *ret_fd = fd;                            922         *ret_fd = fd;
1080         return file;                             923         return file;
1081 }                                                924 }
1082 EXPORT_SYMBOL(task_lookup_next_fdget_rcu);    << 
1083                                                  925 
1084 /*                                               926 /*
1085  * Lightweight file lookup - no refcnt increm    927  * Lightweight file lookup - no refcnt increment if fd table isn't shared.
1086  *                                               928  *
1087  * You can use this instead of fget if you sa    929  * You can use this instead of fget if you satisfy all of the following
1088  * conditions:                                   930  * conditions:
1089  * 1) You must call fput_light before exiting    931  * 1) You must call fput_light before exiting the syscall and returning control
1090  *    to userspace (i.e. you cannot remember     932  *    to userspace (i.e. you cannot remember the returned struct file * after
1091  *    returning to userspace).                   933  *    returning to userspace).
1092  * 2) You must not call filp_close on the ret    934  * 2) You must not call filp_close on the returned struct file * in between
1093  *    calls to fget_light and fput_light.        935  *    calls to fget_light and fput_light.
1094  * 3) You must not clone the current task in     936  * 3) You must not clone the current task in between the calls to fget_light
1095  *    and fput_light.                            937  *    and fput_light.
1096  *                                               938  *
1097  * The fput_needed flag returned by fget_ligh    939  * The fput_needed flag returned by fget_light should be passed to the
1098  * corresponding fput_light.                     940  * corresponding fput_light.
1099  */                                              941  */
1100 static inline struct fd __fget_light(unsigned !! 942 static unsigned long __fget_light(unsigned int fd, fmode_t mask)
1101 {                                                943 {
1102         struct files_struct *files = current-    944         struct files_struct *files = current->files;
1103         struct file *file;                       945         struct file *file;
1104                                                  946 
1105         /*                                    !! 947         if (atomic_read(&files->count) == 1) {
1106          * If another thread is concurrently  << 
1107          * by put_files_struct(), we must not << 
1108          * entry combined with the new refcou << 
1109          * return a file that is concurrently << 
1110          *                                    << 
1111          * atomic_read_acquire() pairs with a << 
1112          * put_files_struct().                << 
1113          */                                   << 
1114         if (likely(atomic_read_acquire(&files << 
1115                 file = files_lookup_fd_raw(fi    948                 file = files_lookup_fd_raw(files, fd);
1116                 if (!file || unlikely(file->f    949                 if (!file || unlikely(file->f_mode & mask))
1117                         return EMPTY_FD;      !! 950                         return 0;
1118                 return BORROWED_FD(file);     !! 951                 return (unsigned long)file;
1119         } else {                                 952         } else {
1120                 file = __fget_files(files, fd !! 953                 file = __fget(fd, mask, 1);
1121                 if (!file)                       954                 if (!file)
1122                         return EMPTY_FD;      !! 955                         return 0;
1123                 return CLONED_FD(file);       !! 956                 return FDPUT_FPUT | (unsigned long)file;
1124         }                                        957         }
1125 }                                                958 }
1126 struct fd fdget(unsigned int fd)              !! 959 unsigned long __fdget(unsigned int fd)
1127 {                                                960 {
1128         return __fget_light(fd, FMODE_PATH);     961         return __fget_light(fd, FMODE_PATH);
1129 }                                                962 }
1130 EXPORT_SYMBOL(fdget);                         !! 963 EXPORT_SYMBOL(__fdget);
1131                                                  964 
1132 struct fd fdget_raw(unsigned int fd)          !! 965 unsigned long __fdget_raw(unsigned int fd)
1133 {                                                966 {
1134         return __fget_light(fd, 0);              967         return __fget_light(fd, 0);
1135 }                                                968 }
1136                                                  969 
1137 /*                                               970 /*
1138  * Try to avoid f_pos locking. We only need i    971  * Try to avoid f_pos locking. We only need it if the
1139  * file is marked for FMODE_ATOMIC_POS, and i    972  * file is marked for FMODE_ATOMIC_POS, and it can be
1140  * accessed multiple ways.                       973  * accessed multiple ways.
1141  *                                               974  *
1142  * Always do it for directories, because pidf    975  * Always do it for directories, because pidfd_getfd()
1143  * can make a file accessible even if it othe    976  * can make a file accessible even if it otherwise would
1144  * not be, and for directories this is a corr    977  * not be, and for directories this is a correctness
1145  * issue, not a "POSIX requirement".             978  * issue, not a "POSIX requirement".
1146  */                                              979  */
1147 static inline bool file_needs_f_pos_lock(stru    980 static inline bool file_needs_f_pos_lock(struct file *file)
1148 {                                                981 {
1149         return (file->f_mode & FMODE_ATOMIC_P    982         return (file->f_mode & FMODE_ATOMIC_POS) &&
1150                 (file_count(file) > 1 || file !! 983                 (file_count(file) > 1 || S_ISDIR(file_inode(file)->i_mode));
1151 }                                                984 }
1152                                                  985 
1153 struct fd fdget_pos(unsigned int fd)          !! 986 unsigned long __fdget_pos(unsigned int fd)
1154 {                                                987 {
1155         struct fd f = fdget(fd);              !! 988         unsigned long v = __fdget(fd);
1156         struct file *file = fd_file(f);       !! 989         struct file *file = (struct file *)(v & ~3);
1157                                                  990 
1158         if (file && file_needs_f_pos_lock(fil    991         if (file && file_needs_f_pos_lock(file)) {
1159                 f.word |= FDPUT_POS_UNLOCK;   !! 992                 v |= FDPUT_POS_UNLOCK;
1160                 mutex_lock(&file->f_pos_lock)    993                 mutex_lock(&file->f_pos_lock);
1161         }                                        994         }
1162         return f;                             !! 995         return v;
1163 }                                                996 }
1164                                                  997 
1165 void __f_unlock_pos(struct file *f)              998 void __f_unlock_pos(struct file *f)
1166 {                                                999 {
1167         mutex_unlock(&f->f_pos_lock);            1000         mutex_unlock(&f->f_pos_lock);
1168 }                                                1001 }
1169                                                  1002 
1170 /*                                               1003 /*
1171  * We only lock f_pos if we have threads or i    1004  * We only lock f_pos if we have threads or if the file might be
1172  * shared with another process. In both cases    1005  * shared with another process. In both cases we'll have an elevated
1173  * file count (done either by fdget() or by f    1006  * file count (done either by fdget() or by fork()).
1174  */                                              1007  */
1175                                                  1008 
1176 void set_close_on_exec(unsigned int fd, int f    1009 void set_close_on_exec(unsigned int fd, int flag)
1177 {                                                1010 {
1178         struct files_struct *files = current-    1011         struct files_struct *files = current->files;
1179         struct fdtable *fdt;                     1012         struct fdtable *fdt;
1180         spin_lock(&files->file_lock);            1013         spin_lock(&files->file_lock);
1181         fdt = files_fdtable(files);              1014         fdt = files_fdtable(files);
1182         if (flag)                                1015         if (flag)
1183                 __set_close_on_exec(fd, fdt);    1016                 __set_close_on_exec(fd, fdt);
1184         else                                     1017         else
1185                 __clear_close_on_exec(fd, fdt    1018                 __clear_close_on_exec(fd, fdt);
1186         spin_unlock(&files->file_lock);          1019         spin_unlock(&files->file_lock);
1187 }                                                1020 }
1188                                                  1021 
1189 bool get_close_on_exec(unsigned int fd)          1022 bool get_close_on_exec(unsigned int fd)
1190 {                                                1023 {
                                                   >> 1024         struct files_struct *files = current->files;
                                                   >> 1025         struct fdtable *fdt;
1191         bool res;                                1026         bool res;
1192         rcu_read_lock();                         1027         rcu_read_lock();
1193         res = close_on_exec(fd, current->file !! 1028         fdt = files_fdtable(files);
                                                   >> 1029         res = close_on_exec(fd, fdt);
1194         rcu_read_unlock();                       1030         rcu_read_unlock();
1195         return res;                              1031         return res;
1196 }                                                1032 }
1197                                                  1033 
1198 static int do_dup2(struct files_struct *files    1034 static int do_dup2(struct files_struct *files,
1199         struct file *file, unsigned fd, unsig    1035         struct file *file, unsigned fd, unsigned flags)
1200 __releases(&files->file_lock)                    1036 __releases(&files->file_lock)
1201 {                                                1037 {
1202         struct file *tofree;                     1038         struct file *tofree;
1203         struct fdtable *fdt;                     1039         struct fdtable *fdt;
1204                                                  1040 
1205         /*                                       1041         /*
1206          * We need to detect attempts to do d    1042          * We need to detect attempts to do dup2() over allocated but still
1207          * not finished descriptor.  NB: Open    1043          * not finished descriptor.  NB: OpenBSD avoids that at the price of
1208          * extra work in their equivalent of     1044          * extra work in their equivalent of fget() - they insert struct
1209          * file immediately after grabbing de    1045          * file immediately after grabbing descriptor, mark it larval if
1210          * more work (e.g. actual opening) is    1046          * more work (e.g. actual opening) is needed and make sure that
1211          * fget() treats larval files as abse    1047          * fget() treats larval files as absent.  Potentially interesting,
1212          * but while extra work in fget() is     1048          * but while extra work in fget() is trivial, locking implications
1213          * and amount of surgery on open()-re    1049          * and amount of surgery on open()-related paths in VFS are not.
1214          * FreeBSD fails with -EBADF in the s    1050          * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
1215          * deadlocks in rather amusing ways,     1051          * deadlocks in rather amusing ways, AFAICS.  All of that is out of
1216          * scope of POSIX or SUS, since neith    1052          * scope of POSIX or SUS, since neither considers shared descriptor
1217          * tables and this condition does not    1053          * tables and this condition does not arise without those.
1218          */                                      1054          */
1219         fdt = files_fdtable(files);              1055         fdt = files_fdtable(files);
1220         fd = array_index_nospec(fd, fdt->max_    1056         fd = array_index_nospec(fd, fdt->max_fds);
1221         tofree = fdt->fd[fd];                    1057         tofree = fdt->fd[fd];
1222         if (!tofree && fd_is_open(fd, fdt))      1058         if (!tofree && fd_is_open(fd, fdt))
1223                 goto Ebusy;                      1059                 goto Ebusy;
1224         get_file(file);                          1060         get_file(file);
1225         rcu_assign_pointer(fdt->fd[fd], file)    1061         rcu_assign_pointer(fdt->fd[fd], file);
1226         __set_open_fd(fd, fdt);                  1062         __set_open_fd(fd, fdt);
1227         if (flags & O_CLOEXEC)                   1063         if (flags & O_CLOEXEC)
1228                 __set_close_on_exec(fd, fdt);    1064                 __set_close_on_exec(fd, fdt);
1229         else                                     1065         else
1230                 __clear_close_on_exec(fd, fdt    1066                 __clear_close_on_exec(fd, fdt);
1231         spin_unlock(&files->file_lock);          1067         spin_unlock(&files->file_lock);
1232                                                  1068 
1233         if (tofree)                              1069         if (tofree)
1234                 filp_close(tofree, files);       1070                 filp_close(tofree, files);
1235                                                  1071 
1236         return fd;                               1072         return fd;
1237                                                  1073 
1238 Ebusy:                                           1074 Ebusy:
1239         spin_unlock(&files->file_lock);          1075         spin_unlock(&files->file_lock);
1240         return -EBUSY;                           1076         return -EBUSY;
1241 }                                                1077 }
1242                                                  1078 
1243 int replace_fd(unsigned fd, struct file *file    1079 int replace_fd(unsigned fd, struct file *file, unsigned flags)
1244 {                                                1080 {
1245         int err;                                 1081         int err;
1246         struct files_struct *files = current-    1082         struct files_struct *files = current->files;
1247                                                  1083 
1248         if (!file)                               1084         if (!file)
1249                 return close_fd(fd);             1085                 return close_fd(fd);
1250                                                  1086 
1251         if (fd >= rlimit(RLIMIT_NOFILE))         1087         if (fd >= rlimit(RLIMIT_NOFILE))
1252                 return -EBADF;                   1088                 return -EBADF;
1253                                                  1089 
1254         spin_lock(&files->file_lock);            1090         spin_lock(&files->file_lock);
1255         err = expand_files(files, fd);           1091         err = expand_files(files, fd);
1256         if (unlikely(err < 0))                   1092         if (unlikely(err < 0))
1257                 goto out_unlock;                 1093                 goto out_unlock;
1258         return do_dup2(files, file, fd, flags    1094         return do_dup2(files, file, fd, flags);
1259                                                  1095 
1260 out_unlock:                                      1096 out_unlock:
1261         spin_unlock(&files->file_lock);          1097         spin_unlock(&files->file_lock);
1262         return err;                              1098         return err;
1263 }                                                1099 }
1264                                                  1100 
1265 /**                                              1101 /**
1266  * receive_fd() - Install received file into  !! 1102  * __receive_fd() - Install received file into file descriptor table
                                                   >> 1103  *
                                                   >> 1104  * @fd: fd to install into (if negative, a new fd will be allocated)
1267  * @file: struct file that was received from     1105  * @file: struct file that was received from another process
1268  * @ufd: __user pointer to write new fd numbe    1106  * @ufd: __user pointer to write new fd number to
1269  * @o_flags: the O_* flags to apply to the ne    1107  * @o_flags: the O_* flags to apply to the new fd entry
1270  *                                               1108  *
1271  * Installs a received file into the file des    1109  * Installs a received file into the file descriptor table, with appropriate
1272  * checks and count updates. Optionally write    1110  * checks and count updates. Optionally writes the fd number to userspace, if
1273  * @ufd is non-NULL.                             1111  * @ufd is non-NULL.
1274  *                                               1112  *
1275  * This helper handles its own reference coun    1113  * This helper handles its own reference counting of the incoming
1276  * struct file.                                  1114  * struct file.
1277  *                                               1115  *
1278  * Returns newly install fd or -ve on error.     1116  * Returns newly install fd or -ve on error.
1279  */                                              1117  */
1280 int receive_fd(struct file *file, int __user  !! 1118 int __receive_fd(int fd, struct file *file, int __user *ufd, unsigned int o_flags)
1281 {                                                1119 {
1282         int new_fd;                              1120         int new_fd;
1283         int error;                               1121         int error;
1284                                                  1122 
1285         error = security_file_receive(file);     1123         error = security_file_receive(file);
1286         if (error)                               1124         if (error)
1287                 return error;                    1125                 return error;
1288                                                  1126 
1289         new_fd = get_unused_fd_flags(o_flags) !! 1127         if (fd < 0) {
1290         if (new_fd < 0)                       !! 1128                 new_fd = get_unused_fd_flags(o_flags);
1291                 return new_fd;                !! 1129                 if (new_fd < 0)
                                                   >> 1130                         return new_fd;
                                                   >> 1131         } else {
                                                   >> 1132                 new_fd = fd;
                                                   >> 1133         }
1292                                                  1134 
1293         if (ufd) {                               1135         if (ufd) {
1294                 error = put_user(new_fd, ufd)    1136                 error = put_user(new_fd, ufd);
1295                 if (error) {                     1137                 if (error) {
1296                         put_unused_fd(new_fd) !! 1138                         if (fd < 0)
                                                   >> 1139                                 put_unused_fd(new_fd);
1297                         return error;            1140                         return error;
1298                 }                                1141                 }
1299         }                                        1142         }
1300                                                  1143 
1301         fd_install(new_fd, get_file(file));   !! 1144         if (fd < 0) {
1302         __receive_sock(file);                 !! 1145                 fd_install(new_fd, get_file(file));
1303         return new_fd;                        !! 1146         } else {
1304 }                                             !! 1147                 error = replace_fd(new_fd, file, o_flags);
1305 EXPORT_SYMBOL_GPL(receive_fd);                !! 1148                 if (error)
1306                                               !! 1149                         return error;
1307 int receive_fd_replace(int new_fd, struct fil !! 1150         }
1308 {                                             << 
1309         int error;                            << 
1310                                                  1151 
1311         error = security_file_receive(file);  !! 1152         /* Bump the sock usage counts, if any. */
1312         if (error)                            << 
1313                 return error;                 << 
1314         error = replace_fd(new_fd, file, o_fl << 
1315         if (error)                            << 
1316                 return error;                 << 
1317         __receive_sock(file);                    1153         __receive_sock(file);
1318         return new_fd;                           1154         return new_fd;
1319 }                                                1155 }
1320                                                  1156 
1321 static int ksys_dup3(unsigned int oldfd, unsi    1157 static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
1322 {                                                1158 {
1323         int err = -EBADF;                        1159         int err = -EBADF;
1324         struct file *file;                       1160         struct file *file;
1325         struct files_struct *files = current-    1161         struct files_struct *files = current->files;
1326                                                  1162 
1327         if ((flags & ~O_CLOEXEC) != 0)           1163         if ((flags & ~O_CLOEXEC) != 0)
1328                 return -EINVAL;                  1164                 return -EINVAL;
1329                                                  1165 
1330         if (unlikely(oldfd == newfd))            1166         if (unlikely(oldfd == newfd))
1331                 return -EINVAL;                  1167                 return -EINVAL;
1332                                                  1168 
1333         if (newfd >= rlimit(RLIMIT_NOFILE))      1169         if (newfd >= rlimit(RLIMIT_NOFILE))
1334                 return -EBADF;                   1170                 return -EBADF;
1335                                                  1171 
1336         spin_lock(&files->file_lock);            1172         spin_lock(&files->file_lock);
1337         err = expand_files(files, newfd);        1173         err = expand_files(files, newfd);
1338         file = files_lookup_fd_locked(files,     1174         file = files_lookup_fd_locked(files, oldfd);
1339         if (unlikely(!file))                     1175         if (unlikely(!file))
1340                 goto Ebadf;                      1176                 goto Ebadf;
1341         if (unlikely(err < 0)) {                 1177         if (unlikely(err < 0)) {
1342                 if (err == -EMFILE)              1178                 if (err == -EMFILE)
1343                         goto Ebadf;              1179                         goto Ebadf;
1344                 goto out_unlock;                 1180                 goto out_unlock;
1345         }                                        1181         }
1346         return do_dup2(files, file, newfd, fl    1182         return do_dup2(files, file, newfd, flags);
1347                                                  1183 
1348 Ebadf:                                           1184 Ebadf:
1349         err = -EBADF;                            1185         err = -EBADF;
1350 out_unlock:                                      1186 out_unlock:
1351         spin_unlock(&files->file_lock);          1187         spin_unlock(&files->file_lock);
1352         return err;                              1188         return err;
1353 }                                                1189 }
1354                                                  1190 
1355 SYSCALL_DEFINE3(dup3, unsigned int, oldfd, un    1191 SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
1356 {                                                1192 {
1357         return ksys_dup3(oldfd, newfd, flags)    1193         return ksys_dup3(oldfd, newfd, flags);
1358 }                                                1194 }
1359                                                  1195 
1360 SYSCALL_DEFINE2(dup2, unsigned int, oldfd, un    1196 SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
1361 {                                                1197 {
1362         if (unlikely(newfd == oldfd)) { /* co    1198         if (unlikely(newfd == oldfd)) { /* corner case */
1363                 struct files_struct *files =     1199                 struct files_struct *files = current->files;
1364                 struct file *f;               << 
1365                 int retval = oldfd;              1200                 int retval = oldfd;
1366                                                  1201 
1367                 rcu_read_lock();                 1202                 rcu_read_lock();
1368                 f = __fget_files_rcu(files, o !! 1203                 if (!files_lookup_fd_rcu(files, oldfd))
1369                 if (!f)                       << 
1370                         retval = -EBADF;         1204                         retval = -EBADF;
1371                 rcu_read_unlock();               1205                 rcu_read_unlock();
1372                 if (f)                        << 
1373                         fput(f);              << 
1374                 return retval;                   1206                 return retval;
1375         }                                        1207         }
1376         return ksys_dup3(oldfd, newfd, 0);       1208         return ksys_dup3(oldfd, newfd, 0);
1377 }                                                1209 }
1378                                                  1210 
1379 SYSCALL_DEFINE1(dup, unsigned int, fildes)       1211 SYSCALL_DEFINE1(dup, unsigned int, fildes)
1380 {                                                1212 {
1381         int ret = -EBADF;                        1213         int ret = -EBADF;
1382         struct file *file = fget_raw(fildes);    1214         struct file *file = fget_raw(fildes);
1383                                                  1215 
1384         if (file) {                              1216         if (file) {
1385                 ret = get_unused_fd_flags(0);    1217                 ret = get_unused_fd_flags(0);
1386                 if (ret >= 0)                    1218                 if (ret >= 0)
1387                         fd_install(ret, file)    1219                         fd_install(ret, file);
1388                 else                             1220                 else
1389                         fput(file);              1221                         fput(file);
1390         }                                        1222         }
1391         return ret;                              1223         return ret;
1392 }                                                1224 }
1393                                                  1225 
1394 int f_dupfd(unsigned int from, struct file *f    1226 int f_dupfd(unsigned int from, struct file *file, unsigned flags)
1395 {                                                1227 {
1396         unsigned long nofile = rlimit(RLIMIT_    1228         unsigned long nofile = rlimit(RLIMIT_NOFILE);
1397         int err;                                 1229         int err;
1398         if (from >= nofile)                      1230         if (from >= nofile)
1399                 return -EINVAL;                  1231                 return -EINVAL;
1400         err = alloc_fd(from, nofile, flags);     1232         err = alloc_fd(from, nofile, flags);
1401         if (err >= 0) {                          1233         if (err >= 0) {
1402                 get_file(file);                  1234                 get_file(file);
1403                 fd_install(err, file);           1235                 fd_install(err, file);
1404         }                                        1236         }
1405         return err;                              1237         return err;
1406 }                                                1238 }
1407                                                  1239 
1408 int iterate_fd(struct files_struct *files, un    1240 int iterate_fd(struct files_struct *files, unsigned n,
1409                 int (*f)(const void *, struct    1241                 int (*f)(const void *, struct file *, unsigned),
1410                 const void *p)                   1242                 const void *p)
1411 {                                                1243 {
1412         struct fdtable *fdt;                     1244         struct fdtable *fdt;
1413         int res = 0;                             1245         int res = 0;
1414         if (!files)                              1246         if (!files)
1415                 return 0;                        1247                 return 0;
1416         spin_lock(&files->file_lock);            1248         spin_lock(&files->file_lock);
1417         for (fdt = files_fdtable(files); n <     1249         for (fdt = files_fdtable(files); n < fdt->max_fds; n++) {
1418                 struct file *file;               1250                 struct file *file;
1419                 file = rcu_dereference_check_    1251                 file = rcu_dereference_check_fdtable(files, fdt->fd[n]);
1420                 if (!file)                       1252                 if (!file)
1421                         continue;                1253                         continue;
1422                 res = f(p, file, n);             1254                 res = f(p, file, n);
1423                 if (res)                         1255                 if (res)
1424                         break;                   1256                         break;
1425         }                                        1257         }
1426         spin_unlock(&files->file_lock);          1258         spin_unlock(&files->file_lock);
1427         return res;                              1259         return res;
1428 }                                                1260 }
1429 EXPORT_SYMBOL(iterate_fd);                       1261 EXPORT_SYMBOL(iterate_fd);
1430                                                  1262 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php